Difference between revisions 18626559 and 18626560 on frwiktionary

-- Language data loaded from [[Module:languages]]; indexed by language code below.
local languages = mw.loadData("Module:languages")
-- Table of this module's public functions; it is returned at the end of the file.
local export = {}

-- Transliterate `text` with the transliteration module registered for the
-- language code `lang`.
--   lang  language code used as the lookup key (e.g. "ru", "el")
--   text  value handed unchanged to the module's `tr` function
-- Returns whatever that `tr` function returns; returns nothing when no
-- module is registered for `lang`.
function export.translit(lang, text)
    -- TODO: the table's information should be moved to [[Module:languages]]
    local modules_by_lang = {
        ae = "Module:Avst-translit",
        ady = "Module:ady-translit",
        el = "Module:el-translit",
        ru = "Module:ru-translit",
        ug = "Module:ug-translit",
        tg = "Module:tg-translit",
        os = "Module:os-translit",
        ka = "Module:ka-translit",
        xcl = "Module:xcl-translit",
        axm = "Module:axm-translit",
        hy = "Module:hy-translit",
    }

    local module_name = modules_by_lang[lang]
    if not module_name then
        -- no transliteration available for this language
        return
    end

    -- the module is only loaded once a transliteration is actually requested
    return require(module_name).tr(text)
end
 
-- Detect the script in which `text` is written.
--   text  string to inspect
--   lang  language code used to index the data from [[Module:languages]]
-- Returns the code of the first script whose character set matches `text`,
-- or nothing when no script matches.
--
-- The character sets are read from the `sc_chars` field of [[Module:scripts]];
-- each one is spliced into a pattern character class, so the values are
-- expected to be range lists such as "Ѐ-ӿ".
function export.detect_script(text, lang)
    local sc_chars = mw.loadData("Module:scripts").sc_chars

    -- first try to detect the script based on the native script(s) of the language;
    -- an unknown language code simply has no native scripts to try
    local lang_data = languages[lang]
    local scripts = lang_data and lang_data.scripts or {}
    for _, script in ipairs(scripts) do
        -- removes the language code from script name, e.g. "nv-Latn" > "Latn"
        -- (gsub also returns a match count; only the first result is kept)
        local script2check = script:gsub(".-%-", "")
        if script2check == "Latf" or script2check == "Latinx" or script2check == "unicode" then
            script2check = "Latn"
        end

        local chars = sc_chars[script2check]
        if chars and mw.ustring.match(text, "[" .. chars .. "]") then
            -- report the script under the name the language data uses for it
            return script
        end
    end

    -- not written in native script(s); check for all scripts
    -- NOTE: pairs() visits keys in unspecified order, so when `text` mixes
    -- several scripts the one reported here is not fixed.
    for script, chars in pairs(sc_chars) do
        if mw.ustring.match(text, "[%[%d%p%s]-[" .. chars .. "]") then
            return script
        end
    end
end

-- Format the categories with the appropriate sort key
(contracted; show full)        i = i + 1
        cat = args[i]
    end
    
    return export.format_categories(categories, lang, sort_key)
end

return export