Revision 18626532 of "Module:utilities" on frwiktionary

local languages = mw.loadData("Module:languages")
local export = {}

-- Language code -> name of the module that transliterates that language.
-- TODO: the table's information should be moved to [[Module:languages]]
local translit_module_names = {
    ae = "Module:Avst-translit",
    ady = "Module:ady-translit",
    el = "Module:el-translit",
    ru = "Module:ru-translit",
    ug = "Module:ug-translit",
    tg = "Module:tg-translit",
    ka = "Module:ka-translit",
    xcl = "Module:xcl-translit",
    axm = "Module:axm-translit",
    hy = "Module:hy-translit",
}

-- Transliterate the text, if possible.
--   lang: language code used to look up the transliteration module
--   text: the text handed to that module's `tr` function
-- Returns whatever the module's `tr(text)` returns; returns nothing when no
-- transliteration module is listed for `lang`.
function export.translit(lang, text)
    local module_name = translit_module_names[lang]
    if not module_name then
        return
    end

    -- The module is only loaded once a matching language has been requested.
    return require(module_name).tr(text)
end
 
-- Detect the script of a string from the characters it contains.
--   text: the string to examine
--   lang: language code; the scripts listed for it in [[Module:languages]]
--         are tried first, in their listed order. May be nil or unknown, in
--         which case only the generic check over all scripts is done.
-- Returns the script code (a key of chars_table), or nil when the text
-- contains no character from any listed script.
function export.detect_script(text, lang)
    -- list of characters that may occur at the beginning of a word
    local chars_table = {
        ["Latn"] = "A-Za-z",
        ["Arab"] = "ءاآأإئؤبتثجچحخدذرزسشصضطظعغفقكلمنوهي",
        ["Armn"] = "Ա-֊",
        ["Beng"] = "ঁ-৺",
        ["Cyrl"] = "Ѐ-ӿ",
        ["Deva"] = "ँ-ॽ",
        ["fa-Arab"] = "کیٔ",
        ["Geor"] = "Ⴀ-ჼ",
        ["Goth"] = "𐌰-𐍊",
        ["Grek"] = "ʹ-Ͽ",
        ["Hebr"] = "א-ת",
        ["Khmr"] = "ក-៹",
        ["Laoo"] = "ກ-ໝ",
        ["Mong"] = "᠀-ᢪ",
        ["Mymr"] = "က-ၙ",
        ["Thai"] = "ก-ฺ",
        ["Sinh"] = "ං-෴",
        ["ug-Arab"] = "ۈۇې",
        -- TODO
    }

    -- First try the native scripts of the language. An unknown or missing
    -- language code simply has no native scripts to try.
    local langinfo = lang ~= nil and languages[lang] or nil
    local scripts = langinfo and langinfo.scripts or {}
    for _, script in ipairs(scripts) do
        local chars = chars_table[script]
        if chars and mw.ustring.match(text, "[%[%*%d%p%s]-[" .. chars .. "]") then
            return script
        end
    end

    -- Not written in a native script; check all scripts.
    -- chars_table has string keys, so it must be walked with pairs (ipairs
    -- would visit nothing). pairs has no defined order, so instead of
    -- returning the first hit we keep the script whose matching character
    -- occurs earliest in the text; that keeps the result deterministic for
    -- text that mixes several scripts.
    local best_script, best_pos
    for script, chars in pairs(chars_table) do
        -- The second result of find is the offset of the matched script character.
        local _, finish = mw.ustring.find(text, "[%[%d%p%s]-[" .. chars .. "]")
        if finish and (not best_pos
                or finish < best_pos
                or (finish == best_pos and script < best_script)) then
            best_script, best_pos = script, finish
        end
    end

    return best_script
end

-- Format the categories with the appropriate sort key.
--   categories: sequence of category names (without the "Category:" prefix);
--               it is read only, never modified
--   lang:       language code; must be a key of [[Module:languages]]
--   sort_key:   optional sort key; when nil, one is generated from the
--               subpage name and the language's sort_key rules, if any
-- Returns the concatenated "[[Category:...]]" links, or "" when the current
-- page is neither in the main namespace nor in "Appendix".
-- Raises an error when `lang` is not a known language code.
function export.format_categories(categories, lang, sort_key)
    local langinfo = lang ~= nil and languages[lang] or nil
    if not langinfo then
        -- tostring keeps the message intact even when lang is nil
        error("The language code \"" .. tostring(lang) .. "\" is not valid.")
    end

    -- NAMESPACE, PAGENAME and SUBPAGENAME are assigned without `local`, so
    -- they are globals; the `X = X or ...` form reuses a value that is
    -- already set.
    -- NOTE(review): presumably a deliberate cache shared with other modules;
    -- confirm before turning these into locals.
    NAMESPACE = NAMESPACE or mw.title.getCurrentTitle().nsText

    if NAMESPACE ~= "" and NAMESPACE ~= "Appendix" then
        return ""
    end

    PAGENAME = PAGENAME or mw.title.getCurrentTitle().text
    SUBPAGENAME = SUBPAGENAME or mw.title.getCurrentTitle().subpageText

    -- If no sort key was given, generate one
    if not sort_key then
        -- Generate a default language-independent sort key
        -- (drops one leading character from the set in the pattern)
        sort_key = mw.ustring.gsub(SUBPAGENAME, "^[-־ـ]", "")

        -- If there are language-specific rules to generate the key, use those
        if langinfo.sort_key then
            for i, from in ipairs(langinfo.sort_key.from) do
                -- A missing replacement means "delete the match"
                local to = langinfo.sort_key.to[i] or ""
                sort_key = mw.ustring.gsub(sort_key, from, to)
            end
        end
    end

    -- If the resulting key is the same as the wiki software's default, remove it
    if sort_key == PAGENAME then
        sort_key = nil
    end

    -- Build the links in a fresh table so the caller's list is left intact
    -- and can safely be passed in again.
    local suffix = sort_key and ("|" .. sort_key) or ""
    local links = {}
    for i, cat in ipairs(categories) do
        links[i] = "[[Category:" .. cat .. suffix .. "]]"
    end

    return table.concat(links, "")
end

-- Used by {{categorize}}
-- Reads the arguments of the calling template (the parent frame):
--   1       language code; may be omitted only in the "Template" namespace,
--           where "und" is used instead
--   2, 3... category names; empty ones are skipped, and the list ends at the
--           first missing argument
--   sort    optional sort key (an empty value counts as not given)
--   format  "pos" prefixes each category with the language's first name and
--           a space; "topic" prefixes it with the language code and ":"
-- Returns the result of export.format_categories for the collected names.
-- Raises an error when the language code is missing or unknown.
function export.template_categorize(frame)
    -- Assigned without `local`: a global, reused if it is already set.
    NAMESPACE = NAMESPACE or mw.title.getCurrentTitle().nsText
    local params = frame:getParent().args

    local lang = params[1]
    local sort_key = params["sort"]
    if sort_key == "" then
        sort_key = nil
    end
    local format = params["format"]

    if lang == nil or lang == "" then
        if NAMESPACE ~= "Template" then
            error("Language code has not been specified. Please pass parameter 1 to the template.")
        end
        lang = "und"
    end

    local langinfo = languages[lang]
    if not langinfo then
        error("The language code \"" .. lang .. "\" is not valid.")
    end

    local prefix
    if format == "pos" then
        prefix = langinfo.names[1] .. " "
    elseif format == "topic" then
        prefix = lang .. ":"
    else
        prefix = ""
    end

    -- Collect positional arguments from 2 onwards, up to the first gap.
    local categories = {}
    local index = 2
    while true do
        local name = params[index]
        if not name then
            break
        end

        if name ~= "" then
            categories[#categories + 1] = prefix .. name
        end
        index = index + 1
    end

    return export.format_categories(categories, lang, sort_key)
end

return export