Revision 22373 of "Модул:links" on mkwiktionary

local export = {}
local languages = mw.loadData("Module:languages")
local utilities = require("Module:utilities")

-- TODO: This table should be moved to [[Module:languages]]
-- Set of language codes for which a manually supplied transliteration may be
-- replaced by an automatically generated one.
local override_manual_translit = {}
for _, code in ipairs({ "el", "hy", "axm", "xcl", "ka", "tg", "ug" }) do
    override_manual_translit[code] = true
end

-- This is supposed to be used in {{l}}.
--
-- Builds an annotated link to a term from the arguments of the parent frame
-- (the template call):
--   1          language code (required)
--   2          term to link to (required)
--   3          alternative text shown as the link title
--   oth, oth2  further terms, linked and appended after the main one with ", "
--   gloss      gloss handed to utilities.annotate
--   g (or g1), g2, g3   gender/number values handed to utilities.annotate
--   sc         script code; if absent it is detected from the text (only for
--              languages listing more than one script) or defaults to the
--              first script of the language
--   tr         manual transliteration
--   id         sense id, appended to the section anchor as "-<id>"
-- An argument given as the empty string is treated as not given.
--
-- Raises an error if the language code or the term is missing, or if the
-- language code has no entry in the languages data.
function export.template_l(frame)
    local args = frame:getParent().args

    -- Fetch a template argument, mapping the empty string to nil.
    local function param(key)
        local value = args[key]
        if value == "" then
            return nil
        end
        return value
    end

    local lang = param(1)
    if not lang then
        error("The first parameter (language code) has not been given")
    end
    local text = param(2)
    if not text then
        error("The second parameter (word) has not been given")
    end
    local lang_data = languages[lang]
    if not lang_data then
        -- Fail with a readable message instead of an opaque
        -- "attempt to index a nil value" further down.
        error("The language code \"" .. lang .. "\" is not recognised")
    end

    local alt = param(3) -- alternative text as link title
    local oth = param("oth")
    local oth2 = param("oth2")
    local gloss = param("gloss")

    -- gender and number; "g" takes precedence over its alias "g1"
    local g1 = param("g") or param("g1")
    local g2 = param("g2")
    local g3 = param("g3")

    local script = param("sc")
    if not script then
        if lang_data.scripts[2] then -- script is not specified; try to detect it based on the given text
            script = utilities.detect_script(alt or text, lang)
        end
        if not script then -- script is not specified, and was not detected; use the default (first) script of the language
            script = lang_data.scripts[1]
        end
    end

    -- transliteration
    local translit = param("tr")
    if not script:find("Latn") and (not translit or override_manual_translit[lang]) then
        -- the given text is in a non-Latin script, and the transliteration is not specified or we are
        -- allowed to override the specified transliteration with an automated one; so try to auto-transliterate it
        local automatic = utilities.translit(lang, alt or text)
        -- Only replace a manual transliteration when the automatic one
        -- actually produced something; otherwise keep what the editor wrote.
        if automatic and automatic ~= "" then
            translit = automatic
        end
    end

    local id = lang_data.names[1]
    local sense_id = param("id")
    if sense_id then -- senseid
        id = id .. "-" .. sense_id
    end

    text = export.language_link(text, alt, lang, id)
    if oth then
        text = text .. ", " .. export.language_link(oth, nil, lang, id)
    end
    if oth2 then
        text = text .. ", " .. export.language_link(oth2, nil, lang, id)
    end

    return utilities.annotate(text, lang, "span", script, nil, translit, gloss, nil, {g1, g2, g3}, frame)
end

-- This is supposed to be used in {{term}}.
--
-- Reads the parent frame's arguments: 1 = language code (falls back to "und"),
-- 2 = term (required), 3 = alternative display text, 4 = gloss, plus the named
-- arguments lit, sc and tr. Empty strings count as missing. The result is
-- whatever export.annotated_link returns for the assembled values.
function export.template_term(frame)
    local params = frame:getParent().args

    -- Look up an argument, turning the empty string into nil.
    local function optional(key)
        local value = params[key]
        if value == "" then
            return nil
        end
        return value
    end

    local lang = optional(1) or "und"
    local text = optional(2)
    if text == nil then
        error("The second parameter (word) has not been given")
    end
    local alt = optional(3)
    local gloss = optional(4)
    local lit = optional("lit")

    -- CSS class: the explicit script code, else a detected script (only tried
    -- for languages with more than one script), else the language's first script.
    local class = optional("sc")
    if class == nil then
        local scripts = languages[lang].scripts
        class = (scripts[2] and utilities.detect_script(text, lang)) or scripts[1]
    end

    local latin_like = (class == "Latn") or (class == "unicode")
    if latin_like then
        class = class .. " mention-Latn"
    end

    -- Auto-transliterate only when no transliteration was supplied and the
    -- class string does not mention "Latn".
    local translit = optional("tr")
    if translit == nil and not class:find("Latn") then
        translit = utilities.translit(lang, alt or text)
        if translit == "" then
            translit = nil
        end
    end

    return export.annotated_link(text, alt, lang, "span", class, "mention-tr", translit, gloss, lit)
end

-- Used in {{l-list}}.
--
-- Turns a "*"-separated list of terms into a wikitext bullet list of annotated
-- links. Arguments come from the parent frame:
--   1   language code (required)
--   2   the list (required)
--   sc  script code; defaults to the first script of the language
--
-- Each item may have the form `word (annotations) ending`, where:
--   * annotations is either a transliteration, or `translit,"gloss"`; when the
--     part before the quoted gloss is empty the word is auto-transliterated;
--   * ending is any text after the closing parenthesis and is appended
--     verbatim after the link.
-- An item without parentheses is taken whole as the word and is
-- auto-transliterated. A word of the form `[[title|alt]]` supplies its own
-- page title and display text; otherwise the title is derived with
-- export.prepare_title.
--
-- Returns the list as one string in which every entry starts with "\n* ".
function export.template_l_list(frame)
    local args = frame:getParent().args
    local lang = args[1]; if lang == nil or lang == "" then error("The first parameter (language code) has not been given") end
    local list = args[2]; if list == nil or list == "" then error("The second parameter (list) has not been given") end
    local script = args["sc"]; if script == "" then script = nil end
    if not script then
        script = languages[lang].scripts[1]
    end

    -- Drop surrounding bullets/whitespace, then split on each remaining "*"
    -- together with any newlines directly in front of it.
    list = mw.text.trim(list, "%*\t\r\n\f ")
    local items = mw.text.split(list, "\n-%*")

    -- Collect the entries and join once at the end instead of growing a
    -- string inside the loop.
    local entries = {}

    for _, item in ipairs(items) do
        local word, annotations, ending, title, alt, translit, gloss
        item = mw.text.trim(item)

        word, annotations, ending = mw.ustring.match(item, "(.-)[ \t]-%((.-)%)(.-)$")

        if word then
            translit, gloss = mw.ustring.match(annotations, "(.-),?\"(.-)\"")
            if not translit then
                -- no quoted gloss: the whole parenthesised text is the transliteration
                translit = annotations
            elseif translit == "" then
                -- only a gloss was given
                translit = utilities.translit(lang, word)
            end
        else
            word = item
            translit = utilities.translit(lang, word)
        end

        title, alt = mw.ustring.match(word, "%[%[(.-)|(.-)%]%]")
        if not title then
            title = export.prepare_title(lang, word)
            alt = word
        end

        local entry = "\n* " .. export.annotated_link(title, alt, lang, "span", script, nil, translit, gloss)
        if ending then
            entry = entry .. ending
        end
        entries[#entries + 1] = entry
    end

    --list = utilities.tag_text(list, lang, "div", script) -- only if language is written top to bottom

    return table.concat(entries)
end

-- Rewrites the wikilinks inside a piece of text so that they point at the
-- section of the given language.
--
-- Arguments are read from the invoking frame itself (frame.args):
--   1         language code; if missing or empty the text is returned without
--             retargeting its links
--   2         the text (defaults to the empty string)
--   autolink  applies only when the text contains no [[...]] link:
--             'words' wraps every run of characters other than braces,
--                     brackets, parentheses, whitespace and commas in [[ ]];
--             'all'   wraps the whole text in [[ ]] if it is a single such run.
--
-- Links whose target contains "#" are left as they are.
function export.template_l_xform(frame)
    local lang = (frame.args[1] ~= '') and frame.args[1] or nil
    local text = frame.args[2] or ''
    local autolink = frame.args.autolink

    if not mw.ustring.match(text, "%[%[.-%]%]") then
        if autolink == 'words' then
            text = mw.ustring.gsub(text, "([^{}%[%]%(%)%s,]+)", function (word)
                return '[[' .. word .. ']]'
            end)
        elseif autolink == 'all' then
            if mw.ustring.match(text, "^[^{}%[%]%(%)%s,]+$") then
                text = '[[' .. text .. ']]'
            end
        end
    end

    if not lang then
        return text
    end
    local id = languages[lang].names[1]

    -- Piped links. The target must not contain "]", otherwise the match could
    -- start at an unpiped link and run on to the pipe of a later one
    -- (e.g. "[[a]] and [[b|c]]" would be taken as a single link).
    text = mw.ustring.gsub(text, "%[%[([^#%]]-)|(.-)%]%]", function(pagetitle, linktitle)
        return "[[" .. export.prepare_title(lang, pagetitle) .. "#" .. id .. "|" .. linktitle .. "]]"
    end)
    -- Unpiped links: the original target doubles as the link title.
    text = mw.ustring.gsub(text, "%[%[([^#|]-)%]%]", function(pagetitle)
        return "[[" .. export.prepare_title(lang, pagetitle) .. "#" .. id .. "|" .. pagetitle .. "]]"
    end)

    return text
end

-- Per-language patterns (for mw.ustring.gsub) of characters that are removed
-- from a term to obtain its page title. They are applied to the NFD
-- (decomposed) form of the text. Built once at load time rather than on
-- every call.
local title_strip_patterns = {
    ang = "[\204\132\204\135]", -- macron and above dot
    ar  = "[\217\139\217\140\217\141\217\142\217\143\217\144\217\145\217\146]", -- U+064B..U+0652
    fa  = "[\217\142\217\143\217\144\217\145\217\146]", -- U+064E..U+0652
    ur  = "[\217\139\217\140\217\141\217\142\217\143\217\144\217\145\217\146]", -- U+064B..U+0652
    -- U+0304 combining macron. NOTE(review): an earlier comment called this an
    -- acute accent (U+0301) -- confirm which mark is intended for chl.
    chl = "[\204\132]",
    he  = "[\214\176\214\177\214\178\214\179\214\180\214\181\214\182\214\183\214\184\214\185\214\186\214\187\214\188\214\189\214\191\215\129\215\130]",
    hr  = "[\204\143\204\128\204\145\204\129\204\132]",
    la  = "[\204\132]", -- macron
    lt  = "[\204\128\204\129\204\131]", -- grave, acute, tilde
    nci = "[\204\132]", -- macron
    ru  = "[\204\128\204\129]", -- grave, acute
    uk  = "[\204\128\204\129]",
    be  = "[\204\128\204\129]",
    bg  = "[\204\128\204\129]",
    mk  = "[\204\128\204\129]",
    sh  = "[\204\143\204\128\204\145\204\129\204\132]",
    sr  = "[\204\143\204\128\204\145\204\129\204\132]",
    sl  = "[\204\163\204\129\204\128\204\130\204\145\204\143]",
    tr  = "[\204\130]", -- circumflex
    zu  = "^%-" -- initial hyphen ("%" is the pattern escape; "\-" is not a valid string escape)
}

-- Languages whose strip pattern contains the acute accent but whose alphabet
-- has the letter ć/Ć, which must survive the stripping.
local keeps_c_acute = {
    hr = true,
    sh = true,
    sr = true
}

-- Prepare page title.
--
-- Converts a term as written in running text into the title of the page it
-- should link to.
--   lang  language code; must have an entry in the languages data
--   text  the term
-- Returns the page title. Terms starting with "*" and terms in languages of
-- type "appendix-constructed" are sent to "Appendix:<language name>/<term>".
-- Otherwise one trailing "?", "!" or Arabic question mark is removed and the
-- language-specific characters listed in title_strip_patterns are stripped.
--
-- Raises an error for an unknown language code, and for a language of type
-- "reconstructed" when the term does not start with "*".
function export.prepare_title(lang, text)
    local lang_data = languages[lang]
    if not lang_data then
        error("The language code \"" .. tostring(lang) .. "\" is not recognised")
    end

    -- Link to appendix for reconstructed terms and terms in appendix-only languages
    if mw.ustring.sub(text, 1, 1) == "*" then
        return "Appendix:" .. lang_data.names[1] .. "/" .. mw.ustring.sub(text, 2)
    elseif lang_data.type == "reconstructed" then
        error("The specified language " .. lang_data.names[1] .. " is unattested,"
              .. " while the given word is not marked with '*' to indicate that it is reconstructed")
    elseif lang_data.type == "appendix-constructed" then
        return "Appendix:" .. lang_data.names[1] .. "/" .. text
    end

    -- Remove punctuation
    text = mw.ustring.gsub(text, "[؟?!]$", "")

    -- Macrons n stuff
    local pattern = title_strip_patterns[lang]
    if pattern then
        -- Decompose so that combining marks become separate code points.
        text = mw.ustring.toNFD(text)
        if keeps_c_acute[lang] then
            -- Re-compose c + acute first so the precomposed letter is not
            -- touched when the combining acute is stripped below.
            text = mw.ustring.gsub(text, mw.ustring.toNFD("ć"), "ć")
            text = mw.ustring.gsub(text, mw.ustring.toNFD("Ć"), "Ć")
        end
        text = mw.ustring.gsub(text, pattern, "")
        text = mw.ustring.toNFC(text)
    end

    return text
end

-- Creates a basic wikilink to the given term. If the text already contains
-- links, these are replaced with links to the correct section.
--
--   text  term, or text with embedded [[...]] links
--   alt   display text; only used when text has no embedded links
--   lang  language code, passed to export.prepare_title
--   id    section anchor without the leading "#"; nil/false for no anchor
-- Returns the resulting wikitext.
function export.language_link(text, alt, lang, id)
    local anchor = ""
    if id then
        anchor = "#" .. id
    end

    -- No embedded links: the whole text becomes one link.
    if not text:find("%[%[") then
        return "[[" .. export.prepare_title(lang, text) .. anchor .. "|" .. (alt or text) .. "]]"
    end

    -- Text starting with "*": put a "*" in front of every link target that
    -- does not already begin with one, before the targets are converted to
    -- page titles below.
    if mw.ustring.sub(text, 0, 1) == "*" then
        text = mw.ustring.gsub(text, "%[%[([^%*][^#%]]-)|", "[[*%1|")
        text = mw.ustring.gsub(text, "%[%[([^%*][^#|]-)%]", "[[*%1|%1]")
    end

    -- Rebuilds a piped link with the prepared title and the section anchor.
    local function retarget_piped(target, linktitle)
        return "[[" .. export.prepare_title(lang, target) .. anchor .. "|" .. linktitle .. "]]"
    end

    -- Rebuilds an unpiped link; the original target is kept as the link title.
    local function retarget_plain(target)
        return "[[" .. export.prepare_title(lang, target) .. anchor .. "|" .. target .. "]]"
    end

    -- Piped links must be handled first: the links produced here contain "#"
    -- or "|" and are therefore skipped by the second pass.
    text = mw.ustring.gsub(text, "%[%[([^#%]]-)|(.-)%]%]", retarget_piped)
    text = mw.ustring.gsub(text, "%[%[([^#|]-)%]%]", retarget_plain)

    text = mw.ustring.gsub(text, "^%*%[%[(.-)|%*", "[[%1|*") -- remove the initial "*"

    return text
end

-- Create a full link to a term.
--
-- Links the text with export.language_link and passes the result, together
-- with the remaining arguments, to utilities.annotate. When no id is given,
-- the first name of the language is used as the section anchor.
function export.annotated_link(text, alt, lang, tag, class, class_tr, translit, gloss, lit, gender, frame, id)
    local section = id
    if not section then
        section = languages[lang].names[1]
    end

    local link = export.language_link(text, alt, lang, section)

    return utilities.annotate(link, lang, tag, class, class_tr, translit, gloss, lit, gender, frame)
end

return export