Difference between revisions 22373 and 34804 on mkwiktionary

local export = {}
local languages = mw.loadData("Module:languages")
local utilities = require("Module:utilities")

-- Language codes for which template_l generates an automatic transliteration
-- even when a manual one has been supplied.
-- TODO: This table should be moved to [[Module:languages]]
local override_manual_translit = {}
for _, code in ipairs({"el", "hy", "axm", "xcl", "ka", "tg", "ug"}) do
    override_manual_translit[code] = true
end

-- This is supposed to be used in {{l}}.
-- Builds an annotated link to a term from the arguments of the parent frame
-- (the template call):
--   1               language code (required)
--   2               term to link to (required)
--   3               alternative text shown as the link title
--   oth, oth2       further terms, linked after the main one, separated by ", "
--   gloss           gloss, handed to utilities.annotate
--   g (or g1), g2, g3   gender and number codes
--   sc              script code; detected or taken from the language data when absent
--   tr              manual transliteration
--   id              sense id, appended to the language name to form the link anchor
-- Raises an error when the language code or the term is missing, or when the
-- language code has no entry in the language data.
function export.template_l(frame)
    local args = frame:getParent().args

    -- Empty template arguments are treated the same as missing ones.
    local function param(key)
        local value = args[key]
        if value == "" then
            return nil
        end
        return value
    end

    local lang = param(1)
    if not lang then error("The first parameter (language code) has not been given") end
    local text = param(2)
    if not text then error("The second parameter (word) has not been given") end

    -- Fail with a readable message instead of "attempt to index a nil value"
    -- when the code is not present in the language data.
    local langdata = languages[lang]
    if not langdata then
        error("The language code \"" .. lang .. "\" is not valid")
    end

    local alt = param(3) -- alternative text as link title
    local oth = param("oth")
    local oth2 = param("oth2")
    local gloss = param("gloss")

    -- Gender and number; "g" takes precedence over "g1".
    -- The values are collected into a gap-free list: a constructor such as
    -- {nil, g2, g3} leaves holes, which makes the length operator and ipairs()
    -- unreliable for whoever receives the table.
    local genders = {}
    local function add_gender(value)
        if value then
            genders[#genders + 1] = value
        end
    end
    add_gender(param("g") or param("g1"))
    add_gender(param("g2"))
    add_gender(param("g3"))

    local script = param("sc")
    if not script then
        if langdata.scripts[2] then
            -- script is not specified; try to detect it based on the given text
            script = utilities.detect_script(alt or text, lang)
        end
        -- not specified and not detected: use the default (first) script of the language
        script = script or langdata.scripts[1]
    end

    -- transliteration
    local translit = param("tr")
    if not script:find("Latn") and (not translit or override_manual_translit[lang]) then
        -- the given text is in a non-Latin script, and the transliteration is not specified or we are
        -- allowed to override the specified transliteration with an automated one; so try to auto-transliterate it
        translit = utilities.translit(lang, alt or text)
        if translit == "" then translit = nil end
    end

    -- Link anchor: the language name, optionally followed by "-" and the sense id.
    local id = langdata.names[1]
    local senseid = param("id")
    if senseid then
        id = id .. "-" .. senseid
    end

    text = export.language_link(text, alt, lang, id)
    if oth then
        text = text .. ", " .. export.language_link(oth, nil, lang, id)
    end
    if oth2 then
        text = text .. ", " .. export.language_link(oth2, nil, lang, id)
    end

    return utilities.annotate(text, lang, "span", script, nil, translit, gloss, nil, genders, frame)
end

-- This is supposed to be used in {{term}}.
-- Reads the parent frame's arguments: 1 = language code (falls back to "und"),
-- 2 = term (required), 3 = alternative display text, 4 = gloss, plus the named
-- arguments lit, sc and tr. The result comes from export.annotated_link.
function export.template_term(frame)
    local args = frame:getParent().args

    -- An empty argument counts as a missing one.
    local function given(key)
        local value = args[key]
        if value ~= "" then
            return value
        end
        return nil
    end

    local lang = given(1) or "und"
    local text = given(2)
    if not text then
        error("The second parameter (word) has not been given")
    end
    local alt, gloss, lit = given(3), given(4), given("lit")

    -- The script code doubles as the CSS class of the link.
    local class = given("sc")
    if not class then
        local scripts = languages[lang].scripts
        -- Detection is only attempted for languages with more than one script;
        -- otherwise, or when detection yields nothing, the first script is used.
        class = (scripts[2] and utilities.detect_script(text, lang)) or scripts[1]
    end

    if class == "Latn" or class == "unicode" then
        class = class .. " mention-Latn"
    end

    -- Transliterate automatically only when no transliteration was supplied
    -- and the class does not mention Latn.
    local translit = given("tr")
    if not translit and not class:find("Latn") then
        translit = utilities.translit(lang, alt or text)
        if translit == "" then
            translit = nil
        end
    end

    return export.annotated_link(text, alt, lang, "span", class, "mention-tr", translit, gloss, lit)
end

-- Used in {{l-list}}.
-- Turns a bulleted list of words into a bulleted list of annotated links.
-- Parent frame arguments: 1 = language code (required), 2 = the list
-- (required), sc = script code (defaults to the first script of the language).
-- Each list item may be followed by a parenthesised annotation holding a
-- transliteration and/or a gloss in double quotes; whatever follows the closing
-- parenthesis is appended unchanged after the link.
function export.template_l_list(frame)
    local args = frame:getParent().args
    local lang = args[1]; if lang == nil or lang == "" then error("The first parameter (language code) has not been given") end
    local list = args[2]; if list == nil or list == "" then error("The second parameter (list) has not been given") end
    local script = args["sc"]; if script == "" then script = nil end
    if not script then
        script = languages[lang].scripts[1]
    end

    list = mw.text.trim(list, "%*\t\r\n\f ")
    -- Declared local: the original assignment created a global variable.
    local items = mw.text.split(list, "\n-%*")

    -- Output pieces are collected and joined once at the end instead of
    -- growing a string with ".." on every iteration.
    local pieces = {}

    for _, item in ipairs(items) do
        local word, annotations, ending, title, alt, translit, gloss
        item = mw.text.trim(item)

        -- Split "word (annotations) ending".
        word, annotations, ending = mw.ustring.match(item, "(.-)[ \t]-%((.-)%)(.-)$")

        if word then
            -- Within the annotations, look for an optional transliteration
            -- followed by a gloss in double quotes.
            translit, gloss = mw.ustring.match(annotations, "(.-),?\"(.-)\"")
            if not translit then
                -- No quoted gloss: the whole annotation is the transliteration.
                translit = annotations
            elseif translit == "" then
                translit = utilities.translit(lang, word)
            end
        else
            word = item
            translit = utilities.translit(lang, word)
        end

        -- An item may already be a piped wikilink; otherwise derive the title.
        title, alt = mw.ustring.match(word, "%[%[(.-)|(.-)%]%]")
        if not title then
            title = export.prepare_title(lang, word)
            alt = word
        end

        pieces[#pieces + 1] = "\n* " .. export.annotated_link(title, alt, lang, "span", script, nil, translit, gloss)

        if ending then
            pieces[#pieces + 1] = ending
        end
    end

    --list = utilities.tag_text(list, lang, "div", script) -- only if language is written top to bottom

    return table.concat(pieces)
end

-- Rewrites the wikilinks in a piece of text so that they point to the section
-- of the given language.
--   frame.args[1]        language code; when empty or missing the text is
--                        returned without section anchors
--   frame.args[2]        the text to transform (defaults to "")
--   frame.args.autolink  "words" wraps every run of characters other than
--                        brackets, braces, parentheses, whitespace and commas
--                        in [[ ]]; "all" wraps the whole text when it is a
--                        single such run. Both only apply when the text
--                        contains no wikilink yet.
function export.template_l_xform(frame)
    -- The parent frame's arguments are not needed here, so the parent frame is
    -- no longer looked up.
    local lang = frame.args[1]
    if lang == '' then
        lang = nil
    end
    local text = frame.args[2] or ''
    local autolink = frame.args.autolink

    if not mw.ustring.match(text, "%[%[.-%]%]") then
        if autolink == 'words' then
            text = mw.ustring.gsub(text, "([^{}%[%]%(%)%s,]+)", function (word)
                return '[[' .. word .. ']]'
            end)
        elseif autolink == 'all' then
            if mw.ustring.match(text, "^[^{}%[%]%(%)%s,]+$") then
                text = '[[' .. text .. ']]'
            end
        end
    end

    if not lang then
        return text
    end
    local id = languages[lang].names[1]

    -- Builds the replacement for one link. For unpiped links the callback
    -- receives a single capture, so the page title doubles as the link text.
    local function section_link(pagetitle, linktitle)
        return "[[" .. export.prepare_title(lang, pagetitle) .. "#" .. id .. "|" .. (linktitle or pagetitle) .. "]]"
    end

    -- Piped links first, then plain ones; links that already carry a "#" are
    -- not matched by either pattern.
    text = mw.ustring.gsub(text, "%[%[([^#]-)|(.-)%]%]", section_link)
    text = mw.ustring.gsub(text, "%[%[([^#|]-)%]%]", section_link)

    return text
end

-- Marks that are removed from page titles, keyed by language code.
-- Each value is a pattern for mw.ustring.gsub and is applied to the title
-- after it has been converted to Unicode normalization form D. The byte
-- escapes are the UTF-8 encodings of the characters to remove.
-- Kept outside the function so the table is built once, not on every call.
local title_strip_patterns = {
    ang = "[\204\132\204\135]", -- macron and above dot
    ar  = "[\217\139\217\140\217\141\217\142\217\143\217\144\217\145\217\146]",
    fa  = "[\217\142\217\143\217\144\217\145\217\146]",
    ur  = "[\217\139\217\140\217\141\217\142\217\143\217\144\217\145\217\146]",
    -- NOTE(review): this entry was labelled "acute accent", but its bytes are
    -- identical to the macron entries (la, nci) -- confirm which is intended.
    chl = "[\204\132]",
    he  = "[\214\176\214\177\214\178\214\179\214\180\214\181\214\182\214\183\214\184\214\185\214\186\214\187\214\188\214\189\214\191\215\129\215\130]",
    hr  = "[\204\143\204\128\204\145\204\129\204\132]",
    la  = "[\204\132]", -- macron
    lt  = "[\204\128\204\129\204\131]",
    nci = "[\204\132]", -- macron
    ru  = "[\204\128\204\129]",
    uk  = "[\204\128\204\129]",
    be  = "[\204\128\204\129]",
    bg  = "[\204\128\204\129]",
    mk  = "[\204\128\204\129]",
    sh  = "[\204\143\204\128\204\145\204\129\204\132]",
    sr  = "[\204\143\204\128\204\145\204\129\204\132]",
    sl  = "[\204\163\204\129\204\128\204\130\204\145\204\143]",
    tr  = "[\204\130]",
    -- "%-" is the pattern escape for a literal hyphen; the former "\-" is not a
    -- valid string escape in Lua versions after 5.1.
    zu  = "^%-" -- initial hyphen
}

-- Prepare page title.
-- lang: language code used to look up the language data and the strip pattern.
-- text: the term as written.
-- Returns the name of the page to link to. Raises an error when the language
-- is of type "reconstructed" but the term does not start with "*".
function export.prepare_title(lang, text)
    -- Link to appendix for reconstructed terms and terms in appendix-only languages
    if mw.ustring.sub(text, 1, 1) == "*" then
        return "Appendix:" .. languages[lang].names[1] .. "/" .. mw.ustring.sub(text, 2)
    elseif languages[lang].type == "reconstructed" then
        error("The specified language " .. languages[lang].names[1] .. " is unattested,"
              .. " while the given word is not marked with '*' to indicate that it is reconstructed")
    elseif languages[lang].type == "appendix-constructed" then
        return "Appendix:" .. languages[lang].names[1] .. "/" .. text
    end

    -- Remove punctuation
    text = mw.ustring.gsub(text, "[؟?!]$", "")

    -- Macrons n stuff
    local pattern = title_strip_patterns[lang]
    if pattern then
        -- Decompose so that combining marks become separate code points.
        text = mw.ustring.toNFD(text)
        if lang == "sh" then
            -- Recompose these two letters before stripping, so that their acute
            -- accent is not removed along with the other marks.
            text = mw.ustring.gsub(text, mw.ustring.toNFD("ć"), "ć")
            text = mw.ustring.gsub(text, mw.ustring.toNFD("Ć"), "Ć")
        end
        text = mw.ustring.gsub(text, pattern, "")
        text = mw.ustring.toNFC(text)
    end

    return text
end

-- Creates a basic wikilink to the given term. If the text already contains
-- links, these are replaced with links to the correct section.
-- text: the term, possibly containing [[...]] links of its own
-- alt:  display text, used only when text contains no links
-- lang: language code, passed on to export.prepare_title
-- id:   section anchor (without "#"), or nil for no anchor
function export.language_link(text, alt, lang, id)
    local anchor = id and ("#" .. id) or ""

    -- Plain term: wrap the whole thing in a single link.
    if not text:find("%[%[") then
        return "[[" .. export.prepare_title(lang, text) .. anchor .. "|" .. (alt or text) .. "]]"
    end

    -- A leading "*" applies to every embedded link that does not have one yet.
    if mw.ustring.sub(text, 0, 1) == "*" then
        text = mw.ustring.gsub(text, "%[%[([^%*][^#%]]-)|", "[[*%1|")
        text = mw.ustring.gsub(text, "%[%[([^%*][^#|]-)%]", "[[*%1|%1]")
    end

    local function piped_link(target, linktitle)
        return "[[" .. export.prepare_title(lang, target) .. anchor .. "|" .. linktitle .. "]]"
    end

    local function plain_link(target)
        return piped_link(target, target)
    end

    -- Piped links first, then unpiped ones; links that already contain "#"
    -- are matched by neither pattern.
    text = mw.ustring.gsub(text, "%[%[([^#%]]-)|(.-)%]%]", piped_link)
    text = mw.ustring.gsub(text, "%[%[([^#|]-)%]%]", plain_link)

    text = mw.ustring.gsub(text, "^%*%[%[(.-)|%*", "[[%1|*") -- remove the initial "*"

    return text
end

-- Create a full link to a term.
-- Links `text` (shown as `alt` when given) through export.language_link, using
-- `id` as the section anchor or, when `id` is absent, the first name listed
-- for `lang` in the language data. The linked text and the remaining arguments
-- are then handed to utilities.annotate, whose result is returned.
-- NOTE(review): the closing `end` of this function is not present in this
-- text, and the comment trailing the return statement looks like the header
-- line of another revision of the module -- confirm against the original page.
function export.annotated_link(text, alt, lang, tag, class, class_tr, translit, gloss, lit, gender, frame, id)
    text = export.language_link(text, alt, lang, id or languages[lang].names[1])

    return utilities.annotate(text, lang, tag, class, class_tr, translit, gloss, lit, gender, frame)-- Модул:links
-- Imported from mk.wikipedia
-- 2016-06-22 -- V2 -- last modified by DenisWasRight
-- Extended version for mk.wiktionary

local export = {}

--TODO: move to [[Module:languages]]
-- Language codes for which export.full_link generates an automatic
-- transliteration even when a manual one has been supplied.
-- "ko" is currently disabled.
local override_translit = {}
for _, code in ipairs({
	"ab", "abq", "ady", "av", "axm",
	"ba", "bo", "bua",
	"ce", "chm", "cv",
	"dar", "dv", "dz",
	"el",
	"gmy", "grc",
	"hy",
	"inh", "iu",
	"ka", "kk", "kbd", "kca", "kjh", "kjj", "kn", "koi", "kpv", "ky", "kv",
	"lo", "lbe", "lez", "lzz",
	"mdf", "ml", "mn", "my", "myv",
	"nog",
	"oge", "os",
	"sah", "si", "sgh", "sva",
	"ta", "tab", "te", "tg", "tt", "tyv",
	"ug", "udi", "udm",
	"xal", "xcl", "xmf",
}) do
	override_translit[code] = true
end

-- Language codes in whose terms every "^" character is removed before linking.
local ignore_cap = { ko = true }

-- Language code -> name of the module whose getTranslit function supplies a
-- missing transliteration in export.full_link.
local phonetic_extraction = { th = "Модул:th" }

-- Abbreviations accepted for the part of speech of a term, with the text that
-- is displayed for each. Values not listed here are displayed as given.
local pos_tags = {
	a    = "adjective",
	adv  = "adverb",
	int  = "interjection",
	n    = "noun",
	pron = "pronoun",
	v    = "verb",
	vi   = "intransitive verb",
	vt   = "transitive verb",
	vti  = "transitive and intransitive verb",
}

-- Works out which page a link to `target` should point to.
-- target: the link target as written
-- lang:   language object; its makeEntryName, getCode, getCanonicalName and
--         getType methods are used
-- Returns the page name, or nil when no link should be made (the target
-- contains an unexpanded template parameter, or it starts with "*" while the
-- language code is "und"). Raises an error when the language is of type
-- "reconstructed" but the target does not start with "*".
function export.getLinkPage(target, lang)
	-- If the link contains unexpanded template parameters, then don't create a link.
	if target:find("{{{", nil, true) then
		return nil
	end
	
	-- Targets with one of these prefixes are returned untouched.
	local has_prefix = target:find("^:") or target:find("^w:") or target:find("^wikipedia:")
	if has_prefix then
		return target
	end
	
	-- Remove diacritics from the page name
	local page = lang:makeEntryName(target)
	
	-- Link to appendix for reconstructed terms and terms in appendix-only languages
	if page:find("^*.") then
		if lang:getCode() == "und" then
			return nil
		end
		
		return "Викиречник:" .. lang:getCanonicalName() .. "/" .. mw.ustring.sub(page, 2)
	end
	
	local lang_type = lang:getType()
	
	if lang_type == "reconstructed" then
		error("The specified language " .. lang:getCanonicalName() .. " is unattested, while the given word is not marked with '*' to indicate that it is reconstructed")
	end
	
	if lang_type == "appendix-constructed" then
		return "Помош:" .. lang:getCanonicalName() .. "/" .. page
	end
	
	return page
end

-- Make a language-specific link from given link's parts
-- link: table with the fields target, display (optional) and fragment
--       (optional); the table is modified in place
-- lang: language object
-- id:   sense id; when given it is appended to the language name to form the
--       fragment, and it also disables the self-link check
-- allowSelfLink: when true, a link to the current page is produced as a
--       regular link
-- Returns wikitext: the bare display text when no page can be linked, a
-- "selflink" <strong> element for the current page, or a piped wikilink.
local function makeLangLink(link, lang, id, allowSelfLink)
	-- If there is no display form, then create a default one
	if not link.display then
		local shown = link.target
		
		-- Strip the prefix from the displayed form
		-- TODO: other interwiki links?
		if shown:find("^:") then
			shown = shown:gsub("^:", "")
		elseif shown:find("^w:") then
			shown = shown:gsub("^w:", "")
		elseif shown:find("^wikipedia:") then
			shown = shown:gsub("^wikipedia:", "")
		end
		
		link.display = shown
	end
	
	-- Process the target
	local page = export.getLinkPage(link.target, lang)
	link.target = page
	
	if not page then
		return link.display
	end
	
	-- If the target is the same as the current page, then return a "self-link" like the software does
	if not allowSelfLink and not id then
		local here = mw.title.getCurrentTitle().prefixedText
		
		if page == here or page == ":" .. here then
			return "<strong class=\"selflink\">" .. link.display .. "</strong>"
		end
	end
	
	-- Add fragment
	-- Do not add a section link to "Undetermined", as such sections do not exist and are invalid.
	-- TabbedLanguages handles links without a section by linking to the "last visited" section,
	-- but adding "Undetermined" would break that feature.
	local is_wikipedia = page:find("^w:") or page:find("^wikipedia:")
	
	if not is_wikipedia then
		if link.fragment or mw.ustring.find(page, "#$") then
			require("Модул:debug").track("links/fragment")
			require("Модул:debug").track("links/fragment/" .. lang:getCode())
		end
		
		if not link.fragment and lang:getCode() ~= "und" then
			if id then
				link.fragment = lang:getCanonicalName() .. "-" .. id
			elseif not page:find("^Помош:") and not page:find("^Викиречник:") then
				link.fragment = lang:getCanonicalName()
			end
		end
	end
	
	-- This allows linking to pages like [[sms:a]] without it being treated weirdly.
	link.target = mw.ustring.gsub(page, ":", "&#x3a;")
	
	local anchor = link.fragment and "#" .. link.fragment or ""
	
	return "[[" .. link.target .. anchor .. "|" .. link.display .. "]]"
end


-- Split a link into its parts
-- linktext: the text between "[[" and "]]"
-- Returns a table with the fields target and display (equal to the target
-- when the link is not piped), plus fragment when the target contains "#".
local function parseLink(linktext)
	-- Start from the unpiped case, where target and display coincide.
	local link = {target = linktext, display = linktext}
	
	-- "target|display"
	local target, display = mw.ustring.match(linktext, "^([^|]+)|(.+)$")
	
	if target then
		link.target, link.display = target, display
	end
	
	-- "page#fragment"
	local page, fragment = mw.ustring.match(link.target, "^(.+)#(.+)$")
	
	if page then
		link.target, link.fragment = page, fragment
	end
	
	return link
end


-- Creates a basic wikilink to the given term. If the text already contains
-- links, these are replaced with links to the correct section.
-- terminfo: table; the fields term, alt, lang and id are read
-- allowSelfLink, dontLinkRecons: passed on to makeLangLink
-- Returns the wikitext of the linked term.
local function language_link2(terminfo, allowSelfLink, dontLinkRecons)
	local lang, id = terminfo.lang, terminfo.id
	local term = terminfo.term
	
	-- The tracking module is only loaded when something is actually tracked.
	local function track(key)
		require("Модул:debug").track(key)
	end
	
	if ignore_cap[lang:getCode()] and term then
		term = mw.ustring.gsub(term, "%^", "")
	end
	
	-- If the text begins with * and another character,
	-- then act as if each link begins with *
	local allReconstructed = term:find("^*.") ~= nil
	
	-- There is no embedded wikilink, make a link using the parameters.
	if not term:find("[[", nil, true) then
		return makeLangLink({target = term, display = terminfo.alt}, lang, id, allowSelfLink, dontLinkRecons)
	end
	
	-- From here on the term contains embedded wikilinks.
	if id then
		track("links/bad id")
	end
	
	-- Begins and ends with a wikilink tag
	if mw.ustring.find(term, "^%[%[(.+)%]%]$") then
		if mw.ustring.find(term, "^%[%[[^%[%]]+%]%]$") then
			-- There are no [ ] in between.
			-- This makes the wikilink tag redundant.
			track("links/redundant wikilink")
		else
			local inner = mw.ustring.gsub(term, "^%[%[(.+)%]%]$", "%1")
			inner = mw.ustring.gsub(inner, "%]%], %[%[", "|")
			
			if not mw.ustring.find(inner, "[%[%]]") then
				track("links/list")
			end
		end
	end
	
	-- Replace every embedded link with a language-specific one.
	local linked = mw.ustring.gsub(term, "%[%[([^%]]+)%]%]",
		function(linktext)
			local link = parseLink(linktext)
			
			if allReconstructed then
				link.target = "*" .. link.target
			end
			
			return makeLangLink(link, lang, id, allowSelfLink, dontLinkRecons)
		end
		)
	
	-- Remove the extra * at the beginning if it's immediately followed
	-- by a link whose display begins with * too
	if allReconstructed then
		linked = mw.ustring.gsub(linked, "^%*%[%[([^|%]]+)|%*", "[[%1|*")
	end
	
	return linked
end


-- Format the annotations (things following the linked term)
-- terminfo: table; the fields interwiki, genders, lang, tr, gloss, pos and
--           lit are read
-- face:     when "term", the transliteration gets the extra "mention-tr" class
-- Returns the annotation text, or "" when there is nothing to show.
function export.format_link_annotations(terminfo, face)
	-- Wraps a gloss in typographic double quotes, each part in its own span.
	local function quoted(gloss)
		return "<span class=\"mention-gloss-double-quote\">“</span><span class=\"mention-gloss\">" .. gloss .. "</span><span class=\"mention-gloss-double-quote\">”</span>"
	end
	
	local ret = ""
	
	-- Interwiki link
	if terminfo.interwiki then
		ret = ret .. terminfo.interwiki
	end
	
	-- Genders
	if terminfo.genders and #terminfo.genders > 0 then
		local m_gender = require("Модул:gender and number")
		ret = ret .. "&nbsp;" .. m_gender.format_list(terminfo.genders, terminfo.lang)
	end
	
	-- Everything below ends up inside one pair of parentheses.
	local parts = {}
	
	-- Transliteration
	if terminfo.tr then
		local class = (face == "term") and "tr mention-tr" or "tr"
		parts[#parts + 1] = "<span lang=\"\" class=\"" .. class .. "\">" .. terminfo.tr .. "</span>"
	end
	
	-- Gloss/translation
	if terminfo.gloss then
		parts[#parts + 1] = quoted(terminfo.gloss)
	end
	
	-- Part of speech
	if terminfo.pos then
		parts[#parts + 1] = pos_tags[terminfo.pos] or terminfo.pos
	end
	
	-- Literal/sum-of-parts meaning
	if terminfo.lit then
		parts[#parts + 1] = "literally " .. quoted(terminfo.lit)
	end
	
	if #parts > 0 then
		ret = ret .. " &lrm;(" .. table.concat(parts, ", ") .. ")"
	end
	
	return ret
end


-- A version of {{l}} or {{m}} that can be called from other modules too
-- Two calling conventions are accepted:
--   full_link(terminfo, face, allowSelfLink, dontLinkRecons)
--     terminfo is a table; the fields term, alt, lang, sc and tr are read
--     here, and the table is also passed to language_link2 and
--     export.format_link_annotations.
--   full_link(term, alt, lang, sc, face, id, annotations, allowSelfLink, dontLinkRecons)
--     the arguments are packed into such a table, and the call is tracked
--     under "links/term not table".
-- lang and sc are used as objects (methods such as getCode are called on them).
-- terminfo is modified in place: sc may be filled in, tr may be cleared or
-- replaced.
-- Returns the tagged link (or a term request), followed by the formatted
-- annotations and any transliteration maintenance categories.
function export.full_link(term, alt, lang, sc, face, id, annotations, allowSelfLink, dontLinkRecons)
	local terminfo = term
	
	if type(terminfo) == "table" then
		-- Table form: the remaining positional parameters are shifted left,
		-- so they arrive under the names alt, lang and sc.
		face = alt
		allowSelfLink = lang
		dontLinkRecons = sc
	else
		terminfo = {term = term, alt = alt, lang = lang, sc = sc, id = id, genders = annotations and annotations.genders, tr = annotations and annotations.tr, gloss = annotations and annotations.gloss, pos = annotations and annotations.pos, lit = annotations and annotations.lit, interwiki = annotations and annotations.interwiki}
		require("Модул:debug").track("links/term not table")
	end
	
	-- Create the link
	local link = ""
	
	-- NOTE(review): m_utilities is not referenced again in this function.
	local m_utilities = require("Модул:utilities")
	local m_scriptutils = require("Модул:script utilities")
	
	-- Is there any text to show?
	if (terminfo.term or terminfo.alt) then
		-- Try to detect the script if it was not provided
		if not terminfo.sc then
			terminfo.sc = require("Модул:scripts").findBestScript(terminfo.alt or terminfo.term, terminfo.lang)
		end
		
		-- Only make a link if the term has been given, otherwise just show the alt text without a link
		link = m_scriptutils.tag_text(terminfo.term and language_link2(terminfo, allowSelfLink, dontLinkRecons) or terminfo.alt, terminfo.lang, terminfo.sc, face)
	else
		-- No term to show.
		-- Is there at least a transliteration we can work from?
		link = m_scriptutils.request_script(terminfo.lang, terminfo.sc)
		
		if link == "" or not terminfo.tr or terminfo.tr == "-" then
			-- No link to show, and no transliteration either. Show a term request.
			local category = ""
			
			-- The request category is left out on pages in the "Template" namespace.
			if mw.title.getCurrentTitle().nsText ~= "Template" then
				category = "[[Категорија:" .. terminfo.lang:getCanonicalName() .. " term requests]]"
			end
			
			link = "<small>[Термин?]</small>" .. category
		end
	end
	
	-- mantrFix: a manual transliteration was replaced by a different automated one.
	-- redtrFix: the manual transliteration equals the automated one.
	-- NOTE(review): manual_tr is assigned below but not read again in this function.
	local mantrFix, redtrFix
	local manual_tr = ""
	
	if terminfo.tr == "" or terminfo.tr == "-" then
		-- An empty or "-" transliteration is cleared, and none is generated.
		terminfo.tr = nil
		
	elseif phonetic_extraction[terminfo.lang:getCode()] then
		-- Languages listed in phonetic_extraction get a missing transliteration
		-- from their own module; a manual one is kept as it is.
		local m_phonetic = require(phonetic_extraction[terminfo.lang:getCode()])
		terminfo.tr = terminfo.tr or m_phonetic.getTranslit(export.remove_links(terminfo.term))
	
	elseif (terminfo.term or terminfo.alt) and not ((terminfo.sc:getCode():find("Latn", nil, true)) or terminfo.sc:getCode() == "Latinx") and (not terminfo.tr or override_translit[terminfo.lang:getCode()]) then
		-- Try to generate a transliteration if necessary
		local automated_tr = terminfo.lang:transliterate(export.remove_links(terminfo.alt or terminfo.term), terminfo.sc)
		
		if automated_tr then
			if terminfo.tr ~= automated_tr then
				if terminfo.tr then
					manual_tr = terminfo.tr
					mantrFix = true
				end
				
				-- Turn the transliteration itself into a link when the language asks for it.
				if terminfo.lang:link_tr() then
					automated_tr = makeLangLink({target = automated_tr}, terminfo.lang)
				end
				
				terminfo.tr = automated_tr
			else
				redtrFix = true
			end
		end
	end
	
	return link .. export.format_link_annotations(terminfo, face)
				.. (mantrFix and "[[Категорија:Terms with manual transliterations different from the automated ones]][[Category:Terms with manual transliterations different from the automated ones/" .. terminfo.lang:getCode() .. "]]" or "")
				.. (redtrFix and "[[Категорија:Terms with redundant transliterations]][[Category:Terms with redundant transliterations/" .. terminfo.lang:getCode() .. "]]" or "")
end


-- Creates a link to a term without any annotations.
-- Two calling conventions are accepted:
--   language_link(terminfo, allowSelfLink), with terminfo a table, and
--   language_link(text, alt, lang, id, allowSelfLink), which is packed into
--   such a table and tracked under "links/term not table".
-- Returns the result of language_link2.
function export.language_link(text, alt, lang, id, allowSelfLink)
	if type(text) == "table" then
		-- Table form: the second positional argument is allowSelfLink.
		return language_link2(text, alt)
	end
	
	local terminfo = {term = text, alt = alt, lang = lang, id = id}
	require("Модул:debug").track("links/term not table")
	
	return language_link2(terminfo, allowSelfLink)
end


-- Strips all square brackets out or replaces them.
-- text: a string, or a table whose args[1] holds the string (so the function
--       can also be given a frame); a missing value is treated as "".
-- Returns the text with category links removed entirely and every other
-- wikilink reduced to its display text.
function export.remove_links(text)
	if type(text) == "table" then
		text = text.args[1]
	end
	text = text or ""
	
	-- Applied in this order; each match is replaced by the empty string.
	local removals = {
		"%[%[Категорија:[^|%]]-|?[^|%]]-%]%]", -- whole category links
		"%[%[[^|%]]-|",                        -- "[[target|" of piped links
		"%[%[",                                -- remaining opening brackets
		"%]%]",                                -- remaining closing brackets
	}
	
	for _, pattern in ipairs(removals) do
		text = text:gsub(pattern, "")
	end

	return text
end

return export