-- Module:ur-headword
--
-- From Wiktionary, the free dictionary.
--


-- Table returned at the end of the file; entry points are attached to it.
local export = {}
-- Per-part-of-speech handlers, keyed by the part-of-speech category passed as
-- parameter 1 of the invocation. Each entry has a `params` table (merged into
-- the accepted template parameters) and a `func(args, data)` callback.
local pos_functions = {}
local m_links = require("Module:links")
-- NOTE(review): m_labels and m_scripts are not referenced in the visible part
-- of this file -- confirm before removing.
local m_labels = require("Module:labels")
local m_scripts = require("Module:scripts")

-- Language object for Urdu ("ur"), plus the Hindi ("hi") language object and
-- the "Deva" script object that are attached to Hindi spellings.
local lang = require("Module:languages").getByCode("ur")
local hi_lang = require("Module:languages").getByCode("hi")
local hi_sc = require("Module:scripts").getByCode("Deva")
-- Title text of the current page; assigned again at the start of export.show.
local PAGENAME = mw.title.getCurrentTitle().text

-- Short aliases for the mw.ustring / mw.text helpers used below.
-- NOTE(review): usub is not referenced in the visible part of this file.
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local usub = mw.ustring.sub

-- Maps the gender codes "m" and "f" to their full names.
-- NOTE(review): not referenced in the visible part of this file.
local gender_to_full = {
	["m"] = "masculine", ["f"] = "feminine"
}

-- Build a wikilink to an anchor in [[Appendix:Glossary]].
-- `anchor` is the section anchor; `text` is the display text and falls back
-- to the anchor itself when omitted.
local function glossary_link(anchor, text)
	local display = text
	if not display then
		display = anchor
	end
	local target = "Appendix:Glossary#" .. anchor
	return "[[" .. target .. "|" .. display .. "]]"
end

-- Record a tracking entry for this module. `page` is appended to the
-- "ur-headword/" prefix and handed to the track function of Module:debug.
local function track(page)
	local debug_module = require("Module:debug")
	local tracking_key = "ur-headword/" .. page
	debug_module.track(tracking_key)
end

-- Turn a list of Hindi spellings into an inflection table labelled
-- "Hindi spelling". Links are stripped from each spelling, and every entry
-- carries the Hindi language object, the Hindi script object and a
-- transliteration of "-".
local function process_hindis(hindis)
	local spellings = {label = "Hindi spelling"}
	for _, spelling in ipairs(hindis) do
		local entry = {
			term = m_links.remove_links(spelling),
			lang = hi_lang,
			sc = hi_sc,
			translit = "-",
		}
		spellings[#spellings + 1] = entry
	end
	return spellings
end

-- Wrap one space-delimited word of a lemma in link brackets.
-- A single trailing punctuation character (one of , ; : ? !) is peeled off
-- and re-attached after the links. When split_hyphen is set, the word is
-- additionally split on hyphens and each part is linked separately, unless
-- the word begins or ends with a hyphen (prefixes and suffixes stay whole).
-- Code ported from [[Module:fr-headword]].
local function add_space_word_links(space_word, split_hyphen)
	local body, trailing = rmatch(space_word, "^(.*)([,;:?!])$")
	if not body then
		-- No final punctuation: link the word as given.
		body, trailing = space_word, ""
	end

	-- Prefixes and suffixes are never split.
	local keep_whole = not split_hyphen or rfind(body, "^%-") or rfind(body, "%-$")
	local pieces
	if keep_whole then
		pieces = {body}
	else
		pieces = rsplit(body, "%-")
	end

	local linked = {}
	for i, piece in ipairs(pieces) do
		linked[i] = "[[" .. piece .. "]]"
	end
	return table.concat(linked, "-") .. trailing
end

-- Add links to every word of a lemma. The lemma is split on spaces and each
-- piece is linked by add_space_word_links. Hyphens are split as well when
-- split_hyphen is given or when the lemma contains no space at all. Hyphen
-- splitting is not unconditional because of cases like "आदान-प्रदान करना",
-- where "आदान-प्रदान" should be linked as a whole.
-- If the result is a single link covering the entire lemma, the brackets are
-- dropped and the bare text is returned.
local function add_lemma_links(lemma, split_hyphen)
	local hyphen_split = split_hyphen
	if not rfind(lemma, " ") then
		-- Single-word lemma: splitting on hyphens is the sensible default.
		hyphen_split = true
	end

	local pieces = {}
	for i, space_word in ipairs(rsplit(lemma, " ")) do
		pieces[i] = add_space_word_links(space_word, hyphen_split)
	end
	local linked = table.concat(pieces, " ")

	local bare = rmatch(linked, "^%[%[([^%[%]]*)%]%]$")
	if bare then
		return bare
	end
	return linked
end


--- Entry point for the Urdu headword templates.
-- `frame.args[1]` must be the part-of-speech category (e.g. "nouns"); the
-- template's own arguments are read from the parent frame.
--
-- Parameters accepted for every part of speech:
--   head=        explicit headword(s); defaults to the auto-linked page name
--   tr=          manual transliteration(s)
--   sort=        sort key
--   hi=          explicit Hindi spelling(s)
--   splithyphen= force hyphen splitting when auto-linking the page name
-- Entries in `pos_functions` contribute further parameters.
--
-- Returns whatever `full_headword` of Module:headword returns for the
-- assembled data. Raises an error when the part of speech is missing.
function export.show(frame)
	PAGENAME = mw.title.getCurrentTitle().text

	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")

	local params = {
		["head"] = {list = true},
		["tr"] = {list = true, allow_holes = true},
		["sort"] = {},
		["hi"] = {list = true},
		["splithyphen"] = {type = "boolean"},
	}

	if PAGENAME:find(" ") then
		track("space")
	end

	-- Merge in the parameters specific to this part of speech, if any.
	local pos_handler = pos_functions[poscat]
	if pos_handler then
		for key, val in pairs(pos_handler.params) do
			params[key] = val
		end
	end

	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)

	-- No `sc` field is set: the previous code assigned an undefined global
	-- here, which always evaluated to nil, so the script was never specified.
	local data = {
		lang = lang,
		pos_category = poscat,
		heads = args["head"],
		translits = args["tr"],
		categories = {},
		genders = {},
		inflections = {enable_auto_translit = true},
		sort_key = args["sort"],
	}

	-- NOTE(review): "tr" is declared with allow_holes, so `#` may not see
	-- transliterations that follow a gap; this only affects tracking.
	if #data.translits > 0 then
		track("manual-translit/" .. poscat)
	end

	-- Heads explicitly supplied by the editor. This list is kept separately
	-- because data.heads is replaced by the auto-linked page name when empty.
	local explicit_heads = data.heads
	local auto_linked_head = add_lemma_links(PAGENAME, args["splithyphen"])
	if #explicit_heads == 0 then
		data.heads = {auto_linked_head}
	else
		for _, head in ipairs(explicit_heads) do
			if head == auto_linked_head then
				track("redundant-head")
			end
		end
	end

	if pos_handler then
		pos_handler.func(args, data)
	end

	if #args["hi"] > 0 then
		table.insert(data.inflections, process_hindis(args["hi"]))
	elseif #explicit_heads > 0 then
		-- Hindi spellings are generated only from explicitly supplied heads,
		-- never from the auto-linked page name.
		local ur_to_hi = require("Module:ur-hi-convert").tr
		local hindis = {}
		for _, head in ipairs(explicit_heads) do
			table.insert(hindis, ur_to_hi(head))
		end
		table.insert(data.inflections, process_hindis(hindis))
	end

	return require("Module:headword").full_headword(data)
end

-- Adjective headwords: optional comparative and superlative (also accepted
-- as positional parameters 1 and 2), lists of masculine and feminine forms,
-- and an "indeclinable" flag.
pos_functions.adjectives = {
	params = {
		[1] = {alias_of = "comparative"},
		[2] = {alias_of = "superlative"},
		["comparative"] = {},
		["superlative"] = {},
		["m"] = {list = true},
		["f"] = {list = true},
		["ind"] = {type = "boolean"},
	},
	func = function(args, data)
		local inflections = data.inflections

		if args["ind"] then
			table.insert(inflections, {label = glossary_link("indeclinable")})
			table.insert(data.categories, "Urdu indeclinable adjectives")
		end

		-- Degrees of comparison: the parameter name doubles as the label.
		for _, degree in ipairs({"comparative", "superlative"}) do
			local form = args[degree]
			if form then
				table.insert(inflections, {label = degree, form})
			end
		end

		-- Gendered forms: the argument list itself becomes the inflection
		-- entry once it has been given a label.
		for _, spec in ipairs({{"m", "masculine"}, {"f", "feminine"}}) do
			local forms = args[spec[1]]
			if #forms > 0 then
				forms.label = spec[2]
				table.insert(inflections, forms)
			end
		end
	end,
}

-- Gender codes accepted by process_genders.
local valid_genders = {
	["m"] = true,
	["f"] = true,
	["m-p"] = true,
	["f-p"] = true,
	["mf"] = true,
	["mf-p"] = true,
	["mfbysense"] = true,
	["mfbysense-p"] = true,
	["?"] = true,
}

-- Append each gender code in `genders` to `data.genders`, in order.
-- Raises "Invalid gender: <code>" on the first code that is not accepted;
-- codes preceding it have already been appended at that point.
local function process_genders(data, genders)
	for _, gender in ipairs(genders) do
		if not valid_genders[gender] then
			error("Invalid gender: " .. (gender or "(nil)"))
		end
		table.insert(data.genders, gender)
	end
end


-- Build the handler shared by noun-like parts of speech.
-- `plpos` is the plural part-of-speech name used in the
-- "Urdu indeclinable ..." category. The handler validates genders (default
-- "?"), flags indeclinable entries, and adds formal plural, masculine and
-- feminine forms as inflections, in that order.
local function nouns(plpos)
	-- Parameter name paired with the label its list receives.
	local labelled_lists = {
		{"pl", "formal plural"},
		{"m", "masculine"},
		{"f", "feminine"},
	}

	local function handle(args, data)
		process_genders(data, args["g"])

		if args["ind"] then
			table.insert(data.inflections, {label = glossary_link("indeclinable")})
			table.insert(data.categories, "Urdu indeclinable " .. plpos)
		end

		for _, spec in ipairs(labelled_lists) do
			local forms = args[spec[1]]
			if #forms > 0 then
				-- The argument list itself is used as the inflection entry.
				forms.label = spec[2]
				table.insert(data.inflections, forms)
			end
		end
	end

	return {
		params = {
			["g"] = {list = true, default = "?"},
			["pl"] = {list = true},
			["m"] = {list = true},
			["f"] = {list = true},
			["ind"] = {type = "boolean"},
		},
		func = handle,
	}
end

-- Common and proper nouns use the same handler; the argument is the plural
-- part-of-speech name that ends up in the "Urdu indeclinable ..." category.
pos_functions.nouns = nouns("nouns")
pos_functions["proper nouns"] = nouns("proper nouns")

-- Pronoun headwords: accept a list of gender codes in g=, which are
-- validated and appended to the headword's genders by process_genders.
pos_functions.pronouns = {
	params = {
		["g"] = {list = true},
	},
	func = function(args, data)
		local genders = args["g"]
		process_genders(data, genders)
	end,
}

-- Transitivity codes accepted as parameter 1 of verb headwords. Each entry
-- gives the glossary label to display and the categories to add, in order.
-- These tables are only read, never modified, so sharing one entry between
-- several codes is safe.
local ambitransitive_verb = {
	label = "ambitransitive",
	categories = {
		"Urdu ambitransitive verbs",
		"Urdu transitive verbs",
		"Urdu intransitive verbs",
	},
}
local verb_transitivity = {
	["t"] = {label = "transitive", categories = {"Urdu transitive verbs"}},
	["i"] = {label = "intransitive", categories = {"Urdu intransitive verbs"}},
	["d"] = {label = "ditransitive", categories = {"Urdu ditransitive verbs"}},
	["it"] = ambitransitive_verb,
	["ti"] = ambitransitive_verb,
	["a"] = ambitransitive_verb,
}

-- Verb headwords.
--   1=  optional transitivity code: 't', 'i', 'd', or 'it'/'ti'/'a'
--   g=  list of genders, passed through to the headword unvalidated
-- When the first head contains a space, the entry is also categorised as a
-- compound verb formed with the last space-separated word of that head.
-- Raises an error when parameter 1 is given but is not a recognised code.
pos_functions.verbs = {
	params = {
		[1] = {},
		["g"] = {list = true},
	},
	func = function(args, data)
		data.genders = args["g"]

		local code = args[1]
		if code then
			local info = verb_transitivity[code]
			if not info then
				error("Unrecognized param 1=" .. code .. ": Should be 'i' = intransitive, 't' = transitive, 'd' = ditransitive, or 'it'/'ti'/'a' = ambitransitive")
			end
			for _, category in ipairs(info.categories) do
				table.insert(data.categories, category)
			end
			table.insert(data.inflections, {label = glossary_link(info.label)})
		end

		local head = data.heads[1]
		if head:find(" ") then
			-- Strip links, then drop everything up to and including the last
			-- space so that only the final word (the base verb) remains.
			local base_verb = m_links.remove_links(head):gsub("^.* ", "")
			table.insert(data.categories, "Urdu compound verbs formed with " .. lang:makeEntryName(base_verb))
		end
	end,
}

-- Build a handler for parts of speech whose only extra parameter is a list
-- of genders (g=). The list is stored on the headword data as-is, without
-- validation. Every call returns fresh tables.
local function pos_with_gender()
	local function copy_genders(args, data)
		data.genders = args["g"]
	end

	local accepted = {
		["g"] = {list = true},
	}
	return {params = accepted, func = copy_genders}
end

-- Parts of speech that only take a gender list; each one gets its own
-- handler table from pos_with_gender().
for _, pos_name in ipairs({
	"numerals",
	"suffixes",
	"adjective forms",
	"noun forms",
	"pronoun forms",
	"determiner forms",
	"verb forms",
	"postposition forms",
}) do
	pos_functions[pos_name] = pos_with_gender()
end

return export