Module:zhx-sic-pron

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

local export = {}
local m_string_utils = require("Module:string utilities")

local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local gmatch = m_string_utils.gmatch
local gsplit = mw.text.gsplit
local lower = m_string_utils.lower
local upper = m_string_utils.upper

-- Sichuanese Pinyin initial -> IPA onset.
-- The empty string stands for a syllable written without an initial.
local initialConv = {
	-- stops: written b/d/g are plain, written p/t/k are aspirated
	b = "p",
	d = "t",
	g = "k",
	p = "pʰ",
	t = "tʰ",
	k = "kʰ",

	-- affricates
	z = "t͡s",
	j = "t͡ɕ",
	c = "t͡sʰ",
	q = "t͡ɕʰ",

	-- nasals
	m = "m",
	n = "n",
	ny = "nʲ",
	ng = "ŋ",

	-- fricatives
	f = "f",
	s = "s",
	x = "ɕ",
	h = "x",
	w = "v",
	r = "z",

	-- no initial
	[""] = "",
}

-- Sichuanese Pinyin final -> IPA rime.
-- 'ir' is an internal key produced by the code (see the z/c/s/r + i rule); it
-- is not a spelling used in actual Sichuanese Pinyin.
-- Keys that are not plain Lua identifiers ('in' is a reserved word, 'ü' is
-- non-ASCII) keep the bracketed string form.
local finalConv = {
	-- internal / rhotic
	ir = "z̩",
	er = "ɚ",

	-- finals without a medial
	a = "a",
	o = "o",
	e = "ɛ",
	ai = "ai",
	ei = "ei",
	ao = "au",
	ou = "əu",
	an = "an",
	en = "ən",
	ang = "aŋ",
	ong = "oŋ",

	-- finals beginning with i
	i = "i",
	ia = "ia",
	ie = "iɛ",
	iai = "iɛi",
	iao = "iau",
	iu = "iəu",
	ian = "iɛn",
	["in"] = "in",
	iang = "iaŋ",

	-- finals beginning with u
	u = "u",
	ua = "ua",
	ue = "uɛ",
	uai = "uai",
	ui = "uei",
	uan = "uan",
	un = "uən",
	uang = "uaŋ",

	-- finals pronounced with /y/
	["ü"] = "y",
	["üo"] = "yo",
	["üe"] = "ye",
	["üan"] = "yan",
	["ün"] = "yn",
	iong = "yoŋ",
}

-- Tone character -> superscript tone letters used in the IPA output.
-- '-' is kept as a superscript minus so that hyphenated tone strings stay joined.
local toneConv = {
	["1"] = "⁵⁵",
	["2"] = "²¹",
	["3"] = "⁵³",
	["4"] = "²¹³",
	["-"] = "⁻",
}

-- Initials that are respelled in the 'SWZ' output; any initial not listed
-- here is emitted unchanged by the caller.
local initialConv_swz = {
	j = "g",
	q = "k",
	n = "l",
	ny = "n",
	h = "x",
	w = "",
	r = "rh",
}
-- Finals that are respelled in the 'SWZ' output; any final not listed here
-- is emitted unchanged by the caller.
local finalConv_swz = {
	ir = "",
	er = "r",
	ong = "ung",
	uang = "uong",
	iong = "yng",
	["ü"] = "y",
	["üo"] = "iuo",
	["üe"] = "ye",
	["üan"] = "uan",
	["ün"] = "un",
}

--- Normalise a written (initial, final) pair into the keys used by the
-- conversion tables, undoing the y-/w- spelling conventions.
-- @param initial written initial (lower case, may be "")
-- @param final   written final (lower case)
-- @return the normalised initial and final
local function fix(initial, final)
	-- After y/j/q/x a leading written "u" stands for ü, e.g. ju /tɕy/.
	local after_palatal = find(initial, '^[yjqx]$')
	if after_palatal and find(final, '^u') then
		final = gsub(final, '^u', 'ü')
	end

	-- "y" is only a spelling device: drop it and restore the i- medial
	-- unless the final already starts with i/ü (yin /in/, yuan /yan/, ya /ia/).
	if initial == 'y' then
		if final == 'ou' then
			final = 'iu'
		elseif not find(final, '^[iü]') then
			final = 'i' .. final
		end
		initial = ''
	end

	-- "w" is kept as a real initial only in "wu" (/vu/). Elsewhere it is a
	-- spelling device for the u- medial: wei -> ui (/uei/), wen -> un (/uən/),
	-- otherwise "u" is prefixed (wai -> uai).
	if initial == 'w' and final ~= 'u' then
		local respelled = { ei = 'ui', en = 'un' }
		initial = ''
		final = respelled[final] or ('u' .. final)
	end

	-- The "i" written after z/c/s/r is a different vowel; mark it as 'ir'.
	if final == 'i' and find(initial, '^[zcsr]$') then
		final = 'ir'
	end

	return initial, final
end

--- Validate a parsed syllable as it was written (before normalisation).
-- Raises an error describing the first problem found; returns nothing.
-- @param initial written initial, lower case ("" when absent)
-- @param final   written final, lower case
-- @param tone    tone string as written
local function warn(initial, final, tone)
	-- A syllable without an initial must not start with i/u.
	local no_initial = (initial == "")
	if no_initial and find(final, "^[iu]") then
		error("Syllables in Sichuanese Pinyin do not begin with i-/u-. Add y-/w-.")
	end

	-- "y" has no entry in initialConv but is an accepted written initial.
	local initial_ok = initial == "y" or initialConv[initial]
	if not initial_ok then
		error("Invalid initial: " .. initial)
	end

	-- "uo" has no entry in finalConv but is an accepted written final.
	local final_ok = final == "uo" or finalConv[final]
	if not final_ok then
		error("Invalid final: " .. final)
	end

	if tone == "5" then
		error("Chengdu does not have the fifth tone anymore. Use 2.")
	end
end

--- Convert Sichuanese Pinyin into IPA or into the 'SWZ' spelling.
--
-- @param text   Either the pinyin string itself, or a frame-like table whose
--               `args[1]` is the pinyin string and `args[2]` the scheme.
--               Alternative readings are separated by '/'. Each syllable is a
--               run of letters followed by tone digits (optionally joined by
--               '-') and any trailing non-letter characters.
-- @param scheme 'IPA' or 'SWZ'.
-- @return For 'IPA': each reading wrapped in slashes, readings joined by ', '.
--         For 'SWZ': readings joined by ' / ', or `false` as soon as an erhua
--         syllable is encountered (output is disabled for those).
-- Raises an error when a syllable cannot be parsed, fails validation in
-- `warn`, has no IPA final after normalisation, or when `scheme` is neither
-- 'IPA' nor 'SWZ' (only detected once a syllable is being converted).
function export.convert(text, scheme)
	if type(text) == "table" then
		text, scheme = text.args[1], text.args[2]
	end

	local result = {}
	for word in gsplit(text, '/') do
		local converted = {}

		-- Leading non-letter characters of the reading (re-attached for SWZ only).
		local extra2 = match(word, '^[^A-Za-zü]*')
		for syllable in gmatch(word, '[A-Za-zü]+[%d%-]+[^A-Za-zü]*') do
			local initial, final, erhua, tone, extra = match(syllable, '^([BDGPTKZJCQMNFSXHVRWYbdgptkzjcqmnfsxhvrwy]?[yg]?)([AEOaiueoüng]+)(r?)([%d%-]+)([^A-Za-zü]*)$')

			-- The anchored pattern is stricter than the gmatch above (e.g. it
			-- has no "l" initial); report that instead of failing later on a
			-- nil concatenation.
			if not initial then
				error("Invalid syllable: " .. syllable)
			end

			local caps = false

			if find(initial .. final, '[A-Z]') then
				caps = true
				initial, final = lower(initial), lower(final)
			end

			warn(initial, final, tone)

			initial, final = fix(initial, final)
			-- "er" parses as final "e" + erhua "r"; treat it as the final 'er'.
			if final == 'e' and erhua == 'r' then
				final, erhua = 'er', ''
			end

			if scheme == 'IPA' then
				-- warn() runs on the written form and lets "uo" through, and
				-- fix() can build finals such as "io"/"uo" that have no IPA
				-- entry; fail with a readable message in that case.
				local ipa_final = finalConv[final]
				if not ipa_final then
					error("Invalid final: " .. final)
				end

				initial = initialConv[initial]
				final = ipa_final
				-- Characters without a toneConv entry are left as they are.
				tone = gsub(tone, '.', function(char) return toneConv[char] end)

				if erhua == 'r' then
					if find(final, '^y') then -- final begins with /y/
						final = 'yɚ'
					elseif find(final, '^i') then -- final begins with /i/
						final = 'iɚ'
					elseif find(final, '^u') then -- final begins with /u/
						final = 'uɚ'
					elseif (final == 'o' or final == 'oŋ') and find(initial, '^[pmfv]') then
						final = 'ɚ'
					elseif final == 'o' or final == 'oŋ' then
						final = 'uɚ'
					else -- every other final
						final = 'ɚ'
					end
				end

				syllable = initial .. final .. tone

				table.insert(converted, syllable)
			elseif scheme == 'SWZ' then
				initial = initialConv_swz[initial] or initial
				final = finalConv_swz[final] or final

				-- For a "digit-digit" tone keep only the second digit.
				tone = gsub(tone, '(%d)%-(%d)', '%2')

				-- XXX: what happens with erhua? (disabled output for now)
				-- cf the given example 貓(mer)
				if erhua == 'r' then return false end

				-- Tone 3 doubles the "a" of the finals a/ai.
				if tone == '3' and (final == 'a' or final == 'ai') then
					final = 'a' .. final
				end

				syllable = initial .. final

				if caps then syllable = gsub(syllable, '^.', upper) end

				-- '@' marks the start of each syllable for the joining rules below.
				table.insert(converted, '@' .. syllable .. extra)
			else
				error('Convert to what representation?')
			end
		end

		if scheme == 'IPA' then
			local ipa = '/' .. table.concat(converted, ' ') .. '/'
			table.insert(result, ipa)
		elseif scheme == 'SWZ' then
			local swz = table.concat(converted, '')
			-- Joining rules, applied in this order at each '@' boundary:
			swz = gsub(swz, '([a-z])@(u)', '%1w')  -- letter + "@u" -> letter + "w"
			swz = gsub(swz, '([a-z])@(i)', '%1j')  -- letter + "@i" -> letter + "j"
			swz = gsub(swz, '([ng])@(y)', '%1j')   -- n/g + "@y"    -> n/g + "j"
			swz = gsub(swz, '@un', 'wen')          -- remaining "@un" -> "wen"
			swz = gsub(swz, '@', '')               -- drop the remaining markers
			table.insert(result, extra2 .. swz)
		end
	end

	if scheme == 'IPA' then
		return table.concat(result, ', ')
	else
		return table.concat(result, ' / ')
	end
end

return export