ข้ามไปเนื้อหา

มอดูล:sjd-IPA

จาก วิกิพจนานุกรม พจนานุกรมเสรี

local export = {}

local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("sjd")
local rsub = mw.ustring.gsub
local rlower = mw.ustring.lower

-- U+0304 COMBINING MACRON, built once here and concatenated into the
-- patterns that deal with macron-marked vowels.
local macron = mw.ustring.char(0x0304)

-- Pattern fragments (ustring character classes) describing a single segment
-- of the transliterated IPA text. They are spliced into larger patterns, so
-- each one must stay a self-contained pattern item.
-- ɨ is listed because `phon` maps ы to it; ʒ is listed because `phon` maps ж
-- to it (ž is kept for backward compatibility with literal Latin input).
local V = "[aɒeɛiɨuo]ː?" -- one vowel, optionally followed by the length mark
local C = "[bvɡdžʒzjklʎmnɲŋprstfxhʃɕ]ʲ?" -- one consonant, optionally palatalized

-- Per-character transliteration table: one Cyrillic letter (or the
-- apostrophe) to its base IPA value. It is applied with a "." pattern, i.e.
-- one character per lookup, so every key must be exactly one character long.
-- Letters that need context (е, ё, ю, я, ӓ, ӭ, ь, ҍ) are deliberately absent
-- and are resolved by later rules in `phonemic`; the digraph оа comes out of
-- this table as "oa" and is turned into ɒ by a later rule there as well.
local phon = {
	-- consonants
	["б"]="b",	["в"]="v",	["г"]="ɡ",	["д"]="d",	["ж"]="ʒ",	["з"]="z",
	["й"]="j",	["ҋ"]="j̥",	["ј"]="j̥",	["һ"]="h",	["'"]="h",	["к"]="k",
	["л"]="l",	["ӆ"]="l̥",	["м"]="m",	["ӎ"]="m̥",	["н"]="n",	["ӊ"]="n̥",
	["ӈ"]="ŋ",	["п"]="p",	["р"]="r",	["ҏ"]="r̥",	["с"]="s",	["т"]="t",
	["ф"]="f",	["х"]="x",	["ц"]="ts",	["ч"]="tʃ",	["ш"]="ʃ",	["щ"]="ɕ",
	-- vowels
	["а"]="a",	["и"]="i",	["о"]="o",	["у"]="u",	["ӯ"]="uː",
	["э"]="ɛ",	["ы"]="ɨ",	["ӣ"]="iː",
	-- ъ is transliterated as j here; the C-ъ-C rule in `phonemic` relies on it
	["ъ"]="j",
}

-- Converts a Cyrillic-script word into a phonemic IPA string.
--
-- The word is lower-cased, transliterated one character at a time through
-- `phon`, and then rewritten by an ordered series of substitutions. The order
-- is significant: most rules match the output of earlier ones, so Latin/IPA
-- letters in a pattern refer to already-transliterated text, while Cyrillic
-- letters refer to characters `phon` left untouched.
--
-- @param text string: the word to transcribe
-- @return string: the transcription (without enclosing slashes)
local function phonemic(text)
	text = rlower(text)
	-- general phonology: characters without an entry in `phon` pass through
	text = rsub(text, ".", phon)

	-- palatalization
	text = rsub(text, "([Nn])%1ь", "ɲː")
	text = rsub(text, "([Nn])ь", "ɲ")
	text = rsub(text, "([bvɡdʒzklmnŋprstfxһʒʃ])ь", "%1ʲ")
	text = rsub(text, "([bvɡdʒzklmnn̥ŋprstfxһʒʃ])ҍ", "%1ʲ")
	-- some consonants are affected if the preceding one is palatalized
	text = rsub(text, "([bɡdvlmnŋɲps])ʲ([vlnprst])", "%1ʲ%2ʲ")
	text = rsub(text, "h([ptk])ʲ", "hʲ%1ʲ")
	text = rsub(text, "xxʲ([ptk])", "xxʲ%1ʲ")
	text = rsub(text, "([bdɡ])([ptk])ʲ", "%1ʲ%2ʲ") -- semi-voiced geminates bp, dt, gk

	-- n, d, t preceding "semi-soft" ӓ and ӭ (geminate rule first, then single)
	text = rsub(text, "([NnDdTt])%1ӓ", "%1ːʲa")
	text = rsub(text, "([NnDdTt])ӓ", "%1ʲa")
	text = rsub(text, "([NnDdTt])%1ӭ", "%1ːʲɛ")
	text = rsub(text, "([NnDdTt])ӭ", "%1ʲɛ")

	-- palatal н/ɲ + vowels (geminate rule first, then single)
	text = rsub(text, "([Nn])%1я", "ɲːa")
	text = rsub(text, "([Nn])я", "ɲa")
	text = rsub(text, "([Nn])%1е", "ɲːe")
	text = rsub(text, "([Nn])е", "ɲe")
	text = rsub(text, "([Nn])%1ё", "ɲːo")
	text = rsub(text, "([Nn])ё", "ɲo")
	-- NOTE(review): и is already rewritten to i by `phon`, so the two rules
	-- below cannot fire as written — confirm whether they should match the
	-- Latin i instead.
	text = rsub(text, "([Nn])%1и", "ɲːi")
	text = rsub(text, "([Nn])и", "ɲi")
	-- the geminate rule must look for ю (untouched by `phon`), exactly like
	-- the single-consonant rule after it
	text = rsub(text, "([Nn])%1ю", "ɲːu")
	text = rsub(text, "([Nn])ю", "ɲu")

	text = rsub(text, "llʲj", "ʎː") -- palatal ʎ
	text = rsub(text, "lʲj", "ʎ")
	text = rsub(text, "llʲ", "lʲː") -- palatalized l
	text = rsub(text, "ll", "lː")

	-- consonant-ъ-consonant (creates a syllable boundary); ъ is "j" by now
	text = rsub(text, "([bvlmnst])j%1", "%1.%1") -- CjC → C.C

	-- j + vowels
	-- е̄/е
	text = rsub(text, "^" .. "([Ее])" .. macron, "ji̯e") -- initial е̄
	text = rsub(text, "(" .. C .. ")([Ее])" .. macron, "%1i̯e") -- е̄ following a consonant
	text = rsub(text, "^" .. "([Ее])", "je")
	text = rsub(text, "jе", "jje") -- Cyrillic е after j becomes je
	text = rsub(text, "(" .. C .. ")([Ее])", "%1ʲe")
	-- я̄/я
	text = rsub(text, "^" .. "([Яя])" .. macron, "je̯a")
	text = rsub(text, "([Яя])" .. macron, "e̯a")
	text = rsub(text, "^" .. "([Яя])", "ja")
	text = rsub(text, "([Яя])", "ʲa")
	-- ӣ, ё, ю̄, ю
	-- NOTE(review): ӣ is already rewritten to iː by `phon`, so the next rule
	-- cannot fire as written — confirm the intended treatment of initial ӣ.
	text = rsub(text, "^" .. "([Ӣӣ])", "jiː")
	text = rsub(text, "^" .. "([Ёё])", "jo")
	text = rsub(text, "([Ёё])", "ʲo")
	text = rsub(text, "^" .. "([Юю])" .. macron, "juː")
	text = rsub(text, "^" .. "([Юю])", "ju")
	text = rsub(text, "([Юю])", "ʲu")

	-- long vowels: a vowel followed by one combining macron
	text = rsub(text, "([aɒeɛiuo])" .. macron, "%1ː")
	-- long consonants
	text = rsub(text, "([bvɡdʒzjklmnŋrstfxʃɕ])%1ʲ", "%1ʲː")
	text = rsub(text, "([bvɡdʒzjklmnŋrstfxʃɕ])%1", "%1ː")
	text = rsub(text, "(j̥)%1", "%1ː")
	text = rsub(text, "(l̥)%1", "%1ː")
	text = rsub(text, "(m̥)%1", "%1ː")
	text = rsub(text, "(n̥)%1", "%1ː")
	text = rsub(text, "(r̥)%1", "%1ː")

	-- bringing everything into the proper form
	-- affricates (tie bars)
	text = rsub(text, "ts", "t͡s")
	text = rsub(text, "t͡st͡s", "t͡sː")
	text = rsub(text, "t͡sʲt͡sʲ", "t͡sʲː")
	text = rsub(text, "tʃ", "t͡ʃʲ")
	text = rsub(text, "t͡ʃʲt͡ʃʲ", "t͡ʃʲː")
	text = rsub(text, "dz", "d͡z")
	text = rsub(text, "dž", "d͡ʒ")
	-- ж is transliterated as ʒ, so this is the form дж actually has here;
	-- it covers both plain dʒ and palatalized dʒʲ
	text = rsub(text, "dʒ", "d͡ʒ")
	-- diphthongs
	text = rsub(text, "ua", "u̯a")
	text = rsub(text, "uɛ", "u̯ɛ")
	text = rsub(text, "uo", "u̯o")

	-- clean-up
	text = rsub(text, "oa", "ɒ") -- the digraph оа arrives here as "oa"
	text = rsub(text, "е", "e") -- remaining Cyrillic е → Latin e
	text = rsub(text, "(" .. macron .. ")", "ː") -- any macron still left over
	text = rsub(text, "ːʲ", "ʲː") -- palatalization mark goes before the length mark
	text = rsub(text, "jʲ", "j")
	text = rsub(text, "ɲʲ", "ɲ")
	text = rsub(text, "ʎʲ", "ʎ")
	text = rsub(text, "lʲʲ", "ʎː")
	text = rsub(text, "tʲs", "t͡s")

	-- stress: a primary-stress mark is prefixed when the start of the word
	-- matches one of the templates below. Every template is anchored at the
	-- start and begins with a C or V segment, so once a mark has been added
	-- none of the later templates can match.
	-- (secondary stress is not handled)
	-- CVCCV
	-- NOTE(review): the bare "." between the consonant groups matches any
	-- single character, not a literal dot — confirm the intent.
	text = rsub(text, "^" .. "(" .. C .. ")(" .. V .. ")(" .. C .. ").(" .. C .. ")(" .. V .. ")", "ˈ%1%2%3%4%5")
	-- CVVCCV (for diphthongs)
	text = rsub(text, "^" .. "(" .. C .. ")(" .. V .. ")(" .. V .. ")(" .. C .. ")(" .. C .. ")(" .. V .. ")", "ˈ%1%2%3%4%5%6")
	-- VCV
	text = rsub(text, "^" .. "(" .. V .. ")(" .. C .. ")(" .. V .. ")", "ˈ%1%2%3")
	text = rsub(text, "^" .. "(" .. V .. ")(" .. C .. ")ː(" .. V .. ")", "ˈ%1%2ː%3")
	-- VCCV
	text = rsub(text, "^" .. "(" .. V .. ")(" .. C .. ")(" .. C .. ")(" .. V .. ")", "ˈ%1%2%3%4")
	-- VCCCV
	text = rsub(text, "^" .. "(" .. V .. ")(" .. C .. ")(" .. C .. ")(" .. C .. ")(" .. V .. ")", "ˈ%1%2%3%4%5")
	text = rsub(text, "^" .. "(" .. V .. ")(" .. C .. ")(" .. C .. ")(" .. C .. ")ː(" .. V .. ")", "ˈ%1%2%3%4ː%5")
	-- VVCCV
	text = rsub(text, "^" .. "(" .. V .. ")(" .. V .. ")(" .. C .. ")(" .. C .. ")(" .. V .. ")", "ˈ%1%2%3%4%5")
	-- never leave a stress mark directly in front of a hyphen
	text = rsub(text, "ˈ%-", "-")

	return text
end

-- Template entry point. Transcribes every positional argument passed to the
-- calling template; when there are none, the current page title is used
-- instead. Each transcription is wrapped in slashes and handed to
-- Module:IPA for formatting.
-- @param frame the Scribunto frame of the #invoke call
-- @return whatever m_IPA.format_IPA_full returns for the collected items
function export.IPA(frame)
	local template_args = frame:getParent().args

	-- copy the positional arguments into a plain sequence
	local inputs = {}
	for _, arg in ipairs(template_args) do
		inputs[#inputs + 1] = arg
	end

	-- no arguments given: fall back to the page name
	if not inputs[1] then
		inputs = {mw.title.getCurrentTitle().text}
	end

	local transcriptions = {}
	for index, input in ipairs(inputs) do
		transcriptions[index] = { pron = "/" .. phonemic(input) .. "/" }
	end

	return m_IPA.format_IPA_full(lang, transcriptions)
end

return export