-- Module:hi-IPA (มอดูล:hi-IPA)
--
-- From Wiktionary, the free dictionary (จาก วิกิพจนานุกรม พจนานุกรมเสรี)
local export = {}

local lang = require("Module:languages").getByCode("hi")
local sc = require("Module:scripts").getByCode("Deva")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")

local gsub = mw.ustring.gsub
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find

-- Lookup table from a single transliteration character to its IPA output.
-- export.toIPA applies it with gsub(translit, ".", correspondences), i.e. one
-- character at a time; a character with no entry here is left as it is.
-- Further identity entries are added after the table is built (see the
-- `identical` loop later in this file).
-- NOTE(review): keys such as "ā̃" and "ī̃" look like multi-code-point
-- sequences, so a single-character "." lookup would never hit them — confirm.
local correspondences = {
	-- consonants
	["ṅ"] = "ŋ", ["g"] = "ɡ", 
	["c"] = "t͡ʃ", ["j"] = "d͡ʒ", 
	["ṭ"] = "ʈ", ["ḍ"] = "ɖ", ["ṇ"] = "ɳ",
	["t"] = "t̪", ["d"] = "d̪",
	["y"] = "j", ["r"] = "ɾ", ["v"] = "ʋ",
	["ś"] = "ʃ", ["ṣ"] = "ʂ", ["ź"] = "ʒ", ["ž"] = "ʒ", ["h"] = "ɦ",
	["ṛ"] = "ɽ", ["ẓ"] = "ʒ", ["ḷ"] = "l", ["ḻ"] = "l", ["ġ"] = "ɣ", ["q"] = "q", ["x"] = "x", ["ṉ"] = "n", ["ṟ"] = "ɾ",

	-- oral vowels
	["a"] = "ə", ["ā"] = "ɑː", ["i"] = "ɪ",
	["ī"] = "iː", ["o"] = "oː", ["e"] = "eː",
	["u"] = "ʊ", ["ū"] = "uː", ["ŏ"] = "ɔ", ["ĕ"] = "æ",

	-- nasalised vowels
	["ẽ"] = "ẽː", ["ũ"] = "ʊ̃", ["õ"] = "õː", ["ã"] = "ə̃", ["ā̃"] = "ɑ̃ː",  ["ĩ"] = "ɪ̃", ["ī̃"] = "ĩː",

	-- special signs; the parenthesised outputs are emitted literally
	["ॐ"] = "oːm", ["ḥ"] = "(ɦ)", ["'"] = "(ʔ)",
}

-- Replacement table used by export.toIPA when style == "nonpersianized":
-- each listed transliteration character is swapped for the value given here
-- (the apostrophe is dropped entirely) before any further processing.
local perso_arabic = {
	["x"] = "kh", ["ġ"] = "g", ["q"] = "k", ["ź"] = "z", ["z"] = "j", ["f"] = "ph", ["'"] = "",
}

-- Replacement table used by export.toIPA when style == "desanskritanize".
-- Note that the values are already IPA/plain output characters, applied to
-- the transliteration before the general `correspondences` pass.
local urdu = {
	["ṣ"] = "ʃ", ["ṇ"] = "n",
}

-- Replacement table used by export.toIPA when style == "dakhini".
local deccani = {
	["q"] = "x",
}

-- Short-to-long vowel pairs.
-- NOTE(review): not referenced anywhere in the visible part of this file —
-- confirm whether it is still needed.
local lengthen = {
	["a"] = "ā", ["i"] = "ī", ["u"] = "ū",
}

-- Characters treated as vowel material; only ever spliced into a bracketed
-- character class (see syllabify_pattern), so every code point in the string
-- — including the combining marks and the length sign — is an individual
-- class member.
local vowels = "aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː"
-- Pattern for one vowel character optionally followed by the length sign.
local vowel = "[aāiīuūoǒŏěĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃]ː?"
-- Captures a consonant from this set when it is followed by "h"; export.toIPA
-- rewrites the "h" as ʱ.
local weak_h = "([gjdḍbṛnm])h"
-- Captures a consonant from this set; export.toIPA appends "h" to the pattern
-- at the call site and rewrites that "h" as ʰ.
local aspirate = "([kctṭp])"
-- Three captures: a vowel-class character (plus optional combining tilde), a
-- run of one or more characters that are neither vowel-class, "." nor "-",
-- and a following vowel-class character (plus optional combining tilde).
local syllabify_pattern = "([" .. vowels .. "]̃?)([^" .. vowels .. "%.%-]+)([" .. vowels .. "]̃?)"

-- Splits `text` into a list of units by walking it one code point at a time.
-- A unit grows while it still forms either a single recognised consonant or
-- a recognised consonant followed by "h"; otherwise the unit collected so far
-- is stored and a new one starts with the current character.
--
-- A space is appended to the input so that the last real unit gets stored;
-- the unit started by that space is never stored. If the first character is
-- not a recognised consonant, the first stored unit is the empty string.
--
-- @param text string to split
-- @return array of unit strings
local function find_consonants(text)
	local single_consonant = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉḥ]$"
	local aspirated_consonant = "^[kgcjṭḍtdpbṛ]h$"

	local units = {}
	local pending = ""
	for codepoint in mw.ustring.gcodepoint(text .. " ") do
		local ch = mw.ustring.char(codepoint)
		local candidate = pending .. ch
		if find(candidate, single_consonant) or find(candidate, aspirated_consonant) then
			-- still a valid unit: keep growing it
			pending = candidate
		else
			-- the character cannot extend the unit: store it and start afresh
			units[#units + 1] = pending
			pending = ch
		end
	end
	return units
end

-- Inserts "." syllable boundaries into a transliterated string.
--
-- Step 1 (run twice): for every vowel + consonant-run + vowel match, the run
-- is split into units with find_consonants and a "." is inserted after the
-- first unit when there is more than one unit, or before the only unit
-- otherwise. gsub matches do not overlap, so the vowel that closes one match
-- cannot open the next; the second pass presumably exists to pick those up.
-- Step 2 (run twice): a "." is placed between two directly adjacent vowels.
-- Step 3: a fixed list of corrections moves the boundary for specific
-- consonant clusters.
--
-- @param text transliteration; export.toIPA rewrites blanks as ".." before
--             calling this
-- @return the same string with "." boundaries inserted
local function syllabify(text)
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(a, b, c)
			-- `local` is essential here: without it the table leaked into the
			-- global environment on every callback invocation.
			local b_set = find_consonants(b)
			table.insert(b_set, #b_set > 1 and 2 or 1, ".")
			return a .. table.concat(b_set) .. c
		end)
		-- NOTE(review): "(?=" is not lookahead syntax in Lua patterns, so this
		-- looks like it can only match a literal "?=" and is otherwise a
		-- no-op; the loop below performs the vowel-vowel split. Left unchanged
		-- on purpose — confirm before removing.
		text = gsub(text, "(" .. vowel .. ")(?=" .. vowel .. ")", "%1.")
	end
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end
	-- syllabification corrections
	-- ([^.]) is added in front, just in case one of the (unlikely) clusters 
	-- would occur after a blank space (temporarily reformatted as '..')
	text =  gsub(text, '([^.])%.([kqgcjṭḍtdpb])(h?)([kqgcjṭḍtdpbxġfnɳmsśzź])', '%1%2%3.%4')
	text =  gsub(text, '([^.])%.([qgcjṭḍtdpb])(h?)ṣ', '%1%2%3.ṣ')
	text =  gsub(text, '([^.])%.khṣ', '%1kh.ṣ') 						-- not kṣ/क्ष 
	text =  gsub(text, '([^.])%.([xġfnɳmzźyrlv])([kqgcjṭḍtdpbxġfnɳmsśṣzźh])', '%1%2.%3')
	text =  gsub(text, '([^.])%.([sśṣ])([gjḍdbġsśṣzźh])', '%1%2.%3')
	return text	
end

-- Characters whose output is the character itself: each one is registered in
-- `correspondences` as mapping to itself.
local identical = "knlsfzθ"
for same in gmatch(identical, ".") do
	correspondences[same] = same
end

-- Transliterates `text` through the module-level Hindi language object.
-- "hi-translit-Latn" is passed as the third argument instead of calling
-- lang:transliterate(text) with its defaults (presumably this selects a
-- specific transliteration module — confirm against Module:languages).
--
-- @param text string to transliterate
-- @return whatever lang:transliterate returns for these arguments
local function transliterate(text)
	local translit_module = "hi-translit-Latn"
	return lang:transliterate(text, nil, translit_module)
end

-- Passes `term`, together with the module-level Hindi language and script
-- objects, to full_link from Module:links and returns its result.
--
-- @param term the term to link
function export.link(term)
	local link_data = {
		term = term,
		lang = lang,
		sc = sc,
	}
	return require("Module:links").full_link(link_data)
end

-- Converts a term into a broad IPA string.
--
-- The function is a fixed sequence of substitutions: optional
-- transliteration, style-specific replacements, vowel and "ŕ" rewrites,
-- syllabification, aspiration marking, the per-character `correspondences`
-- lookup, and finally a series of clean-up rules on the IPA string. The order
-- of the steps matters, since later patterns match the output of earlier ones.
--
-- @param text  the term; it is transliterated only when its best script
--              reports isTransliterated(), otherwise it is used as given
-- @param style the body reacts to "nonpersianized", "dakhini" and
--              "desanskritanize"; any other value (for instance the
--              "persianized" passed by export.make) triggers none of the
--              style-specific branches
-- @return the IPA string, without surrounding slashes
-- Raises an error when transliteration yields nil/false.
function export.toIPA(text, style)
	-- '॰' is treated like a hyphen from here on
	text = gsub(text, '॰', '-')
	local translit = text
	if lang:findBestScript(text):isTransliterated() then
		translit = transliterate(text)
	end
	if not translit then
		error('The term "' .. text .. '" could not be transliterated.')
	end
	
	-- style-specific character replacements on the transliteration
	if style == "nonpersianized" then
		translit = gsub(translit, "[xġqźzf']", perso_arabic)
	end

	if style == "dakhini" then
		translit = gsub(translit, "[q]", deccani)
	end
	
	-- force final schwa for Hindi
	-- (a word-final "a~" becomes a literal ə, which has no entry in
	-- `correspondences` and therefore passes through the lookup unchanged)
	translit = gsub(translit, "a~$", "ə")

	if style == "desanskritanize" then
		-- a forced final ə preceded by at least three characters becomes ɑ(ː)
		translit = gsub(translit, "(...)ə$", "%1ɑ(ː)")
		translit = gsub(translit, "[ṣṇ]", urdu)
	end
	
	-- vowels
	-- replace one combining tilde variant by the one the later patterns use
	translit = gsub(translit, "͠", "̃")
	-- "ai"/"au" (optionally with a tilde on the a) become long ɛ/ɔ
	translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
	translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
	-- drop a leading or trailing hyphen
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	-- "ŕ": plain r word-finally or before a vowel, otherwise "ri"
	translit = gsub(translit, "ŕ$", "r")
	translit = gsub(translit, "ŕ(" .. vowel .. ")", "r%1")
	translit = gsub(translit, "ŕ", "ri")
    
	translit = gsub(translit, 'jñ', 'gy')
	translit = gsub(translit, ",", "")
	-- blanks are temporarily written as ".." so that syllabify sees them as
	-- boundaries; they are turned back into blanks further down
	translit = gsub(translit, " ", "..")
	translit = syllabify(translit)
	-- a boundary must not separate a vowel from its length sign or tilde
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")

	-- mark aspiration: ʰ after the `aspirate` set, ʱ after the `weak_h` set
	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak_h, '%1ʱ')
	
	-- per-character lookup; characters without an entry are kept as they are
	local result = gsub(translit, ".", correspondences)
	
	-- remove final schwa (Pandey, 2014)
	-- actually weaken
	-- (only when at least three characters precede the ə)
	result = gsub(result, "(...)ə$", "%1ᵊ")
	result = gsub(result, "(...)ə ", "%1ᵊ ")
	result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")
	
	-- formatting	
	-- hyphens become syllable boundaries, ".." becomes a blank again, the
	-- tilde is moved in front of the length sign, a trailing "." is dropped
	result = gsub(result, "%.?%-", ".")
	result = gsub(result, "%.%.", " ")
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%.$", "")
    
    -- ñ
    result = gsub(result, "ñ", "n")

	-- i and u lengthening
	-- (only at the very end of the string, optionally before a final ɦ)
	result = gsub(result, "ʊ(̃?)(ɦ?)$", "u%1ː%2")
	result = gsub(result, "ɪ(̃?)(ɦ?)$", "i%1ː%2")
	
	-- deaffricate first affricate in geminates
	result = gsub(result, "t͡ʃ(%.?)t͡ʃ", "t̪%1t͡ʃ")	
	result = gsub(result, "d͡ʒ(%.?)d͡ʒ", "d̪%1d͡ʒ")
	
	-- silent h in 'lh-', 'vh-' (Ohala 1983, p.45)
	-- (at the start of the string, or after a blank or a syllable boundary)
	result = gsub(result, "^([lʋ])ɦ", "%1")  
    result = gsub(result, "([ .])([lʋ])ɦ", "%1%2")
    
	-- ɛː before j and ɔː before ʋ are rewritten as ə̯i / ə̯u, keeping an
	-- intervening syllable boundary if there is one
	result = gsub(result, "ɛː(%.?)j", function(a)
		local res = "ə̯i"
		res = res .. a .. "j"
		return res
	end)
	result = gsub(result, "ɔː(%.?)ʋ", function(a)
		local res = "ə̯u"
		res = res .. a .. "ʋ"
		return res
	end)
	
	return result
end

-- Derives a narrow transcription from a broad IPA string by applying a fixed
-- sequence of substitutions. Later rules match the output of earlier ones
-- (for example the rules on ɐ rely on ə having been rewritten first), so the
-- order of the statements matters.
--
-- @param ipa broad IPA string, e.g. the return value of export.toIPA
-- @return the narrow IPA string, with blanks replaced by "‿"
function export.narrow_IPA(ipa)
	-- what /ɑ/ and /ə/ really are
	ipa = gsub(ipa, 'ɑ', 'ä')
	ipa = gsub(ipa, 'ə', 'ɐ')
	-- uvular /x/, /ɣ/ ??
	-- ipa = gsub(ipa, 'x', 'χ')
	-- ipa = gsub(ipa, 'ɣ', 'ʁ')
	-- retroflex s rules
	-- (ʂ becomes ʃ unless the next character, after an optional boundary, is
	-- ʈ or ɖ; it also becomes ʃ at the end of the string)
	ipa = gsub(ipa, 'ʂ(%.?)([^ʈɖ.])', 'ʃ%1%2')
	ipa = gsub(ipa, 'ʂ$', 'ʃ')
	-- nasal allophones
	ipa = gsub(ipa, 'ŋ(%.?)([qχʁ])', 'ɴ%1%2')
	ipa = gsub(ipa, 'n%.j', 'ɲ.j')
	ipa = gsub(ipa, '[nɳ](%.?)ʃ', 'ɲ%1ʃ')  -- this nasal is likely more front than before /j/, but not doing a too narrow transcription seems preferable
	ipa = gsub(ipa, 'n(%.?)([td])̪', 'n̪%1%2̪')
    ipa = gsub(ipa, 'm(%.?)f', 'ɱ%1f')  
	-- nasals induce nasalization
	-- (a tilde is added to a vowel directly followed by a nasal consonant)
	ipa = gsub(ipa, '([ɐäɪiʊueɛoɔæ])(ː?)([nɳɲŋɴmɱ])', '%1̃%2%3')
	-- cc, jj
	ipa = gsub(ipa, 't̪(%.?)t͡ʃ', 't̚%1t͡ʃ')
	ipa = gsub(ipa, 'd̪(%.?)d͡ʒ', 'd̚%1d͡ʒ')
	-- syllable boundary consonants
	-- (the first stop of such a pair across a boundary gets the ̚ mark)
	ipa = gsub(ipa, '([kɡ])%.([kɡ])', '%1̚.%2')
	ipa = gsub(ipa, '([ʈɖ])%.([ʈɖ])', '%1̚.%2')
	ipa = gsub(ipa, '([td]̪?)%.([tdn])', '%1̚.%2')
	ipa = gsub(ipa, '([pb])%.([pb])', '%1̚.%2')
	-- aspiration rules
	-- (vowel changes next to ɦ; afterwards ɦ directly after a vowel becomes ʱ)
	ipa = gsub(ipa, 'ɐɦ([%. ])', 'ɛɦ%1')
	ipa = gsub(ipa, 'ɐɦ$', 'ɛɦ')
	ipa = gsub(ipa, 'ɐ%.ɦɐ', 'ɛ.ɦɛ')
	ipa = gsub(ipa, 'ɐ%(ɦ%)', 'ɛ(ɦ)')
	ipa = gsub(ipa, 'ʊɦ%.', 'ɔɦ.')
	ipa = gsub(ipa, 'ʊ%.ɦɐ', 'ɔ.ɦɔ')
	ipa = gsub(ipa, 'ɐ%.ɦʊ', 'ɔ.ɦɔ')
	ipa = gsub(ipa, '([ɐäɪiʊueɛoɔæ])(̃?)(ː?)ɦ', '%1%2%3ʱ')
	-- v/w
	-- (ʋ becomes w after one of the listed consonants, optionally across a
	-- syllable boundary)
	ipa = gsub(ipa, '([kɡŋtdɲʈɖɳnpbm]̪?%.?)ʋ', '%1w')

	-- geminate /ɾ/ is trill
	ipa = gsub(ipa, "ɾ%.ɾ", "r.r")	
	-- for onomatopoeic words ending on -र्र 
	ipa = gsub(ipa, "ɾɾ", "rː")	
	-- final geminates often pronounced as singletons
	-- (the patterns are not anchored: they apply to any doubled consonant
	-- that has no syllable boundary between its two halves)
	ipa = gsub(ipa, "kk", "k(ː)")
	ipa = gsub(ipa, "ɡɡ", "ɡ(ː)")
	ipa = gsub(ipa, "ʈʈ", "ʈ(ː)")
	ipa = gsub(ipa, "ɖɖ", "ɖ(ː)")
	ipa = gsub(ipa, "ɳɳ", "ɳ(ː)")
	ipa = gsub(ipa, "t̪t̪", "t̪(ː)")
	ipa = gsub(ipa, "d̪d̪", "d̪(ː)")
	ipa = gsub(ipa, "nn", "n(ː)")
	ipa = gsub(ipa, "pp", "p(ː)")
	ipa = gsub(ipa, "bb", "b(ː)")
	ipa = gsub(ipa, "mm", "m(ː)")
	ipa = gsub(ipa, "ll", "l(ː)")
	-- final cc, jj
	ipa = gsub(ipa, "t̚t͡ʃ", "(t̚)t͡ʃ")
	ipa = gsub(ipa, "d̚d͡ʒ", "(d̚)d͡ʒ")
	
	ipa = gsub(ipa, "ɪ%.j", "i.j")		
	-- blanks between words are shown as a tie bar
	ipa = gsub(ipa, " ", "‿")
	return ipa
end

-- Template entry point: builds the formatted pronunciation line.
--
-- The terms are the positional arguments of the parent frame (empty strings
-- are skipped); when there is no first argument the current page title is
-- used instead. For each term the phonemic form is emitted between slashes,
-- followed by the narrow form between brackets when it differs. The same pair
-- is emitted again for the "nonpersianized" reading when that reading differs
-- from the "persianized" one.
--
-- @param frame the Scribunto frame; arguments are read from frame:getParent()
-- @return accent qualifier, a blank, and the output of m_IPA.format_IPA_full
function export.make(frame)
	local args = frame:getParent().args
	local terms, results = {}, {}

	if args[1] then
		for _, item in ipairs(args) do
			-- skip empty arguments explicitly instead of inserting nil
			if item ~= "" then
				terms[#terms + 1] = item
			end
		end
	else
		terms[1] = mw.title.getCurrentTitle().text
	end

	-- Appends /phonemic/ and, when it differs, [narrow] to `results`.
	local function add_pronunciation(phonemic)
		results[#results + 1] = { pron = "/" .. phonemic .. "/" }
		local narrow = export.narrow_IPA(phonemic)
		if narrow ~= phonemic then
			results[#results + 1] = { pron = "[" .. narrow .. "]" }
		end
	end

	for _, term in ipairs(terms) do
		local persianized = export.toIPA(term, "persianized")
		local nonpersianized = export.toIPA(term, "nonpersianized")
		add_pronunciation(persianized)
		if persianized ~= nonpersianized then
			add_pronunciation(nonpersianized)
		end
	end

	return m_a.show({'เดลี'}) .. ' ' .. m_IPA.format_IPA_full(lang, results)
end

-- Template entry point for the "desanskritanize" style: builds the formatted
-- pronunciation line from the positional arguments of the parent frame
-- (empty strings are skipped). Each term is converted with
-- export.toIPA(term, "desanskritanize") and emitted between slashes; the
-- result is formatted with the language object for code "ur".
--
-- @param frame the Scribunto frame; arguments are read from frame:getParent()
-- @return accent qualifier, a blank, and the output of m_IPA.format_IPA_full
-- Raises an error when no first positional argument is given.
function export.make_ur(frame)
	local args = frame:getParent().args
	if not args[1] then
		error("No transliterations given.")
	end

	-- named ur_lang so that it does not shadow the module-level Hindi `lang`
	local ur_lang = require("Module:languages").getByCode("ur")

	local results = {}
	for _, item in ipairs(args) do
		-- skip empty arguments explicitly instead of inserting nil
		if item ~= "" then
			local desanskritanize = export.toIPA(item, "desanskritanize")
			results[#results + 1] = { pron = "/" .. desanskritanize .. "/" }
		end
	end

	return m_a.show({'อูรดู'}) .. ' ' .. m_IPA.format_IPA_full(ur_lang, results)
end

-- Template entry point for the "dakhini" style: builds the formatted
-- pronunciation line from the positional arguments of the parent frame
-- (empty strings are skipped). Each term is converted with
-- export.toIPA(term, "dakhini") and emitted between slashes; the result is
-- formatted with the language object for code "ur".
--
-- @param frame the Scribunto frame; arguments are read from frame:getParent()
-- @return accent qualifier, a blank, and the output of m_IPA.format_IPA_full
-- Raises an error when no first positional argument is given.
function export.make_deccani(frame)
	local args = frame:getParent().args
	if not args[1] then
		error("No transliterations given.")
	end

	-- named ur_lang so that it does not shadow the module-level Hindi `lang`
	local ur_lang = require("Module:languages").getByCode("ur")

	local results = {}
	for _, item in ipairs(args) do
		-- skip empty arguments explicitly instead of inserting nil
		if item ~= "" then
			local dakhini = export.toIPA(item, "dakhini")
			results[#results + 1] = { pron = "/" .. dakhini .. "/" }
		end
	end

	return m_a.show({'ทักขินี'}) .. ' ' .. m_IPA.format_IPA_full(ur_lang, results)
end

return export