-- มอดูล:tl-pron (Module:tl-pron)

-- จาก วิกิพจนานุกรม พจนานุกรมเสรี (From Wiktionary, the free dictionary)
-- Table of public functions; it is returned at the end of this file.
local export = {}
-- Shortcuts to Scribunto's Unicode-aware string helpers (mw.ustring).
local gsub = mw.ustring.gsub
-- NOTE(review): `len` is not referenced in the code visible here — confirm before removing.
local len = mw.ustring.len

--- Converts a Tagalog spelling into an IPA-like transcription string.
--
-- The word is lower-cased, stripped of unsupported characters, rewritten
-- letter-by-letter into phoneme symbols, split into syllables, given stress
-- marks, and finally cleaned up (accents removed, placeholder symbols
-- replaced by their real values).
--
-- @param word       Spelling to transcribe, or a frame table. When a table is
--                   passed, the spelling is read from `word.args[1]` and
--                   `do_debug` is overwritten with `word.args[4]`. When the
--                   spelling is nil, the current page title is used.
-- @param colloquial Accepted for interface compatibility; this function does
--                   not read it.
-- @param phonetic   When truthy, the extra allophone rules are applied.
-- @param do_debug   When equal to the string "yes", the intermediate
--                   snapshots are appended to the returned string.
-- @return The transcription (plus the debug snapshots when requested).
function export.show(word, colloquial, phonetic, do_debug)
	-- Intermediate snapshots of `word`. Deliberately not called `debug`, so
	-- that Lua's standard `debug` library is not shadowed.
	local debug_log = {}

	if type(word) == "table" then
		do_debug = word.args[4]
		word = word.args[1]
	end
	word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
	-- Keep only the characters the rules below understand. The list contains
	-- every accented vowel handled later by V, the stress detection and
	-- `remove_accent` (è, ê, ù and û used to be missing and were silently
	-- deleted from the input). 7 is for glottal stop.
	word = gsub(word, "[^abcdefghijklmnopqrstuvwxyzáâàéêèíîìóôòúûùüñ7.]", "")

	debug_log[#debug_log + 1] = word

	local V = "[aeiouàáâéèêìíîòóôùúû]" -- vowel
	local C = "[^aeiouàáâéèêìíîòóôùúû.]" -- consonant
	--determining whether "y" is a consonant or a vowel
	word = gsub(word, "y(" .. C .. ")", "i%1")
	word = gsub(word, "y(" .. V .. ")", "ɟ%1") -- not the real sound

	--x
	word = gsub(word, "x", "ks")

	--"c" & "g" before "i" and "e", only in proper nouns from Spanish ("g" before "i" and "e" excluded to avoid affecting transcriptions of native words)
	word = gsub(word, "c([ie])", "s%1")
	word = gsub(word, "gü([ie])", "ɡw%1")
	word = gsub(word, "ü", "")
	word = gsub(word, "gu([ie])", "ɡ%1")

	debug_log[#debug_log + 1] = word

	--alphabet-to-phoneme (native sounds)
	word = gsub(word, "7", "ʔ")
	word = gsub(word, "e", "ɛ")
	word = gsub(word, "g", "ɡ")
	word = gsub(word, "r", "ɾ")
	word = gsub(word, "y", "ɟ") --not the real sound
	word = gsub(word, "nɡ", "ŋ")

	--native digraphs and trigraphs
	word = gsub(word, "[n]j", "ñ") --not the real sound
	-- NOTE(review): every "y" has already been rewritten above, so this
	-- pattern cannot match at this point — confirm the intended order.
	word = gsub(word, "[sdt]y", {["sy"] = "ʃ", ["dy"] = "ǰ", ["ty"] = "č"})
	word = gsub(word, "ts", "ĉ")

	--"cu" before "a", "o" or "u" (proper nouns from Spanish, such as place names, given names, and surnames, or Spanish transcriptions of native words only)
	-- This must run before the generic "c" rule below, which would otherwise
	-- turn "cu" into "ku" first and leave nothing for this rule to match.
	word = gsub(word, "cu([aou])", "kw%1")

	--alphabet-to-phoneme (for assimilated Spanish forms)
	word = gsub(word, "c([aɛilounst])", "k%1")
	word = gsub(word, "qu", "k")
	word = gsub(word, "v", "b")
	word = gsub(word, "ñ", "ɲ")
	word = gsub(word, "([aɐɛiou]?)ll([aɐɛiou])", "%1ʎ%2")
	word = gsub(word, "[c]h", "č")
	word = gsub(word, "[fjz]", {["f"] = "p", ["j"] = "h", ["z"] = "s"})

	debug_log[#debug_log + 1] = word
	-- Second snapshot kept so the debug output has the same number of
	-- entries as before.
	debug_log[#debug_log + 1] = word

	--syllable division
	-- Each rule is applied twice because gsub matches do not overlap: the
	-- vowel that closes one match cannot also open the next one.
	for _ = 1, 2 do
		word = gsub(word, "(" .. V .. ")(" .. C .. ")(" .. V .. ")", "%1.%2%3")
	end
	for _ = 1, 2 do
		word = gsub(word, "(" .. V .. ")(" .. C .. ")(" .. C .. ")(" .. V .. ")", "%1%2.%3%4")
	end
	for _ = 1, 2 do
		word = gsub(word, "(" .. V .. ")(" .. C .. ")(" .. C .. ")(" .. C .. ")(" .. V .. ")", "%1%2.%3%4%5")
	end
	-- NOTE(review): the replacement equals the match, so this call changes
	-- nothing; presumably it was meant to handle adjacent vowels — confirm.
	word = gsub(word, "([aɛiou][́̀̂]?)([aɛiou][́̀̂]?)", "%1%2")

	debug_log[#debug_log + 1] = word

	--accentuation
	local syllables = mw.text.split(word, "%.")
	if mw.ustring.find(word, "[áâéêíîóôúû]") then
		-- Explicit acute/circumflex accents mark the stressed syllable(s).
		for i = 1, #syllables do
			if mw.ustring.find(syllables[i], "[áâéêíîóôúû]") then
				syllables[i] = "ˈ" .. syllables[i]
			end
		end
	elseif mw.ustring.find(word, "[^aɛiounsàèìòù]$") then
		-- Final consonant other than "n"/"s": stress the last syllable.
		-- Grave-accented vowels are listed so that a word ending in one is
		-- treated as vowel-final (penultimate stress), not consonant-final.
		syllables[#syllables] = "ˈ" .. syllables[#syllables]
	elseif #syllables > 1 then
		syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
	end

	debug_log[#debug_log + 1] = word

	--remove accents
	--"e" and "é" are ambiguous, so either use the accented forms
	-- Grave and circumflex vowels also contribute a following glottal stop.
	local remove_accent = {["à"] = "aʔ", ["á"] = "a", ["â"] = "aʔ", ["è"] = "ɛʔ", ["é"] = "ɛ", ["ê"] = "ɛʔ", ["ì"] = "iʔ", ["í"] = "i", ["î"] = "iʔ", ["ò"] = "oʔ", ["ó"] = "o", ["ô"] = "oʔ", ["ù"] = "uʔ", ["ú"] = "u", ["û"] = "uʔ"}
	for i = 1, #syllables do
		syllables[i] = gsub(syllables[i], "[àáâèéêìíîòóôùúû]", remove_accent)
	end
	-- Syllables are joined without a separator, so only the stress marks
	-- remain as boundaries.
	word = table.concat(syllables)

	--back-replace
	word = gsub(word, "[ĉčǰ]", {["ĉ"] = "t͡s", ["č"] = "t͡ʃ", ["ǰ"] = "d͡ʒ"})

	--secondary stress
	-- With several stress marks, only the last one stays primary.
	word = gsub(word, "ˈ(.+)ˈ", "ˌ%1ˈ")
	word = gsub(word, "ˈ(.+)ˌ", "ˌ%1ˌ")
	word = gsub(word, "ˌ(.+)ˈ(.+)ˈ", "ˌ%1ˌ%2ˈ")

	--phonetic transcription
	if phonetic then
		--allophones used in colloquial pronunciations
		word = gsub(word, "([ˈ]?[d])j", "d͡ʒ")
		word = gsub(word, "([ˈ]?[s])j", "ʃ")
		word = gsub(word, "([ˈ]?[t])j", "t͡ʃ")
		word = gsub(word, "[t]s", "t͡ʃ")
		word = gsub(word, "[ɾɲ]", {["ɾ"] = "r", ["ɲ"] = "nj"})
		word = gsub(word, "([ˈ]?)ʃ([aɛiou])", "sj%2")
		word = gsub(word, "([aɛiou][ˈ]?)k", "%1x")
		word = gsub(word, "k([aɛinoɾu])", "kx%1")
		-- NOTE(review): these two calls together leave every affricate as
		-- "t͡s" — confirm that this is intended.
		word = gsub(word, "t͡s", "t͡ʃ")
		word = gsub(word, "t͡ʃ", "t͡s")
	end

	-- real sound of "ɟ" and "ʎ"
	word = gsub(word, "ɟ", "j")
	word = gsub(word, "ʎ", "lj")

	-- add glottal stop if lead with vowel; the leading stress mark may be
	-- primary or (after the secondary-stress step) secondary
	word = gsub(word, "^([ˈˌ]?)([aɛiou])", "%1ʔ%2")

	return word .. (do_debug == "yes" and table.concat(debug_log, "") or "")
end

--- Entry point that forwards to `export.show` with the colloquial flag set.
-- `export.show`, as written in this file, does not read that flag.
-- @param frame Frame table (or spelling) passed straight through.
-- @return Whatever `export.show` returns.
function export.colloquial(frame)
	local colloquial = true
	return export.show(frame, colloquial)
end

--- Entry point that forwards to `export.show` asking for the phonetic
-- transcription (colloquial flag off, phonetic flag on).
-- @param frame Frame table (or spelling) passed straight through.
-- @return Whatever `export.show` returns.
function export.phonetic(frame)
	local colloquial, phonetic = false, true
	return export.show(frame, colloquial, phonetic)
end

--- Entry point that forwards to `export.show` with both the colloquial and
-- the phonetic flags set.
-- @param frame Frame table (or spelling) passed straight through.
-- @return Whatever `export.show` returns.
function export.phoneticcolloquial(frame)
	local colloquial, phonetic = true, true
	return export.show(frame, colloquial, phonetic)
end

-- Hand the table of public functions to whoever loads this module.
return export