มอดูล:cmn-pron-Xian
หน้าตา
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
-- Table holding the functions this module exposes; it is returned at the end of the file.
local export = {}
-- Maps each encoded initial to its IPA transcription.
-- Keys are the internal one-character codes (see the encoding below): the
-- upper-case letters stand for digraphs (B = bv, P = pf, N = ng, Z = zh,
-- C = ch, S = sh). The empty-string key is looked up for syllables that have
-- no initial.
local initials = {
b = "p", p = "pʰ", m = "m", f = "f", v = "v", B = "pf", P = "pfʰ",
d = "t", t = "tʰ", n = "n", l = "l",
g = "k", k = "kʰ", N = "ŋ", h = "x",
j = "t͡ɕ", q = "t͡ɕʰ", x = "ɕ",
Z = "t͡ʂ", C = "t͡ʂʰ", S = "ʂ", r = "ʐ",
z = "t͡s", c = "t͡sʰ", s = "s",
[""] = "",
}
-- Maps each encoded final to its IPA transcription.
-- Keys use the internal encoding below (U=ü, N=ng). Keys ending in "r" are
-- finals carrying an r suffix; their IPA values likewise end in "r"
-- (presumably the erhua forms -- confirm against the dialect description).
local finals = {
a = "a", ia = "ia", ua = "ua",
ar = "ɐr", iar = "iɐr", uar = "uɐr",
o = "o", uo = "uo", Uo = "yo",
er = "ər", uor = "uər",
e = "ɤ",
ue = "ɯ", ie = "iɛ", Ue = "yɛ",
ier = "iɛr", Uer = "yɛr",
ii = "z̩", ih = "ʐ̩", i = "i", u = "u", U = "y",
iir = "ər", ihr = "ər", ir = "iər", ur = "uər", Ur = "yər",
ai = "æ", iai = "iæ", uai = "uæ",
air = "ær", iair = "iær", uair = "uær",
ei = "ei", ui = "uei",
eir = "er", uir = "uer",
ao = "au", iao = "iau",
aor = "ɔr", iaor = "iɔr",
ou = "ɤu", iu = "iɤu",
our = "ər", iur = "iər",
an = "ã", ian = "iã", uan = "uã", Uan = "yã",
anr = "ɐ̃r", ianr = "iɐ̃r", uanr = "uɐ̃r", Uanr = "yɐ̃r",
-- "in" is a reserved word in Lua, so that key needs the bracketed string form.
en = "ẽ", ["in"] = "iẽ", un = "uẽ", Un = "yẽ",
enr = "ə̃r", inr = "iə̃r", unr = "uə̃r", Unr = "yə̃r",
aN = "aŋ", iaN = "iaŋ", uaN = "uaŋ",
aNr = "ɐ̃r", iaNr = "iɐ̃r", uaNr = "uɐ̃r",
eN = "əŋ", iN = "iŋ", oN = "uəŋ", ioN = "yoŋ",
eNr = "ə̃r", iNr = "iə̃r", oNr = "uə̃r", ioNr = "yə̃r",
}
-- Maps a tone number (as a one-character string) to the superscript digits
-- appended to the IPA of a syllable. Tone "5" contributes nothing.
local tones = {
["1"] = "²¹", -- yin ping / dark level (T1)
["2"] = "²⁴", -- yang ping / light level (T2)
["3"] = "⁵³", -- shang / rising (T3)
["4"] = "⁵⁵", -- qu / departing (T4)
["5"] = "", -- toneless (T0)
}
-- internal use, encode and decode digraphs
-- Lookup table used by encode(): each key is a two-byte sequence and each
-- value is the single byte that replaces it. Letter digraphs become upper-case
-- letters; the combining tone marks (given as their UTF-8 bytes) become the
-- control characters \1-\4, whose byte value is later read as the tone number.
local digraph_encode = {
bv = "B", pf = "P", ng = "N", zh = "Z", ch = "C", sh = "S",
["\204\140"] = "\1", -- U+030C combining caron
["\204\129"] = "\2", -- U+0301 combining acute accent
["\204\128"] = "\3", -- U+0300 combining grave accent
["\204\132"] = "\4", -- U+0304 combining macron
}
-- Lookup table used by decode(): turns the internal one-byte codes back into
-- displayable text. It reverses digraph_encode, restores U to ü, and expands
-- the highlight markers \5 and \6 into HTML.
local digraph_decode = {
B = "bv", P = "pf", N = "ng", Z = "zh", C = "ch", S = "sh", U = "ü",
["\1"] = "\204\140", -- U+030C combining caron
["\2"] = "\204\129", -- U+0301 combining acute accent
["\3"] = "\204\128", -- U+0300 combining grave accent
["\4"] = "\204\132", -- U+0304 combining macron
["\5"] = '<span style="background-color:#F5DEB3">', -- start of a highlighted syllable
["\6"] = "</span>", -- end of a highlighted syllable
}
-- Converts romanised text into the internal one-byte-per-unit encoding.
-- The text is decomposed (NFD) first so that tone marks and the diaeresis are
-- separate combining characters that can be matched bytewise.
local function encode(text)
	local decomposed = mw.ustring.toNFD(text)
	-- u + combining diaeresis (U+0308) is ü, stored internally as "U".
	local umlauted = decomposed:gsub("u\204\136", "U")
	-- Any remaining two-byte pair listed in digraph_encode collapses to its
	-- code; pairs that are not in the table are left untouched by gsub.
	local encoded = umlauted:gsub("[bpnzcs\204][vfgh\128\129\132\140]", digraph_encode)
	return encoded
end
-- Converts internally encoded text back into displayable text: every code
-- listed in digraph_decode is expanded and the result is recomposed (NFC).
-- Bytes in the matched range that have no table entry are left as they are.
local function decode(text)
	-- gsub returns (string, match_count); keep only the string so the count is
	-- not forwarded to toNFC as a stray second argument.
	local expanded = text:gsub("[BPNZCSU\1-\7]", digraph_decode)
	local composed = mw.ustring.toNFC(expanded)
	return composed
end
-- Removes the apostrophes that are redundant in normal spelling: those that
-- stand directly before a consonant, y or w (optionally with the highlight
-- start marker \5 in between). Any "ng" left after that is re-encoded as "N".
local function py_join_syllables(text)
	local joined = text:gsub("'(\5?[bpmfvBPdtnlgkhjqxZCSrzcsyw])", "%1")
	local recoded = joined:gsub("ng", "N")
	return recoded
end
-- Inserts an apostrophe at every syllable boundary of encoded text and checks
-- that the input used apostrophes exactly where the spelling rules need them.
-- Returns the fully divided text; raises an error if re-joining the divided
-- text does not reproduce the input.
local function py_divide_syllables(text)
	-- "N" between a vowel (or tone mark) and a vowel is read as n + g.
	local divided = text:gsub("([aeiouU\1-\4])N%f[aeiouU]", "%1n'g")
	-- Every consonant+vowel sequence starts a new syllable.
	divided = divided:gsub("[bpmfvBPdtnlgkNhjqxZCSrzcsyw][aeiouU]", "'%0")
	-- Collapse doubled apostrophes (one typed by the user plus one inserted).
	divided = divided:gsub("''+", "'")
	-- Drop an apostrophe at the start of the text or right after a space.
	divided = divided:gsub("%f[^ %z]'", "")

	local rejoined = py_join_syllables(divided)
	if rejoined ~= text then
		error("Xi'an: error with apostrophes, "..decode(text).." should be "..decode(rejoined)..".")
	end
	return divided
end
-- Places the tone marker for `tone` (a digit string "1".."5") into a toneless
-- syllable. The marker goes after the first vowel, or after the second vowel
-- when the first is a leading i/u/U followed by another vowel. Tone "5" adds
-- no marker.
local function py_put_tone(syllable, tone)
	local marker = ""
	if tone ~= "5" then
		-- The internal tone marker is the control character whose byte value
		-- equals the tone number.
		marker = string.char(tone)
	end
	local toned = syllable:gsub("[iuU]?[aeiouU]", "%0" .. marker, 1)
	return toned
end
-- Rewrites one encoded syllable from "letters with an embedded tone marker"
-- to "tone digit followed by the bare letters" (for example m a \1 -> "1ma").
-- A syllable without a marker gets tone 5. Raises an error if the syllable
-- carries more than one marker or if the marker sits on the wrong letter.
local function py_transf(syllable)
	local marker = syllable:match("[\1-\4]") or "\5"
	-- The marker's byte value is the tone number.
	local tone = tostring(marker:byte(1))

	local bare, markers_found = syllable:gsub("[\1-\4]", "")
	if markers_found > 1 then
		error("Xi'an: two tones in one syllable: " .. decode(syllable))
	end

	-- Re-apply the tone in the canonical position and compare with the input.
	local expected = py_put_tone(bare, tone)
	if expected ~= syllable then
		error("Xi'an: error with tone placement, "..decode(syllable).." should be "..decode(expected)..".")
	end
	return tone .. bare
end
-- canonize to adhere to pinyin rules, e.g. jü -> ju
-- Input is text in initial+final form; the substitutions below are applied in
-- this exact order, each one working on the result of the previous one.
local function py_canonize(text)
	-- ü is written u after j, q, x.
	local s = text:gsub("([jqx])U", "%1u")
	-- Syllable-initial u becomes w (ui -> wei, un -> wen).
	s = s:gsub("%f[%l%u]u[in]?", { u = "w", ui = "wei", un = "wen" })
	-- Syllable-initial oN becomes weN.
	s = s:gsub("%f[%l%u]oN", "weN")
	-- A w left with nothing (or only r) after it is spelled wu.
	s = s:gsub("w(r?)%f[^%l%u]", "wu%1")
	-- Syllable-initial i becomes y (ih -> ri, iu -> you).
	s = s:gsub("%f[%l%u]i[hu]?", { i = "y", ih = "ri", iu = "you" })
	-- A y left with only n/N and/or r after it is spelled yi.
	s = s:gsub("y([nN]?r?)%f[^%l%u]", "yi%1")
	-- Syllable-initial ü is spelled yu.
	s = s:gsub("%f[%l%u]U", "yu")
	-- The internal finals ii and ih are both written i.
	s = s:gsub("i[ih]", "i")
	return s
end
-- normalize to initial+final, e.g. ju -> jü
-- Undoes the spelling conventions applied by py_canonize so that every
-- syllable is a plain initial followed by a key of the finals table. Raises an
-- error if canonizing the result does not give back the input, i.e. the input
-- was not spelled canonically.
local function py_normalize(text)
	-- u after j, q, x stands for ü.
	local s = text:gsub("([jqx])u", "%1U")
	-- w spellings back to u-initial finals; unlisted matches are left alone...
	s = s:gsub("w[ue][inN]?", { wu = "u", wei = "ui", wen = "un", weN = "oN" })
	-- ...and any w that remains is a plain u.
	s = s:gsub("w", "u")
	-- y spellings back to i/ü-initial finals; any y that remains is a plain i.
	s = s:gsub("y[iuo]u?", { yi = "i", yu = "U", you = "iu" })
	s = s:gsub("y", "i")
	-- i after z, c, s is the final "ii"; after Z, C, S, r it is the final "ih".
	s = s:gsub("([zcs])i", "%1ii")
	s = s:gsub("([ZCSr])i", "%1ih")
	-- A syllable spelled "ri" is the bare final "ih" with no initial.
	s = s:gsub("rih%f[^%l%u]", "ih")

	local round_trip = py_canonize(s)
	if round_trip ~= text then
		error("Xi'an: invalid syllable: "..decode(text).." should be "..decode(round_trip))
	end
	return s
end
-- Converts space-separated normalized syllables into an IPA string wrapped in
-- slashes. Each syllable has the shape: tone digit, optional initial, final,
-- optional trailing tone digit. When the trailing digit is present its tone is
-- appended after a superscript minus sign. Raises an error for a syllable that
-- does not fit this shape or whose initial/final is not in the tables.
local function py_to_ipa(text)
	local function syllable_to_ipa(syllable)
		local tone, initial, final, second_tone =
			syllable:match("^([12345])([bpmfvBPdtnlgkNhjqxZCSrzcs]?)([aeiouU][%lN]*)([12345]?)$")
		if not tone then
			error("Xi'an: Invalid syllable: " .. decode(syllable))
		end

		local onset = initials[initial] or error("Xi'an: Invalid initial: " .. decode(initial))
		local rime = finals[final] or error ("Xi'an: Invalid final: " .. decode(final))

		local ipa = onset .. rime .. tones[tone]
		if second_tone ~= "" then
			ipa = ipa .. "⁻" .. tones[second_tone]
		end
		return ipa
	end

	local converted = text:gsub("[^ ]+", syllable_to_ipa)
	return "/" .. converted .. "/"
end
-- Processes a "/"-separated list of readings.
-- returns (display_text, phonetic_text, ipa)
--   display_text:  the readings with tone digits removed, joined by " / "
--   phonetic_text: the same, each reading that contained tone digits followed
--                  by a "[Phonetic: ...]" note with the affected syllables
--                  highlighted
--   ipa:           the IPA of each reading, joined by ", "
-- Errors raised by the helper functions for malformed input propagate.
function export.py_process(text)
	local shown, annotated, transcribed = {}, {}, {}
	local count = 0

	-- Rebuilds a syllable that was followed by a tone digit: the written tone
	-- mark is replaced by the digit's tone and the syllable is wrapped in the
	-- highlight markers \5 ... \6.
	local function highlight(head, tail, digit)
		local marker = ""
		if digit ~= "5" then
			marker = string.char(digit)
		end
		return "\5" .. head .. marker .. tail .. "\6"
	end

	for reading in mw.text.gsplit(text, "/", true) do
		count = count + 1

		local display = reading:gsub("[12345]", "")
		shown[count] = display

		-- no check is done for things like "xUān", any capitalisation is valid
		local lowered = mw.ustring.lower(reading)
		local encoded = encode(lowered)
		local divided = py_divide_syllables(encoded)

		if divided:match("[12345]") then
			local phonetic = divided:gsub(
				"([bpmfvBPdtnlgkNhjqxZCSrzcsyw]?[iuU]?[aeiouU])[\1-\4]?([%lN]*)([1-5])",
				highlight
			)
			phonetic = py_join_syllables(phonetic)
			annotated[count] = display .. " [Phonetic: " .. decode(phonetic) .. "]"
		else
			annotated[count] = display
		end

		-- One syllable per space-separated word, each rewritten to digit+letters.
		local spaced = divided:gsub("'", " ")
		local toned = spaced:gsub("[^ ]+", py_transf)
		local normalized = py_normalize(toned)
		transcribed[count] = py_to_ipa(normalized)
	end

	return table.concat(shown, " / "),
		table.concat(annotated, " / "),
		table.concat(transcribed, ", ")
end
-- Hand the table of exported functions to whoever loads this module.
return export