มอดูล:vi-translit

จาก วิกิพจนานุกรม พจนานุกรมเสรี

This module transliterates Vietnamese (ภาษาเวียดนาม) text into Thai script. It should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:vi-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Module table; public functions are attached to it and it is returned at the
-- end of the file.
local export = {}
-- Local aliases for the mw.ustring string functions used throughout this module.
local gsub = mw.ustring.gsub
local gmatch = mw.ustring.gmatch
local match = mw.ustring.match
-- u(codepoint, ...) builds a string from Unicode code points.
local u = mw.ustring.char

-- Thai spellings of Vietnamese initial consonants, keyed by the Latin spelling.
-- Each entry holds four variants; spell() picks one according to the tone number:
--   [1] tone 1 (and any other value), [2] tones 2 and 6,
--   [3] tones 3 and 4,                [4] tone 5.
-- A variant may end in a Thai tone mark (U+0E48..U+0E4B), which spell() moves
-- to its proper place inside the vowel template.
-- The key "_" is used for syllables that have no initial consonant.
local initialConsonants = {
	-- native words
	["b"] = {"บ","บ่","บ๋","บ๊"}, ["c"] = {"ก","ก่","ก๋","ก๊"}, ["ch"] = {"จ","จ่","จ๋","จ๊"},
	["d"] = {"ซ","ส่","ส","ซ้"}, ["đ"] = {"ด","ด่","ด๋","ด๊"},
	["g"] = {"ก","ก่","ก๋","ก๊"}, ["gh"] = {"ก","ก่","ก๋","ก๊"}, ["gi"] = {"ซ","ส่","ส","ซ้"},
	["h"] = {"ฮ","ห่","ห","ฮ้"}, ["k"] = {"ก","ก่","ก๋","ก๊"}, ["kh"] = {"ค","ข่","ข","ค้"},
	["l"] = {"ล","หฺล่","หฺล","ล้"}, ["m"] = {"ม","หฺม่","หฺม","ม้"}, ["n"] = {"น","หฺน่","หฺน","น้"},
	["ng"] = {"ง","หฺง่","หฺง","ง้"}, ["ngh"] = {"ง","หฺง่","หฺง","ง้"}, ["nh"] = {"ญ","หฺญ่","หฺญ","ญ้"},
	["ph"] = {"ฟ","ฝ่","ฝ","ฟ้"}, ["qu"] = {"กฺว","กฺว่","กฺว๋","กฺว๊"},
	["r"] = {"ซ","ส่","ส","ซ้"}, ["s"] = {"ซ","ส่","ส","ซ้"},
	["t"] = {"ต","ต่","ต๋","ต๊"}, ["th"] = {"ท","ถ่","ถ","ท้"}, ["tr"] = {"จ","จ่","จ๋","จ๊"},
	["v"] = {"ว","หฺว่","หฺว","ว้"}, ["x"] = {"ซ","ส่","ส","ซ้"}, ["_"] = {"อ","อ่","อ๋","อ๊"},
	["ꞗ"] = {"ว","หฺว่","หฺว","ว้"},
	-- borrowed words
	["p"] = {"ป","ป่","ป๋","ป๊"}, ["br"] = {"บฺร","บฺร่","บฺร๋","บฺร๊"}, ["cr"] = {"กฺร","กฺร่","กฺร๋","กฺร๊"},
	["gr"] = {"กฺร","กฺร่","กฺร๋","กฺร๊"}, ["xt"] = {"ซฺต","สฺต่","สฺต๋","ซฺต๊"}, 
}
-- Thai spellings of Vietnamese final consonants, keyed by the Latin spelling.
-- The key "_" is used for syllables with no final consonant and maps to the
-- empty string. A final that is missing from this table makes spell() fail.
local finalConsonants = {
	["c"] = "ก", ["ch"] = "ก", ["k"] = "ก", ["m"] = "ม", ["n"] = "น",
	["ng"] = "ง", ["nh"] = "ญ", ["p"] = "ป", ["t"] = "ต", ["_"] = "",
}
-- Thai vowel templates for vowels whose spelling does not depend on the final
-- consonant, keyed by the Vietnamese vowel in NFC form with the tone removed.
-- Each value is a gsub replacement string applied to the chosen initial
-- consonant: %1 is the consonant part and %2 is its Thai tone mark (possibly
-- empty). The final consonant is appended after the template by spell().
local vowels = {
	["ă"] = "%1ั%2", ["â"] = "เ%1ิ%2", ["e"] = "แ%1%2", ["ơ"] = "เ%1%2อ",
	["ai"] = "%1%2าย", ["ao"] = "%1%2าว", ["au"] = "เ%1%2า", ["âu"] = "เ%1ิ%2ว", ["ay"] = "%1ั%2ย", ["ây"] = "เ%1็%2ย",
	["eo"] = "แ%1%2ว", ["êu"] = "เ%1%2ว", ["ia"] = "เ%1ี%2ย", ["iê"] = "เ%1ี%2ย", ["iu"] = "%1ี%2ว", ["iêu"] = "เ%1ี%2ยว",
	["oa"] = "%1ฺว%2า", ["oă"] = "%1ฺวั%2", ["oe"] = "แ%1ฺว%2", ["oi"] = "%1%2อย", ["ôi"] = "โ%1%2ย", ["ơi"] = "เ%1%2ย",
	["oai"] = "%1ฺว%2าย", ["oao"] = "%1ฺว%2าว", ["oay"] = "%1ฺวั%2ย",
	["ua"] = "%1ั%2ว", ["ưa"] = "เ%1ื%2อ", ["uâ"] = "%1%2ว", ["uê"] = "เ%1ฺว%2", ["ui"] = "%1ู%2ย", ["ưi"] = "%1ื%2ย",
	["uô"] = "%1%2ว", ["uơ"] = "%1ั%2ว", ["ươ"] = "เ%1ื%2อ", ["ưu"] = "%1ื%2ว", ["uy"] = "%1ฺวี%2",
	["uây"] = "เ%1ฺว็%2ย", ["uôi"] = "%1%2วย", ["uya"] = "เ%1ฺวี%2ย", ["uyê"] = "เ%1ฺวี%2ย", ["uyu"] = "%1ฺวี%2ว", ["ươi"] = "เ%1ื%2อย", ["ươu"] = "เ%1ื%2อว",
	["yê"] = "เ%1ี%2ย", ["yêu"] = "เ%1ี%2ยว",
	["oo"] = "%1%2อ", ["ôô"] = "โ%1%2", 
}
-- Thai vowel templates for vowels that have two spellings. spell() consults
-- this table only when the vowel is not found in `vowels`.
--   [1] is the default form;
--   [2] is used when the Thai final consonant is ก, ง or ญ
--       (Vietnamese finals c, ch, k, ng, nh).
-- %1 and %2 have the same meaning as in `vowels`.
local vowels2 = {
	["a"] = {"%1%2า","%1ั%2"}, ["ê"] = {"เ%1%2", "เ%1็%2"}, ["i"] = {"%1ี%2", "%1ิ%2"}, ["y"] = {"%1ี%2", "%1ิ%2"},
	["o"] = {"%1%2อ", "%1็%2อ"}, ["ô"] = {"โ%1%2", "%1%2"}, ["u"] = {"%1ู%2", "%1ุ%2"}, ["ư"] = {"%1ื%2อ", "%1ึ%2"},
}
-- Maps each combining tone diacritic (as it appears in NFD text) to a tone number:
-- U+0300 grave = 2, U+0303 tilde = 3, U+0309 hook above = 4, U+0301 acute = 5,
-- U+0323 dot below = 6. export.tr assigns tone 1 to syllables with none of these.
local toneMarks = {
	[u(0x0300)] = 2, [u(0x0303)] = 3, [u(0x0309)] = 4, [u(0x0301)] = 5, [u(0x0323)] = 6,
}
-- Character class matching any single tone diacritic listed in toneMarks.
local toneClass = "["..u(0x0300)..u(0x0303)..u(0x0309)..u(0x0301)..u(0x0323).."]"
-- Pattern matching a run of vowel letters together with the combining breve
-- (U+0306), circumflex (U+0302) and horn (U+031B) that modify them in NFD text.
local vowelClass = "["..u(0x0306)..u(0x0302)..u(0x031B).."aeiouy]+"

--- Reports whether a value is missing or blank.
-- @param s value to inspect
-- @return true when s is nil or the empty string, false otherwise
local function isempty(s)
	if s == nil then
		return true
	end
	return s == ""
end

--- Builds the Thai spelling of one analysed Vietnamese syllable.
-- @param lex table with the fields init (initial consonant key), vowel
--   (vowel key), final (final consonant key) and tone (tone number)
-- @return the Thai spelling, or nil when any part of the syllable is unknown
local function spell(lex)
	-- Which of the four initial-consonant variants each tone number selects;
	-- every tone not listed here uses variant 1.
	local variantByTone = { [2] = 2, [6] = 2, [3] = 3, [4] = 3, [5] = 4 }

	-- "�" marks a part that could not be resolved; it is checked for at the end.
	local onset = "�"
	local variants = initialConsonants[lex.init]
	if variants ~= nil then
		onset = variants[variantByTone[lex.tone] or 1] or "�"
	end

	local coda = finalConsonants[lex.final] or "�"

	local template = "�"
	if not isempty(lex.vowel) then
		local fixed = vowels[lex.vowel]
		local pair = vowels2[lex.vowel]
		if fixed ~= nil then
			template = fixed
		elseif pair ~= nil then
			-- The second form is used before the Thai finals for c, ch, k, ng, nh.
			local idx = match(coda, "[กงญ]") and 2 or 1
			template = pair[idx] or "�"
		end
	end

	-- Split the onset into consonant part (%1) and optional Thai tone mark (%2),
	-- substitute both into the vowel template, then append the final consonant.
	local toneRange = u(0x0E48) .. "-" .. u(0x0E4B)
	local onsetPattern = "^([^" .. toneRange .. "]+)([" .. toneRange .. "]?)$"
	local result = gsub(onset, onsetPattern, template) .. coda

	-- An unresolved part means the syllable cannot be spelled; let the caller fall back.
	if match(result, "�") then
		return nil
	end
	return result
end

--- Transliterates Vietnamese text in the Latin alphabet into Thai script.
--
-- The text is lowercased, decomposed to NFD and split on spaces and hyphens.
-- Each token made up only of Vietnamese letters (with optional leading and
-- trailing punctuation) is analysed into initial consonant, vowel, final
-- consonant and tone, and respelled by spell(). When spell() fails, the
-- token's tone-stripped Latin form is used instead. Tokens without any vowel
-- (empty tokens from consecutive separators, punctuation-only tokens,
-- consonant-only tokens) are passed through unchanged. Tokens are re-joined
-- with single spaces, so hyphens in the input come out as spaces.
--
-- @param text string to transliterate, or a table with an args field whose
--   first entry is the string (as when called directly from a template)
-- @param lang language code; not used by this function
-- @param sc script code; text in the "Hani" script is not transliterated
-- @param debug_mode not used by this function
-- @return the transliterated string, or nil when sc is "Hani" or no text was given
function export.tr(text, lang, sc, debug_mode)

	if type(text) == "table" then -- called directly from a template
		text = text.args[1]
	end

	-- Nothing to transliterate, e.g. the template was invoked without an
	-- argument; report failure instead of erroring in mw.ustring.lower.
	if isempty(text) then
		return nil
	end

	if sc == "Hani" then
		return nil
	end

	text = mw.ustring.toNFD(mw.ustring.lower(text))
	local syllables = mw.text.split(text, "[ %-]", false)

	-- A token qualifies when it holds nothing but a-z, đ and the combining
	-- tone/vowel diacritics, optionally wrapped in punctuation.
	local diacritics = u(0x0300)..u(0x0303)..u(0x0309)..u(0x0301)..u(0x0323)..u(0x0306)..u(0x0302)..u(0x031B)
	local syllablePattern = "^%p*[a-zđ"..diacritics.."]-%p*$"

	for k, v in ipairs(syllables) do
		if match(v, syllablePattern) then
			-- remember punctuations
			local punc1 = match(v, "^%p*")
			v = gsub(v, "^%p*", "")
			local punc2 = match(v, "%p*$")
			v = gsub(v, "%p*$", "")

			-- get tone and cut it off
			local tone = toneMarks[match(v, toneClass)] or 1
			v = gsub(v, toneClass, "")

			-- "gi" exception: the bare word needs a vowel left over once
			-- "gi" has been taken as the initial consonant
			if v == "gi" then
				v = "giy"
			end

			-- get vowel
			local vowel = match(v, vowelClass)

			-- A token without a vowel is not a syllable. Leave it untouched;
			-- passing a nil vowel on to mw.text.split / mw.ustring.toNFC
			-- would raise an error.
			if vowel ~= nil then
				-- get initial and final consonants
				local parts = mw.text.split(v, vowel, true)
				local lex = {
					tone = tone,
					init = (isempty(parts[1]) and "_" or parts[1]),
					final = (isempty(parts[2]) and "_" or parts[2]),
					-- the vowel tables are keyed by precomposed (NFC) letters
					vowel = mw.ustring.toNFC(vowel),
				}

				-- "qu" and "gi" are initial consonants: move their vowel
				-- letter from the vowel to the initial
				if lex.init == "q" and match(lex.vowel, "^u") then
					lex.init = "qu"
					lex.vowel = mw.ustring.sub(lex.vowel, 2)
				end
				if lex.init == "g" and match(lex.vowel, "^i") then
					lex.init = "gi"
					lex.vowel = mw.ustring.sub(lex.vowel, 2)
				end

				syllables[k] = punc1 .. (spell(lex) or v) .. punc2
			end
		end
	end

	return table.concat(syllables, " ")

end

-- Hand the module table (with export.tr) to whoever loads this module.
return export