ข้ามไปเนื้อหา

มอดูล:Lana-translit

จาก วิกิพจนานุกรม พจนานุกรมเสรี

This module transliterates text written in the อักษรไทธรรม (Tai Tham script). It is used to transliterate ไทลื้อ, เขิน, คำเมือง, บาลี, and สันสกฤต. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Lana-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Table of functions handed back to whoever loads this module.
local export = {}
-- Short aliases for the mw.ustring functions used below.
local gsub = mw.ustring.gsub
local u = mw.ustring.char

-- Per-character lookup used by export.tr: each key is a single Tai Tham
-- (or zero-width) character, each value is the Thai-script text that replaces it.
-- Two placeholder characters appear in the values and are resolved by export.tr:
--   "*"  is later turned into "ฺ" for pi/sa and removed for other languages;
--   "↶"  marks a vowel (เ แ โ ใ ไ) that is later moved in front of the
--        preceding Thai consonant.
local tt = {
	-- consonants
	["ᨠ"] = "ก", ["ᨡ"] = "ข", ["ᨢ"] = "ฃ", ["ᨣ"] = "ค", ["ᨤ"] = "ฅ", ["ᨥ"] = "ฆ", ["ᨦ"] = "ง",
	["ᨧ"] = "จ", ["ᨨ"] = "ฉ", ["ᨩ"] = "ช", ["ᨪ"] = "ซ", ["ᨫ"] = "ฌ", ["ᨬ"] = "ญ",
	["ᨭ"] = "ฏ", ["ᨮ"] = "ฐ", ["ᨯ"] = "ด", ["ᨰ"] = "ฒ", ["ᨱ"] = "ณ",
	["ᨲ"] = "ต", ["ᨳ"] = "ถ", ["ᨴ"] = "ท", ["ᨵ"] = "ธ", ["ᨶ"] = "น",
	["ᨷ"] = "บ", ["ᨸ"] = "ป", ["ᨹ"] = "ผ", ["ᨺ"] = "ฝ", ["ᨻ"] = "พ", ["ᨼ"] = "ฟ", ["ᨽ"] = "ภ", ["ᨾ"] = "ม",
	["ᨿ"] = "ย", ["ᩀ"] = "ย̱", ["ᩁ"] = "ร", ["ᩂ"] = "ฤ", ["ᩃ"] = "ล", ["ᩄ"] = "ฦ", ["ᩅ"] = "ว",
	["ᩆ"] = "ศ", ["ᩇ"] = "ษ", ["ᩈ"] = "ส", ["ᩉ"] = "ห", ["ᩊ"] = "ฬ", ["ᩋ"] = "อ", ["ᩌ"] = "ฮ",
	-- independent vowels
	["ᩍ"] = "อิ", ["ᩎ"] = "อี", ["ᩏ"] = "อุ", ["ᩐ"] = "อู", ["ᩑ"] = "อ↶เ", ["ᩒ"] = "อ↶โ",
	-- medials and miscellaneous
	["ᩓ"] = "ล↶แ", ["ᩔ"] = "ส*ส", ["ᩕ"] = "*ร", ["ᩖ"] = "*ล", ["ᩗ"] = "*ล", ["ᩘ"] = "ง*",
	-- U+1A59 (final nga) and U+1A5A (low pa) are distinct code points and need
	-- distinct keys; a repeated key would let the later entry overwrite the earlier.
	["ᩙ"] = "ง*", ["ᩚ"] = "พ*", ["ᩜ"] = "*ม", ["ᩝ"] = "*บ", ["ᩞ"] = "*ส",
	["᪢"] = "สวัรค์",
	-- dependent vowels and diacritics
	["᩠"] = "*", ["ᩡ"] = "ะ", ["ᩢ"] = "ั", ["ᩣ"] = "า", ["ᩤ"] = "า", -- ignore bindu
	["ᩥ"] = "ิ", ["ᩦ"] = "ี", ["ᩧ"] = "ึ", ["ᩨ"] = "ื", ["ᩩ"] = "ุ", ["ᩪ"] = "ู",
	["ᩫ"] = "็", ["ᩬ"] = "อ", ["ᩭ"] = "อย",
	["ᩮ"] = "↶เ", ["ᩯ"] = "↶แ", ["ᩰ"] = "↶โ", ["ᩱ"] = "↶ไ", ["ᩲ"] = "↶ใ",
	["ᩳ"] = "อ", ["ᩴ"] = "ํ", ["᩵"] = "่", ["᩶"] = "้",
	["᩺"] = "์", ["᩻"] = "๎", ["᩼"] = "์", ["᩿"] = "ฺ",
	-- numerals
	["᪀"] = "0", ["᪁"] = "1", ["᪂"] = "2", ["᪃"] = "3", ["᪄"] = "4",
	["᪅"] = "5", ["᪆"] = "6", ["᪇"] = "7", ["᪈"] = "8", ["᪉"] = "9",
	["᪐"] = "๐", ["᪑"] = "๑", ["᪒"] = "๒", ["᪓"] = "๓", ["᪔"] = "๔",
	["᪕"] = "๕", ["᪖"] = "๖", ["᪗"] = "๗", ["᪘"] = "๘", ["᪙"] = "๙",
	-- punctuation marks
	["ᪧ"] = "ๆ", ["᪨"] = "ฯ", ["᪩"] = "๚", ["᪪"] = "ฯ", ["᪫"] = "๚", ["᪬"] = "๛",
	-- zero-width space, non-joiner and joiner (U+200B..U+200D):
	-- display them if they hide in a word
	[u(0x200B)] = "‼", [u(0x200C)] = "‼", [u(0x200D)] = "‼",
}

--- Transliterates Tai Tham text into Thai script.
-- @param text        the string to transliterate, or a frame-like table whose
--                    `args[1]` holds that string (direct call from a template)
-- @param lang        language code; "pi" and "sa" select the Pali/Sanskrit rules,
--                    any other value (including nil) selects the default rules
-- @param sc          script code (not read by this function)
-- @param debug_mode  not read by this function
-- @return the transliterated string, or nil when no text was supplied
function export.tr(text, lang, sc, debug_mode)

	if type(text) == "table" then -- called directly from a template
		text = text.args and text.args[1]
	end

	-- Nothing to transliterate (e.g. the template was invoked without an
	-- argument): return nil rather than letting gsub raise on a nil subject.
	if text == nil then
		return nil
	end

	-- haang "ᩛ" can be ᨮ, ᨳ, or ᨻ; which one is decided by the range the
	-- preceding consonant falls in, and it is rewritten as "᩠" + that consonant
	text = gsub(text, "([ᨭ-ᨱ])ᩛ", "%1᩠ᨮ")
	text = gsub(text, "([ᨲ-ᨶ])ᩛ", "%1᩠ᨳ")
	text = gsub(text, "([ᨷ-ᨾ])ᩛ", "%1᩠ᨻ")

	-- per-character lookup; characters without an entry in tt stay as they are
	text = gsub(text, ".", tt)

	-- "๎" directly before whitespace, punctuation or NUL becomes "ๆ"
	-- NOTE(review): presumably this also fires at the very end of the text,
	-- where the frontier sees a NUL; confirm against mw.ustring
	text = gsub(text, "๎%f[%s%p%z]", "ๆ")
	-- remaining "๎" after a "*"-joined pair: drop the "*" and put "๎" between the two
	text = gsub(text, "([ก-ฮ]̱?)%*([ก-ฮ]̱?)๎", "%1๎%2")

	-- move a "↶"-marked vowel in front of the consonant that precedes it
	text = gsub(text, "([ก-ฮ]̱?)↶([เแโใไ])", "%2%1")

	if lang == "pi" or lang == "sa" then
		text = gsub(text, "ด", "ฑ")
		text = gsub(text, "บ", "ป")
		text = gsub(text, "เ([ก-ฮ]̱?)า", "โ%1") --TODO: what about "au" in Sanskrit?
		-- keep the "*" placeholders visible as "ฺ"
		text = gsub(text, "%*", "ฺ")
	else
		-- other languages: the "*" placeholders are simply dropped
		text = gsub(text, "%*", "")
	end

	return text

end

-- Hand the module table (containing export.tr) back to the loader.
return export