มอดูล:Java-translit

จาก วิกิพจนานุกรม พจนานุกรมเสรี

This module transliterates text written in อักษรชวา (the Javanese script). It is used to transliterate สันสกฤต (sa) and ชวา (jv). The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Java-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Module table returned at the end of the file.
local export = {}
-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char

-- Lookup table from Javanese-script characters (and a few two-character
-- sequences) to their Thai-script transliteration.
-- The "↶" marker inside a value flags a Thai vowel that must be written before
-- its consonant; export.tr swaps it in front of the preceding consonant
-- after all lookups are done.
local conv = {
	-- finals (U+A980 - U+A983):
	
	["ꦀ"] = "มฺ",
	["ꦁ"] = "งฺ",
	["ꦂ"] = "รฺ",
	["ꦃ"] = "หฺ",
	
	-- independent vowels (U+A984 - U+A98E);
	-- the second column holds vowel + tarung sequences:
	
	["ꦄ"] = "อ",   ["ꦄꦴ"] = "อา",
	["ꦅ"] = "อิ",
	["ꦆ"] = "อิ",
	["ꦇ"] = "อี",
	["ꦈ"] = "อุ",   ["ꦈꦴ"] = "อู",
	["ꦉ"] = "ฤ",  ["ꦉꦴ"] = "ฤๅ",
	["ꦊ"] = "ฦ",
	["ꦋ"] = "ฦๅ",
	["ꦌ"] = "อ↶เ",
	["ꦍ"] = "อ↶ไ",
	["ꦎ"] = "อ↶โ",
	
	-- independent consonants (U+A98F - U+A9B2);
	-- the second column holds consonant + cecak telu (nukta) sequences:
	
	["ꦏ"] = "ก",   ["ꦏ꦳"] = "ฃ",
	["ꦐ"] = "ก̱",
	["ꦑ"] = "ข",
	["ꦒ"] = "ค",   ["ꦒ꦳"] = "ฅ",
	["ꦓ"] = "ฆ",
	["ꦔ"] = "ง",  ["ꦔ꦳"] = "อ̱",
	
	["ꦕ"] = "จ",
	["ꦖ"] = "ฉ",
	["ꦗ"] = "ช",   ["ꦗ꦳"] = "ซ",
	["ꦘ"] = "ช̱",
	["ꦙ"] = "ฌ",
	["ꦚ"] = "ญ",
	
	["ꦛ"] = "ฏ",
	["ꦜ"] = "ฐ",
	["ꦝ"] = "ฑ",
	["ꦞ"] = "ฒ",
	["ꦟ"] = "ณ",
	
	["ꦠ"] = "ต",
	["ꦡ"] = "ถ",
	["ꦢ"] = "ท",   ["ꦢ꦳"] = "ท̱",
	["ꦣ"] = "ธ",
	["ꦤ"] = "น",
	
	["ꦥ"] = "ป",   ["ꦥ꦳"] = "ฟ",
	["ꦦ"] = "ผ",
	["ꦧ"] = "พ",
	["ꦨ"] = "ภ",
	["ꦩ"] = "ม",
	
	["ꦪ"] = "ย",
	["ꦫ"] = "ร",
	["ꦬ"] = "ฬ",
	["ꦭ"] = "ล",
	["ꦮ"] = "ว",   ["ꦮ꦳"] = "ว̱",
	["ꦯ"] = "ศ",
	["ꦰ"] = "ษ",
	["ꦱ"] = "ส",   ["ꦱ꦳"] = "ส̱",
	["ꦲ"] = "ห",   ["ꦲ꦳"] = "ฮ",
	
	-- cecak_telu/nukta (U+A9B3);
	-- a nukta that is not part of one of the sequences above is dropped:
	
	["꦳"] = "",
	
	-- dependent vowels (U+A9B4 - A9BD);
	-- the second column holds vowel sign + tarung sequences:
	
	["ꦴ"] = "า", -- tarung
	["ꦵ"] = "↶โ",
	["ꦶ"] = "ิ",
	["ꦷ"] = "ี",
	["ꦸ"] = "ุ",
	["ꦹ"] = "ู",
	["ꦺ"] = "↶เ",   ["ꦺꦴ"] = "↶โ",
	["ꦻ"] = "↶ไ",  ["ꦻꦴ"] = "↶เา",
	["ꦼ"] = "↶เอ̂ะ",  ["ꦼꦴ"] = "↶เอ̂",
	["ꦽ"] = "ฺฤ",  ["ꦽꦴ"] = "ฺฤๅ",
	
	-- medials (U+A9BE - U+A9BF):
	["ꦾ"] = "ฺย",
	["ꦿ"] = "ฺร",
	
	-- pangkon/virama (U+A9C0):
	
	["꧀"] = "ฺ",
	
	-- punctuation (U+A9C1 - U+A9CF):
	
	["꧁"] = "«",
	["꧂"] = "»",
	["꧃"] = "(letter to younger age or lower rank)",
	["꧄"] = "(letter to equal age or equal rank)",
	["꧅"] = "(letter to older age or higher rank)",
	["꧆"] = "๏",
	["꧇"] = ":", -- number indicator
	["꧈"] = "ฯ",
	["꧉"] = "๚",
	["꧊"] = "\"",
	["꧋"] = "//",
	["꧌"] = "(",
	["꧍"] = ")",
	["ꧏ"] = "ๆ",
	
	-- digits (U+A9D0 - U+A9D9):
	
	["꧐"] = "0",
	["꧑"] = "1",
	["꧒"] = "2",
	["꧓"] = "3",
	["꧔"] = "4",
	["꧕"] = "5",
	["꧖"] = "6",
	["꧗"] = "7",
	["꧘"] = "8",
	["꧙"] = "9",
	
	-- ellipsis (U+A9DE - U+A9DF):
	
	["꧞"] = "-",
	["꧟"] = "-",

	-- zero-width characters (U+200B - U+200D): replaced by a visible "‼"
	-- so that one hiding inside a word shows up in the output
	[u(0x200B)] = "‼",
	[u(0x200C)] = "‼",
	[u(0x200D)] = "‼",
}

--- Transliterates Javanese-script text into Thai script.
--
-- The text is rewritten in several passes:
--   1. consonant syllables (initial + optional medial + optional vowel sign
--      or pangkon + optional final),
--   2. independent-vowel syllables (vowel + any number of finals),
--   3. digit runs wrapped in number indicators,
--   4. every remaining character found in `conv`,
--   5. Thai-side reordering and vowel-shape fix-ups.
--
-- @param text  string to transliterate
-- @param lang  language code (accepted for interface compatibility; not used here)
-- @param sc    script code (accepted for interface compatibility; not used here)
-- @return the transliterated string
-- Raises an error when a matched initial, medial, nucleus or final has no
-- entry in `conv`.
function export.tr(text, lang, sc)
	-- Pieces of a consonant-initial syllable.
	local CSVC = {
		initial = "([ꦏ-ꦲ]꦳?)",
		medial = "([ꦾꦿ]?)",
		nucleus = "([ꦴ-ꦽ꧀]?ꦴ?)",
		final = "([ꦀ-ꦃ]?)",
	}
	-- Pieces of a syllable that starts with an independent vowel.
	local VC = {
		nucleus = "([ꦄ-ꦎ]ꦴ?)",
		final = "([ꦀ-ꦃ]*)",
	}

	local number_indicator = "꧇"
	local digits = "[꧐-꧙]"

	-- True only until the first consonant syllable of the whole text has been
	-- converted; it is never reset, so it does not track later word starts.
	-- NOTE(review): if per-word handling of initial "ห" is wanted, this needs
	-- a word-boundary check instead of a one-shot flag - confirm against the
	-- module's testcases before changing.
	local first_syllable = true

	-- Pass 1: consonant syllables.
	text = mw.ustring.gsub(text, CSVC.initial .. CSVC.medial .. CSVC.nucleus .. CSVC.final, function(a, b, c, d)
		a = (conv[a] or error("Initial not recognized: " .. a))
		b = (b == "" and "" or (conv[b] or error("Medial not recognized: " .. b)))
		-- A consonant with no vowel sign and no pangkon gets the inherent vowel "ะ".
		c = (c == "" and "ะ" or (conv[c] or error("Nucleus not recognized: " .. c)))
		d = (d == "" and "" or (conv[d] or error("Final not recognized: " .. d)))
		if first_syllable and a == "ห" then
			a = "อ̱"
		end
		first_syllable = false
		return a .. b .. c .. d
	end)

	-- Pass 2: independent-vowel syllables.
	text = mw.ustring.gsub(text, VC.nucleus .. VC.final, function(a, b)
		-- Not every vowel + tarung combination exists in conv; fail with the
		-- same kind of message as pass 1 rather than a nil-concatenation error.
		a = conv[a] or error("Nucleus not recognized: " .. a)
		-- VC.final can capture several finals at once, but conv only has
		-- single-character keys for them, so convert one character at a time.
		-- The parentheses discard gsub's second return value (the match count).
		b = (mw.ustring.gsub(b, ".", conv))
		return a .. b
	end)

	-- Pass 3: digits enclosed in number indicators; the indicators are dropped.
	text = mw.ustring.gsub(text, number_indicator .. "(" .. digits .. "+)" .. number_indicator, function(a)
		return (mw.ustring.gsub(a, ".", conv))
	end)

	-- Pass 4: anything left over that has an entry in conv.
	text = mw.ustring.gsub(text, ".", conv)
	-- Pass 5a: move a vowel flagged with "↶" in front of the consonant
	-- (optionally carrying the combining underline mark) that precedes it.
	text = mw.ustring.gsub(text, "([ก-ฮ]̱?)↶([เแโไ])", "%2%1")
	-- Pass 5b: "ะ" followed by a consonant that carries "ฺ" becomes "ั".
	text = mw.ustring.gsub(text, "([ก-ฮ]̱?)ะ([ก-ฮ]̱?)ฺ", "%1ั%2ฺ")
	-- Pass 5c: the same closed-syllable adjustment for the "เ…อ̂ะ" vowel.
	text = mw.ustring.gsub(text, "เ([ก-ฮ]̱?)อ̂ะ([ก-ฮ]̱?)ฺ", "เ%1ิ%2ฺ")

	return text
end

return export