ข้ามไปเนื้อหา

มอดูล:Guru-translit

จาก วิกิพจนานุกรม พจนานุกรมเสรี

This module transliterates text written in the Gurmukhi script (อักษรคุรมุขี) into Thai script. It is used to transliterate Old Punjabi (ปัญจาบเก่า), Punjabi (ปัญจาบ), Pahari-Potwari, Sanskrit (สันสกฤต), and Sindhi Bhil. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Guru-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Table returned to whoever loads this module.
local export = {}

-- Unicode-aware helpers from the Scribunto ustring library.
local gsub = mw.ustring.gsub
local u = mw.ustring.char

-- Pattern fragment: captures any single character plus, optionally, one
-- combining mark from the range U+0300..U+036F that directly follows it.
local letter_with_mark = ("(.[%s-%s]?)"):format(u(0x0300), u(0x036F))

-- Character-for-character map from Gurmukhi to Thai script. export.tr feeds
-- it to gsub(text, ".", tt), so a character with no entry here is left as is.
local tt = {
	-- consonants
	["ਕ"] = "ก",
	["ਖ"] = "ข",
	["ਗ"] = "ค",
	["ਘ"] = "ฆ",
	["ਙ"] = "ง",
	["ਚ"] = "จ",
	["ਛ"] = "ฉ",
	["ਜ"] = "ช",
	["ਝ"] = "ฌ",
	["ਞ"] = "ญ",
	["ਟ"] = "ฏ",
	["ਠ"] = "ฐ",
	["ਡ"] = "ฑ",
	["ਢ"] = "ฒ",
	["ਣ"] = "ณ",
	["ਤ"] = "ต",
	["ਥ"] = "ถ",
	["ਦ"] = "ท",
	["ਧ"] = "ธ",
	["ਨ"] = "น",
	["ਪ"] = "ป",
	["ਫ"] = "ผ",
	["ਬ"] = "พ",
	["ਭ"] = "ภ",
	["ਮ"] = "ม",
	["ਯ"] = "ย",
	["ਰ"] = "ร",
	["ਲ"] = "ล",
	[u(0x0A33)] = "ล̱",
	["ਵ"] = "ว",
	[u(0x0A36)] = "ศ",
	["ਸ"] = "ส",
	["ਹ"] = "ห",
	[u(0x0A59)] = "ฃ",
	[u(0x0A5A)] = "ฅ",
	[u(0x0A5B)] = "ซ",
	["ੜ"] = "ฬ",
	[u(0x0A5E)] = "ฝ",

	-- independent vowels
	["ਅ"] = "อ",
	["ਆ"] = "อา",
	["ਇ"] = "อิ",
	["ਈ"] = "อี",
	["ਉ"] = "อุ",
	["ਊ"] = "อู",
	["ਏ"] = "เอ",
	["ਐ"] = "ไอ",
	["ਓ"] = "โอ",
	["ਔ"] = "เอา",
	-- bases of ʼi and ʼu
	["ੲ"] = "อ",
	["ੳ"] = "อ",

	-- dependent vowels and diacritics (the front-type vowels are not here;
	-- they are handled by a separate table)
	["ਾ"] = "า",
	["ਿ"] = "ิ",
	["ੀ"] = "ี",
	["ੁ"] = "ุ",
	["ੂ"] = "ู",
	["ਂ"] = "ํ",
	["ੰ"] = "ํ",
	["ਃ"] = "ะ",
	["੍"] = "ฺ",
	["ੵ"] = "ฺย",
	["਼"] = u(0x0331), -- macron below
	["ਁ"] = "ํ์", -- candrabindu
	["ੑ"] = u(0x0301), -- acute accent

	-- numerals
	["੦"] = "0",
	["੧"] = "1",
	["੨"] = "2",
	["੩"] = "3",
	["੪"] = "4",
	["੫"] = "5",
	["੬"] = "6",
	["੭"] = "7",
	["੮"] = "8",
	["੯"] = "9",

	-- zero-width space (display it if it hides in a word)
	[u(0x200B)] = "‼",
}

-- Substitutions applied to the raw Gurmukhi input before anything else:
-- a base consonant followed by the sign ਼ is folded into a single code
-- point, and the ligature ੴ is spelled out in full.
local adjust0 = {
	-- for convenience
	["ਲ" .. "਼"] = u(0x0A33),
	["ਸ" .. "਼"] = u(0x0A36),
	["ਖ" .. "਼"] = u(0x0A59),
	["ਗ" .. "਼"] = u(0x0A5A),
	["ਜ" .. "਼"] = u(0x0A5B),
	["ਫ" .. "਼"] = u(0x0A5E),
	["ੴ"] = "ਇੱਕ ਓਅੰਕਾਰ",
}

-- Dependent vowels of the front type. Each value is a gsub replacement
-- string in which %1 stands for the captured letter (with its optional
-- combining mark), so the Thai vowel ends up written before that letter.
local adjust1 = {
	["ੇ"] = "เ%1",
	["ੈ"] = "ไ%1",
	["ੋ"] = "โ%1",
	["ੌ"] = "เ%1า",
}

--- Transliterate Gurmukhi text into Thai script.
-- @param text        the string to transliterate; a table is taken to be a
--                    template frame and its `args[1]` is used instead
-- @param lang        language code (not consulted here)
-- @param sc          script code (not consulted here)
-- @param debug_mode  not consulted here
-- @return the transliterated string, or nil when no text was supplied
function export.tr(text, lang, sc, debug_mode)

	if type(text) == "table" then -- called directly from a template
		text = text.args[1]
	end

	-- No text at all (for instance a template call without its first
	-- argument): return nil rather than letting gsub raise on a nil subject.
	if text == nil then
		return nil
	end

	-- Pre-processing on the raw input: apply every adjust0 substitution.
	-- The entries do not feed into one another, so the unspecified order of
	-- pairs() does not affect the result.
	for k, v in pairs(adjust0) do
		text = gsub(text, k, v)
	end

	-- addak rule to double subsequent consonant: addak + C becomes C + ੍ + C
	text = gsub(text, "ੱ([ਕ-ਹ"..u(0x0A59).."-"..u(0x0A5E).."])", "%1੍%1")

	-- Map every character through tt; characters with no entry are kept.
	text = gsub(text, ".", tt)

	-- Move each front-type vowel in front of the letter (plus optional
	-- combining mark) that it follows.
	for k, v in pairs(adjust1) do
		text = gsub(text, letter_with_mark..k, v)
	end

	-- A front vowel standing directly before "อฺ" is moved to just after it.
	-- NOTE(review): the input this targets is not evident from the code — confirm.
	text = gsub(text, "([เแไโ])อฺ", "อฺ%1")
	
	-- Move the mark up when a below-base vowel sign follows
	-- (except for characters that have none)
	text = gsub(text, u(0x0331).."([ุ-ฺ])", u(0x0304).."%1") -- macron below > macron above
	
	-- Move the mark down when an above-base vowel sign follows
	-- (except for characters that have none)
	text = gsub(text, u(0x0301).."([ัิ-ื็-๎])", u(0x0317).."%1") -- acute accent > acute accent below
	
	return text

end

-- Hand the module table back to the loader so callers can reach export.tr.
return export