-- ข้ามไปเนื้อหา

-- มอดูล:uk-translit-Thai

-- จาก วิกิพจนานุกรม พจนานุกรมเสรี

-- Table of functions this module exposes; returned at the end of the file.
local export = {}
-- Shorthand: builds a string from one or more Unicode code points.
local u = mw.ustring.char
-- Unicode-aware gsub; returns the new string plus further values (see rsub below).
local rsubn = mw.ustring.gsub

-- Same as rsubn(), but yields only the substituted string: the extra
-- return values are dropped by wrapping the call in parentheses.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Per-character map from modern Ukrainian Cyrillic (upper and lower case map
-- to the same value) to Thai-script strings, applied by export.tr() and
-- inverted by export.reverse_tr().
--
-- Conventions that export.tr() post-processes:
--   * vowels are emitted as bare dependent marks (ั ิ ึ ุ) or as "อ̂"; tr()
--     later inserts a carrier "อ" where needed and lengthens the marks;
--   * "↶" precedes a leading vowel (แ) that tr() moves in front of the
--     preceding consonant.
local regular_tt = {
	["А"]="ั", ["Б"]="บ", ["В"]="ว", ["Г"]="ฮ", ["Ґ"]="กฺ", ["Д"]="ด", -- Ukrainian в = /ʋ ~ w/
	["Е"]="↶แ", ["Є"]="ย↶แ", ["Ж"]="ชฺ", ["З"]="ซฺ",
	["И"]="ึ", ["І"]="ิ", ["Ї"]="ยิ", ["Й"]="ย",
	["К"]="ก", ["Л"]="ล", ["М"]="ม", ["Н"]="น", ["О"]="อ̂",
	["П"]="ป", ["Р"]="ร", ["С"]="ซ", ["Т"]="ต", ["У"]="ุ", ["Ф"]="ฟ",
	["Х"]="ฅ", ["Ц"]="ต͜ซ", ["Ч"]="ช", ["Ш"]="ฌ", ["Щ"]="ฌฺ",
	["Ь"]=u(0x02B9), ["Ю"]="ยุ", ["Я"]="ยั",

	["а"]="ั", ["б"]="บ", ["в"]="ว", ["г"]="ฮ", ["ґ"]="กฺ", ["д"]="ด",
	["е"]="↶แ", ["є"]="ย↶แ", ["ж"]="ชฺ", ["з"]="ซฺ",
	["и"]="ึ", ["і"]="ิ", ["ї"]="ยิ", ["й"]="ย",
	["к"]="ก", ["л"]="ล", ["м"]="ม", ["н"]="น", ["о"]="อ̂",
	["п"]="ป", ["р"]="ร", ["с"]="ซ", ["т"]="ต", ["у"]="ุ", ["ф"]="ฟ",
	["х"]="ฅ", ["ц"]="ต͜ซ", ["ч"]="ช", ["ш"]="ฌ", ["щ"]="ฌฺ",
	["ь"]=u(0x02B9), ["ю"]="ยุ", ["я"]="ยั",

	-- ASCII apostrophe, U+2019 and U+02BC all become U+02BA
	["'"]=u(0x02BA), [u(0x2019)]=u(0x02BA), [u(0x02BC)]=u(0x02BA),

	-- others
	[u(0x02CA)]="", [u(0x0301)]="", [u(0x0341)]="", -- acute
	
	-- right single quotation mark, modifier letter apostrophe → modifier letter double prime
	-- NOTE(review): these look like the same two keys as the u(0x2019) and
	-- u(0x02BC) entries above, with the same value — redundant; confirm and drop one set.
	["’"]='ʺ', ["ʼ"]= 'ʺ', 
	-- Ukrainian style quotes
	['«']='“', ['»']='”',
}

-- Transliterations for letters outside the modern Ukrainian alphabet.
-- These need to be separated from `regular_tt` so they don't interfere with
-- reverse translit (reverse_tr inverts `regular_tt` only).
--
-- Every key must appear exactly once: a Lua table constructor silently keeps
-- only the LAST value given for a repeated key. Earlier revisions listed
-- Ъ/ъ, Ы/ы and Ѣ/ѣ in both groups and Ѱ twice, which (a) replaced the Thai
-- value of Ы/ы with Latin "Y"/"y" and (b) left lowercase ѱ unmapped while
-- capital Ѱ produced lowercase "ps".
local obsolete_tt = {
	-- obsolete letters, pre-reform
	["Ё"]="Ë", ["ё"]="ë",
	["Ъ"]='ʺ', ["ъ"]='ʺ', -- hard sign → modifier letter double prime (U+02BA)
	["Ы"]="ึ", ["ы"]="ึ", -- same Thai vowel mark as И/и so tr() can post-process it
	["Э"]="↶เ", ["э"]="↶เ",
	-- obsolete letters, Middle Ukrainian
	["Ѥ"]='Je', ["ѥ"]='je',
	["Ѣ"]='I', ["ѣ"]='i', -- value that was effective before (the shadowed "Ě"/"ě" pair is dropped)
	["Ѧ"]='Ja', ["ѧ"]='ja', ["Ѩ"]='Ja', ["ѩ"]='ja', ["Ѫ"]='U', ["ѫ"]='u', ["Ѭ"]='Ju', ["ѭ"]='ju',
	["Ѯ"]='Ks', ["ѯ"]='ks', ["Ѱ"]='Ps', ["ѱ"]='ps', ["Ѳ"]='F', ["ѳ"]='f', ["Ѵ"]='I', ["ѵ"]='i',
	["Ѡ"]='O', ["ѡ"]='o',
}

-- Combining acute accent (U+0301).
local AC = mw.ustring.char(0x0301)

-- Maps each precomposed acute-accented Latin vowel to its base letter
-- followed by the combining acute; used by export.reverse_tr().
local acute_decomposer = {}
do
	local base_of = {
		["á"] = "a", ["é"] = "e", ["í"] = "i",
		["ó"] = "o", ["ú"] = "u", ["ý"] = "y",
		["Á"] = "A", ["É"] = "E", ["Í"] = "I",
		["Ó"] = "O", ["Ú"] = "U", ["Ý"] = "Y",
	}
	for accented, plain in pairs(base_of) do
		acute_decomposer[accented] = plain .. AC
	end
end


--- Transliterate a Ukrainian word or phrase into Thai script.
-- Steps, in order:
--   1. drop word-final hard signs and map characters through `regular_tt`
--      and then `obsolete_tt`;
--   2. strip combining acute accents;
--   3. insert a carrier "อ" before a vowel that has no consonant to sit on;
--   4. move the "↶"-marked leading vowels (เ/แ) in front of their consonant;
--   5. turn short vowel marks into their long counterparts in the contexts
--      listed below.
-- @param text string to transliterate
-- @return the transliterated string
function export.tr(text)
	-- Hard sign at the very end of the text, or directly before a character
	-- matched by %A (space, comma, period, hyphen, ...), is removed.
	text = rsub(text, "[Ъъ]$", "")
	text = rsub(text, "[Ъъ]([%A])", "%1")

	-- A lone neutral apostrophe becomes ʺ; longer runs are not in the lookup
	-- table, so this particular substitution leaves them as they are.
	text = rsub(text, "'+", { ["'"] = 'ʺ' })
	text = rsub(text, '.', regular_tt)
	text = rsub(text, '.', obsolete_tt)

	-- Stress marks are not represented in the output.
	text = rsub(text, AC, "")

	-- Carrier insertion: a vowel at the start of the text, after whitespace or
	-- punctuation, or directly after another vowel mark gets "อ" before it.
	-- NOTE(review): ึ is absent from the "preceding vowel" class; confirm intentional.
	local carrier_targets = { "[ัิึุ↶]", "อ̂" }
	for _, target in ipairs(carrier_targets) do
		text = rsub(text, "^(" .. target .. ")", "อ%1")
		text = rsub(text, "([%s%p])(" .. target .. ")", "%1อ%2")
		text = rsub(text, "([ัิุ])(" .. target .. ")", "%1อ%2")
	end

	-- Swap "consonant ↶ leading-vowel" into "leading-vowel consonant".
	text = rsub(text, "([ก-ฮ]ฺ?)↶([เแ])", "%2%1")

	-- Short vowel mark paired with its long form.
	local vowel_pairs = {
		{ "ั", "า" },
		{ "ิ", "ี" },
		{ "ึ", "ื" },
		{ "ุ", "ู" },
	}

	-- Doubled, text-final, or followed by whitespace/punctuation → long.
	for _, pair in ipairs(vowel_pairs) do
		local short, long = pair[1], pair[2]
		text = rsub(text, short .. short, long)
		text = rsub(text, short .. "$", long)
		text = rsub(text, short .. "([%s%p])", long .. "%1")
	end

	-- Short vowel + one consonant + another vowel → long. Matches cannot
	-- overlap within one gsub pass, so the whole rule set is run twice.
	for _ = 1, 2 do
		for _, pair in ipairs(vowel_pairs) do
			text = rsub(text, pair[1] .. "([ก-ฮ]ฺ?)([ัาิีึืุู])", pair[2] .. "%1%2")
		end
		for _, pair in ipairs(vowel_pairs) do
			text = rsub(text, pair[1] .. "([ก-ฮ]ฺ?)(อ̂)", pair[2] .. "%1%2")
		end
	end

	-- Short vowel immediately before a leading vowel (เ/แ) → long.
	for _, pair in ipairs(vowel_pairs) do
		text = rsub(text, pair[1] .. "([เแ])", pair[2] .. "%1")
	end

	return text
end


--- Reverse-transliterate a word or phrase back to Cyrillic.
-- Builds the inverse of `regular_tt` on each call, adds a few fixed
-- overrides, then applies four substitution passes.
-- NOTE(review): where several Cyrillic keys share one value (e.g. upper and
-- lower case), the key that survives the inversion depends on pairs() order.
-- NOTE(review): the '.' pass looks up one character at a time, so values of
-- `regular_tt` longer than one character are presumably never matched —
-- confirm whether this function is expected to work for Thai output.
-- @param text string to reverse-transliterate
-- @return the converted string
function export.reverse_tr(text)
	local reverse_tt = {}
	for cyrillic, translit in pairs(regular_tt) do
		reverse_tt[translit] = cyrillic
	end

	-- Fixed mappings that take precedence over whatever the inversion produced.
	local overrides = {
		['ʺ'] = "'",
		['ʹ'] = "ь",
		['y'] = "и",
		['Y'] = "И",
	}
	for from, to in pairs(overrides) do
		reverse_tt[from] = to
	end

	-- Each pass is { pattern, lookup table }, applied in this order.
	local passes = {
		{ '.', acute_decomposer },
		{ '[Jj][aeiu]', reverse_tt },
		{ '[Šš]č', reverse_tt },
		{ '.', reverse_tt },
	}
	for _, pass in ipairs(passes) do
		text = rsub(text, pass[1], pass[2])
	end

	return text
end

-- Hand the module table (tr, reverse_tr) back to the caller that loaded this module.
return export