ข้ามไปเนื้อหา

มอดูล:pa-Arab-translit

จาก วิกิพจนานุกรม พจนานุกรมเสรี

This module transliterates text written in the Shahmukhi script (อักษรชาห์มุขี). It is also used to transliterate Old Punjabi (ปัญจาบเก่า), Pahari-Potwari, and Saraiki. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:pa-Arab-translit/testcases.

Functions

[แก้ไข]
tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Short aliases for the Unicode-aware Scribunto string helpers used
-- throughout this module.
local u = mw.ustring.char    -- builds a string from code points
local gsub = mw.ustring.gsub -- pattern substitution

-- Table returned at the end of the file; public functions are attached to it.
local export = {}


-- Carrier letters.
-- NOTE: `alif` and `ye` are declared a second time further down in this file
-- (just above export.tr); export.tr sees those later declarations, while the
-- combinations built below capture the values assigned here.
local vav = u(0x0648)
local ye = u(0x06CC)
local alif = u(0x0627)
local he = 'ه' -- not referenced anywhere in the visible code

-- Short-vowel and hamza combining marks.
local vw_s_cfu = u(0x0650) -- zer (i)
local vw_s_ccu = u(0x0655) -- hamza below
local vw_s_cbr = u(0x064F) -- pesh (u)
local vw_s_mcu = u(0x0654) -- hamza above
local vw_s_ocu = u(0x064E) -- zabar (a)

-- Marks that turn a carrier letter into a long vowel.
local vw_l_cbr = u(0x0657) -- inverted pesh (U+0657 ARABIC INVERTED DAMMA)
local vw_l_cfu = u(0x0656) -- subscript alif

-- "Hat" marks: small v above and inverted small v above.
local hat = u(0x065A)
local inverted_hat = u(0x065B)
local hats = hat .. inverted_hat -- interpolated into [...] classes in export.tr

-- All short marks, concatenated so the string can be dropped into a [...]
-- character class. Only zer, pesh and zabar have entries in `short_vowels`.
local short_vowels_list = vw_s_cfu .. vw_s_ccu .. vw_s_cbr .. vw_s_mcu .. vw_s_ocu 

-- Carrier + diacritic combinations.
local long_u = vav .. vw_l_cbr
local short_o = vav .. inverted_hat
local long_i = ye .. vw_l_cfu
local short_e = ye .. inverted_hat

-- export.tr places this string inside a [...] character class, so it acts as
-- a set of individual characters rather than a list of the two-character
-- sequences above.
local vocalised_carrier = long_u .. short_o .. long_i .. short_e .. 'و' .. 'و' .. 'ی' .. 'ے'

-- Consonant letters, written back to back; the resulting `consonants` string
-- is interpolated into [...] character classes by export.tr.
local consonants_1 = "ببپتثجچحخدذرزژسشصضطظعغفقکگلمنڻوهىھٹڈڑ"
-- Consonant + do-chashmi he sequences, written back to back.
local consonants_2 = "ببھتھڈھجھدھٹھکھڑھ"
-- Not referenced anywhere in the visible code.
local vowels = "اِیاُؤآے"
-- NOTE(review): consonants_2 is passed as the *pattern*, so this call only
-- removes a literal occurrence of that whole string from consonants_1. No such
-- occurrence is visible, so `consonants` appears to equal consonants_1.
-- Confirm whether a per-character removal was intended.
-- (gsub also returns a match count; only the first result is kept here.)
local consonants = gsub(consonants_1, consonants_2, "")


-- Lookup table from Shahmukhi consonants and digits to their output form.
-- export.tr passes it to gsub as the replacement table: first for
-- consonant + do-chashmi he pairs (two-character keys), then for single
-- consonant letters. Characters without an entry are left unchanged by gsub.
local conv = {
	-- plain consonants (single-character keys)
	['ب'] = 'พ', ['پ'] = 'ป', ['ت'] = 'ต', ['ث'] = 'ซ', ['ج'] = 'จ', 
	['چ'] = 'ฉ', ['ح'] = 'ห', ['خ'] = 'ฅ', ['د'] = 'ท', ['ذ'] = 'ซ̱',
	['ر'] = 'ร', ['ز'] = 'ซ̱', ['ژ'] = 'ช̱', ['س'] = 'ส', ['ش'] = 'ศ',
	['ص'] = 'ซ', ['ض'] = 'ซ̱', ['ط'] = 'ต', ['ظ'] = 'ซ̱', ['ع'] = 'อ',
	['غ'] = 'ฆ̱', ['ف'] = 'ฟ', ['ق'] = 'ฆ', ['ک'] = 'ก', ['گ'] = 'ค',
	['ل'] = 'ล', ['م'] = 'ม', ['ن'] = 'น', ['ڻ'] = 'ณ', ['و'] = 'ว',
	['ه'] = 'ห', ['ى'] = 'ย', ['ھ'] = 'ห', ['ٹ'] = 'ฏ', ['ڈ'] = 'ฑ',
	['ڑ'] = 'ฬ',
	
	-- aspirated consonants (consonant + do-chashmi he, two-character keys)
	['بھ'] = 'ภ', ['تھ'] = 'ถ', ['ڈھ'] = 'ฒ', ['جھ'] = 'ฌ',
	['دھ'] = 'ธ', ['ٹھ'] = 'ฐ', ['کھ'] = 'ข', ['ڑھ'] = 'ฬ̱',
	['گھ'] = 'ฆ',

	-- Extended Arabic-Indic digits to ASCII digits.
	-- NOTE(review): neither gsub call in export.tr that uses this table has a
	-- pattern matching digits, so these entries look unused in the visible code.
	['۰'] = '0', ['۱'] = '1', ['۲'] = '2', ['۳'] = '3', ['۴'] = '4',
	['۵'] = '5', ['۶'] = '6', ['۷'] = '7', ['۸'] = '8', ['۹'] = '9', 
}

-- Nasal assimilation rules applied by export.tr to the placeholder 'N'
-- (the output of noon ghunna in `short_vowels`).
-- Each key is a pattern fragment for the Thai consonant that follows 'N';
-- each value is the nasal letter written in place of 'N'.
-- The table is walked with pairs(), whose order is unspecified; the keys
-- start with disjoint letters, so the order does not affect the result.
local nasal_assim = {
	['[กค]ห?'] = 'ง',
	['[จฉ]ห?'] = 'ญ',
	['[ฏฑ]ห?'] = 'ณ',
	['[ตท]ห?'] = 'น',
	['[ปพ]ห?'] = 'ม',
	['น'] = 'น',
	['ม'] = 'ม',
    ['ซ'] = 'น',
}

-- Lookup table from vowel letters and diacritics to their Thai output.
-- export.tr uses it as a gsub replacement table keyed by a single matched
-- character; characters without an entry are left unchanged by gsub.
local short_vowels = {
	-- independent vowels
	[u(0x00627)] = 'า', -- alif
	[u(0x00622)] = 'า', -- alif with madda
	[u(0x06D2)] = 'เ', -- bari ye
	-- NOTE(review): the three multi-character keys below cannot be reached by
	-- the single-character lookups in the visible code; the second one also
	-- ends in a space. Confirm whether they are meant to be used.
	['اِی'] = 'ี' ,
	['اُ '] = 'ุ', -- alif with pesh
	['آے'] = 'เ',
	[u(0x0624)] = 'เา', -- waw with hamza above

	-- vowel diacritics
	[u(0x064E)] = 'ะ', -- zabar
	[u(0x0670)] = 'า', -- khari zabar
	[u(0x0650)] = 'ิ', -- zer
	[u(0x064F)] = 'ุ', -- pesh

	-- other diacritics
	[u(0x06BA)] = 'N', -- noon ghunna; 'N' is a placeholder resolved later in export.tr
	[u(0x0621)] = 'ิ', -- hamza
	[u(0x0652)] = 'ูน', -- sukun; NOTE(review): output looks unexpected for a no-vowel mark, confirm
	[u(0x0651)] = 'ː', -- shadda; 'ː' is a placeholder expanded later in export.tr
	[u(0x064B)] = 'น', -- do zabar
	[u(0x064D)] = 'นิ', -- do zer
}

-- NOTE(review): `alif` and `ye` were already declared near the top of the
-- file from code points; these literal redeclarations shadow them for all
-- code below, including export.tr. Confirm the literals are the same
-- characters as the earlier code-point versions.
local alif = 'ا'
local waw = 'و' -- not referenced in the visible code
local ye = 'ی'
local noon = u(0x06BA) -- noon ghunna; not referenced in the visible code
local shadda = u(0x0651) -- not referenced in the visible code

--- Transliterates Shahmukhi (Perso-Arabic script) text into Thai script.
--
-- The text is rewritten by an ordered series of substitutions: contextual
-- rules for the carrier letters (vav, alif, ye) first, then the vowel and
-- diacritic table, then the consonant table, and finally Thai-specific
-- clean-up (nasal placeholder, gemination placeholder, vowel carriers and
-- reordering of leading vowels). The order of the steps matters, because
-- later steps operate on a mix of untouched source letters and Thai output.
--
-- @param text string to transliterate
-- @param lang language code (not used by this function)
-- @param sc script code (not used by this function)
-- @return the transliterated string
function export.tr(text, lang, sc)

	-- vav + alif before a consonant: consonantal vav followed by long a
	text = gsub(text,
		'وا' .. '([' .. consonants .. '])',
		"วา%1")

	-- vav between two consonants (the first optionally aspirated) is written
	-- as the vowel sign sara uu
	text = gsub(text,
		'([' .. consonants .. ']ھ?)' .. vav .. '([' .. consonants .. '])',
		"%1ู%2")

	-- alif between consonants (a space is also accepted on the left) is long a
	text = gsub(text, '([' .. consonants .. 'و ' .. '])' .. alif .. '([' .. consonants .. 'و' .. '])', "%1า%2")

	-- he + short vowel drops the he and keeps only the vowel; marks with no
	-- entry in short_vowels leave the match unchanged
	text = gsub(text, 'ہ([' .. short_vowels_list .. '])', short_vowels)

	-- text-initial alif before a carrier character: drop the alif
	text = gsub(text, '^' .. alif .. '([' .. vocalised_carrier .. '])', "%1")

	-- text-initial alif before a short vowel diacritic: drop the alif
	text = gsub(text, '^' .. alif .. '([' .. short_vowels_list .. '])', "%1")

	-- long /u:/ and /i:/ spelled as short vowel + carrier
	text = gsub(text, vav .. vw_s_cbr .. vav .. "([" .. consonants .. "])", vav .. "อู%1")
	text = gsub(text, "([" .. consonants .. "])" .. vw_s_cfu .. ye .. "([" .. consonants .. "])", "%1ี%2")

	-- vav with hat = short o
	text = gsub(text, vav .. "[" .. hats .. "]", "โ็")

	-- vav with short vowel: consonantal vav followed by that vowel.
	-- Two fixes relative to the previous version:
	--   * the consonant is emitted as Thai 'ว' (as every other vav rule in this
	--     function does) instead of a stray Latin "v";
	--   * marks that have no entry in short_vowels (hamza above / hamza below)
	--     no longer raise "attempt to concatenate a nil value": returning nil
	--     makes gsub keep the original match, which later steps handle.
	text = gsub(text,
		vav .. "([" .. short_vowels_list .. "])",
		function(mark)
			local vowel = short_vowels[mark]
			if vowel then
				return "ว" .. vowel
			end
			return nil
		end)

	-- nun or re with hat
	-- TODO: add support for re
	text = gsub(text, "ن" .. "[" .. hats .. "]", "น")

	-- ye with hat = short e
	text = gsub(text, ye .. "[" .. hats .. "]", "เ")

	-- vav with inverted pesh = long u
	text = gsub(text, long_u, "ู")

	-- ye with subscript alif = long i
	text = gsub(text, ye .. vw_l_cfu, 'ี')

	-- ye between two consonants is written as sara e
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '([' .. consonants .. '])', "%1เ%2")

	-- text-final ye and alif after a consonant
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '$', "%1ี")
	text = gsub(text, '([' .. consonants .. '])' .. alif .. '$', "%1า")

	-- remaining vowel letters and diacritics, one character at a time
	text = gsub(text, '.', short_vowels)

	-- aspirated digraphs first, then single consonants
	text = gsub(text, '[بتجدکگٹڈڑ]ھ', conv)
	text = gsub(text, '[بپتثجچحخدذرزژسشصضطظعغفقکگلمنڻوهىھٹڈڑ]', conv)

	-- normal consonants left over
	text = gsub(text, vav, 'ว')
	text = gsub(text, 'ہ', 'ห')
	text = gsub(text, "ی", "ย")

	-- Resolve the nasal placeholder 'N': assimilate to the following consonant
	-- where a rule exists. The rule keys start with disjoint letters, so the
	-- unspecified iteration order of pairs() does not affect the result.
	for key, val in pairs(nasal_assim) do
		text = gsub(text, "N(" .. key .. ")", val .. "%1")
	end
	-- any 'N' still left becomes nikhahit; after a vowel sign the space that
	-- follows it is removed as well
	text = gsub(text, "([ะาิีุูเโ็])N ", "%1ํ")
	text = gsub(text, "(.?)N", "%1ํ")

	-- Expand the gemination placeholder by doubling the character after it.
	-- NOTE(review): shadda is normally typed after the consonant it doubles,
	-- so doubling the following character may be the wrong direction; confirm
	-- against the module's testcases before changing.
	text = gsub(text, "ː(.)", "%1%1")

	-- danda (with an optional preceding space) becomes a full stop
	text = gsub(text, " ?।", ".")

	-- strip one trailing space
	text = gsub(text, " $", "")

	-- a vowel sign at the start of the text, or after whitespace/punctuation,
	-- needs the silent carrier อ
	text = gsub(text, "^([ะาิีุูเโ็])", "อ%1")
	text = gsub(text, "([%s%p])([ะาิีุูเโ็])", "%1อ%2")
	-- Thai writes the leading vowels เ and โ before their consonant
	text = gsub(text, "([ก-ห]̱?)([เโ])", "%2%1")

	return text
end

return export