มอดูล:th-utilities

This module lacks a documentation subpage. Please create it.
Useful links: subpage list • links • transclusions • testcases • sandbox
-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Short aliases for the mw.ustring string functions. The patterns used in
-- this module contain Thai characters and the "−" minus sign, which are
-- multi-byte in UTF-8.
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
-- Only en_utilities.pluralize is used below (by export.th_pos).
local en_utilities = require("Module:en-utilities")

-- Thai digit glyphs for 0..9. Lua tables are 1-based, so the glyph for digit
-- d is stored at index d + 1.
local thai_digits = {"๐", "๑", "๒", "๓", "๔", "๕", "๖", "๗", "๘", "๙"}
-- Thai words for the digits 0..9, indexed the same way as thai_digits.
local thai_words = {"ศูนย์", "หนึ่ง", "สอง", "สาม", "สี่", "ห้า", "หก", "เจ็ด", "แปด", "เก้า"}
-- Thai words for symbols that can appear in a number. The ASCII hyphen "-"
-- and the minus sign "−" are separate keys with different words.
local thai_words2 = {["."]="จุด",["-"]="ขีด",["+"]="บวก",["−"]="ลบ",["/"]="ทับ"}

-- Thai names for part-of-speech categories, keyed by the English category
-- name (mostly plural forms). Consulted by export.th_pos.
-- The list of keys is taken from [[Module:headword/data]].
-- Entries that are commented out only carry the English name as a
-- placeholder and are therefore inactive; export.th_pos returns such names
-- unchanged.
local thai_pos = {
	-- Lemma categories.
	["abbreviations"] = "คำย่อ",
	["acronyms"] = "อักษรอ่านย่อ",
	["adjectives"] = "คำคุณศัพท์",
	--["adnominals"] = "adnominals",
	--["adpositions"] = "adpositions",
	["adverbs"] = "คำกริยาวิเศษณ์",
	["affixes"] = "หน่วยคำเติม",
	--["ambipositions"] = "ambipositions",
	["articles"] = "คำกำกับนาม",
	["circumfixes"] = "หน่วยคำเติมคร่อม",
	--["circumpositions"] = "circumpositions",
	["classifiers"] = "คำลักษณนาม",
	["cmavo"] = "ชมาโว",
	--["cmavo clusters"] = "cmavo clusters",
	["cmene"] = "ชเมเน",
	--["combining forms"] = "combining forms",
	["conjunctions"] = "คำสันธาน",
	--["counters"] = "counters",
	["determiners"] = "คำกำหนด",
	--["diacritical marks"] = "diacritical marks",
	["digraphs"] = "ทวิอักษร",
	--["equative adjectives"] = "equative adjectives",
	["fu'ivla"] = "ฟูฮิฝลา",
	["gismu"] = "กิสมู",
	["Han characters"] = "อักษรจีน",
	["Han tu"] = "ฮั้นถื่อ",
	["hanja"] = "ฮันจา",
	["hanzi"] = "ฮั่นจื้อ",
	--["ideophones"] = "ideophones",
	["idioms"] = "สำนวน",
	["infixes"] = "อาคม",
	["initialisms"] = "อักษรย่อ",
	["iteration marks"] = "เครื่องหมายซ้ำ",
	["interfixes"] = "หน่วยคำเติมเชื่อม",
	["interjections"] = "คำอุทาน",
	["kana"] = "คานะ",
	["kanji"] = "คันจิ",
	["letters"] = "ตัวอักษร",
	["ligatures"] = "ตัวอักษรควบ",
	--["logograms"] = "logograms",
	["lujvo"] = "ลุฌโว",
	["morae"] = "มอรา",
	["morphemes"] = "หน่วยคำ",
	--["non-constituents"] = "non-constituents",
	["nouns"] = "คำนาม",
	["numbers"] = "จำนวน",
	["numeral symbols"] = "ตัวเลข",
	["numerals"] = "เลข",
	["particles"] = "คำอนุภาค",
	["phrases"] = "วลี",
	["postpositions"] = "คำปัจฉบท",
	--["postpositional phrases"] = "postpositional phrases",
	--["predicatives"] = "predicatives",
	["prefixes"] = "อุปสรรค",
	--["prepositional phrases"] = "prepositional phrases",
	["prepositions"] = "คำบุพบท",
	["preverbs"] = "คำกริยาเติมหน้า",
	--["pronominal adverbs"] = "pronominal adverbs",
	["pronouns"] = "คำสรรพนาม",
	["proper nouns"] = "คำวิสามานยนาม",
	["proverbs"] = "สุภาษิต",
	["punctuation marks"] = "เครื่องหมายวรรคตอน",
	--["relatives"] = "relatives",
	["roots"] = "ราก",
	["stems"] = "ต้นเค้าศัพท์",
	["suffixes"] = "ปัจจัย",
	["syllables"] = "พยางค์",
	["symbols"] = "สัญลักษณ์",
	["verbs"] = "คำกริยา",

	-- Non-lemma categories (inflected forms, participles, romanizations, ...).
	["active participle forms"] = "รูปผันพาร์ทิซิเพิลกรรตุวาจก",
	["active participles"] = "พาร์ทิซิเพิลกรรตุวาจก",
	["adjectival participles"] = "พาร์ทิซิเพิลเชิงคุณศัพท์",
    --["adjective case forms"] = "adjective case forms",
	["adjective forms"] = "รูปผันคำคุณศัพท์",
	--["adjective feminine forms"] = "adjective feminine forms",
	["adjective plural forms"] = "รูปผันคำคุณศัพท์พหูพจน์",
	["adverb forms"] = "รูปผันคำกริยาวิเศษณ์",
	["adverbial participles"] = "พาร์ทิซิเพิลเชิงกริยาวิเศษณ์",
	--["agent participles"] = "agent participles",
	["article forms"] = "รูปผันคำกำกับนาม",
	--["circumfix forms"] = "circumfix forms",
	--["combined forms"] = "combined forms",
	["comparative adjective forms"] = "รูปผันคำคุณศัพท์ขั้นกว่า",
	["comparative adjectives"] = "คำคุณศัพท์ขั้นกว่า",
	["comparative adverb forms"] = "รูปผันคำกริยาวิเศษณ์ขั้นกว่า",
	["comparative adverbs"] = "คำกริยาวิเศษณ์ขั้นกว่า",
	["conjunction forms"] = "รูปผันคำสันธาน",
	["contractions"] = "คำหดย่อ",
	--["converbs"] = "converbs",
	["determiner comparative forms"] = "รูปผันคำกำหนดขั้นกว่า",
	["determiner forms"] = "รูปผันคำกำหนด",
	["determiner superlative forms"] = "รูปผันคำกำหนดขั้นสุด",
	["diminutive nouns"] = "คำนามบอกความเล็ก",
	--["elative adjectives"] = "elative adjectives",
	--["equative adjective forms"] = "equative adjective forms",
	--["equative adjectives"] = "equative adjectives",
	["future participles"] = "พาร์ทิซิเพิลอนาคตกาล",
	--["gerunds"] = "gerunds",
	--["infinitive forms"] = "infinitive forms",
	--["infinitives"] = "infinitives",
	["interjection forms"] = "รูปผันคำอุทาน",
	["jyutping"] = "ยฺหวืดเพ็ง",
	--["misspellings"] = "misspellings",
	--["negative participles"] = "negative participles",
	--["nominal participles"] = "nominal participles",
	--["noun case forms"] = "noun case forms",
	["noun dual forms"] = "รูปผันคำนามทวิพจน์",
	["noun forms"] = "รูปผันคำนาม",
	--["noun paucal forms"] = "noun paucal forms",
	["noun plural forms"] = "รูปผันคำนามพหูพจน์",
	--["noun possessive forms"] = "noun possessive forms",
	--["noun singulative forms"] = "noun singulative forms",
	["numeral forms"] = "รูปผันเลข",
	["participles"] = "พาร์ทิซิเพิล",
	["participle forms"] = "รูปผันพาร์ทิซิเพิล",
	["particle forms"] = "รูปผันคำอนุภาค",
	["passive participles"] = "พาร์ทิซิเพิลกรรมวาจก",
	["past active participles"] = "พาร์ทิซิเพิลกรรตุวาจกอดีตกาล",
	["past participles"] = "พาร์ทิซิเพิลอดีตกาล",
	["past participle forms"] = "รูปผันพาร์ทิซิเพิลอดีตกาล",
	["past passive participles"] = "พาร์ทิซิเพิลกรรมวาจกอดีตกาล",
	--["perfect active participles"] = "perfect active participles",
	--["perfect participles"] = "perfect participles",
	--["perfect passive participles"] = "perfect passive participles",
	["pinyin"] = "พินอิน",
	["plurals"] = "พหูพจน์",
	["postposition forms"] = "รูปผันคำปัจฉบท",
	["prefix forms"] = "รูปผันอุปสรรค",
	--["preposition contractions"] = "preposition contractions",
	["preposition forms"] = "รูปผันคำบุพบท",
	--["prepositional pronouns"] = "prepositional pronouns",
	["present active participles"] = "พาร์ทิซิเพิลกรรตุวาจกปัจจุบันกาล",
	["present participles"] = "พาร์ทิซิเพิลปัจจุบันกาล",
	["present passive participles"] = "พาร์ทิซิเพิลกรรมวาจกปัจจุบันกาล",
	["pronoun forms"] = "รูปผันคำสรรพนาม",
	--["pronoun possessive forms"] = "pronoun possessive forms",
	["proper noun forms"] = "รูปผันคำวิสามานยนาม",
	["proper noun plural forms"] = "รูปผันคำวิสามานยนามพหูพจน์",
	["rafsi"] = "รัฟซี",
	["romanizations"] = "การถอดเป็นอักษรโรมัน",
	--["root forms"] = "root forms",
	--["singulatives"] = "singulatives",
	["suffix forms"] = "รูปผันปัจจัย",
	["superlative adjective forms"] = "รูปผันคำคุณศัพท์ขั้นสุด",
	["superlative adjectives"] = "คำคุณศัพท์ขั้นสุด",
	["superlative adverb forms"] = "รูปผันคำกริยาวิเศษณ์ขั้นสุด",
	["superlative adverbs"] = "คำกริยาวิเศษณ์ขั้นสุด",
	["verb forms"] = "รูปผันคำกริยา",
	["verbal nouns"] = "คำกริยานาม",

	-- Additional categories that are in neither group above.
	["cardinal nouns"] = "คำนามเชิงการนับ",
	["abstract nouns"] = "คำอาการนาม",
	["auxiliary verbs"] = "คำกริยานุเคราะห์",
	["cardinal adjectives"] = "คำคุณศัพท์เชิงการนับ",
}

-- Translate an English part-of-speech name into Thai.
--
-- The name is looked up in thai_pos as given; if that fails, the result of
-- en_utilities.pluralize(pos) is tried (presumably the English plural --
-- that helper lives in Module:en-utilities and is not visible here).
-- When neither lookup succeeds, `pos` is returned unchanged.
function export.th_pos(pos)
	local translated = thai_pos[pos]
	if translated then
		return translated
	end

	-- Fall back to the pluralized form; pluralize() is only called when the
	-- direct lookup found nothing.
	translated = thai_pos[en_utilities.pluralize(pos)]
	if translated then
		return translated
	end

	return pos
end

-- Replace every ASCII digit (0-9) in `text` with the matching Thai digit.
--
-- A number argument is converted with tostring() first, so the result for a
-- number is always a string. Any other non-string value, and any string
-- without ASCII digits, is returned as is.
function export.arabic_digit_to_thai(text)
	local converted = text
	if type(converted) == "number" then
		converted = tostring(converted)
	end

	if type(converted) ~= "string" or not find(converted, "[0-9]") then
		return converted
	end

	-- Single pass over the string; thai_digits is 1-based, so digit d is
	-- stored at index d + 1.
	converted = gsub(converted, "[0-9]", function(digit)
		return thai_digits[tonumber(digit) + 1]
	end)
	return converted
end

-- Replace every Thai digit (๐-๙) in `text` with the matching ASCII digit.
--
-- Values that are not strings (numbers included) and strings without any
-- Thai digit are returned unchanged.
function export.thai_digit_to_arabic(text)
	if type(text) ~= "string" or not find(text, "[๐-๙]") then
		return text
	end

	local converted = text
	for index, thai_digit in ipairs(thai_digits) do
		-- thai_digits[1] is the glyph for zero, hence index - 1.
		converted = gsub(converted, thai_digit, tostring(index - 1))
	end
	return converted
end

-- Spell out `text` digit by digit in Thai words.
--
-- Thai digits are first normalized to ASCII digits, then each digit 0-9 is
-- replaced by its word from thai_words, and finally each of the symbols
-- listed in thai_words2 is replaced by its word. All other characters are
-- kept, and no separator is inserted between the words.
-- A number argument is converted with tostring() first; any other non-string
-- value is returned unchanged.
function export.thai_number_sequence(text)
	local spoken = text
	if type(spoken) == "number" then
		spoken = tostring(spoken)
	end
	if type(spoken) ~= "string" then
		return spoken
	end

	spoken = export.thai_digit_to_arabic(spoken)
	for index, word in ipairs(thai_words) do
		-- thai_words[1] is the word for zero, hence index - 1.
		spoken = gsub(spoken, tostring(index - 1), word)
	end
	-- "." matches every single character; characters that are not keys of
	-- thai_words2 are left as they are by the table replacement.
	spoken = gsub(spoken, ".", thai_words2)
	return spoken
end

-- Regex-style sketch of a number (optional sign, digits with commas, optional decimal part): ([\-+]?)([0-9,]*)((\.[0-9]+)?)

-- Normalize an integer written with ASCII and/or Thai digits.
--
-- NOTE: producing the Thai reading of the integer is not implemented yet
-- (see the TODO below). For now the function only normalizes its input.
--
-- A number argument is converted with tostring() first. If the resulting
-- string consists of an optional sign ("-", "+" or "−") followed by one or
-- more ASCII digits, Thai digits or commas, the Thai digits are converted to
-- ASCII digits and all commas are removed. The sign, if present, is kept.
-- Strings that do not match, and values that are neither strings nor
-- numbers, are returned without further changes.
function export.thai_number_integer(text)
	if type(text) == "number" then
		text = tostring(text) -- convert to string
	end
	if type(text) == "string" and match(text, "^[-%+−]?[0-9๐-๙,]+$") then
		text = export.thai_digit_to_arabic(text)
		text = gsub(text, ",", "")
		-- TODO: build the Thai reading from the digits. At this point `text`
		-- holds ASCII digits plus an optional leading sign. The sign may be
		-- the multi-byte "−", so strip it before taking a byte length as the
		-- digit count.
	end
	return text
end

-- Placeholder for reading a decimal number in Thai.
--
-- The conversion is not implemented yet. At present a number argument is
-- turned into a string with tostring() and every other value is returned
-- unchanged.
function export.thai_number_float(text)
	-- tostring() of a number may not show the exact decimal value, because
	-- numbers are stored in binary floating point.
	local rendered = (type(text) == "number") and tostring(text) or text

	if type(rendered) ~= "string" then
		return rendered
	end

	--TODO
	return rendered
end

-- Hand the table of public functions back to whoever loads this module.
return export