มอดูล:za-sortkey

จาก วิกิพจนานุกรม พจนานุกรมเสรี
local export = {}

-- Secondary sort weights: seven consecutive Private Use Area characters
-- (U+F000..U+F006). Each replacement below is "<base letter><weight>", so a
-- mapped letter collates directly after its bare base letter, and letters that
-- share a base are ranked among themselves by the weight (a < b < ... < g).
local u = mw.ustring.char
local a, b, c, d, e, f, g =
	u(0xF000), u(0xF001), u(0xF002), u(0xF003),
	u(0xF004), u(0xF005), u(0xF006)

-- Contents of a pattern character class; makeSortKey wraps it in "[" .. "]"
-- and deletes every match. NOTE: despite the name, "%p" is the punctuation
-- class, so this strips punctuation rather than combining diacritics.
local remove_diacritics = "%p"

-- Single-character replacements. makeSortKey passes each key to
-- mw.ustring.gsub as a *pattern*, so "[ptk]" and "[bgd]" are character
-- classes, not literal strings.
-- NOTE(review): the digit bases look like tone numbers (z/ƨ = 2, j/з = 3,
-- x/ч = 4, q/ƽ = 5, h/ƅ = 6), with the second letter of each pair presumably
-- the older-orthography form -- TODO confirm.
-- NOTE(review): "[ptk]" and "[bgd]" match those letters anywhere in the text,
-- not only syllable-finally; verify that this is intended.
local monographs = {
	-- vowels
	["ə"] = "a" .. b,
	["ɵ"] = "o" .. b,
	["ɯ"] = "w" .. a,

	-- consonants
	["ƃ"] = "m" .. b,
	["ƌ"] = "n" .. b,
	["ŋ"] = "n" .. d,

	-- digit-based keys
	["z"] = "2" .. a,
	["ƨ"] = "2" .. b,
	["j"] = "3" .. a,
	["з"] = "3" .. b,
	["[ptk]"] = "3" .. c,
	["x"] = "4" .. a,
	["ч"] = "4" .. b,
	["q"] = "5" .. a,
	["ƽ"] = "5" .. b,
	["h"] = "6" .. a,
	["ƅ"] = "6" .. b,
	["[bgd]"] = "6" .. c,
}

-- Two-character sequences, grouped by the base letter they sort under.
local digraphs = {
	["ae"] = "a" .. a,

	["gv"] = "g" .. a,
	["gy"] = "g" .. b,

	["mb"] = "m" .. a,
	["my"] = "m" .. c,

	["nd"] = "n" .. a,
	["ng"] = "n" .. c,
	["ŋv"] = "n" .. f,
	["ny"] = "n" .. g,

	["oe"] = "o" .. a,
}

-- Three-character sequences; makeSortKey handles these before the digraphs so
-- that "ngv" is not split into "ng" + "v".
local trigraphs = {
	["ngv"] = "n" .. e,
}

-- Replaces every `width`-character key of `map` that occurs in `text` with its
-- mapped value, scanning left to right and never re-examining text that has
-- already been emitted. Keys are looked up literally (not as patterns).
--
-- Unlike running one gsub per key inside a `pairs` loop, the result does not
-- depend on table iteration order: when two keys overlap (e.g. "ng" and "gy"
-- in "ngy") the leftmost one always wins.
local function replace_ngraphs(text, map, width)
	-- Split into code points so that multi-byte letters count as one character.
	local chars = {}
	for ch in mw.ustring.gmatch(text, ".") do
		chars[#chars + 1] = ch
	end

	local out = {}
	local i, n = 1, #chars
	while i <= n do
		local last = i + width - 1
		-- Only build a candidate when a full-width window still fits.
		local to = last <= n and map[table.concat(chars, "", i, last)]
		if to then
			out[#out + 1] = to
			i = last + 1
		else
			out[#out + 1] = chars[i]
			i = i + 1
		end
	end
	return table.concat(out)
end

-- Builds the sort key for `text`.
--
-- Steps, in order:
--   1. trigraphs, then digraphs, are replaced by "<base letter><PUA weight>"
--      (longest sequences first, so "ngv" is not consumed as "ng" + "v");
--   2. monographs are replaced the same way (their keys are Lua patterns);
--   3. the result is NFD-decomposed, every character in the
--      `remove_diacritics` class (currently "%p", i.e. punctuation) is
--      deleted, and the string is recomposed to NFC.
--
-- @param text  string to convert
-- @param lang  accepted but not used by this function
-- @param sc    accepted but not used by this function
-- @return      the sort key string
function export.makeSortKey(text, lang, sc)
	text = replace_ngraphs(text, trigraphs, 3)
	text = replace_ngraphs(text, digraphs, 2)

	-- Iteration order is irrelevant here: the monograph keys match disjoint
	-- characters and no replacement value contains a character that another
	-- key matches.
	for from, to in pairs(monographs) do
		text = mw.ustring.gsub(text, from, to)
	end

	-- Keep only gsub's first result; its second result (the match count) must
	-- not leak into toNFC's argument list.
	local stripped = mw.ustring.gsub(mw.ustring.toNFD(text), "[" .. remove_diacritics .. "]", "")
	return mw.ustring.toNFC(stripped)
end

-- Hand the module table (holding makeSortKey) back to whoever loads this module.
return export