-- Modul:cop-sortkey
--
-- Daripada Wiktionary
--
-- Pendokumenan untuk modul ini boleh diciptakan di Modul:cop-sortkey/doc

-- Table of functions exported by this module; it is returned at the end of
-- the file. Declared local so that loading the module does not create (or
-- overwrite) a global variable named `export`.
local export = {}

-- Local alias for mw.ustring.match, used below to test individual words
-- against the vowel/consonant character classes.
local match = mw.ustring.match
-- Substitution through mw.ustring.gsub that hands back only the resulting
-- string, discarding the replacement count that gsub returns as well.
local function ugsub(text, regex, replacement)
	-- The surrounding parentheses truncate the call to its first result.
	return (mw.ustring.gsub(text, regex, replacement))
end

-- Letters counted as vowels while a sort key is being built.
local vowels = "ⲁⲉⲏⲓⲟⲩⲱ"
-- All letters of the sorting alphabet. The final Latin "w" is the stand-in
-- that makeSortKey temporarily substitutes for ⲩ next to another vowel.
local alphabet = "ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱϣϥⳉϧϩϫϭw"
-- Character class matching exactly one vowel.
local vowel = ("[%s]"):format(vowels)
-- Whatever remains of the alphabet once the vowels are stripped out.
local consonants = ugsub(alphabet, vowel, "")
-- Character class matching exactly one consonant.
local consonant = ("[%s]"):format(consonants)

-- Substitutions that makeSortKey performs on the lower-cased text before the
-- sort key is assembled: every occurrence of a key is replaced by its value,
-- and an empty value removes the key from the text altogether.
local replacements = {
	["ⲟⲩ"] = "ⲩ",	-- two-letter sequence reduced to a single letter
	["ⳤ"] = "ⲕⲉ",	-- single symbol expanded to two letters
	["ⲉⲓ"] = "ⲓ",	-- two-letter sequence reduced to a single letter
	["ϯ"] = "ⲧⲓ",	-- single letter expanded to two letters
	["-"] = "",	-- removed
	["⸗"] = "",	-- removed
	["ˋ"] = "",	-- removed
}

-- Maps each letter of the Coptic block listed here to a Greek letter.
-- makeSortKey passes this table to string.gsub as the replacement argument,
-- so any character without an entry is left unchanged in the sort key.
local CopticToGreek = {
	["ⲁ"] = "α",
	["ⲃ"] = "β",
	["ⲅ"] = "γ",
	["ⲇ"] = "δ",
	["ⲉ"] = "ε",
	["ⲍ"] = "ζ",
	["ⲏ"] = "η",
	["ⲑ"] = "θ",
	["ⲓ"] = "ι",
	["ⲕ"] = "κ",
	["ⲗ"] = "λ",
	["ⲙ"] = "μ",
	["ⲛ"] = "ν",
	["ⲝ"] = "ξ",
	["ⲟ"] = "ο",
	["ⲡ"] = "π",
	["ⲣ"] = "ρ",
	["ⲥ"] = "σ",
	["ⲧ"] = "τ",
	["ⲩ"] = "υ",
	["ⲫ"] = "φ",
	["ⲭ"] = "χ",
	["ⲯ"] = "ψ",
	["ⲱ"] = "ω",
}

--[[
	Order in which the keys of `replacements` are applied by makeSortKey.
	The result depends on this order (removing "-" or expanding "ⳤ" can bring
	"ⲉ" and "ⲓ" together), so it must not be left to pairs(), whose traversal
	order is unspecified. Punctuation is removed first, single characters are
	expanded next, and the two-letter sequences are reduced last, so that an
	abbreviated or hyphenated spelling gets the same key as the written-out one.
	An entry added to `replacements` must also be listed here to take effect.
]]
local replacement_order = { "-", "⸗", "ˋ", "ⳤ", "ϯ", "ⲟⲩ", "ⲉⲓ" }

--[[
	Builds the sort key for a Coptic term.

	Parameters:
		text	the term to sort; nil is passed through as nil.
		lang	unused by this function.
		sc		script code; when given and different from "Copt", `text` is
				returned unchanged.

	Returns the sort key as a string. For every whitespace-separated word the
	key contains, in this order: the initial vowel (if any), the word with all
	vowels removed, a digit marking how the word ends, the word with its
	initial vowel and all consonants removed, and a space.

	Note: the Latin letter "w" is used internally as a placeholder for ⲩ
	adjacent to a vowel, so a "w" already present in the input is turned
	into ⲩ as well.
]]
function export.makeSortKey(text, lang, sc)
	if sc and sc ~= "Copt" then
		return text
	end
	
	if not text then
		return nil
	end
	
	local str_gsub = string.gsub
	
	text = mw.ustring.lower(text)
	
	for _, letter in ipairs(replacement_order) do
		local replacement = replacements[letter]
		if replacement then
			-- Escape ASCII pattern magic characters (e.g. "-") so that the
			-- key is always matched literally.
			local pattern = str_gsub(letter, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0")
			text = str_gsub(text, pattern, replacement)
		end
	end
	
	-- Mark ⲩ next to another vowel with the placeholder "w", which belongs to
	-- the consonant class, so that it is grouped with the consonants below.
	text = ugsub(text, "ⲩ(" .. vowel .. ")", "w%1")
	text = ugsub(text, "(" .. vowel .. ")ⲩ", "%1w")
	
	local sort = {}
	
	for word in mw.ustring.gmatch(text, "%S+") do
		-- Add initial vowel (if any).
		local initial_vowel = match(word, "^" .. vowel)
		if initial_vowel then
			sort[#sort + 1] = initial_vowel
		end
		
		-- Add everything that is not a vowel: the consonants, in order,
		-- plus any character outside the alphabet.
		sort[#sort + 1] = ugsub(word, vowel .. "+", "")
		
		--[[
			Add "1" if the word ends in a consonant, "2" if it ends in a
			vowel, and nothing otherwise.
			Digits sort before Greek–Coptic and Coptic Unicode blocks.
		]]
		if match(word, consonant .. "$") then
			sort[#sort + 1] = "1"
		elseif match(word, vowel .. "$") then
			sort[#sort + 1] = "2"
		end
		
		-- Get non-initial vowels (in order) by removing initial vowel and all consonants.
		sort[#sort + 1] = ugsub(ugsub(word, "^" .. vowel, ""), consonant, "")
		
		sort[#sort + 1] = " "
	end
	
	local key = table.concat(sort)
	
	-- Turn the placeholder back into the letter it stands for.
	key = str_gsub(key, "w", "ⲩ")
	
	--[[
		Convert Greek-derived Coptic characters to Greek ones.
		Otherwise, the uniquely Coptic letters would sort first, because
		they were added to Unicode earlier.
		ϣϥⳉϧϩϫϭ		ⲁⲃⲅⲇⲉⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱ

		αβγδεζηθικλμνξοπρστυφχψω	ϣϥⳉϧϩϫϭ

		The pattern matches one multi-byte UTF-8 character at a time; a
		character with no entry in CopticToGreek is kept as it is.
	]]
	key = str_gsub(key, "[\194-\244][\128-\191]+", CopticToGreek)
	
	return key
end

-- Language and script objects handed to tag_text. They are resolved on the
-- first call to tag() rather than when this module is loaded, so that callers
-- that only need makeSortKey do not load Module:languages and Module:scripts.
local lang, sc

-- Passes `text` to tag_text of Module:script utilities together with the
-- "cop" language object and the "Copt" script object, and returns its result.
local function tag(text)
	lang = lang or require("Module:languages").getByCode("cop")
	sc = sc or require("Module:scripts").getByCode("Copt")
	return require("Module:script utilities").tag_text(text, lang, sc)
end

--[[
	Sorts the positional arguments of `frame` by their sort keys and returns
	them as a wikitext bullet list, each term followed by its sort key inside
	<code> tags.

	Parameters:
		frame	frame object whose positional `args` are the terms to sort.

	Returns the list as a single string (empty when there are no arguments).
]]
function export.showSorting(frame)
	-- Compute every sort key exactly once, instead of twice per comparison
	-- inside the sort and once more for display.
	local entries = {}
	
	for _, term in ipairs(frame.args) do
		entries[#entries + 1] = { term = term, key = export.makeSortKey(term) }
	end
	
	table.sort(entries, function(entry1, entry2)
		return entry1.key < entry2.key
	end)
	
	local lines = {}
	
	for i, entry in ipairs(entries) do
		lines[i] = "\n* " .. tag(entry.term) .. " (<code>" .. entry.key .. "</code>)"
	end
	
	return table.concat(lines)
end

return export