Ir al contenido

Módulo:sortkey

De Wikcionario, el diccionario libre
Documentación del módulo

Fallaron 10 de 16 pruebas (actualizar)

test_sortkeys:
pruebas español
idiomaentradase esperasalidaevaluación
español (es)témpuraTEMPURATEMPURA[ ok ]
español (es)¿Qué?QUE¿QUE?[ MAL ]
español (es)Por qué?POR QUEPOR QUE?[ MAL ]
español (es)Por quéPOR QUEPOR QUE[ ok ]
español (es)Por quePOR QUEPOR QUE[ ok ]
español (es)re!RERE![ MAL ]
español (es)¡re!RE¡RE![ MAL ]
español (es)reRERE[ ok ]
español (es)pingüinoPINGUINOPINGUINO[ ok ]
pruebas con diacríticos
idiomaentradase esperasalidaevaluación
griego antiguo (grc)Πηληϊάδης ΑἶνοςΠΗΛΗΙΑΔΗΣ ΑΙΝΟΣΠΗΛΗΙΑΔΗΣ ΑΙΝΟΣ[ ok ]
navajo (nv)shį́į́dą́ą́ʼSHIIDAASIIDAAZ[ MAL ]
prueba con el dotted dotless i
idiomaentradase esperasalidaevaluación
turco (tr)İzmirİZMİRIZMIR[ MAL ]
turco (tr)ışıkIŞIKISIK[ MAL ]
prueba con módulos dedicados
idiomaentradase esperasalidaevaluación
vietnamita (vi)Tuyên ngôn toàn thế giới về nhân quyền của Liên Hợp QuốcTUYE₂N NGO₂N TOAN1 THE₂4 GIO₃I4 VE₂1 NHA₂N QUYE₂N1 CUA2 LIE₂N HO₃P5 QUO₂C4TUYÊN NGÔN TOÀN THẾ GIỚI VỀ NHÂN QUYỀN CỦA LIÊN HỢP QUỐC[ MAL ]
chino (zh)命裡有時終須有,命裡無時莫強求口05衣07月02日06糸05頁03月02,口05衣07火08日06艸07弓08水02命裡有時終須有,命裡無時莫強求[ MAL ]
chino (zh)⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心麵辵54辵54麥09⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心麵[ MAL ]
Esta documentación está transcluida desde Módulo:sortkey/doc.
Los editores pueden experimentar en la zona de pruebas de este módulo.
Por favor, añade las categorías e interwikis a la subpágina de documentación. Subpáginas de este módulo.
local ugsub = mw.ustring.gsub
local tofixednfd = require("Módulo:String").toNFD
local tofixednfc = require("Módulo:String").toNFC
local corregir = require("Módulo:String").corregirSecuenciasIncorrectas

-- UTF-8 byte sequences for the ranges U+202A-U+202E and U+2066-U+2069, written
-- so the string can be dropped inside a ustring character class ("[...]").
local dir_char = "\226\128\170-\226\128\174\226\129\166-\226\129\169"

-- Strips any run of the characters in `dir_char` from the beginning and from
-- the end of `text`, leaving the middle of the string untouched.
-- Returns only the resulting string (the substitution count is discarded).
local function remove_directional_chars(text)
	local char_class = "[" .. dir_char .. "]*"
	-- The frontier stops the lazy-looking "(.*)" capture right before the
	-- trailing run of directional characters (or at the end of the string).
	local boundary = "%f[%z" .. dir_char .. "]"
	local pattern = "^" .. char_class .. "(.*)" .. boundary .. char_class .. "$"
	local stripped = ugsub(text, pattern, "%1")
	return stripped
end

--[==[Documentation for export.generarSortkey (defined further down): creates a sort key for the given entry name, following the rules appropriate for the language. This removes diacritical marks from the entry name if they are not considered significant for sorting, and may perform some other changes. Any initial hyphens or asterisks are also removed (unless the term consists only of spacing and punctuation characters), and parentheses are stripped as well (the text they enclose is kept).
The <code>sort_key</code> setting for each language in the data modules defines the replacements made by this function, or it gives the name of the module that takes the entry name and returns a sortkey.]==]
-- Decode HTML entities in `text`. The helper module is only loaded when the
-- text actually contains something shaped like an entity ("&...;"); otherwise
-- the input is handed back untouched.
local function noEntities(text)
	local has_entity = text:find("&[^;]+;")
	if not has_entity then
		return text
	end
	return require("Module:String/avanzado").get_entities(text)
end

-- Returns `text` with its HTML entities decoded, unless the raw (undecoded)
-- text is listed as an unsupported title, in which case the raw text is
-- returned as-is. Decoding happens first so that the unsupported-title list is
-- only loaded when decoding actually changed something.
local function checkNoEntities(text)
	local decoded = noEntities(text)
	if decoded == text then
		-- Nothing was decoded: no need to consult the unsupported-title data.
		return decoded
	end
	local unsupported = mw.loadData("Module:enlaces/datos").unsupported_titles
	if unsupported[text] then
		return text
	end
	return decoded
end

-- Process carets (and any backslash escapes) in `text`.
--
-- Escapes understood:
--   * "\\" (an escaped backslash) in a run of backslashes directly before a
--     caret yields a literal backslash;
--   * "\^" (an escaped caret) yields a literal caret.
-- Every remaining (unescaped) caret is replaced using `pattern`/`repl`.
--
-- @param text    string to process.
-- @param pattern optional Lua pattern applied to unescaped carets
--                (defaults to "%^").
-- @param repl    optional replacement for `pattern` (defaults to "", i.e.
--                plain removal).
-- @return the processed string, and nothing else.
local function processCarets(text, pattern, repl)
	local rep
	-- Protect escaped backslashes as \3, one pair per pass, until none remain
	-- in front of a caret.
	repeat
		text, rep = text:gsub("\\\\(\\*^)", "\3%1")
	until rep == 0
	-- \4 shields escaped carets while the unescaped ones are substituted; the
	-- placeholders are turned back into "\" and "^" at the end.
	-- The outer parentheses drop gsub's second result (the substitution
	-- count), which would otherwise leak to callers as an extra return value.
	return (text
		:gsub("\\^", "\4")
		:gsub(pattern or "%^", repl or "")
		:gsub("\3", "\\")
		:gsub("\4", "^"))
end
	
-- Strips carets from `text` via processCarets, but only when the script object
-- `sc` reports no capitalization, reports being transliterated, and the text
-- contains at least one caret. In every other case the text is returned
-- unchanged.
local function removeCarets(text, sc)
	local leave_untouched = sc:hasCapitalization()
		or not sc:isTransliterated()
		or not text:match("%^")
	if leave_untouched then
		return text
	end
	return processCarets(text)
end

local export = {}

-- Builds the sort key for an entry name.
--
-- @param text   entry name. If nil or empty it is returned unchanged together
--               with nil and an empty table.
-- @param cod    forwarded untouched to Módulo:String/sustituir (presumably the
--               language code; confirm against that module).
-- @param idioma language data table. The fields read here are
--               `dotted_dotless_i` and `sort_key`; the table is also handed to
--               findBestScript and to Módulo:String/sustituir.
-- @return the sort key (HTML-encoded by Módulo:String.encode_html), followed by
--         the `fail` and `cats` values produced by Módulo:String/sustituir.
function export.generarSortkey(text, cod, idioma)
	if (not text) or text == "" then
		return text, nil, {}
	end
	if text:find("<[^<>]+>") then
		-- Module titles need the "Módulo:" namespace prefix; the previous
		-- "Módulo/traza" could not be resolved as a module, so any input
		-- containing an HTML tag raised an error instead of being tracked.
		-- NOTE(review): assumes the tracking module lives at Módulo:traza.
		require("Módulo:traza")("símbolo HTML en texto")
	end
	-- Remove directional characters, soft hyphens, strip markers and HTML tags.
	text = ugsub(text, "[\194\173" .. dir_char .. "]", "")
	text = mw.text.unstrip(text)
		:gsub("<[^<>]+>", "")
	
	-- Undo percent-encoding, then decode HTML entities (unless the raw text is
	-- an unsupported title).
	text = mw.uri.decode(text, "PATH")
	text = checkNoEntities(text)
	
	-- Remove initial hyphens and * unless the term only consists of spacing + punctuation characters.
	-- The two bracketed ranges keep any leading/following characters from that
	-- range in place (presumably private-use placeholder characters).
	text = ugsub(text, "^([􀀀-􏿽]*)[-־ـ᠊*]+([􀀀-􏿽]*)(.*[^%s%p].*)", "%1%2%3")
	
	local sc = require("Module:String/avanzado").findBestScript(text, idioma)
	
	-- Normalise the text for the detected script before any substitutions run.
	text = corregir(text, sc)
	text = tofixednfd(text, sc)
	text = removeCarets(text, sc)
	
	-- For languages with dotted dotless i, ensure that "İ" is sorted as "i", and "I" is sorted as "ı".
	if idioma.dotted_dotless_i then
		text = text
			:gsub(mw.ustring.toNFD("İ"), "i")
			:gsub("I", "ı")
		text = tofixednfd(text, sc)
	end
	-- Convert to lowercase, make the sortkey, then convert to uppercase.
	-- Scripts that (sometimes) sort by scraping page content are exempt from
	-- both case conversions, because changing capitalization would change the
	-- target page.
	local fail, cats
	if not sc:sortByScraping() then
		text = text:ulower()
	end
	
	text, fail, cats = require("Módulo:String/sustituir")(text, nil, nil, cod, idioma, sc, idioma.sort_key, "makeSortKey")
	
	if not sc:sortByScraping() then
		-- When the language has dotted/dotless i but defines no sort_key
		-- substitutions, map "ı" -> "I" and "i" -> "İ" explicitly before
		-- uppercasing, so that "i" and "ı" do not both end up sorted as "I".
		-- With sort_key substitutions in place this is left to them.
		if idioma.dotted_dotless_i and not idioma.sort_key then
			text = text
				:gsub("ı", "I")
				:gsub("i", "İ")
			text = tofixednfc(text, sc)
		end
		text = text:uupper()
	end
	
	-- Remove parentheses, as long as they are either preceded or followed by something.
	text = text
		:gsub("(.)[()]+", "%1")
		:gsub("[()]+(.)", "%1")
	
	text = require("Módulo:String").encode_html(text)
	return text, fail, cats
end

return export