Módulo:String/sustituir rec

De Wikcionario, el diccionario libre

La documentación para este módulo puede ser creada en Módulo:String/sustituir rec/doc

local tonfd = require("Módulo:String").toNFD
local tonfc = require("Módulo:String").toNFC
local corregir = require("Módulo:String").corregirSecuenciasIncorrectas
local gsub = require("Módulo:String").gsub

local function doRemoveExceptions(text, sc, remove_exceptions)
	substitutes = {} or substitutes
	local i = 0
	for _, exception in ipairs(remove_exceptions) do
		exception = tonfd(exception, sc)
		text = gsub(text, exception, function(m)
			i = i + 1
			table.insert(substitutes, m)
			return "\127"
		end)
	end
	return text
end

local function undoRemoveExceptions(text)
	local i = 0
	return text:gsub("\127", function(m1)
		i = i + 1
		return substitutes[i]
	end)
end

local function doSubstitutions(text, cod, idioma, sc, substitution_data, function_name, recursed)
	local fail, cats = nil, {}
	local sc_code = sc:getCode()
	-- If there are language-specific substitutes given in the data module, use those.
	if type(substitution_data) == "table" then
		-- If a script is specified, run this function with the script-specific data before continuing.
		if substitution_data[sc_code] then
			text, fail, cats = doSubstitutions(text, cod, idioma, sc, substitution_data[sc_code], function_name, true)
		-- Hant, Hans and Hani are usually treated the same, so add a special case to avoid having to specify each one separately.
		elseif sc_code:match("^Han") and substitution_data.Hani then
			text, fail, cats = doSubstitutions(text, cod, idioma, sc, substitution_data.Hani, function_name, true)
		-- Substitution data with key 1 in the outer table may be given as a fallback.
		elseif substitution_data[1] then
			text, fail, cats = doSubstitutions(text, cod, idioma, sc, substitution_data[1], function_name, true)
		end
		-- Iterate over all strings in the "from" subtable, and gsub with the corresponding string in "to". We work with the NFD decomposed forms, as this simplifies many substitutions.
		if substitution_data.from then
			for i, from in ipairs(substitution_data.from) do
				-- We normalize each loop, to ensure multi-stage substitutions work correctly.
				text = tonfd(text, sc)
				-- Check whether specific magic characters are present, as they rely on UTF-8 compatibility. If not, just use string.gsub. In most cases, doing this is faster than using mw.ustring.gsub every time.
				text = gsub(text, tonfd(from, sc), substitution_data.to[i] or "")
			end
		end
		
		if substitution_data.remove_diacritics then
			text = tonfd(text, sc)
			-- Convert exceptions to PUA.
			if substitution_data.remove_exceptions then
				text = doRemoveExceptions(text, sc, substitution_data.remove_exceptions)
			end
			-- Strip diacritics.
			text =  gsub(text, "[" .. substitution_data.remove_diacritics .. "]", "")
			-- Convert exceptions back.
			if substitution_data.remove_exceptions then
				text = undoRemoveExceptions(text)
			end
		end
	elseif type(substitution_data) == "string" then
		-- If there is a dedicated function module, use that. --> DESACTIVADO TEMPORALMENTE HASTA HABER IMPLEMENTADO TODOS LOS MODULOS
		--[=[
		local is_module, module = pcall(require, "Módulo:" .. substitution_data)
		if is_module then
			if function_name == "tr" then
				text, fail, cats = module[function_name](text, cod, sc_code)
			else
				text, fail, cats = module[function_name](tonfd(text, sc), cod, sc_code)
			end
		else
			error("Substitution data '" .. substitution_data .. "' does not match an existing module or module failed to execute: " .. tostring(module) .. ".")
		end
		]=]--
	end
	
	-- Don't normalize to NFC if this is the inner loop or if a module returned nil.
	if recursed or not text then
		return text, fail, cats
	else
		-- Fix any discouraged sequences created during the substitution process, and normalize into the final form.
		text = corregir(text, sc)
		return tonfc(text), fail, cats
	end
end

-- This avoids calling into globals with require when the main function recurses.
return function (text, cod, idioma, sc, substitution_data, function_name)
	return doSubstitutions(text, cod, idioma, sc, substitution_data, function_name)
end