Módulo:generar-pron/eo

De Wikcionario, el diccionario libre

La documentación para este módulo puede ser creada en Módulo:generar-pron/eo/doc

local export = {}

local insert = table.insert
local concat = table.concat

local m_str = require("Módulo:String")

local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strlower = m_str.lower
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strstrip = m_str.strip
local strlen = m_str.len
local strexplode = m_str.explode_utf8
local strhtml = m_str.encode_html

-- Character class of punctuation that delimits fragments; normalizar() turns
-- each occurrence into the " | " boundary marker.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
-- Wider character class (adds quotes, guillemets, apostrophes); normalizar()
-- deletes whatever of it is still present after the step above.
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´ʼ]"

-- Stress marks built from code points U+02C8 and U+02CC. Only ac_primario is
-- used by the functions visible in this file.
local ac_primario = u(0x02C8)
local ac_secundario = u(0x02CC)

local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"

-- Internal syllable divider (code point U+FFF0). separar_en_silabas() joins
-- syllables with it; callers later swap it for "." or "-".
local divsil = u(0xFFF0)
-- Body of a character class: a literal hyphen, a period and divsil.
local sepsil = "%-." .. divsil
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
local SALVO_SEPARADORES_SILABICOS = "[^" .. sepsil .. "]"
-- NOTE(review): presumably the word-level separators ("#" and space); seppal,
-- separador and SEPARADOR are not referenced by the functions visible here.
local seppal = "# "
local separador_excepto_palabras = sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"

-- Lowercase consonant letter -> IPA transcription. Also used as a membership
-- test ("is this letter a consonant?") by the syllabification code.
-- Note that ŭ is classified here, not in `vowels`.
local consonants = {
	["b"] = "b",
	["c"] = "t͡s",
	["ĉ"] = "t͡ʃ",
	["d"] = "d",
	["f"] = "f",
	["g"] = "ɡ",
	["ĝ"] = "d͡ʒ",
	["h"] = "h",
	["ĥ"] = "x",
	["j"] = "j",
	["ĵ"] = "ʒ",
	["k"] = "k",
	["l"] = "l",
	["m"] = "m",
	["n"] = "n",
	["p"] = "p",
	["r"] = "r",
	["s"] = "s",
	["ŝ"] = "ʃ",
	["t"] = "t",
	["v"] = "v",
	["z"] = "z",
	['ŭ'] = "w"
}

-- Lowercase vowel letter -> IPA transcription (identity mapping); doubles as
-- the "is this letter a vowel?" lookup.
local vowels = {
	["a"] = "a",
	["e"] = "e",
	["i"] = "i",
	["o"] = "o",
	["u"] = "u",
}

-- Full letter -> phoneme table, passed as the replacement table when
-- generar_pron() converts a word character by character.
local letters_phonemes = {}

-- combine into single table
for k, v in pairs(vowels) do letters_phonemes[k] = v end
for k, v in pairs(consonants) do letters_phonemes[k] = v end

-- Tells whether `term` contains at least one of the plain vowels a, e, i, o, u,
-- ignoring letter case.
-- @param term  string to inspect
-- @return true if a vowel was found, false otherwise
local function has_vowel(term)
	local lowered = strlower(term)
	local first_hit = lowered:find("[aeiou]")
	return first_hit ~= nil
end

-- Splits one word into syllables and returns them joined with `divsil`.
--
-- The word is scanned left to right with a two-letter lookahead. After each
-- letter a syllable boundary is placed when what follows is
--   * a consonant + vowel (the consonant opens the next syllable), except
--     that ŭ right after a vowel stays attached to that vowel;
--   * a consonant + l/r cluster (m, n, ŭ, j, l, r never start such a cluster);
--   * a vowel.
-- A boundary is only placed if the current syllable already contains a vowel.
-- The last two letters are handled separately at the end.
--
-- All comparisons are made on lowercased letters; the returned text keeps the
-- original letter case.
--
-- @param p  the word to split (a single word; callers split on whitespace)
-- @return   the syllabified word; words shorter than two letters are returned
--           unchanged
local function separar_en_silabas(p)
	local letters = strexplode(p)
	local n = #letters

	-- Fewer than two letters: nothing to split. Falling back to `p` makes an
	-- empty word yield a string instead of nil.
	if not letters[2] then
		return letters[1] or p
	end

	-- Consonants that cannot be the first member of a "consonant + l/r" onset.
	local no_liquid_cluster = {
		["m"] = true, ["n"] = true, ["ŭ"] = true, ["j"] = true,
		["l"] = true, ["r"] = true,
	}

	local result = {""}
	local j = 1

	-- Starts a new syllable, but only once the current one has its vowel.
	local function close_syllable()
		if has_vowel(result[j]) then
			j = j + 1
			result[j] = ""
		end
	end

	for i = 1, n - 2 do
		result[j] = result[j] .. letters[i]
		local cur = strlower(letters[i])
		local nxt = strlower(letters[i + 1])
		local nxt2 = strlower(letters[i + 2])

		local cur_is_vowel = vowels[cur] ~= nil
		-- Letters outside both tables never trigger a boundary.
		if cur_is_vowel or consonants[cur] then
			local boundary
			if consonants[nxt] and vowels[nxt2] then
				-- single consonant goes with the following vowel; ŭ after a
				-- vowel stays in the current syllable
				boundary = not (cur_is_vowel and nxt == "ŭ")
			elseif consonants[nxt] and not no_liquid_cluster[nxt]
				and (nxt2 == "l" or nxt2 == "r") then
				-- consonant followed by l or r goes with the l or r
				boundary = true
			else
				-- two adjacent syllable nuclei
				boundary = vowels[nxt] ~= nil
			end

			if boundary then
				close_syllable()
			end
		end
	end

	-- Last two letters. Compare on lowercase, like the loop above, so words
	-- ending in capitals are split the same way as their lowercase form.
	local c1 = letters[n - 1]
	local c2 = letters[n]
	local low1 = strlower(c1)
	local low2 = strlower(c2)

	local hiatus
	if low1 ~= "ŭ" then
		-- two final vowels belong to different syllables
		hiatus = vowels[low1] and vowels[low2]
	else
		-- vowel + ŭ + vowel: ŭ closes the previous syllable
		local before = letters[n - 2] -- nil for two-letter words
		hiatus = before ~= nil and vowels[strlower(before)] and vowels[low2]
	end

	if hiatus then
		result[j] = result[j] .. c1
		j = j + 1
		result[j] = c2
	elseif has_vowel(result[j]) and has_vowel(c1 .. c2) then
		j = j + 1
		result[j] = c1 .. c2
	else
		result[j] = result[j] .. c1 .. c2
	end

	return concat(result, divsil)
end

-- Inserts the primary stress mark into a syllabified word.
-- Stress always goes on the penultimate syllable; a word without any syllable
-- separator is treated as a single syllable and gets the mark at its start.
-- The separator immediately before the stressed syllable, if there is one, is
-- replaced by the stress mark.
-- @param p  word whose syllables are delimited by one of the syllable separators
-- @return   the result of the substitution call on `p`
local function acentuar(p)
	-- Tail of the word to capture: the only syllable, or the last two.
	local cola = SALVO_SEPARADORES_SILABICOS .. "+"
	if strfind(p, SEPARADORES_SILABICOS) then
		cola = cola .. SEPARADORES_SILABICOS .. SALVO_SEPARADORES_SILABICOS .. "+"
	end

	return strsubn(p, SEPARADORES_SILABICOS .. "?(" .. cola .. ")$", ac_primario .. "%1")
end

-- Prepares raw text for transcription: lowercases it, turns fragment-delimiting
-- punctuation into " | " boundaries, drops the remaining punctuation, converts
-- hyphens to spaces, collapses repeated bars/whitespace and trims the ends.
-- @param texto  raw text (page title or pronunciation hint)
-- @return       the normalised text
local function normalizar(texto)
	-- Ordered {pattern, replacement} substitutions; the order matters.
	local pasos = {
		{PUNTUACION, " | "},        -- fragment delimiters become boundaries
		{PUNTUACION_EXTRA, ""},     -- any punctuation left over is removed
		{"[%-‐]", " "},             -- hyphens become spaces
		{"%s*|%s*|%s*", " | "},     -- collapse consecutive boundaries
		{"%s+", " "},               -- collapse runs of whitespace
	}

	local limpio = strlower(texto)
	for _, paso in ipairs(pasos) do
		limpio = strsubrep(limpio, paso[1], paso[2])
	end
	limpio = strstrip(limpio, "[%s|]+")

	return limpio
end

-- Builds the phonemic transcription of a text.
-- The text is normalised, cut into fragments at " | " boundaries and then into
-- words; each word is syllabified, stressed and mapped letter by letter through
-- `letters_phonemes`. Words are joined with spaces, fragments with " | ".
-- @param texto  text to transcribe
-- @return       a list holding one inner list with the HTML-encoded transcription
local function generar_pron(texto)
	local partes_ipa = {}

	for _, fragmento in ipairs(strsplit(normalizar(texto), "%s*|%s*")) do
		local ipa_palabras = {}

		for _, palabra in ipairs(strsplit(fragmento, "%s")) do
			-- A lone consonant is read as the letter's name: consonant + "o".
			if strlen(palabra) == 1 and consonants[palabra] then
				palabra = palabra .. 'o'
			end

			local silabeada = separar_en_silabas(palabra)
			local acentuada = acentuar(silabeada)

			-- Show the internal divider as ".", then map letters to phonemes;
			-- characters missing from the table are left as they are.
			local con_puntos = strsubn(acentuada, divsil, ".")
			local ipa = strsubn(con_puntos, ".", letters_phonemes)

			insert(ipa_palabras, ipa)
		end

		insert(partes_ipa, concat(ipa_palabras, " "))
	end

	return {{strhtml(concat(partes_ipa, " | "))}}
end

-- External entry point: receives the page title and the template arguments,
-- fills in the pronunciation fields that can be derived automatically and
-- returns the same `args` table.
--
-- When neither args["fone"] nor args["fono"] has entries, it:
--   * appends up to 9 generated transcriptions to args["fono"] (and, when
--     pronunciation hints were supplied, the matching hint to args["fgraf"]);
--   * sets args["d"][1] to the hyphenated syllable division of the first hint,
--     if the title has no spaces and no division was given;
--   * sets args["rima"] from the first transcription.
--
-- @param titulo  page title
-- @param args    template arguments; "ayuda", "fone", "fono", "fgraf" and "d"
--                are indexed as lists here (assumed to be pre-populated as
--                tables by the caller — TODO confirm)
-- @return        `args`, modified in place
function export.procesar_pron_args(titulo, args)
	local ayuda = args["ayuda"]

	-- An empty first hint counts as "no hint", both for the letter-name rule
	-- and for the fallback to the title below.
	local falta_ayuda = ayuda[1] == nil or ayuda[1] == ""

	-- A lone consonant gets "o" appended (pronunciation of alphabet letters).
	if not falta_ayuda then
		if strlen(ayuda[1]) == 1 and consonants[ayuda[1]] then
			ayuda[1] = ayuda[1] .. "o"
		end
	elseif strlen(titulo) == 1 and consonants[titulo] then
		ayuda[1] = titulo .. "o"
		falta_ayuda = false
	end

	local vino_ayuda = true
	if falta_ayuda then
		ayuda[1] = titulo
		-- Further hints after an empty first one still count as supplied.
		vino_ayuda = #ayuda > 1
	end

	if #args["fone"] < 1 and #args["fono"] < 1 then
		local A = #ayuda
		local j = 1 -- index of the current hint
		local k = 1 -- pronunciations inserted so far, plus one (at most 9 are inserted)
		while k <= 9 and j <= A do
			local fono = generar_pron(ayuda[j])
			for _, pron in ipairs(fono) do
				if vino_ayuda then
					insert(args["fgraf"], {ayuda[j]})
				end
				insert(args["fono"], pron)
				k = k + 1
				if k > 9 then
					break
				end
			end
			j = j + 1
		end

		-- Syllable division is only derived for single-word titles.
		local tiene_espacios = strfind(titulo, " ")

		if not tiene_espacios then
			if not args["d"][1] then
				local aux = separar_en_silabas(ayuda[1])
				args["d"][1] = strsubn(aux, divsil, "-")
			end
		end

		-- Rhyme: what follows the last primary stress mark, starting at the
		-- first vowel found there.
		local rim = args["fono"][1][1]
		rim = strsubn(rim, "^.*ˈ(.-)$", "%1")
		args["rima"] = strsubn(rim, ".-".."(".."[aeiou]"..".*"..")".."$", "%1")
	end

	return args
end

return export