-- Module page: Módulo:generar-pron/gn
-- Documentation for this module can be created at Módulo:generar-pron/gn/doc
-- PRONUNCIACIÓN PARA EL GUARANÍ
-- Autor: Tmagc (inspirado en la versión de en.wikt pero reimplementado desde CERO)
-- Module table returned to callers at the end of the file.
local export = {}
-- Local aliases for the standard table helpers used below.
local insert = table.insert
local concat = table.concat
-- String helpers come from the project's Módulo:String; their exact semantics
-- are defined there, not in this file.
-- NOTE(review): the aliases below are presumably Unicode-aware counterparts of
-- the string library (char/find/gsub/lower) plus project extras — confirm in
-- Módulo:String.
local m_str = require("Módulo:String")
local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
-- NOTE(review): callers below read gsubb's second result as a "something was
-- replaced" flag (see acentuar) — confirm it is a boolean and not a count.
local strsubb = m_str.gsubb
-- NOTE(review): gsub_rep is assumed to repeat the substitution until the text
-- stops changing — confirm in Módulo:String.
local strsubrep = m_str.gsub_rep
local strlower = m_str.lower
local strstrip = m_str.strip
local strsplit = m_str.split
local strhtml = m_str.encode_html
-- CONVENTION: upper-case names for patterns wrapped in square brackets,
-- lower-case names for everything else.
-- Code points U+02C8 and U+02CC: the IPA primary and secondary stress marks.
local ac_primario = u(0x02C8)
local ac_secundario = u(0x02CC)
local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"
-- Internal syllable-division marker: separar_en_silabas inserts it and the
-- callers later replace it with "." or "-".
local divsil = u(0xFFF0)
-- Members of the syllable-separator class: escaped hyphen, dot and divsil.
local sepsil = "%-." .. divsil
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
local SALVO_SEPARADORES_SILABICOS = "[^" .. sepsil .. "]"
-- Word-level separators: "#" and the space.
local seppal = "# "
local separador_excepto_palabras = acentos_ipa .. sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"
-- Punctuation that normalizar turns into " | " fragment boundaries.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
-- Wider punctuation set (adds the quotation marks) that normalizar deletes.
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹]"
local puso = "'ʼꞌꞋ" -- four distinct apostrophe-like characters accepted as the puso
local PUSO = "["..puso.."]"
-- Vowels without a stress diacritic (plain and tilde-nasal), both cases.
local VOCAL = "[aeiouyãẽĩõũỹAEIOUYÃẼĨÕŨỸ]"
-- Consonant letters, both cases, plus every puso variant.
local CONS = "[bcdfgg̃hjklmnñprstvwxzBCDFGG̃HJKLMNÑPRSTVWXZ"..puso.."]"
-- Vowels carrying an acute or a circumflex; these are the keys of quitar_tilde.
local VOCAL_TILDADA = "[áéíóúýâêîôûŷÁÉÍÓÚÝÂÊÎÔÛŶ]"
-- Union of VOCAL and VOCAL_TILDADA.
local VOCAL_GENERAL = "[aeiouyãẽĩõũỹáéíóúýâêîôûŷAEIOUYÃẼĨÕŨỸÁÉÍÓÚÝÂÊÎÔÛŶ]"
-- Placeholder stored for "rr" in phonetic_2chars_map; generar_pron replaces it
-- with "r" only after the single-character mapping has run.
local R_ESPECIAL = "℟"
-- Maps each lower-case accent-marked vowel to the vowel used for the phonetic
-- mapping: an acute yields the plain vowel, a circumflex yields the
-- tilde (nasal) vowel.
local quitar_tilde = {
    -- acute → plain vowel
    ["á"] = "a", ["é"] = "e", ["í"] = "i",
    ["ó"] = "o", ["ú"] = "u", ["ý"] = "y",
    -- circumflex → nasal vowel
    ["â"] = "ã", ["ê"] = "ẽ", ["î"] = "ĩ",
    ["ô"] = "õ", ["û"] = "ũ", ["ŷ"] = "ỹ",
}
-- Grapheme → IPA correspondences for single characters. Characters with no
-- entry here (for example "b" or "d") have no mapping in this table.
local phonetic_chars_map = {
    -- consonants
    ["f"] = "f",
    ["g"] = "ɰ",
    ["g̃"] = "ɰ̃",
    ["h"] = "h",
    ["j"] = "d͡ʒ",
    ["k"] = "k",
    ["l"] = "l",
    ["m"] = "m",
    ["n"] = "n",
    ["ñ"] = "ɲ",
    ["p"] = "p",
    ["r"] = "ɾ",
    ["s"] = "s",
    ["t"] = "t",
    ["v"] = "ʋ",
    -- puso (glottal stop), written with the ASCII apostrophe
    ["'"] = "ʔ",
    -- vowels: oral / nasal pairs
    ["a"] = "a", ["ã"] = "ã",
    ["e"] = "e", ["ẽ"] = "ẽ",
    ["i"] = "i", ["ĩ"] = "ĩ",
    ["o"] = "o", ["õ"] = "õ",
    ["u"] = "u", ["ũ"] = "ũ",
    ["y"] = "ɨ", ["ỹ"] = "ɨ̃",
}
-- Digraph → IPA correspondences. "rr" is stored as the R_ESPECIAL placeholder
-- rather than as its final symbol.
local phonetic_2chars_map = {
    ch = "ʃ",
    mb = "ᵐb",
    nd = "ⁿd",
    ng = "ᵑɡ",
    nt = "ⁿt",
    rr = R_ESPECIAL,
}
-- Spelled-out names of the letters of the alphabet, keyed by the letter as it
-- may appear as a page title (capitalised and lower-case forms). The value is
-- the respelling that gets pronounced instead of the bare letter.
-- The puso is listed under every apostrophe-like character the module accepts,
-- including the two saltillo characters (U+A78C, U+A78B) that the module's own
-- error message asks editors to use.
local pron_abc = {
    -- capitalised letters and digraphs
    ["A"] = "a",
    ["Ã"] = "ã",
    ["Ch"] = "che",
    ["E"] = "e",
    ["Ẽ"] = "ẽ",
    ["G"] = "ge",
    ["G̃"] = "g̃e",
    ["H"] = "he",
    ["I"] = "i",
    ["Ĩ"] = "ĩ",
    ["J"] = "je",
    ["K"] = "ke",
    ["L"] = "le",
    ["M"] = "me",
    ["Mb"] = "mbe",
    ["N"] = "ne",
    ["Nd"] = "nde",
    ["Ng"] = "nge",
    ["Nt"] = "nte",
    ["Ñ"] = "ñe",
    ["O"] = "o",
    ["Õ"] = "õ",
    ["P"] = "pe",
    ["R"] = "re",
    ["Rr"] = "rre",
    ["S"] = "se",
    ["T"] = "te",
    ["U"] = "u",
    ["Ũ"] = "ũ",
    ["V"] = "ve",
    ["Y"] = "y",
    ["Ỹ"] = "ỹ",
    -- lower-case letters and digraphs
    ["a"] = "a",
    ["ã"] = "ã",
    ["ch"] = "che",
    ["e"] = "e",
    ["ẽ"] = "ẽ",
    ["g"] = "ge",
    ["g̃"] = "g̃e",
    ["h"] = "he",
    ["i"] = "i",
    ["ĩ"] = "ĩ",
    ["j"] = "je",
    ["k"] = "ke",
    ["l"] = "le",
    ["m"] = "me",
    ["mb"] = "mbe",
    ["n"] = "ne",
    ["nd"] = "nde",
    ["ng"] = "nge",
    ["nt"] = "nte",
    ["ñ"] = "ñe",
    ["o"] = "o",
    ["õ"] = "õ",
    ["p"] = "pe",
    ["r"] = "re",
    ["rr"] = "rre",
    ["s"] = "se",
    ["t"] = "te",
    ["u"] = "u",
    ["ũ"] = "ũ",
    ["v"] = "ve",
    ["y"] = "y",
    ["ỹ"] = "ỹ",
    -- puso, under every accepted spelling
    ["ʼ"] = "puso",
    ["'"] = "puso",
    ["ꞌ"] = "puso",
    ["Ꞌ"] = "puso",
}
-- Prepares raw text for conversion: lower-cases it, turns delimiting
-- punctuation into " | " fragment boundaries, drops the remaining punctuation,
-- turns hyphens into spaces and tidies up bars and whitespace.
-- @param texto  the text to normalise
-- @return the normalised text, trimmed of spaces and bars at both ends
local function normalizar(texto)
    -- Ordered {pattern, replacement} passes; each is applied with strsubrep.
    local pasadas = {
        { PUNTUACION, " | " },            -- fragment delimiters become IPA foot boundaries
        { PUNTUACION_EXTRA, "" },         -- whatever punctuation is left is removed
        { "[%-‐]", " " },                 -- hyphens become spaces (compound words)
        { "%s*|%s*|%s*", " | " },         -- collapse doubled bars and the spaces around them
        { "%s+", " " },                   -- collapse whitespace runs
    }

    local limpio = strlower(texto)
    for _, pasada in ipairs(pasadas) do
        limpio = strsubrep(limpio, pasada[1], pasada[2])
    end
    limpio = strstrip(limpio, "[%s|]+")
    return limpio
end
-- Inserts the internal syllable marker (divsil) between the syllables of a
-- single word and returns the marked word with no marker at either end.
-- The steps are order-dependent pattern rewrites; most use strsubrep.
-- NOTE(review): strsubrep is assumed to re-apply the substitution until the
-- text stops changing — confirm in Módulo:String.
-- @param p  one word (may contain upper-case letters and accent-marked vowels)
-- @return the word with divsil between syllables
local function separar_en_silabas(p)
-- Wrap the word in markers so the boundary patterns below can match at the edges.
p = divsil .. p .. divsil
-- vowel + consonant cluster + vowel: the whole cluster opens the next syllable.
p = strsubrep(p, "(" .. VOCAL_GENERAL .. ")(" .. CONS .. "+" .. VOCAL_GENERAL .. ")", "%1"..divsil.."%2")
-- vowel + cluster still unsplit: break before the last consonant that precedes a vowel.
p = strsubrep(p, "(" .. VOCAL_GENERAL .. CONS .. "+)(" .. CONS .. VOCAL_GENERAL .. ")", "%1"..divsil.."%2")
-- Keep each digraph of phonetic_2chars_map inside a single syllable.
for cc, fono in pairs(phonetic_2chars_map) do
-- a / b are the two letters of the digraph; A / B their upper-case forms.
local a, b = cc:sub(1,1), cc:sub(2,2)
local A, B = a:upper(), b:upper()
-- marker splits the digraph before a vowel: move the marker in front of the digraph.
p = strsubrep(p, "(["..a..A.."])"..divsil.."(["..b..B.."]"..VOCAL..")", divsil.."%1%2")
-- marker sits between the digraph and a vowel: move the marker in front of the digraph.
p = strsubrep(p, "(["..a..A.."]["..b..B.."])"..divsil.."("..VOCAL..")", divsil.."%1%2")
-- marker splits the digraph before a consonant: move the marker after the digraph.
p = strsubrep(p, "(["..a..A.."])"..divsil.."(["..b..B.."])("..CONS..")", "%1%2"..divsil.."%3")
end
-- A puso right before a marker moves to the start of the next syllable, written as "'".
p = strsubn(p, PUSO..divsil, divsil.."'")
-- An unaccented vowel next to an accent-marked vowel: split them into separate syllables.
p = strsubn(p, "("..VOCAL..")("..VOCAL_TILDADA..")", "%1"..divsil.."%2")
p = strsubn(p, "("..VOCAL_TILDADA..")("..VOCAL..")", "%1"..divsil.."%2")
-- final corrections
-- Collapse runs of markers into one.
p = strsubn(p, "["..divsil.."]+", divsil)
-- A consonant-only chunk between separators joins the following chunk that contains a vowel.
p = strsubrep(p, SEPARADORES_SILABICOS.."("..CONS.."+)"..SEPARADORES_SILABICOS.."("..SALVO_SEPARADORES_SILABICOS.."-"..VOCAL_GENERAL..")", divsil.."%1%2")
-- A remaining consonant-only chunk between separators joins the preceding chunk.
p = strsubrep(p, SEPARADORES_SILABICOS.."("..SALVO_SEPARADORES_SILABICOS.."-)"..SEPARADORES_SILABICOS.."("..CONS.."+)"..SEPARADORES_SILABICOS, divsil.."%1%2"..divsil)
-- Collapse marker runs again, then drop markers and spaces at both ends.
p = strsubn(p, "["..divsil.."]+", divsil)
p = strstrip(p, "["..divsil.." ]+")
return p
end
-- Places the primary stress mark in a word whose syllables are separated by
-- ".". Every puso variant is first rewritten as "ʔ". A syllable containing an
-- accent-marked vowel receives the mark; when there is none, the last
-- syllable does. A "." directly before the mark is dropped.
-- Raises (via assert) when no syllable could be marked.
-- @param p  dot-separated word
-- @return the word with the primary stress mark inserted
local function acentuar(p)
    local SILABA_TILDADA = "[^%.]*"..VOCAL_TILDADA.."[^%.]*"
    local ULTIMA_SILABA = "[^%.]+$"
    local con_marca = ac_primario.."%0"

    local palabra = strsubn(p, PUSO, "ʔ")
    local marcada
    palabra, marcada = strsubb(palabra, SILABA_TILDADA, con_marca)
    if not marcada then
        -- default: stress falls on the last syllable
        palabra, marcada = strsubb(palabra, ULTIMA_SILABA, con_marca)
    end
    assert(marcada)

    palabra = strsubn(palabra, "%."..ac_primario, ac_primario)
    return palabra
end
-- Converts one already-normalised word into its phonological transcription:
-- syllabify, mark stress, strip stress diacritics from the vowels, then map
-- digraphs and single characters to IPA.
local function palabra_a_fonos(palabra)
    local fonos = separar_en_silabas(palabra)
    fonos = strsubn(fonos, divsil, ".")
    fonos = acentuar(fonos)
    fonos = strsubn(fonos, VOCAL_TILDADA, quitar_tilde)
    -- phonological mapping: digraphs first, then single characters
    for digrafo, simbolo in pairs(phonetic_2chars_map) do
        fonos = strsubn(fonos, digrafo, simbolo)
    end
    fonos = strsubn(fonos, ".", phonetic_chars_map)
    -- the "rr" placeholder becomes its final symbol only after the single-character pass
    fonos = strsubn(fonos, R_ESPECIAL, "r")
    return fonos
end

-- Builds the transcription of a text. The text is normalised, split into
-- fragments at "|" and into words at whitespace; words are joined with " " and
-- fragments with " | ".
-- Raises an error when the text contains the typographic quotes ‘ or ’.
-- @param text  the text to transcribe
-- @return a list holding one list with the HTML-encoded transcription
local function generar_pron(text)
    if strfind(text, "[‘’]") then
        error("Por favor, utilice los caracteres 0xA78C (ꞌ) o 0xA78B (Ꞌ) para el PUSO, GRACIAS")
    end

    local normalizado = normalizar(text)
    local fragmentos = strsplit(normalizado, "%s*|%s*")
    local convertido = {}
    for _, fragmento in ipairs(fragmentos) do
        local palabras = strsplit(fragmento, "%s")
        local palabras_convertidas = {}
        for _, palabra in ipairs(palabras) do
            insert(palabras_convertidas, palabra_a_fonos(palabra))
        end
        local fragmento_convertido = concat(palabras_convertidas, " ")
        insert(convertido, fragmento_convertido)
    end

    local transcripcion = concat(convertido, " | ")
    return { { strhtml(transcripcion) } }
end
-- External entry point: receives the page title and the template arguments,
-- fills in the derived fields and returns the same args table.
-- When neither "fone" nor "fono" was supplied it:
--   * replaces a bare letter by its spelled-out name (also stored in "tl"),
--   * generates up to 9 transcriptions into "fono" from the "ayuda" entries,
--   * fills "d"[1] with the hyphen-separated syllables when the title has no
--     space and "d"[1] is not set,
--   * derives "rima" from the first generated transcription.
-- @param titulo  page title
-- @param args    template arguments; reads "ayuda", "fone", "fono" and "d" as tables
-- @return args
function export.procesar_pron_args(titulo, args)
    -- With no explicit respelling, the page title is what gets pronounced.
    if #args["ayuda"] < 1 then
        args["ayuda"][1] = titulo
    end

    -- Transcriptions supplied by hand take precedence: nothing is generated.
    if #args["fone"] >= 1 or #args["fono"] >= 1 then
        return args
    end

    local nombre_letra = pron_abc[args["ayuda"][1]]
    if nombre_letra then
        args["tl"] = nombre_letra
        args["ayuda"][1] = nombre_letra
    end

    local total_ayudas = #args["ayuda"]
    local insertadas = 0 -- transcriptions inserted so far (at most 9)
    for j = 1, total_ayudas do
        if insertadas >= 9 then
            break
        end
        for _, pron in ipairs(generar_pron(args["ayuda"][j])) do
            insert(args["fono"], pron)
            insertadas = insertadas + 1
            if insertadas >= 9 then
                break
            end
        end
    end

    -- Syllable division is only filled in for single-word titles.
    if not strfind(titulo, " ") and not args["d"][1] then
        local silabas = separar_en_silabas(args["ayuda"][1])
        args["d"][1] = strsubn(silabas, divsil, "-")
    end

    -- Rhyme: what follows the last primary stress mark, starting at its first vowel.
    local cola = args["fono"][1][1]
    cola = strsubn(cola, "^.*ˈ(.-)$", "%1")
    args["rima"] = strsubn(cola, ".-([aãeẽiĩoõuũɨɨ̃].*)$", "%1")

    return args
end
return export