Módulo:generar-pron/az
La documentación para este módulo puede ser creada en Módulo:generar-pron/az/doc
-- PRONUNCIACIÓN PARA EL AZERÍ
-- Implementado por Tmagc, basado en la versión de en.wikt
local export = {}
local insert = table.insert
local concat = table.concat
local m_str = require("Módulo:String")
local u = m_str.char
local strlower = m_str.lower
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strsubrep = m_str.gsub_rep
local substr = m_str.sub
local strlen = m_str.ulen
local strstrip = m_str.strip
local strsplit = m_str.split
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strhtml = m_str.encode_html
-- CONVENTION: upper-case names hold patterns already wrapped in square brackets
-- (ready-to-use character classes); lower-case names hold everything else
-- (raw character lists meant to be embedded in a class by the caller).

-- IPA stress marks (U+02C8 primary, U+02CC secondary).
local ac_primario = u(0x02C8)
local ac_secundario = u(0x02CC)
local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"
-- Internal syllable-division marker (U+FFF0). separar_en_silabas inserts it and
-- generar_pron later replaces it with "." or with the stress mark.
local divsil = u(0xFFF0)
-- Characters treated as syllable separators: "-" (escaped for use inside a
-- character class), "." and the internal marker.
local sepsil = "%-." .. divsil
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
local SALVO_SEPARADORES_SILABICOS = "[^" .. sepsil .. "]"
-- Word separators.
local seppal = "# "
local separador_excepto_palabras = acentos_ipa .. sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"
-- Punctuation that delimits fragments (normalizar turns it into " | ").
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
-- Superset that also covers quotes and apostrophes (normalizar deletes whatever
-- of it is left after the previous step).
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´ʼ]"
-- Letter inventories, in both Latin and Cyrillic script and in both cases.
local vocales = "AaEeİiOoUuIıÖöÜüƏəАаЕеЫыИиОоӨөУуҮүӘә"
local paradas = "bBdDkKqQpPtTБбДдКкГгПпТт"
local africadas = "cCçÇgGҸҹЧчҜҝ"
local fricativas = "fFğĞhHxXjJsSşŞvVyYzZФфҒғҺһХхЖжСсШшВвЈјЗз"
local liquidas = "rRlLРрЛл"
local nasales = "mMnNМмНн"
-- Disabled voiced / voiceless consonant lists, kept for reference:
--local sonora = "bBçÇdDgGğĞjJlLmMnNqQrRvVyYzZ"
--local no_sonora = "cCfFhHkKpPtTsSşŞxX"
local consonantes = paradas .. africadas .. fricativas .. liquidas .. nasales
local VOCAL = "["..vocales.."]"
local CONS = "["..consonantes.."]"
-- Combining diacritics, used after NFD decomposition in normalizar.
local agudo = u(0x301)
local grave = u(0x300)
local dieresis = u(0x0308)
local cedilla = u(0x0327)
local punto = u(0x0307)
local breve = u(0x306)
-- Diacritics that are part of a letter, as opposed to the acute/grave marks
-- ("tilde"), which generar_pron reads as an explicit stress indication.
local diacr_no_tilde = dieresis .. cedilla .. punto .. breve
local tilde = agudo .. grave
local DIACR_NO_TILDE = "["..diacr_no_tilde.."]"
local TILDE = "["..tilde.."]"
-- Every character accepted in normalized input; normalizar raises an error on
-- anything else.
local permitido = vocales .. consonantes .. separador .. agudo .. grave .. "|" -- assuming all punctuation has already been cleaned up
-- IPA
local vocal_ipa = "iyɯueœoæɑ"
local vocales_posteriores = "ɑuoɯ"
local vocales_frontales = "æyœie"
local VOCAL_POSTERIOR = "["..vocales_posteriores.."]"
local VOCAL_FRONTAL = "["..vocales_frontales.."]"
-- cf. https://www.youtube.com/watch?v=rrjbSG29kUQ
-- cf. https://az.wikipedia.org/wiki/Az%C9%99rbaycan_%C9%99lifbas%C4%B1
-- First mapping pass. This has to be applied before the others so that the
-- results do not get mixed up: orthographic "j" must become "ʒ" before the next
-- pass turns orthographic "y" into IPA "j".
local mapeo_IPA_1_1 = {
["j"] = "ʒ",
["ж"] = "ʒ",
}
-- Second mapping pass: orthographic "y" (Latin) / "ј" (Cyrillic) to IPA "j".
-- generar_pron applies it after mapeo_IPA_1_1, so the "j" produced here is not
-- turned into "ʒ".
local mapeo_IPA_1_2 = {
["y"] = "j",
["ј"] = "j",
}
-- Third mapping pass: one-to-one grapheme -> IPA table for the remaining
-- lower-case Latin and Cyrillic letters. generar_pron applies it character by
-- character, so an output symbol that is also a key (e.g. "c" produced from "k")
-- is not mapped a second time within this pass.
local mapeo_IPA_1 = {
["a"] = "ɑ",
["b"] = "b",
["c"] = "ʥ",
["ç"] = "ʨ",
["d"] = "d",
["e"] = "e",
["ə"] = "æ",
["f"] = "f",
["g"] = "ɟ",
["ğ"] = "ʁ",
["h"] = "h",
["x"] = "χ",
["ı"] = "ɯ",
["i"] = "i",
--["j"] = "ʒ", -- handled by mapeo_IPA_1_1
["k"] = "c",
["q"] = "ɡ",
["l"] = "l",
["m"] = "m",
["n"] = "n",
["o"] = "o",
["ö"] = "œ",
["p"] = "p",
["r"] = "ɾ",
["s"] = "s",
["ş"] = "ʃ",
["t"] = "t",
["u"] = "u",
["ü"] = "y",
["v"] = "v",
--["y"] = "j", -- handled by mapeo_IPA_1_2
["z"] = "z",
["а"] = "ɑ",
["б"] = "b",
["ҹ"] = "ʥ",
["ч"] = "ʨ",
["д"] = "d",
["е"] = "e",
["ә"] = "æ",
["ф"] = "f",
["ҝ"] = "ɟ",
["ғ"] = "ʁ",
["һ"] = "h",
["х"] = "χ",
["ы"] = "ɯ",
["и"] = "i",
--["ж"] = "ʒ", -- handled by mapeo_IPA_1_1
["к"] = "c",
["г"] = "ɡ",
["л"] = "l",
["м"] = "m",
["н"] = "n",
["о"] = "o",
["ө"] = "œ",
["п"] = "p",
["р"] = "ɾ",
["с"] = "s",
["ш"] = "ʃ",
["т"] = "t",
["у"] = "u",
["ү"] = "y",
["в"] = "v",
--["ј"] = "j", -- handled by mapeo_IPA_1_2
["з"] = "z",
}
-- Context-dependent rewrites (pattern -> replacement) that generar_pron applies
-- to the orthographic form before the per-letter mappings: "ov"/"öv" followed by
-- an optional syllable separator and a consonant become a diphthong. The
-- captured separator+consonant is put back unchanged via %1.
-- Only the Latin-script spellings are covered by these keys.
local mapeo_IPA_3 = {
["ov("..SEPARADORES_SILABICOS.."?["..consonantes.."uU])"] = "ou̯%1", -- sovurmaq, dovşan
["öv("..SEPARADORES_SILABICOS.."?["..consonantes.."])"] = "œy̯%1", -- cövhər
}
-- Spelled-out names of the letters of the alphabet, keyed by the letter itself
-- (Latin and Cyrillic, lower and upper case). procesar_pron_args looks the page
-- title up here so that a single-letter entry is pronounced by its letter name.
--
-- Azerbaijani has two distinct letter pairs: dotless "I"/"ı" and dotted "İ"/"i".
-- The upper-case dotted "İ" needs its own key; using "I" twice would silently
-- overwrite the dotless entry and leave "İ" without any entry at all.
local pron_abc = {
	["a"] = "a",
	["b"] = "be",
	["c"] = "ce",
	["ç"] = "çe",
	["d"] = "de",
	["e"] = "e",
	["ə"] = "ə",
	["f"] = "fe",
	["g"] = "ge",
	["ğ"] = "ğe",
	["h"] = "he",
	["x"] = "xe",
	["ı"] = "ı",
	["i"] = "i",
	["j"] = "je",
	["k"] = "ke",
	["q"] = "qe",
	["l"] = "el",
	["m"] = "em",
	["n"] = "en",
	["o"] = "o",
	["ö"] = "ö",
	["p"] = "pe",
	["r"] = "er",
	["s"] = "se",
	["ş"] = "şe",
	["t"] = "te",
	["u"] = "u",
	["ü"] = "ü",
	["v"] = "ve",
	["y"] = "ye",
	["z"] = "ze",
	["A"] = "a",
	["B"] = "be",
	["C"] = "ce",
	["Ç"] = "çe",
	["D"] = "de",
	["E"] = "e",
	["Ə"] = "ə",
	["F"] = "fe",
	["G"] = "ge",
	["Ğ"] = "ğe",
	["H"] = "he",
	["X"] = "xe",
	["I"] = "ı", -- dotless capital I
	["İ"] = "i", -- dotted capital İ
	["J"] = "je",
	["K"] = "ke",
	["Q"] = "qe",
	["L"] = "el",
	["M"] = "em",
	["N"] = "en",
	["O"] = "o",
	["Ö"] = "ö",
	["P"] = "pe",
	["R"] = "er",
	["S"] = "se",
	["Ş"] = "şe",
	["T"] = "te",
	["U"] = "u",
	["Ü"] = "ü",
	["V"] = "ve",
	["Y"] = "ye",
	["Z"] = "ze",
	["а"] = "a",
	["б"] = "be",
	["ҹ"] = "ce",
	["ч"] = "çe",
	["д"] = "de",
	["е"] = "e",
	["ә"] = "ə",
	["ф"] = "fe",
	["ҝ"] = "ge",
	["ғ"] = "ğe",
	["һ"] = "he",
	["х"] = "xe",
	["ы"] = "ı",
	["и"] = "i",
	["ж"] = "je",
	["к"] = "ke",
	["г"] = "qe",
	["л"] = "el",
	["м"] = "em",
	["н"] = "en",
	["о"] = "o",
	["ө"] = "ö",
	["п"] = "pe",
	["р"] = "er",
	["с"] = "se",
	["ш"] = "şe",
	["т"] = "te",
	["у"] = "u",
	["ү"] = "ü",
	["в"] = "ve",
	["ј"] = "ye",
	["з"] = "ze",
	["А"] = "a",
	["Б"] = "be",
	["Ҹ"] = "ce",
	["Ч"] = "çe",
	["Д"] = "de",
	["Е"] = "e",
	["Ә"] = "ə",
	["Ф"] = "fe",
	["Ҝ"] = "ge",
	["Ғ"] = "ğe",
	["Һ"] = "he",
	["Х"] = "xe",
	["Ы"] = "ı",
	["И"] = "i",
	["Ж"] = "je",
	["К"] = "ke",
	["Г"] = "qe",
	["Л"] = "el",
	["М"] = "em",
	["Н"] = "en",
	["О"] = "o",
	["Ө"] = "ö",
	["П"] = "pe",
	["Р"] = "er",
	["С"] = "se",
	["Ш"] = "şe",
	["Т"] = "te",
	["У"] = "u",
	["Ү"] = "ü",
	["В"] = "ve",
	["Ј"] = "ye",
	["З"] = "ze",
}
-- Normalizes raw input before it is converted to IPA.
--
-- Steps: language-aware lower-casing, punctuation handling (fragment delimiters
-- become the IPA foot boundary "|", remaining punctuation is dropped, hyphens
-- become spaces), Unicode decomposition with recomposition of the letters that
-- carry a non-stress diacritic, validation, and whitespace/bar clean-up.
--
-- @param texto string: text to normalize
-- @return string: normalized text; fragments are separated by " | "
-- Raises "Caracteres no permitidos" if a character outside `permitido` remains.
local function normalizar(texto)
	-- Azerbaijani casing differs from the generic Unicode one: the lower case of
	-- dotless "I" is "ı" and the lower case of dotted "İ" is "i". A generic
	-- lower-casing would turn "I" into "i" and lose the distinction, so fold
	-- these letters by hand first. The decomposed spelling of "İ" (I + combining
	-- dot above) is handled before the bare "I" so it still ends up as "i".
	texto = strsubn(texto, "I" .. punto, "i")
	texto = strsubn(texto, "İ", "i")
	texto = strsubn(texto, "I", "ı")
	texto = strlower(texto)
	texto = strsubrep(texto, PUNTUACION, " | ") -- whatever delimits fragments becomes the IPA foot boundary |
	texto = strsubrep(texto, PUNTUACION_EXTRA, "") -- drop any punctuation that is still left
	texto = strsubrep(texto, "[%-‐]", " ") -- hyphens become spaces (compound words)
	texto = strnfd(texto)
	-- Put the letter-forming diacritic right after its base letter, before any
	-- stress mark, so that the recomposition below can find the pair.
	texto = strsubrep(texto, "("..TILDE..")("..DIACR_NO_TILDE..")", "%2%1")
	-- Recompose only the letters of the alphabet; pairs that are not listed are
	-- left untouched and will be rejected by the validation below.
	texto = strsubn(texto, "."..DIACR_NO_TILDE, {
		["o" .. dieresis] = "ö",
		["O" .. dieresis] = "Ö",
		["u" .. dieresis] = "ü",
		["U" .. dieresis] = "Ü",
		["c" .. cedilla] = "ç",
		["C" .. cedilla] = "Ç",
		["s" .. cedilla] = "ş",
		["S" .. cedilla] = "Ş",
		["i" .. punto] = "i",
		["I" .. punto] = "İ",
		["g" .. breve] = "ğ",
		["G" .. breve] = "Ğ"
	})
	if strfind(texto, "[^"..permitido.."]") then
		error("Caracteres no permitidos")
	end
	texto = strsubrep(texto, "%s*|%s*|%s*", " | ") -- finally, collapse repeated bars and the spaces around them
	texto = strsubrep(texto, "%s+", " ")
	texto = strstrip(texto, "[%s|]+")
	return texto
end
-- Inserts the internal syllable-division marker (`divsil`) into a word.
--
-- Every maximal run of consonants is examined. Runs at the very start or the
-- very end of the word are left alone. For the others, an ordered list of
-- two-consonant rules is tried and the first rule that applies decides where
-- the marker goes; when no rule applies the whole run is attached to the
-- following syllable (the marker is placed in front of it).
--
-- @param p string: a single word
-- @return string: the word with `divsil` markers inserted
local function separar_en_silabas(p)
	local corte = "%1"..divsil.."%2"
	-- Order matters: the first matching rule wins.
	-- (A stop + liquid rule used to sit between the nasal rules and the
	-- liquid rules; it is disabled, so such clusters fall through.)
	local reglas = {
		"(["..paradas.."])(["..paradas.."])",
		"(["..paradas.."])(["..africadas.."])",
		"(["..paradas.."])(["..nasales.."])",
		"(["..paradas.."])(["..fricativas.."])",
		"(["..fricativas.."])(["..paradas.."])",
		"(["..nasales.."])(["..paradas.."])",
		"(["..africadas.."])(["..paradas.."])",
		"(["..fricativas..africadas.."])(["..fricativas..africadas.."])$",
		"(["..nasales.."])([^"..nasales.."])",
		"([^"..nasales.."])(["..nasales.."])",
		"(["..liquidas.."])(["..paradas.."])",
		"(["..liquidas.."])(["..africadas.."])",
		"(["..liquidas.."])(["..fricativas.."])",
	}
	local pos_final = strlen(p) + 1

	local function dividir(inicio, grupo, fin)
		-- Word-initial and word-final clusters are never split.
		if inicio == 1 or fin == pos_final then
			return grupo
		end
		local hubo_cambio = false
		for _, regla in ipairs(reglas) do
			grupo, hubo_cambio = strsubb(grupo, regla, corte)
			if hubo_cambio then
				return grupo
			end
		end
		return divsil..grupo
	end

	p = strsubn(p, "()("..CONS.."+)()", dividir)
	return p
end
-- Converts text into its IPA transcription.
--
-- The text is normalized, split into fragments (on "|") and words (on spaces);
-- each word is syllabified, given a primary stress mark, mapped letter by letter
-- to IPA and finally run through a series of phonological adjustments.
--
-- @param text string: text to transcribe
-- @return table: a list holding one list with one HTML-encoded IPA string
local function generar_pron(text)
	text = normalizar(text)
	-- Temporary stand-in for the glide of the "œy̯" diphthong while the
	-- per-letter mappings run (see below). Not a key of any mapping table.
	local marcador_y = u(0xFFF1)
	local convertido = {}
	local fragmentos = strsplit(text, "%s*|%s*")
	for _,fragmento in ipairs(fragmentos) do
		local palabras = strsplit(fragmento, "%s+")
		local palabras_convertidas = {}
		for _,p in ipairs(palabras) do
			p = separar_en_silabas(p)
			-- Stress: a syllable carrying an acute/grave mark gets the primary
			-- stress (the mark itself is consumed). sus1 covers the first
			-- syllable, sus2 any later one. Without an explicit mark the stress
			-- goes on the last syllable; a word with no syllable division gets
			-- no stress mark at all.
			local sus1, sus2
			p, sus1 = strsubb(p, "^("..SALVO_SEPARADORES_SILABICOS.."-)"..TILDE, ac_primario.."%1")
			p, sus2 = strsubb(p, divsil.."("..SALVO_SEPARADORES_SILABICOS.."-)"..TILDE, ac_primario.."%1")
			if not sus1 and not sus2 then
				p = strsubn(p, divsil.."("..SALVO_SEPARADORES_SILABICOS.."-)$", ac_primario.."%1")
			end
			p = strsubn(p, divsil, ".")
			-- phonological mapping
			for a,b in pairs(mapeo_IPA_3) do
				p = strsubn(p, a, b)
			end
			-- The "öv" rule above has just produced IPA "œy̯". Its "y" must not be
			-- read as the orthographic letter "y" by the passes below (that would
			-- yield "œj̯"), so hide it behind a placeholder. "œ" cannot come from
			-- the input (normalizar rejects it), hence "œy" can only be that
			-- diphthong.
			p = strsubn(p, "œy", "œ"..marcador_y)
			p = strsubn(p, ".", mapeo_IPA_1_1)
			p = strsubn(p, ".", mapeo_IPA_1_2)
			p = strsubn(p, ".", mapeo_IPA_1)
			p = strsubn(p, marcador_y, "y")
			-- Double q: <qq> → [kː]/[back vowel]_[back vowel]
			-- (%2 restores the vowel that follows; it may differ from the
			-- preceding one.)
			p = strsubn(p, "("..VOCAL_POSTERIOR..")ɡɡ("..VOCAL_POSTERIOR..")", "%1kk%2")
			-- Fricativization: < q > → [χ]/ [back vowel]_#
			-- Approximatization: < k > → [j]/ [front vowel]_#
			p = strsubn(p, "("..VOCAL_POSTERIOR..")ɡ$", "%1χ")
			-- NOTE(review): mapeo_IPA_1 turns orthographic "k" into "c", so a
			-- word-final "k" does not seem reachable here; confirm whether this
			-- rule was meant to target "c".
			p = strsubn(p, "("..VOCAL_FRONTAL..")k$", "%1j")
			-- r-Assimilation: <r> → [lː]/_lar,lər (in non-lemma forms only)
			p = strsubn(p, "ɾlæɾ", "llæɾ")
			p = strsubn(p, "ɾlɑɾ", "llɑɾ")
			-- l-Assimilation: <l> → [tː,dː,n,rː]/t,d,n_
			p = strsubn(p, "([zʒ])("..SEPARADORES_SILABICOS.."?)l", "%1%2d")
			p = strsubn(p, "([sʃ])("..SEPARADORES_SILABICOS.."?)l", "%1%2t")
			p = strsubn(p, "([tdn])("..SEPARADORES_SILABICOS.."?)l", "%1%2%1")
			p = strsubn(p, "ɾl", "ɾɾ")
			-- n-Assimilation: <n> → [mː]/_m
			p = strsubn(p, "nm", "mm")
			-- A double tap and a word-initial tap are realized as a trill.
			p = strsubn(p, "ɾɾ", "r")
			p = strsubn(p, "^ɾ", "r")
			-- Any remaining adjacent identical symbols are written as one long one.
			p = strsubn(p, "(.)%1", "%1ː")
			-- l-Velarization: l → [ɫ]/_[back vowel], l → [ɫ]/[back vowel]_
			p = strsubn(p, "("..VOCAL_POSTERIOR..")l", "%1ɫ")
			p = strsubn(p, "l("..VOCAL_POSTERIOR..")", "ɫ%1")
			-- Expand the single-character affricate symbols used internally.
			p = strsubn(p, "ʥ", "d͡ʑ")
			p = strsubn(p, "ʨ", "t͡ɕ")
			insert(palabras_convertidas, p)
		end
		insert(convertido, concat(palabras_convertidas, " "))
	end
	return {{strhtml(concat(convertido, " | "))}}
end
-- Test helper: syllabifies a word and shows the divisions as hyphens.
--
-- @param p string: word to syllabify
-- @return string: the word with "-" between syllables
function export.test_sp(p)
	local silabeada = separar_en_silabas(p)
	-- Keep only the resulting string, not any extra return value.
	local visible = strsubn(silabeada, divsil, "-")
	return visible
end
-- External entry point; receives the page title and the template arguments.
--
-- When no pronunciation was supplied ("fone" and "fono" are both empty) the
-- function generates one from the "ayuda" values (defaulting to the title),
-- fills in the syllable division "d" for single-word titles and derives the
-- rhyme "rima" from the first generated pronunciation.
--
-- @param titulo string: page title
-- @param args table: template arguments; "ayuda", "fone", "fono" and "d" are
--        read as lists. The table is modified in place.
-- @return table: the same `args` table
function export.procesar_pron_args(titulo, args)
	if #args["ayuda"] < 1 then
		args["ayuda"][1] = titulo
	end

	-- Nothing to generate when a pronunciation was given explicitly.
	if #args["fone"] >= 1 or #args["fono"] >= 1 then
		return args
	end

	-- A single letter is pronounced by its name.
	local nombre_letra = pron_abc[args["ayuda"][1]]
	if nombre_letra then
		args["tl"] = nombre_letra
		args["ayuda"][1] = nombre_letra
	end

	-- Generate pronunciations, at most 9 in total.
	local total_ayudas = #args["ayuda"]
	local insertadas = 0
	for idx = 1, total_ayudas do
		if insertadas >= 9 then
			break
		end
		local generadas = generar_pron(args["ayuda"][idx])
		for _, pron in ipairs(generadas) do
			insert(args["fone"], pron)
			insertadas = insertadas + 1
			if insertadas >= 9 then
				break
			end
		end
	end

	-- Syllable division, only for titles without spaces and only if not given.
	if not strfind(titulo, " ") and not args["d"][1] then
		local silabeada = separar_en_silabas(args["ayuda"][1])
		local con_guiones = strsubn(silabeada, divsil, "-")
		args["d"][1] = con_guiones
	end

	-- Rhyme: from the first vowel after the last primary stress mark to the end.
	local primera = args["fone"][1]
	if primera and primera[1] then
		local desde_acento = strsubn(primera[1], "^.*ˈ(.-)$", "%1")
		local rima = strsubn(desde_acento, ".-([" .. vocal_ipa .. "].*)$", "%1")
		args["rima"] = rima
	end

	return args
end
return export