-- Módulo:generar-pron/ast
--
-- La documentación para este módulo puede ser creada en Módulo:generar-pron/ast/doc
--tomado de https://en.wiktionary.org/wiki/Module:es-pronunc

--Autores: Benwing2 & Tmagc

-- OBSERVACION: está calcado del español, con algunos ajustes menores, sobre todo la x mapea como “sh”
-- FALTA: agregar la fonética precisa, que no sé si es igual a la del español o no

local export = {}

local m_table = require("Módulo:tabla")

local m_prefijos = mw.loadData("Módulo:generar-pron/es/prefijos")
-- Footnote texts (wikitext, shown to readers in Spanish) that procesar_pron_args
-- appends to args["dnota"] next to the syllable division.

-- Disabled variant that also mentioned the prefix post-: the reference does not match the
-- note, and it is doubtful that such words exist with post-.
--local AB_SUB_POST = "En general, la erre suena fuerte cuando está pospuesta a los prefijos ab-, sub- y post-. [https://www.rae.es/dpd/r#1 Más información]."
-- Added when the title starts with "abr" or "subr".
local AB_SUB = "En general, la erre suena fuerte si está pospuesta a los prefijos ab- o sub-. Esto ocurre por ejemplo en ''abrogar'' o en ''subrogar''. Pero es necesario que el prefijo esté presente y no forme parte del sufijo, por lo que palabras como ''abrir'', ''subranquial'' o ''subrigadier'' no caen bajo el criterio mencionado."
-- Added when the division contains two adjacent unstressed vowels starting with a close one.
local NO_HIATO = "Desde 1999 se recomienda considerar siempre diptongo, a efectos de acentuación gráfica, la combinación entre vocales cerradas “[[átono|átonas]]” (/iu/, /ui/), o entre vocales cerradas con vocales abiertas ambas “[[átono|átonas]]” (/ua/, /ei/, /io/, etc.). Esta recomendación se transforma en prescripción a partir de la reforma ortográfica de 2010, por lo que muchas palabras que se podían ''escribir'' con hiato deberán escribirse en diptongo. No obstante, esto no implica una proscripción en la pronunciación. [http://rae.es/consultas/palabras-como-guion-truhan-fie-liais-etc-se-escriben-sin-tilde Más información]."
-- Added when the title starts with an entry of the prefix table (m_prefijos).
local DOBLE_SEPARACION = "Se han detectado posibles prefijos semánticos en la palabra. De ser así, es posible que haya varias divisiones válidas como ocurre en el caso de ''transatlántico'' (tran-sat-lán-ti-co o trans-at-lán-ti-co, incluso tran-sa-tlán-ti-co) o ''subrayar'' (su-bra-yar o sub-ra-yar) [https://twitter.com/RAEinforma/status/1435181208435036160]. Por motivos técnicos, en estas situaciones sólo se mostrará la división fonética y no la división léxica o semántica, aunque se recomienda preferir esta última para el lenguaje escrito. [http://lema.rae.es/dpd/?key=guion#21 Más información]."
-- Added when a syllable separator falls between two vowels (optionally with h).
local DOBLE_VOCAL = "Dos vocales seguidas no pueden separarse nunca a final de línea, formen diptongo, triptongo o hiato. Para palabras con ''h'' intercalada, se actuará como si esta letra muda no existiese. Quedan exceptuadas de esta consideración las palabras compuestas. [http://lema.rae.es/dpd/?key=guion#21 Más información]."
-- Added when the division leaves a single letter alone as a syllable.
local LETRA_HUERFANA = "Por motivos estéticos, debe evitarse dejar una letra huérfana a final de línea. [http://lema.rae.es/dpd/srv/search?key=guion Más información]."

--[=[
REVISAR:

16. AGREGAR TABLA CON INFORMACIÓN DE PREFIJOS (ej.: transatlántico -> trans-at-lán-ti-co, en lugar de tran-sat-lán-ti-co, que es como lo hace ahora)

YA IMPLEMENTADO:

1. Port latest changes to production module. [DONE]
2. Finish work on rhymes and hyphenation. [DONE]
3. Handle <hmp:...> for homophones. [DONE]
4. Don't add comma before phonetic IPA. [DONE]
5. Handle secondary stress, suffixes, etc. in syllabification. [DONE]
6. Need some changes to syllable splitting in consonant clusters. (e.g. 'cum‧min‧gto‧ni‧ta') [DONE]
8. Propagate qualifiers on individual pronun terms to rhymes and hyph. (creo que está)
7. Fix handling of references to correspond to Portuguese module. [DONE]
9. Support raw phonemic/phonetic pronunciations. [DONE]
10. Support overall audio. [DONE]
11. Keep th/ph/kh/gh/tz ([[Ertzaintza]]) together when syllabifying (but not bh due to [[subhumano]], [[subhistoria]], etc.). [DONE]
12. Support <q:...> and <qq:...> on audio. [DONE]
13. Support <a:...> and <aa:...> (using {{a|...}}, left and right) on terms, rhymes, hyphenation, homophones and
    audio. [DONE]
14. Support # instead of ; as separator between audio file and gloss and make sure it works if gloss has embedded # or
    ;. [DONE]
15. Use parse_inline_modifiers() in [[Module:parse utilities]]. [DONE]
]=]

local m_str = require("Módulo:String")

local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strmatchit = m_str.gmatch
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strupper = m_str.upper
local strlower = m_str.lower
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strstrip = m_str.strip
local substr = m_str.sub
local strlen = m_str.len
local strexplode = m_str.explode_utf8

-- CONVENTION: upper-case names hold patterns already wrapped in square brackets
-- (character classes); lower-case names hold everything else (raw character lists).

-- Combining diacritics (text is handled in NFD, so these appear as separate code points).
local ag = u(0x0301) -- combining acute accent
local gr = u(0x0300) -- combining grave accent
local circunflejo = u(0x0302) -- combining circumflex
local virgulilla = u(0x0303) -- combining tilde
local dieresis = u(0x0308) -- combining diaeresis
-- IPA stress marks.
local ac_primario = u(0x02C8)
local ac_secundario = u(0x02CC)

local diacritico = ag .. gr .. circunflejo
local DIACRITICO = "[" .. diacritico .. "]"
local tilde = ag .. gr
local TILDE = "[" .. tilde .. "]"
local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"

-- Internal syllable divider; it is turned into "-" or "." just before results are returned.
local divsil = u(0xFFF0)
-- Syllable separators: hyphen (escaped for use inside a class), dot and the internal divider.
local sepsil = "%-." .. divsil
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
-- Word separators: the "#" word-boundary marker and the space.
local seppal = "# "
local separador_excepto_palabras = diacritico .. acentos_ipa .. sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"

local vocales = "aeiouüAEIOUÜ"
local VOCAL = "[" .. vocales .. "]"
local vocales_tildadas = "áéíóúàèìòù"
local VOCAL_TILDADA = "[" .. vocales_tildadas .. "]"
-- A "consonant" is defined negatively: anything that is neither a vowel nor a separator.
local CONS = "[^" .. vocales .. separador .. "]"
local CONS_SALVO_H = "[^" .. vocales .. separador .. "h]"
-- Same as CONS but word separators ("#", space) are allowed to match too.
local CONS_O_SEP_PALABRA = "[^" .. vocales .. separador_excepto_palabras .. "]"
--local T = "[^" .. vocales .. "lrɾjw" .. separador .. "]" -- obstruent or nasal

-- Used for the footnotes
local vocales_cerradas_atonas = "iu"
local VOCAL_CERRADA_ATONA = "["..vocales_cerradas_atonas.."]"
local vocales_abiertas_atonas = "aeo"
local VOCAL_ABIERTA_ATONA = "["..vocales_abiertas_atonas.."]"
local VOCAL_GENERAL = "[" .. vocales .. vocales_tildadas .. "]"

-- PUNTUACION is replaced by a foot boundary in normalizar(); PUNTUACION_EXTRA is then deleted.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

-- Placeholder code points that temporarily stand for letters or digraphs which must not be
-- touched by intermediate substitutions; they are mapped back through the recuperar_comodin*
-- tables.
local TEMP_I = u(0xFFF1)
local TEMP_J = u(0xFFF2)
local TEMP_U = u(0xFFF3)
local TEMP_Y_CONS = u(0xFFF4)
local TEMP_QU = u(0xFFF5)
local TEMP_QU_CAPS = u(0xFFF6)
local TEMP_GU = u(0xFFF7)
local TEMP_GU_CAPS = u(0xFFF8)
local TEMP_H = u(0xFFF9)
local TEMP_Y = u(0xFFFA)
local TEMP_W = u(0xFFFB)
local TEMP_TCHR = u(0xFFFC)
local foot_boundari = "|"
local terminador = "#"
-- Single-letter stand-ins for sounds, resolved at the end through recuperar_comodin*.
local comodin_yeista = "ɟ"
local comodin_chancho = "ĉ"
local comodin_jota = "ħ"
local comodin_deshielo = "ĥ"
local comodin_thrauna = "ţ"

-- Character class matching every key of the recuperar_comodin* tables ("%" escapes the "|").
local COMODINES = "[" .. TEMP_I .. TEMP_J .. TEMP_U .. TEMP_Y_CONS .. TEMP_QU .. TEMP_QU_CAPS .. TEMP_GU .. TEMP_GU_CAPS .. TEMP_H .. TEMP_Y .. TEMP_W .. TEMP_TCHR ..
comodin_yeista .. comodin_chancho .. comodin_jota .. comodin_deshielo .. comodin_thrauna ..
"g" .. "%" .. foot_boundari .. terminador .. "]"

-- Final replacement table for the IPA output of generar_pron: maps every character matched
-- by COMODINES to the text that must appear in the transcription.
local recuperar_comodin = {
    [TEMP_I] = "i",
	[TEMP_J] = "i", -- i as a semivowel
    [TEMP_U] = "u",
    [TEMP_Y_CONS] = "y",
    [TEMP_QU] = "qu",
    [TEMP_QU_CAPS] = "Qu",
    [TEMP_GU] = "gu",
    [TEMP_GU_CAPS] = "Gu",
    [TEMP_H] = "h", -- h that is not aspirated
    [TEMP_Y] = "i", -- suffixes -ay/-ey/-oy/-uy
	[TEMP_W] = "w̝", -- hueso, huevo, etc. (the other w has already been processed)
	[TEMP_TCHR] = "tchr",
	[comodin_jota] = "h",  -- fake aspirated "h" to real "h"
	[comodin_chancho] = "t͡ʃ", -- fake "ch" to real "ch"
	[comodin_yeista] = "ʝ", -- fake "y" to real "y" (formerly: phonetic and "ɟ͡ʝ" or "ʝ")
	[comodin_deshielo] = "desh",
	[comodin_thrauna] = "ʈ͡ʂ",
	["g"] = "ɡ", -- U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
	[foot_boundari] = "&#124;", -- HTML entity for the pipe
	[terminador] = "", -- word-boundary markers are dropped
}

-- Final replacement table for the ORTHOGRAPHIC syllable division produced by
-- separar_en_silabas: maps every character matched by COMODINES back to ordinary spelling.
-- Unlike recuperar_comodin (IPA output), the values here must be plain letters.
local recuperar_comodin_silabeo = {
	[TEMP_I] = "i",
	[TEMP_J] = "i", -- i as a semivowel
	[TEMP_U] = "u",
	[TEMP_Y_CONS] = "y",
	[TEMP_QU] = "qu",
	[TEMP_QU_CAPS] = "Qu",
	[TEMP_GU] = "gu",
	[TEMP_GU_CAPS] = "Gu",
	[TEMP_H] = "h", -- h that is not aspirated
	[TEMP_Y] = "i", -- suffixes -ay/-ey/-oy/-uy
	[TEMP_W] = "u", -- hueso, huevo, etc.
	[TEMP_TCHR] = "tchr",
	[comodin_jota] = "h",  -- fake aspirated "h" to real "h"
	[comodin_chancho] = "ch", -- fake "ch" to real "ch"
	[comodin_yeista] = "y", -- fake "y" to real "y"
	[comodin_deshielo] = "desh",
	[comodin_thrauna] = "tchr",
	-- "g" is part of COMODINES, so it needs an entry; in the spelling it must stay an ASCII
	-- "g". (It used to be mapped to the IPA letter U+0261, which leaked into the displayed
	-- hyphenation and is outside the [a-zA-ZÀ-ž] class used to align hyphens with the title.)
	["g"] = "g",
	[foot_boundari] = "&#124;", -- HTML entity for the pipe
	[terminador] = "", -- word-boundary markers are dropped
}

-- Set of words that generar_pron treats as unstressed when they carry no explicit stress
-- mark (it appends a circumflex to them, which later maps to "no stress").
local no_acentuado = m_table.listToSet({
	"el", "la", "los", "las", "un", -- articles
	"me", "te", "se", "lo", "le", "nos", "os", "les", -- object pronouns
	"mi", "mis", "tu", "tus", "su", "sus", -- possessives
	"que", "quien", "cuan", "cual",  -- relative pronouns
	"y", "e", "o", "u", "ni", -- conjunctions
	"de", "del", "a", "al", -- prepositions and contracted articles
	"por", "en", "con", "sin", "tras", -- more prepositions
	"mas", -- "but"
	"so", -- as in "so pretexto"
	"si",
})

-- Splits accents off their base letters (NFD) while keeping ñ/Ñ and ü/Ü as single
-- precomposed letters.
local function aislar_diacriticos(text)
	-- base letter + combining mark pairs that must be glued back together
	local recomponer = {
		["n" .. virgulilla] = "ñ",
		["N" .. virgulilla] = "Ñ",
		["u" .. dieresis] = "ü",
		["U" .. dieresis] = "Ü",
	}
	local marcas_protegidas = ".[" .. virgulilla .. dieresis .. "]"

	local descompuesto = strnfd(text)
	descompuesto = strsubn(descompuesto, marcas_protegidas, recomponer)
	return descompuesto
end

-- Removes acute, grave and circumflex accents and both IPA stress marks from `text`,
-- returning it in NFC. Tilde (ñ) and diaeresis (ü) are not in the removed set.
local function quitar_diacriticos(text)
	local marcas = "[" .. ag .. gr .. circunflejo .. ac_primario .. ac_secundario .. "]"
	local sin_marcas = strsubn(strnfd(text), marcas, "")
	return strnfc(sin_marcas)
end
-- Convert i/u between vowels to a glide: IPA symbols for the pronunciation, placeholder
-- characters for the orthographic syllable division.
local vowel_to_glide = { ["i"] = "j", ["u"] = "w" }
local vowel_to_glide_silabeo = { ["i"] = TEMP_J, ["u"] = TEMP_W }
	
-- Plain vowel -> vowel with acute accent. NOTE(review): not referenced in the visible part
-- of this file.
local tildar = {
	["a"] = "á",
	["e"] = "é",
	["i"] = "í",
	["o"] = "ó",
	["u"] = "ú",
}

-- Orthographic stress diacritic -> IPA stress mark (the circumflex means "no stress").
local diacritico_a_IPA = { [ag] = ac_primario, [gr] = ac_secundario, [circunflejo] = "" }
-- Spelled-out letter names, indexed 1..26 for a..z plus 27 for ñ; procesar_pron_args uses an
-- entry as the pronunciation hint when the page title is a single letter.
local pron_abc = {{"a"},{"be","be larga"},{"ce"},{"de"},{"e"},{"efe"},{"ge"},{"hache"},{"i","i latina"},
	{"jota"},{"ka"},{"ele"},{"eme"},{"ene"},{"o"},{"pe"},{"cu"},{"erre"},{"ese"},{"te"},{"u"},
	{"ve","ve corta","uve"},{"doble ve","doble u","uve doble"},{"equis"},{"i","i griega","ye"},{"zeta"},{"eñe"}}

-- Normalizes raw input text: lower-cases it, isolates diacritics, turns delimiting
-- punctuation into IPA foot boundaries ("|"), drops the remaining punctuation, turns hyphens
-- into spaces and finally collapses/strips redundant bars and whitespace.
local function normalizar(texto)
	-- Ordered {pattern, replacement} rules; each one is applied repeatedly, in this order.
	local reglas = {
		{ PUNTUACION, " | " },           -- fragment delimiters become foot boundaries
		{ PUNTUACION_EXTRA, "" },        -- whatever punctuation is left is removed
		{ "[%-‐]", " " },                -- hyphens become spaces (austro-húngaro, franco-italiano)
		{ "%s*|%s*|%s*", " | " },        -- merge consecutive bars
		{ "%s+", " " },                  -- collapse whitespace runs
	}

	local resultado = aislar_diacriticos(strlower(texto))
	for _, regla in ipairs(reglas) do
		resultado = strsubrep(resultado, regla[1], regla[2])
	end
	resultado = strstrip(resultado, "[%s|]+")

	return resultado
end

-- Translates the numeric stress code into its Spanish name.
-- Codes: -1 monosyllable, 0 doubly stressed, 1..3 position of the stressed syllable counted
-- from the end, 4 or more "sobreesdrújula". Any other value (or nil/false) yields nil.
local function determinar_acentuacion(acento_id)
	if not acento_id then
		return nil
	end

	local nombres = {
		[-1] = "monosílaba",
		[0] = "doble acentuada",
		[1] = "aguda",
		[2] = "llana",
		[3] = "esdrújula",
	}

	local nombre = nombres[acento_id]
	if nombre then
		return nombre
	end
	if acento_id >= 4 then
		return "sobreesdrújula"
	end
	return nil
end

-- Converts the stress diacritics of an already syllabified word into IPA stress marks.
--
-- `w` is one word whose syllables are separated by any of SEPARADORES_SILABICOS. Each
-- syllable carrying a diacritic gets the matching IPA mark (see diacritico_a_IPA) moved to
-- its start. If no syllable carries a diacritic, the default rule applies: final stress when
-- the word ends in a consonant other than n/s, penultimate stress otherwise; a monosyllable
-- is stressed. Adverbs in "-mente" (4 or more syllables) are split into base + "mente": the
-- base is processed as above and "ˈmen.te" is appended.
--
-- Returns the word with syllables joined by `divsil`, except that no divider is written
-- before a syllable that starts with a stress mark.
local function reemplazar_tildes(w)
	local silabas = strsplit(w, SEPARADORES_SILABICOS)
	local L = #silabas
	local sufijo = nil

	if L >= 4 and silabas[L-1] == "men" and silabas[L] == "te" then
		sufijo = {ac_primario.."men", divsil.."te"}
		silabas[L-1], silabas[L] = nil, nil
		L = L - 2
	end

	local sustituido = false
	local sust = false

	for i,silaba in ipairs(silabas) do
		silabas[i], sust = strsubb(silaba, "^(.*)(" .. DIACRITICO .. ")(.*)$",
			function(pre, diacr, post) return diacritico_a_IPA[diacr] .. pre .. post end)
		sustituido = sustituido or sust
	end

	if not sustituido then
		if L > 1 then
			-- Test the last syllable that is still in the list, not `w`: once "-mente" has
			-- been split off, `w` always ends in "e" and would force penultimate stress on
			-- bases such as "fe.liz" or "ge.ne.ral", which take final stress.
			if strfind(silabas[L], "[^" .. vocales .. "ns]$") then
				silabas[L] = ac_primario .. silabas[L] -- final stress
			else
				silabas[L - 1] = ac_primario .. silabas[L - 1] -- penultimate stress
			end
		else -- L == 1: the monosyllable is stressed because it carries no circumflex
			silabas[1] = ac_primario .. silabas[1]
		end
	end

	local p = silabas[1]

	for i,s in ipairs(silabas) do
		if i > 1 then
			if strfind(s, ACENTOS_IPA) then
				p = p .. s -- the stress mark itself acts as the syllable boundary
			else
				p = p .. divsil .. s
			end
		end
	end

	if sufijo then
		return p .. sufijo[1] .. sufijo[2]
	else
		return p
	end
end

-- Computes syllable count, stress class and orthographic rhyme of a hyphenated word.
-- `w` must be a string whose syllables are separated by SEPARADORES_SILABICOS (callers pass
-- the division of the pronunciation hint, not of the original title).
-- Returns: number of syllables, stress name (see determinar_acentuacion), rhyme string.
-- Returns a single nil for non-string input and nil,nil,nil when the stressed syllable has
-- no vowel.
local function obtener_informacion(w)
	if type(w) ~= "string" then
		return nil
	end

	local silabas = strsplit(w, SEPARADORES_SILABICOS)
	local total = #silabas

	-- Adverbs in "-mente" are the only doubly stressed case handled so far.
	if total >= 4 and silabas[total - 1] == "men" and silabas[total] == "te" then
		return total, determinar_acentuacion(0), "ente" -- 0 stands for double stress
	end

	-- First syllable containing a written accent, if any.
	local pos_tonica, tiene_tilde = nil, false
	for i = 1, total do
		if strfind(silabas[i], "^(.*)(" .. VOCAL_TILDADA .. ")(.*)$") then
			pos_tonica, tiene_tilde = i, true
			break
		end
	end

	local acento_id
	if tiene_tilde then
		acento_id = (total == 1) and -1 or (total - pos_tonica + 1)
	elseif total == 1 then
		acento_id, pos_tonica = -1, 1 -- monosyllable
	elseif strfind(w, "[^" .. vocales .. "ns]$") then
		acento_id, pos_tonica = 1, total -- final stress
	else
		acento_id, pos_tonica = 2, total - 1 -- penultimate stress
	end

	-- The rhyme starts at the stressed vowel of the stressed syllable.
	local tonica = silabas[pos_tonica]
	local inicio
	if tiene_tilde then
		inicio = strfind(tonica, VOCAL_TILDADA)
		if not inicio then
			return nil, nil, nil
		end
	else
		inicio = strfind(tonica, VOCAL)
		if not inicio then
			return nil, nil, nil
		end
		-- skip the silent u of que/qui/gue/gui
		local entorno = substr(tonica, inicio - 1, inicio + 1)
		if entorno == "que" or entorno == "qui" or entorno == "gue" or entorno == "gui" then
			inicio = inicio + 1
		end
	end

	local partes = { (substr(tonica, inicio)) }
	for i = pos_tonica + 1, total do
		partes[#partes + 1] = silabas[i]
	end

	return total, determinar_acentuacion(acento_id), table.concat(partes)
end

-- Final steps of the syllable division. It has two callers: the pronunciation generator
-- (generar_pron) and the orthographic splitter (separar_en_silabas).
-- Takes one preprocessed word and returns it with `divsil` inserted at syllable boundaries.
-- The substitutions below depend on their order.
local function separar_en_silabas_final(word)

	-- STEP 0 (not implemented): look up the prefix table to extract known syllables that
	-- cannot be derived regularly (tran, trans)
	
	-- STEP 1: split after each vowel group before its last consonant (unless it is h), or
	-- between consonants surrounded by vowels; the 'h' is not counted (prohibir --> prohi.bir)
	
	word = strsubrep(word, "(" .. VOCAL .. DIACRITICO .. "*)(" .. CONS_SALVO_H .. VOCAL .. ")", "%1"..divsil.."%2")
	word = strsubrep(word, "(" .. VOCAL .. DIACRITICO .. "*" .. CONS .. "+)(" .. CONS .. VOCAL .. ")", "%1"..divsil.."%2")

	-- STEP 2: rejoin some consonant clusters
	
	-- Join p/b/f/v/k/c/t/g with a following l or r, and d with a following r (dl stays split).
	word = strsubn(word, "([pbfvkctg])"..divsil.."([lrɾ])", divsil.."%1%2")
	word = strsubn(word, "d%"..divsil.."([rɾ])", divsil.."d%1")
	-- Join ch, sh, ph, th, dh, fh, kh or gh. Do NOT join bh (subhumano, subhúmedo)
	word = strsubn(word, "([csptdfkg])"..divsil.."h", divsil.."%1h")
	-- Join ll and rr
	word = strsubn(word, "l"..divsil.."l", divsil.."ll")
	word = strsubn(word, "r"..divsil.."r", divsil.."rr")
	-- Join tz ([[Ertzaintza]], [[quetzal]], [[hertziano]]: Basque, Nahuatl or German words)
	word = strsubn(word, "t"..divsil.."z", divsil.."tz")
	
	-- STEP 3 (disabled): split off a word-final tl (Mexican words such as Nahuatl, Popocatepetl).
	-- https://catalog.ldc.upenn.edu/docs/LDC2019S07/Syllabification_Rules_in_Spanish.pdf
	-- https://www.spanishdict.com/guide/spanish-syllables-and-syllabification-rules.
	-- --> this is wrong, tl is not a separate syllable, not even in Nahuatl
	-- word = strsubn(word, "([^"..divsil.."])tl$", "%1"..divsil.."tl")

	-- STEP 4: split hiatuses
	-- open vowel + open vowel (optional h in between)
	word = strsubrep(word, "([aeoAEO]" .. DIACRITICO .. "*)(h?[aeo])", "%1"..divsil.."%2")
	-- open vowel + accented vowel
	word = strsubrep(word, "([aeoAEO]" .. DIACRITICO .. "*)(h?" .. VOCAL .. TILDE .. ")", "%1"..divsil.."%2")
	-- accented close vowel + open vowel
	word = strsubn(word, "([iuüyIUÜY]" .. TILDE .. ")(h?[aeo])", "%1"..divsil.."%2")

	-- Two consecutive accented vowels (does not look like a native pattern; kept from the
	-- original module)
	word = strsubrep(word, "([iuüyIUÜY]" .. TILDE .. ")(h?" .. VOCAL .. TILDE .. ")", "%1"..divsil.."%2")
	-- ii (as in antiincendios; per the RAE both 'antincendios' and 'antiincendios' are valid; 'hawaiiano', 'shiita')
	word = strsubrep(word, "([iI]" .. DIACRITICO .. "*)(h?i)", "%1"..divsil.."%2")
	-- uu (kept just in case; the original example was 'vacuum', which is not a native word)
	word = strsubrep(word, "([uU]" .. DIACRITICO .. "*)(h?u)", "%1"..divsil.."%2")

	return word
end

-- Splits ONE word into orthographic syllables separated by "-".
-- Returns nil when `w` is not a string or contains a space: callers must split multi-word
-- text themselves and call this once per word. Hyphens in `w` are treated as syllable
-- boundaries (normalizar turns them into spaces, which become dividers here).
local function separar_en_silabas(w)
	if type(w) ~= "string" or strfind(w, " ") then
		return nil
	end

	local p = normalizar(w)

	p = strsubn(p, "|", "")
	p = strsubrep(p, "%s+", divsil) -- whitespace becomes a syllable divider

	-- y before a vowel is a consonant; protect it with a placeholder
	p = strsubn(p, "y(" .. VOCAL .. ")", TEMP_Y_CONS .. "%1")

	-- thr placeholder: disabled until further notice
	--p = strsubn(p, "thr", comodin_thrauna)

	-- In words starting with the prefix des- followed by h (deshuesar, deshonra, deshecho)
	-- the "s" and the "h" belong to different syllables (des-hue-sar), whereas elsewhere
	-- "sh" is kept together by separar_en_silabas_final. To stop that rejoining, the h of
	-- "desh" is swapped for a placeholder, both at the start of the text and after a
	-- divider. (The second rule used to look only for a space or hyphen, which no longer
	-- exist at this point, so it never fired for hyphenated compounds.)
	p = strsubn(p, "^([Dd]es)h", "%1" .. TEMP_H)
	p = strsubn(p, "([ %-" .. divsil .. "][Dd]es)h", "%1" .. TEMP_H)

	-- Protect 'qu' and 'gu' before a vowel so that their u is not treated as a vowel.
	-- Known weak spot noted by the original author: 'quietud'.
	p = strsubn(p, "qu(" .. VOCAL .. ")", TEMP_QU .. "%1")
	p = strsubn(p, "Qu(" .. VOCAL .. ")", TEMP_QU_CAPS .. "%1")
	p = strsubn(p, "gu(" .. VOCAL .. ")", TEMP_GU .. "%1")
	p = strsubn(p, "Gu(" .. VOCAL .. ")", TEMP_GU_CAPS .. "%1")

	-- Insert glides (paranoia, baiano, abreuense, alauita, Malaui, marihuana, parihuela,
	-- antihielo, pelluhuano, náhuatl). The leading ".*" makes the search run right-to-left,
	-- which avoids odd results such as 'an.tih.ie.lo'.
	p = strsubrep(p, "(.*" .. VOCAL .. DIACRITICO .. "*)(h?)([iu])(" .. VOCAL .. ")",
		function (v1, h, iu, v2) return v1 .. divsil .. h .. vowel_to_glide_silabeo[iu] .. v2 end)

	-- Shared final steps of the division
	p = separar_en_silabas_final(p)

	--p = strsubn(p, comodin_thrauna, "thr")
	p = strsubn(p, COMODINES, recuperar_comodin_silabeo) -- restore protected letters
	p = strsubn(p, divsil, "-")
	return strnfc(p)
end

-- Converts text (already respelled to ease pronunciation, if needed) into an IPA
-- transcription.
-- Parameters:
--   text     - string to transcribe; any other type yields two empty tables.
--   phonetic - when truthy the function currently raises an error: the narrow (phonetic)
--              transcription below the error() call is unreachable until it is reviewed.
-- Returns two parallel lists: labels and transcriptions. If the result contains "ʎ" they
-- are {{"yeísta"}, {"no yeísta"}} and {{with ʎ replaced by ʝ}, {original}}; otherwise
-- {{"pronunciación"}} and {{text}}.
-- The order of the substitutions below matters.
local function generar_pron(text, phonetic)
	if type(text) ~= "string" then
		return {},{}
	end

	-- NOTE(review): these three flags are assigned below but never read in this function.
	local distincion_seseo = false
	local distincion_yeismo = false
	local distincion_sheismo = false

	text = normalizar(text)
	
	-- Substitutions start here
	-- General remark: the order in which we substitute matters
	
	-- Make prefixes unstressed unless they have an explicit stress marker; also make certain
	-- monosyllabic words (e.g. [[el]], [[la]], [[de]], [[en]], etc.) without stress marks be
	-- unstressed.
	local words = strsplit(text, " ")
	for i, word in ipairs(words) do
		if no_acentuado[word] then
			words[i] = strsubn(word, "^(.*" .. VOCAL .. ")", "%1" .. circunflejo) -- the circumflex marks "no stress here"
		end
	end
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = table.concat(words, " ")
	text = strsubn(text, " | ", "# | #")
	text = "##" .. strsubn(text, " ", "# #") .. "##"

	-- choose the right sound for y
	-- Note: suffixes -ay/-ey/-oy/-uy are stressed, but -ai/-ei/-oi/-ui are not
	-- Note: -uy maps to /uj/ while -ui maps to /wi/
	
	text, distincion_sheismo = strsubb(text, "y(" .. VOCAL .. ")", comodin_yeista.."%1") -- ɟ -> placeholder for the "y" consonant
	
	-- Doing this is not advisable (kept disabled)
	--text = strsubn(text, "([aeou])y#", "%1" .. TEMP_Y .. "#") -- pongo marca privada por el momento
	--text = strsubn(text, "y", "i")

	-- sh/ch must be processed right here (so they do not interfere with the x handling)
	text = strsubn(text, "ch", comodin_chancho) -- yet another placeholder
	text = strsubn(text, "#desh", comodin_deshielo) -- placeholder that protects 'desh' (deshuesar, etc.)
	text = strsubn(text, "sh", "ʃ") -- replace sh
	text = strsubn(text, comodin_deshielo, "#desh") -- restore desh
	text = strsubn(text, "#p([st])", "#%1") -- drop the initial p of psicología or pterodáctilo
	--text = strsubn(text, "thr", comodin_thrauna)
	
	-- word-final tl
	text = strsubn(text, "tl#", "t#")

	--x
	text = strsubn(text, "x", "ʃ") -- KEY DIFFERENCE from the Spanish module: how the Asturian x is mapped

	--c, g, q
	text, distincion_seseo = strsubb(text, "c([ie])",  "θ"  .. "%1") -- replace soft c and record whether there was any
	text = strsubn(text, "g([iey])", "x%1") -- must happen after handling of x above
	text = strsubn(text, "gu([ie])", "g%1")
	text = strsubn(text, "gü([ie])", "gu%1")
	-- following must happen before stress assignment; [[branding]] has initial stress like 'brandin'
	text = strsubn(text, "ng([^aeiouüwhlr])", "n%1") -- [[Bangkok]], [[ángstrom]], [[branding]]
	text = strsubn(text, "qu([ie])", "k%1")
	text = strsubn(text, "ü", "u") -- [[Düsseldorf]], [[hübnerita]], obsolete [[freqüentemente]], etc.
	text = strsubn(text, "q", "k") -- [[quark]], [[Qatar]], [[burqa]], [[Iraq]], etc.

	-- map various consonants to their phoneme equivalent
	text = strsubn(text, "[cjñrv]", {["c"]="k", ["j"]="x", ["ñ"]="ɲ", ["r"]="ɾ", ["v"]="b" })

	-- ([[hielo]], [[enhiesto]], [[deshielo]], ...)
	-- NOTE(review): the three *_hi flags are computed but not used afterwards.
	local word_initial_hi, syl_initial_hi, initial_hi
	text, word_initial_hi = strsubb(text, "#h?[iy](" .. VOCAL .. ")", "#j%1")
	text, syl_initial_hi = strsubb(text, "(" .. CONS .. SEPARADORES_SILABICOS .. "*)h[iy](" .. VOCAL .. ")", "%1j%2")
	initial_hi = word_initial_hi or syl_initial_hi
	--  ([[huevo]], [[deshuesar]])
	text = strsubb(text, "(" .. CONS_O_SEP_PALABRA .. SEPARADORES_SILABICOS .. "*)hu(" .. VOCAL .. ")", "%1" .. TEMP_W .. "%2")


	-- Replace ll and record whether there was any
	text, distincion_yeismo = strsubb(text, "ll", "ʎ")

	if distincion_yeismo then
		distincion_sheismo = true
	end

	-- Replace rr (and ɾ after #, l, n, s, θ) by the trill r.
	-- Open question from the original author: should lr/sr also allow a syllable separator?
	text = strsubn(text, "ɾɾ", "r")
	text = strsubn(text, "([#lnsθ])ɾ", "%1r") --([[alrededor]], [[malrotar]]), nr ([[enriquecer]], [[sonrisa]], etc.), sr ([[Israel]], [[desregular]], etc.), zr ([[Azrael]], [[cruzrojista]]), rr
	-- nn and bb are parked as N and B so that the degemination below does not remove them
	text = strsubn(text, "nn", "N") -- double n (ennoblecer)
	text = strsubn(text, "bb", "B") -- double b (subbase)

	text = strsubn(text, "(" .. CONS .. ")%1", "%1") -- remove double consonants [[Addis Abeba]], [[cappa]], [[descender]], [[crackear]]
	text = strsubn(text, "sθ", "θ") -- remove sθ as in [[fascinante]]

	-- restore the parked double consonants
	text = strsubn(text, "N", "nn")
	text = strsubn(text, "B", "bb")

	-- Disabled: voicing of stops before another obstruent/nasal (judged not to make sense)
	--local voice_stop = { ["p"] = "b", ["t"] = "d", ["k"] = "g" }
	--text = strsubn(text, "t(" .. SEPARADOR .. "*[sθ])", "!%1") -- eximir -ts-, -tz-
	--text = strsubn(text, "([ptk])(" .. SEPARADOR .. "*" .. T .. ")", function(stop, after) return voice_stop[stop] .. after end)
	--text = strsubn(text, "!", "t") -- recuperar -ts-, -tz-

	text = strsubn(text, "n([# .]*[bpm])", "m%1") -- n before b/p/m becomes m (e.g. enviar, inmoral)

	text = strsubn(text, "h", "") -- drop the silent h

	-- i and u between vowels -> consonant-like substitutions: [[paranoia]], [[baiano]], [[abreuense]], [[alauita]],
	-- [[Malaui]], etc.; also with h, as in [[marihuana]], [[parihuela]], [[antihielo]], [[pelluhuano]], [[náhuatl]],
	-- etc. Add .* at the beginning so we go right-to-left, in the case of [[hawaiiano]] -> ha.wai.iano.
	text = strsubrep(text, "(.*" .. VOCAL .. DIACRITICO .. "*h?)([iu])(" .. VOCAL .. ")",
		function (v1, iu, v2) return v1 .. vowel_to_glide[iu] .. v2 end
	)
	
	-- Syllabify and stress every word; foot boundaries ("|") are passed through untouched.
	words = strsplit(text, "[#(%s)]+")
	local ac_words = {}
	for _,word in ipairs(words) do
		if #word > 0 then
			if strfind(word, "|") then
				table.insert(ac_words, word)
			else
				table.insert(ac_words, reemplazar_tildes(separar_en_silabas_final(word)))
			end
		end
	end
	
	-- Rebuild the text with the same # / ## boundary markers as before.
	text = table.concat(ac_words, " ")
	text = strsubn(text, " | ", "# | #")
	text = "##" .. strsubn(text, " ", "# #") .. "##"

	--diphthongs; do not include TEMP_Y here
	text = strsubn(text, "ih?([aeou])", "j%1") -- TO REVIEW: taken from en.wikt; do ih- and uh- really form diphthongs in Asturian?
	text = strsubn(text, "uh?([aeio])", "w%1")
	text = strsubn(text, "([aeiou])y", "%1j")
	--text = strsubn(text, TEMP_Y, "i") -- -ay/-ey/-oy/-uy
	
	local hay_z = false
	text, hay_z = strsubb(text, "z", "θ")
    distincion_seseo = distincion_seseo or hay_z

	-- suppress syllable mark before IPA stress indicator
	text = strsubn(text, "%.(" .. ACENTOS_IPA .. ")", "%1")
	--make all primary stresses but the last one be secondary
	text = strsubrep(text, ac_primario.."(.+)"..ac_primario, ac_secundario.."%1"..ac_primario)

	-- Phonetic (narrow) transcription, i.e. what is written between [] brackets.
	-- TO REVIEW: an expert is needed to confirm the exact phonetics.
	if phonetic then
		error("FONETICA DEL ASTURIANO AUN NO CHEQUEADA (USE LA PARTE FONOLOGICA SOLAMENTE)")
		-- Everything below inside this branch is unreachable because of the error() above.
		-- θ, s, f before voiced consonants
		local voiced = "mnɲbd"..comodin_yeista.."gʎ" .. TEMP_W
		local r_cluster = "ɾr"
		local tovoiced = {
			["θ"] = "θ̬",
			--["s"] = "z", --no existe tal cosa
			--["f"] = "v", --no existe tal cosa
		}
		local function voice(sound, following)
			return tovoiced[sound] .. following
		end
		--text = strsubn(text, "([θs])(" .. SEPARADOR .. "*[" .. voiced .. r .. "])", voice) --mal
		--text = strsubn(text, "(f)(" .. SEPARADOR .. "*[" .. voiced .. "])", voice) --mal

		text = strsubn(text, "([θ])(" .. SEPARADOR .. "*[" .. voiced .. r_cluster .. "])", voice) -- the only voicing substitution judged to make sense

		-- approximants
		local stop_to_fricative = {["b"] = "β", ["d"] = "ð", [comodin_yeista] = "ʝ", ["g"] = "ɣ"}
		local fricative_to_stop = {["β"] = "b", ["ð"] = "d", ["ʝ"] = comodin_yeista, ["ɣ"] = "g"}
		text = strsubn(text, "[bd"..comodin_yeista.."g]", stop_to_fricative) -- turn everything into a fricative
		text = strsubn(text, "([mnɲ]" .. SEPARADOR .. "*)([βɣ])", -- except after m, n, ɲ (convert back)
			function(nasal, fricative) return nasal .. fricative_to_stop[fricative] end
		)
		text = strsubn(text, "([lʎmnɲ]" .. SEPARADOR .. "*)([ðʝ])",  -- except after l, ʎ, m, n, ɲ (convert back)
			function(nasal_l, fricative) return nasal_l .. fricative_to_stop[fricative] end
		)
		text = strsubn(text, "(##" .. ACENTOS_IPA .. "*)([βɣðʝ])", -- except right after "##" plus optional stress marks (convert back)
			function(stress, fricative) return stress .. fricative_to_stop[fricative] end
		)

		text = strsubn(text, "[td]", {["t"] = "t̪", ["d"] = "d̪"}) -- dentalization

		-- nasal assimilation before consonants
		local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =	"ɱ", "n̪", "n̟", "nʲ", "ɲ", "ŋ"
		local nasal_assimilation = {
			["f"] = labiodental,
			["t"] = dentialveolar, ["d"] = dentialveolar,
			["θ"] = dental,
			[comodin_chancho] = alveolopalatal,	["ʃ"] = alveolopalatal,	["ʒ"] = alveolopalatal,
			[comodin_yeista] = palatal, ["ʎ"] = palatal,
			["k"] = velar, ["x"] = velar, ["g"] = velar,
		}
		text = strsubn(text, "n(" .. SEPARADOR .. "*)(.)", function(stress, following) return (nasal_assimilation[following] or "n") .. stress .. following end)

		-- lateral assimilation before consonants
		text = strsubn(text, "l(" .. SEPARADOR .. "*)(.)",
			function(stress, following)
				local l = "l"
				if following == "t" or following == "d" then -- dentialveolar
					l = "l̪"
				elseif following == "θ" then -- dental
					l = "l̟"
				elseif following == comodin_chancho or following == "ʃ" then -- alveolopalatal
					l = "lʲ"
				end
				return l .. stress .. following
			end)

		--semivowels
		text = strsubn(text, "([aeouãẽõũ][iĩ])", "%1̯")
		text = strsubn(text, "([aeioãẽĩõ][uũ])", "%1̯")

		-- voiced fricatives are actually approximants
		text = strsubn(text, "([βðɣ])", "%1̞")
	end

	-- final conversions: resolve placeholders, write "." as syllable mark, recompose
	text = strsubn(text, COMODINES, recuperar_comodin)
	text = strsubn(text, divsil, ".")
	text = strnfc(text)

	-- Variant in which ʎ has merged into ʝ
	local yeista = strsubn(text, "ʎ", "ʝ")
	
	if yeista ~= text then
    	return {{"yeísta"}, {"no yeísta"}}, {{yeista}, {text}}
    else
    	return {{"pronunciación"}}, {{text}}
    end
end

-- External entry point: receives the page title and the template arguments and fills in
-- the derived data (pronunciations, syllable division, footnotes, letter count, rhyme).
--
-- Parameters:
--   titulo - page title (string).
--   args   - argument table, modified in place. The list-valued entries "ayuda", "fone",
--            "fono", "pron", "d" and "dnota" must already exist as tables (they are measured
--            with # and appended to). Optional entries read here: "rima", "ls", "ac", "nl".
-- Returns the same `args` table.
function export.procesar_pron_args(titulo, args)
	local tit = titulo
	local vino_ayuda, ss_

	-- Without an explicit pronunciation hint ("ayuda") the title itself is used.
	if #args["ayuda"] < 1 then
		args["ayuda"][1] = tit
	else
		vino_ayuda = true
	end

	if #args["fone"] < 1 and #args["fono"] < 1 then
		-- No manual transcription: generate it. Single letters use their spelled-out names.
		if #titulo == 1 then
			if titulo >= "a" and titulo <= "z" then
				args["ayuda"] = pron_abc[string.byte(titulo) - 96]
				args["tl"] = args["ayuda"]
			elseif titulo >= "A" and titulo <= "Z" then
				args["ayuda"] = pron_abc[string.byte(titulo) - 64]
				args["tl"] = args["ayuda"]
			end
		elseif titulo == "ñ" or titulo == "Ñ" then
			args["ayuda"] = pron_abc[27]
			args["tl"] = args["ayuda"]
		end
		local A = #args["ayuda"]
		local j = 1 -- index into the hints
		local k = 1 -- number of pronunciations inserted so far, plus one (maximum 9)
		while k <= 9 and j <= A do
			local pron, fono = generar_pron(args["ayuda"][j], false)
			for i,_ in ipairs(pron) do
				table.insert(args["pron"], pron[i])
				table.insert(args["fono"], fono[i])
				k = k + 1
				if k > 9 then
					break
				end
			end
			j = j + 1
		end
	else
		-- Manual transcription: only normalize the abbreviated variety labels.
		for i, _ in ipairs(args["pron"]) do
			if args["pron"][i] and args["pron"][i][1] then
				local etiqueta = args["pron"][i][1]
				local cc = etiqueta:sub(1,2)
				-- Labels starting with 'se' are deliberately left untouched.
				if cc == 'ye' then
					args["pron"][i][1] = 'yeísta'
				elseif etiqueta:sub(1,5) == 'no ye' then
					-- 'no ye' has five characters; the former four-character prefix
					-- could never be equal to it.
					args["pron"][i][1] = 'no yeísta'
				elseif cc == 'll' then
					args["pron"][i][1] = 'no yeísta'
				end
			end
		end
	end

	local tiene_espacios = strfind(tit, " ")
	local rim = nil

	if tiene_espacios then
		-- Multi-word title: only the rhyme of the last word of the hint is derived.
		if not args["rima"] then
			local palabras = strsplit(args["ayuda"][1], "%s+")
			_,_,rim = obtener_informacion(separar_en_silabas(palabras[#palabras]))
		end

	else
		if #args["d"] < 1 and #titulo > 1 and titulo ~= "ñ" and titulo ~= "Ñ" then
			if not args["ls"] and not args["ac"] and not args["rima"] then
				ss_ = separar_en_silabas(args["ayuda"][1])
				args["ls"], args["ac"], rim = obtener_informacion(ss_)
			end

			if not args["nl"] then
				args["nl"] = strlen(tit)
			end

			-- With the word "information" derived from the hint, decide how to show the
			-- division of the original spelling.
			if not vino_ayuda then -- normal situation
				args["d"][1] = separar_en_silabas(tit)

				-- Footnotes about the syllable division: look for a known prefix among the
				-- first 8 letters of the title.
				local letras = strexplode(tit)
				local prefijo = ""
				for i,c in ipairs(letras) do
					if i > 8 then
						break
					end
					prefijo = prefijo .. c
					if m_prefijos[prefijo] then
						table.insert(args["dnota"], DOBLE_SEPARACION)
						break
					end
				end

				if #args["dnota"] == 0 then
					if strfind(args["d"][1], "[^qQgG]"..VOCAL_CERRADA_ATONA.."h?"..VOCAL_CERRADA_ATONA) or strfind(args["d"][1], "[^qQgG]"..VOCAL_CERRADA_ATONA.."h?"..VOCAL_ABIERTA_ATONA) then -- ignore que, qui, gue, gui
						table.insert(args["dnota"], NO_HIATO)
					elseif strfind(args["d"][1], SEPARADORES_SILABICOS..VOCAL_GENERAL..SEPARADORES_SILABICOS) or strfind(args["d"][1], "^"..VOCAL_GENERAL..SEPARADORES_SILABICOS) or strfind(args["d"][1], SEPARADORES_SILABICOS..VOCAL_GENERAL.."$") then
						table.insert(args["dnota"], LETRA_HUERFANA)
					elseif strfind(args["d"][1], VOCAL_GENERAL.."h?"..SEPARADORES_SILABICOS.."h?"..VOCAL_GENERAL) then
						table.insert(args["dnota"], DOBLE_VOCAL)
					end
				end

				-- The "^post" case was dropped because it affects hardly any word.
				if strfind(titulo, "^abr") or strfind(titulo, "^subr") then
					if args["dnota"][1] then
						args["dnota"][1] = args["dnota"][1].."<br>"..AB_SUB
					else
						table.insert(args["dnota"], AB_SUB)
					end
				end
			else -- foreign word that came with a hint: project the hint's hyphens onto the title
				local tit_sombra = quitar_diacriticos(tit)
				-- ss_ is only computed above when none of ls/ac/rima was supplied; compute
				-- it here otherwise. separar_en_silabas returns nil for a hint containing
				-- spaces, in which case no hyphen is projected and the title is shown as is.
				if not ss_ then
					ss_ = separar_en_silabas(args["ayuda"][1])
				end
				local ss_ayuda = ss_ and quitar_diacriticos(ss_) or ""
				local ss = tit
				local j0 = 0 -- where the next letter match should be searched from

				while true do
					local i = strfind(ss_ayuda, "[a-zA-ZÀ-ž]%-[a-zA-ZÀ-ž]")
					if not i or i < 1 then
						break
					end
					local a, b = substr(ss_ayuda, i, i), substr(ss_ayuda, i+2, i+2)
					local ab = a..b
					local j = strfind(tit_sombra, ab, j0) -- is the letter pair present in the title?
					if j then -- yes: insert the separator right there
						tit_sombra = substr(tit_sombra, 1, j).."-"..substr(tit_sombra, j+1)
						ss = substr(ss, 1, j).."-"..substr(ss, j+1)
						j0 = j+2 -- advance two: the first letter and the hyphen
					else -- otherwise check whether at least one of the two letters matches
						local k = strfind(tit_sombra, a, j0) or math.huge
						local l = strfind(tit_sombra, b, j0) or math.huge
						if k < l and k >= 1 then
							tit_sombra = substr(tit_sombra, 1, k).."-"..substr(tit_sombra, k+1)
							ss = substr(ss, 1, k).."-"..substr(ss, k+1)
							j0 = k+2
						elseif l < k and l >= 2 then
							tit_sombra = substr(tit_sombra, 1, l-1).."-"..substr(tit_sombra, l)
							ss = substr(ss, 1, l-1).."-"..substr(ss, l)
							j0 = l+1 -- only the hyphen is added because the matched letter is the second one
						end
					end
					ss_ayuda = substr(ss_ayuda, i+2) -- truncate the hint to reach the next separation
				end

				args["d"][1] = ss

				if #args["dnota"] == 0 then
					if strfind(args["d"][1], SEPARADORES_SILABICOS.."[a-zA-ZÀ-ž]"..SEPARADORES_SILABICOS) or strfind(args["d"][1], "^".."[a-zA-ZÀ-ž]"..SEPARADORES_SILABICOS) or strfind(args["d"][1], SEPARADORES_SILABICOS.."[a-zA-ZÀ-ž]".."$") then
						table.insert(args["dnota"], LETRA_HUERFANA)
					elseif strfind(args["d"][1], VOCAL_GENERAL.."h?"..SEPARADORES_SILABICOS.."h?"..VOCAL_GENERAL) then
						table.insert(args["dnota"], DOBLE_VOCAL)
					end
				end
			end
		end
	end

	if rim then
		-- Turn the orthographic rhyme into IPA and normalize it: ʎ/ʝ are written ʃ, θ is
		-- written s, the stress mark is dropped and everything before the first vowel is cut.
		local _, fone = generar_pron(rim, false)
		local rima = fone[1][1]
		rima = strsubn(rima, "[ʎʝ]", "ʃ")
		rima = strsubn(rima, "θ", "s")
		rima = strsubn(rima, ac_primario, "")
		local idx = strfind(rima, VOCAL)
		if rima and idx and idx <= #rima then
			rima = substr(rima, idx)
		end
		args["rima"] = rima
	end

	return args

end

return export