-- Módulo:generar-pron/pt
--
-- La documentación para este módulo puede ser creada en Módulo:generar-pron/pt/doc
--[=[

Author: Benwing
Introducido en es.wikt por Tmagc

Issues concerning South Brazil pronunciation:
(Rather than a unified South Brazil pronunciation, it would be better to turn it into a Rio Grande do Sul/Gaúcho pronunciation)

1. Should all 'ẽ' (not just word-final) be rendered as [ẽj̃]? We have several existing examples, e.g.
   /de.zẽ.ba.ˈla.do/|/de.zẽj̃.ba.ˈla.do/ for [[desembalado]], /ˌde.zẽj̃.has.ˈkɐ̃.so/ for [[desenrascanço]],
   /ẽj̃.baw.sa.ˈma(ɻ)/ for [[embalsamar]], [ẽj̃.pũˈj̃aɾ] for [[empunhar]], /ẽ.ʁus.tiɾ/|[ẽj̃.ʁuʃˈ(t)͡ʃiɾ] for
   [[enrustir]].
2. Coda 'r': Mostly /ɻ/ but sometimes given as both /ɾ/ and /ɻ/ or occasionally /r/.
3. Raising of unstressed 'e' and 'o', and 'des-': Mostly not present but sometimes yes, e.g. [[clarear]]
   /kla.ɾe.ˈa(ɾ)/|/kla.ɾe.ˈa(ɻ)/|/kla.ɾi.ˈa(ɾ)/|/kla.ɾi.ˈa(ɻ)/|/kla.ˈɾja(ɾ)/|/kla.ˈɾja(ɻ)/, [[consentâneo]]
   [kõ(w̃).senˈtə̃.ni̯u], [[cozido]] /ko.ˈzi.dʊ/|/ku.ˈzi.dʊ/, [[desbloquear]] /dez.blo.ke.ˈa(ɾ)/|/dez.blo.ˈkja(ɾ)/|/d͡ʒiz-/|/-(ɻ)/,
   [[desferir]] /des.fe.ˈɾi(ɾ)/|/des.fe.ˈɾi(ɻ)/|/des.fi.ˈɾi(ɾ)/|/des.fi.ˈɾi(ɻ)/, [[desrespeitar]] /dez.hes.pej.ˈta(ɾ)/|/d͡ʒiz-/|/-(ɻ)/,
   [[destruído]] /des.tɾu.ˈi.do/|/d͡ʒis.tɾu.ˈi.do/.
4. Epenthetic 'i': Less common? E.g. [[dignidade]] given as just /ˌd͡ʒiɡ.ni.ˈda.de/, but [[digno]] as /ˈd͡ʒiɡ.no/|/ˈd͡ʒi.ɡi.no/.
5. Secondary stress: Often given two syllables before the stress. I have left it out unless it seems stable and in a prefix, e.g.
   [[eleitorado]] /e.ˌlej.to.ˈɾa.do/, [[eletrizante]] /e.ˌle.tɾi.ˈzɐ̃.te/, [[elevado]] /ˌe.le.ˈva.do/, [[elevador]] /e.ˌle.va.ˈdoɻ/,
   [[engarrafamento]] /ˌẽ.ɡa.ˌha.fa.ˈmẽ.to/.
6. Initial em-: Mostly given as /ẽ-/ or /ẽj̃-/, but sometimes /ĩ-/ as in [[emparedar]] /ĩ.pa.ɾe.ˈda(ɾ)/|/ĩ.pa.ɾe.ˈda(ɻ)/
   or both as in [[encaixar]] /ẽ.kaj.ˈʃa(ɾ)/|/ĩ.kaj.ˈʃa(ɻ)/|/ẽ.kaj.ˈʃa(ɾ)/|/ĩ.kaj.ˈʃa(ɻ)/.
7. -nh-: [[empunhar]] given as [ẽj̃.pũˈj̃aɾ], but is /j̃/ actually characteristic of this accent or is it /ɲ/?
8. Other inconsistencies: e.g. [[enrustir]] /ẽ.ʁus.tiɾ/|[ẽj̃.ʁuʃˈ(t)͡ʃiɾ], with coda /ʃ/ and strong [ʁ] (usually [h]).
   [[ab-rogar]] /ab.ʁoˈɡa(ɻ)/|/ab.hoˈɡa(ɻ)/|/ab.χoˈɡa(ɻ)/|/ab.ɦoˈɡa(ɻ)/ with all possible strong r's.
9. Nasal vowels: Almost always as elsewhere, but occasionally e.g. [[entender]] /ĩn.tẽnˈde(r)/, [[a gente]] /a.ˈʒen.te/|/a.ˈʒẽ.te/.
10. Nasal diphthongs: -ão sometimes claims to be /ɐ̃õ/, sometimes /ɐ̃w̃/. [[-ção]] listed both.
]=]

-- Table that collects the functions this module exports.
local export = {}

-- Local aliases for standard-library functions. `unpack` falls back to `table.unpack` when the global `unpack` is
-- not defined.
local unpack = unpack or table.unpack
local insert = table.insert
local concat = table.concat
local sort = table.sort

-- Helper modules this file depends on.
local m_table = require("Módulo:tabla")
local m_str = require("Módulo:String")

-- Short local aliases for the functions provided by Módulo:String. Notes based on how they are used in this file:
-- * `u` builds a character from a code point (e.g. u(0x0301));
-- * `strsubn` is wrapped by strsub() below, which keeps only its first return value;
-- * `strsubb` is used as returning the new string plus a flag telling whether anything was replaced;
-- * `strsubrep` is used for substitutions that must be applied repeatedly (presumably until no more changes occur;
--   confirm against Módulo:String).
local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strsubrep = m_str.gsub_rep
local strmatch = m_str.match
local strmatchit = m_str.gmatch
local strsplit = m_str.split
local strstrip = m_str.strip
local strexplode = m_str.explode_utf8
local strlower = m_str.lower
local strlen = m_str.len
local substr = m_str.sub
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strhtml = m_str.encode_html

-- Same as strsubn(), but hands back only the first value that strsubn() returns (the resulting string); any
-- further return values are dropped. The extra parentheses around the call are what truncate the result list.
local function strsub(term, foo, bar)
	return (strsubn(term, foo, bar))
end

-- Character-class patterns that match a single punctuation character.
-- PUNTUACION covers parentheses, square brackets, braces, inverted and regular exclamation/question marks, sentence
-- punctuation and en/em dashes. PUNTUACION_EXTRA additionally covers straight and curly quotes, guillemets, the
-- apostrophe and the spacing acute accent. A few quote characters are listed twice in PUNTUACION_EXTRA, which is
-- harmless inside a character class.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

-- Combining diacritics, each built from its code point with u().
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent = open vowel quality without stress in Portugal only
local MACRON = u(0x0304) -- combining macron = closed vowel quality without stress in Portugal only
local CFLEX = u(0x0302) -- combining circumflex
local TILDE = u(0x0303) -- combining tilde
local DIA = u(0x0308) -- combining diaeresis
local CEDILLA = u(0x0327) -- combining cedilla
local DOTOVER = u(0x0307) -- combining dot above
-- DOTUNDER indicates an explicitly unstressed syllable; useful when accompanied by a quality marker (acute or
-- circumflex), or by itself with a/e/o, where it defaults to acute (except in the following circumstances, where it
-- defaults to circumflex: (1) in the diphthongs ei/eu/oi/ou; (2) in a nasal vowel).
local DOTUNDER = u(0x0323) -- combining dot below
-- LINEUNDER indicates an explicit secondary stress; normally not necessary as primary stress is converted to secondary
-- stress if another primary stress follows, but can be used e.g. after a primary stress; can be accompanied by a
-- quality marker (acute or circumflex) with a/e/o; if not, defaults to acute (except in the same circumstances where
-- dot under defaults to circumflex).
local LINEUNDER = u(0x0331) -- combining macron below (line under)
-- Internal placeholder characters, one code point each, in the range U+FFF0..U+FFF3.
-- TEMP1 serves to temporarily mark where a syllable division should not happen; it also temporarily substitutes for
-- comma+space and temporarily substitutes for #.
local TEMP1 = u(0xFFF0)
local SYLDIV = u(0xFFF1) -- used to represent a user-specified syllable divider (.) so we won't change it
local PSEUDOCONS = u(0xFFF2) -- pseudo-consonant at the edge of prefixes ending in a vowel and suffixes beginning with a vowel
local PREFIX_MARKER = u(0xFFF3) -- marker indicating a prefix so we can convert primary to secondary accents


-- IPA primary and secondary stress marks.
local primary_stress = "ˈ"
local secondary_stress = "ˌ"
-- Hyphen used as a syllable separator ("separador silábico").
local sepsil = "-"
-- Contents of a character class matching anything that can delimit syllables: a literal period, the hyphen, the
-- user-specified divider placeholder SYLDIV and the two IPA stress marks.
-- NOTE: `stress` and `stress_c` are deliberately not declared here. They are declared further down in this file
-- (where they also include the stress diacritics), so declaring them here as well only produced shadowed
-- duplicates; the two IPA stress marks are spelled out through the named constants instead.
local separadores_silabicos = "%." .. sepsil .. SYLDIV .. primary_stress .. secondary_stress
local SEPARADORES_SILABICOS = "[" .. separadores_silabicos .. "]"

-- Since we convert all symbols at the beginning and decompose accented characters (except for ç and ü), we can later
-- use capital and/or accented letters to represent additional distinctions, typically in cases where we want to
-- remember the source of a given phoneme. By convention we use capital letters, optionally with accents.
-- Specifically:
-- * A/E/O represent written a/e/o where we don't yet know the vowel quality. Towards the beginning, we convert all
--   written a/e/o to A/E/O and later convert them to their final qualities (which might include /a/ /e/ /o/, so we
--   can't use those symbols directly for this purpose).
-- * Ẽ stands for a word-initial Brazilian sound that can be pronounced either /ẽ/ (in careful speech) or /ĩ/ (in
--   natural speech) and originates from en- or em- before a consonant. We distinguish this from written in-/im-,
--   which can be only /ĩ/, and written ehn-/ehm- (or similar), which can be only /ẽ/.
-- * I is used to represent epenthetic i in Brazilian variants (which should not affect stress assignment but is
--   otherwise treated as a normal sound), and Ɨ represents deleted epenthetic i (which still palatalizes /t/ and /d/).
--   I is also used to represent Brazil e or i from initial esC-, and Portugal (i) from initial esC-.
-- * Ì is used to represent either i. in hiatus or /j/ in Brazil; likewise for Ù representing u. in hiatus or /w/.
-- * Ɔ (capital version of ɔ) stands for a Portugal sound that can be pronounced either /o/ or /ɔ/ (depending on the
--   speaker), before syllable-final /l/.
-- * Ú is used word-finally after i to represent either .u in hiatus or /w/ in Brazil.
-- * L is used word-finally in Portugal to split words ending in -le into two pronuns due to the differing pronunciation
--   of /l/ in the two cases (coda or not).
-- All symbols treated as vowels, including the capital/accented internal symbols described above.
local vowel = "aɐeɛiɨoɔuüAEẼIƗÌOƆÙÚ"
-- Character class matching one vowel symbol.
local V = "[" .. vowel .. "]"
-- Character class matching one character that is neither a vowel symbol nor a literal "^".
local NV_NOT_SPACING_CFLEX = "[^" .. vowel .. "%^]"
-- High front vocalic symbols (including the epenthetic-i symbols and the glide y), and the wider set of front
-- vocalic symbols built on top of them.
local high_front_vocalic = "iIƗÌy"
local front_vocalic = "eɛɨẼ" .. high_front_vocalic
-- Character class matching one front vocalic symbol.
local FRONTV = "[" .. front_vocalic .. "]"
local glide = "yw"
local W = "[" .. glide .. "]" -- glide
-- The two IPA stress marks (primary and secondary) and a character class matching either one.
local ipa_stress = "ˈˌ"
local ipa_stress_c = "[" .. ipa_stress .. "]"
-- Quality diacritics: acute and circumflex form the "primary" set; `quality` adds grave and macron.
local primary_quality = AC .. CFLEX
local primary_quality_c = "[" .. primary_quality .. "]"
local quality = AC .. CFLEX .. GR .. MACRON
local quality_c = "[" .. quality .. "]"
-- All stress-related marks: line under, dot over, dot under and the two IPA stress marks.
local stress = LINEUNDER .. DOTOVER .. DOTUNDER .. ipa_stress
local stress_c = "[" .. stress .. "]"
-- Stress-related marks other than primary stress: line under, dot under and the IPA secondary stress mark.
local non_primary_stress = LINEUNDER .. DOTUNDER .. "ˌ"
local non_primary_stress_c = "[" .. non_primary_stress .. "]"
-- Every mark that may be attached to a vowel: quality marks, stress marks and the tilde.
local accent = quality .. stress .. TILDE
local accent_c = "[" .. accent .. "]"
-- Any component separator that should be "transparent" (i.e. ignored) during syllabification processes. This should
-- include a subset of the component_sep characters, currently + and * (which ++ is converted into).
local syl_transp_component_sep = "+*"
local syl_transp_component_sep_c = "[" .. syl_transp_component_sep .. "]"
-- Any character that should be "transparent" (i.e. ignored) during syllabification processes. This includes the
-- syllable-transparent component separators + and ++ (converted into *) as well as the tie character, which originates
-- from an apostrophe (e.g. [[barriga d'água]]).
local syl_transp = syl_transp_component_sep .. "‿"
local syl_transp_c = "[" .. syl_transp .. "]"
-- Zero or more syllable-transparent characters; used during syllabification.
local STC = syl_transp_c .. "*"
-- Component separators that are not transparent to syllabification. Includes colon (:), hyphen (-) and double hyphen
-- (--), which is converted internally to @.
local non_syl_transp_component_sep = ":@%-"
local non_syl_transp_component_sep_c = "[" .. non_syl_transp_component_sep .. "]"
-- "component_sep" means any symbol that may separate word components (not including #, which is added at a certain
-- point next to certain word components so that the adjacent characters are treated as if they are at word boundaries).
local component_sep = syl_transp_component_sep .. non_syl_transp_component_sep
local component_sep_c = "[" .. component_sep .. "]"
-- Character class matching # or any component separator.
local word_or_component_sep_c = "[#" .. component_sep .. "]"
-- Syllable divider (auto-inserted or user-specified).
local syldiv = "." .. SYLDIV
local syldiv_c = "[" .. syldiv .. "]"
-- "charsep" means any symbol that may separate the individual characters that make up a word, and which should be
-- ignored for e.g. consonant-consonant assimilation processes. This currently includes accents and syllable dividers.
local charsep = accent .. syldiv
local charsep_c = "[" .. charsep .. "]"
-- Characters that may divide words, other than the tie (‿), which is transparent to syllabification.
local non_syl_transp_word_divider = " #"
-- All characters that may divide words.
local word_divider = non_syl_transp_word_divider .. "‿"
-- "wordsep_not_syl_transp" means the same as "wordsep" below but excludes syllable-transparent characters. It is used
-- in other collections of symbols (particularly when negated, so as to include syllable-transparent characters but
-- otherwise exclude word separators) rather than by itself.
local wordsep_not_syl_transp = charsep .. non_syl_transp_word_divider .. non_syl_transp_component_sep
-- "wordsep" means any symbol that may separate the individual characters that make up a word or may separate words or
-- components, and which should be ignored for e.g. consonant-consonant assimilation processes that operate across
-- words. This currently includes everything in "charsep" and "component_sep" plus symbols that may divide words.
local wordsep = wordsep_not_syl_transp .. syl_transp
local wordsep_c = "[" .. wordsep .. "]"
-- The consonant classes below are negated classes: "consonant" means anything that is not a vowel symbol, not one of
-- the listed separators and not an underscore.
local C = "[^" .. vowel .. wordsep .. "_]" -- consonant
-- consonant or syllable-transparent component separator
local C_OR_SYL_TRANSP = "[^" .. vowel .. wordsep_not_syl_transp .. "_]"
-- h or a syllable-transparent character; likewise with the glides y/w added.
local H_OR_SYL_TRANSP = "[h" .. syl_transp .. "]"
local H_GLIDE_OR_SYL_TRANSP = "[h" .. glide .. syl_transp .. "]"
local C_NOT_H_OR_GLIDE = "[^h" .. glide .. vowel .. wordsep .. "_]" -- consonant other than h, w or y
local C_OR_WORD_BOUNDARY = "[^" .. vowel .. charsep .. "_]" -- consonant or word boundary
local voiced_cons = "bdglʎmnɲŋrɾʁvzjʒʤ" -- voiced sound

-- Unstressed words with vowel reduction in Brazil and Portugal. Stored as a set built from the list below with
-- m_table.listToSet().
local unstressed_words = m_table.listToSet({
	"o", "os", -- definite articles
	"me", "te", "se", "lhe", "lhes", "nos", "vos", -- unstressed object pronouns
	-- See https://en.wikipedia.org/wiki/Personal_pronouns_in_Portuguese#Contractions_between_clitic_pronouns
	"mo", "mos", "to", "tos", "lho", "lhos", -- object pronouns combined with articles
	-- Allomorphs of articles after certain consonants
	"lo", "los", "no", -- [[nos]] above as object pronoun
	-- Allomorphs of object pronouns before other pronouns
	"vo", -- [[no]] above as allomorph of article
	"que", -- subordinating conjunctions
	"e", -- coordinating conjunctions
	"de", "do", "dos", "por", -- basic prepositions + combinations with articles; [[no]], [[nos]] above already
	-- FIXME: Portugal pronun for pelos, pela, pelas given as if spelled pêlos, etc. with stress, but differently for pelo.
	-- I am assuming the Portugal pronuns with stress are wrong.
	"pelo", "pelos", "pela", "pelas" -- preposition + article combinations
})

-- Unstressed words with vowel reduction in Portugal only (i.e. they keep a full vowel in Brazil, hence the name).
-- Stored as a set built from the list below with m_table.listToSet().
local unstressed_full_vowel_words_brazil = m_table.listToSet({
	"a", "as", -- definite articles
	-- See https://en.wikipedia.org/wiki/Personal_pronouns_in_Portuguese#Contractions_between_clitic_pronouns
	"ma", "mas", "ta", "tas", "lha", "lhas", -- object pronouns combined with articles
	-- Allomorphs of articles after certain consonants
	"la", "las", "na", "nas",
	"da", "das", -- basic prepositions + combinations with articles; [[na]], [[nas]] above already
	-- coordinating conjunctions; [[mas]] above already
	"para", "pra", -- preposition; for the verb form use "pára" instead
})

-- Unstressed words without vowel reduction. Stored as a set built from the list below with m_table.listToSet().
local unstressed_full_vowel_words = m_table.listToSet({
	"um", "uns", -- single-syllable indefinite articles
	"meu", "teu", "seu", "meus", "teus", "seus", -- single-syllable possessives
	"ou", -- coordinating conjunctions
	-- Note that in order to match à and às we have to write them as below because at the point where we try to
	-- match them, all text has been converted to canonical decomposed Unicode form. Writing "à" and "às" directly
	-- won't work even if you type in the text using decomposed Unicode characters, because all page contents are
	-- automatically converted to canonical composed form when saved.
	"ao", "aos", "a" .. GR, "a" .. GR .. "s", -- basic prepositions + combinations with articles
	"em", "com", -- other prepositions
})

-- Special-case pronunciations for certain unstressed words with irregular pronunciations. The left side is the
-- original spelling after DOTUNDER or DOTOVER has been added; which diacritic gets added depends on whether the word
-- has vowel reduction (DOTOVER) or no vowel reduction (DOTUNDER). The right side is the respelling. The grave accent
-- is written as "a" .. GR rather than as a literal "à" because the keys must be in decomposed form, whereas a
-- literal typed here would be stored in composed form.
-- NOTE(review): the identifier looks like a mangled "unstressed_pronunciation_substitution"; it is left unchanged
-- here because code outside this section may refer to it by this name.
local unstressed_notaunciation_substitution = {
	["a" .. DOTUNDER .. "o"] = "a" .. DOTUNDER .. "u",
	["a" .. DOTUNDER .. "os"] = "a" .. DOTUNDER .. "us",
	["a" .. GR .. DOTUNDER] = "a" .. DOTUNDER,
	["a" .. GR .. DOTUNDER .. "s"] = "a" .. DOTUNDER .. "s",
	["po" .. DOTOVER .. "r"] = "pu" .. DOTOVER .. "r",
}

-- Dialects and subdialects.
-- Set of style codes treated as Brazilian; one_term_ipa() looks the requested style up here.
local br_styles = {
	["gbr"] = true,
	["rio"] = true,
	["sp"] = true,
	["sbr"] = true,
}

-- Set of style codes treated as European Portuguese; one_term_ipa() looks the requested style up here.
local pt_styles = {
	["gpt"] = true,
	["cpt"] = true,
	["spt"] = true,
	["npt"] = true,
}

-- Full (Spanish-language) name of each style code; presumably used as a display label -- confirm against the callers.
local nombre_completo = {
	["gbr"] = "brasilero",
	["rio"] = "carioca",
	["sp"] = "paulista",
	["sbr"] = "gaúcho",
	["gpt"] = "europeo",
	["cpt"] = "extremeño",
	["spt"] = "alentejano/algarvio",
	["npt"] = "portuense/transmontano"
}

-- The alphabet: the Portuguese names of the 26 letters, a through z in order, each wrapped in its own
-- single-element list. The names are written as respellings (e.g. the name of x is given as "chis").
local pron_abc = {{"a"},{"bê"},{"cê"},{"dê"},{"e"},{"efe"},{"gê"},{"agá"},{"i"},
	{"jota"},{"cá"},{"ele"},{"eme"},{"ene"},{"o"},{"pê"},{"quê"},{"erre"},{"esse"},{"tê"},{"u"},
	{"vê"},{"dáblio"},{"chis"},{"ípsilon"},{"zê"}}


-- Reorder the diacritics (accent marks) in `text` according to a canonical order. Specifically, there can conceivably
-- be up to three accents on a vowel: a quality mark (acute/circumflex/grave/macron); a mark indicating secondary stress
-- (lineunder), tertiary stress (dotunder; i.e. no stress but no vowel reduction) or forced vowel reduction (dotover);
-- and a nasalization mark (tilde). Order them as follows: quality - stress - nasalization.
--
-- Parameters:
--   text: the string whose accent clusters are to be normalized.
-- Returns: `text` with every run of accent characters sorted into canonical order and with immediately repeated
--   accents collapsed to one.
-- Raises (via error()): when two different quality marks or two different stress marks end up adjacent, or when a
--   circumflex, grave or macron follows anything other than a/e/o.
local function reorder_accents(text)
	-- Sort rank of every character that accent_c can match. The IPA stress marks ˈ and ˌ are part of accent_c, so
	-- they need a rank too; otherwise the comparator below would compare nil with a number and crash as soon as
	-- one of them appears in a cluster together with another accent. They rank with the other stress marks, i.e.
	-- after a quality mark and before the tilde, which is also where the stress-assignment code places ˈ.
	-- The table is built once per call rather than once per accent cluster.
	local accent_order = {
		[AC] = 1,
		[CFLEX] = 1,
		[GR] = 1,
		[MACRON] = 1,
		[LINEUNDER] = 2,
		[DOTUNDER] = 2,
		[DOTOVER] = 2,
		["ˈ"] = 2,
		["ˌ"] = 2,
		[TILDE] = 3,
	}
	-- Sort the characters of one accent cluster by rank.
	local function reorder_accent_string(accentstr)
		local accents = strexplode(accentstr)
		sort(accents, function(ac1, ac2)
			return accent_order[ac1] < accent_order[ac2]
		end)
		return concat(accents)
	end
	text = strsub(text, "(" .. accent_c .. "+)", reorder_accent_string)
	-- Remove duplicate accents.
	text = strsubrep(text, "(" .. accent_c .. ")%1", "%1")
	-- Make sure we don't have more than one of a given class. Exact duplicates were collapsed just above, so two
	-- adjacent marks of the same class must be two different marks.
	if strfind(text, quality_c .. quality_c) then
		error("Two different quality diacritics cannot occur together")
	end
	if strfind(text, stress_c .. stress_c) then
		error("Two different stress diacritics cannot occur together")
	end
	-- Only a/e/o can receive a circumflex, grave or macron. Quality marks sort first, so after reordering such a
	-- mark sits directly after its base letter.
	if strfind(text, "[^aeo][" .. CFLEX .. GR .. MACRON .. "]") then
		error("Only a/e/o can be followed by circumflex, grave or macron")
	end
	return text
end


-- Generate partial IPA for a single preprocessed term respelling `text` in the specified `style` ('gbr', 'rio', etc.;
-- see `br_styles` and `pt_styles` above). If `phonetic` is given, generate phonetic output, otherwise phonemic output.
-- This function is a subfunction of `IPA` and cannot really be used by itself, because it generates output
-- containing special symbols that need to be postprocessed into multiple outputs (and in addition some other final
-- postprocessing needs to happen, e.g. to get stress marks in the right place). The function `IPA` is available to
-- be called externally.
local function one_term_ipa(text, style, phonetic)
	local brazil = br_styles[style]
	local portugal = pt_styles[style]

	-- Initial x -> /ʃ/: [[xérox]], [[xilofone]], [[xadrez]], etc.
	text = strsub(text, "(" .. word_or_component_sep_c .. ")x", "%1ʃ")
	-- Final x -> /ks/ ([[clímax]], [[xérox]], [[córtex]], [[hélix]], [[durex]], [[lux]], etc.), but for now we map to
	-- X because later on we open unstressed vowels before final x.
	text = strsub(text, "x(" .. word_or_component_sep_c .. ")", "X%1")
	-- x after certain diphthongs (ai, ei, oi, ou) and after -en- should be /ʃ/. Other diphthongs before x are rare
	-- and mostly learned, so for those we need to force explicit respelling.
	text = strsub(text, "(([aeo])" .. charsep_c .. "*([iun])" .. charsep_c .. "*)x",
		function(all, a, b)
			local ab = a .. b
			-- [[baixo]], [[peixe]], [[troixa]], [[frouxo]], [[enxame]], etc.
			if ab == "ai" or ab == "ei" or ab == "oi" or ab == "ou" or ab == "en" then
				return all .. "ʃ"
			else
				return all .. "x"
			end
		end)
	-- -exC- should be pronounced like -esC- in Brazil but -eisC- in Portugal. Cf. excelente, experiência, têxtil,
	-- êxtase. Not with other vowels (cf. [[Felixlândia]], [[Laxmi]], [[Oxford]]).
	-- FIXME: Maybe this applies only to Lisbon and environs?
	text = strsub(text, "(e" .. accent_c .. "*)x(" .. C .. ")", function(v, c)
		if brazil then
			return v .. "s" .. c
		elseif c == "s" then
			return v .. "isç"
		else
			return v .. "is" .. c
		end
	end)
	if strfind(text, "x") then
		--error("x must be respelled z, sh, cs, ss or similar")
		-- En lugar de tirar error, asumo la pronunciación más probable https://rioandlearn.com/x-in-portuguese/

		-- 1. Tras au, es ss (auxiliar) --> esto lo necesito al principio
		text = strsub(text, "([Aa][uU])x", "%1ss")

		-- 2. Después de un diptongo o de "en" es sh (enxada, faixa)
		text = strsub(text, "(".."[aeouAEOU]["..AC..CFLEX.."]?[iuIU]"..")".."x", "%1sh")
		text = strsub(text, "([eE]n)x", "%1sh")

		-- 3. Entre vocales es z (exilio, exodo)
		text = strsub(text, "(".."[aeiouAEIOU]"..accent_c.."?"..")".."x".."([aeiouAEIOU])", "%1z%2")

		-- 4. Antes de p o de t es s (experiencia, texto)
		text = strsub(text, "x([pt])", "s%1")

		-- 5. Antes de c es sc (exceçao)
		text = strsub(text, "xc", "sc")

		-- 6. Los que quedan, asumo que mapean a ks (torax, ortodoxo, etc.)
		text = strsub(text, "x", "ks")
	end

	-- combinations with h; needs to precede handling of c and s, and needs to precede syllabification so that
	-- the consonant isn't divided from the following h.
	if style == "npt" then
		-- In Northern Portugal the affricate tch is kept 
		text = strsub(text, "ch", "tʃ")
	end
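	-- Remaining digraphs with h, in every style: sh/ch -> ʃ, nh -> ɲ, lh -> ʎ (in 'npt', ch was already rewritten above).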
	text = strsub(text, "([scln])h", {["s"]="ʃ", ["c"]="ʃ", ["n"]="ɲ", ["l"]="ʎ" })

	-- remove initial <h>
	text = strsub(text, "(" .. word_or_component_sep_c .. ")h([^" .. accent .. "])", "%1%2")
	
	-- Betacism
	if style == "npt" then
		-- In Northern Portugal, <v> is realized as <b>  
		text = strsub(text, "v", "b")
	end
	
	-- c, g, q
	-- This should precede syllabification especially so that the latter isn't confused by gu, qu, gü, qü
	-- also, c -> ç before front vowel ensures that cc e.g. in [[cóccix]], [[occitano]] isn't reduced to single c.
	text = strsub(text, "c(" .. FRONTV .. ")", "ç%1")
	text = strsub(text, "g(" .. FRONTV .. ")", "j%1")
	text = strsub(text, "gu(" .. FRONTV .. ")", "g%1")
	-- [[camping]], [[doping]], [[jogging]], [[Bangkok]], [[angstrom]], [[tungstênio]]
	text = strsub(text, "ng([^" .. vowel .. glide .. "hlr])", brazil and "n%1" or "ngh%1")
	text = strsub(text, "qu(" .. FRONTV .. ")", "k%1")
	text = strsub(text, "ü", "u") -- [[agüentar]], [[freqüentemente]], [[Bündchen]], [[hübnerita]], etc.
	text = strsub(text, "([gq])u(" .. V .. ")", "%1w%2") -- [[quando]], [[guarda]], etc.
	text = strsub(text, "[cq]", "k") -- [[Qatar]], [[burqa]], [[Iraq]], etc.

	-- y -> i between non-vowels, cf. [[Itamaraty]] /i.ta.ma.ɾa.ˈt(ʃ)i/, [[Sydney]] respelled 'Sýdjney' or similar
	-- /ˈsid͡ʒ.nej/ (Brazilian). Most words with y need respelling in any case, but this may help.
	text = strsub(text, "(" .. C_OR_WORD_BOUNDARY .. ")y(" .. accent_c .. "*" .. C_OR_WORD_BOUNDARY .. ")", "%1i%2")

	-- Reduce double letters to single, except for rr, mm, nn and ss, which map to special single sounds. Do this
	-- before syllabification so double letters don't get divided across syllables. The case of cci, cce is handled
	-- above. nn always maps to /n/ and mm to /m/ and can be used to force a coda /n/ or /m/. As a result,
	-- [[connosco]] will need respelling 'comnôsco', 'cõnôsco' or 'con.nôsco', and [[comummente]] will similarly
	-- need respelling e.g. as 'comum.mente' or 'comũmente'. Examples of words with double letters (Brazilian
	-- pronunciation):
	-- * [[Accra]] no respelling needed /ˈa.kɾɐ/;
	-- * [[Aleppo]] respelled 'Aléppo' /aˈlɛ.pu/;
	-- * [[buffer]] respelled 'bâfferh' /ˈbɐ.feʁ/;
	-- * [[cheddar]] respelled 'chéddarh' /ˈʃɛ.daʁ/;
	-- * [[Hanna]] respelled 'Ranna' /ˈʁɐ̃.nɐ/;
	-- * [[jazz]] respelled 'djézz' /ˈd͡ʒɛs/;
	-- * [[Minnesota]] respelled 'Minnessôta' /mi.neˈso.tɐ/;
	-- * [[nutella]] respelled 'nutélla' /nuˈtɛ.lɐ/;
	-- * [[shopping]] respelled 'shópping' /ˈʃɔ.pĩ/ or 'shóppem' /ˈʃɔ.pẽj̃/;
	-- * [[Stonehenge]] respelled 'Sto̱wnn.rrendj' /ˌstownˈʁẽd͡ʒ/;
	-- * [[Yunnan]] no respelling needed /juˈnɐ̃/.
	--
	-- Note that further processing of r and s happens after syllabification and stress assignment, because we need
	-- e.g. to know the distinction between final -s and -z to assign the stress properly.
	text = strsub(text, "rr", "ʁ")
	text = strsub(text, "nn", "N")
	text = strsub(text, "mm", "M")
	-- Deleted epenthetic /i/ should prevent preceding /m/, /n/ from being converted into nasalization.
	text = strsub(text, "mƗ", "MƗ")
	text = strsub(text, "nƗ", "NƗ")
	-- Will map later to /s/; need to special case to support spellings like 'nóss' (= nós, plural of nó).
	text = strsub(text, "ss", "S")
	text = strsub(text, "(" .. C .. ")%1", "%1")

	-- muit- is special and contains nasalization. Do before palatalization of t/d so [[muitíssimo]] works.
	text = strsub(text, "(" .. word_or_component_sep_c .. "mu" .. stress_c .. "*)(it)", "%1" .. TILDE .. "%2")

	-- Palatalize t/d + Ɨ -> affricates in Brazil. Use special unitary symbols, which we later convert to regular affricate
	-- symbols, so we can distinguish palatalized d from written dj. We only do Ɨ now so we can delete it; we do another
	-- palatalization round towards the end after raising e -> i.
	local palatalize_td = {["t"] = "ʧ", ["d"] = "ʤ"}
	if brazil then
		text = strsub(text, "([td])(" .. word_or_component_sep_c .. "*Ɨ)",
			function(td, high_vocalic) return palatalize_td[td] .. high_vocalic end)
		-- Now delete the symbol for deleted epenthetic /i/; it still triggers palatalization of t and d.
		text = strsub(text, "Ɨ", "")
	end
	-- Divide words into syllables.
	-- First, change user-specified . into a special character so we won't move it around. We need to keep this
	-- going forward until after we place the stress, so we can correctly handle initial i- + vowel, as in [[ia]],
	-- [[iate]] and [[Iaundé]]. We need to divide [[ia]] as 'i.a' but [[iate]] as 'ia.te' and [[Iaundé]] as 'Ia.un.dé'.
	-- In the former case, the stress goes on i but in the latter cases not; so we always divide <ia> as 'i.a',
	-- and then after stress assignment remove the syllable divider if the <i> isn't stressed. The tricky thing is
	-- that we want to allow the user to override this by explicitly adding a . between the <i> and <a>. So we need
	-- to keep the distinction between user-specified . and auto-determined . until after stress assignment.
	text = strsub(text, "%.", SYLDIV)
	-- We have various characters indicating divisions between word components where we want to treat the components
	-- more or less like separate words (e.g. -mente, -zinho/-zinha). Some such "characters" are digraphs, which we
	-- convert internally to single characters to simplify the code. Here, -- separates off -mente/-zinho/-zinha and
	-- ++ separates off prefixes. We want to ignore at least + and ++ (converted to *) for syllabification purposes.
	text = strsub(text, "%-%-", "@")
	text = strsub(text, "%+%+", "*")

	-- Respell [[homenzinho]] as 'homemzinho' so it is stressed correctly.
	text = strsub(text, "n(" .. SYLDIV .. "?ziɲos?" .. word_or_component_sep_c .. ")", "m%1")

	-- Divide before the last consonant (possibly followed by a glide). We then move the syllable division marker
	-- leftwards over clusters that can form onsets. Note that syllable-transparent component separators will always
	-- be (and will continue to be) to the left of syllable dividers rather than to the right, so we don't need to
	-- check for the latter situation.
	text = strsubrep(text, "(" .. V .. accent_c .. "*" .. C_OR_SYL_TRANSP .. "-)(" .. C .. H_GLIDE_OR_SYL_TRANSP .. "*" .. V .. ")", "%1.%2")
	text = strsub(text, "([pbtdkgfv]" .. H_OR_SYL_TRANSP .. "*)%.([lr])", ".%1%2")
	if portugal then
		-- "Improper" clusters of non-sibiliant-obstruent + obstruent (pt, bt, bd, dk, kt; ps, bs, bv, bʒ, tz, dv, ks;
		-- ft), non-sibiliant-obstruent + nasal (pn, bn, tm, tn, dm, dn, gm, gn), nasal + nasal (mn) are syllabified in
		-- Portugal as .pt, .bv, .mn, etc. Note ʃ.t, ʃ.p, ʃ.k, etc. But in Brazil, all of these divide between the
		-- consonants (p.t, b.v, ʃ.t, s.p, etc.). Particular case: [[ab-rogação]] divides as a.brr in Portugal but ab.rr
		-- in Brazil.
		text = strsub(text, "([pbtdkgfv]" .. H_OR_SYL_TRANSP .. "*)%.([pbtdkgfvsSçzʃʒjmMnNɲʎʁ])", ".%1%2")
		text = strsub(text, "([mM]" .. H_OR_SYL_TRANSP .. "*)%.([nN])", ".%1%2")
	else
		-- /tʃ/, /dʒ/ are normally single sounds, but adj- in [[adjetivo]], [[adjunto]] etc. should be 'ad.j'
		text = strsub(text, "(t" .. STC .. ")%.(ʃ)", ".%1%2")
		text = strsub(text, "(d" .. STC .. ")%.(j)", ".%1%2")
		text = strsub(text, "(" .. word_or_component_sep_c .. "a" .. STC .. ")%.(d" .. STC .. ")(j)", "%1%2.%3")
	end
	-- All vowels should be separated from adjacent vowels by a syllable division except
	-- (1) aeo + unstressed i/u, ([[saiba]], [[peixe]], [[noite]], [[Paulo]], [[deusa]], [[ouro]]), except when
	-- followed by nh or m/n/r/l + (non-vowel or word end), e.g. Bom.ba.im, ra.i.nha, Co.im.bra, sa.ir, but Jai.me,
	-- a.mai.nar, bai.le, ai.ro.so, quei.mar, bei.ra;
	-- (2) iu(s), ui(s) at end of word, e.g. fui, Rui, a.zuis, pa.riu, viu, sa.iu;
	-- (3) ão, ãe, õe.
	--
	-- The easiest way to handle this is to put a special symbol between vowels that should not have a syllable
	-- division between them.
	--
	-- First, put a syllable divider between [aeo].[iu][mnlr], as in [[Bombaim]], [[Coimbra]], [[saindo]], [[sair]],
	-- [[Iaundé]], [[Raul]]. Note that in cases like [[Jaime]], [[queimar]], [[fauna]], [[baile]], [[Paulo]], [[beira]],
	-- where a vowel follows the m/n/l/r, there will already be a syllable division between i.m, u.n, etc., which will
	-- block the following substitution.
	text = strsub(text, "([aeo]" .. accent_c .. "*" .. STC .. ")([iu]" .. STC .. "[mnlr])", "%1.%2")
	-- Also put a syllable divider between [aeo].[iu].ɲ coming from 'nh' ([[rainha]], [[moinho]]).
	text = strsub(text, "([aeo]" .. accent_c .. "*" .. STC .. ")([iu]" .. STC .. "%.ɲ)", "%1.%2")
	-- Prevent syllable division between final -ui(s), -iu(s). This should precede the following rule that prevents
	-- syllable division between ai etc., so that [[saiu]] "he left" gets divided as sa.iu.
	-- It doesn't make sense to have STC in the middle of a diphthong here.
	text = strsub(text, "(u" .. accent_c .. "*)(is?" .. word_or_component_sep_c .. ")", "%1" .. TEMP1 .. "%2")
	text = strsub(text, "(i" .. accent_c .. "*)(us?" .. word_or_component_sep_c .. ")", "%1" .. TEMP1 .. "%2")
	-- Prevent syllable division between ai, ou, etc. unless either the second vowel is accented [[saído]]) or there's
	-- a TEMP1 marker already after the second vowel (which will occur e.g. in [[saiu]] divided as 'sa.iu').
	text = strsubrep(text, "([aeo]" .. accent_c .. "*)([iu][^" .. accent .. TEMP1 .. "])", "%1" .. TEMP1 .. "%2")
	-- Prevent syllable division between nasal diphthongs unless somehow the second vowel is accented.
	text = strsubrep(text, "(a" .. accent_c .. "*" .. TILDE .. ")([eo][^" .. accent .. "])", "%1" .. TEMP1 .. "%2")
	text = strsubrep(text, "(o" .. accent_c .. "*" .. TILDE .. ")(e[^" .. accent .. "])", "%1" .. TEMP1 .. "%2")
	text = strsubrep(text, "(u" .. accent_c .. "*" .. TILDE .. ")(i[^" .. accent .. "])", "%1" .. TEMP1 .. "%2")
	-- All other sequences of vowels get divided.
	text = strsubrep(text, "(" .. V .. accent_c .. "*" .. STC .. ")(" .. V .. ")", "%1.%2")
	-- Remove the marker preventing syllable division.
	text = strsub(text, TEMP1, "")

	-- An acute or circumflex not followed by a stress marker has primary stress, so indicate it.
	text = strsubrep(text, "(" .. V .. quality_c .. ")([^" .. stress .. "])", "%1ˈ%2")
	-- Line-under indicates secondary stress.
	text = strsub(text, LINEUNDER, "ˌ")

	-- Give `word` a primary stress mark (ˈ) unless it already has one. Returns the (possibly modified) word.
	local function accent_word(word)
		-- An explicit primary stress always wins. This is tested before looking for a tilde because both may occur
		-- in the same word, e.g. [[bênção]], [[órgão]], [[hétmã]], [[connosco]] respelled 'cõnôsco'.
		if strfind(word, "ˈ") then
			return word
		end

		-- Split at the syllable dividers (auto-added or user-specified). The divider is captured so that it stays
		-- in the resulting list; syllables are therefore taken to sit at every other position, which is why all
		-- the index arithmetic below moves in steps of two.
		local parts = strsplit(word, "([." .. SYLDIV .. "])")
		local last = #parts

		-- A nasal vowel marked with a tilde and carrying no non-primary stress attracts the stress. Scan from the
		-- right so that the last such syllable is chosen when there are several, e.g. [[pãozão]]. This can also
		-- happen in the part before -mente, cf. [[anticristãmente]], and before -zinho, cf. [[coraçãozinho]].
		local nasal_vowel = "(" .. V .. quality_c .. "*)" .. TILDE
		local stressed_nasal = "%1ˈ" .. TILDE
		local pos = last
		while pos >= 1 do
			local updated, changed = strsubb(parts[pos], nasal_vowel, stressed_nasal)
			parts[pos] = updated
			if changed then
				return concat(parts)
			end
			pos = pos - 2
		end

		-- Default stress rule: the final syllable, or the one before it when the word has more than one syllable
		-- and ends in a/e/o (optionally followed by s), -am/-em or -ans/-ens. Prefixes ending in a vowel such as
		-- pseudo- have a PSEUDOCONS after the final vowel, which must not interfere with this choice.
		local target = last
		if last > 1 and (strfind(word, "[aeo][s" .. PSEUDOCONS .. "]?$") or strfind(word, "[ae]m$")
			or strfind(word, "[ae]ns$")) then
			target = last - 2
		end
		-- Never stress epenthetic i: step left past such syllables, as if the epenthetic i weren't there.
		while target > 1 and strfind(parts[target], "I") do
			target = target - 2
		end
		-- It is (vaguely) possible to have a one-syllable word beginning with a complex cluster such as gn- and
		-- followed by a normally unstressed ending such as -em; in that case step right again so that the ending
		-- is the syllable that gets stressed.
		while target < last and strfind(parts[target], "I") do
			target = target + 2
		end
		-- A syllable without a vowel (possible at the end of a non-final component when a stress-transparent
		-- component separator is used, as in [[rapazinho]] respelled 'rapaz+inho') cannot carry stress; move left.
		while target > 1 and not strfind(parts[target], V) do
			target = target - 2
		end

		local chosen = parts[target]
		-- Primary stress was ruled out at the top, so a hit here means the syllable already carries a non-primary
		-- stress mark; leave the word untouched.
		if strfind(chosen, stress_c) then
			return word
		end
		-- Put the stress mark right after the syllable's first vowel and any quality mark on it.
		parts[target] = strsub(chosen, "^(.-" .. V .. quality_c .. "*)", "%1ˈ")
		return concat(parts)
	end

	-- Split the text into words and the words into components so we can correctly add stress to components without it.
	local words = strsplit(text, " ")
	for j, word_with_boundary_markers in ipairs(words) do
		-- Prefixes have a PREFIX_MARKER after the # at the end of the prefix; split it off.
		local begin_marker, word, end_marker = strmatch(word_with_boundary_markers, "^(#*)(.-)([#" .. PREFIX_MARKER .. "]*)$")
		-- Words ending in -mente, -zinho(s) or -zinha(s): add primary stress to the preceding portion as if stressed
		-- (e.g. [[agitadamente]] -> 'agitádamente') unless already stressed (e.g. [[rapidamente]] respelled
		-- 'rápidamente'). The primary stress will be converted to secondary stress further below. Essentially, we
		-- rip the word apart into two words ('mente'/'zinho' and the preceding portion) and
		-- stress each one independently. Note that the effect of adding a primary stress will also be to cause
		-- an error if stressed 'e' or 'o' is not properly marked as é/ê or ó/ô; cf. [[certamente]], which must
		-- be respelled 'cértamente', and [[posteriormente]], which must be respelled 'posteriôrmente', just as
		-- with [[certa]] and [[posterior]]. To prevent this happening, you can add an accent to -mente or
		-- -zinho, e.g. [[dormente]] respelled 'dormênte', [[vizinho]] respelled 'vizínho'.
		if strfind(word, syldiv_c .. "men%.te$") then
			word = strsub(word, syldiv_c .. "(men%.te)$", "@%1")
		else
			word = strsub(word, syldiv_c .. "(zi%.ɲ[oa]s?)$", "@%1")
		end

		-- Split on components; preserve the component divider.
		local components = strsplit(word, "(" .. component_sep_c .. syldiv_c .. "*)")
		for k = 1, #components, 2 do -- 2 because of the component dividers.
			-- Don't add stress to components followed by ++ (converted to *).
			if k == #components or not strfind(components[k + 1], "%*") then
				components[k] = accent_word(components[k])
			end
		end
		-- Reconstruct the word.
		words[j] = begin_marker .. concat(components, "") .. end_marker
	end

	-- Reconstruct the text from the words.
	text = concat(words, " ")

	-- Add word boundaries around component separators. We add them on both sides of - and -- (converted to @), which
	-- behave mostly like a true word separator, but only on the right side of other component separators (which
	-- corresponds to the beginning of the word following the separator). Note that some component separators (+ and ++
	-- [converted to *]) are transparent to syllable boundaries, meaning that there may be a syllable divider directly
	-- to the right of the component separator. To simplify the code below, we put the word boundary marker on the outside
	-- of the syllable boundary marker.
	text = strsub(text, "([%-@]" .. syldiv_c .. "?)", "#%1#")
	text = strsub(text, "([+:*]" .. syldiv_c .. "?)", "%1#")

	-- I has served its purpose (not considered when accenting).
	text = strsub(text, "I", "i")

	-- Remove hiatus between initial <i> and following vowel ([[Iasmim]]) unless the <i> is stressed ([[ia]]) or the
	-- user explicitly added a . (converted to SYLDIV above).
	text = strsub(text, "#i%.(" .. V .. ")", "#y%1")
	if brazil then
		-- In Brazil, hiatuses involving i. or u. have two possibilities (full vowel or glide); represent using Ì. and Ù.,
		-- which we later convert appropriately. Do this before eliminating SYLDIV so the user can force a hiatus using a
		-- period.
		local hiatus_to_optional_glide = {["i"] = "Ì", ["u"] = "Ù"}
		text = strsub(text, "(" .. C_OR_WORD_BOUNDARY .. ")([iu])(%." .. V .. ")",
			function(before, hiatus, after) return before .. hiatus_to_optional_glide[hiatus] .. after end)
		-- In Brazil, hiatuses of the form í.o (e.g. [[rio]] "river", [[vazio]]; but not [[rio]] "I laugh") have two
		-- possibilities (i.u or iw); represent using Ú, which we later convert appropriately. Do this before eliminating
		-- SYLDIV so the user can force a hiatus using a period, as in [[rio]] "I laugh" respelled 'ri.o'.
		text = strsub(text, "(i" .. ipa_stress_c .. "%.)o(s?#)", "%1Ú%2")
	else
		-- Outside of Brazil, e.i -> a.i, e.g. [[ateísta]], [[proteína]], [[proteinúrio]] respelled 'prote.inúrio'. But seems
		-- not to happen in rei- ([[reincidente]], [[reiniciar]], [[reidratar]], etc.). Note, it does occur in [[reídeo]],
		-- which needs respelling.
		text = strsub(text, "(#re" .. syldiv_c .. ")(i)", "%1" .. TEMP1 .. "%2")
		text = strsub(text, "e(" .. syldiv_c .. "i)", "a%1")
		text = strsub(text, TEMP1, "")
		-- Outside of Brazil, hiatuses involving 'e./i.' or 'o./u.' after obstruent + l/r preceding a vowel have two
		-- possibilities (full vowel or glide), as in [[criança]], [[altruista]], etc. Represent using Ì. and Ù., which
		-- we later convert appropriately. Do this before eliminating SYLDIV so the user can force a hiatus using a
		-- period.
		local hiatus_to_optional_glide = {["e"] = "Ì", ["i"] = "Ì", ["o"] = "Ù", ["u"] = "Ù"}
		text = strsub(text, "([pbtdkgfv]" .. H_OR_SYL_TRANSP .. "*[lr])([eiou])(%." .. V .. ")",
			function(before, hiatus, after) return before .. hiatus_to_optional_glide[hiatus] .. after end)
		-- Outside of Brazil, remove hiatus more generally whenever 'e./i.' or 'o./u.' precedes a vowel. Do this before
		-- eliminating SYLDIV so the user can force hiatus using a period.
		local hiatus_to_glide = {["e."] = "y", ["i."] = "y", ["o."] = "w", ["u."] = "w"}
		text = strsub(text, "(" .. C_OR_WORD_BOUNDARY .. ")([eiou]%.)(" .. V .. ")",
			function(before, hiatus, after) return before .. hiatus_to_glide[hiatus] .. after end)
	end

	-- Convert user-specified syllable division back to period. See comment above when we add SYLDIV.
	text = strsub(text, SYLDIV, ".")
	-- Vowel quality handling. First convert all a -> A, e -> E, o -> O. We will then convert A -> a/ɐ, E -> e/ɛ/ɨ,
	-- O -> o/ɔ/u depending on accent marks and context. Ultimately all vowels will be one of the nine qualities
	-- aɐeɛiɨoɔu and following each vowel will either be nothing (no stress), an IPA primary stress mark (ˈ) or an
	-- IPA secondary stress mark (ˌ), in turn possibly followed by a tilde (nasalization). After doing everything
	-- that depends on the position of stress, we will move the IPA stress marks to the beginning of the syllable.
	text = strsub(text, "[aeo]", {["a"] = "A", ["e"] = "E", ["o"] = "O"})
	text = strsub(text, DOTOVER, "") -- eliminate DOTOVER; it served its purpose of preventing stress

	-- Nasal vowel handling.
	-- Final unstressed -am (in third-person plural verbs) pronounced like unstressed -ão.
	text = strsub(text, "Am#", "A" .. TILDE .. "O#")
	if portugal then
		-- In Portugal, final -n is really /n/, and preceding unstressed e/o are open ([[cólon]], [[crípton]], [[éon]];
		-- [[glúten]], [[hífen]], [[pólen]]).
		text = strsub(text, "n#", "N#")
		text = strsub(text, "([EO])(N#)", "%1" .. AC .. "%2")
	end
	if brazil then
		-- In Brazil, [[põem]] is pronounced like 'põe'.
		text = strsub(text, "(Oˈ" .. TILDE .. ")(Em#)", "%1E#")
	else
		-- In Portugal, circumflex accent on final -em ([[vêm]], [[mantêm]], etc.) indicates a special double nasal diphthong
		-- pronunciation.
		text = strsub(text, "E" .. CFLEX .. "ˈm#", "E" .. CFLEX .. "ˈ" .. TILDE .. "y" .. TILDE .. ".E" .. CFLEX .. "m#")
		-- In Portugal, [[põem]] is pronounced like 'põeem'.
		text = strsub(text, "(Oˈ" .. TILDE .. ")(Em#)", "%1E.%2")
	end
	-- Acute accent on final -em ([[além]], [[também]]) and final -ens ([[parabéns]]) does not indicate an open
	-- pronunciation.
	text = strsub(text, "E" .. AC .. "(ˈ[mn]s?#)", "E" .. CFLEX .. "%1")
	-- Vowel + m/n within a syllable gets converted to tilde.
	text = strsub(text, "(" .. V .. quality_c .. "*" .. stress_c .. "*)[mn]", "%1" .. TILDE)
	-- Non-high vowel without quality mark + tilde needs to get the circumflex (possibly fed by the previous change).
	text = strsub(text, "([AEO])(" .. stress_c .. "*)" .. TILDE, "%1" .. CFLEX .. "%2" .. TILDE)
	
	-- Primary-stressed vowel without quality mark + m/n/nh across syllable boundary gets a circumflex, cf. [[cama]],
	-- [[ano]], [[banho]] (excluding Northern Portugal)
	if style == "npt" then
		-- Northern Portugal keeps the open "a" vowels
		text = strsub(text, "(A)(ˈ%.[mnɲMN])", "%1" .. AC .. "%2")
	end
	text = strsub(text, "(" .. V .. ")(ˈ%.[mnɲMN])", "%1" .. CFLEX .. "%2")
	if brazil then
		if style ~= "sbr" then -- Seems this happens less or not at all in South Brazil.
			-- Primary-stressed vowel + m/n across syllable boundary gets nasalized in Brazil, cf. [[cama]], [[ano]].
			text = strsub(text, "(" .. V .. quality_c .. "*)(ˈ%.[mnMN])", "%1" .. TILDE .. "%2")
		end
		-- All vowels before nh (always across syllable boundary) get circumflexed and nasalized in Brazil,
		-- cf. [[ganhar]]. I *think* the circumflexing but not nasalizing happens in South Brazil.
		text = strsub(text, "(" .. V .. stress_c .. "*)(%.ɲ)", "%1" .. CFLEX .. "%2")
		if style ~= "sbr" then -- I *think* this doesn't apply to South Brazil; need to verify.
			text = strsub(text, "(" .. V .. quality_c .. "*" .. stress_c .. "*)(%.ɲ)", "%1" .. TILDE .. "%2")
		end
		-- Convert initial unstressed em-/en- before consonant to special symbol /Ẽ/, which later on is converted
		-- to /e/ (careful pronunciation) or /i/ (natural pronunciation).
		text = strsub(text, "(#E" .. CFLEX .. TILDE ..")(%." .. C ..")", "#Ẽ" .. TILDE .. "%2")
		-- Same in [[em]] standing alone (which will have a DOTUNDER in it), and in [[em-]].
		text = strsub(text, "(#E" .. CFLEX .. DOTUNDER .. "?" .. TILDE ..")(#)", "#Ẽ" .. TILDE .. "%2")
	end

	-- Nasal diphthongs.
	local nasal_termination_to_glide = {["E"] = "y", ["O"] = "w"}
	-- In ãe, ão, the second letter represents a glide.
	text = strsub(text, "(A" .. CFLEX .. stress_c .. "*" .. TILDE .. ")([EO])",
		function(v1, v2) return v1 .. nasal_termination_to_glide[v2] .. TILDE end)
	-- Likewise for õe.
	text = strsub(text, "(O" .. CFLEX .. stress_c .. "*" .. TILDE .. ")E", "%1y" .. TILDE)
	-- Likewise for ũi (generated above from muit-).
	text = strsub(text, "(u" .. stress_c .. "*" .. TILDE .. ")i", "%1y" .. TILDE)
	-- Final -em and -ens (stressed or not) pronounced /ẽj̃(s)/. (Later converted to /ɐ̃j̃(s)/ in Portugal.)
	text = strsub(text, "(E" .. CFLEX .. stress_c .. "*" .. TILDE .. ")(s?#)", "%1y" .. TILDE .. "%2")

	-- Oral diphthongs.
	-- ei, eu, oi, ou -> êi, êu, ôi, ôu
	text = strsub(text, "([EO])(" .. stress_c .. "*[iuywY])", "%1" .. CFLEX .. "%2")
	-- ai, au -> ái, áu
	text = strsub(text, "(A)(" .. stress_c .. "*[iuywY])", "%1" .. AC .. "%2")

	-- Convert A/E/O as appropriate when followed by a secondary or tertiary stress marker. If a quality is given,
	-- it takes precedence; otherwise, act as if an acute accent were given.
	text = strsub(text, "([AEO])(" .. non_primary_stress_c .. ")", "%1" .. AC .. "%2")

	-- Stressed o in -dor, -tor, -sor ([[ganhador]], [[autor]], [[invasor]], [[agressor]], etc.) and feminines and plurals
	-- is closed /o/.
	text = strsub(text, "([dtsS])O(ˈr#)", "%1o%2")
	text = strsub(text, "([dtsS])O(ˈ%.r[EA]s?#)", "%1o%2")
	-- Stressed o in -oso is closed /o/.
	text = strsub(text, "O(ˈ%.sO#)", "o%1")
	-- Stressed o in -osa, -osos, -osas is open /ɔ/.
	text = strsub(text, "O(ˈ%.s[OA]s?#)", "ɔ%1")

	-- Unstressed syllables.
	-- Before final <x>, unstressed a/e/o are open, e.g. [[clímax]], [[córtex]], [[xérox]].
	text = strsub(text, "([AEO])(X)", "%1" .. AC .. "%2")
	-- Capital X has served its purpose, so replace it.
	text = strsub(text, "X", "kç")
	if brazil then
		if style ~= "sbr" then
			-- Final unstressed -e(s), -o(s) -> /i/ /u/ (including before -mente)
			local brazil_final_vowel = {["E"] = "i", ["O"] = "u"}
			text = strsub(text, "([EO])(s?#)", function(v, after) return brazil_final_vowel[v] .. after end)
			-- Word-final unstressed -a(s) -> /ɐ/ (not before -mente)
			text = strsub(text, "A(s?#[^@])", function(after) return "ɐ" .. after end)
			-- Word-final unstressed -ar -> /ɐr/ (e.g. [[açúcar]])
			text = strsub(text, "A(r#)", function(after) return "ɐ" .. after end)
		end
		-- Initial unmarked unstressed non-nasal e- + -sC- -> /i/ or /e/ ([[estar]], [[esmeralda]]). To defeat this,
		-- explicitly mark the <e> e.g. as <ệ> or <eh>. We reuse the special symbol /I/ for this purpose, which later
		-- on is converted to /i/ or /e/. In South Brazil, however, the raised /i/ variant doesn't seem to exist.
		if not strfind(text, "#Es.ç") then
			text = strsub(text, "#E(s" .. C .. "*%.)", style == "sbr" and "#e%1" or "#I%1")
		end
		-- Remaining unstressed a, e, o without quality mark -> /a/ /e/ /o/.
		local brazil_unstressed_vowel = {["A"] = "a", ["E"] = "e", ["O"] = "o"}
		text = strsub(text, "([AEO])([^" .. accent .. "])",
			function(v, after) return brazil_unstressed_vowel[v] .. after end)
	end
	if portugal then
		-- In Portugal, final unstressed -r opens preceding a/e/o ([[dólar]], [[líder]], [[júnior]], [[inter-]]
		-- respelled 'ínter:...').
		text = strsub(text, "([AEO])(r" .. word_or_component_sep_c .. ")", "%1" .. AC .. "%2")
		-- In Portugal, unstressed a/e/o before coda l takes on an open quality. Note that any /l/ directly after a
		-- vowel must be a coda /l/ because otherwise there would be a syllable boundary marker.
		text = strsub(text, "([AEO])l", function(v)
			-- The symbol Ɔ is later converted to /o/ or /ɔ/.
			local vowel_to_before_l = {["A"] = "a", ["E"] = "ɛ", ["O"] = "Ɔ"}
			return vowel_to_before_l[v] .. "l"
		end)
		-- Unstressed 'ie' -> /jɛ/
		text = strsub(text, "yE([^" .. accent .. "])", "yɛ%1")
		-- Initial unmarked unstressed non-nasal e- + -sC- (seemingly also after des-, see Infopédia [[desestagnar]]) ->
		-- temporary symbol I (later changed to /(i)/, except after a vowel, in which case it is deleted). Note that /s/
		-- directly after a vowel must be a coda /s/ because otherwise there would be a syllable boundary marker.
		text = strsub(text, "#Es", "#Is")
		-- Initial unmarked unstressed non-nasal e- -> /i/, including after des-.
		text = strsub(text, "#E([^" .. accent .. "])", "#i%1")
		-- Initial unmarked unstressed non-nasal o- -> /ɔ/ if another vowel follows (not 'o', 'os' by themselves).
		text = strsub(text, "(#O)(.-#)", function(o, rest)
			if strfind(rest, "^[^" .. accent .. "]") and strfind(rest, V) then
				return "#ɔ" .. rest
			else
				return o .. rest
			end
		end)
		-- All other unmarked unstressed non-nasal e, o, a -> /ɨ/ /u/ /ɐ/
		local portugal_unstressed_vowel = {["A"] = "ɐ", ["E"] = "ɨ", ["O"] = "u"}
		text = strsub(text, "([AEO])([^" .. accent .. "])",
			function(v, after) return portugal_unstressed_vowel[v] .. after end)
	end

	-- Remaining vowels.
	-- All remaining a -> /a/ (should always be stressed).
	text = strsub(text, "A([^" .. quality .. "])", "a%1")
	-- Ignore quality markers on i, u; only one quality.
	text = strsub(text, "([iu])" .. quality_c, "%1")
	-- Convert a/e/o + quality marker appropriately.
	local vowel_quality = {
		["A" .. AC] = "a", ["A" .. CFLEX] = "ɐ",
		["E" .. AC] = "ɛ", ["E" .. CFLEX] = "e",
		["O" .. AC] = "ɔ", ["O" .. CFLEX] = "o",
	}
	text = strsub(text, "([AEO]" .. quality_c .. ")", vowel_quality)
	-- Stressed o in hiatus ([[voo]], [[boa]], [[perdoe]], etc.) is closed /o/.
	text = strsub(text, "O(ˈ%." .. V .. ")", "o%1")
	-- Stressed closed /o/ in Northern Portugal in hiatus has a following optional /w/.
	if style=="npt" then
		text = strsub(text, "(oˈ%.)(" .. V .. ")", "%1(w)%2")
	end

	-- En lugar de tirar error (lo que debería hacer), asumo que llevan acento agudo ´ (lo más probable)
	if strfind(text, "[EO]") then
		--error("Stressed e or o not occurring nasalized or in a diphthong must be marked for quality using é/ê or ó/ô")
		text = strsub(text, "([EO])", "%1" .. AC)
		text = strsub(text, "([EO]" .. quality_c .. ")", vowel_quality)
	end

	-- Finally, eliminate DOTUNDER, now that we have done all vowel reductions.
	text = strsub(text, DOTUNDER, "")

	if brazil then
		-- Epenthesize /(j)/ in [[faz]], [[mas]], [[três]], [[dez]], [[feroz]], [[luz]], [[Jesus]], etc. Note, this only
		-- triggers at actual word boundaries (not before -mente), and not on nasal vowels or diphthongs. To defeat this
		-- (e.g. in plurals), respell using 'ss' or 'hs'.
		text = strsub(text, "(" .. V .. "ˈ)([sz]#[^@])", "%1Y%2")
		-- Also should happen at least before + (cf. [[rapazinho]] respelled 'rapaz+inho', [[vozinha]] respelled
		-- 'vóz+inha').
		text = strsub(text, "(" .. V .. "ˈ)(%.?[sz]%+)", "%1Y%2")
		-- But should not happen after /i/.
		text = strsub(text, "iˈY", "iˈ")
	end
	-- 'S' here represents earlier ss. Word-finally it is used to prevent epenthesis of (j) and should behave
	-- like 's'. Elsewhere (between vowels) it should behave like 'ç'.
	text = strsub(text, "S#", "s#")
	text = strsub(text, "S", "ç")

	-- s, z
	-- s in trans + V -> z: [[transação]], [[intransigência]]
	text = strsub(text, "(trɐ" .. stress_c .. "*" .. TILDE .. ".)s(" .. V .. ")", "%1z%2")
	-- word final z -> s
	text = strsub(text, "z#", "s#")
	-- s is voiced between vowels (not nasalized) or between vowel and voiced consonant, including across word
	-- boundaries; may be fed by previous rule. We have to split this into two rules because /s/ should not be voiced
	-- between nasal vowel and another vowel ([[cansar]]) but should be voiced between nasal vowel and a voiced
	-- consonant ([[transgredir]]). Note that almost all occurrences of nasal vowel + s + voiced consonant are in
	-- trans- which potentially could be handled above, but there may be others, e.g. [[Flensburg]].
	text = strsub(text, "(" .. V .. stress_c .. "*Y?%.?)s(" .. wordsep_c .. "*h?[" .. vowel .. glide .. "])", "%1z%2")
	text = strsub(text, "(" .. V .. accent_c .. "*Y?%.?)s(" .. wordsep_c .. "*h?[" .. voiced_cons .. "])", "%1z%2")
	-- z before voiceless consonant, e.g. [[Nazca]]; c and q already removed
	text = strsub(text, "z(" .. wordsep_c .. "*[çfkpsʃt])", "s%1")
	if portugal or style == "rio" then
		-- In Portugal and Rio de Janeiro, s/z before consonant (including across word boundaries) or end of utterance -> ʃ/ʒ;
		-- but not word-initially (e.g. [[stressado]]).
		local shibilant = {["s"] = "ʃ", ["z"] = "j"}
		text = strsub(text, "([sz])(##)", function(sz, after) return shibilant[sz] .. after end)
		-- s/z are maintained word-initially but not following : or similar component boundary ([[antroposcopia]] respelled
		-- 'antrópò:scopia'). To implement this, insert TEMP1 directly before the s/z we want to preserve, then check for this
		-- TEMP1 not being present when converting to shibilant, then remove TEMP1.
		text = strsub(text, "([# %-]#)([sz])", "%1" .. TEMP1 .. "%2")
		text = strsubrep(text, "([^" .. TEMP1 .. "])([sz])(" .. wordsep_c .. "*" .. C_NOT_H_OR_GLIDE .. ")",
			function(before, sz, after) return before .. shibilant[sz] .. after end)
		text = strsub(text, TEMP1, "")
	end
	text = strsub(text, "ç", "s")
	text = strsub(text, "j", "ʒ")
	-- Reduce identical sibilants/shibilants, including across word boundaries.
	text = strsub(text, "([szʃʒ])(" .. wordsep_c .. "*)(%1)", "%2%1")
	if style == "rio" then
		-- Also reduce shibilant + sibilant ([[descer]], [[as]] [[zonas]]); not in Portugal, but in Portugal we later
		-- generate two outputs in this case, either /ʃs/ and /ʒz/ (careful pronunciation) or /ʃ/ and /ʒ/ (natural
		-- pronunciation). Note that the reduction of /ʃs/ to /ʃ/ in Portugal is different from the reduction of the
		-- same to /s/ in Brazil.
		text = strsub(text, "ʃ(" .. wordsep_c .. "*s)", "%1")
		text = strsub(text, "ʒ(" .. wordsep_c .. "*z)", "%1")
	end

	-- N/M from double n/m
	text = strsub(text, "[NM]", {["N"] = "n", ["M"] = "m"})
	
	if portugal then
		--Drop unneeded 'h' in 'rh' respellings for European Portuguese
		text = strsub(text, "rh","r")
		if style=="spt" then
			--Paragoge of i in word-final r/l in Southern Portugal
		text = strsub(text, "ˈr#", "ˈ.ri#")
		text = strsub(text, "ˈl#", "ˈ.li#")
		end
	end
	-- r
	-- Double rr -> ʁ already handled above.
	-- Initial r or l/n/s/z + r -> strong r (ʁ).
	text = strsub(text, "([#" .. TILDE .. "lszʃʒ]%.?)r", "%1ʁ")
	if brazil then
		-- Word-final r before vowel in verbs is /(ɾ)/.
		text = strsub(text, "([aɛei]ˈ)r(#" .. wordsep_c .. "*h?" .. V .. ")", "%1(ɾ)%2")
		-- Coda r before vowel is /ɾ/.
		text = strsub(text, "r([.#]" .. wordsep_c .. "*h?" .. V .. ")", "ɾ%1")
	end
	-- Word-final r in Brazil in verbs (not [[pôr]]) is usually dropped. Use a spelling like 'marh' for [[mar]]
	-- to prevent this. Make sure not to do this before -mente/-zinho ([[polegarzinha]], [[popularmente]]).
	if brazil then
		text = strsub(text, "([aɛei]ˈ)r(#[^@])",
			"%1(" .. (style == "sp" and "ɾ" or style == "sbr" and "ɻ" or "ʁ") .. ")%2")
		if style ~= "sp" then
			-- Coda r in Southern Brazil is [ɻ], otherwise outside of São Paulo is /ʁ/.
			text = strsub(text, "r(" .. C .. "*[.#])", (style == "sbr" and "ɻ" or "ʁ") .. "%1")
		end
	end
	-- All other r -> /ɾ/.
	text = strsub(text, "r", "ɾ")
	if brazil and phonetic then
		-- "Strong" ʁ before voiced consonant is [ɦ] in much of Brazil, [ʁ] in Rio. Use R as a temporary symbol.
		text = strsub(text, "ʁ(" .. wordsep_c .. "*[" .. voiced_cons .. "])", style == "rio" and "R%1" or "ɦ%1")
		-- Other "strong" ʁ is [h] in much of Brazil, [χ] in Rio. Use H because later we remove all <h>.
		text = strsub(text, "ʁ", style == "rio" and "χ" or "H")
		text = strsub(text, "R", "ʁ")
	end
	
	-- Nasal Diphthong <ẽi> and <ɛi>
	if portugal then
	-- In Portugal, always lower e -> ɐ before j when nasalized.
		text = strsub(text, "e(" .. accent_c .. TILDE .. "*%.?y)", "ɐ%1")
	-- In Portugal, lower ɛ -> e before i in <ɛi> (the open pronunciation is dated)
		text = strsub(text, "ɛ(" .. accent_c .. "*i)", "e%1")
	end
	
	-- Diphthong <ei> (if unchanged, it shall yield /ej/; this should be the case for African pronunciations)
	if brazil then
		-- In Brazil, add optional /j/ in <eir>, <eij>, <eig> and <eix> (as in [[cadeira]], [[beijo]], [[manteiga]] and
		-- [[peixe]]).
		text = strsub(text, "(e" .. accent_c .. "*)i(%.[ɾʒgʃ])", "%1(j)%2")
		-- [In Brazil, add optional /j/ in <aix> (as in [[caixa]] and [[baixo]]).] -- This was added by an IP, see
		-- [[Special:Contributions/186.212.6.138]]; this seems non-standard to me. If we are to include it, it should
		-- not be done this way, but as two separate outputs with the one lacking the /j/ marked with a qualifier such
		-- as "non-standard"; compare the way the initial enC- is handled (near the end of export.IPA()), where there
		-- are two outputs, with /ẽC-/ marked as "careful pronunciation" and /ĩC-/ marked as "natural pronunciation".
		-- (Benwing2)
		-- text = strsub(text, "(a" .. accent_c .. "*)i(%.ʃ)", "%1(j)%2")
	elseif style == "spt"  then
		-- In Southern Portugal, <ei> monophthongizes to <e>
		text = strsub(text, "(e" .. accent_c .. "*)i", "%1")
	elseif style == "gpt" then
		-- In general Portugal, lower e -> ɐ before i in <ei>.
		text = strsub(text, "e(" .. accent_c .. "*i)", "ɐ%1")
		-- In general Portugal, lower e -> ɐ before j
		text = strsub(text, "e(" .. accent_c .. "*%.?y)", "ɐ%1")
		-- In general Portugal, lower e -> ɐ(j) before other palatals.
		text = strsub(text, "e(" .. stress_c .. "*)(%.?[ʒʃɲʎ](" .. V .. "))", "ɐ%1(j)%2")
	end

	-- Diphthong <ou> (if unchanged, it shall yield /o(w)/)
	if style == "spt" then
		-- In Southern Portugal, <ou> always monophthongizes to <o>
		text = strsub(text, "(o" .. accent_c .. "*)u", "%1")
	elseif style == "npt" then
		-- In Northern Portugal (and Galicia), <ou> is kept
	text = strsub(text, "(o" .. accent_c .. "*)u", "%1w")
	end
	-- Else optional /w/ in <ou>
	text = strsub(text, "(o" .. accent_c .. "*)u", "%1(w)")

	-- Stop consonants.
	if brazil then
		-- Palatalize t/d + i/y -> affricates in Brazil.
		text = strsub(text, "([td])(" .. word_or_component_sep_c .. "*[" .. high_front_vocalic .. "])",
			function(td, high_vocalic) return palatalize_td[td] .. high_vocalic end)
	elseif phonetic then
		-- Fricativize voiced stops in Portugal when not utterance-initial or after a nasal; also not in /ld/.
		-- Easiest way to do this is to convert all voiced stops to fricative and then back to stop in the
		-- appropriate contexts.
		local fricativize_stop = { ["b"] = "β", ["d"] = "ð", ["g"] = "ɣ" }
		local occlude_fricative = { ["β"] = "b", ["ð"] = "d", ["ɣ"] = "g" }
		text = strsub(text, "[bdg]", fricativize_stop)
		text = strsub(text, "##([βðɣ])", function(bdg) return "##" .. occlude_fricative[bdg] end)
		text = strsub(text, "(" .. TILDE .. wordsep_c .. "*)([βðɣ])", function(before, bdg) return before .. occlude_fricative[bdg] end)
		text = strsub(text, "(l" .. wordsep_c .. "*)ð", "%1d")
	end

	-- Glides and l. ou -> o(w) must precede coda l -> w in Brazil, because <ol> /ow/ cannot be reduced to /o/.
	-- ou -> o(w) before conversion of remaining diphthongs to vowel-glide combinations so <ow> can be used to
	-- indicate a non-reducible glide.
	
	-- Handle coda /l/.
	if brazil then
		-- Coda l -> /w/ in Brazil.
		text = strsub(text, "l(" .. C .. "*[.#])", "w%1")
	elseif phonetic then
		-- Coda l -> [ɫ] in Portugal (and Rio Grande do Sul, according to Cunha-Cintra)
		text = strsub(text, "l(" .. C .. "*[.#])", "ɫ%1")
	end
	text = strsub(text, "y", "j")
	if brazil then
		text = strsub(text, "Y", "(j)") -- epenthesized in [[faz]], [[três]], etc.
	else
		-- 'I' in Portugal represents word-initial (i) before sC, except after /i/ (e.g. [[antiestático]]), in which
		-- case it is elided. In the latter case, we need to elide the word/component separators, otherwise we end up
		-- with an extra syllable divider: /ˌɐ̃.ti.ʃˈta.ti.ku/ instead of correct /ˌɐ̃.tiʃˈta.ti.ku/.
		text = strsub(text, "(i" .. accent_c .. "*)" .. word_or_component_sep_c .. "*#I", "%1")
		text = strsub(text, "I", "(i)")
	end
	local vowel_termination_to_glide = brazil and phonetic and
		{["i"] = "ɪ̯", ["j"] = "ɪ̯", ["u"] = "ʊ̯", ["w"] = "ʊ̯"} or
		{["i"] = "j", ["j"] = "j", ["u"] = "w", ["w"] = "w"}
	-- i/u as second part of diphthong becomes glide.
	text = strsub(text, "(" .. V .. accent_c .. "*" .. "%(?)([ijuw])",
		function(v1, v2) return v1 .. vowel_termination_to_glide[v2] end)

	-- nh
	if brazil and phonetic and style ~= "sbr" then
		-- [[unha]] pronounced [ˈũ.j̃ɐ]; nasalization of previous vowel handled above. But initial nh- e.g. [[nhaca]],
		-- [[nheengatu]], [[nhoque]] is [ɲ]. I *think* this doesn't happen in South Brazil. We do have the phonetic
		-- representation [ẽj̃.pũˈj̃aɾ] given for [[empunhar]], but this is the only such case and may be a mistake.
		text = strsub(text, "([^#])ɲ", "%1j" .. TILDE)
	end

	if portugal then
		-- Suppress final -ɨ before a vowel
		text = strsub(text, "ɨ#[ %-]#(" .. V .. ")", "‿%1")
		--Make optional utterance-finally
		--	text = strsub(text, "ɨ##", "(ɨ)##")
		-- (ɨ) after l when suppressed should convert to coda ɫ, so split it later into two pronuns.
	--	text = strsub(text, "l%(ɨ%)##", "L##")
		-- (ɨ) after r when suppressed should also convert to coda
	--	text = strsub(text, "ɾ%(ɨ%)##", "R##")
	end

	text = strsub(text, "g", "ɡ") -- U+0261 LATIN SMALL LETTER SCRIPT G
	text = strsub(text, "[ʧʤ]", {["ʧ"] = "t͡ʃ", ["ʤ"] = "d͡ʒ"})
	text = strsub(text, "tʃ", "t͡ʃ")
	text = strsub(text, "dʒ", "d͡ʒ")
	text = strsub(text, "h", "")
	text = strsub(text, "H", "h")

	return text
end

local function normalizar(texto)
	-- Clean up a raw respelling: lowercase it, normalize its accents, turn punctuation into foot
	-- boundaries and collapse redundant separators. Returns the cleaned-up string.
	local minusculas = strlower(texto)

	-- Decompose everything, then put back only ç and ü as single characters; any other
	-- letter + cedilla/diaeresis pair is absent from the table and so is left as matched.
	local descompuesto = strnfd(minusculas)
	local letras_precompuestas = {
		["c" .. CEDILLA] = "ç",
		["u" .. DIA] = "ü",
	}
	local recompuesto = strsub(descompuesto, ".[" .. CEDILLA .. DIA .. "]", letras_precompuestas)
	local resultado = reorder_accents(recompuesto)

	-- Ordered cleanup passes; each one is applied with strsubrep.
	local limpiezas = {
		-- Whatever delimits fragments becomes an IPA foot boundary |.
		{PUNTUACION, " | "},
		-- Drop any remaining punctuation.
		{PUNTUACION_EXTRA, ""},
		-- Hyphens become spaces (austro-húngaro, franco-italiano).
		{"[%-‐]", " "},
		-- Finally, collapse doubled bars and surplus whitespace.
		{"%s*|%s*|%s*", " | "},
		{"%s+", " "},
	}
	for _, paso in ipairs(limpiezas) do
		resultado = strsubrep(resultado, paso[1], paso[2])
	end

	resultado = strstrip(resultado, "[%s|]+")
	return resultado
end

-- Generate the IPA for a single term respelling `text` in the specified `style` ('gbr', 'rio', etc.; see
-- all_style_descs above). Return value is a list of objects of the following form:
--   { phonemic = STRING, phonetic = STRING, qualifiers = {STRING, ...} }
-- Note that the returned qualifiers are only those generated automatically as a result of certain characteristics of
-- the respelling, e.g. in Brazil initial em-/en- + consonant has two outputs, one labeled "careful pronunciation" and
-- the other "natural pronunciation". User-specified qualifiers are added at the end by the caller of IPA(), and
-- prepended to the auto-generated qualifiers.
local function generar_pron(text)
	text = normalizar(text)

	local words_br, words_pt = strsplit(text, "([ %-]+)"), strsplit(text, "([ %-]+)")

	local function procesar_palabras(words, brazil, portugal)
		-- Mark unstressed words and prefixes in `words` (modified in place), then join everything into a
		-- single string with # word-boundary markers and ## at text/foot boundaries. `words` is walked in
		-- steps of two: words sit at odd indices, the separators between them at even indices.
		local total = #words

		-- True if words[i] is a prefix: either a final prefix (followed by a "-" separator, then a blank
		-- word, then nothing more) or a non-final prefix (followed by a "- " separator).
		local function word_is_prefix(i)
			if i == total - 2 and words[i + 1] == "-" and words[i + 2] == "" then
				return true
			end
			return i < total and words[i + 1] == "- "
		end

		for i = 1, total, 2 do
			local word = words[i]

			-- Prefixes without an explicit stress marker, and certain monosyllabic words ([[o]], [[se]],
			-- [[de]], etc.; also [[a]], [[das]], etc. in Portugal), become unstressed with vowel reduction.
			local unaccented_prefix = word_is_prefix(i) and not strfind(word, accent_c)
			if unaccented_prefix or unstressed_words[word]
				or (portugal and unstressed_full_vowel_words_brazil[word]) then
				-- The greedy match puts DOTOVER on the last vowel rather than the first; otherwise 'que'
				-- would get it after the 'u'.
				word = strsub(word, "^(.*" .. V .. quality_c .. "*)", "%1" .. DOTOVER)
			end

			-- Certain monosyllabic words ([[meu]], [[com]]; also [[a]], [[das]], etc. in Brazil) without
			-- stress marks become unstressed but keep their full vowel.
			if unstressed_full_vowel_words[word] or (brazil and unstressed_full_vowel_words_brazil[word]) then
				-- The lazy match puts DOTUNDER on the first vowel rather than the last (otherwise 'meu'
				-- would get it after the 'u'), and after a quality marker for à, às.
				word = strsub(word, "^(.-" .. V .. quality_c .. "*)", "%1" .. DOTUNDER)
			end

			-- Some unstressed words need a special pronunciation.
			words[i] = unstressed_notaunciation_substitution[word] or word
		end

		local x = concat(words)

		-- Punctuation and word/foot boundary markers, applied in order.
		local boundary_steps = {
			-- Eliminate word-final question mark and exclamation point (converted to foot boundary
			-- earlier when word-medial).
			{"[!?]", ""},
			-- Apostrophe becomes tie (e.g. in [[barriga d'agua]]).
			{"'", "‿"},
			-- User-specified # as in i# (= i. or y) and u# (= u. or w) becomes TEMP1 so that # is free
			-- to mark word boundaries.
			{"#", TEMP1},
			-- Put # at word beginning and end ...
			{" | ", "# | #"},
			{" ", "# #"},
		}
		for _, step in ipairs(boundary_steps) do
			x = strsub(x, step[1], step[2])
		end
		-- ... and double ## at the beginning/end of the text.
		x = "##" .. x .. "##"

		-- Eliminate hyphens indicating prefixes/suffixes, but preserve a marker indicating prefixes so
		-- primary stress can later be converted to secondary stress.
		local affix_steps = {
			{"(" .. V .. charsep_c .. "*)(%-#)", "%1" .. PSEUDOCONS .. "%2"},
			{"%-#", "#" .. PREFIX_MARKER},
			{"#%-(" .. V .. ")", "#" .. PSEUDOCONS .. "%1"},
			{"#%-", "#"},
		}
		for _, step in ipairs(affix_steps) do
			x = strsub(x, step[1], step[2])
		end

		return x
	end

	local br, pt = procesar_palabras(words_br, true, false), procesar_palabras(words_pt, false, true)


	--[=[
	local variants

	-- Map over each element in `variants`. If `from` is found in the element, replace the element with two elements, one
	-- obtained by replacing `from` with `to1` and the other by replacing `from` with `to2`. If `to2` is nil, only one
	-- element replaces the original element.
	local function flatmap_and_sub_pre(from, to1, qual1, to2, qual2)
		variants = flatmap(variants, function(item)
			if strfind(item.respelling, from) then
				local retval = {
					{
						respelling = strsub(item.respelling, from, to1),
						qualifiers = combine_qualifiers(item.qualifiers, qual1),
					}
				}
				if to2 then
					insert(retval,
						{
							respelling = strsub(item.respelling, from, to2),
							qualifiers = combine_qualifiers(item.qualifiers, qual2),
						}
					)
				end
				return retval
			else
				return {item}
			end
		end)
	end
	]=]--

	-- Remove grave accents and macrons, which have special meaning only for Portugal. Do this before handling o^
	-- and similar so we can write áutò^:... and have it correctly give 'autò-' in Portugal but 'áutu-,áuto-' in
	-- Brazil.
	br = strsub(br, "[" .. GR .. MACRON .. "]", "")


	-- Convert grave accents and macrons to explicit dot-under + quality marker.
	local grave_macron_to_quality = {
		[GR] = AC,
		[MACRON] = CFLEX,
	}
	pt = strsub(pt, "[" .. GR .. MACRON .. "]", function(acc) return grave_macron_to_quality[acc] .. DOTUNDER end)
	-- ê*/ô* -> é/ó and é*/ó* -> ê/ô (reverse accents)
	pt = strsub(pt, "([eo])([" .. AC .. CFLEX .. "])%*", function(eo, acc)
		return eo .. (acc == CFLEX and AC or CFLEX) end)

	-- Treat vowel after des- as word-initial, as in Brazil.		
	--pt = strsub(pt, "(" .. word_or_component_sep_c .. ")des%^+", "%1des++")
	-- Remove i*, i^ and i^^ not followed by a vowel (i.e. Brazilian epenthetic i), but not i^ and i^^ followed or
	-- preceded by a vowel (which has a totally different meaning, i.e. i or y in Brazil).
	-- Also remove all remaining ^.
	--pt = strsub(pt, "i%^+(" .. V .. ")", "i%1")
	--pt = strsub(pt, "(" .. V .. ")i%^+", "%1i")
	--pt = strsub(pt, "i?[*%^]+", "")

	--[=[
	if brazil then
		-- Handle i^ and i^^ before a vowel = /i/ or /j/.
		flatmap_and_sub_pre("i%^%^(" .. V .. ")", "y%1", nil, "i.%1", nil)
		flatmap_and_sub_pre("i%^(" .. V .. ")", "i.%1", nil, "y%1", nil)
		-- Handle i^ and i^^ after a vowel = /i/ or /j/; mostly useful for ui^
		flatmap_and_sub_pre("(" .. V .. ")i%^%^", "%1y", nil, "%1.i", nil)
		flatmap_and_sub_pre("(" .. V .. ")i%^", "%1.i", nil, "%1y", nil)
		-- Handle i^ and i^^ not before a vowel = optional epenthetic /i/.
		if style == "sbr" then
			-- Epenthetic /i/ seems less common in South Brazil. Make i^^ not epenthesize (but still palatalize /t/ and /d/),
			-- and i^ epenthesize but not as the first option.
			flatmap_and_sub_pre("i%^%^(" .. NV_NOT_SPACING_CFLEX .. ")", "Ɨ%1", nil)
			flatmap_and_sub_pre("i%^(" .. NV_NOT_SPACING_CFLEX .. ")", "Ɨ%1", nil, "I%1", nil)
		else
			flatmap_and_sub_pre("i%^%^(" .. NV_NOT_SPACING_CFLEX .. ")", "Ɨ%1", nil, "I%1", nil)
			flatmap_and_sub_pre("i%^(" .. NV_NOT_SPACING_CFLEX .. ")", "I%1", nil, "Ɨ%1", nil)
		end
		-- Handle i* = epenthetic /i/.
		flatmap_and_sub_pre("i%*", "I", nil)
		-- Handle u^ and u^^ = /u/ or /w/.
		flatmap_and_sub_pre("u%^%^", "w", nil, "u.", nil)
		flatmap_and_sub_pre("u%^", "u.", nil, "w", nil)
		if style == "sbr" then
			-- The raised variant apparently does not occur in South Brazil.
			flatmap_and_sub_pre("([eo])%^+", "%1", nil)
		else
			-- Handle e^ and e^^ = /e/ or /i/; handle o^ and o^^ = /o/ or /u/.
			-- Do e^ and o^ together so we get only two outputs, not four, if they cooccur.
			-- Likewise for e^^ and o^^.
			flatmap_and_sub_pre("([eo])%^%^",
				function(eo) return eo == "e" and "i" or "u" end, nil,
				"%1", nil)
			flatmap_and_sub_pre("([eo])%^",
				"%1", nil,
				function(eo) return eo == "e" and "i" or "u" end, nil)
		end
		-- Handle ê*/ô*/é*/ó* = same as without asterisk.
		flatmap_and_sub_pre("([eo][" .. AC .. CFLEX .. "])%*", "%1", nil)
		-- Handle des^ at beginning of word or component = des++ or dis++, and des^^ = opposite order. But apparently
		-- not in South Brazil, where the raised variant doesn't occur.
		if style == "sbr" then
			flatmap_and_sub_pre("(" .. word_or_component_sep_c .. ")des%^+", "%1des++", nil)
		else
			flatmap_and_sub_pre("(" .. word_or_component_sep_c .. ")des%^%^", "%1dis++", nil, "%1des++", nil)
			flatmap_and_sub_pre("(" .. word_or_component_sep_c .. ")des%^", "%1des++", nil, "%1dis++", nil)
		end
		for _, variant in ipairs(variants) do
			if strfind(variant.respelling, "[*%^]") then
				error(("* or ^ remains after applying all known replacements involving these characters (result is '%s')"):format(variant.respelling))
			end
		end
	end
	]=]--

	--[=[
	-- Replace i# and u# sequences (above we replaced # with TEMP1).
	flatmap_and_sub_pre("i" .. TEMP1, "i.", nil, "y", {"faster pronunciation"})
	flatmap_and_sub_pre("u" .. TEMP1, "u.", nil, "w", {"faster pronunciation"})
	]=]--

	local function generar(s, variante, fone)
		local f = one_term_ipa(s, variante, fone)

		local brazil = br_styles[variante]
		local portugal = pt_styles[variante]

		local normal, rapido, lento

		if brazil then
			-- Convert Ẽ from initial [[em]] as a word by itself to either /ẽj̃/ and /ĩ/.
			lento = strsub(f, "Ẽ" .. TILDE .. "#", "e" .. TILDE .. "j" .. TILDE .. "#")
			normal = strsub(f, "Ẽ" .. TILDE .. "#", "i" .. TILDE .. "#")
			-- Convert Ẽ from initial em-/en- + consonant to either /ẽ/ and /ĩ/.
			--flatmap_and_sub_post("Ẽ", "e", {"careful pronunciation"}, "i", {"natural pronunciation"})
			lento = strsub(lento, "Ẽ", "e")
			normal = strsub(normal, "Ẽ", "i")

			--flatmap_and_sub_post("I", "i", nil, "e", nil) en este paso no me queda claro si sería e o i (asumo que es "e" porqe sería la fonética??)
			if fone then
				lento = strsub(lento, "I", "e")
				normal = strsub(normal, "I", "e")
			else
				lento = strsub(lento, "I", "i")
				normal = strsub(normal, "I", "i")
			end

			-- Convert Ú resulting from stressed final '-io(s)'.
			--flatmap_and_sub_post("%.Ú", ".u", nil, {"w", "ʊ̯"}, nil) -- misma observacion que antes
			if fone then
				lento = strsub(lento, "%.Ú", "ʊ̯")
				normal = strsub(normal, "%.Ú", "ʊ̯")
			else
				lento = strsub(lento, "%.Ú", ".u")
				normal = strsub(normal, "%.Ú", ".u")
			end
		else -- Portugal
			lento = strsub(f, "ʃ(" .. wordsep_c .. "*)s", "ʃ%1s")
			normal = strsub(f, "ʃ(" .. wordsep_c .. "*)s", "%1ʃ")
			lento = strsub(lento, "ʒ(" .. wordsep_c .. "*)z", "ʒ%1z")
			normal = strsub(normal, "ʒ(" .. wordsep_c .. "*)z", "%1ʒ")

			if fone then
				lento = strsub(lento, "Ɔ", "ɔ")
				normal = strsub(normal, "Ɔ", "ɔ")
			else
				lento = strsub(lento, "Ɔ", "o")
				normal = strsub(normal, "Ɔ", "o")
			end
			--flatmap_and_sub_post("ʃ(" .. wordsep_c .. "*)s",
			--		"ʃ%1s", {"careful pronunciation"}, "%1ʃ", {"natural pronunciation"})
			--flatmap_and_sub_post("ʒ(" .. wordsep_c .. "*)z",
			--		"ʒ%1z", {"careful pronunciation"}, "%1ʒ", {"natural pronunciation"})
			--flatmap_and_sub_post("Ɔ", "o", nil, "ɔ", nil)
			-- Split (ɨ) after l or r into two pronuns, one with ɨ and the other without it (with one fewer syllables and coda).
			--flatmap_and_sub_post("%.([^.# ]-)L#", ".%1lɨ#", nil, {"%1l#", "%1ɫ#"}, nil)
			--flatmap_and_sub_post("%.([^.# ]-)R#", ".%1ɾɨ#", nil, {"%1ɾ#", "%1ɾ#"}, nil)
		end
		rapido = strsub(normal, "([ÌÙ])%.", function(iu) return iu == "Ì" and "j" or "w" end)
		normal = strsub(normal, "([ÌÙ])%.", function(iu) return iu == "Ì" and "i." or "u." end)
		lento = strsub(lento, "([ÌÙ])%.", function(iu) return iu == "Ì" and "i." or "u." end)

		-- Final changes to the generated IPA to produce what's shown to the user. We used to do this at the end of
		-- one_term_ipa() but the stuff below needs to happen after the expansion of Ì. and Ù. in Brazil to either i./u.
		-- or j/w, because the latter transformation involves removing a syllable boundary, which will cause a stress mark
		-- on the following syllable to retract to the beginning of the newly combined syllable. To avoid lots of hassle,
		-- we postpone this stress mark movement till now.
		local function finalize_ipa(a)
			-- Convert Brazil i/u in hiatus to ɪ/ʊ in the phonetic representation. This needs to happen after handling of
			-- Ì. and Ù., which feeds this change.
			if brazil and fone then
				local phonetic_hiatus_iu_to_actual = {["i"] = "ɪ", ["u"] = "ʊ"}
				a = strsub(a, "([iu])(%." .. V .. ")", function(iu, after) return phonetic_hiatus_iu_to_actual[iu] .. after end)
			end

			-- Stress marks and syllable dividers.
			-- Component separators that aren't transparent to syllabification need to be made into syllable dividers.
			a = strsub(a, non_syl_transp_component_sep_c, ".")
			-- IPA stress marks in components followed by + should be removed.
			a = strsub(a, ipa_stress_c .. "([^" .. word_divider .. component_sep .. "]*%+)", "%1")
			-- Component separators that are transparent to syllabification need to be removed now, before moving IPA stress marks
			-- to the beginning of the syllable, so they don't interfere in this process.
			a = strsub(a, syl_transp_component_sep_c .. "#?", "")
			-- Move IPA stress marks to the beginning of the syllable.
			a = strsubrep(a, "([#.])([^#.]*)(" .. ipa_stress_c .. ")", "%1%3%2")
			-- Suppress syllable divider before IPA stress indicator.
			a = strsub(a, "%.(#?" .. ipa_stress_c .. ")", "%1")
			-- Make all primary stresses but the last one in a given word be secondary. May be fed by the first rule above.
			a = strsubrep(a, "ˈ([^ ]+)ˈ", "ˌ%1ˈ")
			-- Make primary stresses in prefixes become secondary.
			a = strsubrep(a, "ˈ([^#]*#" .. PREFIX_MARKER .. ")", "ˌ%1")

			-- Remove # symbols at word/text boundaries, as well as _ (which forces separate interpretation), pseudo-consonant
			-- markers (at edges of some prefixes/suffixes), and prefix markers, and recompose.
			a = strsub(a, "[#_" .. PSEUDOCONS .. PREFIX_MARKER .. "]", "")
			a = strnfc(a)

			return strhtml(a)
		end

		normal = finalize_ipa(normal)
		rapido = finalize_ipa(rapido)
		lento = finalize_ipa(lento)

		if normal == rapido and normal == lento then
			return {}, {normal}
		elseif normal == rapido then
			return {"normal", "lento"}, {normal, lento}
		elseif normal == lento then
			return {"normal", "rápido"}, {normal, rapido}
		else
			return {"normal", "rápido", "lento"}, {normal, rapido, lento}
		end
	end

	local _, fono_ = generar(br, "gbr", false)
	local fono = fono_[1]

	local gbr_nota, gbr_fone = generar(br, "gbr", true)
	local rio_nota, rio_fone = generar(br, "rio", true)
	local sp_nota, sp_fone = generar(br, "sp", true)
	local sbr_nota, sbr_fone = generar(br, "sbr", true)

	local gpt_nota, gpt_fone = generar(br, "gpt", true)
	local cpt_nota, cpt_fone = generar(br, "cpt", true)
	local spt_nota, spt_fone = generar(br, "spt", true)
	local npt_nota, npt_fone = generar(br, "npt", true)

	local pron = {{nombre_completo["gbr"]}}
	local nota, fone = {gbr_nota}, {gbr_fone}
	local son_iguales = m_table.deepEquals

	if not son_iguales(gbr_fone, rio_fone) then
		insert(pron, {nombre_completo["rio"]})
		insert(nota, rio_nota)
		insert(fone, rio_fone)
	end

	if not son_iguales(gbr_fone, sp_fone) then
		insert(pron, {nombre_completo["sp"]})
		insert(nota, sp_nota)
		insert(fone, sp_fone)
	end

	if not son_iguales(gbr_fone, sbr_fone) then
		insert(pron, {nombre_completo["sbr"]})
		insert(nota, sbr_nota)
		insert(fone, sbr_fone)
	end

	insert(pron, {nombre_completo["gpt"]})
	insert(nota, gpt_nota)
	insert(fone, gpt_fone)

	if not son_iguales(gpt_fone, cpt_fone) then
		insert(pron, {nombre_completo["cpt"]})
		insert(nota, cpt_nota)
		insert(fone, cpt_fone)
	end

	if not son_iguales(gpt_fone, spt_fone) then
		insert(pron, {nombre_completo["spt"]})
		insert(nota, spt_nota)
		insert(fone, spt_fone)
	end

	if not son_iguales(gpt_fone, npt_fone) then
		insert(pron, {nombre_completo["npt"]})
		insert(nota, npt_nota)
		insert(fone, npt_fone)
	end

	return pron, fone, nota, fono

end


-- Determine the stress type of the syllabified transcription `w`.
-- Returns the stress type ("doble", "monosílaba", "aguda", "llana", "esdrújula" or "sobreesdrújula") followed by
-- the number of syllables, or nil when `w` is not a string. Raises an error when a word of more than one
-- syllable has no syllable containing the primary stress mark.
local function determinar_acentuacion(w)
	if type(w) ~= "string" then
		return nil
	end

	-- Collect the syllables, i.e. the runs of characters between syllable separators.
	local silabas = {}
	for trozo in strmatchit(w, "[^"..separadores_silabicos.."]+") do
		silabas[#silabas + 1] = trozo
	end
	local total = #silabas

	if total >= 4 and silabas[total-1] == "men" and silabas[total] == "te" then
		return "doble", total
	end
	if total == 1 then
		return "monosílaba", total
	end

	-- Stress type indexed by the number of syllables that follow the stressed one.
	local tipo_por_distancia = {[0] = "aguda", [1] = "llana", [2] = "esdrújula"}

	-- Walk the syllables again, this time keeping the separators that precede each one, so that the primary
	-- stress mark stays attached to the syllable it belongs to.
	local posicion = 0
	for silaba in strmatchit(w, SEPARADORES_SILABICOS..'*'.."[^"..separadores_silabicos.."]+") do
		posicion = posicion + 1
		if strfind(silaba, primary_stress) then
			return tipo_por_distancia[total - posicion] or "sobreesdrújula", total
		end
	end

	error("Se esperaba que la pronunciación de la palabra hubiera sido generada con las marcas de acentuación")
end


-- Fill in the pronunciation-related fields of `args` for the page `titulo` and return `args`.
-- When `args` has no "ayuda" entry, the title itself is used as the respelling. When neither "fone" nor "fono"
-- was supplied, the fields "pron", "fone", "fnota" and "rima" are generated, plus "ac" and "ls" for titles
-- without spaces.
function export.procesar_pron_args(titulo, args)
	if #args["ayuda"] < 1 then
		args["ayuda"][1] = titulo
	end

	-- A transcription was supplied by the caller: nothing has to be generated.
	if #args["fone"] >= 1 or #args["fono"] >= 1 then
		return args
	end

	-- Single-letter titles take their respelling from the pron_abc table, indexed by the position of the
	-- letter in the alphabet.
	if #titulo == 1 then
		local desplazamiento
		if titulo >= "a" and titulo <= "z" then
			desplazamiento = 96
		elseif titulo >= "A" and titulo <= "Z" then
			desplazamiento = 64
		end
		if desplazamiento then
			args["ayuda"] = pron_abc[string.byte(titulo) - desplazamiento]
			args["tl"] = args["ayuda"]
		end
	end

	local fono
	args["pron"], args["fone"], args["fnota"], fono = generar_pron(args["ayuda"][1])

	-- The rhyme is whatever follows the last primary stress mark, starting at the first vowel found there.
	local tras_acento = strsub(fono, "^.*"..primary_stress.."(.-)$", "%1")
	args["rima"] = strsub(tras_acento, ".-".."("..V..".*"..")".."$", "%1")

	-- Stress type and syllable count are only computed for titles without spaces.
	if not strfind(titulo, " ") then
		args["ac"], args["ls"] = determinar_acentuacion(fono)
	end

	return args
end

return export