Módulo:generar-pron/pl

De Wikcionario, el diccionario libre

La documentación para este módulo puede ser creada en Módulo:generar-pron/pl/doc

local export = {}

local unpack = unpack or table.unpack
local insert = table.insert
local concat = table.concat

local m_str = require("Módulo:String")

local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strmatchit = m_str.gmatch
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strstrip = m_str.strip
local strupper = m_str.upper
local strlower = m_str.lower
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local substr = m_str.sub
local strlen = m_str.len
local strexplode = m_str.explode_utf8
local strhtml = m_str.encode_html

local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

local letters2phones = {
	["a"] = {
		["u"] = { "a", "w" },
		[false] = "a",
	},
	["ą"] = {
		["l"] = { "ɔ", "l" },
		["ł"] = { "ɔ", "w" },
		[false] = "ɔ̃",
	},
	["b"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "bʲ", "j", "ɔ", "l" },
				["ł"] = { "bʲ", "j", "ɔ", "w" },
				[false] = { "bʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "bʲ", "j", "ɛ", "l" },
				["ł"] = { "bʲ", "j", "ɛ", "w" },
				[false] = { "bʲ", "j", "ɛ̃" }
			},
			["a"] = { "bʲ", "j", "a" },
			["e"] = { "bʲ", "j", "ɛ" },
			["i"] = { "bʲ", "i" },
			["o"] = { "bʲ", "j", "ɔ" },
			["ó"] = { "bʲ", "j", "u" },
			["u"] = { "bʲ", "j", "u" },
			[false] = { "bʲ", "i" }
		},
		[false] = "b"
	},
	["c"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "t͡ɕ", "ɔ", "l" },
				["ł"] = { "t͡ɕ", "ɔ", "w" },
				[false] = { "t͡ɕ", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "t͡ɕ", "ɛ", "l" },
				["ł"] = { "t͡ɕ", "ɛ", "w" },
				[false] = { "t͡ɕ", "ɛ̃" }
			},
			["a"] = { "t͡ɕ", "a" },
			["e"] = { "t͡ɕ", "ɛ" },
			["o"] = { "t͡ɕ", "ɔ" },
			["ó"] = { "t͡ɕ", "u" },
			["u"] = { "t͡ɕ", "u" },
			["y"] = { "t͡ɕ", "ɨ" },
			[false] = { "t͡ɕ", "i" }
		},
		["h"] = {
			["i"] = {
				["ą"] = {
					["l"] = { "xʲ", "j", "ɔ", "l" },
					["ł"] = { "xʲ", "j", "ɔ", "w" },
					[false] = { "xʲ", "j", "ɔ̃" }
				},
				["ę"] = {
					["l"] = { "xʲ", "j", "ɛ", "l" },
					["ł"] = { "xʲ", "j", "ɛ", "w" },
					[false] = { "xʲ", "j", "ɛ̃" }
				},
				["a"] = { "xʲ", "j", "a" },
				["e"] = { "xʲ", "j", "ɛ" },
				["i"] = { "xʲ", "j", "i" },
				["o"] = { "xʲ", "j", "ɔ" },
				["ó"] = { "xʲ", "j", "u" },
				["u"] = { "xʲ", "j", "u" },
				[false] = { "xʲ", "i" }
			},
			[false] = "x"
		},
		["z"] = "t͡ʂ",
		[false] = "t͡s"
	},
	["ć"] = "t͡ɕ",
	["d"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "dʲ", "j", "ɔ", "l" },
				["ł"] = { "dʲ", "j", "ɔ", "w" },
				[false] = { "dʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "dʲ", "j", "ɛ", "l" },
				["ł"] = { "dʲ", "j", "ɛ", "w" },
				[false] = { "dʲ", "j", "ɛ̃" }
			},
			["a"] = { "dʲ", "j", "a" },
			["e"] = { "dʲ", "j", "ɛ" },
			["i"] = { "dʲ", "i" },
			["o"] = { "dʲ", "j", "ɔ" },
			["ó"] = { "dʲ", "j", "u" },
			["u"] = { "dʲ", "j", "u" },
			[false] = { "dʲ", "i" }
		},
		["z"] = {
			["i"] = {
				["ą"] = {
					["l"] = { "d͡ʑ", "ɔ", "l" },
					["ł"] = { "d͡ʑ", "ɔ", "w" },
					[false] = {"d͡ʑ", "ɔ̃" }
				},
				["ę"] = {
					["l"] = { "d͡ʑ", "ɛ", "l" },
					["ł"] = { "d͡ʑ", "ɛ", "w" },
					[false] = { "d͡ʑ", "ɛ̃" }
				},
				["a"] = { "d͡ʑ", "a" },
				["e"] = { "d͡ʑ", "ɛ" },
				["o"] = { "d͡ʑ", "ɔ" },
				["ó"] = { "d͡ʑ", "u" },
				["u"] = { "d͡ʑ", "u" },
				["y"] = { "d͡ʑ", "ɨ" },
				[false] = { "d͡ʑ", "i" }
			},
			[false] = "d͡z"
		},
		["ż"] = "d͡ʐ",
		["ź"] = "d͡ʑ",
		[false] = "d"
	},
	["e"] = {
		["u"] = { "ɛ", "w" },
		[false] = "ɛ",
	},
	["ę"] = {
		["l"] = { "ɛ", "l" },
		["ł"] = { "ɛ", "w" },
		[false] = "ɛ̃",
	},
	["f"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "fʲ", "j", "ɔ", "l" },
				["ł"] = { "fʲ", "j", "ɔ", "w" },
				[false] = { "fʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "fʲ", "j", "ɛ", "l" },
				["ł"] = { "fʲ", "j", "ɛ", "w" },
				[false] = { "fʲ", "j", "ɛ̃" }
			},
			["a"] = { "fʲ", "j", "a" },
			["e"] = { "fʲ", "j", "ɛ" },
			["i"] = { "fʲ", "j", "i" },
			["o"] = { "fʲ", "j", "ɔ" },
			["ó"] = { "fʲ", "j", "u" },
			["u"] = { "fʲ", "j", "u" },
			[false] = { "fʲ", "i" }
		},
		[false] = "f"
	},
	["g"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "ɡʲ", "j", "ɔ", "l" },
				["ł"] = { "ɡʲ", "j", "ɔ", "w" },
				[false] = { "ɡʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "ɡʲ", "j", "ɛ", "l" },
				["ł"] = { "ɡʲ", "j", "ɛ", "w" },
				[false] = { "ɡʲ", "j", "ɛ̃" }
			},
			["a"] = { "ɡʲ", "j", "a" },
			["e"] = { "ɡʲ", "j", "ɛ" },
			["i"] = { "ɡʲ", "j", "i" },
			["o"] = { "ɡʲ", "j", "ɔ" },
			["ó"] = { "ɡʲ", "j", "u" },
			["u"] = { "ɡʲ", "j", "u" },
			[false] = { "ɡʲ", "i" }
		},
		[false] = "ɡ"
	},
	["h"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "xʲ", "j", "ɔ", "l" },
				["ł"] = { "xʲ", "j", "ɔ", "w" },
				[false] = { "xʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "xʲ", "j", "ɛ", "l" },
				["ł"] = { "xʲ", "j", "ɛ", "w" },
				[false] = { "xʲ", "j", "ɛ̃" }
			},
			["a"] = { "xʲ", "j", "a" },
			["e"] = { "xʲ", "j", "ɛ" },
			["i"] = { "xʲ", "j", "i" },
			["o"] = { "xʲ", "j", "ɔ" },
			["ó"] = { "xʲ", "j", "u" },
			["u"] = { "xʲ", "j", "u" },
			[false] = { "xʲ", "i" }
		},
		[false] = "x"
	},
	["i"] = "i",
	["j"] = "j",
	["k"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "kʲ", "j", "ɔ", "l" },
				["ł"] = { "kʲ", "j", "ɔ", "w" },
				[false] = { "kʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "kʲ", "j", "ɛ", "l" },
				["ł"] = { "kʲ", "j", "ɛ", "w" },
				[false] = { "kʲ", "j", "ɛ̃" }
			},
			["a"] = { "kʲ", "j", "a" },
			["e"] = { "kʲ", "j", "ɛ" },
			["i"] = { "kʲ", "j", "i" },
			["o"] = { "kʲ", "j", "ɔ" },
			["ó"] = { "kʲ", "j", "u" },
			["u"] = { "kʲ", "j", "u" },
			[false] = { "kʲ", "i" }
		},
		[false] = "k"
	},
	["l"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "lʲ", "j", "ɔ", "l" },
				["ł"] = { "lʲ", "j", "ɔ", "w" },
				[false] = { "lʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "lʲ", "j", "ɛ", "l" },
				["ł"] = { "lʲ", "j", "ɛ", "w" },
				[false] = { "lʲ", "j", "ɛ̃" }
			},
			["a"] = { "lʲ", "j", "a" },
			["e"] = { "lʲ", "j", "ɛ" },
			["i"] = { "lʲ", "j", "i" },
			["o"] = { "lʲ", "j", "ɔ" },
			["ó"] = { "lʲ", "j", "u" },
			["u"] = { "lʲ", "j", "u" },
			[false] = { "lʲ", "i" }
		},
		[false] = "l"
	},
	["ł"] = "w",
	["m"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "mʲ", "j", "ɔ", "l" },
				["ł"] = { "mʲ", "j", "ɔ", "w" },
				[false] = { "mʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "mʲ", "j", "ɛ", "l" },
				["ł"] = { "mʲ", "j", "ɛ", "w" },
				[false] = { "mʲ", "j", "ɛ̃" }
			},
			["a"] = { "mʲ", "j", "a" },
			["e"] = { "mʲ", "j", "ɛ" },
			["i"] = { "mʲ", "j", "i" },
			["o"] = { "mʲ", "j", "ɔ" },
			["ó"] = { "mʲ", "j", "u" },
			["u"] = { "mʲ", "j", "u" },
			[false] = { "mʲ", "i" }
		},
		[false] = "m"
	},
	["n"] = {
		["k"] = { 
			["i"] = {
				["ą"] = {
					["l"] = { "ŋ", "kʲ", "j", "ɔ", "l" },
					["ł"] = { "ŋ", "kʲ", "j", "ɔ", "w" },
					[false] = { "ŋ", "kʲ", "j", "ɔ̃" }
				},
				["ę"] = {
					["l"] = { "ŋ", "kʲ", "j", "ɛ", "l" },
					["ł"] = { "ŋ", "kʲ", "j", "ɛ", "w" },
					[false] = { "ŋ", "kʲ", "j", "ɛ̃" }
				},
				["a"] = { "ŋ", "kʲ", "j", "a" },
				["e"] = { "ŋ", "kʲ", "j", "ɛ" },
				["i"] = { "ŋ", "kʲ", "j", "i" },
				["o"] = { "ŋ", "kʲ", "j", "ɔ" },
				["ó"] = { "ŋ", "kʲ", "j", "u" },
				["u"] = { "ŋ", "kʲ", "j", "u" },
				[false] = { "ŋ", "kʲ", "i" }
			},
			[false] = { "ŋ", "k" }
		},
		["g"] = {
			["i"] = {
				["ą"] = {
					["l"] = { "ŋ", "ɡʲ", "j", "l" },
					["ł"] = { "ŋ", "ɡʲ", "j", "w" },
					[false] = { "ŋ", "ɡʲ", "j", "ɔ̃" }
				},
				["ę"] = {
					["l"] = { "ŋ", "ɡʲ", "j", "ɛ", "l" },
					["ł"] = { "ŋ", "ɡʲ", "j", "ɛ", "w" },
					[false] = { "ŋ", "ɡʲ", "j", "ɛ̃" }
				},
				["a"] = { "ŋ", "ɡʲ", "j", "a" },
				["e"] = { "ŋ", "ɡʲ", "j", "ɛ" },
				["i"] = { "ŋ", "ɡʲ", "j", "i" },
				["o"] = { "ŋ", "ɡʲ", "j", "ɔ" },
				["ó"] = { "ŋ", "ɡʲ", "j", "u" },
				["u"] = { "ŋ", "ɡʲ", "j", "u" },
				[false] = { "ŋ", "ɡʲ", "i" }
			},
			[false] = { "ŋ", "ɡ" }
		},
		["i"] = {
			["ą"] = {
				["l"] = { "ɲ", "ɔ", "l" },
				["ł"] = { "ɲ", "ɔ", "w" },
				[false] = { "ɲ", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "ɲ", "ɛ", "l" },
				["ł"] = { "ɲ", "ɛ", "w" },
				[false] = { "ɲ", "ɛ̃" }
			},
			["a"] = { "ɲ", "a" },
			["e"] = { "ɲ", "ɛ" },
			["i"] = { "ɲ", "j", "i" },
			["o"] = { "ɲ", "ɔ" },
			["ó"] = { "ɲ", "u" },
			["u"] = { "ɲ", "u" },
			[false] = { "ɲ", "i" }
		},
		[false] = "n"
	},
	["ń"] = "ɲ",
	["o"] = {
		[false] = "ɔ",
	},
	["ó"] = "u",
	["p"] = {
		["i"] = {
			-- piątek, piasek, etc.
			["ą"] = {
				["l"] = { "pʲ", "j", "ɔ", "l" },
				["ł"] = { "pʲ", "j", "ɔ", "w" },
				[false] = { "pʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "pʲ", "j", "ɛ", "l" },
				["ł"] = { "pʲ", "j", "ɛ", "w" },
				[false] = { "pʲ", "j", "ɛ̃" }
			},
			["a"] = { "pʲ", "j", "a" },
			["e"] = { "pʲ", "j", "ɛ" },
			["i"] = { "pʲ", "j", "i" },
			["o"] = { "pʲ", "j", "ɔ" },
			["ó"] = { "pʲ", "j", "u" },
			["u"] = { "pʲ", "j", "u" },
			[false] = { "pʲ", "i" }
		},
		[false] = "p"
	},
	["r"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "rʲ", "j", "ɔ", "l" },
				["ł"] = { "rʲ", "j", "ɔ", "w" },
				[false] = { "rʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "rʲ", "j", "ɛ", "l" },
				["ł"] = { "rʲ", "j", "ɛ", "w" },
				[false] = { "rʲ", "j", "ɛ̃" }
			},
			["a"] = { "rʲ", "j", "a" },
			["e"] = { "rʲ", "j", "ɛ" },
			["i"] = { "rʲ", "j", "i" },
			["o"] = { "rʲ", "j", "ɔ" },
			["ó"] = { "rʲ", "j", "u" },
			["u"] = { "rʲ", "j", "u" },
			[false] = { "rʲ", "i" }
		},
		["z"] = "ʐ",
		[false] = "r"
	},
	["q"] = {
		["u"] = { "k", "v" },
		[false] = false
	},
	["s"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "ɕ", "ɔ", "l" },
				["ł"] = { "ɕ", "ɔ", "w" },
				[false] = { "ɕ", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "ɕ", "ɛ", "l" },
				["ł"] = { "ɕ", "ɛ", "w" },
				[false] = { "ɕ", "ɛ̃" }
			},
			["a"] = { "ɕ", "a" },
			["e"] = { "ɕ", "ɛ" },
			["o"] = { "ɕ", "ɔ" },
			["ó"] = { "ɕ", "u" },
			["u"] = { "ɕ", "u" },
			["y"] = { "ɕ", "ɨ" },
			[false] = { "ɕ", "i" }
		},
		["z"] = "ʂ",
		[false] = "s",
	},
	["ś"] = "ɕ",
	["t"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "tʲ", "j", "ɔ", "l" },
				["ł"] = { "tʲ", "j", "ɔ", "w" },
				[false] = { "tʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "tʲ", "j", "ɛ", "l" },
				["ł"] = { "tʲ", "j", "ɛ", "w" },
				[false] = { "tʲ", "j", "ɛ̃" }
			},
			["a"] = { "tʲ", "j", "a" },
			["e"] = { "tʲ", "j", "ɛ" },
			["i"] = { "tʲ", "i" },
			["o"] = { "tʲ", "j", "ɔ" },
			["ó"] = { "tʲ", "j", "u" },
			["u"] = { "tʲ", "j", "u" },
			[false] = { "tʲ", "i" }
		},
		[false] = "t"
	},
	["u"] = "u",
	["v"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "vʲ", "j", "ɔ", "l" },
				["ł"] = { "vʲ", "j", "ɔ", "w" },
				[false] = { "vʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "vʲ", "j", "ɛ", "l" },
				["ł"] = { "vʲ", "j", "ɛ", "w" },
				[false] = { "vʲ", "j", "ɛ̃" }
			},
			["a"] = { "vʲ", "j", "a" },
			["e"] = { "vʲ", "j", "ɛ" },
			["i"] = { "vʲ", "j", "i" },
			["o"] = { "vʲ", "j", "ɔ" },
			["ó"] = { "vʲ", "j", "u" },
			["u"] = { "vʲ", "j", "u" },
			[false] = { "vʲ", "i" }
		},
		[false] = "v"
	},
	["w"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "vʲ", "j", "ɔ", "l" },
				["ł"] = { "vʲ", "j", "ɔ", "w" },
				[false] = { "vʲ", "j", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "vʲ", "j", "ɛ", "l" },
				["ł"] = { "vʲ", "j", "ɛ", "w" },
				[false] = { "vʲ", "j", "ɛ̃" }
			},
			["a"] = { "vʲ", "j", "a" },
			["e"] = { "vʲ", "j", "ɛ" },
			["i"] = { "vʲ", "j", "i" },
			["o"] = { "vʲ", "j", "ɔ" },
			["ó"] = { "vʲ", "j", "u" },
			["u"] = { "vʲ", "j", "u" },
			[false] = { "vʲ", "i" }
		},
		[false] = "v"
	},
	["x"] = { "k", "s" },
	["y"] = "ɨ",
	["z"] = {
		["i"] = {
			["ą"] = {
				["l"] = { "ʑ", "ɔ", "l" },
				["ł"] = { "ʑ", "ɔ", "w" },
				[false] = { "ʑ", "ɔ̃" }
			},
			["ę"] = {
				["l"] = { "ʑ", "ɛ", "l" },
				["ł"] = { "ʑ", "ɛ", "w" },
				[false] = { "ʑ", "ɛ̃" }
			},
			["a"] = { "ʑ", "a" },
			["e"] = { "ʑ", "ɛ" },
			["o"] = { "ʑ", "ɔ" },
			["ó"] = { "ʑ", "u" },
			["u"] = { "ʑ", "u" },
			[false] = { "ʑ", "i" }
		},
		[false] = "z"
	},
	["ź"] = "ʑ",
	["ż"] = "ʐ",
	["-"] = {},
}

local valid_phone = {
	["a"] = true, ["b"] = true, ["bʲ"] = true, ["d"] = true, ["dʲ"] = true, ["d͡z"] = true, ["d͡ʑ"] = true,
	["d͡ʐ"] = true, ["ɛ"] = true, ["ɛ̃"] = true, ["f"] = true, ["fʲ"] = true, ["ɡ"] = true,
	["ɡʲ"] = true, ["i"] = true, ["ɨ"] = true, ["j"] = true, ["k"] = true, ["kʲ"] = true,
	["l"] = true, ["lʲ"] = true, ["m"] = true, ["mʲ"] = true, ["n"] = true, ["ŋ"] = true,
	["ɲ"] = true, ["ɔ"] = true, ["ɔ̃"] = true, ["p"] = true, ["pʲ"] = true, ["r"] = true, ["rʲ"] = true,
	["s"] = true, ["ɕ"] = true, ["ʂ"] = true, ["t"] = true, ["tʲ"] = true, ["t͡s"] = true, ["t͡ɕ"] = true, ["t͡ʂ"] = true,
	["u"] = true, ["v"] = true, ["vʲ"] = true, ["w"] = true, ["w̃"] = true, ["x"] = true, ["xʲ"] = true, ["z"] = true,
	["ʑ"] = true, ["ʐ"] = true, ["ɣ"] = true
}

local sylmarks = {
	["."] = ".", ["'"] = "ˈ", ["ˈ"] = "ˈ", [","] = "ˌ"
}

local vowel = {
	["a"] = true, ["ɛ"] = true, ["ɛ̃"] = true,
	["i"] = true, ["ɨ"] = true, ["ɔ"] = true,
	["ɔ̃"] = true, ["u"] = true
}

local devoice = {
	["b"] = "p", ["d"] = "t", ["d͡z"] = "t͡s", ["d͡ʑ"] = "t͡ɕ",
	["d͡ʐ"] = "t͡ʂ", ["ɡ"] = "k", ["v"] = "f", ["vʲ"] = "fʲ",
	["z"] = "s", ["ʑ"] = "ɕ", ["ʐ"] = "ʂ",

	-- non-devoicable
	["bʲ"] = "bʲ", ["dʲ"] = "dʲ", ["ɡʲ"] = "ɡʲ", ["m"] = "m", ["mʲ"] = "mʲ",
	["n"] = "n", ["ɲ"] = "ɲ", ["ŋ"] = "ŋ", ["w"] = "w", ["w̃"] = "w̃",
	["l"] = "l", ["lʲ"] = "lʲ", ["j"] = "j", ["r"] = "r", ["rʲ"] = "rʲ", ["tʲ"] = "tʲ",
}

local voice = {
	["p"] = "b", ["t"] = "d", ["t͡s"] = "d͡z", ["t͡ɕ"] = "d͡ʑ",
	["t͡ʂ"] = "d͡ʐ", ["k"] = "ɡ", ["f"] = "v", ["fʲ"] = "vʲ",
	["s"] = "z", ["ɕ"] = "ʑ", ["ʂ"] = "ʐ", ["x"] = "ɣ",

	-- non-voicable
	["bʲ"] = "bʲ", ["dʲ"] = "dʲ", ["ɡʲ"] = "ɡʲ", ["m"] = "m", ["mʲ"] = "mʲ",
	["n"] = "n", ["ɲ"] = "ɲ", ["ŋ"] = "ŋ", ["w"] = "w", ["w̃"] = "w̃",
	["l"] = "l", ["lʲ"] = "lʲ", ["j"] = "j", ["r"] = "r", ["rʲ"] = "rʲ", ["tʲ"] = "tʲ",
}

local forward_assimilants = {
	["v"] = true, ["vʲ"] = true
}

local denasalized = {
	["ɛ̃"] = "ɛ",
	["ɔ̃"] = "ɔ",
}

local nasal_map = {
	["p"] = "m", ["pʲ"] = "m", ["b"] = "m", ["bʲ"] = "m", -- zębu, klępa
	["k"] = "ŋ", ["kʲ"] = "ŋ", ["ɡ"] = "ŋ", ["ɡʲ"] = "ŋ", -- pąk, łęgowy
	["t"] = "n", ["d"] = "n", -- wątek, piątek, mądrość

	["t͡ɕ"] = "ɲ", ["d͡ʑ"] = "ɲ", ["ɕ"] = "ɲ", ["ʑ"] = "ɲ", -- pięć, pędziwiatr, łabędź
	-- gęsi, więzi
	["t͡ʂ"] = "n", ["d͡ʐ"] = "n", -- pączek, ?
	-- węszyć, mężny
	["t͡s"] = "n", ["d͡z"] = "n", -- wiedząc, pieniędzy
}

local SPECIAL_FLAGS = {
	IS_RZ = "IS_RZ",
}

local third_last_syllable_stress = {
	"łbym", "łabym", "łbyś", "łabyś", "łby", "łaby", "łoby", "liby", "łyby",
}

local fourth_last_syllable_stress = {
	"libyśmy", "łybyśmy", "libyście", "łybyście",
}

local pron_abc = {
    ["A"] = {"a"},
    ["a"] = {"a"},
    ["Ą"] = {"ą", "a z ogonkiem"},
    ["ą"] = {"ą", "a z ogonkiem"},
    ["B"] = {"be"},
    ["b"] = {"be"},
    ["C"] = {"ce"},
    ["c"] = {"ce"},
    ["Ć"] = {"cie", "ce z kreską"},
    ["ć"] = {"cie", "ce z kreską"},
    ["D"] = {"de"},
    ["d"] = {"de"},
    ["E"] = {"e"},
    ["e"] = {"e"},
    ["Ę"] = {"ę", "e z ogonkiem"},
    ["ę"] = {"ę", "e z ogonkiem"},
    ["F"] = {"ef"},
    ["f"] = {"ef"},
    ["G"] = {"gie"},
    ["g"] = {"gie"},
    ["H"] = {"ha"},
    ["h"] = {"ha"},
    ["I"] = {"i"},
    ["i"] = {"i"},
    ["J"] = {"jot"},
    ["j"] = {"jot"},
    ["K"] = {"ka"},
    ["k"] = {"ka"},
    ["L"] = {"el"},
    ["l"] = {"el"},
    ["Ł"] = {"eł", "el z kreską"},
    ["ł"] = {"eł", "el z kreską"},
    ["M"] = {"em"},
    ["m"] = {"em"},
    ["N"] = {"en"},
    ["n"] = {"en"},
    ["Ń"] = {"eń", "en z kreską"},
    ["ń"] = {"eń", "en z kreską"},
    ["O"] = {"o"},
    ["o"] = {"o"},
    ["Ó"] = {"o z kreską", "o kreskowane", "u zamknięte"},
    ["ó"] = {"o z kreską", "o kreskowane", "u zamknięte"},
    ["P"] = {"pe"},
    ["p"] = {"pe"},
    ["Q"] = {"ku"},
    ["q"] = {"ku"},
    ["R"] = {"er"},
    ["r"] = {"er"},
    ["S"] = {"es"},
    ["s"] = {"es"},
    ["Ś"] = {"eś", "es z kreską"},
    ["ś"] = {"eś", "es z kreską"},
    ["T"] = {"te"},
    ["t"] = {"te"},
    ["U"] = {"u", "u otwarte"},
    ["u"] = {"u", "u otwarte"},
    ["V"] = {"fał", "we"},
    ["v"] = {"fał", "we"},
    ["W"] = {"wu"},
    ["w"] = {"wu"},
    ["X"] = {"iks"},
    ["x"] = {"iks"},
    ["Y"] = {"y", "igrek"},
    ["y"] = {"y", "igrek"},
    ["Z"] = {"zet"},
    ["z"] = {"zet"},
    ["Ź"] = {"ziet", "zet z kreską"},
    ["ź"] = {"ziet", "zet z kreską"},
    ["Ż"] = {"żet", "zet z kropką"},
    ["ż"] = {"żet", "zet z kropką"},
}


---
-- Check whether phone doesn't change due to voicing/devoicing
---@param phone string
---@return boolean
local function is_neutral(phone)
	return (devoice[phone] and voice[phone]) and (voice[phone] == devoice[phone])
end

---
-- Check whether phone is a special character (syllable mark or word boundary)
---@param phone string
---@return boolean
local function is_special(phone)
	return phone == " " or ((sylmarks[phone] ~= nil) and (sylmarks[phone] ~= false))
end

---
-- Check whether phone is voiced
---@param phone string
---@return boolean
local function is_voiced(phone)
	return devoice[phone] and phone ~= devoice[phone]
end

---
-- Check whether phone is prone to forward assimilation
---@param phone string
---@param flags table Special flags for this phone
---@return boolean
local function is_forward_assimilant(phone, flags)
	return forward_assimilants[phone] or (flags and flags[SPECIAL_FLAGS.IS_RZ])
end

---
-- Check whether phone cluster is a palatalized cluster
---@param cluster string
---@return boolean
local function is_palatalized_cluster(cluster)
	return cluster:find("[ɡxkfbmprvdtl]ʲj[aɔ̃ɛɛ̃iɔu]") ~= nil
end

---
-- Process special flags for grapheme and associate them with the recorded phone
---@param grapheme string
---@return table | nil
local function process_special_flags(grapheme)
	if grapheme == "rz" then
		return { [SPECIAL_FLAGS.IS_RZ] = true }
	end
end

---
-- Convert letters and graphemes to phones
---@param word string
---@return table<number, string>, table<number, table<string, boolean>>
local function convert_to_phones(word)
	local phones = {}
	local flags = {}
	local chbuf = ""
	local function append_phone(phone)
		insert(phones, phone)

		-- mark rz for assimilation later
		local grapheme_flags = process_special_flags(chbuf)
		if grapheme_flags then
			flags[#phones] = grapheme_flags
		end
		chbuf = ""
	end

	local l2ptab = letters2phones
	for ch in strmatchit(strlower(word), ".") do
		local value = l2ptab[ch]

		if value == nil then
			value = l2ptab[false]
			if value == false then
				return nil
			elseif type(value) == "table" then
				for _, phone in ipairs(value) do
					append_phone(phone)
				end
			else
				append_phone(value)
			end
			l2ptab = letters2phones
			value = l2ptab[ch]
		end

		chbuf = chbuf .. ch

		if type(value) == "table" then
			if value[false] == nil then
				for _, phone in ipairs(value) do
					append_phone(phone)
				end
				l2ptab = letters2phones
			else
				l2ptab = value
			end
		elseif type(value) == "string" then
			append_phone(value)
			l2ptab = letters2phones
		else
			append_phone(ch)
		end
	end

	if l2ptab ~= letters2phones then
		local value = l2ptab[false]
		if type(value) == "table" then
			for _, phone in ipairs(value) do
				append_phone(phone)
			end
		else
			append_phone(value)
		end
	end

	return phones, flags
end

---
-- Simplify nasals
---@param phones table<number, string>
---@return table<number, string>, table<number, table<string, boolean>>
local function simplify_nasals(phones, flags)
	local new_phones, new_flags = {}, {}
	for i, phone in ipairs(phones) do
		if denasalized[phone] then
			local pnext = phones[i + 1]
			if sylmarks[pnext] then
				pnext = phones[i + 2]
			end
			if phone == "ɛ̃" and (not pnext or not valid_phone[pnext]) then
				-- denasalize word-final ę
				insert(new_phones, denasalized[phone])
				new_flags[#new_phones] = flags[i]
			elseif nasal_map[pnext] then
				insert(new_phones, denasalized[phone])
				insert(new_phones, nasal_map[pnext])
				new_flags[#new_phones] = flags[i]
			else
				insert(new_phones, phone)
				new_flags[#new_phones] = flags[i]
			end
		else
			insert(new_phones, phone)
			new_flags[#new_phones] = flags[i]
		end
	end
	return new_phones, new_flags
end

---
-- Devoice consonant phones in terminal positions
---@param phones table<number, string> Target phone table to mutate
local function terminal_devoice(phones)
	local final_phone = phones[#phones]
	if is_voiced(final_phone) then
		phones[#phones] = devoice[final_phone]
	end
end

---
-- Process consonant cluster assimilation for single cluster
---@param cluster table<number, string> Consonant cluster
---@param flags table<number, table<string, boolean>> Flags relative to the cluster
---@param new_phones table<number, string> Target phone table to mutate
local function process_consonant_cluster(cluster, flags, new_phones)
	local determining_index = #cluster
	while cluster[determining_index] do
		local candidate = cluster[determining_index]
		-- Skip forward assimilants and neutral phones to find the first voiced/devoiced consonant which decides the entire cluster
		if not is_forward_assimilant(candidate, flags[determining_index]) and not is_neutral(candidate) and not is_special(candidate) then
			break
		end
		determining_index = determining_index - 1
	end

	-- If the cluster ends up being comprised of just neutral phones and forward assimilants, add it as-is
	if determining_index == 0 then
		for _, consonant in ipairs(cluster) do
			insert(new_phones, consonant)
		end
		return
	end

	-- Transform the entire cluster, forward and back, relative to the determining consonant's voicing
	local determining_consonant = cluster[determining_index]
	local target_map = is_voiced(determining_consonant) and voice or devoice

	for _, consonant in ipairs(cluster) do
		local transformed = target_map[consonant] or consonant
		insert(new_phones, transformed)
	end
end

---
-- Process consonant cluster assimilation for single cluster
---@param phones table<number, string>
---@param flags table<number, table<string, boolean>>
---@return table<number, string>
local function process_consonant_clusters(phones, flags)
	local new_phones = {}
	local i = 1
	while i <= #phones do
		local pcurr, pnext = phones[i], ""
		if not valid_phone[pcurr] or vowel[pcurr] then
			-- Other phone encountered, add it as-is
			insert(new_phones, pcurr)
		else
			-- Consonant cluster to process
			local cluster = {}
			-- Phone flags indexed relative to the cluster
			local cluster_flags = {}

			-- Search forward for consonant cluster
			local j = i
			while j <= #phones do
				pnext = phones[j]

				-- Break on vowel or invalid symbol and process what we have
				if vowel[pnext] or (not valid_phone[pnext] and not is_special(pnext)) then
					break
				end

				insert(cluster, pnext)
				-- Set the cluster-relative flag for the latest processed phoneme
				cluster_flags[#cluster] = flags[j]

				j = j + 1
			end

			if #cluster > 0 then
				if #cluster > 1 then
					-- Process actual consonant cluster
					process_consonant_cluster(cluster, cluster_flags, new_phones)
					-- Skip forward past the processed phones to avoid any unwanted duplication
					-- Offset by 1 to compensate, because i is unconditionally incremented by 1 at the very end
					i = j - 1
				else
					-- The cluster is a single consonant, add it as-is
					insert(new_phones, cluster[1])
				end
			end
		end
		i = i + 1
	end
	return new_phones
end

---
-- Join several phones together, handling table and nil values
---@vararg string | table Phones to join together
---@return string
local function join_phones(...)
	local args = {...}
	local str = ""
	for _, syllable in ipairs(args) do
		if type(syllable) == "table" then
			str = str .. concat(syllable, "")
		else
			str = str .. (syllable or "")
		end
	end
	return str
end

---
-- Group phones into syllables
---@param phones table<number, string>
---@return table<number, string>
local function collect_syllables(phones)
	local words, curword, sylmarked, sylbuf, had_vowl = {}, nil, false, nil, nil
	for i, pcurr in ipairs(phones) do
		local pprev, pnext, pnnext = phones[i - 1], phones[i + 1], phones[i + 2]

		if valid_phone[pcurr] then
			if not curword then
				curword, sylbuf, had_vowl, sylmarked = {}, '', false, false
				insert(words, curword)
			end

			local same_syl = true

			if vowel[pcurr] then
				if had_vowl then
					same_syl = false
				end
				had_vowl = true
			elseif had_vowl then
				if vowel[pnext] then
					same_syl = false
				elseif not vowel[pprev] and not vowel[pnext] then
					same_syl = false
				elseif vowel[pprev] and is_palatalized_cluster(join_phones(pcurr, pnext, pnnext)) then
					same_syl = false
				elseif ((pcurr == "s") and ((pnext == "t") or (pnext == "p") or (pnext == "k")))
						or (pnext == "r") or (pnext == "f") or (pnext == "w")
						or ((pcurr == "ɡ") and (pnext == "ʐ"))
						or ((pcurr == "d") and ((pnext == "l") or (pnext == "w") or (pnext == "ɲ")))
						or is_palatalized_cluster(join_phones(pprev, pcurr, pnext))
				then
					-- these should belong to a common syllable
					same_syl = true
				end
			end

			if same_syl then
				sylbuf = sylbuf .. pcurr
			else
				insert(curword, sylbuf)
				sylbuf, had_vowl = pcurr, vowel[pcurr]
			end
		elseif (curword or valid_phone[pnext]) and sylmarks[pcurr] then
			if not curword then
				curword, sylbuf, had_vowl = {}, '', false
				insert(words, curword)
			end
			sylmarked = true
			if sylbuf then
				insert(curword, sylbuf)
				sylbuf = ''
			end
			insert(curword, sylmarks[pcurr])
		else
			if sylbuf then
				assert(type(curword) == "table") -- para dejar al interprete tranquilo
				if #curword > 0 and not had_vowl then
					curword[#curword] = curword[#curword] .. sylbuf
				else
					insert(curword, sylbuf)
				end
				if sylmarked then
					words[#words] = concat(curword)
				end
			end
			curword, sylbuf = nil, nil
			insert(words, pcurr)
		end
	end
	if sylbuf then
		assert(type(curword) == "table") -- para dejar al interprete tranquilo
		if #curword > 0 and not had_vowl then
			curword[#curword] = curword[#curword] .. sylbuf
		else
			insert(curword, sylbuf)
		end
		if sylmarked then
			words[#words] = concat(curword)
		end
	end

	return words
end

local function get_stressed_syllable(word)
	local stressed_syllable = 1
	for i,v in ipairs(third_last_syllable_stress) do
		if word:sub(-string.len(v)) == v
		then
			stressed_syllable = 2
		end
	end
	for i,v in ipairs(fourth_last_syllable_stress) do
		if word:sub(-string.len(v)) == v
		then
			stressed_syllable = 3
		end
	end

	return stressed_syllable
end

local function is_more_than_one_word(word)
	if string.find(word, " ") then
		return true
	else
		return false
	end
end

local function normalizar(texto)
	texto = strlower(texto)
	texto = strsubrep(texto, PUNTUACION, " | ") -- convierto lo que delimite fragmentos a los IPA foot boundaries |
	texto = strsubrep(texto, PUNTUACION_EXTRA, "") -- elimino la puntuación restante que haya quedado
	texto = strsubrep(texto, "[%-‐]", " ") --los guiones pasan a ser espacios (austro-húngaro, franco-italiano)

    texto = strsubrep(texto, "%s*|%s*|%s*", " | ") --finalmente, elimino las barras y espacios de más
    texto = strsubrep(texto, "%s+", " ")
	texto = strstrip(texto, "[%s|]+")
	
	return texto
end

local function procesar_fragmento(w)
	local stressed_syllable = get_stressed_syllable(w)
	local more_than_one_word = is_more_than_one_word(w)
	local phones, flags = convert_to_phones(w)
	phones, flags = simplify_nasals(phones, flags)
	terminal_devoice(phones)
	phones = process_consonant_clusters(phones, flags)
	local words = collect_syllables(phones)

	-- mark syllable breaks and stress
	for i, word in ipairs(words) do
		if type(word) == "table" then
			-- unless already marked
			if not ((word[2] == ".") or (word[2] == "ˈ") or (word[2] == "ˌ")) then
				for j, syl in ipairs(word) do
					if not more_than_one_word then
						if #word < stressed_syllable+1 then
							stressed_syllable = #word-1
						end
					end

					if #word > 1 then
						if j == (#word - stressed_syllable) then
							word[j] = "ˈ" .. syl
						elseif j ~= 1 then
							word[j] = "." .. syl
						end
					end
				end
			end
			words[i] = concat(word)
		end
	end

	for i, _ in ipairs(words) do
		-- get rid of /ʲ/
		words[i] = strsubn(words[i], "ʲ([ij])", "%1")
		words[i] = strsubn(words[i], "ʲ", "j")
		-- replace /ɔ̃/ and /ɛ̃/ with /ɔw̃/ and /ɛw̃/
		words[i] = strsubn(words[i], "ɛ̃", "ɛw̃")
		words[i] = strsubn(words[i], "ɔ̃", "ɔw̃")
		---- replace /n/ with /w̃/ before /s, z, ʂ, ʐ, ɕ, ʑ/ (currently turned off)
		-- words[i] = strsubn(words[i], "n([szʂʐɕʑ])", "w̃%1")
		-- words[i] = strsubn(words[i], "n([ˈˌ.])([szʂʐɕʑ])", "w̃%1%2")
	end

	return concat(words)
end

local function generar_pron(t)
	t = normalizar(t)
	local convertido = {}
	local fragmentos = strsplit(t, "%s*|%s*")
	
	for _,fragmento in ipairs(fragmentos) do
	    insert(convertido, procesar_fragmento(fragmento))
	end
	
    return {{strhtml(concat(convertido, " | "))}}
end

local vowels = "aeiouyąęó"
local vowel = "[" .. vowels .. "]"
local consonants = "bcćdfghjklłmnńpqrsśtuvwxyzźż"
local consonant = "[" .. consonants .. "]"
-- vowel digraphs, not necessarily actual phonetic diphthongs
local diphthong_i_v2 = "[aąoeęuói]"
local diphthongs = {
	["a"] = "u",
	["e"] = "u",
	["i"] = diphthong_i_v2
}
-- consonant digraphs (key = first letter, value = possible second letters)
local digraphs = {
	["c"] = "[hz]",
	["d"] = "[zźż]",
	["q"] = "u",
	["r"] = "z",
	["s"] = "z",
}

local past_tense_suffixes = {
	"liśmy", "liście", "łyśmy", "łyście",
}

local latin_borrowing_suffixes = {
	"ika", "yka",
	"iki", "yki",
	"ika", "yka",
	"ice", "yce",
	"ikom", "ykom",
	"ikę", "ykę",
	"iką", "yką",
	"ice", "yce",
	"ikach", "ykach",
	"iko", "yko",
}

-- if this is changed, the next two functions also need to be
local function is_respelling_close_enough(respelling, word)
	word = strsubn(word, "j(" .. diphthong_i_v2 .. ")", "i%1")
	respelling = strsubn(respelling, "['.]", "")
	respelling = strsubn(respelling, "j(" .. diphthong_i_v2 .. ")", "i%1")
	return word == respelling
end

local function partition(word, oword)
	local parts = {}
	local lenword = strlen(word)
	local pos = 1
	local offset = 0
	word = strsubn(word, "['-]", ".")
	while pos <= lenword do
		if strfind(strlower(word), "^" .. vowel, pos) then
			local initial = substr(strlower(word), pos, pos)
			local seq = 1
			if diphthongs[initial] and strfind(strlower(word), "^" .. initial .. diphthongs[initial], pos) then
				seq = 2
			end
			insert(parts, { "v", substr(oword, pos - offset, pos - offset + seq - 1) })
			pos = pos + seq
		elseif strfind(strlower(word), "^" .. consonant, pos) then
			local initial = substr(strlower(word), pos, pos)
			local seq = 1
			if digraphs[initial] and strfind(strlower(word), "^" .. initial .. digraphs[initial], pos) then
				seq = 2
			end
			insert(parts, { "c", substr(oword, pos - offset, pos - offset + seq - 1) })
			pos = pos + seq
		elseif strfind(word, "^% ", pos) then
			-- multiword, do not hyphenate
			return nil
		elseif strfind(word, "^%.", pos) then
			-- syllable break
			if not strfind(oword, "^['-]", pos - offset) then
				offset = offset + 1
			end
			insert(parts, { "b", nil })
			pos = pos + 1
		else
			-- unrecognized symbol
			return nil
		end
	end
	return parts
end

local function get_word_suffix(word)
	word = word:gsub("([ˈ'.,ˌ])", "")
	local word_suffix = 0
	for i,v in ipairs(past_tense_suffixes) do
		if word:sub(-string.len(v)) == v
		then
			word_suffix = 1
		end
	end
	for i,v in ipairs(latin_borrowing_suffixes) do
		if word:sub(-string.len(v)) == v
		then
			word_suffix = 2
		end
	end

	return word_suffix
end

local function separar_en_silabas(word, otitle) --ayuda, otitle = titulo original
	local syllables = {}
	local cursyl = ""
	local nucleus = false
	local coda = nil
	local parts = partition(word, otitle)
	if not parts then return nil end
	for pos, p in ipairs(parts) do
		local kind, part = unpack(p)
		if kind == "v" then
			if coda then
				cursyl = cursyl .. substr(syllables[#syllables], -coda)
				syllables[#syllables] = substr(syllables[#syllables], 1, -coda - 1)
				coda = nil
			end
			if nucleus then
				insert(syllables, cursyl)
				cursyl = ""
			end
			nucleus = true
			coda = nil
			cursyl = cursyl .. part
		elseif kind == "c" then
			cursyl = cursyl .. part
			if nucleus then
				insert(syllables, cursyl)
				cursyl = ""
				nucleus = false
				coda = strlen(part)
			else
				coda = nil
			end
		elseif kind == "b" then
			-- implicit syllable break
			if #cursyl > 0 then
				if nucleus or #syllables < 1 then
					insert(syllables, cursyl)
				else
					syllables[#syllables] = syllables[#syllables] .. cursyl
				end
			end
			cursyl = ""
			nucleus = false
			coda = nil
		else
			-- unrecognized kind
			return nil
		end
	end
	if #cursyl > 0 then
		if nucleus or #syllables < 1 then
			insert(syllables, cursyl)
		else
			syllables[#syllables] = syllables[#syllables] .. cursyl
		end
	end
	return syllables
end

local ipavowel = "[aɛiɨɔu]"
local function generar_rima(ipa)
	local vowels_at = { }
	local pos = 1
	while true do
		local posnext = strfind(ipa, ipavowel, pos)
		if not posnext then break end
		insert(vowels_at, posnext)
		pos = posnext + 1
	end
	local vend
	if #vowels_at < 1 then return nil end
	if #vowels_at > 1 then
		vend = vowels_at[#vowels_at - 1]
	else
		vend = vowels_at[#vowels_at]
	end
	local snippet = substr(ipa, vend)
	snippet = strsubn(snippet, "[ˈˌ.]", "")
	if strfind(snippet, " ") then
		return nil -- copout, something must be wrong
	end
	return snippet
end

-- Punto de entrada externo, recibe el título de página y los argumentos de plantilla
function export.procesar_pron_args(titulo, args)
	local tit = titulo
	local vino_ayuda, x

	if #args["ayuda"] < 1 then
		args["ayuda"][1] = tit
	else
		vino_ayuda = true
	end

	if #args["fone"] < 1 and #args["fono"] < 1 then
		x = pron_abc[args["ayuda"][1]]
		if x then
			args["ayuda"] = x
			args["tl"] = x
		end

		local A = #args["ayuda"]
		local j = 1 -- indice de la ayuda
		local k = 1 -- cantidad de pronunciaciones insertadas (máximo 9)
		while k <= 9 and j <= A do
			local fono = generar_pron(args["ayuda"][j])
			for i,_ in ipairs(fono) do
				table.insert(args["fono"], fono[i])
				k = k + 1
				if k > 9 then
					break
				end
			end
			j = j + 1
		end
	end


	if not args["rima"] then
		if args["fono"][1] and args["fono"][1][1] then
			args["rima"] = generar_rima(args["fono"][1][1])
		end
	end

	local tiene_espacios = strfind(tit, " ")

	if not tiene_espacios and not x then
		local ss__

		if vino_ayuda then
			if is_respelling_close_enough(args["ayuda"][1], tit) then
				ss__ = separar_en_silabas(args["ayuda"][1], tit)
			end
		else
			ss__ = separar_en_silabas(args["ayuda"][1], args["ayuda"][1])
		end

		if not ss__ then
			return args
		end

		if not args["ls"] then
			args["ls"] = #ss__
		end

		if not args["nl"] then
			args["nl"] = strlen(tit)
		end

		if not args["d"][1] then
			args["d"][1] = concat(ss__, "-")
		end
	end

	return args
end

return export