Módulo:generar-pron/hy

La documentación para este módulo puede ser creada en Módulo:generar-pron/hy/doc
-- Tomado de en.wikt, introducido en es.wikt por Tmagc

local export = {}

local unpack = unpack or table.unpack

local m_str = require("Módulo:String")

local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strsubrep = m_str.gsub_rep
local strmatch = m_str.match
local strmatchit = m_str.gmatch
local strsplit = m_str.split
local strstrip = m_str.strip
local strlower = m_str.lower
local strlen = m_str.len
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strhtml = m_str.encode_html

-- Single Armenian characters mapped to their IPA rendering, keyed by
-- pronunciation system ("east" or "west"). _pronunciation passes the sub-table
-- for the selected system as the replacement argument of a per-character gsub,
-- so characters that have no entry here are not looked up successfully.
-- Note that normalizar already turns hyphens into spaces before this table is
-- consulted.
local phonetic_chars_map = {
	-- Eastern Armenian
	east = {
		["ա"]="ɑ", ["բ"]="b", ["գ"]="ɡ", ["դ"]="d", ["ե"]="e", ["զ"]="z",
		["է"]="e", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="t͡s", ["կ"]="k", ["հ"]="h", ["ձ"]="d͡z", ["ղ"]="ʁ", 
		["ճ"]="t͡ʃ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="o",
		["չ"]="t͡ʃʰ", ["պ"]="p", ["ջ"]="d͡ʒ", ["ռ"]="r", ["ս"]="s", ["վ"]="v", 
		-- NOTE(review): the two hyphen-like keys on the last line of this
		-- sub-table look identical; if they really are the same character, the
		-- later entry (empty string) overrides the earlier one — confirm.
		["տ"]="t", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["և"]="ev", ["օ"]="o", ["ֆ"]="f", ["-"]=" ", ["՚"]="", ["-"]=""
	},
	-- Western Armenian
	west = {
		["ա"]="ɑ", ["բ"]="pʰ", ["գ"]="kʰ", ["դ"]="tʰ", ["ե"]="e", ["զ"]="z",
		["է"]="e", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
		["խ"]="χ", ["ծ"]="d͡z", ["կ"]="ɡ", ["հ"]="h", ["ձ"]="t͡sʰ", ["ղ"]="ʁ", 
		["ճ"]="d͡ʒ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="o",
		["չ"]="t͡ʃʰ", ["պ"]="b", ["ջ"]="t͡ʃʰ", ["ռ"]="ɾ", ["ս"]="s", ["վ"]="v", 
		-- NOTE(review): same duplicated hyphen-like key as in the Eastern
		-- sub-table — confirm which value is intended.
		["տ"]="d", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
		["և"]="ev", ["օ"]="o", ["ֆ"]="f", ["-"]=" ", ["՚"]="", ["-"]=""
	},
}

-- Character sequences (mostly two letters) that map to IPA sounds, keyed by
-- pronunciation system. Each entry is a { pattern, replacement } pair;
-- _pronunciation unpacks the pairs into gsub calls in list order, before the
-- single-character mapping is applied, so the order of the entries matters.
local phonetic_2chars_map = {
	east = {
		{ 'ու', 'u' },
	},
	west = {
		-- յու becomes ʏ, but only if not in the initial position and if not
		-- preceded by a vowel letter, whitespace or the digraph ու. `before`
		-- is the (up to two) characters captured in front of յու. When the
		-- condition fails the callback returns nothing, which presumably
		-- leaves the match untouched (standard gsub semantics — confirm for
		-- m_str.gsub), so the later rules still see the original letters.
		{ '(.?.?)յու', function(before)
			if not (before == '' or strfind(before, '[%sաեէիոօ]$')
			or before == "ու") then
				return before .. 'ʏ'
			end
		end },
		{ 'ու', 'u' },
		{ 'էօ', 'œ' },
		-- պ, տ, կ are not voiced after ս and շ
		{ 'սպ', 'sp' },
		{ 'ստ', 'st' },
		{ 'սկ', 'sk' },
		{ 'շպ', 'ʃp' },
		{ 'շտ', 'ʃt' },
		{ 'շկ', 'ʃk' },
		-- Western Armenian inserts ə in the causative
		{ 'ցնել', 't͡sʰənel' },

	},
}

-- The alphabet: maps each Armenian letter (upper and lower case, plus the
-- digraph ու and the ligature և) to a list with the spelled-out name(s) of that
-- letter. procesar_pron_args looks the page title up here and, on a hit, uses
-- the list as args["ayuda"]; only the first name of the list is transcribed.
-- NOTE(review): where two names are listed they are presumably the traditional
-- and the reformed spelling of the same letter name — confirm.
local pron_abc = {
	["Ա"] = {"այբ"},
	["ա"] = {"այբ"},
	["Բ"] = {"բեն"},
	["բ"] = {"բեն"},
	["Գ"] = {"գիմ"},
	["գ"] = {"գիմ"},
	["Դ"] = {"դա"},
	["դ"] = {"դա"},
	["Ե"] = {"եչ"},
	["ե"] = {"եչ"},
	["Զ"] = {"զա"},
	["զ"] = {"զա"},
	["Է"] = {"է"},
	["է"] = {"է"},
	["Ը"] = {"ըթ"},
	["ը"] = {"ըթ"},
	["Թ"] = {"թօ","թո"},
	["թ"] = {"թօ","թո"},
	["Ժ"] = {"ժէ","ժե"},
	["ժ"] = {"ժէ","ժե"},
	["Ի"] = {"ին"},
	["ի"] = {"ին"},
	["Լ"] = {"լիւն","լյուն"},
	["լ"] = {"լիւն","լյուն"},
	["Խ"] = {"խէ","խե"},
	["խ"] = {"խէ","խե"},
	["Ծ"] = {"ծա"},
	["ծ"] = {"ծա"},
	["Կ"] = {"կեն"},
	["կ"] = {"կեն"},
	["Հ"] = {"հօ","հո"},
	["հ"] = {"հօ","հո"},
	["Ձ"] = {"ձա"},
	["ձ"] = {"ձա"},
	["Ղ"] = {"ղատ"},
	["ղ"] = {"ղատ"},
	["Ճ"] = {"ճէ","ճե"},
	["ճ"] = {"ճէ","ճե"},
	["Մ"] = {"մեն"},
	["մ"] = {"մեն"},
	["Յ"] = {"յի","հի"},
	["յ"] = {"յի","հի"},
	["Ն"] = {"նու"},
	["ն"] = {"նու"},
	["Շ"] = {"շա"},
	["շ"] = {"շա"},
	["Ո"] = {"վօ","ո"},
	["ո"] = {"վօ","ո"},
	["Չ"] = {"չա"},
	["չ"] = {"չա"},
	["Պ"] = {"պէ","պե"},
	["պ"] = {"պէ","պե"},
	["Ջ"] = {"ջէ","ջե"},
	["ջ"] = {"ջէ","ջե"},
	["Ռ"] = {"ռա"},
	["ռ"] = {"ռա"},
	["Ս"] = {"սէ","սե"},
	["ս"] = {"սէ","սե"},
	["Վ"] = {"վև","վեվ"},
	["վ"] = {"վև","վեվ"},
	["Տ"] = {"տիւն","տյուն"},
	["տ"] = {"տիւն","տյուն"},
	["Ր"] = {"րէ","րե"},
	["ր"] = {"րէ","րե"},
	["Ց"] = {"ցօ","ցո"},
	["ց"] = {"ցօ","ցո"},
	["Ւ"] = {"հիւն","վյուն"},
	["ւ"] = {"հիւն","վյուն"},
	["Փ"] = {"փիւր","փյուր"},
	["փ"] = {"փիւր","փյուր"},
	["Ք"] = {"քէ","քե"},
	["ք"] = {"քէ","քե"},
	["Օ"] = {"օ"},
	["օ"] = {"օ"},
	["Ֆ"] = {"ֆէ","ֆե"},
	["ֆ"] = {"ֆէ","ֆե"},
	["Ու"] = {"ու"},
	["ու"] = {"ու"},
	["և"] = {"եվ"},
}

-- Punctuation that delimits fragments of the text; each occurrence is turned
-- into an IPA foot boundary ("|").
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
-- Wider punctuation class (adds the various quotation marks); whatever is left
-- of it after the boundary step is simply deleted.
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

-- Ordered { pattern, replacement } steps run by normalizar through
-- m_str.gsub_rep. The order is significant: boundaries are created first and
-- tidied up last.
local PASOS_NORMALIZACION = {
	-- fragment delimiters become IPA foot boundaries
	{ PUNTUACION, " | " },
	-- any remaining punctuation is dropped
	{ PUNTUACION_EXTRA, "" },
	-- hyphens become spaces (e.g. hyphenated compounds)
	{ "[%-‐]", " " },
	-- collapse consecutive boundaries into a single one
	{ "%s*|%s*|%s*", " | " },
	-- collapse runs of whitespace
	{ "%s+", " " },
}

--- Prepares raw text for transcription.
-- Lower-cases the text, converts delimiting punctuation into " | " boundaries,
-- removes the remaining punctuation, turns hyphens into spaces and finally
-- strips leading/trailing whitespace and boundary bars.
-- @param texto the text to normalise
-- @return the normalised text
local function normalizar(texto)
	local limpio = strlower(texto)

	for _, paso in ipairs(PASOS_NORMALIZACION) do
		limpio = strsubrep(limpio, paso[1], paso[2])
	end

	limpio = strstrip(limpio, "[%s|]+")
	return limpio
end

-- Applies an ordered list of { pattern, replacement } rewrites to `text` and
-- returns the result. Rules run strictly in list order, so a later rule sees
-- the output of every earlier one.
local function apply_rules(text, rules)
	for _, rule in ipairs(rules) do
		text = strsubn(text, rule[1], rule[2])
	end
	return text
end

-- Insertion of the optional glide (j) between i and an adjacent vowel.
local GLIDE_RULES = {
	{ "iɑ", "i(j)ɑ" },
	{ "ie", "i(j)e" },
	{ "io", "i(j)o" },
	{ "iu", "i(j)u" },
	{ "ɑi", "ɑ(j)i" },
	{ "ei", "e(j)i" },
	{ "oi", "o(j)i" },
	{ "ui", "u(j)i" },
}

-- Assimilation: ppʰ = pʰː; ttʰ = tʰː; kkʰ = kʰː, followed by nasal
-- assimilation of n before velars.
local ASSIMILATION_RULES = {
	{ "ppʰ", "pʰː" },
	{ "ttʰ", "tʰː" },
	-- The original pattern carried a stray trailing space ("kkʰ "), so it only
	-- matched before a space and swallowed that space; it now mirrors the two
	-- rules above.
	{ "kkʰ", "kʰː" },
	{ "n([ɡk]+)", "ŋ%1" },
}

-- Devoicing of consonants in some positions (applied for both systems).
local DEVOICING_RULES = {
	-- before pʰ
	{ "bpʰ", "pʰː" },
	{ "dpʰ", "tʰpʰ" },
	{ "ɡpʰ", "kʰpʰ" },
	{ "d͡zpʰ", "t͡sʰpʰ" },
	{ "d͡ʒpʰ", "t͡ʃʰpʰ" },
	{ "vpʰ", "fpʰ" },
	{ "ʒpʰ", "ʃpʰ" },

	-- before tʰ
	{ "btʰ", "pʰtʰ" },
	{ "dtʰ", "tʰː" },
	{ "ɡtʰ", "kʰtʰ" },
	{ "d͡ztʰ", "t͡sʰtʰ" },
	{ "d͡ʒtʰ", "t͡ʃʰtʰ" },
	{ "vtʰ", "ftʰ" },
	{ "ʒtʰ", "ʃtʰ" },

	-- before kʰ
	{ "bkʰ", "pʰkʰ" },
	-- NOTE(review): every sibling rule devoices d to tʰ (dpʰ -> tʰpʰ,
	-- dt͡ʃʰ -> tʰt͡ʃʰ); "tkʰ" here may be a typo for "tʰkʰ". Kept as in the
	-- original until confirmed.
	{ "dkʰ", "tkʰ" },
	{ "ɡkʰ", "kʰː" },
	{ "d͡zkʰ", "t͡sʰkʰ" },
	{ "d͡ʒkʰ", "t͡ʃʰkʰ" },
	{ "vkʰ", "fkʰ" },
	{ "ʒkʰ", "ʃkʰ" },

	-- before t͡ʃʰ
	{ "bt͡ʃʰ", "pʰt͡ʃʰ" },
	{ "dt͡ʃʰ", "tʰt͡ʃʰ" },
	{ "ɡt͡ʃʰ", "kʰt͡ʃʰ" },
	{ "d͡zt͡ʃʰ", "t͡sʰt͡ʃʰ" },
	{ "d͡ʒt͡ʃʰ", "t͡ʃʰː" },
	{ "vt͡ʃʰ", "ft͡ʃʰ" },
	{ "ʒt͡ʃʰ", "ʃt͡ʃʰ" },

	-- before t͡sʰ
	{ "bt͡sʰ", "pʰt͡sʰ" },
	{ "dt͡sʰ", "tʰt͡sʰ" },
	{ "ɡt͡sʰ", "kʰt͡sʰ" },
	{ "d͡zt͡sʰ", "t͡sʰː" },
	{ "d͡ʒt͡sʰ", "t͡ʃʰt͡sʰ" },
	{ "vt͡sʰ", "ft͡sʰ" },
	{ "ʒt͡sʰ", "ʃt͡sʰ" },

	-- z before aspirated stops
	{ "zpʰ", "spʰ" },
	{ "ztʰ", "stʰ" },
	{ "zkʰ", "skʰ" },

	-- ʁ before voiceless obstruents
	{ "ʁt͡s", "χt͡s" },
	{ "ʁt͡ʃ", "χt͡ʃ" },
	{ "ʁp", "χp" },
	{ "ʁt", "χt" },
	{ "ʁk", "χk" },
	{ "ʁs", "χs" },
	{ "ʁʃ", "χʃ" },

	-- v before voiceless obstruents
	{ "vt͡s", "ft͡s" },
	{ "vt͡ʃ", "ft͡ʃ" },
	{ "vp", "fp" },
	{ "vt", "ft" },
	{ "vk", "fk" },
	{ "vs", "fs" },
	{ "vʃ", "fʃ" },
}

-- Additional devoicing that is applied only for Western Armenian: voiced
-- obstruents after χ and after aspirated consonants.
local WEST_DEVOICING_RULES = {
	{ "χd͡z", "χt͡s" },
	{ "χd͡ʒ", "χt͡ʃ" },
	{ "χb", "χp" },
	{ "χd", "χt" },
	{ "χɡ", "χk" },

	{ "t͡ʃʰd͡z", "t͡ʃʰt͡s" },
	{ "t͡sʰd͡z", "t͡sʰt͡s" },
	{ "pʰd͡z", "pʰt͡s" },
	{ "tʰd͡z", "tʰt͡s" },
	{ "kʰd͡z", "kʰt͡s" },

	{ "t͡ʃʰd͡ʒ", "t͡ʃʰt͡ʃ" },
	{ "t͡sʰd͡ʒ", "t͡sʰt͡ʃ" },
	{ "pʰd͡ʒ", "pʰt͡ʃ" },
	{ "tʰd͡ʒ", "tʰt͡ʃ" },
	{ "kʰd͡ʒ", "kʰt͡ʃ" },

	{ "t͡ʃʰb", "t͡ʃʰp" },
	{ "t͡sʰb", "t͡sʰp" },
	{ "pʰb", "pʰp" },
	{ "tʰb", "tʰp" },
	{ "kʰb", "kʰp" },

	{ "t͡ʃʰd", "t͡ʃʰt" },
	{ "t͡sʰd", "t͡sʰt" },
	{ "pʰd", "pʰt" },
	{ "tʰd", "tʰt" },
	{ "kʰd", "kʰt" },

	{ "t͡ʃʰɡ", "t͡ʃʰk" },
	{ "t͡sʰɡ", "t͡sʰk" },
	{ "pʰɡ", "pʰk" },
	{ "tʰɡ", "tʰk" },
	{ "kʰɡ", "kʰk" },
}

-- Adds the IPA stress mark to a single whitespace-delimited word of the
-- transcription. Used as a gsub callback: it returns nil for words with at
-- most one vowel, which leaves such words unchanged.
--
-- The stress is placed on the last syllable not formed by schwa [ə]. When the
-- stressed vowel is marked explicitly with the Armenian stress character <՛>
-- (e.g. մի՛թե), that mark is consumed and decides the position instead.
local function add_stress(word)
	-- Count the vowels: replacing each vowel by itself yields the count as the
	-- second return value.
	local _, vowel_count = strsubn(word, "[ɑeəoiuœʏ]", "%0")
	if vowel_count <= 1 then
		return nil
	end

	-- 1) Explicit stress: a vowel followed by <՛>. The mark goes in front of
	--    the consonants that precede that vowel.
	local rcount
	word, rcount = strsubn(word, "([^ɑeoiuœʏə]*[ɑeoiuœʏə])՛", "ˈ%1")
	if rcount == 0 then
		-- 2) Default stress: the final syllable if its vowel is not schwa ...
		word = strsubn(word, "([^ɑeoiuœʏə]*[ɑeoiuœʏ][^ɑeoiuœʏə]*)$", "ˈ%1")
		--    ... otherwise the non-schwa vowel before a final schwa syllable.
		word = strsubn(word, "([^ɑeoiuœʏə]*[ɑeəoiuœʏ]?[ɑeoiuœʏ][^ɑeoiuœʏə]*ə[^ɑeoiuœʏə]*)$", "ˈ%1")
	end
	-- 3) If the mark sits between a vowel and a cluster of consonants, move it
	--    so that only the last consonant belongs to the stressed syllable.
	--    Including () in the second and third sets will only work
	--    if () never encloses a vowel.
	word = strsubn(word, "([ɑeəoiuœʏ])ˈ([^ɑeoiuœʏə()]+)([^ɑeoiuœʏəːˈʰ()])", "%1%2ˈ%3")
	-- Never split an affricate: move a mark that landed after the tie bar to
	-- the front of the affricate.
	word = strsubn(word, "(.)͡ˈ", "ˈ%1͡")
	return word
end

--- Transcribes Armenian text into IPA for one pronunciation system.
-- @param word the Armenian-script text (it is normalised with normalizar first)
-- @param system "east" or "west", i.e. a key present in both
--   phonetic_chars_map and phonetic_2chars_map
-- @return the transcription after being passed through m_str.encode_html
-- Raises an error ("Invalid system ...") when `system` is not a known key.
-- NOTE(review): the rules anchored with "^" (word-initial ե/ո and the Western
-- prothetic ə) only apply at the start of the whole string, not at the start
-- of every word of a multi-word input — confirm that this is intended.
local function _pronunciation(word, system)
	local chars_map = phonetic_chars_map[system]
	local pair_rules = phonetic_2chars_map[system]
	if not (chars_map and pair_rules) then
		error("Invalid system " .. tostring(system))
	end

	local phonetic = normalizar(word)

	-- long consonants that are orthographically geminated
	phonetic = strsubn(phonetic, "(.)%1", "%1ː")

	-- multi-character sequences, in the order given by the map
	for _, replacement in ipairs(pair_rules) do
		phonetic = strsubn(phonetic, unpack(replacement))
	end

	-- ոու is pronounced ou; at this point the gemination step above has
	-- turned the doubled ո into ոː
	phonetic = strsubn(phonetic, "ոːւ", "օու")

	-- ե and ո are pronounced as je and vo word-initially,
	phonetic = strsubn(phonetic, "^ե", "յէ")
	phonetic = strsubn(phonetic, "^ո", "վօ")
	-- except when followed by another վ.
	phonetic = strsubn(phonetic, "^վօվ", "օվ")

	-- literal ոու (kept from the original; the steps above normally rewrite
	-- this sequence before it can match here)
	phonetic = strsubn(phonetic, "ոու", "օու")

	-- single characters: every character is looked up in the map
	phonetic = strsubn(phonetic, '.', chars_map)

	-- the օու produced above is now "oov", which is actually ou
	phonetic = strsubn(phonetic, "oov", "ou")

	phonetic = apply_rules(phonetic, GLIDE_RULES)
	phonetic = apply_rules(phonetic, ASSIMILATION_RULES)

	-- palatalization in the Eastern Armenian sequence -ությ-, especially in
	-- the suffix -ություն [considered non-standard by strict prescriptivists]
	if system == "east" then
		phonetic = strsubn(phonetic, "utʰj", "ut͡sʰj")
	end

	phonetic = apply_rules(phonetic, DEVOICING_RULES)

	if system == "west" then
		phonetic = apply_rules(phonetic, WEST_DEVOICING_RULES)

		-- prothetic ə before {s/ʃ/z}{p/t/k/b/d/g} in Western Armenian; this
		-- rule is not the norm in Eastern Armenian anymore
		phonetic = strsubn(phonetic, "^([sʃz][ptkbdɡ]+)", "ə%1")
	end

	-- generating the stress, word by word
	phonetic = strsubn(phonetic, "%S+", add_stress)

	-- correcting the stress position in some cases
	if system == "east" then
		phonetic = strsubn(phonetic, "ut͡sʰˈj", "uˈt͡sʰj")
	end

	-- move stress marker out of opening/closing parentheses (the system was
	-- validated above, so this applies to both systems)
	phonetic = strsubn(phonetic, "ˈ%)", ")ˈ")
	phonetic = strsubn(phonetic, "%(ˈ", "ˈ(")

	return strhtml(phonetic)
end

--- Builds the pronunciation data for both Armenian standards.
-- @param x the Armenian-script text to transcribe
-- @return two parallel lists: the labels ("oriental", "occidental") and, for
--   each label, a list holding the transcription produced by _pronunciation
--   for the "east" and "west" systems respectively
local function generar_pron(x)
	local labels = {
		{ "oriental" },
		{ "occidental" },
	}
	local transcriptions = {
		{ _pronunciation(x, "east") },
		{ _pronunciation(x, "west") },
	}
	return labels, transcriptions
end

--- Completes the pronunciation arguments of an entry.
-- If the title is a letter listed in pron_abc, the letter name(s) replace
-- args["ayuda"]; if args["ayuda"] has no first element, the title itself is
-- used. When neither args["fone"] nor args["fono"] has a first element, the
-- transcriptions are generated from args["ayuda"][1] and stored in
-- args["pron"] / args["fone"], and args["rima"] is derived from the first
-- (Eastern) transcription.
-- @param titulo the page title
-- @param args argument table; assumed to already hold tables under "ayuda",
--   "fone" and "fono" (they are indexed without a nil check — TODO confirm
--   against the caller)
-- @return the same `args` table, modified in place
function export.procesar_pron_args(titulo, args)
	local x = pron_abc[titulo]
	if x then
		args["ayuda"] = x
	end
	if not args["ayuda"][1] then
		args["ayuda"][1] = titulo
	end

	if not args["fone"][1] and not args["fono"][1] then
		args["pron"], args["fone"] = generar_pron(args["ayuda"][1])
		local rim = args["fone"][1][1]
		-- keep only what follows the last stress mark, if there is one
		rim = strsubn(rim, "^.*ˈ(.-)$", "%1")
		-- Drop the onset: the rhyme starts at the first vowel. The vowel class
		-- must be the inventory this module actually produces ([ɑeəoiuœʏ]);
		-- the previous class [ɑɛəiɔu] lacked e, o, œ and ʏ, so the onset was
		-- never stripped when one of those was the stressed vowel.
		args["rima"] = strsubn(rim, ".-([ɑeəoiuœʏ].*)$", "%1")
	end

	return args
end

return export