Módulo:generar-pron/hy
La documentación para este módulo puede ser creada en Módulo:generar-pron/hy/doc
-- Tomado de en.wikt, introducido en es.wikt por Tmagc
local export = {}
local unpack = unpack or table.unpack
local m_str = require("Módulo:String")
local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strsubrep = m_str.gsub_rep
local strmatch = m_str.match
local strmatchit = m_str.gmatch
local strsplit = m_str.split
local strstrip = m_str.strip
local strlower = m_str.lower
local strlen = m_str.len
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strhtml = m_str.encode_html
-- Single characters that map to IPA sounds, keyed by pronunciation system
-- ("east" / "west"). _pronunciation passes the sub-table for the selected
-- system as the replacement table of a per-character substitution, so any
-- character without an entry here is left untouched by that step.
-- NOTE(review): the key "-" is written twice on the last row of each
-- sub-table (once with value " ", once with value ""). If both keys are the
-- same code point, only one of the two entries can take effect; confirm
-- whether one of them was meant to be a different dash-like character.
local phonetic_chars_map = {
-- Eastern Armenian
east = {
["ա"]="ɑ", ["բ"]="b", ["գ"]="ɡ", ["դ"]="d", ["ե"]="e", ["զ"]="z",
["է"]="e", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
["խ"]="χ", ["ծ"]="t͡s", ["կ"]="k", ["հ"]="h", ["ձ"]="d͡z", ["ղ"]="ʁ",
["ճ"]="t͡ʃ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="o",
["չ"]="t͡ʃʰ", ["պ"]="p", ["ջ"]="d͡ʒ", ["ռ"]="r", ["ս"]="s", ["վ"]="v",
["տ"]="t", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
["և"]="ev", ["օ"]="o", ["ֆ"]="f", ["-"]=" ", ["՚"]="", ["-"]=""
},
-- Western Armenian
west = {
["ա"]="ɑ", ["բ"]="pʰ", ["գ"]="kʰ", ["դ"]="tʰ", ["ե"]="e", ["զ"]="z",
["է"]="e", ["ը"]="ə", ["թ"]="tʰ", ["ժ"]="ʒ", ["ի"]="i", ["լ"]="l",
["խ"]="χ", ["ծ"]="d͡z", ["կ"]="ɡ", ["հ"]="h", ["ձ"]="t͡sʰ", ["ղ"]="ʁ",
["ճ"]="d͡ʒ", ["մ"]="m", ["յ"]="j", ["ն"]="n", ["շ"]="ʃ", ["ո"]="o",
["չ"]="t͡ʃʰ", ["պ"]="b", ["ջ"]="t͡ʃʰ", ["ռ"]="ɾ", ["ս"]="s", ["վ"]="v",
["տ"]="d", ["ր"]="ɾ", ["ց"]="t͡sʰ", ["ւ"]="v", ["փ"]="pʰ", ["ք"]="kʰ",
["և"]="ev", ["օ"]="o", ["ֆ"]="f", ["-"]=" ", ["՚"]="", ["-"]=""
},
}
-- Ordered multi-character rewrite rules, keyed by pronunciation system.
-- Each rule is a { pattern, replacement } pair that _pronunciation unpacks
-- into a substitution call, in list order, before the single-character map
-- is applied. The replacement may be a string or a function.
local phonetic_2chars_map = {
	east = {
		{ 'ու', 'u' },
	},
	west = {
		-- յու becomes ʏ, except when nothing precedes it, when the preceding
		-- text ends in whitespace or one of the vowel letters ա ե է ի ո օ,
		-- or when the preceding text is exactly ու. In those cases the
		-- callback returns no value, which leaves the match unchanged.
		{
			'(.?.?)յու',
			function(before)
				local blocked = before == ''
					or strfind(before, '[%sաեէիոօ]$')
					or before == "ու"
				if blocked then
					return
				end
				return before .. 'ʏ'
			end,
		},
		{ 'ու', 'u' },
		{ 'էօ', 'œ' },
		-- պ, տ, կ are not voiced after ս and շ
		{ 'սպ', 'sp' },
		{ 'ստ', 'st' },
		{ 'սկ', 'sk' },
		{ 'շպ', 'ʃp' },
		{ 'շտ', 'ʃt' },
		{ 'շկ', 'ʃk' },
		-- Western Armenian inserts ə in the causative
		{ 'ցնել', 't͡sʰənel' },
	},
}
-- The alphabet. Maps a page title consisting of a single letter (upper or
-- lower case), the digraph Ու/ու, or the ligature և to a list of spellings
-- that export.procesar_pron_args stores in args["ayuda"] as the text to
-- pronounce instead of the bare letter. Only the first spelling of each
-- list is used when the pronunciation is generated automatically.
-- NOTE(review): the spellings look like the letter names, with a second
-- orthographic variant where one exists; confirm which orthography each
-- position is meant to hold.
local pron_abc = {
["Ա"] = {"այբ"},
["ա"] = {"այբ"},
["Բ"] = {"բեն"},
["բ"] = {"բեն"},
["Գ"] = {"գիմ"},
["գ"] = {"գիմ"},
["Դ"] = {"դա"},
["դ"] = {"դա"},
["Ե"] = {"եչ"},
["ե"] = {"եչ"},
["Զ"] = {"զա"},
["զ"] = {"զա"},
["Է"] = {"է"},
["է"] = {"է"},
["Ը"] = {"ըթ"},
["ը"] = {"ըթ"},
["Թ"] = {"թօ","թո"},
["թ"] = {"թօ","թո"},
["Ժ"] = {"ժէ","ժե"},
["ժ"] = {"ժէ","ժե"},
["Ի"] = {"ին"},
["ի"] = {"ին"},
["Լ"] = {"լիւն","լյուն"},
["լ"] = {"լիւն","լյուն"},
["Խ"] = {"խէ","խե"},
["խ"] = {"խէ","խե"},
["Ծ"] = {"ծա"},
["ծ"] = {"ծա"},
["Կ"] = {"կեն"},
["կ"] = {"կեն"},
["Հ"] = {"հօ","հո"},
["հ"] = {"հօ","հո"},
["Ձ"] = {"ձա"},
["ձ"] = {"ձա"},
["Ղ"] = {"ղատ"},
["ղ"] = {"ղատ"},
["Ճ"] = {"ճէ","ճե"},
["ճ"] = {"ճէ","ճե"},
["Մ"] = {"մեն"},
["մ"] = {"մեն"},
["Յ"] = {"յի","հի"},
["յ"] = {"յի","հի"},
["Ն"] = {"նու"},
["ն"] = {"նու"},
["Շ"] = {"շա"},
["շ"] = {"շա"},
["Ո"] = {"վօ","ո"},
["ո"] = {"վօ","ո"},
["Չ"] = {"չա"},
["չ"] = {"չա"},
["Պ"] = {"պէ","պե"},
["պ"] = {"պէ","պե"},
["Ջ"] = {"ջէ","ջե"},
["ջ"] = {"ջէ","ջե"},
["Ռ"] = {"ռա"},
["ռ"] = {"ռա"},
["Ս"] = {"սէ","սե"},
["ս"] = {"սէ","սե"},
["Վ"] = {"վև","վեվ"},
["վ"] = {"վև","վեվ"},
["Տ"] = {"տիւն","տյուն"},
["տ"] = {"տիւն","տյուն"},
["Ր"] = {"րէ","րե"},
["ր"] = {"րէ","րե"},
["Ց"] = {"ցօ","ցո"},
["ց"] = {"ցօ","ցո"},
["Ւ"] = {"հիւն","վյուն"},
["ւ"] = {"հիւն","վյուն"},
["Փ"] = {"փիւր","փյուր"},
["փ"] = {"փիւր","փյուր"},
["Ք"] = {"քէ","քե"},
["ք"] = {"քէ","քե"},
["Օ"] = {"օ"},
["օ"] = {"օ"},
["Ֆ"] = {"ֆէ","ֆե"},
["ֆ"] = {"ֆէ","ֆե"},
["Ու"] = {"ու"},
["ու"] = {"ու"},
["և"] = {"եվ"},
}
-- Punctuation that delimits fragments of the text.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
-- Every punctuation mark that must not survive normalization.
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

--- Normalize raw text before it is converted to IPA.
-- The text is lower-cased, then a fixed sequence of substitutions is applied
-- in order, and finally leading/trailing whitespace and bars are stripped.
-- @param texto the text to normalize
-- @return the normalized text
local function normalizar(texto)
	-- { pattern, replacement } pairs; order matters.
	local pasos = {
		-- fragment-delimiting punctuation becomes the IPA foot boundary |
		{ PUNTUACION, " | " },
		-- any punctuation still left is dropped
		{ PUNTUACION_EXTRA, "" },
		-- hyphens become spaces (compound-style words)
		{ "[%-‐]", " " },
		-- collapse repeated bars together with their surrounding spaces
		{ "%s*|%s*|%s*", " | " },
		-- collapse runs of whitespace
		{ "%s+", " " },
	}
	local limpio = strlower(texto)
	for _, paso in ipairs(pasos) do
		limpio = strsubrep(limpio, paso[1], paso[2])
	end
	limpio = strstrip(limpio, "[%s|]+")
	return limpio
end
-- The rule tables below are ordered lists of { pattern, replacement } pairs.
-- They are applied strictly in list order; later rules rely on the output of
-- earlier ones, so do not reorder them.

-- Insertion of the optional glide between i and an adjacent vowel.
local HY_GLIDE_RULES = {
	{ "iɑ", "i(j)ɑ" },
	{ "ie", "i(j)e" },
	{ "io", "i(j)o" },
	{ "iu", "i(j)u" },
	{ "ɑi", "ɑ(j)i" },
	{ "ei", "e(j)i" },
	{ "oi", "o(j)i" },
	{ "ui", "u(j)i" },
}

-- Assimilation: ppʰ = pʰː; ttʰ = tʰː; kkʰ = kʰː
local HY_ASPIRATE_ASSIMILATION_RULES = {
	{ "ppʰ", "pʰː" },
	{ "ttʰ", "tʰː" },
	-- The pattern used to be "kkʰ " (with a trailing space), so the rule only
	-- fired before a space and deleted that space. It now matches like its
	-- two siblings above.
	{ "kkʰ", "kʰː" },
}

-- Devoicing of consonants in some positions (both systems).
local HY_DEVOICING_RULES = {
	{ "bpʰ", "pʰː" },
	{ "dpʰ", "tʰpʰ" },
	{ "ɡpʰ", "kʰpʰ" },
	{ "d͡zpʰ", "t͡sʰpʰ" },
	{ "d͡ʒpʰ", "t͡ʃʰpʰ" },
	{ "vpʰ", "fpʰ" },
	{ "ʒpʰ", "ʃpʰ" },
	{ "btʰ", "pʰtʰ" },
	{ "dtʰ", "tʰː" },
	{ "ɡtʰ", "kʰtʰ" },
	{ "d͡ztʰ", "t͡sʰtʰ" },
	{ "d͡ʒtʰ", "t͡ʃʰtʰ" },
	{ "vtʰ", "ftʰ" },
	{ "ʒtʰ", "ʃtʰ" },
	{ "bkʰ", "pʰkʰ" },
	-- NOTE(review): every sibling rule turns d into tʰ; this one yields a
	-- plain t. Kept as-is; confirm whether "tʰkʰ" was intended.
	{ "dkʰ", "tkʰ" },
	{ "ɡkʰ", "kʰː" },
	{ "d͡zkʰ", "t͡sʰkʰ" },
	{ "d͡ʒkʰ", "t͡ʃʰkʰ" },
	{ "vkʰ", "fkʰ" },
	{ "ʒkʰ", "ʃkʰ" },
	{ "bt͡ʃʰ", "pʰt͡ʃʰ" },
	{ "dt͡ʃʰ", "tʰt͡ʃʰ" },
	{ "ɡt͡ʃʰ", "kʰt͡ʃʰ" },
	{ "d͡zt͡ʃʰ", "t͡sʰt͡ʃʰ" },
	{ "d͡ʒt͡ʃʰ", "t͡ʃʰː" },
	{ "vt͡ʃʰ", "ft͡ʃʰ" },
	{ "ʒt͡ʃʰ", "ʃt͡ʃʰ" },
	{ "bt͡sʰ", "pʰt͡sʰ" },
	{ "dt͡sʰ", "tʰt͡sʰ" },
	{ "ɡt͡sʰ", "kʰt͡sʰ" },
	{ "d͡zt͡sʰ", "t͡sʰː" },
	{ "d͡ʒt͡sʰ", "t͡ʃʰt͡sʰ" },
	{ "vt͡sʰ", "ft͡sʰ" },
	{ "ʒt͡sʰ", "ʃt͡sʰ" },
	{ "zpʰ", "spʰ" },
	{ "ztʰ", "stʰ" },
	{ "zkʰ", "skʰ" },
	{ "ʁt͡s", "χt͡s" },
	{ "ʁt͡ʃ", "χt͡ʃ" },
	{ "ʁp", "χp" },
	{ "ʁt", "χt" },
	{ "ʁk", "χk" },
	{ "ʁs", "χs" },
	{ "ʁʃ", "χʃ" },
	{ "vt͡s", "ft͡s" },
	{ "vt͡ʃ", "ft͡ʃ" },
	{ "vp", "fp" },
	{ "vt", "ft" },
	{ "vk", "fk" },
	{ "vs", "fs" },
	{ "vʃ", "fʃ" },
}

-- Additional devoicing applied only for the "west" system: voiced stops and
-- affricates after χ and after aspirated consonants.
local HY_WEST_DEVOICING_RULES = {
	{ "χd͡z", "χt͡s" },
	{ "χd͡ʒ", "χt͡ʃ" },
	{ "χb", "χp" },
	{ "χd", "χt" },
	{ "χɡ", "χk" },
	{ "t͡ʃʰd͡z", "t͡ʃʰt͡s" },
	{ "t͡sʰd͡z", "t͡sʰt͡s" },
	{ "pʰd͡z", "pʰt͡s" },
	{ "tʰd͡z", "tʰt͡s" },
	{ "kʰd͡z", "kʰt͡s" },
	{ "t͡ʃʰd͡ʒ", "t͡ʃʰt͡ʃ" },
	{ "t͡sʰd͡ʒ", "t͡sʰt͡ʃ" },
	{ "pʰd͡ʒ", "pʰt͡ʃ" },
	{ "tʰd͡ʒ", "tʰt͡ʃ" },
	{ "kʰd͡ʒ", "kʰt͡ʃ" },
	{ "t͡ʃʰb", "t͡ʃʰp" },
	{ "t͡sʰb", "t͡sʰp" },
	{ "pʰb", "pʰp" },
	{ "tʰb", "tʰp" },
	{ "kʰb", "kʰp" },
	{ "t͡ʃʰd", "t͡ʃʰt" },
	{ "t͡sʰd", "t͡sʰt" },
	{ "pʰd", "pʰt" },
	{ "tʰd", "tʰt" },
	{ "kʰd", "kʰt" },
	{ "t͡ʃʰɡ", "t͡ʃʰk" },
	{ "t͡sʰɡ", "t͡sʰk" },
	{ "pʰɡ", "pʰk" },
	{ "tʰɡ", "tʰk" },
	{ "kʰɡ", "kʰk" },
}

-- Apply an ordered list of { pattern, replacement } rules to text.
local function hy_apply_rules(text, rules)
	for _, rule in ipairs(rules) do
		text = strsubn(text, rule[1], rule[2])
	end
	return text
end

-- Add the IPA stress mark to one whitespace-delimited word.
-- Returns nothing for words with at most one vowel, which leaves the word
-- unchanged when this function is used as a substitution callback.
local function hy_add_stress(word)
	-- Do not add a stress mark for monosyllabic words: count the vowels
	-- [ɑeəoiuœʏ] through the replacement count of a no-op substitution.
	local vowel_count = select(2, strsubn(word, "[ɑeəoiuœʏ]", "%0"))
	-- If polysyllabic, the stress goes on the last syllable not formed by
	-- schwa [ə]. In rare cases the stressed vowel is marked explicitly with
	-- the Armenian stress character <՛>, e.g. մի՛թե. So:
	-- 1) Find the vowel followed by <՛>. If there is one, mark the stress in
	--    front of it (and its preceding consonants); otherwise do step 2.
	-- 2) Find the last non-schwa vowel, i.e. [ɑeoiuœʏ], and mark the stress
	--    in front of the consonants preceding it.
	-- 3) If the mark ended up directly after a vowel with more than one
	--    consonant following, shift it so that only the last consonant
	--    stays in the stressed syllable.
	if vowel_count > 1 then
		local rcount
		word, rcount = strsubn(word, "([^ɑeoiuœʏə]*[ɑeoiuœʏə])՛", "ˈ%1")
		if rcount == 0 then
			word = strsubn(word, "([^ɑeoiuœʏə]*[ɑeoiuœʏ][^ɑeoiuœʏə]*)$", "ˈ%1")
			word = strsubn(word, "([^ɑeoiuœʏə]*[ɑeəoiuœʏ]?[ɑeoiuœʏ][^ɑeoiuœʏə]*ə[^ɑeoiuœʏə]*)$", "ˈ%1")
		end
		-- Including () in the second and third sets will only work
		-- if () never encloses a vowel.
		word = strsubn(word, "([ɑeəoiuœʏ])ˈ([^ɑeoiuœʏə()]+)([^ɑeoiuœʏəːˈʰ()])", "%1%2ˈ%3")
		-- never leave the stress mark inside an affricate tie bar
		word = strsubn(word, "(.)͡ˈ", "ˈ%1͡")
		return word
	end
end

--- Convert Armenian text to an IPA transcription.
-- @param word the text in Armenian script; it is passed through normalizar
-- @param system "east" or "west"; selects the rule tables to use
-- @return the transcription after being passed through m_str.encode_html
-- Raises an error when no rule tables exist for the given system.
local function _pronunciation(word, system)
	if not (phonetic_chars_map[system] and phonetic_2chars_map[system]) then
		error("Invalid system " .. tostring(system))
	end
	local phonetic = normalizar(word)

	-- orthographically doubled characters become long
	phonetic = strsubn(phonetic, "(.)%1", "%1ː")
	for _, replacement in ipairs(phonetic_2chars_map[system]) do
		phonetic = strsubn(phonetic, unpack(replacement))
	end
	-- ոու is pronounced ou (the doubling step above has turned ոո into ոː)
	phonetic = strsubn(phonetic, "ոːւ", "օու")
	-- ե and ո are pronounced as je and vo word-initially.
	-- NOTE(review): these patterns are anchored with ^, so they only apply at
	-- the very start of the text, not to later words of a multi-word input.
	phonetic = strsubn(phonetic, "^ե", "յէ")
	phonetic = strsubn(phonetic, "^ո", "վօ")
	-- except when followed by another վ.
	phonetic = strsubn(phonetic, "^վօվ", "օվ")
	-- ոու is pronounced oov
	phonetic = strsubn(phonetic, "ոու", "օու")
	-- per-character mapping; characters missing from the map stay unchanged
	phonetic = strsubn(phonetic, '.', phonetic_chars_map[system])
	-- oov is actually ou
	phonetic = strsubn(phonetic, "oov", "ou")

	phonetic = hy_apply_rules(phonetic, HY_GLIDE_RULES)
	phonetic = hy_apply_rules(phonetic, HY_ASPIRATE_ASSIMILATION_RULES)
	-- nasal assimilation
	phonetic = strsubn(phonetic, "n([ɡk]+)", "ŋ%1")
	-- Colloquial, non-standard rules that are intentionally NOT applied:
	-- pseudo-palatalization under the influence of Russian
	-- (tj → t͡sj, tʰj → t͡sʰj, dj → d͡zj) and trilling of ɾ (ɾt → rt).

	-- palatalization in the Eastern Armenian sequence -ությ-, especially in
	-- the suffix -ություն [considered non-standard by strict prescriptivists]
	if system == "east" then
		phonetic = strsubn(phonetic, "utʰj", "ut͡sʰj")
	end

	phonetic = hy_apply_rules(phonetic, HY_DEVOICING_RULES)
	if system == "west" then
		phonetic = hy_apply_rules(phonetic, HY_WEST_DEVOICING_RULES)
		-- prothetic ə before {s/ʃ/z}{p/t/k/b/d/g} in Western Armenian; this
		-- rule is not the norm in Eastern Armenian anymore
		phonetic = strsubn(phonetic, "^([sʃz][ptkbdɡ]+)", "ə%1")
	end

	-- generating the stress, one whitespace-delimited word at a time
	phonetic = strsubn(phonetic, "%S+", hy_add_stress)
	-- correcting the stress position in some cases
	if system == "east" then
		phonetic = strsubn(phonetic, "ut͡sʰˈj", "uˈt͡sʰj")
	end
	-- move stress marker out of opening/closing parentheses
	phonetic = strsubn(phonetic, "ˈ%)", ")ˈ")
	phonetic = strsubn(phonetic, "%(ˈ", "ˈ(")
	return strhtml(phonetic)
end
--- Build the labels and transcriptions for both pronunciation systems.
-- @param x the text to transcribe
-- @return a list of label lists ({"oriental"}, {"occidental"}) and a
-- parallel list holding, for each label, the values returned by
-- _pronunciation for the "east" and "west" systems respectively
local function generar_pron(x)
	local etiquetas = { { "oriental" }, { "occidental" } }
	local oriental = { _pronunciation(x, "east") }
	local occidental = { _pronunciation(x, "west") }
	return etiquetas, { oriental, occidental }
end
--- Fill in the pronunciation arguments of an entry.
-- If the title is a letter listed in pron_abc, args["ayuda"] is replaced by
-- that letter's spelling list; otherwise, when no help text was given, the
-- title itself is used. When neither args["fone"] nor args["fono"] has a
-- first element, args["pron"], args["fone"] and args["rima"] are generated
-- from the first help text.
-- @param titulo the page title
-- @param args the argument table; it is modified in place
-- @return the same args table
function export.procesar_pron_args(titulo, args)
	local letter_spellings = pron_abc[titulo]
	if letter_spellings then
		args["ayuda"] = letter_spellings
	end
	-- tolerate callers that did not pre-create the list
	if not args["ayuda"] then
		args["ayuda"] = {}
	end
	if not args["ayuda"][1] then
		args["ayuda"][1] = titulo
	end

	local fone, fono = args["fone"], args["fono"]
	local has_fone = fone and fone[1]
	local has_fono = fono and fono[1]
	if not has_fone and not has_fono then
		args["pron"], args["fone"] = generar_pron(args["ayuda"][1])
		-- The rhyme is taken from the first generated transcription: keep
		-- what follows the last stress mark, then drop everything before
		-- the first vowel.
		local rim = args["fone"][1][1]
		rim = strsubn(rim, "^.*ˈ(.-)$", "%1")
		-- The vowel class must contain the vowels this module emits
		-- (ɑ e ə i o u œ ʏ). The previous class listed ɛ and ɔ instead of
		-- e and o, so the onset was not stripped for those vowels. ɛ and ɔ
		-- are kept for compatibility with hand-written transcriptions.
		args["rima"] = strsubn(rim, ".-([ɑeɛəioɔuœʏ].*)$", "%1")
	end
	return args
end
return export