-- มอดูล:la-pronunc
-- หน้าตา
-- - This module lacks a documentation subpage. Please create it.
-- - Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local m_a = require("Module:accent qualifier")
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local lang = require("Module:languages").getByCode("la")
local concat = table.concat
local deep_equals = m_table.deepEquals
local gsplit = m_str_utils.gsplit
local insert = table.insert
local invert = m_table.invert
local list_to_set = m_table.listToSet
local remove = table.remove
local rfind = m_str_utils.find
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = m_str_utils.trim
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulower = m_str_utils.lower
local usub = m_str_utils.sub
local ulen = m_str_utils.len
local umatch = m_str_utils.match
-- Combining diacritics, pattern fragments and IPA length marks shared by the
-- rest of the module. `u` is the `char` function of Module:string utilities.
local MACRON = u(0x304) -- U+0304 COMBINING MACRON
local BREVE = u(0x306) -- U+0306 COMBINING BREVE
local TREMA = u(0x308) -- U+0308 COMBINING DIAERESIS
-- Pattern fragment: an optional macron, then an optional breve, then an
-- optional diaeresis. Used when normalizing the ligatures æ/œ in
-- export.convert_words().
local LENGTH = MACRON .. "?" .. BREVE .. "?" .. TREMA .. "?"
local TIE = u(0x361) -- U+0361 COMBINING DOUBLE INVERTED BREVE (tie bar)
-- IPA vowel symbols as they appear in the output, and the same list wrapped as
-- a pattern character class.
local VOWELS = "aeɛiɪoɔuʊyʏ"
local VOWEL = "[" .. VOWELS .. "]"
local TILDE = u(0x303) -- U+0303 COMBINING TILDE (nasalization)
local HALF_LONG = "ˑ"
local LONG = "ː"
-- Classical spelling-to-phoneme table. letters_to_ipa() consumes the longest
-- key that matches at the current position of the word; a table value (as for
-- "x") contributes several phonemes, and characters with no entry here are
-- passed through unchanged.
local letters_ipa = {
-- Long vowels (macron-marked letters).
["ā"] = "aː", ["ē"] = "eː", ["ī"] = "iː", ["ō"] = "oː", ["ū"] = "uː", ["ȳ"] = "yː",
-- Vowel digraphs; note that "ou" is rendered as the long monophthong "uː".
["ae"] = "ae̯", ["au"] = "au̯", ["ei"] = "ei̯", ["eu"] = "eu̯", ["oe"] = "oe̯", ["ou"] = "uː",
-- Single consonant letters whose IPA symbol differs from the letter.
["c"] = "k", ["g"] = "ɡ", ["q"] = "k", ["v"] = "w", ["x"] = {"k", "s"},
-- Aspirated digraphs.
["ph"] = "pʰ", ["th"] = "tʰ", ["ch"] = "kʰ", ["rh"] = "rʰ",
-- Labialized sequences; "qw" and "gw" are produced by convert_word() from
-- "qu"/"gu" before a vowel.
["qw"] = "kʷ", ["gw"] = "ɡʷ", ["sw"] = "sʷ",
-- An apostrophe in the input is an explicit stress mark.
["'"] = "ˈ",
}
-- Ecclesiastical spelling-to-phoneme table.
-- Only includes changes from letters_ipa above; the table is passed through
-- Module:table/setParent together with letters_ipa (presumably so that missing
-- keys fall back to the Classical table -- confirm against that module).
local letters_ipa_eccl = require("Module:table/setParent")({
["y"] = "i", ["ȳ"] = "iː",
["ae"] = "eː", ["oe"] = "eː",
["c"] = "c", -- placeholder; letters_to_ipa() later turns it into /k/, /t͡ʃ/ or /ʃ/
["v"] = "v",
["ph"] = "f",
}, letters_ipa)
-- Tense short vowel -> lax counterpart. Applied to every phoneme of a
-- Classical phonetic transcription in convert_word(); only exact matches of
-- these single-symbol keys are replaced.
local lax_vowel = {
["e"] = "ɛ",
["i"] = "ɪ",
["o"] = "ɔ",
["u"] = "ʊ",
-- No evidence for this, and Greek did not have a near-close lax front-rounded vowel as far as we can tell
-- ["y"] = "ʏ",
}
-- Reverse mapping (lax -> tense), built with Module:table's invert(); used by
-- the tensing and nasalization rules in phonetic_rules.
local tense_vowel = invert(lax_vowel)
-- Voiceless stop -> voiced counterpart, used by the regressive voicing
-- assimilation rules in phonetic_rules.
local voicing = {
["p"] = "b",
["t"] = "d",
["k"] = "ɡ",
}
-- Reverse mapping (voiced -> voiceless), built with Module:table's invert().
local devoicing = invert(voicing)
-- Classical phonetic rules. Each entry is a {pattern, replacement} pair;
-- convert_word() applies them in list order with gsub to the whole syllabified
-- word (which still contains "." syllable breaks and the "ˈ" stress mark), so
-- later rules see the output of earlier ones. A replacement may be a string or
-- a function receiving the captures.
local phonetic_rules = {
-- Bibliography included at the end
-- Assimilation of [g] to [ŋ] before a following /n/
{"ɡ([.ˈ]*)n", "ŋ%1n"},
-- Per Allen (1978: 23), although note the reservations expressed on the next page.
-- Assimilation of word-internal /n/ and /m/ to following consonants. Exception: /m/ does not assimilate to a following /n/.
{"[mn]([.ˈ]*)([kɡ])", "ŋ%1%2"},
{"m([.ˈ]*)([td])", "n%1%2"},
{"n([.ˈ]*)([mpb])", "m%1%2"},
-- Per George M. Lane: “Nasals changed their place of articulation to that of the following consonant. Thus, dental n before the labials p and b became the labial m... labial m before the gutturals c and g became guttural n...labial m before the dentals t, d, s became dental n...” (§164.3); “One nasal, n, is assimilated to another, m...but an m before n is never assimilated..." (§166.5).
-- Per Lloyd (1987: 84): “The opposition between nasals was neutralized in syllable-final position, with the realization of the nasality being assimilated to the point of articulation of the following consonant, e.g., [m] is found only before labials, [n] only before dentals or alveolars, and [ŋ] only before velars and /n/."
-- Potential addition: assimilation of final /m/ and /n/ across word boundaries, per e.g. Allen (1987: 28, 31).
-- No additional labialization before high back vowels
{"ʷ%f[uʊ]", ""},
-- Tensing of short vowels before another vowel
{
"(" .. VOWEL .. ")([.ˈ]+[h]?)%f" .. VOWEL,
function (v, following)
return (tense_vowel[v] or v) .. following
end,
},
-- But not before consonantal glides
{"e([iu]̯)", "ɛ%1"},
-- Nasal vowels: a word-final vowel (+ optional length mark) + m.
{
"(" .. VOWEL .. ")(" .. LONG .. "?)m$",
function (v, long)
-- 2025-05-15: Change per [[Wiktionary:Beer_parlour/2025/May#Retiring_dual_phonemic-phonetic_transcriptions_for_Latin]]
-- The condition is hard-wired to true, so the long nasal vowel is always
-- produced; the half-long fallback below is currently unreachable and
-- the `long` capture is unused.
if true then -- long == LONG then
return (tense_vowel[v] or v) .. TILDE .. LONG
end
return (lax_vowel[v] or v) .. TILDE .. HALF_LONG
end,
},
-- Nasal vowels: vowel + n/m before f or s (the nasal consonant is dropped).
{
"(" .. VOWEL .. ")[nm]([.ˈ]*[fs])",
function (v, following)
return (tense_vowel[v] or v) .. TILDE .. LONG .. following
end,
},
-- Realization of /r/ as a tap
-- Pultrová (2013) argues for Latin /r/ being an alveolar tap.
-- Lloyd (1987: 81) agrees: “The /r/ was doubtlessly an alveolar flap."
-- Allen (1978: 33) expresses doubt: “By the classical period there is no reason to think that the sound had not strengthened to the trill described by later writers.”
-- Unconditional [r] transcription is preferable to unconditional [ɾ] per 18 September 2021 discussion at [[Module_talk:la-IPA#Transcription_of_Latin's_rhotic_consonant]]
-- No consensus yet on how to implement conditional allophony of [r] vs. [ɾ]
-- Voicing and loss of intervocalic /h/.
{"([^ˈ].)h", "%1(ɦ)"},
-- Per Allen (1978: 43–45).
-- Phonetic (as opposed to lexical/phonemic) assimilations
-- Place
-- First because this accounts for 'atque' seemingly escaping total assimilation (and 'adque' presumably not)
{"d([.ˈ]*s%f[" .. VOWELS .. "ptk])", "s%1"}, -- leave [t] out since etsi has [ts], not [sː]
{"s[^ː]([.ˈ]*)s%f[ptk]", "s(ː)%1"},
{"st([.ˈ]+)([^" .. VOWELS .. "])", "s(t)%1%2"},
{"d([.ˈ]+)([pkɡln])", "%2%1%2"}, --leave [r] out since dr does not assimilate, even when heterosyllabic (e.g. quadrans), except in prefixed words
{"b([.ˈ]+)([mf])", "%2%1%2"},
{"s([.ˈ]+)(f)", "%2%1%2"},
-- Regressive voicing assimilation in consonant clusters
{
"([bdɡ])([.ˈ]*)%f[fkpst]",
function (consonant, following)
return (devoicing[consonant] or consonant) .. following
end,
},
{
"([ptk])([.ˈ]*)%f[bdɡz]",
function (consonant, following)
return (voicing[consonant] or consonant) .. following
end,
},
-- 2025-05-15: Numerous changes per [[Wiktionary:Beer_parlour/2025/May#Retiring_dual_phonemic-phonetic_transcriptions_for_Latin]]:
-- (1) simplify l-pinguis vs. l-exilis to just [ɫ] (formerly [ɫ̪]) vs. [l] (formerly [lʲ] in some circumstances);
-- consider simplifying further to use [l] before non-high-front vowels
-- (2) don't mark dental or alveolar notations on coronals
-- (3) don't mark centralized ä on [a]
-- Allophones of /l/
-- Every /l/ is first made dark; the two rules further down restore clear [l]
-- in geminates and before high front vowels.
{"l", "ɫ"},
-- “Pinguis”. Dark/velarized.
-- Per Weiss (2009: 117): “... pinguis (velar). l is exīlis before i and when geminate, otherwise l is pinguis.”
-- Page 82: “... l is pinguis even before e, e.g. Herculēs < Hercolēs ... < Hercelēs ...”
-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-; l pinguis occurred before any other vowel; before any consonant except l; and in word-final position [...] l pinguis actually had two degrees of avoirdupois, being fatter before a consonant than before a vowel...”
-- Page 41: “... velarized l (that is, ‘l pinguis’)...”
-- Sen (2015: §2) states that /l/ was velarized in word-final position or before consonants–other than another /l/–and that it had varying degrees of “dark resonance (velarization in articulatory terms)” (p. 23) before e, a, o, and u (p. 33).
-- Both Sen and Sihler indicate different degrees of velarization, depending on the environment. IPA lacks a way to represent these gradations, unfortunately.
{"ɫ([.ˈ]*)ɫ", "l%1l"},
{"ɫ([.ˈ]*[iɪyʏ])", "l%1"},
-- “Exīlis”. Not dark/velarized. Possibly palatalized.
-- Per Sen (2015: 29): It is plausible [...] that simple onset /l/ was palatalized before /i/, thus [lʲ] [...] it seems likely that geminate /ll/ was also palatalized, given the similar behaviour of the two...”
-- Per Weiss (2009: 82): “In Latin, l developed... a non-velar (possibly palatal) allophone called exīlis before i and when geminate...”
-- Per Sihler (1995: 174): “l exilis was found before the vowels -i- and -ī-, and before another -l-.”
-- Per Sihler (2000: §133.1): "It is less clear whether the 'thin' lateral [i.e. L exilis] was specifically palatal, or palatalized, or only neutral."
-- Giannini and Marotta apparently argue that it was not palatalized (https://i.imgur.com/ytM1QDn.png). I do not have access to the book in question.
-- Retracted /s/
-- {"s", "s̠"}, [already commented out sometime before 2025-05-15]
-- Lloyd (1987: 80–81) expresses some uncertainty about this, but appears to overall be in favour of it: “... the evidence that the apico-alveolar pronunciation was ancient in Latin and inherited from Indo-European is quite strong.”
-- Per Zampaulo (2019: 93), “... in many instances, Latin s was likely pronounced as an apical segment [s̺] (rather than laminal [s])."
-- Per Widdison (1987: 64), "In all, it would be fair to state that the apico-alveolar [ś] articulation represented the main allophonic variant of Latin and possibly IE /s/..."
-- dental Z
-- {"z", "z̪"}, [2025-05-15; see above]
-- Dental articulations
-- {"[td]", "%0̪"} ,[2025-05-15; see above]
-- {"n([.ˈ]*[td])", "n̪%1"}, --it's not as clear as for the stops [2025-05-15; see above]
--Allophones of A
-- {"a", "ä"}, [2025-05-15; see above]
-- Works cited
-- Allen, William Sidney. 1978. Vox Latina: A Guide to the pronunciation of Classical Latin.
-- Lane, George M. A Latin grammar for schools and colleges.
-- Lloyd, Paul M. 1987. From Latin to Spanish.
-- Pultrová, Lucie. 2013. On the phonetic nature of the Latin R.
-- Sen, Ranjan. 2015. Syllable and segment in Latin.
-- Sihler, Andrew L. 1995. New comparative grammar of Greek and Latin.
-- Sihler, Andrew L. 2000. Language history: An introduction.
-- Weiss, Michael. 2009. Outline of the historical and comparative grammar of Latin.
-- Widdison, Kirk A. 16th century Spanish sibilant reordering: Reasons for divergence.
-- Zampaulo, André. 2019. Palatal Sound Change in the Romance languages: Diachronic and Synchronic Perspectives.
}
-- Ecclesiastical phonetic rules. Same format as phonetic_rules: an ordered
-- list of {pattern, replacement} pairs that convert_word() applies one after
-- another to the syllabified word when a phonetic Ecclesiastical
-- transcription is requested.
local phonetic_rules_eccl = {
-- Specifically the Roman Ecclesiastical for singing from the Liber Usualis
{"([aɛeiɔou][ːˑ.ˈ]*)s([.ˈ]*)%f[aɛeiɔou]", "%1s̬%2"}, --partial voicing of s between vowels
{"s([.ˈ]*)%f[bdɡmnlv]", "z%1"}, --full voicing of s before voiced consonants
{"ek([.ˈ]*)s([aɛeiɔoubdɡmnlv])", "eɡ%1z%2"}, --voicing of the prefix ex-
{"kz", "ɡz"},
-- Tapped R intervocalically and in complex onset
-- ^ Citation needed for this being the case in Ecclesiastical pronunciation
-- {"([aɛeiɔou][ːˑ.ˈ]+)r([aɛeiɔou]?)", "%1ɾ%2"},
-- {"([fbdɡptk])r", "%1ɾ"},
-- Dental articulations
{"([ln])([.ˈ]*[td][^͡])", "%1̪%2"}, --assimilation of n to dentality.
--Note that the quality of n might not be dental otherwise--it may be alveolar in most contexts in Italian, according to Wikipedia.
{"([td])([^͡])", "%1̪%2"}, --t and d are dental, except as the first element of a palatal affricate
{"t͡s", "t̪͡s̪"}, -- dental affricates
{"d͡z", "d̪͡z̪"}, --dental affricates
-- Undo the dental mark on a stop that directly precedes a palatal affricate.
{"t̪([.ˈ]*t͡ʃ)", "t%1"},
{"d̪([.ˈ]*d͡ʒ)", "d%1"},
--end of words (the rules above need a following character, so word-final stops are handled here)
{"([ln])t$", "%1̪t̪"},
{"([td])$", "%1̪"},
--Partial assimilation of l and n before palatal affricates, as in Italian
{"([ln])([.ˈ]*t͡ʃ)", "%1̠ʲ%2"},
{"([ln])([.ˈ]*d͡ʒ)", "%1̠ʲ%2"},
{"([ln])([.ˈ]*ʃ)", "%1̠ʲ%2"},
-- other coda nasal assimilation, full and partial. Per Canepari, only applies to /n/ and not to /m/
{"n([.ˈ]*[kɡ])", "ŋ%1"},
{"n([.ˈ]*[fv])", "ɱ%1"},
}
-- Vowel or diphthong -> its lengthened form. convert_word() looks up the last
-- phoneme of the stressed syllable here when producing a phonetic
-- Ecclesiastical transcription and that phoneme is a vowel; phonemes without
-- an entry are left unchanged. Already-long vowels map to themselves.
local lengthen_vowel = {
["a"] = "aː", ["aː"] = "aː",
["ɛ"] = "ɛː", ["ɛː"] = "ɛː",
["e"] = "eː", ["eː"] = "eː",
["i"] = "iː", ["iː"] = "iː",
["ɔ"] = "ɔː", ["ɔː"] = "ɔː",
["o"] = "oː", ["oː"] = "oː",
["u"] = "uː", ["uː"] = "uː",
-- Diphthongs lengthen their first element.
["au̯"] = "aːu̯",
["ɛu̯"] = "ɛːu̯",
["eu̯"] = "eːu̯",
}
-- Set (built with Module:table's listToSet) of the phoneme strings that count
-- as a syllable nucleus: short vowels, long vowels and diphthongs. The
-- syllabification helpers test whole phoneme strings against this set.
local vowels = list_to_set{
"a", "ɛ", "e", "ɪ", "i", "ɔ", "o", "ʊ", "u", "y",
"aː", "ɛː", "eː", "iː", "ɔː", "oː", "uː", "yː",
"ae̯", "au̯", "ei̯", "eu̯", "oe̯", "ou̯",
}
-- Permitted syllable onsets, written as concatenated phoneme strings.
-- split_syllables() moves consonants to the preceding syllable until the
-- remaining onset is empty or found here.
local onsets = {
-- Single consonants.
"p", "pʰ", "b",
"t", "tʰ", "d",
"k", "kʰ", "kʷ", "ɡ", "ɡʷ",
"s", "sʷ", "z", "f", "v", "h",
"t͡s", "d͡z", "t͡ʃ", "d͡ʒ", "ʃ",
"l", "r", "rʰ",
"m", "n", "ɲ",
"j", "w",
-- Obstruent + l.
"pl", "pʰl", "bl",
"kl", "kʰl", "ɡl",
"fl",
-- Obstruent + r.
"pr", "pʰr", "br",
"tr", "tʰr", "dr",
"kr", "kʰr", "ɡr",
"fr",
}
-- Also allow "s" before every onset made of a voiceless stop, optional
-- aspiration/labialization marks and an optional l or r. The loop bound
-- #onsets is evaluated once, so the entries appended here are not revisited.
for i = 1, #onsets do
local v = onsets[i]
if umatch(v, "^[ptk][ʰʷ]*[lr]?$") then
insert(onsets, "s" .. v)
end
end
-- Convert the list into a set for constant-time membership tests.
onsets = list_to_set(onsets)
-- Set of recognized syllable codas, written as concatenated phoneme strings.
-- split_syllables() only consults it after syllabification, to record a
-- tracking entry ("bad coda") when a syllable ends in something not listed.
local codas = list_to_set{
-- Single consonants.
"p", "pʰ", "b",
"t", "tʰ", "d",
"k", "kʰ", "ɡ",
"s", "z", "f",
"ʃ",
"l", "r",
"m", "n", "ɲ",
"j",
-- Stop + s, and s + stop.
"ps", "ts", "ks",
"sp", "st", "sk",
"spʰ", "stʰ", "skʰ",
-- l + consonant(s).
"lp", "lpʰ", "lb", "lps",
"lt", "ltʰ", "ld",
"lk", "lkʰ", "lɡ", "lks",
"ls",
"lm", "ln", "lms", "lns",
-- r + consonant(s).
"rp", "rpʰ", "rb", "rps",
"rt", "rtʰ", "rd",
"rk", "rkʰ", "rɡ", "rks",
"rs",
"rl", "rls",
"rm", "rn", "rms", "rns",
-- Nasal + consonant(s).
"mp", "mpʰ", "mb", "mps",
"nt", "ntʰ", "nd",
"nk", "nkʰ", "nɡ", "nks",
"ns",
}
-- Prefixes that end in a consonant; can be patterns. Occurrences of such
-- prefixes + i + vowel cause the i to convert to j (to suppress this, add a
-- dot, i.e. syllable boundary, after the i).
-- convert_word() interpolates each entry into a pattern anchored at the start
-- of the word and tries the entries in list order.
local cons_ending_prefixes = {
"a[bd]", "circum", "con", "dis", "ex", "inter", "in", "ob", "per",
"subter", "sub", "super", "tr[aā]ns"
}
-- Macron-marked vowel -> the same vowel with a breve. export.phoneticize()
-- uses this as a gsub replacement table to build the short-vowel variant of a
-- vowel that was written with both a macron and a breve.
local macrons_to_breves = {
["ā"] = "ă",
["ē"] = "ĕ",
["ī"] = "ĭ",
["ō"] = "ŏ",
["ū"] = "ŭ",
-- Unicode doesn't have breve-y
["ȳ"] = "y" .. BREVE,
}
-- Spell out a ligature as two letters. "æ" becomes "a…e"; anything else is
-- treated as "œ" and becomes "o…e". DIACRITIC (the combining marks that
-- followed the ligature, possibly empty) is placed after the first letter.
local function normalize_ligatures(ligature, diacritic)
	local first_letter = "o"
	if ligature == "æ" then
		first_letter = "a"
	end
	return first_letter .. diacritic .. "e"
end
-- Orthographic vowel inventories used by convert_word() on the input spelling
-- (as opposed to VOWELS above, which lists IPA output symbols).
-- NOTE: Everything is lowercased very early on, so we don't have to worry
-- about capitalized letters.
-- FIXME: handle ǟë̄ï̄ȫǖÿ̄ etc.
local short_vowels_string = "aeiouyăĕĭŏŭäëïöüÿ" -- no breve-y in Unicode
local long_vowels_string = "āēīōūȳ"
local vowels_string = short_vowels_string .. long_vowels_string
-- Pattern character classes: any orthographic vowel / anything else.
local vowels_c = "[" .. vowels_string .. "]"
local non_vowels_c = "[^" .. vowels_string .. "]"
-- Record a tracking entry named "la-IPA/<page>" through Module:debug/track.
-- Always returns true.
local function track(page)
	local tracker = require("Module:debug/track")
	tracker("la-IPA/" .. page)
	return true
end
-- Strip every match of the pattern CH (normally a single combining mark) from
-- WORD. The word is decomposed to NFD first so that precomposed letters expose
-- their combining marks, and recomposed to NFC afterwards.
local function remove_diacritic(word, ch)
	local decomposed = toNFD(word)
	local stripped = ugsub(decomposed, ch, "")
	return toNFC(stripped)
end
-- Return true if CH is present (not nil/false) and matches PATTERN, false
-- otherwise. Lets callers probe neighbouring list slots that may not exist.
local function match_phoneme(ch, pattern)
	if not ch then
		return false
	end
	if umatch(ch, pattern) then
		return true
	end
	return false
end
-- Convert the normalized spelling WORD into a list of phoneme strings.
--
-- First pass: at every position, consume the longest key of the spelling table
-- (letters_ipa, or letters_ipa_eccl when ECCL is set) that matches there and
-- append its phoneme(s); a character with no matching key is appended as-is.
-- Second pass (ECCL only): rewrite context-dependent consonants in place.
--
-- The PHONETIC parameter is accepted but not used in this function.
-- Returns the list of phonemes (entries may be the empty string).
local function letters_to_ipa(word, phonetic, eccl)
local ph = {}
local dictionary = eccl and letters_ipa_eccl or letters_ipa
while ulen(word) > 0 do
-- Find the longest dictionary key that is a prefix of the remaining word.
-- NOTE(review): for ECCL the dictionary is the table returned by
-- Module:table/setParent; whether pairs() also visits the keys inherited from
-- letters_ipa depends on that module -- confirm.
local longestmatch = ""
for letter in pairs(dictionary) do
local letter_len = ulen(letter)
if letter_len > ulen(longestmatch) and usub(word, 1, letter_len) == letter then
longestmatch = letter
end
end
if ulen(longestmatch) > 0 then
local ipa = dictionary[longestmatch]
if type(ipa) == "table" then
-- Multi-phoneme value such as x -> k, s.
for _, phoneme in ipairs(ipa) do
insert(ph, phoneme)
end
else
insert(ph, ipa)
end
word = usub(word, ulen(longestmatch) + 1)
else
-- No key matches: pass the character through unchanged.
insert(ph, usub(word, 1, 1))
word = usub(word, 2)
end
end
if eccl then
-- Phonemes that begin with a front vowel trigger palatalization.
local front_vowel = "^[eɛiɪ]"
-- Walk the list once. Branches that consume following phonemes advance `i`
-- themselves; the unconditional increment at the bottom then moves past the
-- last phoneme handled.
local i, n = 1, #ph
while i <= n do
local cur = ph[i]
if cur == "c" then -- c, but not k/q/x
ph[i] = "k" -- default
local nxt = ph[i + 1]
if nxt == "c" then -- cc{e|i}: t.t͡ʃ
if match_phoneme(ph[i + 2], front_vowel) then
ph[i], ph[i + 1] = "t", "t͡ʃ"
i = i + 2
end
elseif match_phoneme(nxt, front_vowel) then -- c{e|i}: t͡ʃ
ph[i] = "t͡ʃ"
i = i + 1
end
elseif cur == "ɡ" then
local nxt = ph[i + 1]
if nxt == "ɡ" then -- gg{e|i}: d.d͡ʒ
if match_phoneme(ph[i + 2], front_vowel) then
ph[i], ph[i + 1] = "d", "d͡ʒ"
i = i + 2
end
elseif nxt == "n" then -- gn: ɲ.ɲ
ph[i], ph[i + 1] = "ɲ", "ɲ"
i = i + 1
elseif match_phoneme(nxt, front_vowel) then -- g{e|i}: d͡ʒ
ph[i] = "d͡ʒ"
end
elseif cur == "h" then
-- h is silent; the slot is kept (as an empty string) so indices stay valid.
ph[i] = ""
elseif cur == "m" then -- mihī̆ = michī̆
if ph[i + 1] == "i" and ph[i + 2] == "h" and match_phoneme(ph[i + 3], "^[iɪ]ː?$") then
ph[i + 2] = letters_ipa_eccl["ch"]
-- Skip past the whole word so the h is not silenced afterwards.
i = i + 3
end
elseif cur == "n" then -- nihil = nichil
if ph[i + 1] == "i" and ph[i + 2] == "h" and ph[i + 3] == "i" and ph[i + 4] == "l" then
ph[i + 2] = letters_ipa_eccl["ch"]
i = i + 4
end
elseif cur == "s" then -- sc{e|i}: ʃ.ʃ
if ph[i + 1] == "c" and match_phoneme(ph[i + 2], front_vowel) then
ph[i], ph[i + 1] = "ʃ", "ʃ"
i = i + 2
end
elseif cur == "t" then -- ti{V}: t͡si
-- Only when the t is followed by i + vowel and not preceded by d, s, t or z
-- (optionally aspirated).
if match_phoneme(ph[i + 1], "^[iɪ]") and not match_phoneme(ph[i - 1], "[dstz]ʰ?$") and vowels[ph[i + 2]] then
ph[i] = "t͡s"
i = i + 2
end
elseif cur == "z" then
if ph[i + 1] == "z" then -- zz: d.d͡z
ph[i], ph[i + 1] = "d", "d͡z"
i = i + 1
else -- z: d͡z
ph[i] = "d͡z"
end
end
i = i + 1
end
end
return ph
end
-- Return the onset of SYLL (a list of phonemes) as one string: everything
-- before the first vowel, with stress marks left out. If the syllable has no
-- vowel, all of its non-stress phonemes are returned.
local function get_onset(syll)
	local onset = {}
	for _, phoneme in ipairs(syll) do
		if vowels[phoneme] then
			break
		end
		if phoneme ~= "ˈ" then
			onset[#onset + 1] = phoneme
		end
	end
	return concat(onset)
end
-- Return the coda of SYLL (a list of phonemes) as one string: everything
-- after the last vowel. If the syllable has no vowel, the whole syllable is
-- returned.
local function get_coda(syll)
	-- Walk backwards to find the index of the first trailing non-vowel.
	local start = #syll + 1
	while start > 1 and not vowels[syll[start - 1]] do
		start = start - 1
	end
	return concat(syll, "", start)
end
-- Return the first vowel phoneme of SYLL, or nothing if it contains none.
local function get_vowel(syll)
	for _, phoneme in ipairs(syll) do
		if vowels[phoneme] then
			return phoneme
		end
	end
end
-- Split the word into syllables of CV shape
--
-- REMAINDER is the list of phonemes produced by letters_to_ipa(); it may
-- contain "." (explicit syllable break) and "ˈ" (explicit stress mark).
-- Returns a list of syllables, each itself a list of phonemes. Syllables with
-- an onset or coda not found in `onsets`/`codas` are only reported through
-- track(); they are still returned.
local function split_syllables(remainder)
local syllables, syll = {}, {}
-- Pass 1: close a syllable after every vowel, so each provisional syllable is
-- (consonants +) one vowel.
for _, phoneme in ipairs(remainder) do
if phoneme == "." then
if #syll > 0 then
insert(syllables, syll)
syll = {}
end
-- Insert a special syllable consisting only of a period.
-- We remove it later but it forces no movement of consonants across
-- the period.
insert(syllables, {"."})
elseif phoneme == "ˈ" then
-- A stress mark starts a new syllable and stays at its front.
if #syll > 0 then
insert(syllables,syll)
end
syll = {"ˈ"}
elseif vowels[phoneme] then
insert(syll, phoneme)
insert(syllables, syll)
syll = {}
else
insert(syll, phoneme)
end
end
-- If there are phonemes left, then the word ends in a consonant.
-- Add another syllable for them, which will get joined the preceding
-- syllable down below.
if #syll > 0 then
insert(syllables, syll)
end
-- Split consonant clusters between syllables
-- Pass 2. Note that entries are removed from `syllables` while it is being
-- iterated; the comments below explain why that is intended.
for i, current in ipairs(syllables) do
if #current == 1 and current[1] == "." then
-- If the current syllable is just a period (explicit syllable
-- break), remove it. The loop will then skip the next syllable,
-- which will prevent movement of consonants across the syllable
-- break (since movement of consonants happens from the current
-- syllable to the previous one).
remove(syllables, i)
elseif i > 1 then
local previous = syllables[i - 1]
local onset = get_onset(current)
-- Shift over consonants until the syllable onset is valid
while not (onset == "" or onsets[onset]) do
insert(previous, remove(current, 1))
onset = get_onset(current)
end
-- If the preceding syllable still ends with a vowel,
-- and the current one begins with s + another consonant, then shift it over.
if get_coda(previous) == "" and (current[1] == "s" and not vowels[current[2]]) then
insert(previous, remove(current, 1))
end
-- Check if there is no vowel at all in this syllable. That
-- generally happens either (1) with an explicit syllable division
-- specified, like 'cap.ra', which will get divided into the syllables
-- [ca], [p], [.], [ra]; or (2) at the end of a word that ends with
-- one or more consonants. We move the consonants onto the preceding
-- syllable, then remove the resulting empty syllable. If the
-- new current syllable is [.], remove it, too. The loop will then
-- skip the next syllable, which will prevent movement of consonants
-- across the syllable break (since movement of consonants happens
-- from the current syllable to the previous one).
if not get_vowel(current) then
for _ = 1, #current do
insert(previous, remove(current, 1))
end
remove(syllables, i)
if syllables[i] and #syllables[i] == 1 and syllables[i][1] == "." then
remove(syllables, i)
end
end
end
end
-- Pass 3: sanity check. Unknown onsets/codas are tracked, not rejected (the
-- error() calls are deliberately commented out).
for _, syll in ipairs(syllables) do
local onset = get_onset(syll)
local coda = get_coda(syll)
if not (onset == "" or onsets[onset]) then
track("bad onset")
--error("onset error:[" .. onset .. "]")
end
if not (coda == "" or codas[coda]) then
track("bad coda")
--error("coda error:[" .. coda .. "]")
end
end
return syllables
end
-- Truthy when PHONEME is exactly one short vowel symbol (no length mark, not
-- a diphthong). The raw result of the find call is returned, so callers
-- should only test it for truthiness.
local function phoneme_is_short_vowel(phoneme)
	local short_vowel_pattern = "^[aɛeiɔouy]$"
	return rfind(phoneme, short_vowel_pattern)
end
-- Work out which syllable carries the stress.
--
-- SYLLABLES is the list produced by split_syllables(). Returns the 1-based
-- index of the stressed syllable, -1 when the form is unstressed (prefixes,
-- and suffixes too short or too light to attract the stress), or nothing when
-- there are no syllables at all. An explicit "ˈ" found in a syllable wins over
-- everything else and is removed from that syllable as a side effect.
local function detect_accent(syllables, is_prefix, is_suffix)
	-- Manual override: the first explicit stress mark decides.
	for index, syll in ipairs(syllables) do
		for position, phoneme in ipairs(syll) do
			if phoneme == "ˈ" then
				remove(syll, position)
				return index
			end
		end
	end

	-- Prefixes have no accent.
	if is_prefix then
		return -1
	end

	local count = #syllables

	-- The penult is light when it ends in a bare short vowel.
	local function penult_is_light()
		local penult = syllables[count - 1]
		return phoneme_is_short_vowel(penult[#penult])
	end

	-- A suffix is stressed only if the stress would fall on it inside a full
	-- word. A leading syllable without a vowel (as in -rnus) does not count.
	if is_suffix then
		local syllabic = get_vowel(syllables[1]) and count or count - 1
		if syllabic < 2 then
			return -1
		end
		if syllabic == 2 and penult_is_light() then
			return -1
		end
	end

	-- Regular placement: penult if heavy, otherwise antepenult.
	if count > 2 then
		if penult_is_light() then
			return count - 2
		end
		return count - 1
	elseif count == 2 then
		return 1
	elseif count == 1 then
		-- Monosyllables are marked too so that stress-conditioned sound rules
		-- work; the mark is dropped again before display.
		return 1
	end
end
-- Collapse every run of two or more adjacent "." / "ˈ" marks into a single
-- mark: a stress mark if the run contains one, otherwise a dot.
local function clean_syllable_breaks(word)
	local function collapse(run)
		if run:find("ˈ") then
			return "ˈ"
		end
		return "."
	end
	local cleaned = ugsub(word, "[%.ˈ][%.ˈ]+", collapse)
	return cleaned
end
-- Convert one lowercased, NFC-normalized Latin word to IPA.
--
-- WORD may carry a leading/trailing hyphen (prefix/suffix), "." for explicit
-- syllable breaks, "'" for explicit stress and "_" as a separator that is
-- simply deleted. PHONETIC selects the phonetic rule set; ECCL selects
-- Ecclesiastical instead of Classical pronunciation.
-- Returns the transcription as a string, with "." between syllables and "ˈ"
-- before the stressed one. The steps below are order-dependent.
local function convert_word(word, phonetic, eccl)
-- Some contracted words end in apostrophes, which shouldn't be mistaken for stress placement.
word = word:gsub("'$", "")
-- Normalize i/j/u/v; do this before removing breves, so we keep the
-- ŭ in langŭī (perfect of languēscō) as a vowel.
word = ugsub(word, "(" .. vowels_c .. ")[vw](" .. non_vowels_c .. ")", "%1u%2")
word = ugsub(word, "qu(" .. vowels_c .. ")", "qw%1")
word = word:gsub("%f[^%z.'][ck]w", "qw")
word = ugsub(word, "%f[^%z.'n]gu(" .. vowels_c .. ")", "gw%1") -- nguV or initial guV
word = ugsub(word, "^i(" .. vowels_c .. ")", "j%1")
word = ugsub(word, "^u(" .. vowels_c .. ")", "v%1")
-- Per the August 31 2019 recommendation by [[User:Brutal Russian]] in
-- [[Module talk:la-IPA]], we convert i/j between vowels to jj if the
-- preceding vowel is short but to single j if the preceding vowel is long.
-- The callback returns nothing when the next character is not a vowel, which
-- leaves the match unchanged.
-- NOTE(review): the vowel test below is a pattern search of vowels_string for
-- the next character. It appears to succeed as well when that character is
-- the empty string (match at the very end of the word) or "." (which matches
-- any character as a pattern) -- confirm whether that is intended.
word = ugsub(word, "(" .. vowels_c .. ")('?)([iju])()", function (vowel, stress, cons, pos)
if vowels_string:find(usub(word, pos, pos)) then
if cons == "u" then
cons = stress .. "v"
-- FIXME: this should also catch diphthongs.
elseif eccl or long_vowels_string:find(vowel) then
cons = stress .. "j"
else
cons = "j" .. stress .. "j"
end
return vowel .. cons
end
end)
--Convert v/w to u syllable-finally
word = word:gsub("[vw]%f[%z.']", "u")
-- Convert i to j before vowel and after any prefix that ends in a consonant,
-- per the August 23 2019 discussion in [[Module talk:la-IPA]].
for _, pref in ipairs(cons_ending_prefixes) do
word = ugsub(word, "^(" .. pref .. ")i(" .. vowels_c .. ")", "%1j%2")
end
-- Convert z to zz between vowels so that the syllable weight and stress assignment will be correct.
word = ugsub(word, "(" .. vowels_c .. ")z(" .. vowels_c .. ")", "%1zz%2")
if eccl then
-- Intervocalic ti + vowel: geminate t + affricate.
word = ugsub(word, "(" .. vowels_c .. ")ti(" .. vowels_c .. ")", "%1tt͡si%2")
end
-- Now remove breves.
word = remove_diacritic(word, BREVE)
-- Normalize syllabic vowels like aë, oë; do this after removing breves but
-- before any other normalizations.
word = ugsub(word, "(" .. vowels_c .. ")([äëïöüÿ])", "%1.%2")
word = remove_diacritic(word, TREMA)
-- Assume the u in a final -us or -um is not part of a diphthong
word = word:gsub("([aeo])(u[ms])$", "%1.%2")
-- Put a syllable break between a, e, o or u and a following i.
word = word:gsub("[aeou]%f[i]", "%0.")
-- Underscores (also produced from tie bars by convert_words) are dropped.
word = word:gsub("_", "")
-- Vowel length before nasal + fricative is allophonic
word = toNFC(toNFD(word):gsub("([aeiouy])" .. MACRON .. "([mn][.']*[fs])", "%1%2"))
if eccl then
-- Add a macron to a plain vowel that stands directly before j.
word = toNFC(word:gsub("[aeiouy]%f[j]", "%0" .. MACRON))
end
-- Per May 10 2019 discussion in [[Module talk:la-IPA]], we syllabify
-- prefixes ab-, ad-, ob-, sub- separately from following l or r.
word = word:gsub("^a([bd])([lr])", "a%1.%2")
word = word:gsub("^ob([lr])", "ob.%1")
word = word:gsub("^sub([lr])", "sub.%1")
-- Remove hyphens indicating prefixes or suffixes; do this after the above,
-- some of which are sensitive to beginning or end of word and shouldn't
-- apply to end of prefix or beginning of suffix.
-- As written, a leading hyphen sets is_prefix and a trailing hyphen sets
-- is_suffix.
local is_prefix, is_suffix
word = word:gsub("^(%-?)(.-)(%-?)$", function(m1, m2, m3)
is_prefix, is_suffix = m1 == "-", m3 == "-"
return m2
end)
-- Convert word to IPA
local phonemes = letters_to_ipa(word, phonetic, eccl)
-- Split into syllables
local syllables = split_syllables(phonemes)
-- Add accent
local accent = detect_accent(syllables, is_prefix, is_suffix)
-- Ecclesiastical: drop all length marks. Classical phonetic: lax the short
-- vowels (long vowels and diphthongs are not keys of lax_vowel).
for _, syll in ipairs(syllables) do
for j in ipairs(syll) do
if eccl then
syll[j] = syll[j]:gsub("ː", "")
elseif phonetic then
syll[j] = lax_vowel[syll[j]] or syll[j]
end
end
end
for i, syll in ipairs(syllables) do
-- Ecclesiastical phonetic: lengthen the vowel of a stressed open syllable.
if eccl and i == accent and phonetic and vowels[syll[#syll]] then
syll[#syll] = lengthen_vowel[syll[#syll]] or syll[#syll]
end
-- Blank out the second of two identical adjacent phonemes within a syllable.
for j = 1, #syll - 1 do
if syll[j] == syll[j + 1] then
syll[j + 1] = ""
end
end
end
-- Join each syllable into a string and prefix the stress mark where due.
for i, syll in ipairs(syllables) do
syll = concat(syll)
-- Atonic /ɔ/ and /ɛ/ merge with /o/ and /e/ respectively
if eccl and phonetic and i == accent then
syll = syll:gsub("o", "ɔ")
syll = syll:gsub("e", "ɛ")
-- Syllable-initial /ɡn/ becomes /n/ (e.g. "gnōscō")
elseif not eccl then
syll = syll:gsub("^ɡn", "n")
end
syllables[i] = (i == accent and "ˈ" or "") .. syll
end
word = clean_syllable_breaks(concat(syllables, "."))
-- poetic meter shows that a consonant before "h" was syllabified as an onset, not as a coda.
-- Based on outcome of talk page discussion, this will be indicated by the omission of /h/ [h] in this context.
word = ugsub(word, "([^" .. VOWELS .. "ptk.ˈːˑ])([.ˈːˑ]*)h", "%2%1")
if eccl then
-- Replace ʃ.ʃ or ʃˈʃ with .ʃ or ˈʃ after any consonant.
word = ugsub(word, "([^" .. VOWELS .. "])ʃ([.ˈ]*)ʃ", "%1%2ʃ")
-- Aspiration is not shown in Ecclesiastical transcriptions.
word = word:gsub("ʰ", "")
else
-- [2025-05-15: don't do this, per [[Wiktionary:Beer_parlour/2025/May#Retiring_dual_phonemic-phonetic_transcriptions_for_Latin]]
-- word = word:gsub("j", "i̯") -- normalize glide spelling
-- word = word:gsub("w", "u̯")
end
if phonetic then
-- Apply the ordered phonetic rules for the selected pronunciation.
local rules = eccl and phonetic_rules_eccl or phonetic_rules
for _, rule in ipairs(rules) do
word = ugsub(word, rule[1], rule[2])
end
-- [2025-05-15: now that we've disabled the phonemic notation, we should put the syllable breaks in the
-- phonetic notation, as otherwise they don't display at all]
-- word = word:gsub("%.+", "") -- remove the dots
end
if phonetic then
word = ugsub(word, "(%a([̪̠̯]?))%1", "%1" .. LONG) -- convert double consonants into long ones
word = ugsub(word, "[ːˑ][ːˑ]+", "ː") -- maximum of one full length mark
end
return clean_syllable_breaks(word)
end
-- Convert TEXT (one or more Latin words) to IPA.
--
-- TEXT may contain Latin letters with macrons, breves and diaereses, the
-- ligatures æ/œ, tie bars, punctuation, and the markup characters "." (syllable
-- break), "'" (stress) and "_". Any other character raises an error naming the
-- offending characters.
-- PHONETIC and ECCL are passed through to convert_word().
-- Returns the transcriptions of the individual words joined by single spaces.
function export.convert_words(text, phonetic, eccl)
	local disallowed = ugsub(text, "[a-zA-ZæœāǣēīōūȳăĕĭŏŭäëïöüÿĀǢĒĪŌŪȲĂĔĬŎŬÄËÏÖÜŸ%-,.?!:;()'\"_ " .. MACRON .. BREVE .. TREMA .. TIE .. "]", "")
	local n = ulen(disallowed)
	if n > 0 then
		-- Build the message with a single format call. Formatting in two
		-- stages would re-interpret a disallowed "%" as a format directive
		-- and replace the intended message with a format error.
		local plural = n ~= 1
		error(("The character%s %s %s not allowed."):format(
			plural and "s" or "",
			mw.dumpObject(disallowed),
			plural and "are" or "is"
		))
	end
	text = toNFD(text)
	-- Call ulower() even though it's also called in phoneticize,
	-- in case convert_words() is called externally.
	text = ulower(text)
	-- Punctuation carries no pronunciation.
	text = text:gsub("[,?!:;()\"]", "")
	-- Spell out the ligatures, keeping any diacritics on the first letter.
	text = ugsub(text, "([æœ])(" .. LENGTH .. ")", normalize_ligatures)
	-- Treat a tie as "_".
	text = text:gsub(TIE, "_")
	local result = {}
	-- Split on spaces and hyphens, but hyphens preceded/followed by a space
	-- are included in the word (e.g. prefixes and suffixes).
	for chunk in gsplit(text, " +") do
		for word in gsplit(chunk, "%f[%z-]%-+%f[^%z-]") do
			-- Skip pieces that are empty or consist of hyphens only.
			if word:match("[^-]") then
				insert(result, convert_word(toNFC(word), phonetic, eccl))
			end
		end
	end
	return trim(concat(result, " "))
end
-- Phoneticize Latin TEXT. Return a list of one or more phoneticizations,
-- each of which is a two-element list {PHONEMIC, PHONETIC}. If ECCL, use
-- Ecclesiastical pronunciation. Otherwise, use Classical pronunciation.
--
-- Two kinds of input yield two candidate readings: a vowel written with both
-- macron and breve (long and short reading), and ";" (read once as nothing,
-- once as a syllable break). If both readings transcribe identically, only
-- one is returned.
function export.phoneticize(text, eccl)
	-- {phonemic, phonetic} transcription of one spelling variant.
	local function transcribe(variant)
		return {
			export.convert_words(variant, false, eccl),
			export.convert_words(variant, true, eccl),
		}
	end

	-- Transcribe two variants and drop the second if it adds nothing.
	local function transcribe_both(first_variant, second_variant)
		local first_ipa = transcribe(first_variant)
		local second_ipa = transcribe(second_variant)
		if deep_equals(first_ipa, second_ipa) then
			return {first_ipa}
		end
		return {first_ipa, second_ipa}
	end

	text = ulower(text)

	-- Macron-breve sequence: one pronunciation for the long vowel and one for
	-- the short. (They won't differ in Ecclesiastical pronunciation.)
	if rfind(text, "[āēīōūȳ]" .. BREVE) then
		local long_variant = ugsub(text, "([āēīōūȳ])" .. BREVE, "%1")
		local short_variant = ugsub(text, "([āēīōūȳ])" .. BREVE, macrons_to_breves)
		return transcribe_both(long_variant, short_variant)
	end

	-- Semicolon: tautosyllabic reading first, heterosyllabic second.
	if rfind(text, ";") then
		local tautosyllabic = text:gsub(";", "")
		local heterosyllabic = text:gsub(";", ".")
		return transcribe_both(tautosyllabic, heterosyllabic)
	end

	return {transcribe(text)}
end
-- Format one output row: the accent qualifiers DIALS followed by the IPA for
-- every entry of PHONETICIZATIONS (a list of {phonemic, phonetic} pairs as
-- returned by export.phoneticize). Each entry is shown as "[phonetic]", or as
-- "/phonemic/ [phonetic]" when INCLUDE_PHONEMIC is truthy.
local function make_row(phoneticizations, dials, include_phonemic)
	local items = {}
	for _, pair in ipairs(phoneticizations) do
		local pron = "[" .. pair[2] .. "]"
		if include_phonemic then
			pron = "/" .. pair[1] .. "/ " .. pron
		end
		items[#items + 1] = {pron = pron}
	end
	local qualifiers = m_a.format_qualifiers(lang, dials)
	local ipa = m_IPA.format_IPA_full { lang = lang, items = items }
	return qualifiers .. " " .. ipa
end
-- Template entry point: render the Classical and/or Ecclesiastical
-- pronunciation rows for a Latin term.
--
-- Arguments are read from the parent frame (the invoking template):
--   1                 text to transcribe; defaults to the page name (or to a
--                     sample word in the Template namespace)
--   classical / cl    show the Classical row (default true)
--   ecclesiastical / eccl  show the Ecclesiastical row (default true)
--   include_phonemic  also show the phonemic transcription in each row
--   ann               annotation shown in bold before each row; "1" means the
--                     text itself with ".", "'" and "_" removed
--   accent            list of qualifiers replacing the default row labels
--   indent            bullet string placed before each row
--   vul               obsolete; only tracked
-- Returns the wikitext of the rows followed by any categories.
function export.show_full(frame)
	local boolean_default_true = {type = "boolean", default = true}
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {default = mw.title.getCurrentTitle().nsText == "Template" and "īnspīrāre" or mw.loadData("Module:headword/data").pagename},
		classical = boolean_default_true,
		cl = {alias_of = "classical"},
		ecclesiastical = boolean_default_true,
		eccl = {alias_of = "ecclesiastical"},
		vul = {type = "boolean"}, -- To be removed.
		-- 2025-05-15: Add include_phonemic (not by default) per [[Wiktionary:Beer_parlour/2025/May#Retiring_dual_phonemic-phonetic_transcriptions_for_Latin]]
		include_phonemic = {type = "boolean"},
		ann = true,
		accent = {list = true},
		indent = true
	})
	-- Track down any remaining uses of |vul=
	if args.vul ~= nil then
		track("vul")
	end
	local text = args[1]
	local categories = {}
	local accent = args.accent
	-- Forwarded to make_row() for both rows; previously the parameter was
	-- parsed but never used, so |include_phonemic= had no effect.
	local include_phonemic = args.include_phonemic
	local indent = (args.indent or "*") .. " "
	local out = ""
	-- The first row only gets a bullet when |indent= was given explicitly.
	if args.indent then
		out = indent
	end
	if args.classical then
		out = out .. make_row(
			export.phoneticize(text, false),
			#accent > 0 and accent or {"คลาสสิก"},
			include_phonemic
		)
	else
		insert(categories, lang:getCanonicalName() .. " terms with Ecclesiastical IPA pronunciation only")
	end
	local anntext = (
		args.ann == "1" and "'''" .. text:gsub("[.'_]", "") .. "''': " or
		args.ann and "'''" .. args.ann .. "''': " or
		"")
	out = anntext .. out
	if args.ecclesiastical then
		-- A second row always starts on a new line with a bullet.
		if args.classical then
			out = out .. "\n" .. indent .. anntext
		end
		out = out .. make_row(
			export.phoneticize(text, true),
			#accent > 0 and accent or {'คริสตจักร'},
			include_phonemic
		)
	end
	return out .. require("Module:utilities").format_categories(categories)
end
return export