-- Module:Sandbox/Ajuanca/Names
-- < Module:Sandbox | Ajuanca
-- Task 8 of GCI 2019
-- Module table: the public functions below are attached to it and it is
-- returned at the end of the file.
local p = {}
-- Field of `p` holding the name parser for each supported language code.
-- Resolved by name at call time, so the parsers only need to exist when
-- formatNames actually runs.
local NAME_PARSERS = {
    es = "esName",
    en = "enName",
    de = "deName",
    zh = "zhName",
    ru = "ruName",
}

-- Sentence openers used when a language reports two "family" entries.
local TWO_FAMILY_LABELS = {
    es = { "The first family name is <b>", "The second family name is <b>" },
    ru = { "The family name is <b>", "The patronymic name is <b>" },
    zh = { "The family name is <b>", "The courtesy name is <b>" },
}

-- Main function.
-- Reads frame.args.name (the full personal name) and the optional
-- frame.args.language (two-letter code), splits the name into given and
-- family names and describes them in a short HTML fragment.
-- Returns a String: the HTML fragment, or a code starting with "#":
--   the code produced by p.discoverLanguage when the language is rejected,
--   "#noName" when no usable name was supplied.
function p.formatNames(frame)
    local rawName = frame.args.name
    if rawName == nil then
        -- tostring(nil) would otherwise be parsed as the literal name "nil".
        return "#noName"
    end

    local words = p.divideName(tostring(rawName))
    if #words == 0 then
        return "#noName"
    end

    local language = p.discoverLanguage(words, frame.args.language)
    local parserName = NAME_PARSERS[language]
    if parserName == nil then
        -- Not a supported code: pass the error code through unchanged.
        return language
    end

    local name = p[parserName](words)
    local given = name.givenNames
    local family = name.familyNames
    local sentences = {}

    -- Given names. A one-word input may leave this list empty, in which
    -- case the sentence is skipped instead of concatenating nil.
    if #given > 1 then
        -- The leading space reproduces the historical output exactly.
        sentences[#sentences + 1] = "The given name is <b> "
            .. table.concat(given, " ") .. "</b>."
    elseif #given == 1 then
        sentences[#sentences + 1] = "The given name is <b>" .. given[1] .. "</b>."
    end

    -- Family names, with language-specific wording when there are two.
    local labels = TWO_FAMILY_LABELS[language]
    if #family > 1 and labels ~= nil then
        sentences[#sentences + 1] = labels[1] .. family[1] .. "</b>."
        sentences[#sentences + 1] = labels[2] .. family[2] .. "</b>."
    elseif #family > 0 then
        -- Single family name, or several in a language without special
        -- wording: show them together rather than dropping them.
        sentences[#sentences + 1] = "The family name is <b>"
            .. table.concat(family, " ") .. "</b>."
    end

    if #sentences == 0 then
        -- Nothing but a suffix was supplied (for example only "Jr.").
        return "#noName"
    end
    return table.concat(sentences, "<br>")
end
-- Language codes accepted through the explicit language argument.
local SUPPORTED_LANGUAGES = { en = true, es = true, zh = true, ru = true, de = true }

-- Names that escape the generic indicators below or would collide with the
-- indicators of another language. Checked before anything else.
local KNOWN_NAME_LANGUAGE = {
    -- Spanish
    Navarro = "es", Gil = "es", Serrano = "es", Sanz = "es", Ortega = "es",
    Iglesias = "es", Cruz = "es", Aguilar = "es", Rivera = "es",
    Herrera = "es", Castro = "es", Luna = "es", Garza = "es", Soto = "es",
    Diaz = "es",
    -- German
    Mies = "de",
    -- Russian
    Poda = "ru", Petrenko = "ru", Roschin = "ru", Uspensky = "ru",
    ["Vladímir"] = "ru",
    -- Chinese. Wang, Zhang or Chang are covered by the "-ang" ending.
    ["Wáng"] = "zh", ["Wāng"] = "zh", Li = "zh", Lei = "zh", Lie = "zh",
    Lee = "zh", Cheung = "zh", Chen = "zh", Zhou = "zh", Zhao = "zh",
    -- English
    Barack = "en", Darwin = "en",
}

-- Spanish indicators: accented vowels and "ñ" anywhere in the word,
-- articles/connectors as whole words ("de" is left out because it also
-- appears in German names), and typical surname endings.
local ES_HINT_FRAGMENTS = { "á", "é", "í", "ó", "ú", "Á", "É", "Í", "Ó", "Ú", "ñ" }
local ES_HINT_WORDS = { "del", "el", "la", "los", "las", "y" }
local ES_HINT_ENDINGS = { "ez", "es", "is", "iz" }

-- German indicators. "ß" is the German letter; the Greek "β" that the
-- module historically tested for is still accepted as a look-alike.
local DE_HINT_FRAGMENTS = { "ß", "β", "sch", "Sch", "ff", "tt", "dd", "kk" }
local DE_HINT_WORDS = { "van", "Van", "Le", "le", "ten" }
local DE_HINT_ENDINGS = { "er", "ck", "dt", "tz", "en" }

-- Russian indicators (patronymics and derived surnames), matched anywhere
-- in the word.
local RU_HINT_FRAGMENTS = { "ovich", "evich", "ov", "ev", "ova", "eva" }

-- Chinese indicators: common romanised endings.
local ZH_HINT_ENDINGS = { "ong", "ang" }

-- True when `value` is one of the entries of the sequence `list`.
local function listContains(list, value)
    for _, entry in ipairs(list) do
        if entry == value then
            return true
        end
    end
    return false
end

-- True when `word` contains any of `fragments` as a literal substring.
local function hasAnyFragment(word, fragments)
    for _, fragment in ipairs(fragments) do
        if string.find(word, fragment, 1, true) ~= nil then
            return true
        end
    end
    return false
end

-- True when `word` ends with any of `endings`. The compared tail has the
-- same length as each ending, so endings of any length work.
local function hasAnyEnding(word, endings)
    for _, ending in ipairs(endings) do
        if string.sub(word, -string.len(ending)) == ending then
            return true
        end
    end
    return false
end

-- Guesses the language of a single word of the name.
-- Returns a language code, or nil when the word gives no indication.
local function guessWordLanguage(word)
    local known = KNOWN_NAME_LANGUAGE[word]
    if known ~= nil then
        return known
    end

    if hasAnyFragment(word, ES_HINT_FRAGMENTS)
        or listContains(ES_HINT_WORDS, word)
        or hasAnyEnding(word, ES_HINT_ENDINGS) then
        return "es"
    end

    if hasAnyFragment(word, DE_HINT_FRAGMENTS)
        or listContains(DE_HINT_WORDS, word)
        or hasAnyEnding(word, DE_HINT_ENDINGS) then
        return "de"
    end

    if hasAnyFragment(word, RU_HINT_FRAGMENTS) then
        return "ru"
    end

    if hasAnyEnding(word, ZH_HINT_ENDINGS) then
        return "zh"
    end

    -- Words of up to three bytes that matched nothing above are taken to be
    -- one of the Chinese "Hundred Family Surnames", unless they end with a
    -- period, which marks an abbreviation (treated as English).
    if string.len(word) <= 3 then
        if string.sub(word, -1) == "." then
            return "en"
        end
        return "zh"
    end

    return nil
end

-- Finds the language of a name.
-- names: table (sequence) with the words of the name.
-- supposedLanguage: optional two-letter code supplied by the caller; when
--   given, no detection is performed and the code is only validated.
-- Returns a String: a supported language code, "#incorrectFormat" when the
-- supplied code is not two characters long, or "#noCompatible" when it is
-- not one of the implemented languages.
function p.discoverLanguage(names, supposedLanguage)
    if supposedLanguage ~= nil then
        if string.len(supposedLanguage) ~= 2 then
            return "#incorrectFormat"
        end
        if SUPPORTED_LANGUAGES[supposedLanguage] then
            return supposedLanguage
        end
        return "#noCompatible"
    end

    -- The first word that gives any indication decides the language.
    for _, word in ipairs(names) do
        local guess = guessWordLanguage(word)
        if guess ~= nil then
            return guess
        end
    end

    -- No indicator matched: fall back to the English structure.
    return "en"
end
-- Splits the input into its whitespace-separated words.
-- Returns a table (sequence) with the words in their original order; it is
-- empty when the input contains no non-space characters.
function p.divideName(name)
    local words = {}
    local count = 0
    for word in string.gmatch(name, "%S+") do
        count = count + 1
        words[count] = word
    end
    return words
end
-- Words that introduce (part of) a Spanish family name.
local ES_NAME_CONNECTORS = { "de", "del", "y" }

-- Two-letter endings that mark a word as a probable Spanish surname.
local ES_SURNAME_ENDINGS = { "ez", "es", "is", "iz" }

-- True when `word` ends like a typical Spanish surname.
local function esLooksLikeSurname(word)
    local ending = string.sub(word, -2, -1)
    for _, candidate in ipairs(ES_SURNAME_ENDINGS) do
        if ending == candidate then
            return true
        end
    end
    return false
end

-- Locates the connectors that delimit the family names.
-- Returns two numbers: the index of the earliest connector (0 when there is
-- none) and the index of a second connector found in the second-to-last or
-- third-to-last position (0 when there is none).
local function esConnectorPositions(words)
    local first = 0
    for index, word in ipairs(words) do
        for _, connector in ipairs(ES_NAME_CONNECTORS) do
            if word == connector then
                first = index
                break
            end
        end
        if first ~= 0 then
            break
        end
    end
    if first == 0 then
        return 0, 0
    end

    local total = #words
    for _, connector in ipairs(ES_NAME_CONNECTORS) do
        if words[total - 1] == connector and first ~= total - 1 then
            return first, total - 1
        end
        if words[total - 2] == connector and first ~= total - 2 then
            return first, total - 2
        end
    end
    return first, 0
end

-- Works with a Spanish name, finding the given and family names.
-- fullNameTable: table (sequence) with the words of the name. It is
--   modified in place: a trailing suffix and the first word are removed.
-- Returns a table { givenNames = {...}, familyNames = {...} }.
function p.esName(fullNameTable)
    local name = {
        givenNames = {},
        familyNames = {}
    }
    if #fullNameTable == 0 then
        return name
    end
    -- Drop a trailing suffix such as "Jr." or a Roman numeral.
    if not p.isOnlyName(fullNameTable) then
        table.remove(fullNameTable)
    end
    if #fullNameTable == 0 then
        return name
    end

    -- The first word is always a given name.
    table.insert(name.givenNames, table.remove(fullNameTable, 1))
    local total = #fullNameTable
    if total == 0 then
        return name
    end

    local first, last = esConnectorPositions(fullNameTable)

    if first == 0 then
        -- No connectors at all.
        if total == 1 then
            table.insert(name.familyNames, fullNameTable[1])
        elseif total == 2 then
            -- The first word may still be a given name.
            if esLooksLikeSurname(fullNameTable[1]) then
                table.insert(name.familyNames, fullNameTable[1])
            else
                table.insert(name.givenNames, fullNameTable[1])
            end
            table.insert(name.familyNames, fullNameTable[2])
        else
            -- The last two words are the family names; everything before
            -- them is a given name, so no word is dropped.
            for index = 1, total - 2 do
                table.insert(name.givenNames, fullNameTable[index])
            end
            table.insert(name.familyNames, fullNameTable[total - 1])
            table.insert(name.familyNames, fullNameTable[total])
        end
    elseif last == 0 then
        -- One connector group: it and everything after it form one family
        -- name (a second surname may be part of it).
        if first > 1 then
            for index = 1, first - 2 do
                table.insert(name.givenNames, fullNameTable[index])
            end
            -- The word right before the connector may be a first surname.
            local previous = fullNameTable[first - 1]
            if esLooksLikeSurname(previous) then
                table.insert(name.familyNames, previous)
            else
                table.insert(name.givenNames, previous)
            end
        end
        table.insert(name.familyNames, table.concat(fullNameTable, " ", first))
    else
        -- Two connector groups: one family name each. Words before the
        -- first connector are given names.
        for index = 1, first - 1 do
            table.insert(name.givenNames, fullNameTable[index])
        end
        table.insert(name.familyNames, table.concat(fullNameTable, " ", first, last - 1))
        table.insert(name.familyNames, table.concat(fullNameTable, " ", last))
    end
    return name
end
-- Works with an English name, finding the given and family names.
-- fullNameTable: table (sequence) with the words of the name. A trailing
--   suffix, if any, is removed from it in place.
-- Returns a table { givenNames = {...}, familyNames = {...} }: the last
-- word is the family name and every word before it is a given name.
function p.enName(fullNameTable)
    -- Declared local so that parsing a name does not leak a global `name`.
    local name = {
        givenNames = {},
        familyNames = {}
    }
    if #fullNameTable == 0 then
        return name
    end
    -- Take action if some kind of suffix is given.
    if not p.isOnlyName(fullNameTable) then
        table.remove(fullNameTable)
    end

    local total = #fullNameTable
    for index = 1, total - 1 do
        table.insert(name.givenNames, fullNameTable[index])
    end
    if total > 0 then
        table.insert(name.familyNames, fullNameTable[total])
    end
    return name
end
-- Words that start a German family name when they appear before the last
-- word of the name.
local DE_NAME_CONNECTORS = {
    de = true, van = true, den = true, ten = true,
    Van = true, der = true, Le = true, le = true,
}

-- Works with a German name, finding the given and family names.
-- fullNameTable: table (sequence) with the words of the name. A trailing
--   suffix, if any, is removed from it in place.
-- Returns a table { givenNames = {...}, familyNames = {...} }: the family
-- name runs from the first connector (or, without one, from the last word)
-- to the end; every word before it is a given name.
function p.deName(fullNameTable)
    -- Declared local so that parsing a name does not leak a global `name`.
    local name = {
        givenNames = {},
        familyNames = {}
    }
    if #fullNameTable == 0 then
        return name
    end
    if not p.isOnlyName(fullNameTable) then
        table.remove(fullNameTable)
    end

    local total = #fullNameTable
    if total == 0 then
        return name
    end

    -- A connector in the last position is not special: the last word is
    -- the family name on its own.
    local familyStart = total
    for index = 1, total - 1 do
        if DE_NAME_CONNECTORS[fullNameTable[index]] then
            familyStart = index
            break
        end
    end

    for index = 1, familyStart - 1 do
        table.insert(name.givenNames, fullNameTable[index])
    end
    table.insert(name.familyNames, table.concat(fullNameTable, " ", familyStart))
    return name
end
-- Works with a Russian name, finding the given and family names.
-- fullNameTable: table (sequence) with the words of the name. A trailing
--   suffix, if any, is removed from it in place.
-- Returns a table { givenNames = {...}, familyNames = {...} }.
-- The words are read as: given name, patronymic, family name. When three or
-- more words are present, familyNames[1] is the third word (family name)
-- and familyNames[2] is the second word (patronymic); words after the third
-- are ignored. With two words the second one is the family name.
function p.ruName(fullNameTable)
    -- Declared local so that parsing a name does not leak a global `name`.
    local name = {
        familyNames = {},
        givenNames = {}
    }
    if #fullNameTable == 0 then
        return name
    end
    if not p.isOnlyName(fullNameTable) then
        table.remove(fullNameTable)
    end

    local total = #fullNameTable
    if total >= 1 then
        table.insert(name.givenNames, fullNameTable[1])
    end
    -- Not everybody has a patronymic.
    if total > 2 then
        table.insert(name.familyNames, fullNameTable[3])
        table.insert(name.familyNames, fullNameTable[2])
    elseif total == 2 then
        table.insert(name.familyNames, fullNameTable[2])
    end
    return name
end
-- Works with a Chinese name, finding the given and family names.
-- fullNameTable: table (sequence) with the words of the name. It is
--   modified in place: a trailing suffix and the first word are removed.
-- Returns a table { givenNames = {...}, familyNames = {...} }: the first
-- word is the family name; a later word containing "(...)" contributes the
-- text between the parentheses as a second familyNames entry (the courtesy
-- name); every other word is a given name.
function p.zhName(fullNameTable)
    -- Declared local so that parsing a name does not leak a global `name`.
    local name = {
        familyNames = {},
        givenNames = {}
    }
    if #fullNameTable == 0 then
        return name
    end
    if not p.isOnlyName(fullNameTable) then
        table.remove(fullNameTable)
    end
    if #fullNameTable == 0 then
        return name
    end

    table.insert(name.familyNames, table.remove(fullNameTable, 1))

    -- Apart from given name and family name, a courtesy name may be given.
    for _, eachName in ipairs(fullNameTable) do
        -- Requires both parentheses; a word with an unmatched "(" is kept
        -- as a given name instead of raising an error.
        local courtesy = string.match(eachName, "%((.-)%)")
        if courtesy ~= nil then
            table.insert(name.familyNames, courtesy)
        else
            -- The leading space is kept so existing output stays identical.
            table.insert(name.givenNames, " " .. eachName)
        end
    end
    return name
end
-- Generational suffixes recognised in the last position of a name.
local NAME_SUFFIXES = { ["Jr"] = true, ["Jr."] = true, ["Sr"] = true, ["Sr."] = true }

-- Tells whether the input consists of name words only.
-- tableName: table (sequence) with the words of the name.
-- Returns boolean: false when the last word is a suffix ("Jr", "Jr.", "Sr",
-- "Sr.") or a Roman numeral written with I, V and X (optionally followed by
-- a period); true otherwise, including for an empty table.
function p.isOnlyName(tableName)
    local lastWord = tableName[#tableName]
    if lastWord == nil then
        return true
    end

    -- The whole word must be a numeral; checking only its final letter
    -- would also flag capitalised words such as "KOWALSKI".
    if string.find(lastWord, "^[IVX]+%.?$") ~= nil then
        return false
    end

    if NAME_SUFFIXES[lastWord] then
        return false
    end
    return true
end
-- Hand the module table back to whoever loads this file.
return p