-- Module:Sandbox/Ajuanca/Names

-- Task 8 of GCI 2019
-- Module table: every function of this module is attached to it, and it is
-- the value returned at the end of the file.
local p = {}

-- Main function.
-- Splits a personal name into given and family names and describes each part
-- in an HTML-formatted sentence.
-- frame.args.name: the full personal name.
-- frame.args.language: optional language code (en, es, de, zh or ru); when it
--   is omitted the language is guessed from the name.
-- Returns a String: the description, the "#..." error code produced by
-- p.discoverLanguage when the language cannot be handled, or "#noName" when
-- no name was supplied.
function p.formatNames(frame)
	local rawName = frame.args.name
	-- A missing name would otherwise be parsed as the literal word "nil" and a
	-- blank one would fail further down, so both are reported explicitly.
	if rawName == nil or string.find(tostring(rawName), "%S") == nil then
		return "#noName"
	end
	local fullPersonalName = p.divideName(tostring(rawName))
	local language = p.discoverLanguage(fullPersonalName, frame.args.language)

	-- Dispatch table: one parser per supported language.
	local parsers = {
		es = p.esName,
		en = p.enName,
		de = p.deName,
		zh = p.zhName,
		ru = p.ruName
	}
	local parse = parsers[language]
	if parse == nil then
		-- Not a supported language: hand the error code back to the caller.
		return language
	end
	local name = parse(fullPersonalName)
	local given = name.givenNames
	local family = name.familyNames

	-- Each sentence is collected separately and joined with <br> at the end,
	-- so a name lacking one of its parts still produces a valid output.
	local sentences = {}
	if #given > 0 then
		table.insert(sentences, "The given name is <b>" .. table.concat(given, " ") .. "</b>.")
	end
	-- Depending on number of surnames, choose one output.
	if #family > 1 and language == "es" then
		table.insert(sentences, "The first family name is <b>" .. family[1] .. "</b>.")
		table.insert(sentences, "The second family name is <b>" .. family[2] .. "</b>.")
	elseif #family > 1 and language == "ru" then
		table.insert(sentences, "The family name is <b>" .. family[1] .. "</b>.")
		table.insert(sentences, "The patronymic name is <b>" .. family[2] .. "</b>.")
	elseif #family > 1 and language == "zh" then
		table.insert(sentences, "The family name is <b>" .. family[1] .. "</b>.")
		table.insert(sentences, "The courtesy name is <b>" .. family[2] .. "</b>.")
	elseif #family > 0 then
		table.insert(sentences, "The family name is <b>" .. family[1] .. "</b>.")
	end
	return table.concat(sentences, "<br>")
end

-- Finds the language of a name.
-- names: table with the words of the name, in order.
-- supposedLanguage: optional language code given by the caller.
-- Returns a String:
--   * supposedLanguage itself when it is one of en, es, zh, ru, de;
--   * "#noCompatible" when it has two characters but is not implemented;
--   * "#incorrectFormat" when it does not have exactly two characters;
--   * otherwise the language guessed from the first word showing a known
--     indicator, or "en" when no word shows any.
function p.discoverLanguage(names, supposedLanguage)
	-- True when value is one of the elements of list.
	local function contains(list, value)
		for _, item in ipairs(list) do
			if item == value then
				return true
			end
		end
		return false
	end

	if supposedLanguage ~= nil then
		-- No guessing is needed when the caller provides the code; it only has
		-- to be validated.
		if string.len(supposedLanguage) ~= 2 then
			return "#incorrectFormat"
		end
		if contains({"en", "es", "zh", "ru", "de"}, supposedLanguage) then
			return supposedLanguage
		end
		return "#noCompatible"
	end

	-- Known names that escape the normal form or conflict with the indicators
	-- of another language. Built once per call instead of once per word.
	local commonSurnames = {
		{ "es",
			{ "Navarro", "Gil", "Serrano", "Sanz", "Ortega", "Iglesias", "Cruz", "Aguilar",
			"Rivera", "Herrera", "Castro", "Luna", "Garza", "Soto", "Diaz"
		}},
		{ "de",
			{ "Mies"
		}},
		{ "ru",
			{ "Poda", "Petrenko", "Roschin", "Uspensky", "Vladímir"
		}},
		-- No need to include Wang, Zhang or Chang (common termination -ang).
		{ "zh",
			{ "Wáng", "Wāng", "Li", "Lei", "Lie", "Lee", "Cheung", "Chen", "Zhou", "Zhao"
		}},
		{ "en",
			{ "Barack", "Darwin"
		}}
	}
	-- Spanish (es) indicators.
	local esLetters = {"á", "é", "í", "ó", "ú", "Á", "É", "Í", "Ó", "Ú", "ñ"}
	-- Preposition 'de' appears also in de, so it is not listed here.
	local esConnectors = {"del", "el", "la", "los", "las", "y"}
	local esTerminations = {"ez", "es", "is", "iz"}
	-- Deutsch (de) indicators. "ß" is the German letter; the Greek "β" that
	-- used to be checked in its place is still accepted as a look-alike.
	local deLetters = {"ß", "β"}
	local deConnectors = {"van", "Van", "Le", "le", "ten"}
	local deCommonSyllables = {"sch", "Sch", "ff", "tt", "dd", "kk"}
	local deTerminations = {"er", "ck", "dt", "tz", "en"}
	-- Russian (ru) indicators.
	local ruPatronymics = {"ovich", "evich"}
	local ruDerivations = {"ov", "ev", "ova", "eva"}
	-- Chinese (zh) indicators.
	local zhTerminations = {"ong", "ang"}

	-- True when word contains any of the fragments. The search is a plain
	-- byte search, which is enough to locate a literal UTF-8 fragment.
	local function hasFragment(word, fragments)
		for _, fragment in ipairs(fragments) do
			if string.find(word, fragment, 1, true) ~= nil then
				return true
			end
		end
		return false
	end

	-- True when word ends with any of the endings, whatever their length.
	local function hasEnding(word, endings)
		for _, ending in ipairs(endings) do
			if string.sub(word, -string.len(ending)) == ending then
				return true
			end
		end
		return false
	end

	-- Returns the language suggested by a single word, or nil when the word
	-- shows no indicator. The order of the checks decides ties.
	local function guessFromWord(word)
		for _, entry in ipairs(commonSurnames) do
			if contains(entry[2], word) then
				return entry[1]
			end
		end

		-- Spanish: accents and ñ, then articles and connectors, then endings.
		if hasFragment(word, esLetters) then
			return "es"
		end
		if contains(esConnectors, word) or hasEnding(word, esTerminations) then
			return "es"
		end

		-- German: special letter, connectors, syllables, endings.
		if hasFragment(word, deLetters) or contains(deConnectors, word) then
			return "de"
		end
		if hasFragment(word, deCommonSyllables) or hasEnding(word, deTerminations) then
			return "de"
		end

		-- Russian: patronymics and common derivations anywhere in the word.
		if hasFragment(word, ruPatronymics) or hasFragment(word, ruDerivations) then
			return "ru"
		end

		-- Chinese: common endings.
		if hasEnding(word, zhTerminations) then
			return "zh"
		end
		-- Words of up to three characters that aren't connectors of other
		-- languages will probably be one of the Chinese 'Hundred Family
		-- Surnames'. The only exception are abbreviations, which end in a dot.
		if string.len(word) <= 3 then
			if string.sub(word, -1) == "." then
				return "en"
			end
			return "zh"
		end
		return nil
	end

	for _, word in ipairs(names) do
		local guessed = guessFromWord(word)
		if guessed ~= nil then
			return guessed
		end
	end
	-- If there was no match, use the default (en) structure. No need to search
	-- for English terminations.
	return "en"
end

-- Splits the input wherever whitespace appears.
-- Returns a table holding every non-blank word, in order of appearance.
function p.divideName(name)
	local words = {}
	-- The callback returns nothing, so gsub is used only to visit each match.
	string.gsub(name, "%S+", function(word)
		words[#words + 1] = word
	end)
	return words
end

-- Works with a Spanish name, finding the given and family names.
-- fullNameTable: table with the words of the name. It is modified in place:
--   a trailing suffix (as detected by p.isOnlyName) and the first word are
--   removed from it.
-- Returns a table with two lists, givenNames and familyNames.
function p.esName(fullNameTable)
	-- Everything below is local; nothing is written to the global environment.
	local name = {
		givenNames = {},
		familyNames = {}
	}
	local connectors = {"de", "del", "y"}
	local surnameTerminations = {"ez", "es", "is", "iz"}

	if not p.isOnlyName(fullNameTable) then
		table.remove(fullNameTable)
	end
	-- The first word is always taken as a given name.
	table.insert(name.givenNames, fullNameTable[1])
	table.remove(fullNameTable, 1)

	-- True when word is one of the connectors.
	local function isConnector(word)
		for _, connector in ipairs(connectors) do
			if word == connector then
				return true
			end
		end
		return false
	end

	-- Adds a word that may be either a given name or a first surname: it is a
	-- surname when it ends with one of the typical surname terminations.
	local function addAmbiguous(word)
		local finalLetters = string.sub(word, -2, -1)
		for _, termination in ipairs(surnameTerminations) do
			if finalLetters == termination then
				table.insert(name.familyNames, word)
				return
			end
		end
		table.insert(name.givenNames, word)
	end

	-- Returns the position of the connector opening the first surname and the
	-- position of the connector opening the second one (0 when not found).
	-- The second connector is only looked for in the two positions before the
	-- last word.
	local function findConnectorPositions(words)
		local firstSurname = 0
		local count = #words
		for index, word in ipairs(words) do
			if isConnector(word) then
				firstSurname = index
				for _, connector in ipairs(connectors) do
					if words[count - 1] == connector and firstSurname ~= count - 1 then
						return firstSurname, count - 1
					end
					if words[count - 2] == connector and firstSurname ~= count - 2 then
						return firstSurname, count - 2
					end
				end
			end
		end
		return firstSurname, 0
	end

	local count = #fullNameTable
	local first, second = findConnectorPositions(fullNameTable)
	if first == 0 then
		-- No connectors at all.
		if count == 1 then
			table.insert(name.familyNames, fullNameTable[1])
		elseif count == 2 then
			-- 1st maybe is a given name
			addAmbiguous(fullNameTable[1])
			table.insert(name.familyNames, fullNameTable[2])
		elseif count >= 3 then
			-- The last two words are the surnames; every word before them is
			-- a given name, so no word of a long name is dropped.
			for i = 1, count - 2 do
				table.insert(name.givenNames, fullNameTable[i])
			end
			table.insert(name.familyNames, fullNameTable[count - 1])
			table.insert(name.familyNames, fullNameTable[count])
		end
	elseif second == 0 then
		-- A single connector: the surname runs from it to the end. The word
		-- right before it may be a given name or a first surname, and any
		-- earlier word is a given name.
		for i = 1, first - 2 do
			table.insert(name.givenNames, fullNameTable[i])
		end
		if first >= 2 then
			addAmbiguous(fullNameTable[first - 1])
		end
		table.insert(name.familyNames, table.concat(fullNameTable, " ", first))
	else
		-- Two connectors: each one opens a surname; the words before the
		-- first connector are given names.
		for i = 1, first - 1 do
			table.insert(name.givenNames, fullNameTable[i])
		end
		table.insert(name.familyNames, table.concat(fullNameTable, " ", first, second - 1))
		table.insert(name.familyNames, table.concat(fullNameTable, " ", second))
	end
	return name
end

-- Works with an English name, finding the given and family names.
-- fullNameTable: table with the words of the name. A trailing suffix (as
--   detected by p.isOnlyName) is removed from it in place.
-- Returns a table with two lists, givenNames and familyNames.
function p.enName(fullNameTable)
	-- Local on purpose: the result must not be written to a global variable.
	local name = {
		givenNames = {},
		familyNames = {}
	}
	-- Take action if some kind of suffix is given.
	if not p.isOnlyName(fullNameTable) then
		table.remove(fullNameTable)
	end
	local lastPosition = #fullNameTable
	for position, actualName in ipairs(fullNameTable) do
		-- Only last position is family name.
		if position == lastPosition then
			table.insert(name.familyNames, actualName)
		else
			table.insert(name.givenNames, actualName)
		end
	end
	return name
end

-- Works with a German name, finding the given and family names.
-- fullNameTable: table with the words of the name. A trailing suffix (as
--   detected by p.isOnlyName) is removed from it in place.
-- Returns a table with two lists, givenNames and familyNames.
function p.deName(fullNameTable)
	-- Local on purpose: the result must not be written to a global variable.
	local name = {
		givenNames = {},
		familyNames = {}
	}
	local connectors = {"de", "van", "den", "ten", "Van", "der", "Le", "le"}
	if not p.isOnlyName(fullNameTable) then
		table.remove(fullNameTable)
	end
	local lastPosition = #fullNameTable
	for position, actualName in ipairs(fullNameTable) do
		if position == lastPosition then
			-- The last word always belongs to the family name.
			table.insert(name.familyNames, actualName)
		else
			for _, connector in ipairs(connectors) do
				if connector == actualName then
					-- The family name runs from the first connector to the
					-- end of the name.
					table.insert(name.familyNames, table.concat(fullNameTable, " ", position))
					return name
				end
			end
			table.insert(name.givenNames, actualName)
		end
	end
	return name
end

-- Works with a Russian name, finding the given and family names.
-- fullNameTable: table with the words of the name. A trailing suffix (as
--   detected by p.isOnlyName) is removed from it in place.
-- Returns a table with two lists, givenNames and familyNames; when a
-- patronymic is present it is stored as the second element of familyNames.
function p.ruName(fullNameTable)
	-- Local on purpose: the result must not be written to a global variable.
	local name = {
		familyNames = {},
		givenNames = {}
	}
	if not p.isOnlyName(fullNameTable) then
		table.remove(fullNameTable)
	end
	-- The words are read as given name, patronymic, family name; with only
	-- two words they are read as given name, family name.
	table.insert(name.givenNames, fullNameTable[1])
	-- Not all people have a patronymic name.
	if #fullNameTable > 2 then
		table.insert(name.familyNames, fullNameTable[3])
		table.insert(name.familyNames, fullNameTable[2])
	else
		table.insert(name.familyNames, fullNameTable[2])
	end
	return name
end

-- Works with a Chinese name, finding the given and family names.
-- fullNameTable: table with the words of the name. It is modified in place:
--   a trailing suffix (as detected by p.isOnlyName) and the first word are
--   removed from it.
-- Returns a table with two lists, givenNames and familyNames; a courtesy name
-- written between parentheses is stored after the family name in familyNames.
function p.zhName(fullNameTable)
	-- Local on purpose: the result must not be written to a global variable.
	local name = {
		familyNames = {},
		givenNames = {}
	}
	if not p.isOnlyName(fullNameTable) then
		table.remove(fullNameTable)
	end
	-- The family name comes first.
	table.insert(name.familyNames, fullNameTable[1])
	table.remove(fullNameTable, 1)
	-- Apart from given name and family name, maybe a courtesy name is given.
	for _, eachName in ipairs(fullNameTable) do
		-- Text after the first "(" up to the next ")"; a missing ")" is
		-- tolerated and the text is then taken up to the end of the word.
		local courtesyName = string.match(eachName, "%(([^%)]*)")
		if courtesyName ~= nil then
			table.insert(name.familyNames, courtesyName)
		else
			-- Stored without padding, like the other language parsers do.
			table.insert(name.givenNames, eachName)
		end
	end
	return name
end

-- Finds out whether the last word of the input is part of the name or a
-- suffix (a Roman numeral written with I, V and X, or Jr/Sr).
-- tableName: table with the words of the name.
-- Returns boolean: true when the last word is a name (or the table is empty),
-- false when it is a suffix.
function p.isOnlyName(tableName)
	local lastWord = tableName[#tableName]
	if lastWord == nil then
		-- Nothing to inspect, so there is no suffix to remove.
		return true
	end
	-- Searches for a Roman number in the last word. The whole word has to be
	-- made of I, V and X (surrounding punctuation is ignored), so that a name
	-- which merely ends in one of those letters is not taken for a numeral.
	if string.match(lastWord, "^%p*[IVX]+%p*$") ~= nil then
		return false
	end
	-- Searches for other suffixes in the last word.
	local suffixes = {"Jr", "Jr.", "Sr", "Sr."}
	for _, suffix in ipairs(suffixes) do
		if lastWord == suffix then
			return false
		end
	end
	return true
end
-- Export the module table.
return p