Module:Sandbox/Eievie

From English Wikipedia @ Freddythechick
local p = {}
local data = mw.loadData( 'Module:Interlinear/data' )
local gloss_override = {} -- for custom gloss abbreviations
local getArgs = require('Module:Arguments').getArgs
local yesno = require('Module:Yesno')
local lang_data = mw.loadData( 'Module:Lang/data' )

local gcl = require('Module:Interlinear/sandbox2/gcl').gcl

--------------------------
-- Almost-global variables
--------------------------
-- State shared between the functions of this module during one call of p.interlinearise(),
-- which is where all of these get assigned:
--   frame               - the frame object that p.interlinearise() received
--   glossing_type       - the result of set_glossing_type() for the |glossing= parameter
--   displaying_messages - true when the preview-only message box may be shown
--   free_translation    - the last unnamed parameter
--   msg                 - the UserMessages object
--   buffer              - text of the word being assembled; reset before each call of parse(),
--                         which appends to it
local frame, glossing_type, displaying_messages, free_translation, msg, buffer

-------------------
-- General settings
-------------------
-- Settings shared by the whole module
local conf = { --settings
	WordSeparator = " \n\r\t", -- Don't replace with %s as this would include non-breaking spaces
	GlossAbbrPattern = "^([Ø0-9A-Z]+)$", -- this isn't a full regex, but a Lua pattern
	-- NOTE: The following characters must be formatted for use in a pattern set.
	-- find_gloss() uses this set to split a word into morphemes.
	GlossAbbrBoundary = "-.,;:<>‹›/\\~+=%?%s%[%]()%_\127'",
	GlossExcludeTable = {I = true,}, -- strings not to be treated as glossing abbreviations
	GlossExcludePattern = '^[0-9][0-9]+$', -- excludes strings made up of two or more digits and nothing else (a single digit is not excluded)
	ErrorCategory = "[[Category:Pages with errors in interlinear text]]", -- added by print_errors() in non-talk namespaces other than namespace 2
	AmbiguousGlossCategory = "[[Category:Articles with ambiguous glossing abbreviations]]", -- added by print_errors() in namespace 0 only
	MessageGlossingError = "Error(s) in interlinear glossing", -- text passed to mw.addWarning()
	-- mixed-case strings that are treated as glossing abbreviations
	-- even though they do not match GlossAbbrPattern:
    LowerCaseGlosses = {
		["1sg"] = true, ["2sg"] = true, ["3sg"] = true,
		["1du"] = true, ["2du"] = true, ["3du"] = true,
		["1pl"] = true, ["2pl"] = true, ["3pl"] = true,
		["Fsg"] = true, ["Fpl"] = true,
		["Msg"] = true, ["Mpl"] = true,
    },
    ErrorHelpLocation = "Template:Interlinear", -- page that the "help" links produced by help_link() point to
}

---------------------
-- CSS styles and classes
---------------------
conf.class = { --CSS classes
	-- parse() looks for this class name in opening HTML tags in order to recognise
	-- gloss abbreviations that have already been formatted
	GlossAbbr  = "gloss-abbr",
	-- class of the elements that wrap error messages and warnings
	ErrorMessage = "error",
}
---------------------
-- Sundry small functions
---------------------
-- Collapses every run of word-separator characters (see conf.WordSeparator)
-- into a single space.
--   str - the string to normalise
-- Returns the normalised string and nothing else: gsub() also produces the
-- number of substitutions, which would otherwise leak out as a second return
-- value and end up as a stray argument wherever the call is the last item of
-- an argument list or table constructor.
local function normalise(str)
	local collapsed = mw.ustring.gsub(str, "[" .. conf.WordSeparator .. "]+", " ")
	return collapsed
end

-- Prepares a user-supplied CSS declaration list for use in a style attribute.
--   str - the raw value of a style parameter
-- Returns the value without surrounding quotation marks and terminated by ";".
local function tidyCss(str)
	-- drop any single or double quotes wrapped around the value
	local css = mw.ustring.gsub(str, '^[\"\']*(.-)[\"\']*$', "%1")
	-- already terminated: nothing more to do
	if mw.ustring.sub(css, -1) == ";" then
		return css
	end
	return css .. ";"
end

-- Wraps text in <b> tags.
--   text - the text to emphasise; may be nil or false
-- Returns the bold markup, or an empty string when no text was given.
local function highlight(text)
	if not text then
		return ""
	end
	return '<b>' .. text .. '</b>'
end

-- Superscripts tone numbers: every run of digits that directly follows a
-- character which is not punctuation, whitespace or a digit is wrapped
-- in <sup>...</sup>.
--   str - the text to process
-- Returns the processed string only; the substitution count that gsub() also
-- produces is discarded so that it cannot leak out as a second return value.
local function tone_sup(str)
	local marked = mw.ustring.gsub(str, "([^%p%s0-9])([0-9]+)", "%1<sup>%2</sup>")
	return marked
end

-- Tells whether a value carries no content.
--   str - a string, or nil/false
-- Returns true for nil/false and for strings made up solely of word-separator
-- characters (see conf.WordSeparator); returns false otherwise.
local function is_empty(str)
	if not str then
		return true
	end
	-- position of the first character that is not a separator, if there is one
	local first_content = mw.ustring.find(str, "[^" .. conf.WordSeparator .. "]")
	return first_content == nil
end

-- Builds a parenthesised "help" wikilink to a section of the page named in
-- conf.ErrorHelpLocation.
--   anchor - name of the section to link to; may be nil
-- Returns the link preceded by a space, or an empty string when no anchor is given.
local function help_link (anchor)
	if not anchor then
		return ""
	end
	local target = conf.ErrorHelpLocation .. "#" .. anchor
	return " ([[" .. target .. "|help]])"
end

---------------------
-- Apply other template
---------------------
-- Expands a two-parameter template through the current frame.
--   str      - passed to the template as its second unnamed parameter
--   template - title of the template to expand
--   langarg  - passed to the template as its first unnamed parameter
-- Returns whatever frame:expandTemplate() returns.
local function expand_template(str, template, langarg)
	local call = {
		title = template,
		args = { langarg, str },
	}
	return frame:expandTemplate(call)
end

-- Passes each of the first n words of a line through a template.
--   arr      - array of words (1-based); modified in place; may be nil
--   n        - number of words to process
--   template - title of the template to apply; nothing is done when it is nil
--   args     - value handed to the template as its first unnamed parameter
-- Returns arr.
local function template_to_line(arr, n, template, args)
	if template and arr then
		-- Word arrays are 1-based: starting at 0 would expand the template on a
		-- non-existent word and store the result in arr[0].
		for i = 1, n do
			-- lines that have fewer than n words are left with their gaps
			if arr[i] ~= nil then
				arr[i] = expand_template(arr[i], template, args)
			end
		end
	end
	return arr
end
---------------------
-- The UserMessages object contains and processes error messages and warnings
---------------------
-- errors:         list of error texts, plus the key "nre" for the non-repeating error
-- warnings:       list of warning texts
-- gloss_messages: message texts keyed by gloss abbreviation
local UserMessages = {errors = {}, warnings = {}, gloss_messages = {}}

-- Records a message.
--   msgtype - "error", "warning", "non-repeating error", "gloss_message" or "ambiguous gloss"
--   text    - the text of the message (not used for "ambiguous gloss")
--   gloss   - the abbreviation a "gloss_message" is about
-- Raises an error for any other msgtype.
function UserMessages:add(msgtype, text, gloss)
	if msgtype == "error" then
		table.insert(self.errors, text)
		return
	end
	if msgtype == "warning" then
		table.insert(self.warnings, text)
		return
	end
	if msgtype == "gloss_message" then
		-- a later message about the same abbreviation replaces the earlier one
		self.gloss_messages[gloss] = text
		return
	end
	if msgtype == "non-repeating error" then
		-- kept under a fixed key, so only the latest one survives
		self.errors.nre = text
		return
	end
	if msgtype == "ambiguous gloss" then
		self.if_ambiguous_glosses = true
		return
	end
	return error("UserMessages:add(): unknown message type", 2)
end
-- Renders the recorded errors.
-- Returns a string holding
--   * a span with all error texts when there is at least one error or warning
--     (the error tracking category is included, except on talk pages and in
--     namespace 2), followed by
--   * the ambiguous-gloss tracking category when an ambiguous gloss was
--     recorded and the page is in namespace 0.
-- The string is empty when none of this applies.
-- Side effect: calls mw.addWarning() whenever the span is produced.
function UserMessages:print_errors()
	local namespace = mw.title.getCurrentTitle().namespace
	local pieces = {}
	if next(self.errors) or self.warnings[1] then
		local span = mw.html.create("span")
		span:addClass(conf.class.ErrorMessage)
		for _, text in pairs(self.errors) do
			span:wikitext(" " .. text .. ";")
		end
		-- talk namespaces have odd numbers; if modifying please update the description on the category page
		local on_talk_page = namespace % 2 ~= 0
		if not on_talk_page and namespace ~= 2 then
			span:wikitext(conf.ErrorCategory)
		end
		pieces[#pieces + 1] = tostring(span)
		mw.addWarning(conf.MessageGlossingError)
	end
	-- this category will only track articles
	if self.if_ambiguous_glosses and namespace == 0 then
		pieces[#pieces + 1] = conf.AmbiguousGlossCategory
	end
	return table.concat(pieces)
end
-- Renders the preview-only message box.
-- Returns two newlines followed by a <div> holding all warnings and all gloss
-- messages, or an empty string when messages are not being displayed
-- (see displaying_messages) or when there is nothing to show.
function UserMessages:print_warnings()
	local out = ""
	-- self.gloss_messages is always a table, so its truthiness says nothing
	-- about whether it has any content
	local has_gloss_messages = next(self.gloss_messages) ~= nil
	-- Messages and warnings get displayed only if the page is being viewed in "preview" mode:
	if displaying_messages and (has_gloss_messages or next(self.warnings)) then
		local div = mw.html.create("div")
		div:addClass("interlinear-preview-warning")
			:wikitext("<i>This message box is shown only in preview:</i>")
			:newline()
		for _,v in ipairs(self.warnings) do
			local para = div:tag("p")
			para:addClass(conf.class.ErrorMessage)
			para:wikitext(v)
		end
		-- the introductory sentence is pointless without expansions to list
		if has_gloss_messages then
			div:wikitext("<p>To change any of the following default expansions, see [[Template:Interlinear/doc#Custom abbreviations|the template's documentation]]:</p>")
			for _,v in pairs(self.gloss_messages) do
				div:wikitext("<p>" .. v .. "</p>")
			end
		end
		out = out .. "\n\n" .. tostring(div)
	end
	return out
end

---------------------
-- set_glossing_type() interprets the value of a |glossing= parameter.
-- find_gloss() splits a word into morphemes and expands the gcl template
-- for anything that looks like a glossing abbreviation.
---------------------
-- Translates the value of a |glossing= parameter into a glossing type.
--   glossing - the raw parameter value (a string)
-- Returns "wikilink", "label" or "no abbr"; returns nil when glossing is
-- switched off or when the value is not recognised (an error message is
-- recorded in the latter case).
-- Raises an error when glossing is nil or false.
local function set_glossing_type(glossing)
	if not glossing then
		error("set_glossing_type: 'glossing' is nil or false", 2)
	end
	glossing = mw.ustring.lower(mw.text.trim(glossing))
	-- plain substring test, no pattern matching involved
	local function mentions(keyword)
		return mw.ustring.find(glossing, keyword, 1, true) ~= nil
	end
	-- "no link" has to be tested before "link": it contains "link", and would
	-- otherwise be taken as a request FOR links
	if mentions('no link') then
		return 'label'
	elseif mentions('link') then
		return "wikilink"
	elseif mentions('label') then
		return 'label'
	elseif mentions('no abbr') then
		return "no abbr"
	end
	local switch = yesno(glossing)
	if switch == false then
		return nil
	elseif switch then
		return 'label' --conf.GlossingType
	end
	-- Recorded on UserMessages directly: that is the object `msg` refers to,
	-- but `msg` may not have been assigned yet when this function runs.
	UserMessages:add('error', 'Glossing type "' .. glossing .. '" not recognised')
	return nil
end

-- Formats the glossing abbreviations contained in a word.
-- The word is split into morphemes at the characters listed in
-- conf.GlossAbbrBoundary, and each morpheme that looks like a glossing
-- abbreviation is replaced with the expansion of the gcl template.
--   word - the word to process; HTML entities in it get decoded first
-- Returns the processed word.
local function find_gloss(word)
	-- Tells whether a morpheme is to be treated as a glossing abbreviation
	local function looks_like_gloss(morpheme)
		if conf.GlossExcludeTable[morpheme]
			or mw.ustring.match(morpheme, conf.GlossExcludePattern) then
			return false
		end
		return mw.ustring.match(morpheme, conf.GlossAbbrPattern)
			or conf.LowerCaseGlosses[morpheme]
	end

	-- gsub() callback: receives the boundary character (possibly empty)
	-- and the morpheme that follows it
	local function render_morpheme(boundary, morpheme)
		if looks_like_gloss(morpheme) then
			local gcl_args
			if glossing_type then
				gcl_args = { morpheme, glossing = glossing_type }
			else
				gcl_args = { morpheme }
			end
			morpheme = frame:expandTemplate{
				title = 'gcl/sandbox', args = gcl_args
			}
		end
		return boundary .. morpheme
	end

	local decoded = mw.text.decode(word, true)
	-- for the case of the English word "I", the 1SG pronoun
	if decoded == "I" then
		return decoded
	end
	local boundary_set = conf.GlossAbbrBoundary
	local morpheme_pattern = "([" .. boundary_set .. "]?)([^" .. boundary_set .. "]+)"
	local formatted = mw.ustring.gsub(decoded, morpheme_pattern, render_morpheme)
	return formatted
end

---------------------
-- The main purpose of the bletcherous parse() is to split a line into words and then for each eligible word
-- to call find_gloss(). The parser outputs the individual words (with any gloss abbreviation formatting applied).
-- The simple job of splitting at whitespaces has been made complicated by a) the fact that the input can contain
-- whitespaces inside the various html elements that are the result of the application of various formatting templates;
-- and b) the need to be able to recognise the output of the template that formats custom gloss abbreviations
-- (and hence skip passing it on to find_gloss). See talk for a suggestion about its future.
---------------------
-- parse() reads ONE word of cline.whole, starting at character position i, and appends
-- its text (with gloss abbreviations formatted where applicable) to the module-level
-- `buffer`. It calls itself to carry on past wikilinks, {curly-bracketed} groups,
-- HTML tags and stretches of ordinary text.
--   cline      - line table; the fields read here are .whole, .length, .glossing and .tone_sup
--   i          - position in cline.whole at which to start reading
--   tags_found - number of HTML elements currently open; a separator ends the word only when this is 0
--   ifglossing - when true, text is passed through find_gloss(); otherwise it is copied
--                (and tone-superscripted if cline.tone_sup is set)
-- Returns a position to which the caller adds 2 in order to get the start of the next word
-- (see the segmentation loop in p.interlinearise): the position just before the separator
-- that ended the word, or a value >= cline.length when the line is exhausted.
local function parse(cline, i, tags_found,ifglossing)
	-- Records an error whose text shows the line up to position i-1, followed by the emphasised culprit
	local function issue_error(message, culprit)
		UserMessages:add("error",  message .. ": ''" .. mw.ustring.sub(cline.whole, 1, i-1) .. "'''" .. culprit  .. "'''''")
	end
	if i > cline.length then return i end --this will only be triggered if the current line has less words than line 1
	local next_step, j, _, chunk
	local probe = mw.ustring.sub(cline.whole,i,i) -- the character at position i decides what happens next
	if mw.ustring.match(probe,"[" .. conf.WordSeparator .. "]") and tags_found == 0
		then next_step =  i-1 -- a separator outside of any HTML element: the word ends here
	elseif probe == "[" then --Wikilink?
		if mw.ustring.sub(cline.whole,i+1,i+1) == "[" then
			-- the whole [[...]] is copied verbatim, including any spaces inside it
			_,j,chunk = mw.ustring.find(cline.whole,"(%[%[.-%]%])", i)
		else chunk = "["; j = i end --not a wikilink then
		buffer = buffer .. chunk
		next_step =  parse(cline, j+1,tags_found,ifglossing)
	elseif probe == "{"  and tags_found == 0 then --curly brackets enclose a sequence of words to be treated as a single unit
		-- chunk is the text between the brackets; the brackets themselves are dropped
		_,j,chunk = mw.ustring.find(cline.whole,"(.-)(})", i+1)
		if not chunk then
			issue_error("Unclosed curly bracket", "{")
			chunk = highlight("{"); j = i
		elseif ifglossing==true then
			chunk = find_gloss(chunk)
		elseif
			cline.tone_sup then chunk = tone_sup(chunk)
		end
		buffer = buffer .. chunk
		next_step =  parse(cline, j+1,tags_found,ifglossing)
	elseif probe == "<" then -- We've encountered an HTML tag. What do we do now?
		-- NB: these locals shadow the j and chunk declared above
		local _,j,chunk = mw.ustring.find(cline.whole,"(<.->)",i)
		if not chunk then
			issue_error("Unclosed angle bracket", "<")
			chunk = highlight("<"); j = i
		elseif mw.ustring.sub(cline.whole,i,i+1) == "</" then -- It's a CLOSING tag
			-- glossing is switched back on after the end of an <abbr> element,
			-- provided that the line is set to be glossed
			if cline.glossing
				and ifglossing==false
				and mw.ustring.match(chunk,"</abbr>")
				then ifglossing=true end
			tags_found = tags_found - 1
		elseif not mw.ustring.match(chunk, "/>$") -- It's an OPENING tag, unless it opens a self-closing element (in which case the element is ignored)
			then if ifglossing == true -- the following checks for the output of {{ggl}}:
					and mw.ustring.find(chunk, conf.class.GlossAbbr, 1, true) -- it's important that the "find" function uses literal strings and not patterns
						then ifglossing = false end -- the content of an already formatted abbreviation must not be glossed again
			tags_found = tags_found + 1
		end
		buffer = buffer .. chunk
		next_step = parse(cline, j+1,tags_found,ifglossing)
	else -- No HTML tags, so we only need to find where the word ends
		-- chunk: one or more characters up to (not including) the next space, "<" or "[";
		-- k: the position of that terminating character
		local _,k,chunk = mw.ustring.find(cline.whole,"(..-)([ <[])",i)
		if k then --ordinary text
			if ifglossing==true then
				buffer = buffer .. find_gloss(chunk)
			else
				if cline.tone_sup then chunk = tone_sup(chunk) end
				buffer = buffer .. chunk
			end
			-- continue AT the terminating character, which gets examined by the next call
			next_step = parse(cline, k, tags_found, ifglossing)
		else -- reached end of string
			if ifglossing == true then
				chunk = find_gloss(mw.ustring.sub(cline.whole,i))
			else
				chunk = mw.ustring.sub(cline.whole,i)
				if cline.tone_sup then chunk = tone_sup(chunk) end
			end
			buffer = buffer .. chunk
			next_step = cline.length
		end
	end
	return next_step
end

---------------------
-- HTML stuff
---------------------
-- Assembles the HTML of an interlinear example.
--   args            - the template arguments; the ones read here are rtl, box, number,
--                     indent, top, spacing and bottom
--   number_of_words - number of word blocks to produce
--   line            - array of line tables (fields used: whole, words, attr, class, c),
--                     plus the flag line.hasComments
-- Also reads the module-level free_translation and msg.
-- Returns the mw.html object of the outermost <div>.
local function build_interlinear_html(args, number_of_words, line)
	-- Gives back a parameter value, or nil when it is missing or blank
	local function filled(value)
		if value and value ~= "" then
			return value
		end
		return nil
	end

	local root = mw.html.create("div")
	root:addClass("interlinear")

	-- optional modifiers: right-to-left script, then box
	if yesno(args.rtl) == true then root:addClass("right_to_left") end
	if yesno(args.box) == true then root:addClass("box") end

	-- numbering and/or indent in the left margin
	local number = filled(args.number)
	local indent = filled(args.indent)
	if number or indent then
		-- "4" is the default indent, used when only a number has been given
		root:css("margin-left", (indent or "4") .. 'em')
		if number then
			root:tag("div"):addClass("number"):wikitext(args.number)
		end
	end

	-- lines to display above the interlinear block
	if filled(args.top) then
		root:tag("div"):addClass("top"):wikitext(args.top)
	end

	-- the interlinear block proper
	local blocks = root:tag("div"):addClass("block_wrapper")

	-- non-standard spacing
	local gap = tonumber(args.spacing)
	if gap and gap <= 20 then
		blocks:css('column-gap', gap .. 'em')
	end

	-- one block per word position, holding that word from every line
	for wi = 1, number_of_words do
		local word_block = blocks:tag("div"):addClass("word_block")
		for _, current in ipairs(line) do
			if current.whole ~= "" then -- skipping empty lines
				local para = word_block:tag("p")
				para:attr(current.attr)
				if current.class then
					para:addClass(current.class)
				end
				local text = current.words[wi]
				-- <p> elements without content mess up the interlinear display
				if text == "" or text == " " then
					text = "&nbsp;"
				end
				para:wikitext(text)
			end
		end
	end

	-- "comments", added at the end of each line
	if line.hasComments then
		local comment_block = blocks:tag("div"):addClass("comment_block")
		for _, current in ipairs(line) do
			comment_block:tag("p"):wikitext(current.c or "&nbsp;")
		end
	end

	-- hidden copies of the full text of each line; this is for accessibility
	for _, current in ipairs(line) do
		root:tag("p"):addClass("hidden_text"):wikitext(current.whole)
	end

	-- free translation; the paragraph is created in any case as it also carries the error messages
	local translation = root:tag("p")
	if free_translation and free_translation ~= "" then
		translation:addClass("free_translation"):wikitext(free_translation)
	end
	translation:node(msg:print_errors())

	-- lines to display below everything else
	if args.bottom and args.bottom ~= "" then
		root:tag('p'):addClass('bottom'):wikitext(args.bottom)
	end
	return root
end

--------------------
-- The following is the function called by Template:Interlinear.
-- It processes the template arguments, then calls parse() to split the input lines into words
-- and it then builds the output html.
--------------------
-- Entry point.
--   f - the frame object
-- Returns the wikitext/HTML of the interlinear example, followed by any tracking
-- category and by the preview-only message box. When fewer than two non-empty
-- lines are supplied, only the rendered error message is returned.
function p.interlinearise(f)
---------------------
-- Prepare arguments
---------------------
	frame = f
	-- The message collector must be in place before any parameter gets validated,
	-- as validation reports problems through `msg`.
	msg = UserMessages
	local args = getArgs(frame, { -- configuration for Module:Arguments
		trim = true,
		removeBlanks = false,
		parentFirst = true,
	--	wrappers = {
	--		'Template:Interlinear', 'Template:Fs interlinear',
	--		'Template:Interlinear/sandbox', 'Template:Fs interlinear/sandbox'
	--	},
	})
	-- getParent() gives nil for a frame that has no parent; fall back on the frame's own title then
	local parent = frame:getParent()
	local template_name = parent and parent:getTitle() or frame:getTitle()
	if template_name == 'Template:Fs interlinear/sandbox' then
		-- defaults specific to this template; explicitly supplied values take precedence
		args.italics1 = args.italics1 or "no"
		args.italics2 = args.italics2 or "yes"
		args.glossing3 = args.glossing3 or "yes"
		if args.lang and not args.lang2 then args.lang2 = args.lang .."-Latn" end
		if args.transl and not args.transl2 then args.transl2 = args.transl end
	end

--	if args.wordseparator and (args.wordseparator ~= "") then
--		conf.WordSeparator = conf.WordSeparator .. args.wordseparator
--	end

	if args.glossing then
		glossing_type = set_glossing_type(args.glossing)
	end

	-- messages will be displayed only in preview mode
	if frame:preprocess("{{REVISIONID}}") == "" then
		if not args['display-messages']
		or yesno(args['display-messages']) then
			displaying_messages = true
		end
	end

	local line = {}
	-- Marks line n as italicised. The class is ADDED to whatever classes the line has
	-- already got (smallcaps, |classN=), and a request for a line that doesn't exist
	-- (e.g. |italics=7) is ignored.
	local function set_italics(n)
		local target = line[n]
		if not target then return end
		local classes = target.class
		if not classes or classes == "" then
			target.class = "italics"
		elseif not string.find(" " .. classes .. " ", " italics ", 1, true) then
			target.class = "italics " .. classes
		end
		target.tone_sup = true -- single digits are assumed to be tone markers and will hence be superscripted
		if args['tone-superscripting'] and not yesno(args['tone-superscripting'])
			then target.tone_sup = false end
	end

	-- offset:    number of empty unnamed parameters met so far; they don't count as lines
	-- last_line: index of the last unnamed parameter
	local offset, last_line = 0, 0
	for j,v in ipairs(args) do -- iterates over the unnamed parameters from the template
		last_line = last_line +1
		if is_empty(v) then
			offset = offset + 1
		else
			local i = j - offset
			line[i] = {}

			-- normalise
			-- NOTE(review): |wordseparator= goes into a pattern set unescaped, so characters
			-- like "]", "^" or "%" in it will change or break the pattern
			if args.wordseparator and (args.wordseparator ~= "") then
				v = mw.ustring.gsub(v,"([" .. args.wordseparator .. "]+)","%1 ")
			end
			v = mw.ustring.gsub(v,"[" .. conf.WordSeparator .. "]+"," ")
			line[i].whole = v
			line[i].length = mw.ustring.len(v)

			local _c = args["c" .. i]
			if _c and _c ~= "" then
				line.hasComments = true
				line[i].c = _c
			end

			---prepare style arguments----
			line[i].class = ""
			local _style = args["style" .. i]
			if not _style then
				_style = ""
			else
				_style = tidyCss(_style)
			end
			--line[i].attr holds the attributes for the <p> elements that enclose the words in line i
			line[i].attr = { style = _style }

			local _lang = args["lang" .. i]
			if _lang and #_lang > 1 then
				line[i].lang = _lang
			else _lang = args.lang
				if _lang and #_lang > 1 and i == 1 then -- if a lang= parameter is supplied, it's assumed to apply to line 1
					line[i].lang = _lang
				end
			end
			line[i].attr.lang = line[i].lang

			if yesno(args["italics" .. i]) then
				set_italics(i)
			end

			local _gloss = yesno(args["glossing" .. i]);
			if _gloss then
				line[i].glossing = _gloss
			end

			if yesno(args['ipa' .. i]) then
				line[i].ipa = true
			end

			-- NOTE(review): line[i].words isn't filled in until the segmentation step
			-- further down, so this condition cannot be met at this point
			local _wrapper = args['wrapper' .. i]
			if _wrapper and line[i].words then
				line[i].wrapper = _wrapper
			end

			if yesno(args["smallcaps" .. i]) then
				line[i].class = line[i].class .. " smallcaps"
				line[i].glossing = false
			end

			local _transl = args["transl" .. i]
			if _transl and #_transl > 1 then
				_transl = mw.ustring.lower(_transl)
				local _lookup = lang_data.translit_title_table[_transl]
				if _lookup then
					if _lang and  _lookup[_lang] then
						_transl = _lookup[_lang]
					else _transl = _lookup.default
					end
					if _transl then
						line[i].attr.title = _transl
					end
				else  msg:add("error", "Transliteration scheme '" .. _transl .. "' not recognised")
				end
			end

			local _class = args['class' .. i]
			if _class then
				line[i].class = line[i].class .. " " .. _class
			end

			if line[i].class == "" then
				line[i].class = nil
			end
		end
	end

	local line_count = #line
	if line_count == 0 then
		msg:add("error", template_name .. ": no lines supplied.")
		return msg:print_errors()
	elseif line_count == 1 then
		msg:add("error", template_name .. ": only 1 line supplied.")
		return msg:print_errors()
	end

	if line_count > 1 then
		local _italics = args.italics
		local n = tonumber(_italics)
		if n and n > 0 then
			set_italics(n)
		elseif not (_italics and not yesno(_italics))
			and not (args["italics1"] and not yesno(args["italics1"])) then
			set_italics(1) -- by default, the first line will get italicised, unless italics=no or italics1=no
		end
		-- the last unnamed parameter is assumed to be the free translation:
		free_translation = args[last_line]
		if not is_empty(free_translation) then
			line [line_count] = nil
		end  --... and is thus excluded from interlinearising
	end

---------------------
-- Segment lines into words
---------------------
	-- NOTE(review): parse() is called with ifglossing = true for every line,
	-- whatever the value of line[i].glossing -- confirm that this is intended
	for i,v in ipairs(line) do
		local wc, n = 1, 1
		line[i].words = {}
		while n <= line[i].length do
			buffer = ""
			-- parse() stops just before the separator that ends a word, hence the + 2
			n = parse(line[i], n, 0, true) + 2
			line[i].words[wc] = buffer
			wc = wc + 1
		end
	end

	----Check for mismatches in number of words across lines----
	local number_of_words, mismatch_found = 0, false
	for i,v in ipairs(line) do -- find the maximum number of words in any line
		local wc = #line[i].words
		if wc ~= number_of_words then
			if i ~= 1 and wc ~= 0 then
				mismatch_found = true
			end
			if wc > number_of_words then
				number_of_words = wc
			end
		end
	end
	----Deal with mismatches---
	if mismatch_found then
		local error_text = "Mismatch in the number of words between lines: "
		for i,v in ipairs(line) do
			local wc = #line[i].words
			error_text = error_text .. wc .. " word(s) in line " .. i .. ", "
			if wc ~= number_of_words then
				-- shorter lines get padded, so that every word block has the same number of rows
				for current_word = wc+1, number_of_words do
					line[i].words[current_word] = "&nbsp;"
				end
			end
		end
		if string.sub(error_text, -2) == ", " then
			error_text = string.sub(error_text, 1, #error_text - 2) .. " "
		end
		error_text = error_text .. help_link("mismatch")
		UserMessages:add("error", error_text)
	end

	-- Wrap in first line of {{Fs interlinear}} in {{Script}}
	--if template_name == 'Template:Fs interlinear/sandbox'
	if args.script and line[1].words then
		line[1].words = template_to_line(
			line[1].words, number_of_words,
			'Script', args.script)
		-- line 2 is gone if it was the free translation, hence the check for its existence
		if args.lang and line[2] and line[2].words then
			line[2].words = template_to_line(
				line[2].words, number_of_words,
				'transliteration', args.lang)
		end
	elseif args.lang and line[1].words then
		line[1].words = template_to_line(
			line[1].words, number_of_words,
			'lang', args.lang)
	end

---------------------
-- Build the HTML
---------------------
	local divHtml = build_interlinear_html(args, number_of_words, line)

	-- temporary tracking categories
	local temp_track = ""
	if last_line == 2 then
		temp_track = "[[Category:Pages with interlinear glosses using two unnamed parameters]]"
	end
	if last_line > 3 and template_name ~= 'Template:Fs interlinear' then
		temp_track = "[[Category:Pages with interlinear glosses using more than three unnamed parameters]]"
	end

	return tostring(divHtml) .. temp_track .. msg:print_warnings()
end

return p