Module:Sandbox/Ajuanca/GraphIt

From English Wikipedia @ Freddythechick

This is the current revision of this page, as edited by imported>Ajuanca at 22:13, 11 October 2020 (fix bug: skip arithmetic for nil val). The present address (URL) is a permanent link to this version.

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

--[[
		------------------
	---| EPIDEMICS MODULE |---
		------------------
	~~ A specialized module to work with epidemics data. ~~
	
	In order to use it:
	- Data should be stored in Wikimedia Commons, as a Tabular file.
	  ie: "COVID-19 cases in Asturias.tab"
	- The module is used on any wiki page with:
	  {{#invoke:Sandbox/Ajuanca/GraphIt|function_name|param1|param2|...}}
	- All functions whose names don't begin with an underscore are meant
	  to be invoked. The remaining functions are "internal" functions.
	  In case you face some problems with the functions, ask me on my talk
	  page (User_talk:Ajuanca)
	- Make sure you give the correct params. Every function includes an
	  explanation. If a graph is requested, all the parameters accepted by
	  Module:Graph can be passed.
	  
	Feel free to leave any comment, suggestion or complaint on my 
	discussion page (User_talk:Ajuanca).

	ToDo list:
	[ ] Generate wikitable
	[ ] Add positive rate

	Some ideas that may be implemented:
	* Divide functions (graph, wikitable) instead of booleans?? 
	* Join functions (get_avg_incidence + get_all_incidence = get_incidences)
	* Get rid of internal join_tables function.
]]--
local p = {}
mgraph = require("Module:Graph")

-- Join two tables, modifying and returning the first one.
-- Entries of _table2 whose key converts to a number (via tonumber) are
-- appended to the end of _table1, in ascending key order.
-- Entries with any other kind of key are copied "as they are", overwriting
-- an existing entry of _table1 that uses the same key.
function p.join_tables(_table1, _table2)
	local positional = {}
	for k, arg in pairs(_table2) do
		local index = tonumber(k)
		if index then
			positional[#positional + 1] = { index = index, value = arg }
		else
			_table1[k] = arg
		end
	end
	-- pairs() makes no promise about traversal order, so the numeric entries
	-- are sorted before being appended; otherwise positional values could be
	-- added in a shuffled order.
	table.sort(positional, function(a, b)
		return a.index < b.index
	end)
	for _, entry in ipairs(positional) do
		table.insert(_table1, entry.value)
	end
	return _table1
end

-- Convert a table to a string.
-- Keys should be consecutive integers starting at 1 and values should be
-- strings or numbers.
-- Values are concatenated with ", "; an empty table gives "".
function p.table2string(_table)
	-- table.concat builds the result in one pass (no repeated ".." on a
	-- growing string) and needs no helper variables, so nothing leaks into
	-- the global environment.
	return table.concat(_table, ", ")
end

-- Render a chart for the given arguments.
-- args is handed to the chart function of Module:Graph (as the "args" field
-- of a table) and the result is wrapped in a "graph" extension tag created
-- on the current frame.
function p._graph(args)
	local chart_spec = mgraph.chart({ args = args })
	local current_frame = mw.getCurrentFrame()
	local rendered = current_frame:extensionTag('graph', chart_spec)
	return rendered
end

-- Average incidence values over consecutive, non-overlapping periods.
-- The given parameters are:
-- incidences or [1]: Sequence of incidence values (numbers or numeric strings).
-- dates or [2]: Sequence of dates, in the same order as the incidences.
-- period: The number of values to perform each average with. A numeric
--         string (as received from wikitext) is accepted. Default is 3.
-- Returns {averages, dates}: one average per complete period, and the date
-- found at every period-th position. Values left over in an incomplete
-- trailing period are not averaged.
function p._get_avg_incidence(args)
	local incidences = args.incidences or args[1]
	local dates = args.dates or args[2]
	-- Convert explicitly: a string period would never compare equal to the
	-- numeric counter below, and no average would ever be produced.
	local avg_period = tonumber(args.period) or 3
	local periods_avg = {}
	local periods_dates = {}
	local total, count = 0, 0
	for _, sincidence in ipairs(incidences) do
		total = total + tonumber(sincidence)
		count = count + 1
		if count == avg_period then
			periods_avg[#periods_avg + 1] = total / count
			total, count = 0, 0
		end
	end
	for i, sdate in ipairs(dates) do
		if i % avg_period == 0 then
			periods_dates[#periods_dates + 1] = sdate
		end
	end
	return {periods_avg, periods_dates}
end

-- Get an average for the incidence proportion of a specific event.
-- ie. The week average of the daily confirmed cases of flu.
-- The given parameters are:
-- [1] or src: The tabular data, ie: "example.tab"
-- [2] or column_name: The name of the column.
-- [3] or date_name: The name of the date column.
-- [4] or inhabitants: The population size of the given region.
-- [5] or nth: The power of 10 in which the result is given.
-- column_title: The column title of the data to work with.
-- date_title: The column title of the date.
-- period: The number of values to perform the average with.
-- 		   Default is 3 (ie: 3 days).
-- graph: "true" to get the rendered graph.
-- ltable: "true" to get the Lua table {averages, dates}.
-- wtable: "true" to get a wikitable (not implemented yet, only logs).
-- When exactly one result is requested it is returned directly; otherwise
-- a table holding the requested results is returned.
function p.get_avg_incidence(frame)
	local args = frame.args
	local return_graph = args.graph == "true"
	local return_table = args.ltable == "true"
	local return_wikitable = args.wtable == "true"
	local avg_args = p.join_tables(p._get_all_incidence(args), args)
	-- Frame arguments arrive as strings; the period has to be a number to
	-- be compared with a count of values.
	avg_args.period = tonumber(avg_args.period)
	local avg_incidence = p._get_avg_incidence(avg_args)
	local to_return = {}
	if return_table then
		table.insert(to_return, avg_incidence)
	end
	if return_graph then
		-- The graph arguments are built in their own table so that the data
		-- table returned for "ltable" is not emptied or polluted.
		local graph_args = p.join_tables({}, args)
		graph_args.x = p.table2string(avg_incidence[2])
		graph_args.y = p.table2string(avg_incidence[1])
		table.insert(to_return, p._graph(graph_args))
	end
	if return_wikitable then
		mw.log("in progress")
	end
	if #to_return == 1 then
		to_return = to_return[1]
	end
	return to_return
end

-- Read one column of a tabular data page and convert it to incidences.
-- Every numeric cell is converted to: value * 10^nth / inhabitants.
-- The given parameters are:
-- [1] or src: The tabular data, ie: "example.tab"
-- [2] or column_name: The name of the column.
-- [3] or date_name: The name of the date column.
-- [4] or inhabitants: The population size of the given region.
-- [5] or nth: The power of 10 in which the result is given.
-- column_title: The column title of the data to work with.
-- date_title: The column title of the date.
-- Rows whose cell is not a number are skipped together with their date, so
-- that both returned sequences keep the same length and order.
-- Returns {incidences, dates}.
function p._get_all_incidence(args)
	local data_page = args.src or args[1]
	local data = mw.ext.data.get(data_page)
	if not data then
		-- Fail with a readable message instead of an "attempt to index" error.
		error("GraphIt: no tabular data found for " .. tostring(data_page))
	end
	local column_name = args.column_name or args[2]
	local date_name = args.date_name or args[3]
	local total_residents = tonumber(args.inhabitants) or tonumber(args[4])
	local n = tonumber(args.nth) or tonumber(args[5])
	local column_title = args.column_title
	local date_title = args.date_title
	local ci = nil
	local di = nil
	for j, field in ipairs(data.schema.fields) do
		-- Titles are only compared when one was requested: otherwise a field
		-- without title would match (nil == nil) and pick the wrong column.
		if field.name == column_name
			or (column_title ~= nil and field.title == column_title) then
			ci = j
		elseif field.name == date_name
			or (date_title ~= nil and field.title == date_title) then
			di = j
		end
		if ci and di then
			break
		end
	end
	local column_values = {}
	local date_values = {}
	for _, record in ipairs(data.data) do
		local value = tonumber(record[ci])
		if value ~= nil then
			-- Value and date are stored together; inserting only the date for
			-- an empty cell would shift every later date against its value.
			column_values[#column_values + 1] = (value * 10 ^ n) / total_residents
			date_values[#date_values + 1] = record[di]
		end
	end
	return {column_values, date_values}
end

-- Get the incidence proportion of a specific event for all the available dates.
-- Def: "Number of new cases of disease during specified time interval"
-- ie. The daily confirmed cases of flu per 10,000 inhabitants.
-- The given parameters are:
-- [1] or src: The tabular data, ie: "example.tab"
-- [2] or column_name: The name of the column.
-- [3] or date_name: The name of the date column.
-- [4] or inhabitants: The population size of the given region.
-- [5] or nth: The power of 10 in which the result is given.
-- column_title: The column title of the data to work with.
-- date_title: The column title of the date.
-- graph: "true" to get the rendered graph.
-- ltable: "true" to get the Lua table {incidences, dates}.
-- wtable: "true" to get a wikitable (not implemented yet, only logs).
-- When exactly one result is requested it is returned directly; otherwise
-- a table holding the requested results is returned.
function p.get_all_incidence(frame)
	local args = frame.args
	local all_incidence = p._get_all_incidence(args)
	local return_graph = args.graph == "true"
	local return_table = args.ltable == "true"
	local return_wikitable = args.wtable == "true"
	local to_return = {}
	if return_table then
		table.insert(to_return, all_incidence)
	end
	if return_graph then
		-- The graph arguments are built in their own table so that the data
		-- table returned for "ltable" is not emptied or polluted.
		local graph_args = p.join_tables({}, args)
		graph_args.x = p.table2string(all_incidence[2])
		graph_args.y = p.table2string(all_incidence[1])
		table.insert(to_return, p._graph(graph_args))
	end
	if return_wikitable then
		mw.log("in progress")
	end
	if #to_return == 1 then
		to_return = to_return[1]
	end
	return to_return
end

-- Split a comma-separated string into a sequence of values.
-- Whitespace around each value is trimmed, so "a, b" gives {"a", "b"}.
-- Empty fields (two consecutive commas) are skipped.
function p.string2table(_string)
	local values = {}
	for value in _string:gmatch("[^,]+") do
		-- Trim so that names written as "a, b" still match the column names.
		values[#values + 1] = value:match("^%s*(.-)%s*$")
	end
	return values
end

-- Get the incidence proportion of a specific event for the specific intervals.
-- Various events and averages can be given. If no averages are specified,
-- daily info is given.
-- ie. The daily confirmed cases of flu per 10,000 inhabitants and the week
-- average of hospital occupation due to flu per 10,000 inhabitants.
-- The given parameters are:
-- [1] or src: The tabular data, ie: "example.tab"
-- [2] or column_names: The names of the columns for the different events,
--                      separated by commas.
-- [3] or date_name: The name of the column that contains the date information.
-- [4] or inhabitants: The population size of the given region.
-- [5] or nth: The power of 10 in which the result is given.
-- avgs: The averages of the events to be calculated, separated by commas
--       and given in the same order as the columns.
--       ie: 3 (days)
-- ltable: "true" to also get the Lua table {incidences, dates}.
-- wtable: "true" to get a wikitable (not implemented yet, only logs).
-- The graph is always generated, whatever the value of the "graph" parameter.
-- When only the graph is produced it is returned directly; otherwise a
-- table holding the requested results is returned.
function p.get_incidences(frame)
	-- Work on a copy, so that the caller's frame.args is never modified.
	local args = {}
	for k, v in pairs(frame.args) do
		args[k] = v
	end
	-- Positional argument 2 is read, not removed: removing it would shift
	-- arguments 3 to 5 away from their documented positions.
	local columns = p.string2table(args.column_names or args[2])
	local avgs = nil
	if args.avgs then
		avgs = p.string2table(args.avgs)
	end
	local incidences = {}
	local dates = {}
	for i, column in ipairs(columns) do
		args.column_name = column
		local series = p._get_all_incidence(args)
		if avgs then
			series = p._get_avg_incidence({
				series[1],
				series[2],
				period = tonumber(avgs[i]),
			})
		end
		incidences[#incidences + 1] = series[1]
		dates[#dates + 1] = series[2]
	end
	local return_table = args.ltable == "true"
	local return_wikitable = args.wtable == "true"
	local to_return = {}
	if return_table then
		table.insert(to_return, {incidences, dates})
	end
	-- The graph has always been produced unconditionally by this function;
	-- that behaviour is kept so that existing invocations keep working.
	local to_graph = {}
	for i, incidence in ipairs(incidences) do
		to_graph["y" .. tostring(i)] = p.table2string(incidence)
		-- xnth values are not inserted due to Module:Graph limitations.
	end
	-- Only the dates of the first column are used for the x axis; an empty
	-- list is used when no column was processed.
	to_graph.x = p.table2string(dates[1] or {})
	to_graph = p.join_tables(to_graph, frame.args)
	table.insert(to_return, p._graph(to_graph))
	if return_wikitable then
		mw.log("in progress")
	end
	if #to_return == 1 then
		to_return = to_return[1]
	end
	return to_return
end

return p