-- Module:Sandbox/Tom.Reding/Tools

-- Export table: the entry points defined below are attached to it as p.<name>.
local p = {}

function p.fmttimestamp(frame)
-- Formats a 14-character timestamp (YYYYMMDDhhmmss, e.g. 20190201223349) as
-- ISO 8601 without a time zone designator (2019-02-01T22:33:49).
-- frame.args[1]: the timestamp; surrounding whitespace is ignored.
-- A missing or blank argument yields '0000-00-00T00:00:00'.
	local raw = frame.args[1]
	-- A blank #invoke argument arrives as '' rather than nil, and unnamed
	-- arguments keep their surrounding whitespace, so trim before defaulting.
	local ts = raw ~= nil and string.match(tostring(raw), '^%s*(.-)%s*$') or ''
	if ts == '' then ts = '00000000000000' end
	return string.format('%s-%s-%sT%s:%s:%s',
		string.sub(ts, 1, 4),   --year
		string.sub(ts, 5, 6),   --month
		string.sub(ts, 7, 8),   --day
		string.sub(ts, 9, 10),  --hour
		string.sub(ts, 11, 12), --minute
		string.sub(ts, 13, 14)) --second
end

function p.instanceOfs(frame)
-- returns the 'instance of'(s) (P31) of the WP page passed in frame.args[1]
-- good for finding exceptions in [[Category:Taxonbars on possible non-taxon pages]]
-- output: [[title]] <tab> [[d:QID|QID]] <tab> instance of: {{Q|...}}, {{Q|...}}<br/>
--     or: [[title]] <tab> no QID found<br/>   when no item could be loaded
	local tab = '\t'
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	-- Only load the item when an ID was resolved: getEntity() without an ID
	-- returns the item connected to the *current* page, not to 'title'.
	local item = qid and mw.wikibase.getEntity(qid)
	if not item then
		return '[['..title..']]'..tab..'no QID found<br/>'
	end

	local tInstanceOfs = {}
	for _, instanceOfState in ipairs( item:getBestStatements('P31') ) do --instance of
		-- 'unknown value'/'no value' snaks have no datavalue; skip them
		local datavalue = instanceOfState.mainsnak.datavalue
		if datavalue then
			tInstanceOfs[#tInstanceOfs + 1] = '{{Q|'..datavalue.value.id..'}}' --'Q...'
		end
	end
	if not tInstanceOfs[1] then tInstanceOfs = { 'none found' } end
	return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'instance of: '..table.concat(tInstanceOfs, ', ')..'<br/>'
end

-- Ordered { pattern, replacement } rules used by singularizeDescription().
-- Order matters: the generic plural-stripping rules run first, then the
-- special cases restore words those rules damage ('cactus' -> 'cactu' -> 'cactus').
local singularRules = {
	--remove 's' semi-liberally
	{ '^([%w]+[^s])s([, ])', '%1%2' },             --birdS that ...; plantS, guava
	{ '^([a-z]+[^s])s$', '%1' },                   --birdS
	{ '([^a-zA-Z][a-z]+[^s])s$', '%1' },           --song and dance birdS
	{ '^([%w%-]+ [a-z]+[^s])s([, ])', '%1%2' },    --song birdS in ...
	{ 's %(fossil', ' (fossil' },                  --birdS (fossil)
--	{ 's %(', ' (' },                              --turn off if too liberal
	{ 's of ', ' of ' },                           --triasic birdS of the family...
	{ 's in the ', ' in the ' },                   --triasic birdS in the family...
	{ '^bacteria', 'bacterium' },
	{ '([^%w])bacteria', '%1bacterium' },          --keep the separator: 'gram-negative bacteria'
	--fix special cases
	{ 'algae', 'alga' },
	--one rule per word ending: Lua patterns have no alternation, and the
	--frontier %f[...] cannot be combined with a [^s] test in a single rule
	{ 'cactu(%f[\0, ])', 'cactus%1' },
	{ 'cactuse(%f[\0, ])', 'cactus%1' },
	{ 'carnivorou(%f[\0, ])', 'carnivorous%1' },
	{ 'countrie(%f[\0, ])', 'countries%1' },
	{ 'citru(%f[\0, ])', 'citrus%1' },
	{ 'crocu(%f[\0, ])', 'crocus%1' },
	{ 'deciduou(%f[\0, ])', 'deciduous%1' },
	{ '[eE]delweis(%f[\0, ])', 'edelweiss%1' },
	{ 'fishe(%f[\0, ])', 'fish%1' },
	{ 'flightles(%f[\0, ])', 'flightless%1' },
	{ 'fung[iu](%f[\0, ])', 'fungus%1' },
	{ '[gG]enu(%f[\0, ])', 'genus%1' },
	{ '[gG]ras(%f[\0, ])', 'grass%1' },
	{ '[gG]rasse(%f[\0, ])', 'grass%1' },
	{ 'herbaceou(%f[\0, ])', 'herbaceous%1' },
	{ 'herbivorou(%f[\0, ])', 'herbivorous%1' },
	{ '[iI]ri(%f[\0, ])', 'iris%1' },
	{ 'leeche(%f[\0, ])', 'leech%1' },
	{ 'loache(%f[\0, ])', 'loach%1' },
	{ 'mos(%f[\0, ])', 'moss%1' },
	{ 'mosse(%f[\0, ])', 'moss%1' },
	{ '[sS]pecie(%f[\0, ])', 'species%1' },
	{ 'venomou(%f[\0, ])', 'venomous%1' },
	{ 'viruse(%f[\0, ])', 'virus%1' },
}

-- Applies every rule in singularRules, in order, and returns the rewritten text.
local function singularizeDescription(text)
	for _, rule in ipairs(singularRules) do
		text = mw.ustring.gsub(text, rule[1], rule[2]) --assignment keeps only the string result
	end
	return text
end

-- Returns the item ID ('Q...') in a statement's main snak, or nil when the
-- snak has no datavalue ('unknown value' / 'no value').
local function mainsnakId(statement)
	local datavalue = statement.mainsnak and statement.mainsnak.datavalue
	return datavalue and datavalue.value and datavalue.value.id
end

function p.istaxon(frame)
-- climbs the taxon tree until a description is found
-- frame.args[1]: page title (passed to Module:ResolveEntityId to get the QID)
-- prefix 'c' = child, 'p' = parent
-- output (tab-separated):
--     [[cSitelink or cQID]]  [[cQID]]  <cI/O>  (cI/O lbl)  <cTaxonQID>  <cTaxon QID label>  [[pQID|pDescription]]:  <pDescription's animal type>
-- ex: [[Orthalicus nobilis]]  [[d:Q49525664]]  Q16521  (taxon)  Q7432  species  [[d:Q7104378|genus of molluscs]]:  mollusc
-- Items that are not (acceptable) taxa, or that cannot be loaded, get a short
-- '(not a taxon ...)' / 'not on Wikidata OR no sitelink' line instead.
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	-- Only load the item when an ID was resolved: getEntity() without an ID
	-- returns the item connected to the *current* page, not to 'title'.
	local item = qid and mw.wikibase.getEntity(qid)
	local acceptableInstanceOf = {
		['Q16521'] = 'taxon',
		['Q310890'] = 'monotypic taxon',
		['Q2568288'] = 'ichnotaxon',
		['Q23038290'] = 'fossil taxon',
		['Q47487597'] = 'monotypic fossil taxon',
	}
	local allowNonTaxons = false --if false (default), display '(not a taxon)';
	local otherInstanceOf = {    --if true, allow clades, etc., as named in otherInstanceOf{}
		['Q713623'] = 'clade',
	--	['Q4886'] = 'cultivar', --kind of a mess
	}
	-- Decisions are keyed on QIDs rather than on label text, so they do not
	-- depend on the label Wikidata currently returns for these items.
	local fossilInstanceOf = { --descriptions of these get ' (fossil)' appended when needed
		['Q2568288'] = true,   --ichnotaxon
		['Q23038290'] = true,  --fossil taxon
		['Q47487597'] = true,  --monotypic fossil taxon
	}
	local forcedInstanceOf = { --monotypic types are reported as their plain counterpart
		['Q310890'] = 'Q16521',      --monotypic taxon -> taxon
		['Q47487597'] = 'Q23038290', --monotypic fossil taxon -> fossil taxon
	}
	local tab = '\t'

	--output for failed items
	if not item then
		if qid then
			return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'item'..tab..'(not a taxon? item lookup failed)'
		else
			return '[['..title.. ']]'..tab..''..'Q#'..tab..'not on Wikidata OR no sitelink'
		end
	end

	--find child's rank
	local childRank, childRankLabel
	local species = false
	local childRankState = item:getBestStatements('P105')[1] --taxon rank
	if childRankState then
		childRank = mainsnakId(childRankState)
		if childRank then
			childRankLabel = mw.wikibase.getLabel(childRank)
			local speciesRanks = {
				['Q7432'] = 'species',
				['Q68947'] = 'subspecies',
				['Q4886'] = 'cultivar',
				['Q279749'] = 'form',
				['Q630771'] = 'subvariety',
				['Q767728'] = 'variety',
			--  these should all be plural; for reference only:
			--	['Q3025161'] = 'series',
			--	['Q3181348'] = 'section',
			}
			species = speciesRanks[childRank] ~= nil --singular descriptions for these ranks
		else
			childRank = 'missing taxon rank value'
			childRankLabel = '-'
		end
	end

	--find parent's description; if nil, search grandparent, etc., etc.
	local bothDescriptionsFmtd = tab..tab --parent's raw descrip + child's new suggested descrip
	local newChildDescription = nil
	local oldChildDescription = item:getDescription() --different output if nil vs present
	if oldChildDescription == nil then
		local ancestorItem = item --initialize to child
		local visited = { [qid] = true } --guards against a loop in the P171 chain
		local searching = true
		while searching do
			searching = false --every branch ends the search unless it climbs one level
			local parentState = ancestorItem:getBestStatements('P171')[1] --parent taxon
			local parentId = parentState and mainsnakId(parentState)
			if not parentState then
				bothDescriptionsFmtd = tab..'no parent taxon/P171'..tab
			elseif not parentId then
				bothDescriptionsFmtd = tab..'parent missing ID??'..tab --P171 set to unknown/no value
			elseif visited[parentId] then
				bothDescriptionsFmtd = tab..'parent taxon loop at '..parentId..tab
			else
				visited[parentId] = true
				local parentDescription = mw.wikibase.getDescription(parentId)
				if parentDescription then
					local trim = mw.ustring.gsub(parentDescription, '^[%w%s%(%)%-,]-%s+of%s+', '') --trim to first ' of ' to remove taxon rank/monotypic
					if species then
						trim = singularizeDescription(trim)
					end
					trim = mw.ustring.gsub(trim, ' %(fossil%)', '') --" (fossil)" restored later IIF instance of = fossil taxon type
					if trim == parentDescription then trim = '' end --nothing changed, so no need to duplicate it
					newChildDescription = trim --used 'trim' just b/c it's shorter
					bothDescriptionsFmtd = tab..'[[d:'..parentId..'|'..parentDescription..']]:'..tab..newChildDescription
				else
					ancestorItem = mw.wikibase.getEntity(parentId)
					if ancestorItem then
						searching = true --no description here; try the next ancestor
					else
						bothDescriptionsFmtd = tab..'parent item lookup failed'..tab
					end
				end
			end
		end --while
	end --if oldChildDescription == nil

	--test child for instance of: taxon
	local sawInstanceOf = false
	for _, instanceOfState in ipairs( item:getBestStatements('P31') ) do --child's instance of
		sawInstanceOf = true
		local instanceOf = mainsnakId(instanceOfState) --nil for unknown/no value
		local knownName = instanceOf and
			(acceptableInstanceOf[instanceOf] or (allowNonTaxons and otherInstanceOf[instanceOf]))
		if knownName then

			--extinct/fossil handling in description
			local instanceOfLabel = mw.wikibase.getLabel(instanceOf) or knownName --fall back to our own name if unlabeled
			if oldChildDescription == nil then
				--only append ' (fossil)', when needed, to items missing descrip;
				--retroactively making existing descriptions match their 'instance of taxon rank' is a different/much larger job
				if fossilInstanceOf[instanceOf] then
					local suggested = newChildDescription or '' --nil when no described ancestor was found
					local childSaysFossil = string.match(suggested, 'extinct') or
											string.match(suggested, 'dinosaur') or
											string.match(suggested, 'fossil')
					if childSaysFossil == nil then
						bothDescriptionsFmtd = bothDescriptionsFmtd..' (fossil)' --goes to output
					end
				end
			else
				bothDescriptionsFmtd = tab..'-'..tab..'child has description: '..oldChildDescription
			end

			--output for successful items
			local title_qid = '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'
			if childRank == nil then childRank = 'UNRANKED' end
			if childRankLabel == nil then
				if childRank == 'UNRANKED' then childRankLabel = 'UNRANKED'
				else childRankLabel = 'MISSING LABEL' end
			end
			local cRank_cLabel_pDescrip = childRank..tab..childRankLabel..bothDescriptionsFmtd
			local shownInstanceOf = forcedInstanceOf[instanceOf] or instanceOf
			return title_qid..tab..shownInstanceOf..tab..'('..instanceOfLabel..')'..tab..cRank_cLabel_pDescrip
		end
	end --for child's instance ofs

	--output for failed items
	if sawInstanceOf then
		return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'instanceOf:'..tab..'(not a taxon)'
	else
		return '[['..title..']]'..tab..'[[d:'..qid..'|'..qid..']]'..tab..'instanceOf:'..tab..'(not a taxon? no instanceOf)'
	end
end

function p.getTaxonRank(frame)
-- Returns the taxon rank (P105) of the page named in frame.args[1]:
-- the rank's name when it is listed in taxonRanks, otherwise the rank's QID.
-- Failure strings: 'QID not found', 'Item not found', 'rankState not found'
-- (no P105 statement), 'No rank found' (P105 has no usable value).
	local resolveEntity = require( 'Module:ResolveEntityId' )
	local title = frame.args[1]
	local qid = resolveEntity._id(title)
	if not qid then
		return 'QID not found'
	end

	local item = mw.wikibase.getEntity(qid)
	if not item then
		return 'Item not found'
	end

	local taxonRanks = {
		['Q7432'] = 'species',
		['Q34740'] = 'genus',
		['Q35409'] = 'family',
		['Q36602'] = 'order',
		['Q37517'] = 'class',
		['Q38348'] = 'phylum',
		['Q2007442'] = 'infraclass',
		['Q2136103'] = 'superfamily',
		['Q227936'] = 'tribe',
		['Q2455704'] = 'subfamily',
		['Q2889003'] = 'infraorder',
		['Q3238261'] = 'subgenus',
		['Q5867051'] = 'subclass',
		['Q5867959'] = 'suborder',
		['Q5868144'] = 'superorder',
		['Q68947'] = 'subspecies',
	}

	local rankState = item:getBestStatements('P105')[1] --taxon rank
	if not rankState then
		return 'rankState not found'
	end

	-- 'unknown value'/'no value' snaks have no datavalue, so check before indexing
	local datavalue = rankState.mainsnak.datavalue
	local rank = datavalue and datavalue.value.id
	if not rank then
		return 'No rank found'
	end
	return taxonRanks[rank] or rank
end

function p.getSDs( frame )
-- Builds one wikitext list line comparing a page's short descriptions.
-- frame.args[1]: page name
-- frame.args[2]: if present, also append a '##' sub-line for the redirect target
-- output: # [[page]] - <[[page (disambiguation)]] or dab_dne> - <local SD>;<tab><Wikidata SD>
-- When both descriptions are missing and the page is a redirect, the target's
-- descriptions are appended after ' → ' (or ' (malformed #R)' if no target is found).
	local pn    = frame.args[1]
	local showR = frame.args[2]
	local wp = p.getSd( frame )
	local wd = p.getWdSd( frame )

	local dab_dne = 'dab_dne'

	-- Returns a link to '<pageName> (disambiguation)' if that page has content,
	-- else dab_dne. mw.title.new returns nil for an invalid title.
	local function dabResult( pageName )
		local dabName = pageName..' (disambiguation)'
		local dabTitle = mw.title.new(dabName)
		if dabTitle and dabTitle:getContent() then
			return '[['..dabName..']]'
		end
		return dab_dne
	end

	local info = '# [['..pn..']] - '..dabResult(pn)..' - '..wp..';\t'..wd

	if wp ~= 'wp_sd_dne' or wd ~= 'wd_sd_dne' then
		return info
	end

	--both descriptions are missing: follow the redirect, if there is one
	local title = mw.title.new(pn)
	if not (title and title.isRedirect) then
		return info
	end

	local R_content = title:getContent() or ''
	local R_target = mw.ustring.match(R_content, '^%s*#[Rr][Ee][Dd][Ii][Rr][Ee][Cc][Tt]%s*%[%[(.-)%]%]') or ''
	-- Keep only the page name: '#section' or '|label' would otherwise end up
	-- inside the derived '(disambiguation)' title and the lookups below.
	local R_pn = mw.text.trim( (mw.ustring.gsub(R_target, '[#|].*$', '')) )
	if R_pn == '' then
		return info..' (malformed #R)'
	end

	-- getSd/getWdSd read the page name from frame.args[1], so point it at the
	-- target for these two calls and then restore it for the caller.
	frame.args[1] = R_pn
	local R_wp = p.getSd( frame )
	local R_wd = p.getWdSd( frame )
	frame.args[1] = pn

	local R_sds = R_wp..';\t'..R_wd
	local R_info = '## [['..R_pn..']] - '..dabResult(R_pn)..' - '..R_sds

	info = info..' → '..R_sds
	if (showR) then info = info..'\n'..R_info..' → ' end
	return info
end

function p.getWdSd( frame )
-- Returns the Wikidata description of the page named in frame.args[1], obtained
-- by expanding {{Wikidata|description|page=...}}; 'wd_sd_dne' if it is blank.
	local pn = frame.args[1]
	local args = { ['1'] = 'description',
				   ['page'] = pn }
	-- Trim before the emptiness test so whitespace-only output also maps to
	-- the sentinel that getSDs compares against.
	local sd = mw.text.trim( frame:expandTemplate{ title = 'Wikidata', args = args } )
	if sd == '' then sd = 'wd_sd_dne' end
	return sd
end

function p.getSd( frame )
-- Returns the local short description of the page named in frame.args[1]:
-- the text after the first '|' of the first {{Short description}},
-- {{Shortdesc}}/{{Short desc}} or {{Des}} call found in the page's wikitext,
-- trimmed; 'wp_sd_dne' if there is none or the page cannot be read.
	local pn = frame.args[1]
	local title = mw.title.new(pn) --nil when pn is not a valid title
	local content = title and title:getContent() or ''
	local sd = mw.ustring.match(content, '{{%s*[Ss]hort ?[dD]escription%s*%|(.-)}}') or
			   mw.ustring.match(content, '{{%s*[Ss]hort ?desc%s*%|(.-)}}') or
			   mw.ustring.match(content, '{{%s*[Dd]es%s*%|(.-)}}') or
			   'wp_sd_dne'
	return mw.text.trim(sd)
end

function p.isR( frame )
-- Returns true if the page named in frame.args[1] is a redirect, false
-- otherwise (including when the name is not a valid title).
	local pn = frame.args[1]
	local title = mw.title.new( pn ) --nil when pn is not a valid title
	return title ~= nil and title.isRedirect
end

-- Hand the export table to whoever loads this module.
return p