Module:Sandbox/Innesw/PopulationFromWikidata-upgrade

From English Wikipedia @ Freddythechick

---------------- Defining variables--------------------
-- Wikidata property IDs (P-numbers) used when reading claims, qualifiers and references
local Pop_P = "P1082"          -- population property
local Applies_P = "P518"       -- applies to part property
local Pointin_P = "P585"       -- point in time property
local DetMeth_P = "P459"       -- determination method property
local RefURL_P = "P854"        -- reference URL
local RefTitle_P = "P1476"     -- reference title
local RefPubIn_P = "P1433"     -- reference published in property
local DatePub_P = "P577"       -- date of publication property
local Publisher_P = "P123"     -- publisher property
local Retrieved_P = "P813"     -- retrieved property
local Instof_P = "P31"         -- instance of property
local ShortN_P = "P1813"       -- short name property

-- Wikidata item IDs (Q-numbers) that claims are compared against. The commented-out IDs are not currently used by this module.
local CensusAU_I = "Q5058971"   -- Australian census item
local SAL_I = "Q33112019"       -- state suburb item (includes SSC and SAL)
--local GCCSA_I = "Q112762887"  -- Greater Capital City Statistical Area item
local LGA_I = "Q33127844"       -- Local Government Area item
local UCL_I = "Q33127891"       -- Urban Centre or Locality
--local SA2_I = "Q33128776"     -- SA2
--local SA3_I = "Q118313924"    -- SA3
local ILOC_I = "Q112729549"     -- Indigenous Location

-- The Wikidata entity currently being processed. It is assigned by p.HistoricPopulations and p.ListForInfobox,
-- and read by GetRefsForClaim to build the Edit-at-Wikidata link.
local item = nil

--------------- Function LastURLSection returns last section of a url, ie: the text after the last '/' ----------------------
-- If the url contains no '/', the whole string is returned. If the url ends with '/', an empty string is returned.

local function LastURLSection(url)
    -- "[^/]*$" matches the run of non-slash characters that is anchored at the end of the string;
    -- the extra parentheses make sure exactly one value is returned
    return (string.match(url, "[^/]*$"))
end

--------------- Function IdForGeog returns the place ID for the specified geography abbreviation. Returns nil if abbreviation is blank or not recognised. ----------------------
-- parameters:
--   geog = lower-case geography abbreviation: 'ucl', 'sal', 'lga' or 'iloc'
-- returns: the Wikidata item ID of the matching geography type, or nil

local function IdForGeog(geog)
    if geog == "ucl" then
        return UCL_I
    elseif geog == "sal" then
        return SAL_I -- includes SSC and SAL
    elseif geog == "lga" then
        return LGA_I -- previously returned UCL_I, so LGA requests were answered with Urban Centre data
    elseif geog == "iloc" then
        return ILOC_I
    end
    return nil
end

--------------- Function GetRefsForClaim to check, collate and format all the reference components----------------------
-- Builds one <ref> citation for every reference attached to a population claim and returns them concatenated.
-- parameters:
--   claim        = a population claim; it must carry the applies-to-part, point-in-time and determination-method qualifiers
--   defaulttitle = the citation title to use when a reference has no title of its own
-- returns: a string of <ref> tags, or "" if the claim has no references
-- Reads the module-level 'item' variable to build the Edit-at-Wikidata link.

local function GetRefsForClaim(claim, defaulttitle)
    if claim.references == nil or next(claim.references) == nil then
        return "" -- nothing to cite
    end

    local frame = mw.getCurrentFrame()
    local lang = mw.language.getContentLanguage()

    -- Values that depend only on the claim (not on the individual reference) are worked out once, before the loop,
    -- so that the entity loads are not repeated for every reference.

    local detmet = mw.wikibase.getEntity(claim.qualifiers[DetMeth_P][1].datavalue.value.id) -- load the claim determination method item

    -- is the determination method an instance of an australian census?
    local isCensus = detmet.claims[Instof_P] ~= nil and detmet.claims[Instof_P][1].mainsnak.datavalue.value.id == CensusAU_I

    local refpublisher = ""
    if detmet.claims[Publisher_P] ~= nil then -- if determination method has a publisher, use its item's label
        local publisheritem = mw.wikibase.getEntity(detmet.claims[Publisher_P][1].mainsnak.datavalue.value.id)
        refpublisher = publisheritem.labels.en.value
    end

    local appliespart = mw.wikibase.getEntity(claim.qualifiers[Applies_P][1].datavalue.value.id).labels.en.value -- the label of the item of the applies to part of the claim

    local year = string.sub(claim.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string

    local wdeditpencil = frame:expandTemplate{title = 'EditAtWikidata', args = {qid = item.id, pid = claim.id, nbsp = 1}} -- the Edit At Wikidata icon & link

    local refs = {}
    for r, ref in ipairs(claim.references) do -- loop through all references in a claim
        -- each reference in the wikidata will produce a citation reference for the claim
        local snaks = ref.snaks

        local refurl = ""
        if snaks[RefURL_P] ~= nil then -- if reference has a reference url, use it
            refurl = snaks[RefURL_P][1].datavalue.value
        end

        local reftitle = defaulttitle -- default title is supplied by the caller
        if snaks[RefTitle_P] ~= nil then -- if reference has a title, use it
            reftitle = snaks[RefTitle_P][1].datavalue.value.text
        end

        local pubinlabel = ""
        if snaks[RefPubIn_P] ~= nil then -- if reference has a published in (it should for all references), use its item's label
            pubinlabel = mw.wikibase.getEntity(snaks[RefPubIn_P][1].datavalue.value.id).labels.en.value
        end

        local refaccessdate = ""
        if snaks[Retrieved_P] ~= nil then -- if reference has an access date, use it.
            refaccessdate = lang:formatDate('j F Y', snaks[Retrieved_P][1].datavalue.value.time)
        end

        local refwork
        local reference

        if isCensus then
            refwork = detmet.labels.en.value .. " " .. pubinlabel -- reference work is determination method label + published in

            -- the reference is built using the specific template for the census year, which ensures the link format is correct
            local geogid = LastURLSection(refurl) -- the id for the specific ABS reference is easiest to get from the ref URL. It may be the only place it is available.
            reference = frame:expandTemplate{title = 'Census_' .. year .. '_AUS', args = {id = geogid, name = reftitle .. " (" .. appliespart .. ")", ["access-date"] = refaccessdate, quick = 'on'} }
        else
            -- use the provided reference url, and whatever other citation data is available
            refwork = pubinlabel

            local pubdate = ""
            if snaks[DatePub_P] ~= nil then -- if reference has a date published, use it
                pubdate = lang:formatDate('j F Y', snaks[DatePub_P][1].datavalue.value.time)
            end

            local citewebargs = {
                url = refurl,
                title = reftitle .. " (" .. appliespart .. ")",
                date = pubdate,
                work = refwork,
                ["access-date"] = refaccessdate -- hyphenated form, as used for the census template above
            }
            if refpublisher ~= "" then
                -- author is used to match existing population references; omitted when unknown so that an empty "[[]]" link is never emitted
                citewebargs.author = "[[" .. refpublisher .. "]]"
            end

            reference = frame:expandTemplate{ title = 'cite web', args = citewebargs }
        end

        reference = reference .. wdeditpencil

        -- The name of the citation reference will be the same for each wikidata claim reference. This will allow references to the same data to be combined into a single citation reference.
        local refname = refwork .. "_" .. year .. "_" .. appliespart .. "_" .. reftitle
        if r > 1 then -- 2nd and later references in the same wikidata claim have their number appended, to keep them unique
            refname = refname .. "_" .. r
        end

        refs[#refs + 1] = frame:extensionTag{ name = 'ref', content = reference, args = { name = refname} } -- accumulate the citation references
    end
    return table.concat(refs)
end

--------------- Function GetAbbrLabelYearLink gets the population geography abbreviation and the Wikipedia article link for the population year ---------------
-- parameters:
--   returnclaim = a population claim carrying the applies-to-part, point-in-time and determination-method qualifiers
-- returns: the geography label (abbreviated where a short name exists), a space, then the year (wikilinked where a target is known)

local function GetAbbrLabelYearLink(returnclaim)
    local quals = returnclaim.qualifiers

    -- geography label: the short name with the full label as a tooltip, or just the full label if there is no short name
    local partentity = mw.wikibase.getEntity(quals[Applies_P][1].datavalue.value.id)
    local fulllabel = partentity.labels.en.value
    local shortnames = partentity.claims[ShortN_P]
    local geogtext
    if shortnames == nil then
        geogtext = fulllabel
    else
        geogtext = mw.getCurrentFrame():expandTemplate{title = 'Abbr', args = {shortnames[1].mainsnak.datavalue.value.text, partentity.labels.en.value } }
    end

    -- the population point in time as a year string
    local yeartext = string.sub(quals[Pointin_P][1].datavalue.value.time, 2, 5)

    -- the determination method item decides what, if anything, the year links to
    local methodentity = mw.wikibase.getEntity(quals[DetMeth_P][1].datavalue.value.id)

    local yearlink
    if methodentity.sitelinks ~= nil and methodentity.sitelinks.enwiki ~= nil then
        -- the determination method has its own enwiki article: link the year to it
        yearlink = "[[" .. methodentity.sitelinks.enwiki.title .. "|" .. yeartext .. "]]"
    elseif methodentity.claims[Instof_P] ~= nil and methodentity.claims[Instof_P][1].mainsnak.datavalue.value.id == CensusAU_I then
        -- an australian census without its own article: link to the year's section of the Census in Australia article
        yearlink = "[[Census_in_Australia#" .. yeartext .. "|" .. yeartext .. "]]"
    else
        -- no known link target: show the bare year
        yearlink = yeartext
    end

    return geogtext .. " " .. yearlink
end

local p = {} -- the table of functions exported by this module; it is returned at the end of the file

---------------- Function HistoricPopulations returns a wikitable of all census population values for all geography types, or a specified one ---------------
-- parameters:
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: geog=     a single geography type to return pop values for. Valid are 'ucl', 'sal', 'lga', 'iloc'. If left blank, all geographies will be returned.
-- returns: wikitext for a table with one row per census year and one column per geography (columns in alphabetical order),
--          or an empty string if there are no usable population claims

function p.HistoricPopulations( frame )
    if frame.args.wikidata ~= nil and frame.args.wikidata ~= "" then -- if there's a Wikidata item specified, use it
        item = mw.wikibase.getEntity(frame.args.wikidata)
    else
        item = mw.wikibase.getEntity() -- if there's a Wikidata item connected to the article it will find it here.
    end

    -- if there are no population claims in the item, return an empty string
    if not (item and item.claims and item.claims[Pop_P]) then
        return ""
    end

    -- Find claims with:
    -- (1) point in time is not nil
    -- (2) applies to part is not nil
    -- (3) determination method is not nil
    -- (4) References table is not empty
    -- (5) The determination method for the claim is an australian census

    local validpopclaims = {}
    for _, s in ipairs(item.claims[Pop_P]) do
        -- The qualifier checks must pass before the determination method item is loaded:
        -- a claim without that qualifier would otherwise raise a script error instead of being skipped.
        if s.qualifiers ~= nil and
          s.qualifiers[Pointin_P] ~= nil and
          s.qualifiers[Applies_P] ~= nil and
          s.qualifiers[DetMeth_P] ~= nil and
          s.references ~= nil
        then
            local detmetitem = mw.wikibase.getEntity(s.qualifiers[DetMeth_P][1].datavalue.value.id) -- load the claim determination method item
            local isCensus = (detmetitem.claims[Instof_P] ~= nil and detmetitem.claims[Instof_P][1].mainsnak.datavalue.value.id == CensusAU_I) -- is determination method an instance of an australian census?
            if isCensus then
                validpopclaims[#validpopclaims + 1] = s -- add to valid claims table
            end
        end
    end

    -- if there are no valid claims, return an empty string
    if #validpopclaims < 1 then
        return ""
    end

    -- add to history table for all (or requested-geography-only) claims
    local geog = nil
    if frame.args.geog ~= nil and frame.args.geog ~= "" then
        geog = string.lower(frame.args.geog)
    end

    local oneplaceid = IdForGeog(geog)

    local history = {}   -- history[year][geography name] = formatted population plus its references
    local years = {}     -- list of the years that have at least one value
    local geognames = {} -- list of the geography names that have at least one value (the table columns)
    local seengeog = {}  -- set used to keep geognames free of duplicates
    for _, q in ipairs(validpopclaims) do
        local claimgeogid = q.qualifiers[Applies_P][1].datavalue.value.id -- the ID of the applies_to_part item in the claim
        if (not oneplaceid) or (claimgeogid == oneplaceid) then -- if geog is not specified, or it is and the claim applies_to_part matches it
            -- get the name for the geography of the claim (only loaded for claims that will actually be shown)
            local appliespartitem = mw.wikibase.getEntity(claimgeogid) -- the applies_to_part item itself
            local claimgeogname = appliespartitem.labels.en.value -- the full label for the applies part item
            if appliespartitem.claims[ShortN_P] ~= nil then  -- If a short name value exists then use it instead of the full item label. The short names in fact should always exist.
                claimgeogname = string.upper(appliespartitem.claims[ShortN_P][1].mainsnak.datavalue.value.text)
            end
            local claimyear = string.sub(q.qualifiers[Pointin_P][1].datavalue.value.time, 2, 5) -- the population point in time as a year string

            if not history[claimyear] then
                history[claimyear] = {}
                table.insert(years, claimyear)
            end
            local refs = GetRefsForClaim(q, item.labels.en.value)
            history[claimyear][claimgeogname] = mw.language.getContentLanguage():formatNum(tonumber(q.mainsnak.datavalue.value.amount)) .. refs

            -- also build the list of geog names used
            if not seengeog[claimgeogname] then
                seengeog[claimgeogname] = true
                table.insert(geognames, claimgeogname)
            end
        end
    end

    -- if no claim matched the requested geography there is nothing to tabulate
    if #years < 1 then
        return ""
    end

    -- sort the years and the geography names, so that rows and columns always appear in the same order
    table.sort(years)
    table.sort(geognames)

    -- build the wikitext table from the history table
    local wt = {}
    table.insert(wt, '{| class="wikitable"') -- start of table
    -- header row
    table.insert(wt, '\n|-\n!') -- empty top-left cell
    for _, g in ipairs(geognames) do
        table.insert(wt, ' !! ' .. g)
    end
    -- data rows
    for _, v in ipairs(years) do
        table.insert(wt, '\n|-\n! | ' .. v) -- left column, contains years
        for _, g in ipairs(geognames) do
            if not history[v][g] then
                table.insert(wt, '\n| ') -- empty table cell
            else
                table.insert(wt, '\n| ' .. history[v][g])
            end
        end
    end
    table.insert(wt, '\n|}') -- end of table

    return table.concat(wt)
end

---------------- Function ListForInfobox returns the most recent population values ---------------
-- parameters:
-- required: type=     the type value as for the Infobox
-- optional: wikidata= the wikidata item to be used instead of the one in the current page
-- optional: geog=     a single geography type to return pop values for. Valid are 'ucl', 'sal', 'lga', 'iloc'. If left blank, all geographies will be returned.
-- returns: wikitext holding the latest population for each relevant geography (a bullet list when there is more than one value)
--          followed by a tracking category, or an empty string when there is nothing to show

function p.ListForInfobox( frame )
    if frame.args.type == nil then
        return ""
    end

    local articleplacetype = string.lower(frame.args.type) -- for the place type supplied, change to a lower case string

    -- The geography types that can be returned for each valid place type.
    -- Both "townlocality" and "townandlocality" are accepted: the two spellings were previously used
    -- inconsistently, with the result that neither of them produced any output.
    -- Place types that are not listed (eg: "cadastral" and "protected", which don't have ABS populations,
    -- and any unrecognised type) return nothing.
    local geogsfortype = {
        town = {UCL_I, ILOC_I, SAL_I},
        suburb = {SAL_I},
        city = {UCL_I},
        settlement = {SAL_I, ILOC_I},
        locality = {SAL_I, ILOC_I},
        townlocality = {UCL_I, ILOC_I, SAL_I},
        townandlocality = {UCL_I, ILOC_I, SAL_I},
        lga = {LGA_I},
        region = {LGA_I} -- for now saying region uses LGA_I, but unclear what is most appropriate ABS geography type. Can revise.
    }

    local showtypes = geogsfortype[articleplacetype]
    if showtypes == nil then
        return ""
    end

    if frame.args.geog ~= nil and frame.args.geog ~= "" then -- if geog is specified, only claims for its id are returned
        -- an unrecognised geog gives a nil id, which leaves the list empty so that nothing is returned
        showtypes = { IdForGeog(string.lower(frame.args.geog)) }
    end

    if frame.args.wikidata ~= nil and frame.args.wikidata ~= "" then -- if there's a Wikidata item specified, use it
        item = mw.wikibase.getEntity(frame.args.wikidata)
    else
        item = mw.wikibase.getEntity() -- if there's a Wikidata item connected to the article it will find it here.
    end

    -- if there are no population claims in the item, return an empty string
    if not (item and item.claims and item.claims[Pop_P]) then
        return ""
    end

    ------------ PART 1: Find claims that meet minimum criteria

    -- (1) point in time is not nil
    -- (2) applies to part is not nil
    -- (3) determination method is not nil
    -- (4) References table is not empty

    local validpopclaims = {}
    for _, s in ipairs(item.claims[Pop_P]) do
        if s.qualifiers ~= nil and
          s.qualifiers[Pointin_P] ~= nil and
          s.qualifiers[Applies_P] ~= nil and
          s.qualifiers[DetMeth_P] ~= nil and
          s.references ~= nil
        then
            validpopclaims[#validpopclaims + 1] = s -- add to valid claims table
        end
    end

    -- if there are no valid claims, return an empty string
    if #validpopclaims < 1 then
        return ""
    end

    --------------- PART 2: Find the latest claim for each geography found

    local latestclaim = {}

    for _, q in ipairs(validpopclaims) do
        local oclaimdate = q.qualifiers[Pointin_P][1].datavalue.value.time
        local claimgeog = q.qualifiers[Applies_P][1].datavalue.value.id
        local current = latestclaim[claimgeog]
        -- keep this claim if it is the first for its geography, or its date is not earlier than the one held so far
        -- (the time values are compared as strings)
        if current == nil or oclaimdate >= current.qualifiers[Pointin_P][1].datavalue.value.time then
            latestclaim[claimgeog] = q
        end
    end

    --------------- PART 3: Compile the module output, using only latest claims in specified geographies

    local returnlist = {}

    for _, t in ipairs(showtypes) do -- ipairs keeps the geographies in the order they are listed for the place type
        local latest = latestclaim[t]
        if latest ~= nil then
            local refs = GetRefsForClaim(latest, item.labels.en.value) -- the references for the max date claim
            table.insert(returnlist, mw.language.getContentLanguage():formatNum(tonumber(latest.mainsnak.datavalue.value.amount)) .. " (" .. GetAbbrLabelYearLink(latest) .. ")" .. refs)
        end
    end

    if #returnlist == 0 then
        -- no population value to show: return nothing at all, rather than an empty bullet point and a tracking category
        return ""
    end

    local wikitext
    if #returnlist == 1 then
        -- if there is only one entry in returnlist, return it without a bullet point
        wikitext = returnlist[1]
    else
        -- if there are multiple entries in returnlist, return all the rows with new line and bullet points between them
        wikitext = "\n*" .. table.concat(returnlist, "\n*")
    end
    return wikitext .. '[[Category:Australian place articles using Wikidata population values]]' -- Append the category to the output so we can keep track of which articles are using this module to output a population value
end

-- ######  this function is just for testing of the upgrade during development
-- It returns the historic populations table for a fixed test item twice: once for all geographies
-- and once restricted to the 'sal' geography, separated by a blank line.

function p.main()
    local allgeogs = p.HistoricPopulations( { args = {type = 'town', wikidata = 'Q649969'} } )
    local salonly = p.HistoricPopulations( { args = {type = 'town', wikidata = 'Q649969', geog='sal'} } )
    return allgeogs .. '\n\n' .. salonly
--    return p.ListForInfobox( { args = {type = 'town', wikidata = 'Q649969'} } )
end

return p -- export the module's functions