-- Module:Monthly Challenge statistics

--[=[
Statistics for data using the Monthly Challenge format.

There are two data sources:
 * daily data, which provides _day-by-day_ total counts
 * the core PRP Lua module, which provides direct _per-index_ counts
]=]
require('strict')

local p = {} --p stands for package
local getArgs = require('Module:Arguments').getArgs

-- ProofreadPage core module
local proofreadPage = require('mw.ext.proofreadPage')

-- Default data prefixes
-- getData() appends a "YYYY-MM" suffix to one of these to form the title of
-- the data page to load.
-- Prefix used for the daily data (dData) passed to getDailyUplifts():
local dDataPrefix = "Module:Monthly Challenge daily stats/data/"
-- Prefix used for the per-month work data (works list and target):
local workDataPrefix = "Module:Monthly Challenge/data/"

-- Count every entry of a table, whatever its keys are (so an entry stored
-- under key 0 is counted as well). A nil argument is treated as empty.
local function length(arr)
	local entries = 0
	if arr ~= nil then
		for _ in pairs(arr) do
			entries = entries + 1
		end
	end
	return entries
end

--[=[
Work out the "processed" figure for one stats record (fields q0, q3, q4).
 * q4 (validated) is weighted twice, because a validated page was also
   proofread beforehand; counted once, a validation would net out to zero
   (q3 drops by one as q4 rises by one)
]=]
local function getProcessed(t)
	local validatedWeight = 2
	return t.q0 + t.q3 + (validatedWeight * t.q4)
end

-- The "proofread" figure of a stats record is simply its q3 field.
local function getProofread(t)
	local proofread = t.q3
	return proofread
end

-- The "complete" figure of a stats record: its q0 and q4 fields added up.
local function getComplete(t)
	local q0, q4 = t.q0, t.q4
	return q0 + q4
end

-- The "incomplete" figure of a stats record: whatever is left of its count
-- field once q0 and q4 have been taken off.
local function getIncomplete(t)
	local count = t.count
	local complete = t.q0 + t.q4
	return count - complete
end

--[=[
Build an index object for every work listed in a month's data.

monthData.works is read as a two-level table: each of its values is itself a
table, and every key of those inner tables is handed to
proofreadPage.newIndex(). The resulting objects are returned as a list, in no
particular order.
]=]
local function getMonthWorks(monthData)
	local indexes = {}
	local groups = monthData.works

	-- Data for a bum date has no "works" field at all: nothing to list.
	if groups == nil then
		return indexes
	end

	for _, group in pairs(groups) do
		for indexName in pairs(group) do
			indexes[#indexes + 1] = proofreadPage.newIndex(indexName)
		end
	end

	return indexes
end

--[=[
Count the works listed in a month's MC data
]=]
local function getNumberOfWorks(monthData)
	local works = getMonthWorks(monthData)
	return #works
end

--[=[
Add up the pageCount of every index in the month's list of works
]=]
local function getTotalPages(monthData)
	local pageSum = 0
	for _, index in ipairs(getMonthWorks(monthData)) do
		pageSum = pageSum + index.pageCount
	end
	return pageSum
end
--[=[
Turn a month's cumulative daily data into day-by-day increases ("uplifts").

dData.days is read by day number; the entry under key 0 is the baseline that
day 1 is measured against. Each entry is a stats record as accepted by
getProcessed(), getProofread() and getComplete().

Parameters:
 * dData          - the daily data; nil, or data lacking days[0], gives an
                    all-zero result with an empty days list
 * excludeLastDay - when truthy, the last available day is left out, provided
                    more than one day is available

Returns a table with:
 * total - the processed/proofread/complete uplifts summed over the days
 * days  - a list in which days[n] is the uplift of day n over the most
           recent earlier day that has data (all zeros if day n has no data)
]=]
local function getDailyUplifts(dData, excludeLastDay)
	local uplifts = {
		total = {
			processed = 0,
			proofread = 0,
			complete = 0
		},
		days = {}
	}

	-- guard against totally bogus/missing input data
	if dData == nil or dData.days == nil or dData.days[0] == nil then
		return uplifts
	end

	-- The last day is the highest day number present. Counting the entries
	-- instead gives the same answer only while no day is missing; with a gap
	-- the count falls short and the newest day(s) would be ignored.
	-- pairs() is used rather than # because key 0 is in play and the table
	-- may be a mw.loadData proxy.
	local untilDay = 0
	for day in pairs(dData.days) do
		if type(day) == 'number' and day > untilDay then
			untilDay = day
		end
	end

	if excludeLastDay and untilDay > 1 then
		untilDay = untilDay - 1
	end

	-- cumulative figures of the most recent day that had data
	local last = {
		processed = getProcessed(dData.days[0]),
		proofread = getProofread(dData.days[0]),
		complete = getComplete(dData.days[0])
	}

	for day = 1, untilDay do
		local day_data = dData.days[day]

		-- a day without data contributes nothing and leaves "last" untouched,
		-- so the next day with data absorbs the whole difference
		local day_stats = {
			processed = 0,
			proofread = 0,
			complete = 0
		}

		if day_data then
			local current = {
				processed = getProcessed(day_data),
				proofread = getProofread(day_data),
				complete = getComplete(day_data)
			}
			day_stats.processed = current.processed - last.processed
			day_stats.proofread = current.proofread - last.proofread
			day_stats.complete = current.complete - last.complete
			last = current
		end

		-- days[n] must line up with day number n
		uplifts.days[day] = day_stats

		uplifts.total.processed = uplifts.total.processed + day_stats.processed
		uplifts.total.proofread = uplifts.total.proofread + day_stats.proofread
		uplifts.total.complete = uplifts.total.complete + day_stats.complete
	end

	return uplifts
end

-- Average the uplift totals over the number of days recorded in uplifts.days.
-- Returns a table with processed, proofread and complete means; all three are
-- 0 when there are no days to average over.
local function getMeans(uplifts)
	local dayCount = length(uplifts.days)
	local means = { processed = 0, proofread = 0, complete = 0 }

	-- nothing to average (and no division by zero) without any days
	if dayCount ~= 0 then
		for _, field in ipairs({ 'processed', 'proofread', 'complete' }) do
			means[field] = uplifts.total[field] / dayCount
		end
	end

	return means
end


--[=[
  Load data from a dated page and return it.

  The page title is the prefix followed by the date as "YYYY-MM".

  Encapsulates load and error handling mechanics for multiple different data:
  whatever goes wrong - a missing or non-numeric year/month, or a page that
  cannot be loaded - the caller gets an empty table instead of an error.
]=]
local function getData(prefix, year, month)
	-- string.format('%d', ...) raises on nil or non-numeric input, and that
	-- call is not covered by the pcall below, so check the date first
	year = tonumber(year)
	month = tonumber(month)
	if year == nil or month == nil then
		return {}
	end

	local statsPage = prefix .. string.format('%d-%02d', year, month)
	local success, data = pcall(mw.loadData, statsPage)
	if not success then
		-- TODO: add a tracking cat
		return {}
	end
	return data
end

--[=[
Expand the placeholders in a format string with statistics for one month.

Arguments (read via getArgs):
 * year, month   - the month to report on; selects the data pages to load
 * format        - required; text containing any of the placeholders below
 * exclude_today - when set, the month-to-date figures leave out the last
                   available day (see getDailyUplifts)
                   NOTE(review): any value that reaches this function counts
                   as "set", including "no" or "0" - confirm that is intended

Placeholders:
 * @num_works@               - number of works in the month's work data
 * @count_all_pages@         - sum of the page counts of those works
 * @mean_processed_to_date@  - mean daily "processed" uplift, rounded to the
                               nearest whole number
 * @processed_this_month@    - total "processed" uplift for the month
 * @processed_yesterday@     - "processed" uplift of yesterday's date (per
                               os.date), or 0 if there is no entry for it
 * @target_pages@            - the "target" field of the work data; if the
                               data has none, the placeholder is left as is
 * @processed_percent@       - total "processed" uplift as a percentage of
                               the target, to one decimal place; '0' if the
                               target is missing or not positive

Returns the format text with the placeholders replaced.
]=]
function p.statistics(frame)
	local args = getArgs(frame)

	local txt = args.format
	if txt == nil then
		-- fail with a message an editor can act on, rather than the opaque
		-- "attempt to index a nil value" the first gsub would raise
		error('Monthly Challenge statistics: the "format" argument is required', 0)
	end

	local year = tonumber(args.year)
	local month = tonumber(args.month)

	local dData = getData(dDataPrefix, year, month)
	local workData = getData(workDataPrefix, year, month)
	local excludeToday = args.exclude_today

	-- Month-to-date uplifts are needed by several placeholders: work them out
	-- at most once, and only if a placeholder actually asks for them.
	local cachedUplifts
	local function monthUplifts()
		if cachedUplifts == nil then
			cachedUplifts = getDailyUplifts(dData, excludeToday)
		end
		return cachedUplifts
	end

	-- Placeholder name -> function producing its replacement. Kept as an
	-- ordered list so substitutions happen in a fixed order; a function is
	-- only called if its placeholder occurs in the text.
	local placeholders = {
		{ 'num_works', function()
			return getNumberOfWorks(workData)
		end },

		{ 'count_all_pages', function()
			return getTotalPages(workData)
		end },

		{ 'mean_processed_to_date', function()
			local monthlyMeans = getMeans(monthUplifts())
			-- round half up to a whole number
			return math.floor(monthlyMeans.processed + 0.5)
		end },

		{ 'processed_this_month', function()
			return monthUplifts().total.processed
		end },

		{ 'processed_yesterday', function()
			-- step back one day; os.time() normalises a day of 0 into the
			-- last day of the previous month
			local timeNow = os.date("*t")
			timeNow.day = timeNow.day - 1
			local timeYesterday = os.date("*t", os.time(timeNow))

			-- yesterday may belong to a different month than the one asked for
			local yestData = dData
			if not (timeYesterday.month == month and timeYesterday.year == year) then
				yestData = getData(dDataPrefix, timeYesterday.year, timeYesterday.month)
			end

			local uplifts = getDailyUplifts(yestData, false)
			if uplifts.days[timeYesterday.day] then
				return uplifts.days[timeYesterday.day].processed
			end
			return 0
		end },

		{ 'target_pages', function()
			-- a nil result makes gsub keep the placeholder text unchanged
			return workData.target
		end },

		{ 'processed_percent', function()
			if workData.target == nil or workData.target <= 0 then
				return '0'
			end
			return string.format("%.1f", 100 * monthUplifts().total.processed / workData.target)
		end },
	}

	for _, placeholder in ipairs(placeholders) do
		-- single assignment target: gsub's second result (the count) is dropped
		txt = txt:gsub('@' .. placeholder[1] .. '@', placeholder[2])
	end

	return txt
end

return p