Module:Sandbox/Erutuon/UTF-8

From English Wikipedia @ Freddythechick

This is the current revision of this page, as edited by imported>Erutuon at 19:58, 6 July 2018 (Erutuon moved page Module:Sandbox/Erutuon/Unicode to Module:Sandbox/Erutuon/UTF-8 without leaving a redirect). The present address (URL) is a permanent link to this version.

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

-- Module table; the functions attached to it are what this file returns.
local p = {}

-- Bitwise-operation library, with the two functions this file uses cached
-- as locals.
local bit = require("bit32")
local band = bit.band
local rshift = bit.rshift

--- Call a function once for every element of a sequence, in order.
-- NOTE: this is stored as a field of the standard `table` library table, so
-- any table whose metatable has `__index = table` can call it as a method
-- (`t:forEach(func)`).
-- @param t sequence to walk; traversal follows `ipairs`
-- @param func callback invoked with each element; its results are discarded
function table.forEach(t, func)
	for _, element in ipairs(t) do
		func(element)
	end
end

--- Give a table access to the `table` library functions as methods.
-- A fresh metatable whose `__index` is the `table` library is attached on
-- every call, replacing any metatable the table already had.
-- @param arr table to modify in place
-- @return the same table, so calls can be chained
local function setMt(arr)
	local meta = {}
	meta.__index = table
	return setmetatable(arr, meta)
end

--- Parse a string of binary digits into a Lua number.
-- @param stringBinary text written in base 2, e.g. "11100001"
-- @return the numeric value, or nil when `tonumber` cannot parse the text
local function binary(stringBinary)
	local value = tonumber(stringBinary, 2)
	return value
end

--- Read one binary digit out of a byte.
-- @param number the byte as a number, or as a string of binary digits
--   (strings are converted with `binary` first)
-- @param index position of the digit; the value is shifted right by
--   `8 - index`, so index 8 selects the lowest bit and index 1 the highest
--   bit of an 8-bit value
-- @return the selected digit, 0 or 1
local function digitAt(number, index)
	local value = number
	if type(value) == "string" then
		value = binary(value)
	end
	local shifted = rshift(value, 8 - index)
	return band(shifted, 1)
end

--- Split a byte into its eight binary digits.
-- @param byte numeric byte value
-- @return array of eight 0/1 values; index 1 holds the highest of the eight
--   bits and index 8 the lowest
local function getBits(byte)
	local bits = {}
	local remaining = byte
	local position = 8
	-- Peel off the lowest bit each round and store it from the back of the
	-- array towards the front.
	while position >= 1 do
		bits[position] = band(remaining, 1)
		remaining = rshift(remaining, 1)
		position = position - 1
	end
	return bits
end

-- mw.log(table.concat(getBits(rshift(binary("11100001"), 8 - 3))))

--- Apply a function to every byte of a string and collect the results.
-- @param str string to walk, one byte at a time
-- @param func callback that receives each byte as a number; only its first
--   return value is kept
-- @return array of the callback results in byte order (a nil result is not
--   stored)
local function iterBytes(str, func)
	local out = {}
	for i = 1, #str do
		-- Capture exactly one result. Passing `func(...)` directly as the last
		-- argument of table.insert would forward every return value, so a
		-- callback returning two values would be read as (position, value)
		-- and one returning nothing would raise an argument-count error.
		local result = func(string.byte(str, i))
		out[#out + 1] = result
	end
	return out
end

--- Build the opening and closing HTML tags for coloured text.
-- @param color CSS colour value placed in the span's `style` attribute
-- @return two-element array: { opening span tag, closing span tag }
local function makeTag(color)
	local opening = '<span style="color: ' .. color .. ';">'
	local closing = '</span>'
	return { opening, closing }
end

--- Colour-code the leading bits of a byte according to their UTF-8 role.
-- The run of 1 bits at the start of the byte, together with the 0 that ends
-- it, is wrapped in a coloured span:
--   no leading 1     -> darkgray  (ASCII, 0x00 - 0x7F)
--   one leading 1    -> chocolate (continuation byte)
--   two to four 1s   -> deeppink  (leading byte)
-- A byte with more than four leading 1 bits is wrapped in a red span as a
-- whole.
-- @param byteTable array of bits, highest first (as produced by getBits);
--   it is modified in place and also gets the `table` methods via setMt
-- @return the same table with the span tags inserted
local function markDigits(byteTable)
	local onesCount = 0
	setMt(byteTable)
	for i, digit in ipairs(byteTable) do
		if digit == 1 then
			onesCount = onesCount + 1
			if onesCount > 4 then
				local tag = makeTag("red")
				-- Append the closing tag after the last bit, then put the
				-- opening tag in front, so the span covers the entire byte.
				byteTable:insert(tag[2])
				byteTable:insert(1, tag[1])
				-- Stop here: the table has just been shifted, and continuing
				-- the traversal would revisit the same bit and tag it again
				-- without end.
				return byteTable
			end
		else
			local tag
			-- ASCII (0x00 - 0x7F)
			if onesCount == 0 then
				tag = makeTag("darkgray")

			-- continuation bytes
			elseif onesCount == 1 then
				tag = makeTag("chocolate")

			-- leading bytes
			else
				tag = makeTag("deeppink")
			end
			-- Close the span right after the terminating 0 first, so that
			-- inserting the opening tag at the front does not move the
			-- position being closed.
			byteTable:insert(i + 1, tag[2])
			byteTable:insert(1, tag[1])
			return byteTable
		end
	end
	return byteTable
end

--- Render every byte of a string as colour-marked binary digits.
-- @param str string whose bytes are rendered
-- @return the rendered bytes joined with single spaces
local function printBytes(str)
	-- Turn one byte into its marked-up string of binary digits.
	local function renderByte(byte)
		local bits = getBits(byte)
		return table.concat(markDigits(bits))
	end

	local rendered = iterBytes(str, renderByte)
	return table.concat(rendered, " ")
end

--- Build parallel lists of characters and their rendered byte strings.
-- Each match of the pattern "." produced by `mw.ustring.gmatch` is stored
-- in the first list, and its `printBytes` rendering at the same index in
-- the second.
-- @param str text to split
-- @return chars, bytes: two arrays of equal length, both carrying the
--   `table` methods from setMt
local function makeCharByteTables(str)
	local chars, bytes = setMt({}), setMt({})
	for char in mw.ustring.gmatch(str, ".") do
		table.insert(chars, char)
		table.insert(bytes, printBytes(char))
	end
	return chars, bytes
end

--- Lay out characters and their byte strings as a two-row wikitable.
-- The first row has one cell per character; the second row has one cell per
-- byte string, each wrapped in <code> tags.
-- NOTE: inside this file the name `print` refers to this function, not to
-- the global of the same name.
-- @param chars array of characters; receives the `table` methods via setMt
-- @param bytes array of rendered byte strings; likewise passed through setMt
-- @return wikitext for the table, lines joined with "\n"
local function print(chars, bytes)
	setMt(chars)
	setMt(bytes)

	local rows = { '{| class="wikitable"' }
	for _, char in ipairs(chars) do
		rows[#rows + 1] = "| " .. char
	end
	rows[#rows + 1] = "|-"
	for _, byteString in ipairs(bytes) do
		rows[#rows + 1] = "| <code>" .. byteString .. "</code>"
	end
	rows[#rows + 1] = "|}"

	return table.concat(rows, "\n")
end

--- Entry point: show a string's characters above their UTF-8 bytes.
-- @param frame object whose `args[1]` holds the text to display; when that
--   value is nil or false a built-in sample string is used instead
-- @return wikitext table produced by `print`
function p.show(frame)
	local str = frame.args[1]
	if not str then
		str = "abc πρᾶγμᾰ"
	end
	local chars, bytes = makeCharByteTables(str)
	return print(chars, bytes)
end

-- Export the module table.
return p