-- Module:Sandbox/Erutuon/UTF-8
-- < Module:Sandbox | Erutuon
local p = {}
local bit = require("bit32")
local band = bit.band
local rshift = bit.rshift
-- Calls `func` once for every element of the sequence `t`, in order,
-- passing the element as the only argument. Return values of `func` are
-- discarded. Installed on the `table` library so that tables whose
-- metatable indexes `table` can call it as a method (`t:forEach(func)`).
function table.forEach(t, func)
	for _, element in ipairs(t) do
		func(element)
	end
end
-- Gives `arr` a metatable whose __index is the `table` library, so that
-- library functions can be called as methods (arr:insert(x), arr:concat()).
-- Returns the same table that was passed in.
local function setMt(arr)
	local meta = { __index = table }
	setmetatable(arr, meta)
	return arr
end
-- Parses a string of binary digits (e.g. "11100001") into a Lua number.
-- The result is whatever tonumber yields for base 2, i.e. nil when the
-- string is not a valid base-2 numeral.
local function binary(stringBinary)
	local base = 2
	return tonumber(stringBinary, base)
end
-- Returns the bit (0 or 1) at position `index` of a byte, where position 1
-- is the most significant bit and position 8 the least significant.
-- `number` may be a Lua number or a string of binary digits; a string is
-- converted with binary() first.
local function digitAt(number, index)
	local value = number
	if type(value) == "string" then
		value = binary(value)
	end
	-- Shift the wanted bit down to the lowest position, then mask it off.
	local shifted = rshift(value, 8 - index)
	return band(shifted, 1)
end
-- Splits a byte into its eight bits. The returned table holds the most
-- significant bit at index 1 and the least significant bit at index 8.
local function getBits(byte)
	local bits = {}
	local remaining = byte
	local position = 8
	-- Peel bits off the low end, filling the table from the back.
	while position >= 1 do
		bits[position] = band(remaining, 1)
		remaining = rshift(remaining, 1)
		position = position - 1
	end
	return bits
end
-- mw.log(table.concat(getBits(rshift(binary("11100001"), 8 - 3))))
-- Applies `func` to the numeric value of every byte in `str` and collects
-- the results, in order, in a new table.
--   str:  the string whose bytes are visited.
--   func: called as func(byteValue); only its first return value is kept.
-- Returns the table of results (empty for an empty string).
local function iterBytes(str, func)
	local out = {}
	for i = 1, #str do
		-- Assigning to a single local truncates func's results to exactly
		-- one value. Passing the call straight to table.insert would
		-- forward every result, so a callback returning two values would
		-- be read as (position, value) and one returning nothing would
		-- raise "wrong number of arguments".
		local result = func(string.byte(str, i))
		out[#out + 1] = result
	end
	return out
end
-- Builds the opening and closing HTML tags of a <span> that colours its
-- content with the given CSS colour. Returns { openingTag, closingTag }.
local function makeTag(color)
	local opening = '<span style="color: ' .. color .. ';">'
	local closing = '</span>'
	return { opening, closing }
end
-- Wraps the leading marker bits of a byte in a coloured <span>.
--
-- `byteTable` is a sequence of bits, highest bit first (printBytes passes
-- the result of getBits). It is modified in place (tags are inserted and
-- its metatable is set by setMt) and is also returned.
--
-- Colouring, based on the number of 1 bits before the first 0:
--   0 ones        ASCII (0x00 - 0x7F)   darkgray
--   1 one         continuation byte     chocolate
--   2 to 4 ones   leading byte          deeppink
--   5 or more     not a recognised marker: the whole byte is tagged red
-- For the first three cases the span covers the leading 1s and the 0 that
-- terminates them. If the table ends before a 0 is found and without
-- exceeding four 1s, it is returned without tags.
local function markDigits(byteTable)
	local onesCount = 0
	setMt(byteTable)
	for i, digit in ipairs(byteTable) do
		if digit == 1 then
			onesCount = onesCount + 1
			if onesCount > 4 then
				local tag = makeTag("red")
				-- Append the closing tag after the last bit, then put the
				-- opening tag in front, so the span covers the whole byte.
				byteTable:insert(tag[2])
				byteTable:insert(1, tag[1])
				-- Stop here: continuing to iterate after inserting at the
				-- front would revisit the same bit on every step and
				-- never terminate.
				return byteTable
			end
		else
			local tag
			-- ASCII (0x00 - 0x7F)
			if onesCount == 0 then
				tag = makeTag("darkgray")
			-- continuation bytes
			elseif onesCount == 1 then
				tag = makeTag("chocolate")
			-- leading bytes
			else
				tag = makeTag("deeppink")
			end
			-- Close the span right after the terminating 0 (position i)
			-- first, so the front insertion does not shift that position.
			byteTable:insert(i + 1, tag[2])
			byteTable:insert(1, tag[1])
			return byteTable
		end
	end
	return byteTable
end
-- Renders every byte of `str` as text: each byte is expanded with getBits,
-- tagged with markDigits and concatenated; the per-byte strings are then
-- joined with single spaces.
local function printBytes(str)
	local function renderByte(byte)
		local digits = markDigits(getBits(byte))
		return table.concat(digits)
	end

	local rendered = iterBytes(str, renderByte)
	return table.concat(rendered, " ")
end
-- Splits `str` into the matches produced by mw.ustring.gmatch(str, ".")
-- (presumably one Unicode character each - confirm against the Scribunto
-- documentation) and returns two parallel tables: the characters, and the
-- printBytes rendering of each character. Both tables have been passed
-- through setMt.
local function makeCharByteTables(str)
	local chars, bytes = setMt({}), setMt({})
	for char in mw.ustring.gmatch(str, ".") do
		table.insert(chars, char)
		table.insert(bytes, printBytes(char))
	end
	return chars, bytes
end
-- Builds wikitext for a two-row wikitable: the first row has one cell per
-- entry of `chars`, the second one <code> cell per entry of `bytes`.
-- Both argument tables are passed through setMt. Returns the wikitext as
-- a single newline-separated string.
-- Note: this local deliberately shares its name with the global print.
local function print(chars, bytes)
	setMt(chars)
	setMt(bytes)

	local rows = { '{| class="wikitable"' }
	for _, char in ipairs(chars) do
		rows[#rows + 1] = "| " .. char
	end
	rows[#rows + 1] = "|-"
	for _, byteString in ipairs(bytes) do
		rows[#rows + 1] = "| <code>" .. byteString .. "</code>"
	end
	rows[#rows + 1] = "|}"

	return table.concat(rows, "\n")
end
-- Module entry point. Takes the text from the first positional argument
-- of `frame` (falling back to a built-in sample when it is absent) and
-- returns the wikitable produced by print for its characters and bytes.
function p.show(frame)
	local str = frame.args[1]
	if not str then
		str = "abc πρᾶγμᾰ"
	end
	local chars, bytes = makeCharByteTables(str)
	return print(chars, bytes)
end
return p