-- Module:Sandbox/DePiep/uchar/data
--
-- From English Wikipedia @ Freddythechick
-- Static lookup tables describing Unicode characters. The table holds three
-- sub-tables:
--   tPlanes     : plane number -> plane name
--   tGenCat     : General Category code -> { label, second field }
--   tScriptName : four-letter script code -> script name
local uchar_data = {
	-- Plane names keyed by plane number. Only planes 0-3 and 14-16 have an
	-- entry; looking up any other plane number yields nil.
	tPlanes = {
		[ 0] = "Basic Multilingual Plane",
		[ 1] = "Supplementary Multilingual Plane",
		[ 2] = "Supplementary Ideographic Plane",
		[ 3] = "Tertiary Ideographic Plane",
		[14] = "Supplementary Special-purpose Plane",
		[15] = "Supplementary Private Use Area-A",
		[16] = "Supplementary Private Use Area-B",
	},

	-- General Category codes (one-letter groups and two-letter values).
	-- Each entry is a two-element list: [1] is the readable label.
	-- NOTE(review): [2] looks like the hex code point of an example character
	-- for the category, with "x" standing in where none is given -- confirm
	-- against the code that consumes this table.
	tGenCat = {
		-- Letters
		L  = { "Letter",           "x" },
		LC = { "Cased Letter",     "x" },
		Lu = { "Uppercase Letter", "0042" },
		Ll = { "Lowercase Letter", "0062" },
		Lt = { "Titlecase Letter", "01F2" },
		Lm = { "Modifier Letter",  "02B0" },
		Lo = { "Other Letter",     "0294" },
		-- Marks
		M  = { "Mark",                   "x" },
		Mn = { "Nonspacing Mark",        "0302" },
		Mc = { "Spacing Combining Mark", "0BC2" },
		Me = { "Enclosing Mark",         "20DF" },
		-- Numbers
		N  = { "Number",               "x" },
		Nd = { "Decimal Digit Number", "0039" },
		Nl = { "Letter Number",        "216B" },
		No = { "Other Number",         "00BE" },
		-- Punctuation
		P  = { "Punctuation",               "x" },
		Pc = { "Connector Punctuation",     "x" },
		Pd = { "Dash Punctuation",          "x" },
		Ps = { "Open Punctuation",          "x" },
		Pe = { "Close Punctuation",         "x" },
		Pi = { "Initial Quote Punctuation", "x" },
		Pf = { "Final Quote Punctuation",   "x" },
		Po = { "Other Punctuation",         "x" },
		-- Symbols
		S  = { "Symbol",          "x" },
		Sm = { "Math Symbol",     "x" },
		Sc = { "Currency Symbol", "x" },
		Sk = { "Modifier Symbol", "x" },  -- label was misspelled "Modifer Symbol"
		So = { "Other Symbol",    "x" },
		-- Separators
		Z  = { "Separator",           "x" },
		Zs = { "Space Separator",     "x" },
		Zl = { "Line Separator",      "x" },
		Zp = { "Paragraph Separator", "x" },
		-- Other
		C  = { "Other",              "x" },
		Cc = { "Other control",      "x" },
		Cf = { "Other format",       "00AD" },
		Cs = { "Other surrogate",    "x" },
		Co = { "Other private use",  "x" },
		Cn = { "Other not assigned", "x" },
	},

	-- Script names keyed by four-letter script code.
	-- Module:Unicode data/scripts .. 'aliases' 29-04-2022
	-- Scripts.txt gives full names; here we consider them aliases to save space.
	tScriptName = {
		Adlm = "Adlam",
		Aghb = "Caucasian Albanian",
		Ahom = "Ahom",
		Arab = "Arabic",
		Armi = "Imperial Aramaic",
		Armn = "Armenian",
		Avst = "Avestan",
		Bali = "Balinese",
		Bamu = "Bamum",
		Bass = "Bassa Vah",
		Batk = "Batak",
		Beng = "Bengali",
		Bhks = "Bhaiksuki",
		Bopo = "Bopomofo",
		Brah = "Brahmi",
		Brai = "Braille",
		Bugi = "Buginese",
		Buhd = "Buhid",
		Cakm = "Chakma",
		Cans = "Canadian Aboriginal",
		Cari = "Carian",
		Cham = "Cham",
		Cher = "Cherokee",
		Chrs = "Chorasmian",
		Copt = "Coptic",
		Cpmn = "Cypro Minoan",
		Cprt = "Cypriot",
		Cyrl = "Cyrillic",
		Deva = "Devanagari",
		Diak = "Dives Akuru",
		Dogr = "Dogra",
		Dsrt = "Deseret",
		Dupl = "Duployan",
		Egyp = "Egyptian Hieroglyphs",
		Elba = "Elbasan",
		Elym = "Elymaic",
		Ethi = "Ethiopic",
		Geor = "Georgian",
		Glag = "Glagolitic",
		Gong = "Gunjala Gondi",
		Gonm = "Masaram Gondi",
		Goth = "Gothic",
		Gran = "Grantha",
		Grek = "Greek",
		Gujr = "Gujarati",
		Guru = "Gurmukhi",
		Hang = "Hangul",
		Hani = "Han",
		Hano = "Hanunoo",
		Hatr = "Hatran",
		Hebr = "Hebrew",
		Hira = "Hiragana",
		Hluw = "Anatolian Hieroglyphs",
		Hmng = "Pahawh Hmong",
		Hmnp = "Nyiakeng Puachue Hmong",
		Hrkt = "Katakana Or Hiragana",
		Hung = "Old Hungarian",
		Ital = "Old Italic",
		Java = "Javanese",
		Kali = "Kayah Li",
		Kana = "Katakana",
		Khar = "Kharoshthi",
		Khmr = "Khmer",
		Khoj = "Khojki",
		Kits = "Khitan Small Script",
		Knda = "Kannada",
		Kthi = "Kaithi",
		Lana = "Tai Tham",
		Laoo = "Lao",
		Latn = "Latin",
		Lepc = "Lepcha",
		Limb = "Limbu",
		Lina = "Linear A",
		Linb = "Linear B",
		Lisu = "Lisu",
		Lyci = "Lycian",
		Lydi = "Lydian",
		Mahj = "Mahajani",
		Maka = "Makasar",
		Mand = "Mandaic",
		Mani = "Manichaean",
		Marc = "Marchen",
		Medf = "Medefaidrin",
		Mend = "Mende Kikakui",
		Merc = "Meroitic Cursive",
		Mero = "Meroitic Hieroglyphs",
		Mlym = "Malayalam",
		Modi = "Modi",
		Mong = "Mongolian",
		Mroo = "Mro",
		Mtei = "Meetei Mayek",
		Mult = "Multani",
		Mymr = "Myanmar",
		Nand = "Nandinagari",
		Narb = "Old North Arabian",
		Nbat = "Nabataean",
		Newa = "Newa",
		Nkoo = "Nko",
		Nshu = "Nushu",
		Ogam = "Ogham",
		Olck = "Ol Chiki",
		Orkh = "Old Turkic",
		Orya = "Oriya",
		Osge = "Osage",
		Osma = "Osmanya",
		Ougr = "Old Uyghur",
		Palm = "Palmyrene",
		Pauc = "Pau Cin Hau",
		Perm = "Old Permic",
		Phag = "Phags Pa",
		Phli = "Inscriptional Pahlavi",
		Phlp = "Psalter Pahlavi",
		Phnx = "Phoenician",
		Plrd = "Miao",
		Prti = "Inscriptional Parthian",
		Rjng = "Rejang",
		Rohg = "Hanifi Rohingya",
		Runr = "Runic",
		Samr = "Samaritan",
		Sarb = "Old South Arabian",
		Saur = "Saurashtra",
		Sgnw = "SignWriting",
		Shaw = "Shavian",
		Shrd = "Sharada",
		Sidd = "Siddham",
		Sind = "Khudawadi",
		Sinh = "Sinhala",
		Sogd = "Sogdian",
		Sogo = "Old Sogdian",
		Sora = "Sora Sompeng",
		Soyo = "Soyombo",
		Sund = "Sundanese",
		Sylo = "Syloti Nagri",
		Syrc = "Syriac",
		Tagb = "Tagbanwa",
		Takr = "Takri",
		Tale = "Tai Le",
		Talu = "New Tai Lue",
		Taml = "Tamil",
		Tang = "Tangut",
		Tavt = "Tai Viet",
		Telu = "Telugu",
		Tfng = "Tifinagh",
		Tglg = "Tagalog",
		Thaa = "Thaana",
		Thai = "Thai",
		Tibt = "Tibetan",
		Tirh = "Tirhuta",
		Tnsa = "Tangsa",
		Toto = "Toto",
		Ugar = "Ugaritic",
		Vaii = "Vai",
		Vith = "Vithkuqi",
		Wara = "Warang Citi",
		Wcho = "Wancho",
		Xpeo = "Old Persian",
		Xsux = "Cuneiform",
		Yezi = "Yezidi",
		Yiii = "Yi",
		Zanb = "Zanabazar Square",
		Zinh = "Inherited",
		Zyyy = "Common",
		Zzzz = "Unknown",
	},
}
-- Return the assembled table as this module's value.
return uchar_data