-- Static lookup tables describing Unicode characters.
--   tPlanes     : plane number            -> plane name
--   tGenCat     : General_Category code   -> { description, sample }
--   tScriptName : four-letter script code -> script name
local uchar_data = {
  -- Plane names keyed by plane number. Planes 4-13 have no entry here, so
  -- looking them up yields nil.
  tPlanes = {
    [ 0] = "Basic Multilingual Plane",
    [ 1] = "Supplementary Multilingual Plane",
    [ 2] = "Supplementary Ideographic Plane",
    [ 3] = "Tertiary Ideographic Plane",
    [14] = "Supplementary Special-purpose Plane",
    [15] = "Supplementary Private Use Area-A",
    [16] = "Supplementary Private Use Area-B",
  },
  -- General category codes (one-letter groups and two-letter subcategories).
  -- Each value is a two-element array: [1] is the human-readable description,
  -- [2] is either a four-digit hex string or the placeholder "x".
  -- NOTE(review): the hex strings look like a sample code point belonging to
  -- that category (e.g. "0042" for Lu) -- confirm against the consumer.
  tGenCat = {
    ['L']  = { "Letter", "x" },
    ['LC'] = { "Cased Letter", "x" },
    ['Lu'] = { "Uppercase Letter", "0042" },
    ['Ll'] = { "Lowercase Letter", "0062" },
    ['Lt'] = { "Titlecase Letter", "01F2" },
    ['Lm'] = { "Modifier Letter", "02B0" },
    ['Lo'] = { "Other Letter", "0294" },
    ['M']  = { "Mark", "x" },
    ['Mn'] = { "Nonspacing Mark", "0302" },
    ['Mc'] = { "Spacing Combining Mark", "0BC2" },
    ['Me'] = { "Enclosing Mark", "20DF" },
    ['N']  = { "Number", "x" },
    ['Nd'] = { "Decimal Digit Number", "0039" },
    ['Nl'] = { "Letter Number", "216B" },
    ['No'] = { "Other Number", "00BE" },
    ['P']  = { "Punctuation", "x" },
    ['Pc'] = { "Connector Punctuation", "x" },
    ['Pd'] = { "Dash Punctuation", "x" },
    ['Ps'] = { "Open Punctuation", "x" },
    ['Pe'] = { "Close Punctuation", "x" },
    ['Pi'] = { "Initial Quote Punctuation", "x" },
    ['Pf'] = { "Final Quote Punctuation", "x" },
    ['Po'] = { "Other Punctuation", "x" },
    ['S']  = { "Symbol", "x" },
    ['Sm'] = { "Math Symbol", "x" },
    ['Sc'] = { "Currency Symbol", "x" },
    -- Spelling fixed: this entry previously read "Modifer Symbol".
    ['Sk'] = { "Modifier Symbol", "x" },
    ['So'] = { "Other Symbol", "x" },
    ['Z']  = { "Separator", "x" },
    ['Zs'] = { "Space Separator", "x" },
    ['Zl'] = { "Line Separator", "x" },
    ['Zp'] = { "Paragraph Separator", "x" },
    ['C']  = { "Other", "x" },
    ['Cc'] = { "Other control", "x" },
    ['Cf'] = { "Other format", "00AD" },
    ['Cs'] = { "Other surrogate", "x" },
    ['Co'] = { "Other private use", "x" },
    ['Cn'] = { "Other not assigned", "x" },
  },
  -- Source: Module:Unicode data/scripts .. 'aliases', snapshot of 29-04-2022.
  -- Scripts.txt gives full script names; here the four-letter codes are the
  -- keys and the full names are treated as their aliases to save space.
  -- Multi-word names are written with spaces.
  tScriptName = {
    Adlm = "Adlam",
    Aghb = "Caucasian Albanian",
    Ahom = "Ahom",
    Arab = "Arabic",
    Armi = "Imperial Aramaic",
    Armn = "Armenian",
    Avst = "Avestan",
    Bali = "Balinese",
    Bamu = "Bamum",
    Bass = "Bassa Vah",
    Batk = "Batak",
    Beng = "Bengali",
    Bhks = "Bhaiksuki",
    Bopo = "Bopomofo",
    Brah = "Brahmi",
    Brai = "Braille",
    Bugi = "Buginese",
    Buhd = "Buhid",
    Cakm = "Chakma",
    Cans = "Canadian Aboriginal",
    Cari = "Carian",
    Cham = "Cham",
    Cher = "Cherokee",
    Chrs = "Chorasmian",
    Copt = "Coptic",
    Cpmn = "Cypro Minoan",
    Cprt = "Cypriot",
    Cyrl = "Cyrillic",
    Deva = "Devanagari",
    Diak = "Dives Akuru",
    Dogr = "Dogra",
    Dsrt = "Deseret",
    Dupl = "Duployan",
    Egyp = "Egyptian Hieroglyphs",
    Elba = "Elbasan",
    Elym = "Elymaic",
    Ethi = "Ethiopic",
    Geor = "Georgian",
    Glag = "Glagolitic",
    Gong = "Gunjala Gondi",
    Gonm = "Masaram Gondi",
    Goth = "Gothic",
    Gran = "Grantha",
    Grek = "Greek",
    Gujr = "Gujarati",
    Guru = "Gurmukhi",
    Hang = "Hangul",
    Hani = "Han",
    Hano = "Hanunoo",
    Hatr = "Hatran",
    Hebr = "Hebrew",
    Hira = "Hiragana",
    Hluw = "Anatolian Hieroglyphs",
    Hmng = "Pahawh Hmong",
    Hmnp = "Nyiakeng Puachue Hmong",
    Hrkt = "Katakana Or Hiragana",
    Hung = "Old Hungarian",
    Ital = "Old Italic",
    Java = "Javanese",
    Kali = "Kayah Li",
    Kana = "Katakana",
    Khar = "Kharoshthi",
    Khmr = "Khmer",
    Khoj = "Khojki",
    Kits = "Khitan Small Script",
    Knda = "Kannada",
    Kthi = "Kaithi",
    Lana = "Tai Tham",
    Laoo = "Lao",
    Latn = "Latin",
    Lepc = "Lepcha",
    Limb = "Limbu",
    Lina = "Linear A",
    Linb = "Linear B",
    Lisu = "Lisu",
    Lyci = "Lycian",
    Lydi = "Lydian",
    Mahj = "Mahajani",
    Maka = "Makasar",
    Mand = "Mandaic",
    Mani = "Manichaean",
    Marc = "Marchen",
    Medf = "Medefaidrin",
    Mend = "Mende Kikakui",
    Merc = "Meroitic Cursive",
    Mero = "Meroitic Hieroglyphs",
    Mlym = "Malayalam",
    Modi = "Modi",
    Mong = "Mongolian",
    Mroo = "Mro",
    Mtei = "Meetei Mayek",
    Mult = "Multani",
    Mymr = "Myanmar",
    Nand = "Nandinagari",
    Narb = "Old North Arabian",
    Nbat = "Nabataean",
    Newa = "Newa",
    Nkoo = "Nko",
    Nshu = "Nushu",
    Ogam = "Ogham",
    Olck = "Ol Chiki",
    Orkh = "Old Turkic",
    Orya = "Oriya",
    Osge = "Osage",
    Osma = "Osmanya",
    Ougr = "Old Uyghur",
    Palm = "Palmyrene",
    Pauc = "Pau Cin Hau",
    Perm = "Old Permic",
    Phag = "Phags Pa",
    Phli = "Inscriptional Pahlavi",
    Phlp = "Psalter Pahlavi",
    Phnx = "Phoenician",
    Plrd = "Miao",
    Prti = "Inscriptional Parthian",
    Rjng = "Rejang",
    Rohg = "Hanifi Rohingya",
    Runr = "Runic",
    Samr = "Samaritan",
    Sarb = "Old South Arabian",
    Saur = "Saurashtra",
    Sgnw = "SignWriting",
    Shaw = "Shavian",
    Shrd = "Sharada",
    Sidd = "Siddham",
    Sind = "Khudawadi",
    Sinh = "Sinhala",
    Sogd = "Sogdian",
    Sogo = "Old Sogdian",
    Sora = "Sora Sompeng",
    Soyo = "Soyombo",
    Sund = "Sundanese",
    Sylo = "Syloti Nagri",
    Syrc = "Syriac",
    Tagb = "Tagbanwa",
    Takr = "Takri",
    Tale = "Tai Le",
    Talu = "New Tai Lue",
    Taml = "Tamil",
    Tang = "Tangut",
    Tavt = "Tai Viet",
    Telu = "Telugu",
    Tfng = "Tifinagh",
    Tglg = "Tagalog",
    Thaa = "Thaana",
    Thai = "Thai",
    Tibt = "Tibetan",
    Tirh = "Tirhuta",
    Tnsa = "Tangsa",
    Toto = "Toto",
    Ugar = "Ugaritic",
    Vaii = "Vai",
    Vith = "Vithkuqi",
    Wara = "Warang Citi",
    Wcho = "Wancho",
    Xpeo = "Old Persian",
    Xsux = "Cuneiform",
    Yezi = "Yezidi",
    Yiii = "Yi",
    Zanb = "Zanabazar Square",
    Zinh = "Inherited",
    Zyyy = "Common",
    Zzzz = "Unknown",
  },
}
-- Hand the data table back as this chunk's return value.
return uchar_data