Module:Wikt-lang/data and Module:Wikt-lang/data/sandbox: Difference between pages

(Difference between pages)
Page 1
Page 2
imported>Asukite
 
imported>Asukite
 
Line 21: Line 21:
This is a table of Wiktionary language codes with data belonging to them.
This is a table of Wiktionary language codes with data belonging to them.
Name is the "canonical name" used on Wiktionary.
Name is the "canonical name" used on Wiktionary.
Article is the Wikipedia article.
Script is the ISO 15924 code.
]]
]]
local data = {
local data = {
["languages"] = {
["languages"] = {
["aaq"] = {
["name"] = "Penobscot",
},
["ab"] = {
["ab"] = {
["name"] = "Abkhaz",
["name"] = "Abkhaz",
},
["abe"] = {
["name"] = "Abenaki",
},
},
["ang"] = {
["ang"] = {
["name"] = "Old English",
["article"] = {"Old English"},
-- Remove macrons, acutes, and overdots
-- Remove macrons, acutes, and overdots
["replacements"] = {
["replacements"] = {
Line 45: Line 35:
},
},
["ar"] = {
["ar"] = {
["name"] = "Arabic",
["article"] = "Arabic language",
["direction"] = "rtl", -- Should be in the script data module.
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
Line 59: Line 47:
},
},
},
},
["ara"] = {
["bal"] = {
["name"] = "Arabic",
["name"] = "Baluchi",
["article"] = "Arabic language",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
-- ālif with wasla is replaced by ālif;
[U(0x0671)] = U(0x0627),
-- taṭwīl, fatḥatan, ḍammatan, kasratan,
-- fatḥa, ḍamma, kasra,
-- shadda, sukūn, and superscript (dagger) ālif are removed.
["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
..U(0x064E)..U(0x064F)..U(0x0650)
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
},
},
},
["arb"] = {
["be"] = {
["name"] = "Modern Standard Arabic",
["article"] = "Modern Standard Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
-- ālif with wasla is replaced by ālif;
[U(0x0671)] = U(0x0627),
-- taṭwīl, fatḥatan, ḍammatan, kasratan,
-- fatḥa, ḍamma, kasra,
-- shadda, sukūn, and superscript (dagger) ālif are removed.
["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
..U(0x064E)..U(0x064F)..U(0x0650)
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
},
},
["apc"] = {
["name"] = "North Levantine Arabic",
["article"] = "North Levantine Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
["replacements"] = {
-- ālif with wasla is replaced by ālif;
[acute] = "",  
[U(0x0671)] = U(0x0627),
-- taṭwīl, fatḥatan, ḍammatan, kasratan,
-- fatḥa, ḍamma, kasra,
-- shadda, sukūn, and superscript (dagger) ālif are removed.
["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
..U(0x064E)..U(0x064F)..U(0x0650)
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
},
},
},
["ajp"] = {
["name"] = "South Levantine Arabic",
["article"] = "South Levantine Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
-- ālif with wasla is replaced by ālif;
[U(0x0671)] = U(0x0627),
-- taṭwīl, fatḥatan, ḍammatan, kasratan,
-- fatḥa, ḍamma, kasra,
-- shadda, sukūn, and superscript (dagger) ālif are removed.
["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
..U(0x064E)..U(0x064F)..U(0x0650)
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
},
},
["arz"] = {
["name"] = "Egyptian Arabic",
["article"] = "Egyptian Arabic",
["direction"] = "rtl", -- Should be in the script data module.
["replacements"] = {
-- ālif with wasla is replaced by ālif;
[U(0x0671)] = U(0x0627),
-- taṭwīl, fatḥatan, ḍammatan, kasratan,
-- fatḥa, ḍamma, kasra,
-- shadda, sukūn, and superscript (dagger) ālif are removed.
["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
..U(0x064E)..U(0x064F)..U(0x0650)
..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
},
},
["av"] = {
["name"] = "Avar"
},
["be"] = {
["article"] = "Belarusian language",
["replacements"] = { [acute] = "", },
},
["bn"] = {
["name"] = "Bengali",
["article"] = "Bengali language",
},
},
["bua"] = {
["bua"] = {
["name"] = "Buryat",
["name"] = "Buryat",
},
},
["cel-pro"] = { -- Incorrect tag
["name"] = "Proto-Celtic",
["Wikipedia_code"] = "cel-x-proto",
},
["cel-x-proto"] = {
["name"] = "Proto-Celtic",
},
["cel-bry-pro"] = { -- Incorrect tag
["name"] = "Proto-Brythonic",
["article"] = "Common Brittonic",
["type"] = "reconstructed",
},
    ["com"] = {
    ["name"] = "Comanche",
    ["article"] = "Comanche language",
    },
["cu"] = {
["cu"] = {
["name"] = "Old Church Slavonic",
["name"] = "Old Church Slavonic",
["article"] = "Old Church Slavonic",
},
["de"] = {
["name"] = "German",
["article"] = "German language",
},
["en"] = {
["name"] = "English",
["article"] = "English language",
},
["es"] = {
["name"] = "Spanish",
["article"] = "Spanish language",
},
},
["egy"] = {
["egy"] = {
["name"] = "Egyptian",
["name"] = "Egyptian",
},
["evn"] = {
["name"] = "Evenki",
["article"] = "Evenki language",
},
["fr"] = {
["name"] = "French",
["article"] = "French language",
},
["frm"] = {
["name"] = "Middle French",
["article"] = "Middle French",
},
},
["frp"] = {
["frp"] = {
["name"] = "Franco-Provençal",
["name"] = "Franco-Provençal",
},
["ff"] = {
["name"] = "Fula",
},
["gem-pro"] = { -- Incorrect tag
["name"] = "Proto-Germanic",
["article"] = "Proto-Germanic language",
["type"] = "reconstructed",
["replacements"] = {},
["Wikipedia_code"] = "gem-x-proto",
},
["gem-x-proto"] = {
["name"] = "Proto-Germanic",
["article"] = "Proto-Germanic language",
["type"] = "reconstructed",
["replacements"] = {},
},
["gml"] = {
["name"] = "Middle Low German",
},
["gmw-ecg"] = {
["name"] = "East Central German",
},
["gmw-x-proto"] = {
["name"] = "Proto-West Germanic",
["article"] = "Proto-West Germanic language",
["type"] = "reconstructed",
["replacements"] = {},
},
["gmq-x-gut"] = {
["name"] = "Gutnish",
["article"] = "Gutnish",
},
},
["goh"] = {
["goh"] = {
Line 239: Line 76:
},
},
["got"] = {
["got"] = {
["name"] = "Gothic",
["article"] = "Gothic language",
["replacements"] = {
["replacements"] = {
-- Latin to Gothic since people will not want to have to copy
-- Latin to Gothic since people will not want to have to copy
Line 270: Line 105:
["[OoŌō]"]  = "𐍉",
["[OoŌō]"]  = "𐍉",
},
},
},
["gsw"] = {
["name"] = "Alemannic German",
},
},
["grc"] = {
["grc"] = {
["name"] = "Ancient Greek",
["article"] = "Ancient Greek",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
Line 289: Line 119:
}
}
},
},
},
["grk-pro"] = { -- Incorrect tag
["name"] = "Proto-Hellenic",
["Wikipedia_name"] = "Proto-Greek",
["article"] = "Proto-Greek language",
["type"] = "reconstructed",
["replacements"] = {},
["Wikipedia_code"] = "grk-x-proto",
},
["grk-x-proto"] = {
["name"] = "Proto-Hellenic",
["Wikipedia_name"] = "Proto-Greek",
["article"] = "Proto-Greek language",
["type"] = "reconstructed",
["replacements"] = {},
},
["grt"] = {
["name"] = "Garo",
},
},
["ha"] = {
["ha"] = {
["name"] = "Hausa",
-- remove tilde, grave, acute, macron, circumflex
-- remove tilde, grave, acute, macron, circumflex
["replacements"] = {
["replacements"] = {
Line 316: Line 127:
},
},
},
},
["hi"] = {
["jbo"] = {
["name"] = "Hindi",
["article"] = "Hindi",
},
["ine-bsl-pro"] = {
["name"] = "Proto-Balto-Slavic",
["article"] = "Proto-Balto-Slavic language",
["type"] = "reconstructed",
},
["ine-pro"] = { -- Incorrect tag
["name"] = "Proto-Indo-European",
["article"] = "Proto-Indo-European language",
["type"] = "reconstructed",
["replacements"] = {},
["Wikipedia_code"] = "ine-x-proto",
},
["ine-x-proto"] = {
["name"] = "Proto-Indo-European",
["article"] = "Proto-Indo-European language",
["type"] = "reconstructed",
["replacements"] = {},
},
["ja"] = {
["name"] = "Japanese",
["article"] = "Japanese language",
},
["jbo"] = { -- Lojban
["type"] = "appendix",
["type"] = "appendix",
},
["ket"] = {
["name"] = "Ket",
["article"] = "Ket language",
},
["ksk"] = {
["name"] = "Kansa",
["article"] = "Kansa language",
},
},
["la"] = {
["la"] = {
["name"] = "Latin",
["article"] = "Latin",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
Line 362: Line 137:
},
},
["lt"] = {
["lt"] = {
["name"] = "Lithuanian",
-- remove acute, tilde, grave
-- remove acute, tilde, grave
["replacements"] = {
["replacements"] = {
Line 374: Line 148:
["mul"] = {
["mul"] = {
["name"] = "Translingual",
["name"] = "Translingual",
["article"] = "",
},
},
["nci"] = {
["nci"] = {
["name"] = "Classical Nahuatl",
["article"] = "Classical Nahuatl",
-- Remove macrons, acutes, circumflexes and graves
-- Remove macrons, acutes, circumflexes and graves
["replacements"] = {
["replacements"] = {
Line 389: Line 160:
["nds-de"] = {
["nds-de"] = {
["name"] = "German Low German",
["name"] = "German Low German",
},
["non"] = {
["name"] = "Old Norse",
},
["non-x-proto"] = {
["name"] = "Proto-Norse",
},
["odt"] = {
["name"] = "Old Dutch",
},
["oge"] = {
["name"] = "Old Georgian",
},
["oj"] = {
["name"] = "Ojibwe",
},
},
["orv"] = {
["orv"] = {
["name"] = "Old East Slavic",
["article"] = "Old East Slavic",
["replacements"] = {
["replacements"] = {
[U(0x484)] = "",
[U(0x484)] = "",
},
},
},
["osx"] = {
["name"] = "Old Saxon",
},
["pt"] = {
["name"] = "Portuguese",
["article"] = "Portuguese language",
-- ["scripts"] = { "Latn" },
},
["pa"] = {
["name"] = "Punjabi",
["article"] = "Punjabi language",
},
["pgl"] = {
["name"] = "Primitive Irish",
["article"] = "Primitive Irish",
},
["pis"] = {
["name"] = "Pijin",
["article"] = "Pijin language",
},
["poz-x-poly-proto"] = {
["name"] = "Proto-Nuclear Polynesian",
["article"] = "Proto-Polynesian language",
["type"] = "reconstructed",
},
["rap"] = {
["name"] = "Rapa Nui",
["article"] = "Rapa Nui language",
},
},
["ru"] = {
["ru"] = {
["name"] = "Russian",
["article"] = "Russian language",
["replacements"] = { [acute] = "", },
["replacements"] = { [acute] = "", },
},
},
Line 453: Line 176:
["([đflmnŋrsšŧv])'%1"] = "%1%1",
["([đflmnŋrsšŧv])'%1"] = "%1%1",
},
},
},
["sem-pro"] = {
["name"] = "Proto-Semitic",
["article"] = "Proto-Semitic",
["type"] = "reconstructed",
},
},
["sh"] = {
["sh"] = {
["article"] = "Serbo-Croatian language",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
Line 469: Line 186:
},
},
["sl"] = {
["sl"] = {
["name"] = "Slovene",
["replacements"] = {
["replacements"] = {
decompose = true,
decompose = true,
Line 477: Line 193:
},
},
},
},
["sla-pro"] = {
["uk"] = {
["name"] = "Proto-Slavic", -- also Common Slavic
["replacements"] = { [acute] = "", }
},
["xcl"] = {
["replacements"] = {
["[՞՜՛՟]"] = "",
["և"] = "եւ",
},
},
["xgf"] = {
["replacements"] = {
["['`ʔ]"] = "ʼ",
},
},
 
-- Custom private-use codes which should be added to [[Module:Lang]].
-- Codes are in the format of "code-x-code"
["gem-x-proto"] = {
["type"] = "reconstructed",
},
["grk-x-proto"] = {
["name"] = "Proto-Hellenic",
["type"] = "reconstructed",
},
["ine-x-proto"] = {
["type"] = "reconstructed",
},
["sem-x-proto"] = {
["type"] = "reconstructed",
},
["sla-x-proto"] = {
["type"] = "reconstructed",
["type"] = "reconstructed",
["replacements"] = {
["replacements"] = {
Line 499: Line 244:
["ŭ"] = "ъ",
["ŭ"] = "ъ",
},
},
},
["tts"] = {
["name"] = "Isan", -- also "Northeastern Thai"
["article"] = "Isan language",
},
["ug"] = {
["name"] = "Uyghur", --also less commonly "Uighur"
["article"] = "Uyghur language",
},
["uk"] = {
["article"] = "Ukrainian language",
["replacements"] = { [acute] = "", }
},
["ur"] = {
["name"] = "Urdu",
["article"] = "Urdu",
},
["xcl"] = {
["name"] = "Old Armenian",
["article"] = "Classical Armenian",
["replacements"] = {
["[՞՜՛՟]"] = "",
["և"] = "եւ",
},
},
["xgf"] = {
["name"] = "Tongva", -- not ISO name "Gabrielino-Fernandeño"
["article"] = "Tongva language",
["replacements"] = {
["['`ʔ]"] = "ʼ",
},
},
["xlu"] = {
["name"] = "Luwian", -- not ISO name "Cuneiform Luwian"
["article"] = "Cuneiform Luwian"
},
["xpq"] = {
["name"] = "Mohegan-Pequot",
},
["xxt"] = {
["name"] = "Tambora",
["article"] = "Tambora language",
},
["xvn"] = {
["name"] = "Vandalic",
["article"] = "Vandalic language",
},
["yua"] = {
["name"] = "Yucatec Maya",
["article"] = "Yucatec Maya language",
},
["zh"] = {
["name"] = "Chinese",
["article"] = "Chinese language",
-- ["scripts"] = { "Hani" },
},
["zle-ort"] = {
["name"] = "Old Ruthenian",
["article"] = "Old Ruthenian",
["replacements"] = { [acute] = "", },
},
},
},
},
 
-- Here, keys (for example, "gem") are Wikipedia language codes used in
-- Here, keys (for example, "gem") are Wikipedia language codes used in
-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
Line 580: Line 265:
["fil"] = "tl",
["fil"] = "tl",
["fuf"] = "ff",
["fuf"] = "ff",
["gem"] = "gem-pro", -- Not correct, but is commonly used.
["gem"] = "gem-x-proto", -- Not correct, but is commonly used.
["gmw-ecg"] = "gmw-x-ecg",
["hak"] = "zh",
["hak"] = "zh",
["hbo"] = "he",
["hbo"] = "he",
["hr"] = "sh",
["hr"] = "sh",
["ine"] = "ine-pro", -- Not correct, but might be commonly used.
["ine"] = "ine-x-proto", -- Not correct, but might be commonly used.
["kjv"] = "sh",
["kjv"] = "sh",
["nan"] = "zh",
["nan"] = "zh",
["prs"] = "fa",
["prs"] = "fa",
["rn"] = "rw",
["rn"] = "rw",
["sli"] = "gmw-ecg",
["sli"] = "gmw-x-ecg",
["sr"] = "sh",
["sr"] = "sh",
["src"] = "sc",
["src"] = "sc",
Line 598: Line 284:
["yue"] = "zh",
["yue"] = "zh",
["xno"] = "fro",
["xno"] = "fro",
-- Incorrect private use tags
["cel-proto"] = "cel-x-proto",
["gem-pro"] = "gem-x-proto",
["grk-pro"] = "grk-x-proto",
["ine-pro"] = "ine-x-proto",
["ine-bsl-pro"] = "ine-x-proto",
["sem-pro"] = "sem-x-proto",
["sla-pro"] = "sla-x-proto",
},
},
}
}


return data
return data