|
|
Line 21: |
Line 21: |
| This is a table of Wiktionary language codes with data belonging to them. | | This is a table of Wiktionary language codes with data belonging to them. |
| Name is the "canonical name" used on Wiktionary. | | Name is the "canonical name" used on Wiktionary. |
| Article is the Wikipedia article.
| |
| Script is the ISO 15924 code.
| |
| ]] | | ]] |
| local data = { | | local data = { |
| ["languages"] = { | | ["languages"] = { |
| ["aaq"] = {
| |
| ["name"] = "Penobscot",
| |
| },
| |
| ["ab"] = { | | ["ab"] = { |
| ["name"] = "Abkhaz", | | ["name"] = "Abkhaz", |
| },
| |
| ["abe"] = {
| |
| ["name"] = "Abenaki",
| |
| }, | | }, |
| ["ang"] = { | | ["ang"] = { |
| ["name"] = "Old English",
| |
| ["article"] = {"Old English"},
| |
| -- Remove macrons, acutes, and overdots | | -- Remove macrons, acutes, and overdots |
| ["replacements"] = { | | ["replacements"] = { |
Line 45: |
Line 35: |
| }, | | }, |
| ["ar"] = { | | ["ar"] = { |
| ["name"] = "Arabic",
| |
| ["article"] = "Arabic language",
| |
| ["direction"] = "rtl", -- Should be in the script data module. | | ["direction"] = "rtl", -- Should be in the script data module. |
| ["replacements"] = { | | ["replacements"] = { |
Line 59: |
Line 47: |
| }, | | }, |
| }, | | }, |
| ["ara"] = { | | ["bal"] = { |
| ["name"] = "Arabic", | | ["name"] = "Baluchi", |
| ["article"] = "Arabic language",
| |
| ["direction"] = "rtl", -- Should be in the script data module.
| |
| ["replacements"] = {
| |
| -- ālif with wasla is replaced by ālif;
| |
| [U(0x0671)] = U(0x0627),
| |
| -- taṭwīl, fatḥatan, ḍammatan, kasratan,
| |
| -- fatḥa, ḍamma, kasra,
| |
| -- shadda, sukūn, and superscript (dagger) ālif are removed.
| |
| ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
| |
| ..U(0x064E)..U(0x064F)..U(0x0650)
| |
| ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
| |
| },
| |
| }, | | }, |
| ["arb"] = { | | ["be"] = { |
| ["name"] = "Modern Standard Arabic",
| |
| ["article"] = "Modern Standard Arabic",
| |
| ["direction"] = "rtl", -- Should be in the script data module.
| |
| ["replacements"] = {
| |
| -- ālif with wasla is replaced by ālif;
| |
| [U(0x0671)] = U(0x0627),
| |
| -- taṭwīl, fatḥatan, ḍammatan, kasratan,
| |
| -- fatḥa, ḍamma, kasra,
| |
| -- shadda, sukūn, and superscript (dagger) ālif are removed.
| |
| ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
| |
| ..U(0x064E)..U(0x064F)..U(0x0650)
| |
| ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
| |
| },
| |
| },
| |
| ["apc"] = {
| |
| ["name"] = "North Levantine Arabic",
| |
| ["article"] = "North Levantine Arabic",
| |
| ["direction"] = "rtl", -- Should be in the script data module.
| |
| ["replacements"] = { | | ["replacements"] = { |
| -- ālif with wasla is replaced by ālif;
| | [acute] = "", |
| [U(0x0671)] = U(0x0627), | |
| -- taṭwīl, fatḥatan, ḍammatan, kasratan,
| |
| -- fatḥa, ḍamma, kasra,
| |
| -- shadda, sukūn, and superscript (dagger) ālif are removed.
| |
| ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
| |
| ..U(0x064E)..U(0x064F)..U(0x0650)
| |
| ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
| |
| }, | | }, |
| },
| |
| ["ajp"] = {
| |
| ["name"] = "South Levantine Arabic",
| |
| ["article"] = "South Levantine Arabic",
| |
| ["direction"] = "rtl", -- Should be in the script data module.
| |
| ["replacements"] = {
| |
| -- ālif with wasla is replaced by ālif;
| |
| [U(0x0671)] = U(0x0627),
| |
| -- taṭwīl, fatḥatan, ḍammatan, kasratan,
| |
| -- fatḥa, ḍamma, kasra,
| |
| -- shadda, sukūn, and superscript (dagger) ālif are removed.
| |
| ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
| |
| ..U(0x064E)..U(0x064F)..U(0x0650)
| |
| ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
| |
| },
| |
| },
| |
| ["arz"] = {
| |
| ["name"] = "Egyptian Arabic",
| |
| ["article"] = "Egyptian Arabic",
| |
| ["direction"] = "rtl", -- Should be in the script data module.
| |
| ["replacements"] = {
| |
| -- ālif with wasla is replaced by ālif;
| |
| [U(0x0671)] = U(0x0627),
| |
| -- taṭwīl, fatḥatan, ḍammatan, kasratan,
| |
| -- fatḥa, ḍamma, kasra,
| |
| -- shadda, sukūn, and superscript (dagger) ālif are removed.
| |
| ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
| |
| ..U(0x064E)..U(0x064F)..U(0x0650)
| |
| ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
| |
| },
| |
| },
| |
| ["av"] = {
| |
| ["name"] = "Avar"
| |
| },
| |
| ["be"] = {
| |
| ["article"] = "Belarusian language",
| |
| ["replacements"] = { [acute] = "", },
| |
| },
| |
| ["bn"] = {
| |
| ["name"] = "Bengali",
| |
| ["article"] = "Bengali language",
| |
| }, | | }, |
| ["bua"] = { | | ["bua"] = { |
| ["name"] = "Buryat", | | ["name"] = "Buryat", |
| }, | | }, |
| ["cel-pro"] = { -- Incorrect tag
| |
| ["name"] = "Proto-Celtic",
| |
| ["Wikipedia_code"] = "cel-x-proto",
| |
| },
| |
| ["cel-x-proto"] = {
| |
| ["name"] = "Proto-Celtic",
| |
| },
| |
| ["cel-bry-pro"] = { -- Incorrect tag
| |
| ["name"] = "Proto-Brythonic",
| |
| ["article"] = "Common Brittonic",
| |
| ["type"] = "reconstructed",
| |
| },
| |
| ["com"] = {
| |
| ["name"] = "Comanche",
| |
| ["article"] = "Comanche language",
| |
| },
| |
| ["cu"] = { | | ["cu"] = { |
| ["name"] = "Old Church Slavonic", | | ["name"] = "Old Church Slavonic", |
| ["article"] = "Old Church Slavonic",
| |
| },
| |
| ["de"] = {
| |
| ["name"] = "German",
| |
| ["article"] = "German language",
| |
| },
| |
| ["en"] = {
| |
| ["name"] = "English",
| |
| ["article"] = "English language",
| |
| },
| |
| ["es"] = {
| |
| ["name"] = "Spanish",
| |
| ["article"] = "Spanish language",
| |
| }, | | }, |
| ["egy"] = { | | ["egy"] = { |
| ["name"] = "Egyptian", | | ["name"] = "Egyptian", |
| },
| |
| ["evn"] = {
| |
| ["name"] = "Evenki",
| |
| ["article"] = "Evenki language",
| |
| },
| |
| ["fr"] = {
| |
| ["name"] = "French",
| |
| ["article"] = "French language",
| |
| },
| |
| ["frm"] = {
| |
| ["name"] = "Middle French",
| |
| ["article"] = "Middle French",
| |
| }, | | }, |
| ["frp"] = { | | ["frp"] = { |
| ["name"] = "Franco-Provençal", | | ["name"] = "Franco-Provençal", |
| },
| |
| ["ff"] = {
| |
| ["name"] = "Fula",
| |
| },
| |
| ["gem-pro"] = { -- Incorrect tag
| |
| ["name"] = "Proto-Germanic",
| |
| ["article"] = "Proto-Germanic language",
| |
| ["type"] = "reconstructed",
| |
| ["replacements"] = {},
| |
| ["Wikipedia_code"] = "gem-x-proto",
| |
| },
| |
| ["gem-x-proto"] = {
| |
| ["name"] = "Proto-Germanic",
| |
| ["article"] = "Proto-Germanic language",
| |
| ["type"] = "reconstructed",
| |
| ["replacements"] = {},
| |
| },
| |
| ["gml"] = {
| |
| ["name"] = "Middle Low German",
| |
| },
| |
| ["gmw-ecg"] = {
| |
| ["name"] = "East Central German",
| |
| },
| |
| ["gmw-x-proto"] = {
| |
| ["name"] = "Proto-West Germanic",
| |
| ["article"] = "Proto-West Germanic language",
| |
| ["type"] = "reconstructed",
| |
| ["replacements"] = {},
| |
| },
| |
| ["gmq-x-gut"] = {
| |
| ["name"] = "Gutnish",
| |
| ["article"] = "Gutnish",
| |
| }, | | }, |
| ["goh"] = { | | ["goh"] = { |
Line 239: |
Line 76: |
| }, | | }, |
| ["got"] = { | | ["got"] = { |
| ["name"] = "Gothic",
| |
| ["article"] = "Gothic language",
| |
| ["replacements"] = { | | ["replacements"] = { |
| -- Latin to Gothic since people will not want to have to copy | | -- Latin to Gothic since people will not want to have to copy |
Line 270: |
Line 105: |
| ["[OoŌō]"] = "𐍉", | | ["[OoŌō]"] = "𐍉", |
| }, | | }, |
| },
| |
| ["gsw"] = {
| |
| ["name"] = "Alemannic German",
| |
| }, | | }, |
| ["grc"] = { | | ["grc"] = { |
| ["name"] = "Ancient Greek",
| |
| ["article"] = "Ancient Greek",
| |
| ["replacements"] = { | | ["replacements"] = { |
| decompose = true, | | decompose = true, |
Line 289: |
Line 119: |
| } | | } |
| }, | | }, |
| },
| |
| ["grk-pro"] = { -- Incorrect tag
| |
| ["name"] = "Proto-Hellenic",
| |
| ["Wikipedia_name"] = "Proto-Greek",
| |
| ["article"] = "Proto-Greek language",
| |
| ["type"] = "reconstructed",
| |
| ["replacements"] = {},
| |
| ["Wikipedia_code"] = "grk-x-proto",
| |
| },
| |
| ["grk-x-proto"] = {
| |
| ["name"] = "Proto-Hellenic",
| |
| ["Wikipedia_name"] = "Proto-Greek",
| |
| ["article"] = "Proto-Greek language",
| |
| ["type"] = "reconstructed",
| |
| ["replacements"] = {},
| |
| },
| |
| ["grt"] = {
| |
| ["name"] = "Garo",
| |
| }, | | }, |
| ["ha"] = { | | ["ha"] = { |
| ["name"] = "Hausa",
| |
| -- remove tilde, grave, acute, macron, circumflex | | -- remove tilde, grave, acute, macron, circumflex |
| ["replacements"] = { | | ["replacements"] = { |
Line 316: |
Line 127: |
| }, | | }, |
| }, | | }, |
| ["hi"] = {
| | ["jbo"] = { |
| ["name"] = "Hindi",
| |
| ["article"] = "Hindi",
| |
| },
| |
| ["ine-bsl-pro"] = {
| |
| ["name"] = "Proto-Balto-Slavic",
| |
| ["article"] = "Proto-Balto-Slavic language",
| |
| ["type"] = "reconstructed",
| |
| },
| |
| ["ine-pro"] = { -- Incorrect tag
| |
| ["name"] = "Proto-Indo-European",
| |
| ["article"] = "Proto-Indo-European language",
| |
| ["type"] = "reconstructed",
| |
| ["replacements"] = {},
| |
| ["Wikipedia_code"] = "ine-x-proto",
| |
| },
| |
| ["ine-x-proto"] = {
| |
| ["name"] = "Proto-Indo-European",
| |
| ["article"] = "Proto-Indo-European language",
| |
| ["type"] = "reconstructed",
| |
| ["replacements"] = {},
| |
| },
| |
| ["ja"] = {
| |
| ["name"] = "Japanese",
| |
| ["article"] = "Japanese language",
| |
| },
| |
| ["jbo"] = { -- Lojban | |
| ["type"] = "appendix", | | ["type"] = "appendix", |
| },
| |
| ["ket"] = {
| |
| ["name"] = "Ket",
| |
| ["article"] = "Ket language",
| |
| },
| |
| ["ksk"] = {
| |
| ["name"] = "Kansa",
| |
| ["article"] = "Kansa language",
| |
| }, | | }, |
| ["la"] = { | | ["la"] = { |
| ["name"] = "Latin",
| |
| ["article"] = "Latin",
| |
| ["replacements"] = { | | ["replacements"] = { |
| decompose = true, | | decompose = true, |
Line 362: |
Line 137: |
| }, | | }, |
| ["lt"] = { | | ["lt"] = { |
| ["name"] = "Lithuanian",
| |
| -- remove acute, tilde, grave | | -- remove acute, tilde, grave |
| ["replacements"] = { | | ["replacements"] = { |
Line 374: |
Line 148: |
| ["mul"] = { | | ["mul"] = { |
| ["name"] = "Translingual", | | ["name"] = "Translingual", |
| ["article"] = "",
| |
| }, | | }, |
| ["nci"] = { | | ["nci"] = { |
| ["name"] = "Classical Nahuatl",
| |
| ["article"] = "Classical Nahuatl",
| |
| -- Remove macrons, acutes, circumflexes and graves | | -- Remove macrons, acutes, circumflexes and graves |
| ["replacements"] = { | | ["replacements"] = { |
Line 389: |
Line 160: |
| ["nds-de"] = { | | ["nds-de"] = { |
| ["name"] = "German Low German", | | ["name"] = "German Low German", |
| },
| |
| ["non"] = {
| |
| ["name"] = "Old Norse",
| |
| },
| |
| ["non-x-proto"] = {
| |
| ["name"] = "Proto-Norse",
| |
| },
| |
| ["odt"] = {
| |
| ["name"] = "Old Dutch",
| |
| },
| |
| ["oge"] = {
| |
| ["name"] = "Old Georgian",
| |
| },
| |
| ["oj"] = {
| |
| ["name"] = "Ojibwe",
| |
| }, | | }, |
| ["orv"] = { | | ["orv"] = { |
| ["name"] = "Old East Slavic",
| |
| ["article"] = "Old East Slavic",
| |
| ["replacements"] = { | | ["replacements"] = { |
| [U(0x484)] = "", | | [U(0x484)] = "", |
| }, | | }, |
| },
| |
| ["osx"] = {
| |
| ["name"] = "Old Saxon",
| |
| },
| |
| ["pt"] = {
| |
| ["name"] = "Portuguese",
| |
| ["article"] = "Portuguese language",
| |
| -- ["scripts"] = { "Latn" },
| |
| },
| |
| ["pa"] = {
| |
| ["name"] = "Punjabi",
| |
| ["article"] = "Punjabi language",
| |
| },
| |
| ["pgl"] = {
| |
| ["name"] = "Primitive Irish",
| |
| ["article"] = "Primitive Irish",
| |
| },
| |
| ["pis"] = {
| |
| ["name"] = "Pijin",
| |
| ["article"] = "Pijin language",
| |
| },
| |
| ["poz-x-poly-proto"] = {
| |
| ["name"] = "Proto-Nuclear Polynesian",
| |
| ["article"] = "Proto-Polynesian language",
| |
| ["type"] = "reconstructed",
| |
| },
| |
| ["rap"] = {
| |
| ["name"] = "Rapa Nui",
| |
| ["article"] = "Rapa Nui language",
| |
| }, | | }, |
| ["ru"] = { | | ["ru"] = { |
| ["name"] = "Russian",
| |
| ["article"] = "Russian language",
| |
| ["replacements"] = { [acute] = "", }, | | ["replacements"] = { [acute] = "", }, |
| }, | | }, |
Line 453: |
Line 176: |
| ["([đflmnŋrsšŧv])'%1"] = "%1%1", | | ["([đflmnŋrsšŧv])'%1"] = "%1%1", |
| }, | | }, |
| },
| |
| ["sem-pro"] = {
| |
| ["name"] = "Proto-Semitic",
| |
| ["article"] = "Proto-Semitic",
| |
| ["type"] = "reconstructed",
| |
| }, | | }, |
| ["sh"] = { | | ["sh"] = { |
| ["article"] = "Serbo-Croatian language",
| |
| ["replacements"] = { | | ["replacements"] = { |
| decompose = true, | | decompose = true, |
Line 469: |
Line 186: |
| }, | | }, |
| ["sl"] = { | | ["sl"] = { |
| ["name"] = "Slovene",
| |
| ["replacements"] = { | | ["replacements"] = { |
| decompose = true, | | decompose = true, |
Line 477: |
Line 193: |
| }, | | }, |
| }, | | }, |
| ["sla-pro"] = { | | ["uk"] = { |
| ["name"] = "Proto-Slavic", -- also Common Slavic | | ["replacements"] = { [acute] = "", } |
| | }, |
| | ["xcl"] = { |
| | ["replacements"] = { |
| | ["[՞՜՛՟]"] = "", |
| | ["և"] = "եւ", |
| | }, |
| | }, |
| | ["xgf"] = { |
| | ["replacements"] = { |
| | ["['`ʔ]"] = "ʼ", |
| | }, |
| | }, |
| | |
| | -- Custom private-use codes which should be added to [[Module:Lang]]. |
| | -- Codes are in the format of "code-x-code" |
| | ["gem-x-proto"] = { |
| | ["type"] = "reconstructed", |
| | }, |
| | ["grk-x-proto"] = { |
| | ["name"] = "Proto-Hellenic", |
| | ["type"] = "reconstructed", |
| | }, |
| | ["ine-x-proto"] = { |
| | ["type"] = "reconstructed", |
| | }, |
| | ["sem-x-proto"] = { |
| | ["type"] = "reconstructed", |
| | }, |
| | ["sla-x-proto"] = { |
| ["type"] = "reconstructed", | | ["type"] = "reconstructed", |
| ["replacements"] = { | | ["replacements"] = { |
Line 499: |
Line 244: |
| ["ŭ"] = "ъ", | | ["ŭ"] = "ъ", |
| }, | | }, |
| },
| |
| ["tts"] = {
| |
| ["name"] = "Isan", -- also "Northeastern Thai"
| |
| ["article"] = "Isan language",
| |
| },
| |
| ["ug"] = {
| |
| ["name"] = "Uyghur", --also less commonly "Uighur"
| |
| ["article"] = "Uyghur language",
| |
| },
| |
| ["uk"] = {
| |
| ["article"] = "Ukrainian language",
| |
| ["replacements"] = { [acute] = "", }
| |
| },
| |
| ["ur"] = {
| |
| ["name"] = "Urdu",
| |
| ["article"] = "Urdu",
| |
| },
| |
| ["xcl"] = {
| |
| ["name"] = "Old Armenian",
| |
| ["article"] = "Classical Armenian",
| |
| ["replacements"] = {
| |
| ["[՞՜՛՟]"] = "",
| |
| ["և"] = "եւ",
| |
| },
| |
| },
| |
| ["xgf"] = {
| |
| ["name"] = "Tongva", -- not ISO name "Gabrielino-Fernandeño"
| |
| ["article"] = "Tongva language",
| |
| ["replacements"] = {
| |
| ["['`ʔ]"] = "ʼ",
| |
| },
| |
| },
| |
| ["xlu"] = {
| |
| ["name"] = "Luwian", -- not ISO name "Cuneiform Luwian"
| |
| ["article"] = "Cuneiform Luwian"
| |
| },
| |
| ["xpq"] = {
| |
| ["name"] = "Mohegan-Pequot",
| |
| },
| |
| ["xxt"] = {
| |
| ["name"] = "Tambora",
| |
| ["article"] = "Tambora language",
| |
| },
| |
| ["xvn"] = {
| |
| ["name"] = "Vandalic",
| |
| ["article"] = "Vandalic language",
| |
| },
| |
| ["yua"] = {
| |
| ["name"] = "Yucatec Maya",
| |
| ["article"] = "Yucatec Maya language",
| |
| },
| |
| ["zh"] = {
| |
| ["name"] = "Chinese",
| |
| ["article"] = "Chinese language",
| |
| -- ["scripts"] = { "Hani" },
| |
| },
| |
| ["zle-ort"] = {
| |
| ["name"] = "Old Ruthenian",
| |
| ["article"] = "Old Ruthenian",
| |
| ["replacements"] = { [acute] = "", },
| |
| }, | | }, |
| }, | | }, |
| | | |
| -- Here, keys (for example, "gem") are Wikipedia language codes used in | | -- Here, keys (for example, "gem") are Wikipedia language codes used in |
| -- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary | | -- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary |
Line 580: |
Line 265: |
| ["fil"] = "tl", | | ["fil"] = "tl", |
| ["fuf"] = "ff", | | ["fuf"] = "ff", |
| ["gem"] = "gem-pro", -- Not correct, but is commonly used. | | ["gem"] = "gem-x-proto", -- Not correct, but is commonly used. |
| | ["gmw-ecg"] = "gmw-x-ecg", |
| ["hak"] = "zh", | | ["hak"] = "zh", |
| ["hbo"] = "he", | | ["hbo"] = "he", |
| ["hr"] = "sh", | | ["hr"] = "sh", |
| ["ine"] = "ine-pro", -- Not correct, but might be commonly used. | | ["ine"] = "ine-x-proto", -- Not correct, but might be commonly used. |
| ["kjv"] = "sh", | | ["kjv"] = "sh", |
| ["nan"] = "zh", | | ["nan"] = "zh", |
| ["prs"] = "fa", | | ["prs"] = "fa", |
| ["rn"] = "rw", | | ["rn"] = "rw", |
| ["sli"] = "gmw-ecg", | | ["sli"] = "gmw-x-ecg", |
| ["sr"] = "sh", | | ["sr"] = "sh", |
| ["src"] = "sc", | | ["src"] = "sc", |
Line 598: |
Line 284: |
| ["yue"] = "zh", | | ["yue"] = "zh", |
| ["xno"] = "fro", | | ["xno"] = "fro", |
| | |
| | -- Incorrect private use tags |
| | ["cel-proto"] = "cel-x-proto", |
| | ["gem-pro"] = "gem-x-proto", |
| | ["grk-pro"] = "grk-x-proto", |
| | ["ine-pro"] = "ine-x-proto", |
| | ["ine-bsl-pro"] = "ine-x-proto", |
| | ["sem-pro"] = "sem-x-proto", |
| | ["sla-pro"] = "sla-x-proto", |
| }, | | }, |
| } | | } |
|
| |
|
| return data | | return data |