Module:Lang/data: Difference between revisions

no edit summary
m (1 revision imported)
No edit summary
Line 1:
-- Language object for this wiki's content language; its text direction is exported in the return table at the end of this module.
local lang_obj = mw.language.getContentLanguage();
local this_wiki_lang_tag = lang_obj.code; -- get this wiki's language tag
 
 
--[[--------------------------< L A N G _ N A M E _ T A B L E >------------------------------------------------
 
Line 13 ⟶ 17:
have multiple associated names; Module:lang is only concerned with the first name so key_to_lower() only fetches
the first name.
 
TODO: instead of returning:
["key"] = {"name"}
where each table has only one name, return
["key"] = "name"
requires changes in Module:Lang.
 
]]
Line 27 ⟶ 25:
if 'var_sup' == src_type then
for k, v in pairs (source) do
out[k:lower()] = v; -- for variant, and suppressed everything is needed
end
 
elseif 'lang' == src_type and source.active then -- for ~/iana_languages (active)
for k, v in pairs (source.active) do
out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only
end
 
elseif 'lang_dep' == src_type and source.deprecated then -- for ~/iana_languages (deprecated)
for k, v in pairs (source.deprecated) do
out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only
end
 
else -- here for all other sources
for k, v in pairs (source) do
out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only
end
end
Line 48 ⟶ 46:
end
 
local lang_name_tablelang_name_table_t = {
lang = key_to_lower ('Module:Language/data/iana languages', 'lang'),
lang_dep = key_to_lower ('Module:Language/data/iana languages', 'lang_dep'),
Line 56 ⟶ 54:
suppressed = key_to_lower ('Module:Language/data/iana suppressed scripts', 'var_sup'), -- script keys are capitalized; set to lower
}
 
 
--[[--------------------------< I 1 8 N M E D I A W I K I O V E R R I D E >--------------------------------
 
For internationalization; not used at en.wiki
 
The language names taken from the IANA language-subtag-registry file are given in English. That may not be ideal.
Translating ~8,000 language names is also not ideal. MediaWiki maintains (much) shorter lists of language names
in most languages for which there is a Wikipedia edition. When desired, Module:Lang can use the MediaWiki
language list for the local language.
 
Caveat lector: the list of MediaWiki language names for your language may not be complete or may not exist at all.
When incomplete, MediaWiki's list will 'fall back' to another language (typically English). When that happens
add an appropriate entry to the override table below.
 
Caveat lector: the list of MediaWiki language names for your language may not be correct. At en.wiki, the
MediaWiki language names do not agree with the IANA language names for these ISO 639-1 tags. Often it is simply
spelling differences:
bh: IANA: Bihari languages MW: Bhojpuri – the ISO 639-3 tag for Bhojpuri is bho
bn: IANA: Bengali MW: Bangla – Bengali is the exonym, Bangla is the endonym
dv: IANA: Dhivehi MW: Divehi
el: IANA: Modern Greek MW: Greek
ht: IANA: Haitian MW: Haitian Creole
ky: IANA: Kirghiz MW: Kyrgyz
li: IANA: Limburgan MW: Limburgish
or: IANA: Oriya MW: Odia
os: IANA: Ossetian MW: Ossetic
pa: IANA: Panjabi MW: Punjabi
ps: IANA: Pushto MW: Pashto
to: IANA: Tonga MW: Tongan
ug: IANA: Uighur MW: Uyghur
use the override table to override language names that are incorrect for your project
 
To see the list of names that MediaWiki has for your language, enter this in the Debug console:
=mw.dumpObject (mw.language.fetchLanguageNames ('<tag>', 'all'))
(replacing <tag> with the language tag for your language)
 
Use of the MediaWiki language names lists is enabled when media_wiki_override_enable is set to boolean true.
]]
 
-- Switch for replacing IANA language names with the names that MediaWiki holds for this wiki's language.
-- Leave false at en.wiki.  Caveat lector: MediaWiki's list for your language may be incomplete or may not exist at all.
local media_wiki_override_enable = false;

if media_wiki_override_enable == true then
	-- table of language tag/name pairs known to MediaWiki, requested for this wiki's language tag
	local mw_names_t = mw.language.fetchLanguageNames (this_wiki_lang_tag, 'all');
	for mw_tag, mw_name in pairs (mw_names_t) do
		local is_known_tag = lang_name_table_t.lang[mw_tag];	-- only tags already present in the main list are touched
		if is_known_tag then
			lang_name_table_t.lang[mw_tag] = mw_name;	-- replace the existing name with the MediaWiki name
		end
	end
end
 
 
Line 69 ⟶ 118:
------------------------------< I S O _ 6 3 9 - 1 >------------------------------------------------------------
 
["ca-valencia"] = {"Valencian"},
["cu"] = {"Church Slavonic"}, -- 2nd IANA name;
["de-at"] = {"Austrian German"}, -- these code-region and code-variant tags to match en.wiki article names
["de-ch"] = {"Swiss Standard German"},
["en-au"] = {"Australian English"},
["en-ca"] = {"Canadian English"},
["en-emodeng"] = {"Early Modern English"},
["en-gb"] = {"British English"},
["en-ie"] = {"Irish English"},
["en-in"] = {"Indian English"},
["en-nz"] = {"New Zealand English"},
["en-us"] = {"American English"},
["en-za"] = {"South African English"},
["fy"] = {"West Frisian"}, -- Western Frisian
["mo"] = {"Moldovan"}, -- Moldavian (deprecated code); to match en.wiki article title
["nl-be"] = "Flemish", -- match MediaWiki
["oc-provenc"] = {"Provençal"},
["oc-provenc"] = "Provençal",
["ps"] = {"Pashto"}, -- Pushto
["ps"] = "Pashto", -- Pushto
["tw-asante"] = {"Asante Twi"},
["pt-br"] = "Brazilian Portuguese", -- match MediaWiki
["tw-asante"] = "Asante Twi",
 
-- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages
--<begin do-not-edit except to comment out>--
["av"] = {"Avar"}, -- Avaric
["bo"] = {"Standard Tibetan"}, -- Tibetan
["el"] = {"Greek"}, -- Modern Greek
-- ["en-SA"] = {"South African English"}, -- English; no; SA is not South Africa it Saudi Arabia; ZA is South Africa
["ff"] = {"Fula"}, -- Fulah
["ht"] = {"Haitian Creole"}, -- Haitian
["hz"] = {"Otjiherero"}, -- Herero
["ii"] = {"Yi"}, -- Sichuan Yi
["ki"] = {"Gikuyu"}, -- Kikuyu
["kl"] = {"Greenlandic"}, -- Kalaallisut
["ky"] = {"Kyrgyz"}, -- Kirghiz
["lg"] = {"Luganda"}, -- Ganda
["li"] = {"Limburgish"}, -- Limburgan
["mi"] = {"Māori"}, -- Maori
["na"] = {"Nauruan"}, -- Nauru
["nb"] = {"Bokmål"}, -- Norwegian Bokmål
["nd"] = {"Northern Ndebele"}, -- North Ndebele
["nn"] = {"Nynorsk"}, -- Norwegian Nynorsk
["nr"] = {"Southern Ndebele"}, -- South Ndebele
["ny"] = {"Chichewa"}, -- Nyanja
["oj"] = {"Ojibwe"}, -- Ojibwa
["or"] = {"Odia"}, -- Oriya
["pa"] = {"Punjabi"}, -- Panjabi
["rn"] = {"Kirundi"}, -- Rundi
["sl"] = {"Slovene"}, -- Slovenian
["ss"] = {"Swazi"}, -- Swati
["st"] = {"Sotho"}, -- Southern Sotho
["to"] = {"Tongan"}, -- Tonga
--<end do-not-edit except to comment out>--
 
Line 123 ⟶ 174:
------------------------------< I S O _ 6 3 9 - 2, - 3, - 5 >----------------------------------------------
 
["arcalv"] = {"AramaicAtlantic–Congo languages"}, -- Officialto Aramaicmatch (700-300en.wiki BCE),article Imperial Aramaictitle (700-300 BCEendash);
["artarc"] = {"constructedAramaic"}, -- toOfficial matchAramaic en.wiki(700-300 article;BCE), lowercaseImperial forAramaic category(700-300 nameBCE);
["bhdart"] = {"Bhadarwahiconstructed"}, -- Bhadrawahi; to match en.wiki article; lowercase for category titlename
["blabhd"] = {"BlackfootBhadarwahi"}, -- SiksikaBhadrawahi; to match en.wiki article title
["buabla"] = {"BuryatBlackfoot"}, -- BuriatSiksika; thisto ismatch aen.wiki macroarticle language; these four use wp preferred transliteration;title
["bxmbua"] = {"Mongolian Buryat"}, -- Mongolia Buriat; this is a macro language; these threefour alluse redirectwp topreferred Buryattransliteration;
["bxrbxm"] = {"RussianMongolian Buryat"}, -- RussiaMongolia Buriat; these three all redirect to Buryat
["bxubxr"] = {"ChineseRussian Buryat"}, -- ChinaRussia Buriat;
["byrbxu"] = {"YipmaChinese Buryat"}, -- Baruya,China YipmaBuriat;
["egybyr"] = {"Ancient EgyptianYipma"}, -- Egyptian (Ancient); distinguish from contemporary arz: Egyptian ArabicBaruya, Yipma
["emsegy"] = {"AlutiiqAncient Egyptian"}, -- PacificEgyptian Gulf Yupik(Ancient); todistinguish from contemporary matcharz: en.wikiEgyptian articleArabic title
["frrems"] = {"North FrisianAlutiiq"}, -- NorthernPacific Gulf Yupik; to match en.wiki article Frisiantitle
["frsesx"] = {"EastEskimo–Aleut Frisian Low Saxonlanguages"}, -- Easternto match en.wiki article title Frisian(endash)
["ilofrr"] = {"IlocanoNorth Frisian"}, -- Iloko; to match en.wiki articleNorthern titleFrisian
["jamfrs"] = {"JamaicanEast PatoisFrisian Low Saxon"}, -- Jamaican CreoleEastern EnglishFrisian
["luogsw-fr"] = {"DholuoAlsatian"}, -- IANA (primary) /ISO 639-3: Luo (Kenya and Tanzania); IANA (secondary):match DholuoMediaWiki
["mhrhaa"] = {"Meadow MariHän"}, -- EasternHan; to match en.wiki article Marititle
["midhmx"] = {"ModernHmong–Mien Mandaiclanguages"}, -- Mandaicto match en.wiki article title (endash)
["mlailo"] = {"TamamboIlocano"}, -- MaloIloko; to match en.wiki article title
['mte'"jam"] = {"Mono-AluJamaican Patois"}, -- MonoJamaican (SolomonCreole Islands)English
["nan-twluo"] = {"Taiwanese HokkienDholuo"}, -- make room for IANA (primary) /ISO 639-3: nanLuo Min(Kenya Nanand ChineseTanzania); match en.wikiIANA article(secondary): titleDholuo
["newmhr"] = {"NewarMeadow Mari"}, -- Newari, Nepal Bhasa; to match en,wiki articleEastern titleMari
["nrfmid"] = {"NormanModern Mandaic"}, -- not quite a collective - IANA name: Jèrriais; categorizes to Norman-language textMandaic
["nzi"'mis'] = {"Nzemauncoded"}, -- NzimaUncoded languages; tocapitalization; matchspecial en.wikiscope, articlenot titlecollective scope;
["orvmkh"] = {"OldMon–Khmer East Slaviclanguages"}, -- Oldto match en.wiki article title Russian(endash)
["pflmla"] = {"Palatine GermanTamambo"}, -- Pfaelzisch; to match en.wiki articleMalo
["pms"'mte'] = {"PiedmonteseMono-Alu"}, -- Piemontese; to match en.wikiMono article(Solomon titleIslands)
['mul'] = "multiple", -- Multiple languages; capitalization; special scope, not collective scope;
["pnb"] = {"Punjabi (Western)"}, -- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
["stqnan-tw"] = {"SaterlandTaiwanese FrisianHokkien"}, -- Saterfriesischmake room for IANA / 639-3 nan Min Nan Chinese; match en.wiki article title
["undnew"] = {"undeterminedNewar"}, -- capitalizationNewari, Nepal Bhasa; to match existingen,wiki article categorytitle
["wrgngf"] = {"WarrongoTrans–New Guinea languages"}, -- Warunguto match en.wiki article title (endash)
["xal-runic"] = {"KalmykNiger–Congo languages"}, -- Niger-Kordofanian languages; to match en.,wiki article title
["xgfnrf"] = {"TongvaNorman"}, -- ISOnot quite a collective 639-3 isIANA Gabrielinoname: Jèrriais + Guernésiais; categorizes to Norman-Fernandeñolanguage text
["nrf-gg"] = "Guernésiais", -- match MediaWiki
["nrf-je"] = "Jèrriais", -- match MediaWiki
["nzi"] = "Nzema", -- Nzima; to match en.wiki article title
["oma"] = "Omaha–Ponca", -- to match en.wiki article title (endash)
["orv"] = "Old East Slavic", -- Old Russian
["pfl"] = "Palatine German", -- Pfaelzisch; to match en.wiki article
["pie"] = "Piro Pueblo", -- Piro; to match en.wiki article
["pms"] = "Piedmontese", -- Piemontese; to match en.wiki article title
["pnb"] = "Punjabi (Western)", -- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
["rop"] = "Australian Kriol", -- Kriol; en.wiki article is a dab; point to correct en.wiki article
["sdo"] = "Bukar–Sadong", -- Bukar-Sadung Bidayuh; to match en.wiki article title
["stq"] = "Saterland Frisian", -- Saterfriesisch
["und"] = "undetermined", -- capitalization to match existing category
["wrg"] = "Warrongo", -- Warungu
["xal-ru"] = "Kalmyk", -- to match en.wiki article title
["xgf"] = "Tongva", -- ISO 639-3 is Gabrielino-Fernandeño
["yuf"] = "Havasupai–Hualapai", -- Havasupai-Walapai-Yavapai; to match en.wiki article title
["zxx"] = "no linguistic content", -- capitalization
 
-- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages
--<begin do-not-edit except to comment out>--
["ace"] = {"Acehnese"}, -- Achinese
["aec"] = {"Sa'idi Arabic"}, -- Saidi Arabic
["akl"] = {"Aklan"}, -- Aklanon
["alt"] = {"Altay"}, -- Southern Altai
["apm"] = {"Mescalero-Chiricahua"}, -- Mescalero-Chiricahua Apache
["bal"] = {"Balochi"}, -- Baluchi
-- ["bcl"] = {"Central Bicolano"}, -- Central Bikol
["bin"] = {"Edo"}, -- Bini
["bpy"] = {"Bishnupriya Manipuri"}, -- Bishnupriya
["chg"] = {"Chagatay"}, -- Chagatai
["ckb"] = {"Sorani Kurdish"}, -- Central Kurdish
["cnu"] = {"Shenwa"}, -- Chenoua
["coc"] = {"Cocopah"}, -- Cocopa
["diq"] = {"Zazaki"}, -- Dimli
["fit"] = {"Meänkieli"}, -- Tornedalen Finnish
["fkv"] = {"Kven"}, -- Kven Finnish
["frk"] = {"Old Frankish"}, -- Frankish
["gez"] = {"Ge'ez"}, -- Geez
["gju"] = {"Gujari"}, -- Gujari
["gsw"] = {"Alemannic German"}, -- Swiss German
["gul"] = {"Gullah"}, -- Sea Island Creole English
["hak"] = {"Hakka"}, -- Hakka Chinese
["hbo"] = {"Biblical Hebrew"}, -- Ancient Hebrew
["hnd"] = {"Hindko"}, -- Southern Hindko
-- ["ikt"] = {"Inuvialuk"}, -- Inuinnaqtun
["kaa"] = {"Karakalpak"}, -- Kara-Kalpak
["khb"] = {"Tai Lü"}, -- Lü
["kmr"] = {"Kurmanji Kurdish"}, -- Northern Kurdish
["kpo"] = {"Kposo"}, -- Ikposo
["krj"] = {"Kinaray-a"}, -- Kinaray-A
["ktz"] = {"Juǀ'hoan"}, -- Juǀʼhoan
["lez"] = {"Lezgian"}, -- Lezghian
["liv"] = {"Livonian"}, -- Liv
["lng"] = {"Lombardic"}, -- Langobardic
["mia"] = {"Miami-Illinois"}, -- Miami
["miq"] = {"Miskito"}, -- Mískito
["mix"] = {"Mixtec"}, -- Mixtepec Mixtec
["mni"] = {"Meitei"}, -- Manipuri
["mrj"] = {"Hill Mari"}, -- Western Mari
["mww"] = {"White Hmong"}, -- Hmong Daw
["nds-nl"] = {"Dutch Low Saxon"}, -- Low German
-- ["new"] = {"Nepal Bhasa"}, -- Newari
["nso"] = {"Northern Sotho"}, -- Pedi
-- ["nwc"] = {"Classical Nepal Bhasa"}, -- Classical Newari, Classical Nepal Bhasa, Old Newari
["ood"] = {"O'odham"}, -- Tohono O'odham
["otk"] = {"Old Turkic"}, -- Old Turkish
["pal"] = {"Middle Persian"}, -- Pahlavi
["pam"] = {"Kapampangan"}, -- Pampanga
["phr"] = {"Potwari"}, -- Pahari-Potwari
["pka"] = {"Jain Prakrit"}, -- Ardhamāgadhī Prākrit
-- ["pnb"] = {"Punjabi"}, -- Western Panjabi
["psu"] = {"Shauraseni"}, -- Sauraseni Prākrit
["rap"] = {"Rapa Nui"}, -- Rapanui
["rar"] = {"Cook Islands Māori"}, -- Rarotongan
["rmu"] = {"Scandoromani"}, -- Tavringer Romani
["rom"] = {"Romani"}, -- Romany
["rup"] = {"Aromanian"}, -- Macedo-Romanian
["ryu"] = {"Okinawan"}, -- Central Okinawan
["sdc"] = {"Sassarese"}, -- Sassarese Sardinian
["sdn"] = {"Gallurese"}, -- Gallurese Sardinian
["shp"] = {"Shipibo"}, -- Shipibo-Conibo
["src"] = {"Logudorese"}, -- Logudorese Sardinian
["sro"] = {"Campidanese"}, -- Campidanese Sardinian
["tkl"] = {"Tokelauan"}, -- Tokelau
["tvl"] = {"Tuvaluan"}, -- Tuvalu
["tyv"] = {"Tuvan"}, -- Tuvinian
["vls"] = {"West Flemish"}, -- Vlaams
["wep"] = {"Westphalian"}, -- Westphalien
["xal"] = {"Oirat"}, -- Kalmyk
["xcl"] = {"Old Armenian"}, -- Classical Armenian
["yua"] = {"Yucatec Maya"}, -- Yucateco
--<end do-not-edit except to comment out>--
 
Line 235 ⟶ 304:
------------------------------< P R I V A T E _ U S E _ T A G S >----------------------------------------------
 
["celalg-x-proto"] = {"Proto-CelticAlgonquian"}, -- celalg in IANA is CelticAlgonquian languages
["gemcel-x-proto"] = {"Proto-GermanicCeltic"}, -- gemcel in IANA is GermanicCeltic languages
["gem-x-proto"] = "Proto-Germanic", -- gem in IANA is Germanic languages
["gmw-x-ecg"] = {"East Central German"},
["gmw-x-ecg"] = "East Central German",
["grc-x-aeolic"] = {"Aeolic Greek"}, -- these grc-x-... codes are preferred alternates to the non-standard catchall code grc-gre
["grc-x-aeolic"] = "Aeolic Greek", -- these grc-x-... codes are preferred alternates to the non-standard catchall code grc-gre
["grc-x-attic"] = {"Attic Greek"},
["grc-x-biblicalattic"] = {"BiblicalAttic Greek"},
["grc-x-byzantbiblical"] = {"ByzantineBiblical Greek"},
["grc-x-classicbyzant"] = {"ClassicalByzantine Greek"},
["grc-x-doricclassic"] = {"DoricClassical Greek"},
["grc-x-hellendoric"] = {"HellenisticDoric Greek"},
["grc-x-ionichellen"] = {"IonicHellenistic Greek"},
["grc-x-koineionic"] = {"KoinēIonic Greek"},
["grc-x-medievalkoine"] = {"MedievalKoinē Greek"},
["grc-x-patrismedieval"] = {"PatristicMedieval Greek"},
["grkgrc-x-protopatris"] = {"Proto-Patristic Greek"}, -- grk in IANA is Greek languages
["iirgrk-x-proto"] = {"Proto-Indo-IranianGreek"}, -- iirgrk in IANA is Indo-IranianGreek Languageslanguages
["ineiir-x-proto"] = {"Proto-Indo-EuropeanIranian"}, -- iir in IANA is Indo-Iranian Languages
["iraine-x-proto"] = {"Proto-IranianIndo-European"}, -- ira in IANA is Iranian languages
["itcira-x-proto"] = {"Proto-ItalicIranian"}, -- itcira in IANA is ItalicIranian languages
["kshitc-x-cologproto"] = {"ColognianProto-Italic"}, -- en.wikiitc articlein IANA is Colognian; ksh (Kölsch) redirectsItalic therelanguages
["ksh-x-colog"] = "Colognian", -- en.wiki article is Colognian; ksh (Kölsch) redirects there
["la-x-medieval"] = {"Medieval Latin"},
["la-x-medieval"] = "Medieval Latin",
["mis-x-ripuar"] = {"Ripuarian"}, -- replaces improper use of ksh in wp_languages
["mis-x-ripuar"] = "Ripuarian", -- replaces improper use of ksh in wp_languages
["sem-x-proto"] = {"Proto-Semitic"},
["slasem-x-proto"] = {"Proto-SlavicSemitic"}, -- sla in IANA is Slavic languages
["yufsla-x-havproto"] = {"HavasupaiProto-Slavic"}, -- IANAsla namein forIANA theseis threeSlavic is Havasupai-Walapai-Yavapailanguages
["yuf-x-hav"] = "Havasupai", -- IANA name for these three is Havasupai-Walapai-Yavapai
["yuf-x-wal"] = {"Walapai"},
["yuf-x-yavwal"] = {"YavapaiWalapai"},
["yuf-x-yav"] = "Yavapai",
}
 
Line 274 ⟶ 344:
 
-- Maps a language tag to a one-element sequence holding a name string.
-- NOTE(review): the values look like disambiguated en.wiki article titles (see the talk-page
-- references in the entry comments) -- confirm against the consumer in Module:Lang.
local article_name = {
	lij = {'Ligurian (Romance language)'},	-- Ligurian; see Template_talk:Lang#Ligurian_dab
	mnh = {'Mono language (Congo)'},		-- Mono (Democratic Republic of Congo); see Template_talk:Lang#Mono_languages
	mnr = {'Mono language (California)'},	-- Mono (USA)
	mru = {'Mono language (Cameroon)'},		-- Mono (Cameroon)
	xlg = {'Ligurian (ancient language)'},	-- see Template_talk:Lang#Ligurian_dab
};
 
Line 462 ⟶ 532:
['jyutping'] = {
['default'] = 'Jyutping transliteration',
},
 
['mlcts'] = {
['default'] = 'Myanmar Language Commission Transcription System',
},
 
Line 519 ⟶ 593:
-- Module export: the table returned here carries the wiki language tag and direction plus the data tables declared in this file.
return
{
this_wiki_lang_tag = this_wiki_lang_tag,
this_wiki_lang_dir = lang_obj:getDir(), -- wiki's language direction
article_name = article_name,
lang_name_table = lang_name_tablelang_name_table_t, -- NOTE(review): identifier reads like a merge of lang_name_table and lang_name_table_t; it matches the declaration earlier in this file, so any rename must change both places together
override = override,
rtl_scripts = rtl_scripts,
special_tags_table = special_tags_table,
translit_title_table = translit_title_table,
};