-- Modul:languages/data2
--
-- Daripada Wiktionary
--
-- Pendokumenan untuk modul ini boleh diciptakan di Modul:languages/data2/doc

-- Shorthand for mw.ustring.char, used below to build strings from Unicode code points.
local u = mw.ustring.char

-- UTF-8 encoded strings for some commonly used diacritics
-- (each constant is built from a single code point in the combining-marks range).
local GRAVE     = u(0x0300)
local ACUTE     = u(0x0301)
local CIRC      = u(0x0302)
local TILDE     = u(0x0303)
local MACRON    = u(0x0304)
local BREVE     = u(0x0306)
local DOTABOVE  = u(0x0307)
local DIAER     = u(0x0308)
local CARON     = u(0x030C)
local DGRAVE    = u(0x030F)
local INVBREVE  = u(0x0311)
local DOTBELOW  = u(0x0323)
local RINGBELOW = u(0x0325)
local CEDILLA   = u(0x0327)
local OGONEK    = u(0x0328)
local DOUBLEINVBREVE = u(0x0361)

-- Punctuation to be used for the standardChars field
-- (concatenated into the per-language standardChars strings below).
local PUNCTUATION = ' !#$%&*+,-./:;<=>?@^_`|~\'()'

-- Script-code lists shared by many records so the same table is reused
-- instead of being rebuilt for every language.
local Cyrl = {"Cyrl"}
local Latn = {"Latn"}
local LatnArab = {"Latn", "Arab"}

-- Table of language records, keyed by language code.
-- NOTE(review): the three positional fields of each record look like
-- canonical name, Wikidata item ID and family code -- confirm against the
-- consuming module.
local m = {}

m["aa"] = {
	"Afar",
	"Q27811",
	"cus",
	aliases = {"Qafar"},
	scripts = Latn,
}

m["ab"] = {
	"Abkhaz",
	"Q5111",
	"cau-abz",
	aliases = {"Abkhazia", "Abxazo"},
	scripts = {"Cyrl", "Geor", "Latn"},
	translit_module = "ab-translit",
	override_translit = true,
	entry_name = {
		from = {GRAVE, ACUTE},
		to   = {}} ,
}

m["ae"] = {
	"Avesta",
	"Q29572",
	"ira-cen",
	aliases = {"Zend", "Bactria Lama"},
	scripts = {"Avst", "Gujr"},
	translit_module = "Avst-translit",
}

m["af"] = {
	"Afrikaans",
	"Q14196",
	"gmw",
	scripts = LatnArab,
	ancestors = {"nl"},
	sort_key = {
		from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"},
		to   = {"a"	 , "e"	, "i"	, "o"	, "u"  , "y" }} ,
}

m["ak"] = {
	"Akan",
	"Q28026",
	"alv-ctn",
	varieties = {"Twi-Fante", "Twi", {"Fante", "Fanti"}, "Asante", "Akuapem"},
	scripts = Latn,
}

m["am"] = {
	"Amhara",
	"Q28244",
	"sem-eth",
	scripts = {"Ethi"},
	translit_module = "Ethi-translit",
}

m["an"] = {
	"Aragon",
	"Q8765",
	"roa-ibe",
	scripts = Latn,
	ancestors = {"roa-oan"},
}

-- Arabic.
m["ar"] = {
	"Arab",
	"Q13955",
	"sem-arb",
	-- FIXME, some of the following are varieties, but it is unclear which ones
	otherNames = {"Arab Piawai Moden", "Arab Piawai", "Arab Sastera", "Arab Klasik"},
	scripts = {"Arab", "Brai"},
	-- replace alif waṣl with alif
	-- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha,
	-- damma, kasra, shadda, sukun, superscript (dagger) alif
	entry_name = {
		from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", u(0x0670)},
		to   = {u(0x0627)}},
	translit_module = "ar-translit",
	standardChars = "ء-غف-ْٰٱ" .. PUNCTUATION .. "٠-٩،؛؟٫٬ـ",
}

-- Assamese. Positional fields are listed first, then the named fields,
-- matching the layout of the other records.
m["as"] = {
	"Assam",
	"Q29401",
	"inc",
	aliases = {"Asamiya"},
	scripts = {"as-Beng"},
	translit_module = "as-translit",
	ancestors = {"inc-mas"},
}

m["av"] = {
	"Avar",
	"Q29561",
	"cau-nec",
	scripts = Cyrl,
	ancestors = {"oav"},
	translit_module = "av-translit",
	override_translit = true,
	entry_name = {
		from = {GRAVE, ACUTE},
		to   = {}} ,
}

m["ay"] = {
	"Aymara",
	"Q4627",
	"sai-aym",
	varieties = {"Aymara Selatan", "Aymara Pusat"},
	scripts = Latn,
}

m["az"] = {
	"Azerbaijan",
	"Q9292",
	"trk-ogz",
	aliases = {"Azeri", "Azari", "Turkik Azeri", "Turkik Azerbaijan"},
	varieties = {"Azerbaijan Utara", "Azerbaijan Selatan",
		{"Afshar", "Afshari", "Azerbaijan Afshar", "Afchar"},
		{"Qashqa'i", "Qashqai", "Kashkay"},
		"Sonqor"
	},
	scripts = {"Latn", "Cyrl", "fa-Arab"},
	ancestors = {"trk-oat"},
}

m["ba"] = {
	"Bashkir",
	"Q13389",
	"trk-kbu",
	scripts = Cyrl,
	translit_module = "ba-translit",
	override_translit = true,
}

-- Belarusian.
m["be"] = {
	"Belarus",
	"Q9091",
	"zle",
	-- The canonical name "Belarus" is deliberately not repeated here:
	-- an alias identical to the canonical name carries no information.
	aliases = {"Belorus", "Bielorus", "Byelorus", "Rusia Putih"},
	scripts = Cyrl,
	ancestors = {"orv"},
	translit_module = "be-translit",
	-- Sort Ё/ё together with Е/е.
	sort_key = {
		from = {"Ё", "ё"},
		to   = {"Е", "е"}},
	-- Map Ѐ/ѐ to plain Е/е for entry names; GRAVE and ACUTE have no
	-- counterpart in `to`.
	-- NOTE(review): presumably a missing `to` entry means "delete" --
	-- confirm against the consuming module.
	entry_name = {
		from = {"Ѐ", "ѐ", GRAVE, ACUTE},
		to   = {"Е", "е"}},
}

-- Bulgarian. Uses the shared Cyrl script list like the other
-- Cyrillic-only records.
m["bg"] = {
	"Bulgaria",
	"Q7918",
	"zls",
	ancestors = {"cu"},
	scripts = Cyrl,
	translit_module = "bg-translit",
	-- Ѐ/ѐ and Ѝ/ѝ map to their plain letters; GRAVE and ACUTE have no
	-- counterpart in `to`.
	entry_name = {
		from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
		to   = {"Е", "е", "И", "и"},
	},
}

m["bh"] = {
	"Bihari",
	"Q135305",
	"inc",
	scripts = {"Deva"},
	ancestors = {"inc-mgd"},
}

m["bi"] = {
	"Bislama",
	"Q35452",
	"crp",
	scripts = Latn,
	ancestors = {"en"},
}

m["bm"] = {
	"Bambara",
	"Q33243",
	"dmn-emn",
	aliases = {"Bamanankan"},
	scripts = Latn,
}

m["bn"] = {
	"Bengali",
	"Q9610",
	"inc",
	aliases = {"Bangla"},
	scripts = {"Beng", "Newa"},
	ancestors = {"inc-mbn"},
	translit_module = "bn-translit",
}

-- Tibetan.
m["bo"] = {
	"Tibet",
	"Q34271",
	"sit-tib",
	varieties = {
		{"Tibet Amdo", "Amdo"},
		"Dolpo",
		{"Khams", "Tibet Khams"}, "Khamba",
		"Gola",
		"Humla",
		"Limi", {"Lhasa", "Lhasa Tibetan"}, "Lhomi", "Loke", "Lowa",
		"Mugom", "Mugu", "Mustang",
		"Nubri",
		"Panang",
		"Shing Saapa",
		"Thudam", "Tichurong", "Tseku",
		{"Ü", "Dbus"},
		"Walungge"}, -- and "Gyalsumdo", "Lower Manang"? "Kyirong"?
	scripts = {"Tibt"}, -- sometimes Deva?
	ancestors = {"xct"},
	translit_module = "bo-translit",
	override_translit = true,
}

m["br"] = {
	"Breton",
	"Q12107",
	"cel-bry",
	varieties = {{"Gwenedeg", "Vannetais"}, {"Kerneveg", "Cornouaillais"}, {"Leoneg", "Léonard"}, {"Tregerieg", "Trégorrois"}},
	scripts = Latn,
	ancestors = {"xbm"},
}

-- Catalan.
m["ca"] = {
	"Catalan",
	"Q7026",
	"roa",
	-- don't list varieties here that are in [[Modul:etymology languages/data]]
	aliases = {"Catalonia"},
	scripts = Latn,
	ancestors = {"roa-oca"},
	sort_key = {
		from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"},
		to   = {"a", "e"   , "i"   , "o"   , "u"   , "c", "ll" }} ,
}

m["ce"] = {
	"Chechen",
	"Q33350",
	"cau-nkh",
	scripts = Cyrl,
	translit_module = "ce-translit",
	override_translit = true,
	entry_name = {
		from = {MACRON},
		to   = {}},
}

m["ch"] = {
	"Chamorro",
	"Q33262",
	"poz-sus",
	aliases = {"Chamoru"},
	scripts = Latn,
}

m["co"] = {
	"Corsica",
	"Q33111",
	"roa-itd",
	aliases = {"Corsu"},
	scripts = Latn,
}

m["cr"] = {
	"Cree",
	"Q33390",
	"alg",
	scripts = {"Cans", "Latn"},
	translit_module = "cr-translit",
}

m["cs"] = {
	"Czech",
	"Q9056",
	"zlw",
	scripts = Latn,
	ancestors = {"zlw-ocs"},
	sort_key = {
		from = {"á", "é", "í", "ó", "[úů]", "ý"},
		to   = {"a", "e", "i", "o", "u"   , "y"}} ,
}

m["cu"] = {
	"Slavonik Gereja Lama",
	"Q35499",
	"zls",
	aliases = {"Slavik Gereja Lama"},
	scripts = {"Cyrs", "Glag"},
	translit_module = "Cyrs-Glag-translit",
	entry_name = {
		from = {u(0x0484)}, -- kamora
		to   = {}},
	sort_key = {
		from = {"оу", "є"},
		to   = {"у" , "е"}} ,
}

m["cv"] = {
	"Chuvash",
	"Q33348",
	"trk-ogr",
	scripts = Cyrl,
	translit_module = "cv-translit",
	override_translit = true,
}

m["cy"] = {
	"Welsh",
	"Q9309",
	"cel-bry",
	varieties = {"Welsh Cofi", {"Dyfedeg", "Welsh Dyfed", "Demetian"}, {"Gwenhwyseg", "Welsh Gwent", "Gwent"}, {"Gwyndodeg", "Welsh Gwynedd", "Venedotian"}, "Welsh Wales Utara", {"Powyseg", "Welsh Powys", "Powys"}, "Welsh Wales Selatan", "Welsh Patagonia"},
	scripts = Latn,
	ancestors = {"wlm"},
	sort_key = {
		from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"},
		to   = {"a"	    , "c~", "d~", "e"	  , "f~", "g~h", "i"	  , "l~", "o"	  , "p~", "r~", "t~", "u"	  , "w"     , "y"	       }} ,
	standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION,
}

m["da"] = {
	"Denmark",
	"Q9035",
	"gmq",
	scripts = Latn,
	ancestors = {"gmq-oda"},
}

m["de"] = {
	"Jerman",
	"Q188",
	"gmw",
	aliases = {"Jerman Tinggi", "Jerman Tinggi Baru", "Deutsch"},
	varieties = {"Jerman Alsace", "Jerman Amerika",
		"Jerman Bavaria", "Jerman Belgium",
		"Jerman Pusat",
		"Jerman DDR",
		"Jerman Afrika Timur",
		"Jerman Jerman",
		"Jerman Hesse",
		"Jerman Indiana",
		"Jerman Liechtenstein", "Jerman Lorraine", "Jerman Luxembourg",
		"Jerman Namibia", "Jerman Utara",
		"Jerman Prusia",
		"Jerman Silesia", "Jerman Afrika Selatan", "Jerman Selatan", "Jerman Tyrol Selatan", "Jerman Switzerland",
		"Jerman Texas"},
	scripts = {"Latn", "Latf"},
	ancestors = {"gmh"},
	sort_key = {
		from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" },
		to   = {"a"	  , "e"	 , "i"	 , "o"	 , "u"	 , "ss"}} ,
	standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION,
}

m["dv"] = {
	"Dhivehi",
	"Q32656",
	"inc",
	aliases = {"Divehi", "Maldivian"},
	varieties = {{"Mahal", "Mahl"}},
	scripts = {"Thaa"},
	ancestors = {"elu-prk"},
	translit_module = "dv-translit",
	override_translit = true,
}

m["dz"] = {
	"Dzongkha",
	"Q33081",
	"sit-tib",
	scripts = {"Tibt"},
	ancestors = {"xct"},
	translit_module = "bo-translit",
	override_translit = true,
}

m["ee"] = {
	"Ewe",
	"Q30005",
	"alv-gbe",
	scripts = Latn,
}

m["el"] = {
	"Greek",
	"Q9129",
	"grk",
	aliases = {"Yunani", "Greek Moden", "Neo-Hellen"},
	scripts = {"Grek", "Brai"},
	ancestors = {"grc"},
	translit_module = "el-translit",
	override_translit = true,
	sort_key = {  -- Simpan ini selaras dengan grc, cpg, pnt, tsd
		from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"},
		to   = {"α"						, "ε"		 , "η"						, "ι"				, "ο"		 , "υ"				, "ω"						, "ρ", "σ"}} ,
	standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION .. "ἀ-῾",
}

-- English.
m["en"] = {
	"Inggeris",
	"Q1860",
	"gmw",
	aliases = {"Inggeris Moden", "Inggeris Baru"},
	varieties = {{"Inggeris Kreol Hawaii", "Inggeris Kreol Hawai'ian", "Kreol Hawaiian", "Kreol Hawai'ian"}, "Polari", "Yinglish"},
	scripts = {"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion
	ancestors = {"enm"},
	sort_key = {
		from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ" , "œ" , "[çč]", "ñ", "'"},
		to   = {"a"       , "e"      , "i"      , "o"      , "u"      , "ae", "oe", "c"   , "n"}},
	wikimedia_codes = {"en", "simple"},
	standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}

m["eo"] = {
	"Esperanto",
	"Q143",
	"art",
	scripts = Latn,
	sort_key = {
		from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"},
		to   = {"a"	   , "e"  , "i"  , "o"  , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} ,
}

m["es"] = {
	"Sepanyol",
	"Q1321",
	"roa-ibe",
	aliases = {"Castile"},
	varieties = {{"Sepanyol Amazon", "Sepanyol Amazonic"}, "Sepanyol Loreto-Ucayali"},
	scripts = {"Latn", "Brai"},
	ancestors = {"osp"},
	sort_key = {
		from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"},
		to   = {"a", "e", "i", "o", "u"   , "c", "n"}},
	standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION,
}

m["et"] = {
	"Estonia",
	"Q9072",
	"fiu-fin",
	scripts = Latn,
}

m["eu"] = {
	"Basque",
	"Q8752",
	"euq",
	aliases = {"Euskara"},
	scripts = Latn,
}

m["fa"] = {
	"Parsi",
	"Q9168",
	"ira-swi",
	aliases = {"Farsi", "Parsi Baru", "Parsi Moden"},
	varieties = {{"Parsi Barat", "Parsi Iran"}, {"Parsi Timur", "Dari"}, {"Aimaq", "Aimak", "Aymaq", "Eimak"}},
	scripts = {"fa-Arab"},
	ancestors = {"pal"}, -- "ira-mid"
	entry_name = {
		from = {u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652)},
		to   = {}} ,
}

m["ff"] = {
	"Fula",
	"Q33454",
	"alv-fwo",
	aliases = {"Fulani"},
	varieties = {"Adamawa Fulfulde", "Bagirmi Fulfulde", "Borgu Fulfulde", "Central-Eastern Niger Fulfulde", "Fulfulde", "Maasina Fulfulde", "Nigerian Fulfulde", "Pular", "Pulaar", "Western Niger Fulfulde"}, -- Maasina, etc are dialects, subsumed into this code; Pular and Pulaar are distinct
	scripts = {"Latn", "Adlm"},
}

m["fi"] = {
	"Finland",
	"Q1412",
	"fiu-fin",
	aliases = {"Suomi"},
	scripts = Latn,
	entry_name = {
		from = {"ˣ"},  -- Used to indicate gemination of the next consonant
		to   = {}},
	sort_key = {
		from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ" , "œ" , "[čç]", "š", "ž", "ß" , "[':]"},
		to   = {"a"	 , "e"	 , "i"	 , "o"	 , "u"	 ,  "y"	 , "ö"	, "ae", "oe", "c"   , "s", "z", "ss"}} ,
}

m["fj"] = {
	"Fiji",
	"Q33295",
	"poz-occ",
	scripts = Latn,
}

m["fo"] = {
	"Faroe",
	"Q25258",
	"gmq",
	aliases = {"Faeroe"},
	scripts = Latn,
	ancestors = {"non"},
}

-- French.
m["fr"] = {
	"Perancis",
	"Q150",
	"roa-oil",
	aliases = {"Perancis Moden"},
	-- Variety names must contain only their visible characters: an invisible
	-- directional mark inside a name would make it differ from the same
	-- name typed normally.
	varieties = {"African French", "Algerian French", "Alsatian French", "Antilles French", "Atlantic Canadian French",
		"Belgian French",
		"Congolese French",
		"European French",
		"French French",
		"Haitian French",
		"Ivorian French",
		"Lorraine French", "Louisiana French", "Luxembourgish French",
		"Malian French", "Marseille French", "Missourian French", "Moroccan French",
		"Newfoundland French", "North American French",
		"Picard French", "Provençal French",
		"Quebec French",
		"Réunion French", "Rwandan French",
		"Tunisian French",
		"West African French"},
	scripts = {"Latn", "Brai"},
	ancestors = {"frm"},
	-- Accented vowels and ç map to their base letters, ligatures to their
	-- two-letter spellings; the apostrophe has no counterpart in `to`.
	sort_key = {
		from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"},
		to   = {"a"     , "e"     , "i"     , "o"     , "u"     , "y"     , "c", "ae", "oe"}},
	standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION,
}

m["fy"] = {
	"Frisia Barat",
	"Q27175",
	"gmw-fri",
	scripts = Latn,
	ancestors = {"ofs"},
	sort_key = {
		from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"},
		to   = {"a"	 , "e"	, "i"	, "o"	, "u", "ae"}} ,
	standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION,
}

m["ga"] = {
	"Ireland",
	"Q9142",
	"cel-gae",
	aliases = {"Irish Gaelic", "Gaelic"}, -- calling it simply "Gaelic" is rare in Ireland, but relatively common in the Irish diaspora
	varieties = {{"Cois Fharraige Irish", "Cois Fhairrge Irish"}, {"Connacht Irish", "Connaught Irish"}, "Cork Irish", "Donegal Irish", "Galway Irish", "Kerry Irish", "Mayo Irish", "Munster Irish", "Ulster Irish", "Waterford Irish", "West Muskerry Irish"},
	scripts = Latn,
	ancestors = {"mga"},
	sort_key = {
		from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" },
		to   = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} ,
	standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
}

m["gd"] = {
	"Gaelik Scotland",
	"Q9314",
	"cel-gae",
	aliases = {"Gaelic", "Gàidhlig", "Scots Gaelic", "Scottish"},
	varieties = {"Argyll Gaelic", "Arran Scottish Gaelic", {"Canadian Gaelic", "Canadian Scottish Gaelic", "Cape Breton Gaelic"}, "East Sutherland Gaelic", {"Galwegian Gaelic", "Gallovidian Gaelic", "Gallowegian Gaelic", "Galloway Gaelic"}, "Hebridean Gaelic", "Highland Gaelic"},
	scripts = Latn,
	ancestors = {"mga"},
	sort_key = {
		from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"},
		to   = {"a"   , "e"   , "i"   , "o"   , "u"   , "y"   }} ,
	standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION,
}

m["gl"] = {
	"Galicia",
	"Q9307",
	"roa-ibe",
	scripts = Latn,
	ancestors = {"roa-opt"},
	sort_key = {
		from = {"á", "é", "í", "ó", "ú"},
		to   = {"a", "e", "i", "o", "u"}} ,
}

m["gn"] = {
	"Guaraní",
	"Q35876",
	"tup-gua",
	scripts = Latn,
}

m["gu"] = {
	"Gujarati",
	"Q5137",
	"inc",
	scripts = {"Gujr"},
	ancestors = {"inc-mgu"},
	translit_module = "gu-translit",
}

m["gv"] = {
	"Manx",
	"Q12175",
	"cel-gae",
	aliases = {"Manx Gaelic"},
	varieties = {"Northern Manx", "Southern Manx"},
	scripts = Latn,
	ancestors = {"mga"},
	sort_key = {
		from = {"ç", "-"},
		to   = {"c"}} ,
	standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION,
}

m["ha"] = {
	"Hausa",
	"Q56475",
	"cdc-wst",
	scripts = LatnArab,
	sort_key = {
		from = {"ɓ",   "ɗ",   "ƙ",  "'y", "ƴ",  "'" },
		to   = {"b~" , "d~"	, "k~", "y~", "y~", ""  }},
	entry_name = {
		from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE},
		to   = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}},
}

m["he"] = {
	"Ibrani",
	"Q9288",
	"sem-can",
	aliases = {"Ivrit"},
	scripts = {"Hebr", "Phnx", "Brai"},
	entry_name = {
		from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. "]"},
		to   = {}} ,
}

m["hi"] = {
	"Hindi",
	"Q1568",
	"inc",
	-- FIXME, are the following varieties?
	otherNames = {"Hindavi", "Khariboli", "Khari Boli", "Manak Hindi"},
	scripts = {"Deva", "Kthi", "Newa"},
	ancestors = {"inc-ohi"},
	translit_module = "hi-translit",
}

m["ho"] = {
	"Hiri Motu",
	"Q33617",
	"crp",
	aliases = {"Pidgin Motu", "Police Motu"},
	scripts = Latn,
	ancestors = {"meu"},
}

m["ht"] = {
	"Kreol Haiti",
	"Q33491",
	"crp",
	aliases = {"Creole", "Haiti", "Kreyòl"},
	scripts = Latn,
	ancestors = {"fr"},
}

m["hu"] = {
	"Hungary",
	"Q9067",
	"urj-ugr",
	aliases = {"Magyar"},
	scripts = {"Latn", "Hung"},
	ancestors = {"ohu"},
	sort_key = {
		from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"},
		to   = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"},
	},
}

m["hy"] = {
	"Armenia",
	"Q8785",
	"hyx",
	aliases = {"Armenia Moden"},
	varieties = {"Armenia Timur", "Armenia Barat"},
	scripts = {"Armn", "Brai"},
	ancestors = {"axm"},
	translit_module = "Armn-translit",
	override_translit = true,
	sort_key = {
		from = {"ու", "և", "եւ"},
		to   = {"ւ", "եվ", "եվ"}},
	entry_name = {
		from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>"},
		to   = {"", "", "", "", "եւ", "յ", "ի"}} ,
}

m["hz"] = {
	"Herero",
	"Q33315",
	"bnt-swb",
	scripts = Latn,
}

m["ia"] = {
	"Interlingua",
	"Q35934",
	"art",
	scripts = Latn,
}

m["id"] = {
	"Indonesia",
	"Q9240",
	"poz-mly",
	scripts = Latn,
	ancestors = {"ms"},
}

m["ie"] = {
	"Interlingue",
	"Q35850",
	"art",
	aliases = {"Occidental"},
	scripts = Latn,
}

m["ig"] = {
	"Igbo",
	"Q33578",
	"alv-igb",
	scripts = Latn,
}

m["ii"] = {
	"Sichuan Yi",
	"Q34235",
	"tbq-lol",
	aliases = {"Nuosu", "Nosu", "Northern Yi", "Liangshan Yi"},
	scripts = {"Yiii"},
	translit_module = "ii-translit",
}

m["ik"] = {
	"Inupiaq",
	"Q27183",
	"esx-inu",
	aliases = {"Inupiak", "Iñupiaq", "Inupiatun"},
	scripts = Latn,
}

m["io"] = {
	"Ido",
	"Q35224",
	"art",
	scripts = Latn,
}

m["is"] = {
	"Iceland",
	"Q294",
	"gmq",
	scripts = Latn,
	ancestors = {"non"},
}

m["it"] = {
	"Itali",
	"Q652",
	"roa-itd",
	scripts = Latn,
	sort_key = {
		from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"},
		to   = {"a"	  , "e"	 , "i"	 , "o"	 , "u"	 }} ,
}

m["iu"] = {
	"Inuktitut",
	"Q29921",
	"esx-inu",
	varieties = {
		"Aivilimmiut",
		{"Eastern Canadian Inuktitut", "Eastern Canadian Inuit"},
		{"Inuinnaq", "Inuinnaqtun"},
		{"Inuvialuktun", "Inuvialuk", "Western Canadian Inuktitut", "Western Canadian Inuit", "Western Canadian Inuktun"},
		"Kivallirmiut",
		"Natsilingmiut", "Nunavimmiutit", "Nunatsiavummiut",
		{"Siglitun", "Siglit"}},
	scripts = {"Cans", "Latn"},
	translit_module = "iu-translit",
	override_translit = true,
}

m["ja"] = {
	"Jepun",
	"Q5287",
	"jpx",
	aliases = {"Jepun Moden", "Nippon", "Nihongo"},
	scripts = {"Jpan", "Brai"},
	ancestors = {"ojp"},
	--[=[
	-- Handled by jsort function in [[Module:ja]].
	sort_key = {
		from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"},
		to   = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}},
	--]=]
}

m["jv"] = {
	"Jawa",
	"Q33549",
	"poz-sus",
	scripts = {"Latn", "Java"},
	translit_module = "jv-translit",
	ancestors = {"kaw"},
	link_tr = true,
}

m["ka"] = {
	"Georgia",
	"Q8108",
	"ccs-gzn",
	varieties = {{"Judeo-Georgian", "Kivruli", "Gruzinic"}},
	scripts = {"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian
	ancestors = {"oge"},
	translit_module = "Geor-translit",
	override_translit = true,
	entry_name = {
		from = {"̂"},
		to   = {""}},
}

m["kg"] = {
	"Kongo",
	"Q33702",
	"bnt-kng",
	aliases = {"Kikongo"},
	varieties = {"Koongo", "Laari", "San Salvador Kongo", "Yombe"},
	scripts = Latn,
}

m["ki"] = {
	"Kikuyu",
	"Q33587",
	"bnt-kka",
	aliases = {"Gikuyu", "Gĩkũyũ"},
	scripts = Latn,
}

m["kj"] = {
	"Kwanyama",
	"Q1405077",
	"bnt-ova",
	aliases = {"Kuanyama", "Oshikwanyama"},
	scripts = Latn,
}

m["kk"] = {
	"Kazakh",
	"Q9252",
	"trk-kno",
	scripts = {"Cyrl", "Latn", "kk-Arab"},
	translit_module = "kk-translit",
	override_translit = true,
}

m["kl"] = {
	"Greenland",
	"Q25355",
	"esx-inu",
	aliases = {"Kalaallisut"},
	scripts = Latn,
}

m["km"] = {
	"Khmer",
	"Q9205",
	"mkh",
	aliases = {"Kemboja"},
	scripts = {"Khmr"},
	ancestors = {"mkh-mkm"},
	translit_module = "km-translit",
}

m["kn"] = {
	"Kannada",
	"Q33673",
	"dra",
	scripts = {"Knda"},
	ancestors = {"dra-mkn"},
	translit_module = "kn-translit",
}

m["ko"] = {
	"Korea",
	"Q9176",
	"qfa-kor",
	aliases = {"Korea Moden"},
	scripts = {"Kore", "Brai"},
	ancestors = {"okm"},
	translit_module = "ko-translit",
}

m["kr"] = {
	"Kanuri",
	"Q36094",
	"ssa-sah",
	varieties = {"Kanembu", "Bilma Kanuri", "Central Kanuri", "Manga Kanuri", "Tumari Kanuri"},
	scripts = LatnArab,
	sort_key = {
		from = {"ny", "ǝ", "sh"},
		to   = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
	entry_name = {
		from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE},
		to   = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}},
}

-- Kashmiri. Positional fields are listed first, then the named fields,
-- matching the layout of the other records.
m["ks"] = {
	"Kashmiri",
	"Q33552",
	"inc-dar",
	aliases = {"Koshur"},
	scripts = {"ks-Arab", "Deva", "Shrd", "Latn"},
	ancestors = {"inc-dar-pro"},
	translit_module = "translit-redirect",
}

m["ku"] = {
	"Kurdi",
	"Q36368",
	"ira-nwi",
	scripts = {"Latn", "ku-Arab", "Armn", "Cyrl"},
	translit_module = "translit-redirect",
}

-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT

m["kw"] = {
	"Cornish",
	"Q25289",
	"cel-bry",
	scripts = Latn,
	ancestors = {"cnx"},
}

m["ky"] = {
	"Kyrgyz",
	"Q9255",
	"trk-kip",
	aliases = {"Kirghiz", "Kirgiz"},
	scripts = {"Cyrl", "Latn", "Arab"},
	translit_module = "ky-translit",
	override_translit = true,
}

-- Latin.
m["la"] = {
	"Latin",
	"Q397",
	"itc",
	scripts = Latn,
	ancestors = {"itc-ola"},
	-- Diacritics listed for removal from entry names: macron, breve,
	-- diaeresis and the double inverted breve.
	entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE},
	-- The ligatures Æ æ Œ œ are listed explicitly and must be stored as
	-- UTF-8: read as Windows-1252 bytes and re-decoded, "æŒœ" collapses
	-- into the single CJK character U+631C.
	standardChars = "A-Za-zÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. PUNCTUATION,
}

m["lb"] = {
	"Luxembourg",
	"Q9051",
	"gmw",
	scripts = Latn,
	ancestors = {"gmh"},
}

m["lg"] = {
	"Luganda",
	"Q33368",
	"bnt-nyg",
	aliases = {"Ganda", "Oluganda"},
	scripts = Latn,
	entry_name = {
		from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û" },
		to   = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U",}},
	sort_key = {
		from = {"ŋ"},
		to   = {"n"}} ,
}

m["li"] = {
	"Limburg",
	"Q102172",
	"gmw",
	aliases = {"Limburgan", "Limburgian", "Limburgic"},
	scripts = Latn,
	ancestors = {"dum"},
}

m["ln"] = {
	"Lingala",
	"Q36217",
	"bnt-bmo",
	aliases = {"Ngala"},
	scripts = Latn,
}

m["lo"] = {
	"Lao",
	"Q9211",
	"tai-swe",
	aliases = {"Laotian"},
	scripts = {"Laoo"},
	translit_module = "lo-translit",
	sort_key = {
		from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"},
		to   = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}},
}

m["lt"] = {
	"Lithuania",
	"Q9083",
	"bat",
	scripts = Latn,
	ancestors = {"olt"},
	entry_name = {
		from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE},
		to   = {"a",       "A",     "e",     "E",     "i",     "I",     "y",   "Y",   "n",   "o",    "O",     "u",      "U"}} ,
}

m["lu"] = {
	"Luba-Katanga",
	"Q36157",
	"bnt-lub",
	scripts = Latn,
}

-- Latvian. The entry_name replacements are order-dependent; see the comment
-- inside the table.
m["lv"] = {
	"Latvia",
	"Q9078",
	"bat",
	aliases = {"Lettish", "Lett"},
	scripts = Latn,
	entry_name = {
		-- This attempts to convert vowels with tone marks to vowels either with
		-- or without macrons. Specifically, there should be no macrons if the
		-- vowel is part of a diphthong (including resonant diphthongs, such as
		-- pìrksts -> pirksts not #pīrksts). What we do is first convert the
		-- vowel + tone mark to a vowel + tilde in a decomposed fashion,
		-- then remove the tilde in diphthongs, then convert the remaining
		-- vowel + tilde sequences to macroned vowels, then delete any other
		-- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār
		-- occur before consonants. FIXME: This still might not be sufficient.
		from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE ..GRAVE .."]", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE},
		to   = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""}},
}

m["mg"] = {
	"Malagasy",
	"Q7930",
	"poz-bre",
	varieties = {
		{"Antankarana", "Antankarana Malagasy"},
		{"Bara Malagasy", "Bara"}, {"Betsimisaraka Malagasy", "Betsimisaraka"}, {"Northern Betsimisaraka Malagasy", "Northern Betsimisaraka"}, {"Southern Betsimisaraka Malagasy", "Southern Betsimisaraka"}, {"Bushi", "Shibushi", "Kibushi"},
		{"Masikoro Malagasy", "Masikoro"},
		"Plateau Malagasy",
		"Sakalava",
		{"Tandroy Malagasy", "Tandroy"}, {"Tanosy", "Tanosy Malagasy"}, "Tesaka", {"Tsimihety", "Tsimihety Malagasy"}},
	scripts = Latn,
}

m["mh"] = {
	"Marshall",
	"Q36280",
	"poz-mic",
	scripts = Latn,
	sort_key = {
		from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄"  , "o̧" , "ō"  , "ū" },
		to   = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} ,
}

m["mi"] = {
	"Maori",
	"Q36451",
	"poz-pep",
	aliases = {"Māori"},
	scripts = Latn,
}

m["mk"] = {
	"Macedonia",
	"Q9296",
	"zls",
	scripts = Cyrl,
	translit_module = "mk-translit",
	entry_name = {
		from = {ACUTE},
		to   = {}},
}

m["ml"] = {
	"Malayalam",
	"Q36236",
	"dra",
	scripts = {"Mlym"},
	translit_module = "ml-translit",
	override_translit = true,
}

m["mn"] = {
	"Mongolia",
	"Q9246",
	"xgn",
	varieties = {"Khalkha Mongolian"},
	scripts = {"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion
	ancestors = {"cmg"},
	translit_module = "mn-translit",
	override_translit = true,
}

-- "mo" IS TREATED AS "ro", SEE WT:LT

m["mr"] = {
	"Marathi",
	"Q1571",
	"inc",
	scripts = {"Deva", "Modi"},
	ancestors = {"omr"},
	translit_module = "mr-translit",
}

m["ms"] = {
	"Melayu",
	"Q9237",
	"poz-mly",
	aliases = {"Malaysia", "Melayu Piawai", "Melayu Baku"},
	scripts = {"Latn", "ms-Arab"},
}

m["mt"] = {
	"Malta",
	"Q9166",
	"sem-arb",
	scripts = Latn,
	ancestors = {"sqr"},
}

m["my"] = {
	"Burma",
	"Q9228",
	"tbq-brm",
	aliases = {"Myanmar"},
	varieties = {"Mandalay Burmese", "Myeik Burmese", "Palaw Burmese", {"Rangoon Burmese", "Yangon Burmese"}, "Yaw Burmese"},
	scripts = {"Mymr"},
	ancestors = {"obr"},
	translit_module = "my-translit",
	override_translit = true,
}

-- Nauruan.
-- No `aliases` field: the only alias previously listed was "Nauru", which is
-- identical to the canonical name and therefore redundant.
m["na"] = {
	"Nauru",
	"Q13307",
	"poz-mic",
	scripts = Latn,
}

m["nb"] = {
	"Norway Bokmål",
	"Q25167",
	"gmq",
	aliases = {"Bokmål"},
	scripts = Latn,
	ancestors = {"gmq-mno"},
	wikimedia_codes = {"no"},
}

m["nd"] = {
	"Ndebele Utara",
	"Q35613",
	"bnt-ngu",
	scripts = Latn,
	entry_name = {
		from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
		to   = {"a"      , "e"      , "i"      , "o"      , "u"      , "m", "n"    }},
}

m["ne"] = {
	"Nepal",
	"Q33823",
	"inc-pah",
	aliases = {"Nepali"},
	scripts = {"Deva", "Newa"},
	translit_module = "ne-translit",
}

m["ng"] = {
	"Ndonga",
	"Q33900",
	"bnt-ova",
	scripts = Latn,
}

m["nl"] = {
	"Belanda",
	"Q7411",
	"gmw",
	varieties = {"Netherlandic", "Flemish"}, -- FIXME, check this
	scripts = Latn,
	ancestors = {"dum"},
	sort_key = {
		from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"},
		to   = {"a"	 , "e"	, "i"	, "o"	, "u"	, "c", "n"}} ,
	standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}

m["nn"] = {
	"Norway Nynorsk",
	"Q25164",
	"gmq",
	aliases = {"New Norwegian", "Nynorsk"},
	scripts = Latn,
	ancestors = {"gmq-mno"},
}

m["no"] = {
	"Norway",
	"Q9043",
	"gmq",
	scripts = Latn,
	ancestors = {"gmq-mno"},
}

m["nr"] = {
	"Ndebele Selatan",
	"Q36785",
	"bnt-ngu",
	aliases = {"South Ndebele"},
	scripts = Latn,
	entry_name = {
		from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
		to   = {"a"      , "e"      , "i"      , "o"      , "u"      , "m", "n"    }},
}

m["nv"] = {
	"Navajo",
	"Q13310",
	"apa",
	aliases = {"Navaho", "Diné bizaad"},
	scripts = {"nv-Latn"},
	sort_key = {
		from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE},
		to   = {"a"   , "e"   , "i"   , "o"   , "n", "ni%1"	  , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l
}

m["ny"] = {
	"Chichewa",
	"Q33273",
	"bnt-nys",
	aliases = {"Chicheŵa", "Chinyanja", "Nyanja", "Chewa", "Cicewa", "Cewa", "Cinyanja"},
	scripts = Latn,
	entry_name = {
		from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" },
		to   = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}},
	sort_key = {
		from = {"ng'"},
		to   = {"ng"}} ,
}

m["oc"] = {
	"Occitan",
	"Q14185",
	"roa",
	-- don't list varieties here that are in [[Module:etymology languages/data]]
	scripts = {"Latn", "Hebr"},
	ancestors = {"pro"},
	sort_key = {
		from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"},
		to   = {"a"   , "e"   , "i"   , "o"   , "u"   , "c", "%1h"	  }} ,
}

m["oj"] = {
	"Ojibwe",
	"Q33875",
	"alg",
	aliases = {"Ojibway"},
	varieties = {{"Chippewa", "Ojibwemowin", "Southwestern Ojibwa"}},
	scripts = {"Cans", "Latn"},
}

m["om"] = {
	"Oromo",
	"Q33864",
	"cus",
	varieties = {"Orma", "Borana-Arsi-Guji Oromo", "West Central Oromo"},
	scripts = {"Latn", "Ethi"},
}

m["or"] = {
	"Oriya",
	"Q33810",
	"inc",
	aliases = {"Odia", "Oorya"},
	scripts = {"Orya"},
	ancestors = {"inc-mor"},
	translit_module = "or-translit",
}

m["os"] = {
	"Ossetia",
	"Q33968",
	"xsc",
	aliases = {"Ossete", "Ossetic"},
	varieties = {"Digor", "Iron"},
	scripts = {"Cyrl", "Geor", "Latn"},
	ancestors = {"oos"},
	translit_module = "os-translit",
	override_translit = true,
	entry_name = {
		from = {GRAVE, ACUTE},
		to   = {}} ,
}

-- Language code "pa": Punjabi.
m["pa"] = {
	"Punjabi", "Q58635", "inc",
	aliases = {"Panjabi"},
	ancestors = {"psu"},
	-- Code points U+064B through U+0652, listed with no replacement (empty `to`).
	entry_name = {
		from = {
			u(0x064B), u(0x064C), u(0x064D), u(0x064E),
			u(0x064F), u(0x0650), u(0x0651), u(0x0652),
		},
		to = {},
	},
	scripts = {"Guru", "pa-Arab"},
	translit_module = "translit-redirect",
}

-- Language code "pi": Pali.
m["pi"] = {
	"Pali", "Q36727", "inc-old",
	ancestors = {"sa"},
	-- U+FE00 is listed with no replacement (empty `to`).
	entry_name = {
		from = {u(0xFE00)},
		to = {},
	},
	scripts = {
		"Latn", "Brah", "Deva", "Beng", "Sinh",
		"Mymr", "Thai", "Lana", "Laoo", "Khmr",
	},
	-- `from` and `to` are parallel lists; the last two `from` items
	-- (U+FE00, U+200D) have no `to` counterpart.
	sort_key = {
		from = {
			"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "[ṇñṅ]", "ṭ",
			"([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])",
			"ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ",
			u(0xFE00), u(0x200D),
		},
		to = {
			"a", "i", "u", "d", "l", "m", "n", "t",
			"%2%1", "%2%1",
			"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ",
		},
	},
}

-- Language code "pl": Poland.
do
	-- Suffixes appended to the base letter in the sort key.
	local HIGHEST = u(0x10FFFF)
	local SECOND_HIGHEST = u(0x10FFFE)

	m["pl"] = {
		"Poland", "Q809", "zlw-lch",
		ancestors = {"zlw-opl"},
		scripts = Latn,
		-- Each diacritic letter becomes its base letter plus a suffix;
		-- ź gets U+10FFFE and ż gets U+10FFFF.
		sort_key = {
			from = {
				"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]",
				"[Óó]", "[Śś]", "[Żż]", "[Źź]",
			},
			to = {
				"a" .. HIGHEST, "c" .. HIGHEST, "e" .. HIGHEST,
				"l" .. HIGHEST, "n" .. HIGHEST, "o" .. HIGHEST,
				"s" .. HIGHEST, "z" .. HIGHEST, "z" .. SECOND_HIGHEST,
			},
		},
	}
end

-- Language code "ps": Pashto.
m["ps"] = {
	"Pashto", "Q58680", "ira-pat",
	aliases = {"Pashtun", "Pushto", "Pashtu", "Afghani"},
	ancestors = {"ira-pat-pro"},
	scripts = {"ps-Arab"},
	varieties = {
		"Central Pashto",
		"Northern Pashto",
		"Southern Pashto",
		{"Pukhto", "Pakhto", "Pakkhto"},
	},
}

-- Language code "pt": Portugis.
m["pt"] = {
	"Portugis", "Q5146", "roa-ibe",
	aliases = {"Portugis Moden"},
	ancestors = {"roa-opt"},
	scripts = {"Latn", "Brai"},
	-- Accented vowels, ç and ñ are keyed as their bare letters.
	sort_key = {
		from = {
			"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]",
			"ç", "ñ",
		},
		to = {
			"a", "e", "i", "o", "u",
			"c", "n",
		},
	},
}

-- Language code "qu": Quechua.
m["qu"] = {
	"Quechua", "Q5218", "qwe",
	scripts = Latn,
}

-- Language code "rm": Romansch.
m["rm"] = {
	"Romansch", "Q13199", "roa-rhe",
	aliases = {
		"Romansh",
		"Rumantsch",
		"Romanche",
	},
	scripts = Latn,
}

-- Language code "ro": Romania.
m["ro"] = {
	"Romania", "Q7913", "roa-eas",
	aliases = {"Daco-Romanian", "Roumanian", "Rumanian"},
	scripts = {"Latn", "Cyrl"},
	-- Each diacritic letter becomes its base letter plus one tilde
	-- (two for â, which shares the base "a" with ă).
	sort_key = {
		from = {
			"ă", "â", "î", "ș", "ț",
		},
		to = {
			"a~", "a~~", "i~", "s~", "t~",
		},
	},
}

-- Language code "ru": Rusia.
m["ru"] = {
	"Rusia",
	"Q7737",
	"zle",
	scripts = {"Cyrl", "Brai"},
	translit_module = "ru-translit",
	-- ё is keyed as е followed by U+10FFFF. The code point is built with the
	-- file-level alias `u` (= mw.ustring.char), as the other entries do.
	sort_key = {
		from = {"ё"},
		to   = {"е" .. u(0x10FFFF)}},
	-- Ѐ/ѐ/Ѝ/ѝ are mapped to Е/е/И/и; GRAVE and ACUTE have no `to` counterpart.
	entry_name = {
		from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
		to   = {"Е", "е", "И", "и"}},
	standardChars = "ЁІА-яёі0-9—" .. PUNCTUATION,
}

-- Language code "rw": Rwanda-Rundi.
m["rw"] = {
	"Rwanda-Rundi", "Q33573", "bnt-glb",
	-- don't list varieties here that are in [[Module:etymology languages/data]]
	scripts = Latn,
	-- Deleted "Subi", which normally refers to a different language
	varieties = {
		{"Ha", "Giha"},
		"Hangaza",
		"Vinza",
		"Shubi",
	},
}

-- Language code "sa": Sanskrit.
m["sa"] = {
	"Sanskrit", "Q11059", "inc-old",
	-- U+FE00 is listed with no replacement (empty `to`).
	entry_name = {
		from = {u(0xFE00)},
		to = {},
	},
	scripts = {
		"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran",
		"Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana",
		"Laoo", "Mlym", "Mymr", "Newa", "Orya", "Saur", "Shrd",
		"Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh",
	},
	-- `from` and `to` are parallel lists; the last two `from` items
	-- (U+FE00, U+200D) have no `to` counterpart.
	sort_key = {
		from = {
			"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "[ṇñṅ]", "ṭ",
			"([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])",
			"ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ",
			u(0xFE00), u(0x200D),
		},
		to = {
			"a", "i", "u", "d", "l", "m", "n", "t",
			"%2%1", "%2%1",
			"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ",
		},
	},
	translit_module = "translit-redirect",
}

-- Language code "sc": Sardinia.
m["sc"] = {
	"Sardinia", "Q33976", "roa",
	-- don't list varieties here that are in [[Module:etymology languages/data]]
	scripts = Latn,
}

-- Language code "sd": Sindhi.
m["sd"] = {
	"Sindhi", "Q33997", "inc",
	ancestors = {"psu"},
	-- Only the first item has a replacement (U+0671 -> U+0627);
	-- the remaining code points have no `to` counterpart.
	entry_name = {
		from = {
			u(0x0671),
			u(0x064B), u(0x064C), u(0x064D), u(0x064E),
			u(0x064F), u(0x0650), u(0x0651), u(0x0652),
			u(0x0670), u(0x0640),
		},
		to = {
			u(0x0627),
		},
	},
	scripts = {"sd-Arab", "Deva", "Sind", "Khoj"},
}

-- Language code "se": Sami Utara.
m["se"] = {
	"Sami Utara", "Q33947", "smi",
	aliases = {"Saami Utara"},
	-- Dotted/macron vowels map to bare vowels; "ˈ" has no `to` counterpart.
	entry_name = {
		from = {
			"ạ", "[ēẹ]", "ī", "[ōọ]", "ū",
			"ˈ",
		},
		to = {
			"a", "e", "i", "o", "u",
		},
	},
	scripts = Latn,
	-- Each listed letter becomes its base letter followed by "²".
	sort_key = {
		from = {
			"á", "č", "đ", "ŋ", "š", "ŧ", "ž",
		},
		to = {
			"a²", "c²", "d²", "n²", "s²", "t²", "z²",
		},
	},
	standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION,
}

-- Language code "sg": Sango.
m["sg"] = {
	"Sango", "Q33954", "crp",
	ancestors = {"ngb"},
	scripts = Latn,
}

-- Language code "sh": Serbo-Croatia.
m["sh"] = {
	"Serbo-Croatia", "Q9301", "zls",
	aliases = {"BCS", "Croato-Serbian", "Serbocroatian"},
	-- Accented Latin and Cyrillic letters map to their bare letters; the six
	-- combining-mark constants at the end of `from` have no `to` counterpart.
	entry_name = {
		from = {
			"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]",
			"[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]",
			"[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]",
			"[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]",
			"[ȐȒŔ]", "[ȑȓŕ]",
			"[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]",
			"Ѐ", "ѐ",
			"[ӢЍ]", "[ӣѝ]",
			"[Ӯ]", "[ӯ]",
			GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE,
		},
		to = {
			"A", "a",
			"E", "e",
			"I", "i",
			"O", "o",
			"R", "r",
			"U", "u",
			"Е", "е",
			"И", "и",
			"У", "у",
		},
	},
	scripts = {"Latn", "Cyrl", "Glag"},
	-- don't list varieties here that are in [[Module:etymology languages/data]]
	varieties = {"Bosnian", "Croatian", "Montenegrin", "Serbian", "Shtokavian"},
	wikimedia_codes = {"sh", "bs", "hr", "sr"},
}

-- Language code "si": Sinhala.
m["si"] = {
	"Sinhala", "Q13267", "inc",
	aliases = {"Singhala"},
	ancestors = {"elu-prk"},
	override_translit = true,
	scripts = {"Sinh"},
	translit_module = "si-translit",
}

-- Language code "sk": Slovak.
m["sk"] = {
	"Slovak", "Q9058", "zlw",
	scripts = Latn,
	-- Accented letters map to bare letters; the final pattern (a character
	-- class of DIAER, ACUTE and CIRC) maps to the empty string.
	sort_key = {
		from = {
			"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ",
			"[" .. DIAER .. ACUTE .. CIRC .. "]",
		},
		to = {
			"a", "e", "i", "o", "u", "y", "r", "l",
			"",
		},
	},
}

-- Language code "sl": Slovene.
m["sl"] = {
	"Slovene", "Q9063", "zls",
	aliases = {"Slovenia"},
	-- Accented letters (plus ə and ł) map to bare letters; the seven
	-- combining-mark constants at the end of `from` have no `to` counterpart.
	entry_name = {
		from = {
			"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]",
			"[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]",
			"[ÍÌÎĪȊȈ]", "[íìîīȋȉ]",
			"[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]",
			"[ŔȒȐ]", "[ŕȓȑ]",
			"[ÚÙÛŪȖȔ]", "[úùûūȗȕ]",
			"ł",
			GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW,
		},
		to = {
			"A", "a",
			"E", "e",
			"I", "i",
			"O", "o",
			"R", "r",
			"U", "u",
			"l",
		},
	},
	scripts = Latn,
	-- č, š, ž become their base letter followed by "²".
	sort_key = {
		from = {"č", "š", "ž"},
		to = {"c²", "s²", "z²"},
	},
}

-- Language code "sm": Samoa.
m["sm"] = {
	"Samoa", "Q34011", "poz-pnp",
	scripts = Latn,
}

-- Language code "sn": Shona.
m["sn"] = {
	"Shona", "Q34004", "bnt-sho",
	-- ACUTE is listed with no replacement (empty `to`).
	entry_name = {
		from = {ACUTE},
		to = {},
	},
	scripts = Latn,
}

-- Language code "so": Somali.
m["so"] = {
	"Somali", "Q13275", "cus",
	-- Accented vowels and y map to bare letters, upper and lower case listed separately.
	entry_name = {
		from = {
			"[ÁÀÂ]", "[áàâ]",
			"[ÉÈÊ]", "[éèê]",
			"[ÍÌÎ]", "[íìî]",
			"[ÓÒÔ]", "[óòô]",
			"[ÚÙÛ]", "[úùû]",
			"[ÝỲ]", "[ýỳ]",
		},
		to = {
			"A", "a",
			"E", "e",
			"I", "i",
			"O", "o",
			"U", "u",
			"Y", "y",
		},
	},
	scripts = {"Latn", "Arab", "Osma"},
}

-- Language code "sq": Albania.
m["sq"] = {
	"Albania", "Q8748", "sqj",
	-- don't list varieties here that are in [[Module:etymology languages/data]]
	-- Unlike most entries in this file, entry_name here uses the
	-- `remove_diacritics` key rather than from/to lists.
	entry_name = {remove_diacritics = ACUTE},
	scripts = {"Latn", "Grek", "Elba"},
	-- Accented vowels and ç map to bare letters, case preserved.
	sort_key = {
		from = {
			"[âãä]", "[ÂÃÄ]",
			"[êẽë]", "[ÊẼË]",
			"ĩ", "Ĩ",
			"õ", "Õ",
			"ũ", "Ũ",
			"ỹ", "Ỹ",
			"ç", "Ç",
		},
		to = {
			"a", "A",
			"e", "E",
			"i", "I",
			"o", "O",
			"u", "U",
			"y", "Y",
			"c", "C",
		},
	},
}

-- Language code "ss": Swazi.
m["ss"] = {
	"Swazi", "Q34014", "bnt-ngu",
	aliases = {"Swati"},
	-- Tone-marked vowels, m and n map to bare letters; the five combining-mark
	-- constants at the end of `from` have no `to` counterpart.
	entry_name = {
		from = {
			"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]",
			"ḿ", "[ǹńň]",
			MACRON, ACUTE, GRAVE, CIRC, CARON,
		},
		to = {
			"a", "e", "i", "o", "u",
			"m", "n",
		},
	},
	scripts = Latn,
}

-- Language code "st": Sotho.
m["st"] = {
	"Sotho", "Q34340", "bnt-sts",
	aliases = {"Sesotho", "Southern Sesotho", "Southern Sotho"},
	-- Tone-marked vowels, m and n map to bare letters; the five combining-mark
	-- constants at the end of `from` have no `to` counterpart.
	entry_name = {
		from = {
			"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]",
			"ḿ", "[ǹńň]",
			MACRON, ACUTE, GRAVE, CIRC, CARON,
		},
		to = {
			"a", "e", "i", "o", "u",
			"m", "n",
		},
	},
	scripts = Latn,
}

-- Language code "su": Sunda.
m["su"] = {
	"Sunda", "Q34002", "poz-msa",
	scripts = {"Latn", "Sund"},
	translit_module = "su-translit",
}

-- Language code "sv": Sweden.
m["sv"] = {
	"Sweden", "Q9027", "gmq",
	ancestors = {"gmq-osw"},
	scripts = Latn,
}

-- Language code "sw": Swahili.
m["sw"] = {
	"Swahili", "Q7838", "bnt-swh",
	scripts = LatnArab,
	-- "ng'" is keyed as "ngz"; the leading-hyphen pattern "^-" has no `to` counterpart.
	sort_key = {
		from = {"ng'", "^-"},
		to = {"ngz"},
	},
	-- Each variety is a nested list of alternative names.
	varieties = {
		{"Settler Swahili", "KiSetla", "KiSettla", "Setla", "Settla", "Kitchen Swahili"},
		{"Kihindi", "Indian Swahili"},
		{"KiShamba", "Kishamba", "Field Swahili"},
		{"Kibabu", "Asian Swahili"},
		{"Kimanga", "Arab Swahili"},
		{"Kitvita", "Army Swahili"},
	},
}

-- Language code "ta": Tamil.
m["ta"] = {
	"Tamil", "Q5885", "dra",
	ancestors = {"oty"},
	override_translit = true,
	scripts = {"Taml"},
	translit_module = "ta-translit",
}

-- Language code "te": Telugu.
m["te"] = {
	"Telugu", "Q8097", "dra",
	override_translit = true,
	scripts = {"Telu"},
	translit_module = "te-translit",
}

-- Language code "tg": Tajik.
m["tg"] = {
	"Tajik", "Q9260", "ira-swi",
	aliases = {
		"Tadjik", "Tadzhik", "Tajiki",
		"Tajik Persian", "Tajiki Persian",
	},
	ancestors = {"pal"}, -- same as "fa", see WT:T:AFA
	-- ACUTE is listed with no replacement (empty `to`).
	entry_name = {
		from = {ACUTE},
		to = {},
	},
	override_translit = true,
	scripts = {"Cyrl", "fa-Arab", "Latn"},
	-- Ё/ё are keyed as Е/е.
	sort_key = {
		from = {"Ё", "ё"},
		to = {"Е", "е"},
	},
	translit_module = "tg-translit",
}

-- Language code "th": Thai.
m["th"] = {
	"Thai", "Q9217", "tai-swe",
	aliases = {"Thai Pusat", "Siam"},
	scripts = {"Thai", "Brai"},
	-- The first two patterns map to the empty string; the third swaps a
	-- leading vowel sign with the consonant that follows it.
	sort_key = {
		from = {
			"[%pๆ]",
			"[็-๎]",
			"([เแโใไ])([ก-ฮ])",
		},
		to = {
			"",
			"",
			"%2%1",
		},
	},
	translit_module = "th-translit",
}

-- Language code "ti": Tigrinya.
m["ti"] = {
	"Tigrinya", "Q34124", "sem-eth",
	aliases = {"Tigrigna"},
	scripts = {"Ethi"},
	translit_module = "Ethi-translit",
}

-- Language code "tk": Turkmen.
m["tk"] = {
	"Turkmen", "Q9267", "trk-ogz",
	ancestors = {"trk-ogz-pro"},
	-- Macron vowels map to the same vowel without the macron (ȫ/ǖ keep
	-- their diaeresis); a bare MACRON maps to the empty string.
	entry_name = {
		from = {
			"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ",
			MACRON,
		},
		to = {
			"a", "e", "i", "o", "u", "y", "ö", "ü",
			"",
		},
	},
	scripts = {"Latn", "Cyrl"},
}

-- Language code "tl": Tagalog.
m["tl"] = {
	"Tagalog", "Q34057", "phi",
	-- Accented vowels map to bare vowels; ACUTE, GRAVE and CIRC have no
	-- `to` counterpart.
	entry_name = {
		from = {
			"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]",
			ACUTE, GRAVE, CIRC,
		},
		to = {
			"a", "e", "i", "o", "u",
		},
	},
	scripts = {"Latn", "Tglg"},
}

-- Language code "tn": Tswana.
m["tn"] = {
	"Tswana", "Q34137", "bnt-sts",
	aliases = {"Setswana"},
	scripts = Latn,
}

-- Language code "to": Tonga.
m["to"] = {
	"Tonga", "Q34094", "poz-pol",
	-- Acute vowels map to bare vowels; a bare ACUTE maps to the empty string.
	entry_name = {
		from = {
			"á", "é", "í", "ó", "ú",
			ACUTE,
		},
		to = {
			"a", "e", "i", "o", "u",
			"",
		},
	},
	scripts = Latn,
	-- Macron vowels map to bare vowels; a bare MACRON maps to the empty string.
	sort_key = {
		from = {
			"ā", "ē", "ī", "ō", "ū",
			MACRON,
		},
		to = {
			"a", "e", "i", "o", "u",
			"",
		},
	},
}

-- Language code "tr": Turki.
m["tr"] = {
	"Turki", "Q256", "trk-ogz",
	ancestors = {"ota"},
	scripts = Latn,
}

-- Language code "ts": Tsonga.
m["ts"] = {
	"Tsonga", "Q34327", "bnt-tsr",
	scripts = Latn,
}

-- Language code "tt": Tatar.
m["tt"] = {
	"Tatar", "Q25285", "trk-kbu",
	override_translit = true,
	scripts = {"Cyrl", "Latn", "tt-Arab"},
	translit_module = "tt-translit",
}

-- "tw" IS TREATED AS "ak", SEE WT:LT

-- Language code "ty": Tahiti.
m["ty"] = {
	"Tahiti", "Q34128", "poz-pep",
	scripts = Latn,
}

-- Language code "ug": Uyghur.
m["ug"] = {
	"Uyghur", "Q13263", "trk-kar",
	aliases = {"Uigur", "Uighur", "Uygur"},
	ancestors = {"chg"},
	override_translit = true,
	scripts = {"ug-Arab", "Latn", "Cyrl"},
	translit_module = "ug-translit",
}

-- Language code "uk": Ukraine.
m["uk"] = {
	"Ukraine", "Q8798", "zle",
	ancestors = {"orv"},
	-- Ѐ/ѐ/Ѝ/ѝ map to Е/е/И/и; GRAVE and ACUTE have no `to` counterpart.
	entry_name = {
		from = {
			"Ѐ", "ѐ", "Ѝ", "ѝ",
			GRAVE, ACUTE,
		},
		to = {
			"Е", "е", "И", "и",
		},
	},
	scripts = Cyrl,
	standardChars = "ЄІЇА-ЩЫЬЮ-щыьюяєії" .. PUNCTUATION,
	translit_module = "uk-translit",
}
-- Language code "ur": Urdu.
m["ur"] = {
	"Urdu", "Q1617", "inc",
	ancestors = {"inc-sap"},
	-- Code points U+064B through U+0652, listed with no replacement (empty `to`).
	entry_name = {
		from = {
			u(0x064B), u(0x064C), u(0x064D), u(0x064E),
			u(0x064F), u(0x0650), u(0x0651), u(0x0652),
		},
		to = {},
	},
	scripts = {"ur-Arab"},
}

-- Language code "uz": Uzbek.
m["uz"] = {
	"Uzbek", "Q9264", "trk-kar",
	ancestors = {"chg"},
	scripts = {"Latn", "Cyrl", "fa-Arab"},
	varieties = {
		"Northern Uzbek",
		"Southern Uzbek",
	},
}

-- Language code "ve": Venda.
m["ve"] = {
	"Venda", "Q32704", "bnt-bso",
	scripts = Latn,
}

-- Language code "vi": Vietnam.
m["vi"] = {
	"Vietnam", "Q9199", "mkh-vie",
	aliases = {"Annam", "Annamite"},
	ancestors = {"mkh-mvi"},
	scripts = {"Latn", "Hani"},
	-- sort_key is a string here rather than a from/to table
	-- (presumably a module name; confirm in the consuming module).
	sort_key = "vi-sortkey",
}

-- Language code "vo": Volapük.
m["vo"] = {
	"Volapük", "Q36986", "art",
	scripts = Latn,
}

-- Language code "wa": Walloon.
m["wa"] = {
	"Walloon", "Q34219", "roa-oil",
	ancestors = {"fro"},
	scripts = Latn,
	-- Accented vowels, y and ç map to bare letters; the apostrophe has no
	-- `to` counterpart.
	sort_key = {
		from = {
			"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]",
			"ç",
			"'",
		},
		to = {
			"a", "e", "i", "o", "u", "y",
			"c",
		},
	},
	varieties = {"Liégeois", "Namurois", "Wallo-Picard", "Wallo-Lorrain"},
}

-- Language code "wo": Wolof.
m["wo"] = {
	"Wolof", "Q34257", "alv-fwo",
	scripts = LatnArab,
	-- the subsumed dialect 'wof'
	varieties = {"Gambian Wolof"},
}

-- Language code "xh": Xhosa.
m["xh"] = {
	"Xhosa", "Q13218", "bnt-ngu",
	-- Tone-marked vowels, m and n map to bare letters; the five combining-mark
	-- constants at the end of `from` have no `to` counterpart.
	entry_name = {
		from = {
			"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]",
			"ḿ", "[ǹńň]",
			MACRON, ACUTE, GRAVE, CIRC, CARON,
		},
		to = {
			"a", "e", "i", "o", "u",
			"m", "n",
		},
	},
	scripts = Latn,
}

-- Language code "yi": Yiddish.
m["yi"] = {
	"Yiddish", "Q8641", "gmw",
	ancestors = {"gmh"},
	scripts = {"Hebr"},
	-- Pointed letters are keyed as their unpointed forms. One item per line
	-- so the pairing stays readable in right-to-left text.
	sort_key = {
		from = {
			"[אַאָ]",
			"בּ",
			"[וֹוּ]",
			"יִ",
			"ײַ",
			"פֿ",
		},
		to = {
			"א",
			"ב",
			"ו",
			"י",
			"יי",
			"פ",
		},
	},
	translit_module = "yi-translit",
	-- Plain strings are single names; nested lists group alternative names.
	varieties = {
		"American Yiddish",
		"Daytshmerish Yiddish",
		"Mideastern Yiddish",
		"Galitzish",
		{"Northeastern Yiddish", "Litvish", "Lithuanian Yiddish"},
		{"Northwestern Yiddish", "Netherlandic Yiddish"},
		{"Polish Yiddish", "Poylish"},
		"South African Yiddish",
		{"Southeastern Yiddish", "Ukrainian Yiddish", "Ukrainish"},
		{"Southwestern Yiddish", "Judeo-Alsatian"},
		"Udmurtish",
	},
}

-- Language code "yo": Yoruba.
m["yo"] = {
	"Yoruba", "Q34311", "alv-yor",
	scripts = Latn,
}

-- Language code "za": Zhuang.
m["za"] = {
	"Zhuang", "Q13216", "tai",
	scripts = {"Latn", "Hani"},
	-- Punctuation (%p) maps to the empty string.
	sort_key = {
		from = {"%p"},
		to = {""},
	},
	-- FIXME, are all of the following distinct?
	varieties = {
		"Chongzuo Zhuang",
		"Guibei Zhuang",
		"Guibian Zhuang",
		"Central Hongshuihe Zhuang",
		"Eastern Hongshuihe Zhuang",
		"Lianshan Zhuang",
		"Liujiang Zhuang",
		"Liuqian Zhuang",
		{"Min Zhuang", "Minz Zhuang"},
		"Nong Zhuang",
		"Qiubei Zhuang",
		"Shangsi Zhuang",
		{"Dai Zhuang", "Wenma", "Wenma Thu", "Wenma Zhuang"},
		"Yang Zhuang",
		{"Yongbei Zhuang", "Wuming Zhuang", "Standard Zhuang"},
		"Yongnan Zhuang",
		"Youjiang Zhuang",
		"Zuojiang Zhuang",
	},
}

-- Language code "zh": Cina.
m["zh"] = {
	"Cina", "Q7850", "zhx",
	ancestors = {"ltc"},
	scripts = {"Hani", "Brai", "Nshu"},
	-- sort_key is a string here rather than a from/to table
	-- (presumably a module name; confirm in the consuming module).
	sort_key = "zh-sortkey",
}

-- Language code "zu": Zulu.
m["zu"] = {
	"Zulu", "Q10179", "bnt-ngu",
	aliases = {"isiZulu"},
	-- Tone-marked vowels, m and n map to bare letters; the five combining-mark
	-- constants at the end of `from` have no `to` counterpart.
	entry_name = {
		from = {
			"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]",
			"ḿ", "[ǹńň]",
			MACRON, ACUTE, GRAVE, CIRC, CARON,
		},
		to = {
			"a", "e", "i", "o", "u",
			"m", "n",
		},
	},
	scripts = Latn,
}

-- Export the table of language entries, keyed by language code.
return m