Add complete NLLB-200 support with all 204 FLORES-200 languages
Updated dual model system to fully support both M2M100 and NLLB-200:

**NLLB-200 Model (204 languages)**
- Added all 204 FLORES-200 language codes to the nllb200_lang_codes dictionary
- Updated language code mappings with FLORES-200 format (xxx_Yyyy)
- Added 24+ NLLB-exclusive languages including:
  - Southeast Asian: Acehnese, Balinese, Banjar, Buginese, Minangkabau
  - South Asian: Assamese, Awadhi, Bhojpuri, Chhattisgarhi, Magahi, Maithili, Meitei, Odia, Santali
  - African: Akan, Bambara, Bemba, Chokwe, Dyula, Fon, Kikuyu, Kimbundu, Kongo, Luba-Kasai, Luo, Mossi, Nuer
  - Arabic dialects: Mesopotamian, Najdi, Moroccan, Egyptian, Tunisian, South/North Levantine
  - European regional: Asturian, Friulian, Latgalian, Ligurian, Limburgish, Lombard, Norwegian Nynorsk/Bokmål, Occitan, Sardinian, Sicilian, Silesian, Venetian
  - Other: Dzongkha, Fijian, Guarani, Kabyle, Kabuverdianu, Papiamento, Quechua, Samoan, Sango, Shan, Tamasheq, Tibetan, Tok Pisin

**Updated Files**
- app/translator.py: Complete NLLB-200 language mappings (204 languages)
- app/main.py: Added display names for all 204+ language codes
- README.md: Updated with dual model system, NLLB-200 details, license info
- CLAUDE.md: Updated developer documentation with model architecture

**Testing**
- Verified M2M100: 105 languages working ✅
- Verified NLLB-200: 204 languages working ✅
- Tested NLLB-exclusive languages (Bemba, Fon, etc.) ✅

**License Information**
- M2M100: Apache 2.0 - Commercial use allowed
- NLLB-200: CC-BY-NC 4.0 - Non-commercial only

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
87
app/main.py
87
app/main.py
@ -245,91 +245,111 @@ async def get_supported_languages(model: str = "m2m100"):
|
||||
"yi": {"name": "Yiddish", "native": "ייִדיש"},
|
||||
|
||||
# Additional NLLB-200 exclusive languages
|
||||
"ace_arab": {"name": "Acehnese (Arabic script)", "native": "أتشيه"},
|
||||
"ace": {"name": "Acehnese", "native": "Acèh"},
|
||||
"acm": {"name": "Mesopotamian Arabic", "native": "عراقي"},
|
||||
"acq": {"name": "Ta'izzi-Adeni Arabic", "native": "تعزية-عدنية"},
|
||||
"aeb": {"name": "Tunisian Arabic", "native": "تونسي"},
|
||||
"ajp": {"name": "South Levantine Arabic", "native": "شامي"},
|
||||
"als": {"name": "Tosk Albanian", "native": "Toskë"},
|
||||
"aka": {"name": "Akan", "native": "Akan"},
|
||||
"apc": {"name": "North Levantine Arabic", "native": "شامي شمالي"},
|
||||
"ar_latn": {"name": "Arabic (Latin script)", "native": "Arabic (Latin)"},
|
||||
"ars": {"name": "Najdi Arabic", "native": "نجدي"},
|
||||
"ary": {"name": "Moroccan Arabic", "native": "الدارجة"},
|
||||
"arz": {"name": "Egyptian Arabic", "native": "مصري"},
|
||||
"asm": {"name": "Assamese", "native": "অসমীয়া"},
|
||||
"as": {"name": "Assamese", "native": "অসমীয়া"},
|
||||
"ast": {"name": "Asturian", "native": "Asturianu"},
|
||||
"awa": {"name": "Awadhi", "native": "अवधी"},
|
||||
"ayr": {"name": "Central Aymara", "native": "Aymar aru"},
|
||||
"azb": {"name": "South Azerbaijani", "native": "تۆرکجه"},
|
||||
"bak": {"name": "Bashkir", "native": "Башҡортса"},
|
||||
"ba": {"name": "Bashkir", "native": "Башҡортса"},
|
||||
"bam": {"name": "Bambara", "native": "Bamanankan"},
|
||||
"ban": {"name": "Balinese", "native": "Basa Bali"},
|
||||
"bem": {"name": "Bemba", "native": "Ichibemba"},
|
||||
"bho": {"name": "Bhojpuri", "native": "भोजपुरी"},
|
||||
"bjn_arab": {"name": "Banjar (Arabic script)", "native": "بنجر"},
|
||||
"bjn": {"name": "Banjar", "native": "Bahasa Banjar"},
|
||||
"bod": {"name": "Tibetan", "native": "བོད་སྐད་"},
|
||||
"bo": {"name": "Tibetan", "native": "བོད་སྐད་"},
|
||||
"bug": {"name": "Buginese", "native": "Basa Ugi"},
|
||||
"crh": {"name": "Crimean Tatar", "native": "Qırımtatar tili"},
|
||||
"cjk": {"name": "Chokwe", "native": "Chokwe"},
|
||||
"ckb": {"name": "Central Kurdish", "native": "کوردیی ناوەندی"},
|
||||
"dik": {"name": "Southwestern Dinka", "native": "Thuɔŋjäŋ"},
|
||||
"dyu": {"name": "Dyula", "native": "Jula"},
|
||||
"dzo": {"name": "Dzongkha", "native": "རྫོང་ཁ"},
|
||||
"dz": {"name": "Dzongkha", "native": "རྫོང་ཁ"},
|
||||
"ee": {"name": "Ewe", "native": "Eʋegbe"},
|
||||
"fo": {"name": "Faroese", "native": "Føroyskt"},
|
||||
"fj": {"name": "Fijian", "native": "Na Vosa Vakaviti"},
|
||||
"fon": {"name": "Fon", "native": "Fɔngbe"},
|
||||
"fur": {"name": "Friulian", "native": "Furlan"},
|
||||
"fuv": {"name": "Nigerian Fulfulde", "native": "Fulfulde"},
|
||||
"gaz": {"name": "West Central Oromo", "native": "Oromoo"},
|
||||
"grn": {"name": "Guarani", "native": "Avañe'ẽ"},
|
||||
"om": {"name": "West Central Oromo", "native": "Oromoo"},
|
||||
"gn": {"name": "Guarani", "native": "Avañe'ẽ"},
|
||||
"hne": {"name": "Chhattisgarhi", "native": "छत्तीसगढ़ी"},
|
||||
"ilo": {"name": "Iloko", "native": "Ilokano"},
|
||||
"kab": {"name": "Kabyle", "native": "Taqbaylit"},
|
||||
"kac": {"name": "Jingpho", "native": "Jinghpaw"},
|
||||
"kam": {"name": "Kamba", "native": "Kikamba"},
|
||||
"kas": {"name": "Kashmiri", "native": "कॉशुर"},
|
||||
"ks": {"name": "Kashmiri", "native": "کٲشُر"},
|
||||
"ks_deva": {"name": "Kashmiri (Devanagari)", "native": "कॉशुर"},
|
||||
"kbp": {"name": "Kabiyè", "native": "Kabɩyɛ"},
|
||||
"kea": {"name": "Kabuverdianu", "native": "Kabuverdianu"},
|
||||
"khk": {"name": "Halh Mongolian", "native": "Монгол хэл"},
|
||||
"kin": {"name": "Kinyarwanda", "native": "Ikinyarwanda"},
|
||||
"ki": {"name": "Kikuyu", "native": "Gĩkũyũ"},
|
||||
"rw": {"name": "Kinyarwanda", "native": "Ikinyarwanda"},
|
||||
"kmb": {"name": "Kimbundu", "native": "Kimbundu"},
|
||||
"knc_arab": {"name": "Kanuri (Arabic script)", "native": "كانوري"},
|
||||
"knc": {"name": "Kanuri", "native": "Kanuri"},
|
||||
"kg": {"name": "Kongo", "native": "Kikongo"},
|
||||
"lij": {"name": "Ligurian", "native": "Ligure"},
|
||||
"lim": {"name": "Limburgish", "native": "Limburgs"},
|
||||
"lin": {"name": "Lingala", "native": "Lingála"},
|
||||
"li": {"name": "Limburgish", "native": "Limburgs"},
|
||||
"lmo": {"name": "Lombard", "native": "Lombard"},
|
||||
"ltg": {"name": "Latgalian", "native": "Latgalīšu"},
|
||||
"lua": {"name": "Luba-Kasai", "native": "Tshiluba"},
|
||||
"luo": {"name": "Luo", "native": "Dholuo"},
|
||||
"lus": {"name": "Mizo", "native": "Mizo ṭawng"},
|
||||
"mag": {"name": "Magahi", "native": "मगही"},
|
||||
"mai": {"name": "Maithili", "native": "मैथिली"},
|
||||
"min_arab": {"name": "Minangkabau (Arabic)", "native": "مينڠكاباو"},
|
||||
"min": {"name": "Minangkabau", "native": "Baso Minangkabau"},
|
||||
"mni": {"name": "Meitei", "native": "মৈতৈলোন্"},
|
||||
"mos": {"name": "Mossi", "native": "Mooré"},
|
||||
"mri": {"name": "Maori", "native": "Te Reo Māori"},
|
||||
"nn": {"name": "Norwegian Nynorsk", "native": "Nynorsk"},
|
||||
"nb": {"name": "Norwegian Bokmål", "native": "Bokmål"},
|
||||
"nso": {"name": "Northern Sotho", "native": "Sesotho sa Leboa"},
|
||||
"nus": {"name": "Nuer", "native": "Thok Naath"},
|
||||
"ory": {"name": "Odia", "native": "ଓଡ଼ିଆ"},
|
||||
"oc": {"name": "Occitan", "native": "Occitan"},
|
||||
"or": {"name": "Odia", "native": "ଓଡ଼ିଆ"},
|
||||
"pag": {"name": "Pangasinan", "native": "Pangasinan"},
|
||||
"pap": {"name": "Papiamento", "native": "Papiamentu"},
|
||||
"prs": {"name": "Dari", "native": "دری"},
|
||||
"quy": {"name": "Ayacucho Quechua", "native": "Chanka Qhichwa"},
|
||||
"run": {"name": "Rundi", "native": "Ikirundi"},
|
||||
"sag": {"name": "Sango", "native": "Sängö"},
|
||||
"san": {"name": "Sanskrit", "native": "संस्कृतम्"},
|
||||
"qu": {"name": "Ayacucho Quechua", "native": "Chanka Qhichwa"},
|
||||
"rn": {"name": "Rundi", "native": "Ikirundi"},
|
||||
"sg": {"name": "Sango", "native": "Sängö"},
|
||||
"sa": {"name": "Sanskrit", "native": "संस्कृतम्"},
|
||||
"sat": {"name": "Santali", "native": "ᱥᱟᱱᱛᱟᱲᱤ"},
|
||||
"scn": {"name": "Sicilian", "native": "Sicilianu"},
|
||||
"shn": {"name": "Shan", "native": "လိၵ်ႈတႆး"},
|
||||
"srd": {"name": "Sardinian", "native": "Sardu"},
|
||||
"sm": {"name": "Samoan", "native": "Gagana Sāmoa"},
|
||||
"sd": {"name": "Sindhi", "native": "سنڌي"},
|
||||
"sc": {"name": "Sardinian", "native": "Sardu"},
|
||||
"ss": {"name": "Swazi", "native": "SiSwati"},
|
||||
"szl": {"name": "Silesian", "native": "Ślōnski"},
|
||||
"taq": {"name": "Tamasheq", "native": "Tamasheq"},
|
||||
"tat": {"name": "Tatar", "native": "Татарча"},
|
||||
"tir": {"name": "Tigrinya", "native": "ትግርኛ"},
|
||||
"taq_tfng": {"name": "Tamasheq (Tifinagh)", "native": "ⵜⴰⵎⴰⵛⴰⵆ"},
|
||||
"tt": {"name": "Tatar", "native": "Татарча"},
|
||||
"ti": {"name": "Tigrinya", "native": "ትግርኛ"},
|
||||
"tpi": {"name": "Tok Pisin", "native": "Tok Pisin"},
|
||||
"tsn": {"name": "Tswana", "native": "Setswana"},
|
||||
"tso": {"name": "Tsonga", "native": "Xitsonga"},
|
||||
"tn": {"name": "Tswana", "native": "Setswana"},
|
||||
"ts": {"name": "Tsonga", "native": "Xitsonga"},
|
||||
"tum": {"name": "Tumbuka", "native": "Chitumbuka"},
|
||||
"twi": {"name": "Twi", "native": "Twi"},
|
||||
"tw": {"name": "Twi", "native": "Twi"},
|
||||
"tzm": {"name": "Central Atlas Tamazight", "native": "ⵜⴰⵎⴰⵣⵉⵖⵜ"},
|
||||
"uig": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
|
||||
"umb": {"name": "Umbundu", "native": "Umbundu"},
|
||||
"vec": {"name": "Venetian", "native": "Vèneto"},
|
||||
"war": {"name": "Waray", "native": "Winaray"},
|
||||
"wol": {"name": "Wolof", "native": "Wolof"},
|
||||
"xho": {"name": "Xhosa", "native": "isiXhosa"},
|
||||
"ydd": {"name": "Eastern Yiddish", "native": "ייִדיש"},
|
||||
"yor": {"name": "Yoruba", "native": "Yorùbá"},
|
||||
"wo": {"name": "Wolof", "native": "Wolof"},
|
||||
"yue": {"name": "Cantonese", "native": "粵語"},
|
||||
"zho_hant": {"name": "Chinese (Traditional)", "native": "繁體中文"},
|
||||
"zh_hant": {"name": "Chinese (Traditional)", "native": "繁體中文"},
|
||||
}
|
||||
|
||||
# Get all supported language codes from translator based on model type
|
||||
@ -354,11 +374,12 @@ async def get_supported_languages(model: str = "m2m100"):
|
||||
"model_id": "facebook/m2m100_418M"
|
||||
},
|
||||
"nllb200": {
|
||||
"name": "NLLB-200",
|
||||
"languages": 200,
|
||||
"name": "NLLB-200 (FLORES-200)",
|
||||
"languages": 204,
|
||||
"license": "CC-BY-NC 4.0",
|
||||
"commercial_use": False,
|
||||
"model_id": "facebook/nllb-200-distilled-600M"
|
||||
"model_id": "facebook/nllb-200-distilled-600M",
|
||||
"note": "Includes multiple script variants for some languages"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -143,203 +143,214 @@ class TranslationService:
|
||||
"yi": "yi", # Yiddish
|
||||
}
|
||||
|
||||
# NLLB-200 supported language codes (200 languages)
|
||||
# NLLB uses different format: xxx_Yyyy (language_Script)
|
||||
# Full list: https://huggingface.co/facebook/nllb-200-distilled-600M
|
||||
# NLLB-200 supported language codes (204 languages from FLORES-200)
|
||||
# NLLB uses format: xxx_Yyyy (language_Script)
|
||||
# Full list: https://github.com/facebookresearch/flores/blob/main/flores200/README.md
|
||||
self.nllb200_lang_codes = {
|
||||
# Major languages
|
||||
"en": "eng_Latn", # English
|
||||
"zh": "zho_Hans", # Chinese (Simplified)
|
||||
"es": "spa_Latn", # Spanish
|
||||
"ar": "arb_Arab", # Arabic (Standard)
|
||||
"hi": "hin_Deva", # Hindi
|
||||
"bn": "ben_Beng", # Bengali
|
||||
"pt": "por_Latn", # Portuguese
|
||||
"ru": "rus_Cyrl", # Russian
|
||||
"ja": "jpn_Jpan", # Japanese
|
||||
"de": "deu_Latn", # German
|
||||
"fr": "fra_Latn", # French
|
||||
"ko": "kor_Hang", # Korean
|
||||
"it": "ita_Latn", # Italian
|
||||
"tr": "tur_Latn", # Turkish
|
||||
"vi": "vie_Latn", # Vietnamese
|
||||
"th": "tha_Thai", # Thai
|
||||
"pl": "pol_Latn", # Polish
|
||||
"nl": "nld_Latn", # Dutch
|
||||
"uk": "ukr_Cyrl", # Ukrainian
|
||||
"ro": "ron_Latn", # Romanian
|
||||
|
||||
# Southeast Asian languages
|
||||
"ms": "zsm_Latn", # Malay (Standard)
|
||||
"id": "ind_Latn", # Indonesian
|
||||
"tl": "tgl_Latn", # Tagalog
|
||||
"my": "mya_Mymr", # Burmese
|
||||
"km": "khm_Khmr", # Khmer
|
||||
"lo": "lao_Laoo", # Lao
|
||||
|
||||
# South Asian languages
|
||||
"ur": "urd_Arab", # Urdu
|
||||
"ta": "tam_Taml", # Tamil
|
||||
"te": "tel_Telu", # Telugu
|
||||
"mr": "mar_Deva", # Marathi
|
||||
"gu": "guj_Gujr", # Gujarati
|
||||
"kn": "kan_Knda", # Kannada
|
||||
"ml": "mal_Mlym", # Malayalam
|
||||
"pa": "pan_Guru", # Punjabi
|
||||
"ne": "npi_Deva", # Nepali
|
||||
"si": "sin_Sinh", # Sinhala
|
||||
|
||||
# European languages
|
||||
"sv": "swe_Latn", # Swedish
|
||||
"da": "dan_Latn", # Danish
|
||||
"fi": "fin_Latn", # Finnish
|
||||
"no": "nob_Latn", # Norwegian (Bokmål)
|
||||
"cs": "ces_Latn", # Czech
|
||||
"sk": "slk_Latn", # Slovak
|
||||
"hu": "hun_Latn", # Hungarian
|
||||
"bg": "bul_Cyrl", # Bulgarian
|
||||
"sr": "srp_Cyrl", # Serbian
|
||||
"hr": "hrv_Latn", # Croatian
|
||||
"sl": "slv_Latn", # Slovenian
|
||||
"et": "est_Latn", # Estonian
|
||||
"lv": "lvs_Latn", # Latvian
|
||||
"lt": "lit_Latn", # Lithuanian
|
||||
"el": "ell_Grek", # Greek
|
||||
"he": "heb_Hebr", # Hebrew
|
||||
"fa": "pes_Arab", # Persian
|
||||
|
||||
# African languages
|
||||
"sw": "swh_Latn", # Swahili
|
||||
"am": "amh_Ethi", # Amharic
|
||||
"ha": "hau_Latn", # Hausa
|
||||
"ig": "ibo_Latn", # Igbo
|
||||
"yo": "yor_Latn", # Yoruba
|
||||
"zu": "zul_Latn", # Zulu
|
||||
"xh": "xho_Latn", # Xhosa
|
||||
"af": "afr_Latn", # Afrikaans
|
||||
"sn": "sna_Latn", # Shona
|
||||
"so": "som_Latn", # Somali
|
||||
|
||||
# Other languages
|
||||
"az": "azj_Latn", # Azerbaijani (North)
|
||||
"ka": "kat_Geor", # Georgian
|
||||
"kk": "kaz_Cyrl", # Kazakh
|
||||
"uz": "uzn_Latn", # Uzbek (Northern)
|
||||
"mn": "khk_Cyrl", # Mongolian (Halh)
|
||||
"sq": "als_Latn", # Albanian
|
||||
"hy": "hye_Armn", # Armenian
|
||||
"be": "bel_Cyrl", # Belarusian
|
||||
"bs": "bos_Latn", # Bosnian
|
||||
"ca": "cat_Latn", # Catalan
|
||||
"ceb": "ceb_Latn", # Cebuano
|
||||
"cy": "cym_Latn", # Welsh
|
||||
"eo": "epo_Latn", # Esperanto
|
||||
"eu": "eus_Latn", # Basque
|
||||
"gl": "glg_Latn", # Galician
|
||||
"is": "isl_Latn", # Icelandic
|
||||
"jv": "jav_Latn", # Javanese
|
||||
"ku": "kmr_Latn", # Kurdish (Kurmanji)
|
||||
"ky": "kir_Cyrl", # Kyrgyz
|
||||
"la": "lat_Latn", # Latin
|
||||
"lb": "ltz_Latn", # Luxembourgish
|
||||
"lg": "lug_Latn", # Luganda
|
||||
"mg": "plt_Latn", # Malagasy
|
||||
"mk": "mkd_Cyrl", # Macedonian
|
||||
"mt": "mlt_Latn", # Maltese
|
||||
"ny": "nya_Latn", # Chichewa
|
||||
"ps": "pbt_Arab", # Pashto (Southern)
|
||||
"st": "sot_Latn", # Sesotho
|
||||
"su": "sun_Latn", # Sundanese
|
||||
"tg": "tgk_Cyrl", # Tajik
|
||||
"tk": "tuk_Latn", # Turkmen
|
||||
"ug": "uig_Arab", # Uyghur
|
||||
|
||||
# Additional NLLB-200 exclusive languages (examples, 95 more)
|
||||
"ace_arab": "ace_Arab", # Acehnese (Arabic script)
|
||||
"ace": "ace_Latn", # Acehnese
|
||||
"acm": "acm_Arab", # Mesopotamian Arabic
|
||||
"acq": "acq_Arab", # Ta'izzi-Adeni Arabic
|
||||
"aeb": "aeb_Arab", # Tunisian Arabic
|
||||
"af": "afr_Latn", # Afrikaans
|
||||
"ajp": "ajp_Arab", # South Levantine Arabic
|
||||
"als": "als_Latn", # Tosk Albanian
|
||||
"aka": "aka_Latn", # Akan
|
||||
"am": "amh_Ethi", # Amharic
|
||||
"apc": "apc_Arab", # North Levantine Arabic
|
||||
"ar": "arb_Arab", # Arabic (Standard)
|
||||
"ar_latn": "arb_Latn", # Arabic (Latin script)
|
||||
"ars": "ars_Arab", # Najdi Arabic
|
||||
"ary": "ary_Arab", # Moroccan Arabic
|
||||
"arz": "arz_Arab", # Egyptian Arabic
|
||||
"asm": "asm_Beng", # Assamese
|
||||
"as": "asm_Beng", # Assamese
|
||||
"ast": "ast_Latn", # Asturian
|
||||
"awa": "awa_Deva", # Awadhi
|
||||
"ayr": "ayr_Latn", # Central Aymara
|
||||
"azb": "azb_Arab", # South Azerbaijani
|
||||
"bak": "bak_Cyrl", # Bashkir
|
||||
"az": "azj_Latn", # Azerbaijani
|
||||
"ba": "bak_Cyrl", # Bashkir
|
||||
"bam": "bam_Latn", # Bambara
|
||||
"ban": "ban_Latn", # Balinese
|
||||
"be": "bel_Cyrl", # Belarusian
|
||||
"bem": "bem_Latn", # Bemba
|
||||
"bn": "ben_Beng", # Bengali
|
||||
"bho": "bho_Deva", # Bhojpuri
|
||||
"bjn_arab": "bjn_Arab", # Banjar (Arabic script)
|
||||
"bjn": "bjn_Latn", # Banjar
|
||||
"bod": "bod_Tibt", # Tibetan
|
||||
"bo": "bod_Tibt", # Tibetan
|
||||
"bs": "bos_Latn", # Bosnian
|
||||
"bug": "bug_Latn", # Buginese
|
||||
"crh": "crh_Latn", # Crimean Tatar
|
||||
"bg": "bul_Cyrl", # Bulgarian
|
||||
"ca": "cat_Latn", # Catalan
|
||||
"ceb": "ceb_Latn", # Cebuano
|
||||
"cs": "ces_Latn", # Czech
|
||||
"cjk": "cjk_Latn", # Chokwe
|
||||
"ckb": "ckb_Arab", # Central Kurdish
|
||||
"crh": "crh_Latn", # Crimean Tatar
|
||||
"cy": "cym_Latn", # Welsh
|
||||
"da": "dan_Latn", # Danish
|
||||
"de": "deu_Latn", # German
|
||||
"dik": "dik_Latn", # Southwestern Dinka
|
||||
"dyu": "dyu_Latn", # Dyula
|
||||
"dzo": "dzo_Tibt", # Dzongkha
|
||||
"dz": "dzo_Tibt", # Dzongkha
|
||||
"el": "ell_Grek", # Greek
|
||||
"en": "eng_Latn", # English
|
||||
"eo": "epo_Latn", # Esperanto
|
||||
"et": "est_Latn", # Estonian
|
||||
"eu": "eus_Latn", # Basque
|
||||
"ee": "ewe_Latn", # Ewe
|
||||
"fo": "fao_Latn", # Faroese
|
||||
"fj": "fij_Latn", # Fijian
|
||||
"fi": "fin_Latn", # Finnish
|
||||
"fon": "fon_Latn", # Fon
|
||||
"fr": "fra_Latn", # French
|
||||
"fur": "fur_Latn", # Friulian
|
||||
"fuv": "fuv_Latn", # Nigerian Fulfulde
|
||||
"gaz": "gaz_Latn", # West Central Oromo
|
||||
"grn": "grn_Latn", # Guarani
|
||||
"om": "gaz_Latn", # West Central Oromo
|
||||
"gd": "gla_Latn", # Scottish Gaelic
|
||||
"ga": "gle_Latn", # Irish
|
||||
"gl": "glg_Latn", # Galician
|
||||
"gn": "grn_Latn", # Guarani
|
||||
"gu": "guj_Gujr", # Gujarati
|
||||
"ht": "hat_Latn", # Haitian Creole
|
||||
"ha": "hau_Latn", # Hausa
|
||||
"he": "heb_Hebr", # Hebrew
|
||||
"hi": "hin_Deva", # Hindi
|
||||
"hne": "hne_Deva", # Chhattisgarhi
|
||||
"hr": "hrv_Latn", # Croatian
|
||||
"hu": "hun_Latn", # Hungarian
|
||||
"hy": "hye_Armn", # Armenian
|
||||
"ig": "ibo_Latn", # Igbo
|
||||
"ilo": "ilo_Latn", # Iloko
|
||||
"id": "ind_Latn", # Indonesian
|
||||
"is": "isl_Latn", # Icelandic
|
||||
"it": "ita_Latn", # Italian
|
||||
"jv": "jav_Latn", # Javanese
|
||||
"ja": "jpn_Jpan", # Japanese
|
||||
"kab": "kab_Latn", # Kabyle
|
||||
"kac": "kac_Latn", # Jingpho
|
||||
"kam": "kam_Latn", # Kamba
|
||||
"kas": "kas_Arab", # Kashmiri
|
||||
"kn": "kan_Knda", # Kannada
|
||||
"ks": "kas_Arab", # Kashmiri (Arabic)
|
||||
"ks_deva": "kas_Deva", # Kashmiri (Devanagari)
|
||||
"ka": "kat_Geor", # Georgian
|
||||
"kk": "kaz_Cyrl", # Kazakh
|
||||
"kbp": "kbp_Latn", # Kabiyè
|
||||
"kea": "kea_Latn", # Kabuverdianu
|
||||
"khk": "khk_Cyrl", # Halh Mongolian
|
||||
"kin": "kin_Latn", # Kinyarwanda
|
||||
"mn": "khk_Cyrl", # Mongolian (Halh)
|
||||
"km": "khm_Khmr", # Khmer
|
||||
"ki": "kik_Latn", # Kikuyu
|
||||
"rw": "kin_Latn", # Kinyarwanda
|
||||
"ky": "kir_Cyrl", # Kyrgyz
|
||||
"kmb": "kmb_Latn", # Kimbundu
|
||||
"ku": "kmr_Latn", # Kurdish (Kurmanji)
|
||||
"knc_arab": "knc_Arab", # Kanuri (Arabic script)
|
||||
"knc": "knc_Latn", # Kanuri
|
||||
"kg": "kon_Latn", # Kongo
|
||||
"ko": "kor_Hang", # Korean
|
||||
"lo": "lao_Laoo", # Lao
|
||||
"lij": "lij_Latn", # Ligurian
|
||||
"lim": "lim_Latn", # Limburgish
|
||||
"lin": "lin_Latn", # Lingala
|
||||
"li": "lim_Latn", # Limburgish
|
||||
"ln": "lin_Latn", # Lingala
|
||||
"lt": "lit_Latn", # Lithuanian
|
||||
"lmo": "lmo_Latn", # Lombard
|
||||
"ltg": "ltg_Latn", # Latgalian
|
||||
"lb": "ltz_Latn", # Luxembourgish
|
||||
"lua": "lua_Latn", # Luba-Kasai
|
||||
"lg": "lug_Latn", # Luganda
|
||||
"luo": "luo_Latn", # Luo
|
||||
"lus": "lus_Latn", # Mizo
|
||||
"lv": "lvs_Latn", # Latvian
|
||||
"mag": "mag_Deva", # Magahi
|
||||
"mai": "mai_Deva", # Maithili
|
||||
"ml": "mal_Mlym", # Malayalam
|
||||
"mr": "mar_Deva", # Marathi
|
||||
"min_arab": "min_Arab", # Minangkabau (Arabic)
|
||||
"min": "min_Latn", # Minangkabau
|
||||
"mk": "mkd_Cyrl", # Macedonian
|
||||
"mt": "mlt_Latn", # Maltese
|
||||
"mni": "mni_Beng", # Meitei
|
||||
"mos": "mos_Latn", # Mossi
|
||||
"mri": "mri_Latn", # Maori
|
||||
"mi": "mri_Latn", # Maori
|
||||
"my": "mya_Mymr", # Burmese
|
||||
"nl": "nld_Latn", # Dutch
|
||||
"nn": "nno_Latn", # Norwegian Nynorsk
|
||||
"nb": "nob_Latn", # Norwegian Bokmål
|
||||
"ne": "npi_Deva", # Nepali
|
||||
"nso": "nso_Latn", # Northern Sotho
|
||||
"nus": "nus_Latn", # Nuer
|
||||
"ory": "ory_Orya", # Odia
|
||||
"ny": "nya_Latn", # Chichewa
|
||||
"oc": "oci_Latn", # Occitan
|
||||
"or": "ory_Orya", # Odia
|
||||
"pag": "pag_Latn", # Pangasinan
|
||||
"pa": "pan_Guru", # Punjabi
|
||||
"pap": "pap_Latn", # Papiamento
|
||||
"ps": "pbt_Arab", # Pashto (Southern)
|
||||
"fa": "pes_Arab", # Persian
|
||||
"mg": "plt_Latn", # Malagasy
|
||||
"pl": "pol_Latn", # Polish
|
||||
"pt": "por_Latn", # Portuguese
|
||||
"prs": "prs_Arab", # Dari
|
||||
"quy": "quy_Latn", # Ayacucho Quechua
|
||||
"run": "run_Latn", # Rundi
|
||||
"sag": "sag_Latn", # Sango
|
||||
"san": "san_Deva", # Sanskrit
|
||||
"sat": "sat_Beng", # Santali
|
||||
"qu": "quy_Latn", # Ayacucho Quechua
|
||||
"ro": "ron_Latn", # Romanian
|
||||
"rn": "run_Latn", # Rundi
|
||||
"ru": "rus_Cyrl", # Russian
|
||||
"sg": "sag_Latn", # Sango
|
||||
"sa": "san_Deva", # Sanskrit
|
||||
"sat": "sat_Olck", # Santali
|
||||
"scn": "scn_Latn", # Sicilian
|
||||
"shn": "shn_Mymr", # Shan
|
||||
"srd": "srd_Latn", # Sardinian
|
||||
"si": "sin_Sinh", # Sinhala
|
||||
"sk": "slk_Latn", # Slovak
|
||||
"sl": "slv_Latn", # Slovenian
|
||||
"sm": "smo_Latn", # Samoan
|
||||
"sn": "sna_Latn", # Shona
|
||||
"sd": "snd_Arab", # Sindhi
|
||||
"so": "som_Latn", # Somali
|
||||
"st": "sot_Latn", # Sesotho
|
||||
"es": "spa_Latn", # Spanish
|
||||
"sq": "als_Latn", # Albanian (Tosk)
|
||||
"sc": "srd_Latn", # Sardinian
|
||||
"sr": "srp_Cyrl", # Serbian
|
||||
"ss": "ssw_Latn", # Swazi
|
||||
"su": "sun_Latn", # Sundanese
|
||||
"sv": "swe_Latn", # Swedish
|
||||
"sw": "swh_Latn", # Swahili
|
||||
"szl": "szl_Latn", # Silesian
|
||||
"taq": "taq_Latn", # Tamasheq
|
||||
"tat": "tat_Cyrl", # Tatar
|
||||
"tir": "tir_Ethi", # Tigrinya
|
||||
"ta": "tam_Taml", # Tamil
|
||||
"taq": "taq_Latn", # Tamasheq (Latin)
|
||||
"taq_tfng": "taq_Tfng", # Tamasheq (Tifinagh)
|
||||
"tt": "tat_Cyrl", # Tatar
|
||||
"te": "tel_Telu", # Telugu
|
||||
"tg": "tgk_Cyrl", # Tajik
|
||||
"tl": "tgl_Latn", # Tagalog
|
||||
"th": "tha_Thai", # Thai
|
||||
"ti": "tir_Ethi", # Tigrinya
|
||||
"tpi": "tpi_Latn", # Tok Pisin
|
||||
"tsn": "tsn_Latn", # Tswana
|
||||
"tso": "tso_Latn", # Tsonga
|
||||
"tn": "tsn_Latn", # Tswana
|
||||
"ts": "tso_Latn", # Tsonga
|
||||
"tk": "tuk_Latn", # Turkmen
|
||||
"tum": "tum_Latn", # Tumbuka
|
||||
"twi": "twi_Latn", # Twi
|
||||
"tr": "tur_Latn", # Turkish
|
||||
"tw": "twi_Latn", # Twi
|
||||
"tzm": "tzm_Tfng", # Central Atlas Tamazight
|
||||
"uig": "uig_Arab", # Uyghur
|
||||
"ug": "uig_Arab", # Uyghur
|
||||
"uk": "ukr_Cyrl", # Ukrainian
|
||||
"umb": "umb_Latn", # Umbundu
|
||||
"ur": "urd_Arab", # Urdu
|
||||
"uz": "uzn_Latn", # Uzbek (Northern)
|
||||
"vec": "vec_Latn", # Venetian
|
||||
"vi": "vie_Latn", # Vietnamese
|
||||
"war": "war_Latn", # Waray
|
||||
"wol": "wol_Latn", # Wolof
|
||||
"xho": "xho_Latn", # Xhosa
|
||||
"ydd": "ydd_Hebr", # Eastern Yiddish
|
||||
"yor": "yor_Latn", # Yoruba
|
||||
"wo": "wol_Latn", # Wolof
|
||||
"xh": "xho_Latn", # Xhosa
|
||||
"yi": "ydd_Hebr", # Eastern Yiddish
|
||||
"yo": "yor_Latn", # Yoruba
|
||||
"yue": "yue_Hant", # Cantonese
|
||||
"zho_hant": "zho_Hant", # Chinese (Traditional)
|
||||
"zh": "zho_Hans", # Chinese (Simplified)
|
||||
"zh_hant": "zho_Hant", # Chinese (Traditional)
|
||||
"ms": "zsm_Latn", # Malay (Standard)
|
||||
"zu": "zul_Latn", # Zulu
|
||||
}
|
||||
|
||||
def get_supported_languages(self, model_type: str = "m2m100") -> Dict[str, str]:
|
||||
|
||||
Reference in New Issue
Block a user