Add dual model support: M2M100 and NLLB-200
- Added optional 'model' parameter to translation request (default: m2m100)
- M2M100: 105 languages, Apache 2.0 License (commercial OK)
- NLLB-200: 200 languages, CC-BY-NC 4.0 License (non-commercial only)
- Updated /api/translate endpoint to accept model selection
- Updated /api/supported-languages to show languages per model
- Added comprehensive language name mappings for all NLLB-200 languages
- Both models can be used independently with automatic model loading
- Model information includes license and commercial use status
Example usage:
- Default (M2M100): {"text": "Hello", "source_lang": "en", "target_lang": "ko"}
- NLLB-200: {"text": "Hello", "source_lang": "en", "target_lang": "ko", "model": "nllb200"}
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
123
app/main.py
123
app/main.py
@@ -103,7 +103,8 @@ async def translate_text(request: TranslationRequest):
|
||||
translated_text, model_used = translator.translate(
|
||||
text=request.text,
|
||||
source_lang=request.source_lang,
|
||||
target_lang=request.target_lang
|
||||
target_lang=request.target_lang,
|
||||
model_type=request.model
|
||||
)
|
||||
|
||||
return TranslationResponse(
|
||||
@@ -125,8 +126,15 @@ async def translate_text(request: TranslationRequest):
|
||||
|
||||
|
||||
@app.get("/api/supported-languages")
|
||||
async def get_supported_languages():
|
||||
"""Get list of supported languages"""
|
||||
async def get_supported_languages(model: str = "m2m100"):
|
||||
"""
|
||||
Get list of supported languages for specified model
|
||||
|
||||
- **model**: Model type ('m2m100' or 'nllb200')
|
||||
"""
|
||||
|
||||
if model not in ["m2m100", "nllb200"]:
|
||||
raise HTTPException(status_code=400, detail="Invalid model. Choose 'm2m100' or 'nllb200'")
|
||||
|
||||
# Language names mapping
|
||||
lang_names = {
|
||||
@@ -235,10 +243,97 @@ async def get_supported_languages():
|
||||
"tk": {"name": "Turkmen", "native": "Türkmençe"},
|
||||
"ug": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
|
||||
"yi": {"name": "Yiddish", "native": "ייִדיש"},
|
||||
|
||||
# Additional NLLB-200 exclusive languages
|
||||
"ace": {"name": "Acehnese", "native": "Acèh"},
|
||||
"acm": {"name": "Mesopotamian Arabic", "native": "عراقي"},
|
||||
"acq": {"name": "Ta'izzi-Adeni Arabic", "native": "تعزية-عدنية"},
|
||||
"aeb": {"name": "Tunisian Arabic", "native": "تونسي"},
|
||||
"ajp": {"name": "South Levantine Arabic", "native": "شامي"},
|
||||
"als": {"name": "Tosk Albanian", "native": "Toskë"},
|
||||
"ars": {"name": "Najdi Arabic", "native": "نجدي"},
|
||||
"ary": {"name": "Moroccan Arabic", "native": "الدارجة"},
|
||||
"arz": {"name": "Egyptian Arabic", "native": "مصري"},
|
||||
"asm": {"name": "Assamese", "native": "অসমীয়া"},
|
||||
"ast": {"name": "Asturian", "native": "Asturianu"},
|
||||
"awa": {"name": "Awadhi", "native": "अवधी"},
|
||||
"ayr": {"name": "Central Aymara", "native": "Aymar aru"},
|
||||
"azb": {"name": "South Azerbaijani", "native": "تۆرکجه"},
|
||||
"bak": {"name": "Bashkir", "native": "Башҡортса"},
|
||||
"bam": {"name": "Bambara", "native": "Bamanankan"},
|
||||
"ban": {"name": "Balinese", "native": "Basa Bali"},
|
||||
"bho": {"name": "Bhojpuri", "native": "भोजपुरी"},
|
||||
"bjn": {"name": "Banjar", "native": "Bahasa Banjar"},
|
||||
"bod": {"name": "Tibetan", "native": "བོད་སྐད་"},
|
||||
"bug": {"name": "Buginese", "native": "Basa Ugi"},
|
||||
"crh": {"name": "Crimean Tatar", "native": "Qırımtatar tili"},
|
||||
"cjk": {"name": "Chokwe", "native": "Chokwe"},
|
||||
"ckb": {"name": "Central Kurdish", "native": "کوردیی ناوەندی"},
|
||||
"dik": {"name": "Southwestern Dinka", "native": "Thuɔŋjäŋ"},
|
||||
"dyu": {"name": "Dyula", "native": "Jula"},
|
||||
"dzo": {"name": "Dzongkha", "native": "རྫོང་ཁ"},
|
||||
"fur": {"name": "Friulian", "native": "Furlan"},
|
||||
"fuv": {"name": "Nigerian Fulfulde", "native": "Fulfulde"},
|
||||
"gaz": {"name": "West Central Oromo", "native": "Oromoo"},
|
||||
"grn": {"name": "Guarani", "native": "Avañe'ẽ"},
|
||||
"hne": {"name": "Chhattisgarhi", "native": "छत्तीसगढ़ी"},
|
||||
"ilo": {"name": "Iloko", "native": "Ilokano"},
|
||||
"kab": {"name": "Kabyle", "native": "Taqbaylit"},
|
||||
"kac": {"name": "Jingpho", "native": "Jinghpaw"},
|
||||
"kam": {"name": "Kamba", "native": "Kikamba"},
|
||||
"kas": {"name": "Kashmiri", "native": "कॉशुर"},
|
||||
"kea": {"name": "Kabuverdianu", "native": "Kabuverdianu"},
|
||||
"khk": {"name": "Halh Mongolian", "native": "Монгол хэл"},
|
||||
"kin": {"name": "Kinyarwanda", "native": "Ikinyarwanda"},
|
||||
"lij": {"name": "Ligurian", "native": "Ligure"},
|
||||
"lim": {"name": "Limburgish", "native": "Limburgs"},
|
||||
"lin": {"name": "Lingala", "native": "Lingála"},
|
||||
"lmo": {"name": "Lombard", "native": "Lombard"},
|
||||
"ltg": {"name": "Latgalian", "native": "Latgalīšu"},
|
||||
"luo": {"name": "Luo", "native": "Dholuo"},
|
||||
"lus": {"name": "Mizo", "native": "Mizo ṭawng"},
|
||||
"mag": {"name": "Magahi", "native": "मगही"},
|
||||
"mai": {"name": "Maithili", "native": "मैथिली"},
|
||||
"min": {"name": "Minangkabau", "native": "Baso Minangkabau"},
|
||||
"mni": {"name": "Meitei", "native": "মৈতৈলোন্"},
|
||||
"mos": {"name": "Mossi", "native": "Mooré"},
|
||||
"mri": {"name": "Maori", "native": "Te Reo Māori"},
|
||||
"nus": {"name": "Nuer", "native": "Thok Naath"},
|
||||
"ory": {"name": "Odia", "native": "ଓଡ଼ିଆ"},
|
||||
"pag": {"name": "Pangasinan", "native": "Pangasinan"},
|
||||
"pap": {"name": "Papiamento", "native": "Papiamentu"},
|
||||
"prs": {"name": "Dari", "native": "دری"},
|
||||
"quy": {"name": "Ayacucho Quechua", "native": "Chanka Qhichwa"},
|
||||
"run": {"name": "Rundi", "native": "Ikirundi"},
|
||||
"sag": {"name": "Sango", "native": "Sängö"},
|
||||
"san": {"name": "Sanskrit", "native": "संस्कृतम्"},
|
||||
"sat": {"name": "Santali", "native": "ᱥᱟᱱᱛᱟᱲᱤ"},
|
||||
"scn": {"name": "Sicilian", "native": "Sicilianu"},
|
||||
"shn": {"name": "Shan", "native": "လိၵ်ႈတႆး"},
|
||||
"srd": {"name": "Sardinian", "native": "Sardu"},
|
||||
"szl": {"name": "Silesian", "native": "Ślōnski"},
|
||||
"taq": {"name": "Tamasheq", "native": "Tamasheq"},
|
||||
"tat": {"name": "Tatar", "native": "Татарча"},
|
||||
"tir": {"name": "Tigrinya", "native": "ትግርኛ"},
|
||||
"tpi": {"name": "Tok Pisin", "native": "Tok Pisin"},
|
||||
"tsn": {"name": "Tswana", "native": "Setswana"},
|
||||
"tso": {"name": "Tsonga", "native": "Xitsonga"},
|
||||
"tum": {"name": "Tumbuka", "native": "Chitumbuka"},
|
||||
"twi": {"name": "Twi", "native": "Twi"},
|
||||
"tzm": {"name": "Central Atlas Tamazight", "native": "ⵜⴰⵎⴰⵣⵉⵖⵜ"},
|
||||
"uig": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
|
||||
"vec": {"name": "Venetian", "native": "Vèneto"},
|
||||
"war": {"name": "Waray", "native": "Winaray"},
|
||||
"wol": {"name": "Wolof", "native": "Wolof"},
|
||||
"xho": {"name": "Xhosa", "native": "isiXhosa"},
|
||||
"ydd": {"name": "Eastern Yiddish", "native": "ייִדיש"},
|
||||
"yor": {"name": "Yoruba", "native": "Yorùbá"},
|
||||
"yue": {"name": "Cantonese", "native": "粵語"},
|
||||
"zho_hant": {"name": "Chinese (Traditional)", "native": "繁體中文"},
|
||||
}
|
||||
|
||||
# Get all supported language codes from translator
|
||||
supported_codes = list(translator.lang_codes.keys())
|
||||
# Get all supported language codes from translator based on model type
|
||||
supported_codes = list(translator.get_supported_languages(model).keys())
|
||||
|
||||
# Build language list
|
||||
languages = [
|
||||
@@ -250,7 +345,25 @@ async def get_supported_languages():
|
||||
for code in sorted(supported_codes)
|
||||
]
|
||||
|
||||
model_info = {
|
||||
"m2m100": {
|
||||
"name": "M2M100",
|
||||
"languages": 105,
|
||||
"license": "Apache 2.0",
|
||||
"commercial_use": True,
|
||||
"model_id": "facebook/m2m100_418M"
|
||||
},
|
||||
"nllb200": {
|
||||
"name": "NLLB-200",
|
||||
"languages": 200,
|
||||
"license": "CC-BY-NC 4.0",
|
||||
"commercial_use": False,
|
||||
"model_id": "facebook/nllb-200-distilled-600M"
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
"model": model_info[model],
|
||||
"languages": languages,
|
||||
"total_languages": len(languages),
|
||||
"note": "All language pairs are supported (any-to-any translation)"
|
||||
|
||||
Reference in New Issue
Block a user