- Added optional 'model' parameter to translation request (default: m2m100)
- M2M100: 105 languages, Apache 2.0 License (commercial OK)
- NLLB-200: 200 languages, CC-BY-NC 4.0 License (non-commercial only)
- Updated /api/translate endpoint to accept model selection
- Updated /api/supported-languages to show languages per model
- Added comprehensive language name mappings for all NLLB-200 languages
- Both models can be used independently with automatic model loading
- Model information includes license and commercial use status
Example usage:
- Default (M2M100): {"text": "Hello", "source_lang": "en", "target_lang": "ko"}
- NLLB-200: {"text": "Hello", "source_lang": "en", "target_lang": "ko", "model": "nllb200"}
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
371 lines
17 KiB
Python
371 lines
17 KiB
Python
from fastapi import FastAPI, HTTPException
|
||
from fastapi.middleware.cors import CORSMiddleware
|
||
from contextlib import asynccontextmanager
|
||
import logging
|
||
|
||
from .config import settings
|
||
from .models import TranslationRequest, TranslationResponse, HealthResponse
|
||
from .translator import translator
|
||
|
||
# Configure the root logger: INFO level, "timestamp - logger name - level - message" lines.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used by the lifespan handler and the endpoints below.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: preload models on startup, log on shutdown.

    Everything before the ``yield`` runs at startup and calls
    ``translator.preload_all_models()``. If that raises, the error is logged
    and re-raised. Everything after the ``yield`` runs at shutdown.
    """
    # --- startup ---
    logger.info("Starting Multilingual Translation API...")
    try:
        logger.info("Preloading translation models...")
        translator.preload_all_models()
        logger.info("Models loaded successfully")
    except Exception as exc:
        # Record the failure, then let it propagate unchanged.
        logger.error("Error during startup: %s", exc)
        raise

    yield

    # --- shutdown ---
    logger.info("Shutting down Multilingual Translation API...")
|
||
|
||
|
||
# Build the FastAPI application; title/version/description come from settings,
# and the lifespan handler defined above manages startup and shutdown.
app = FastAPI(
    lifespan=lifespan,
    title=settings.api_title,
    description=settings.api_description,
    version=settings.api_version,
)
|
||
|
||
# Register CORS handling: origins come from settings, while every method and
# header is accepted.
# NOTE(review): credentials are allowed for every origin in
# settings.allowed_origins - confirm that list never contains "*" in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.allowed_origins,
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
|
||
|
||
|
||
@app.get("/", response_model=dict)
async def root():
    """Return basic API metadata and a map of the service's endpoints.

    The name, version and description are read from ``settings``. The
    ``endpoints`` mapping lists the routes registered in this module together
    with the ``/docs`` path.
    """
    return {
        "name": settings.api_title,
        "version": settings.api_version,
        "description": settings.api_description,
        "endpoints": {
            "translate": "/api/translate",
            # Listed so clients can discover the per-model language catalogue.
            "supported_languages": "/api/supported-languages",
            "health": "/health",
            "docs": "/docs",
        },
    }
|
||
|
||
|
||
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service health based on ``translator.is_ready()``.

    Responds with status "healthy" when the translator reports ready and
    "degraded" otherwise; ``models_loaded`` carries the readiness value itself.
    """
    ready = translator.is_ready()

    if ready:
        status, message = "healthy", "Translation service is running"
    else:
        status, message = "degraded", "Models not loaded"

    return HealthResponse(status=status, message=message, models_loaded=ready)
|
||
|
||
|
||
@app.post("/api/translate", response_model=TranslationResponse)
async def translate_text(request: TranslationRequest):
    """
    Translate text using the requested model (M2M100 or NLLB-200)

    - **text**: Text to translate (1-5000 characters)
    - **source_lang**: Source language code (e.g., 'en', 'ko', 'ms', 'bn', 'ja', 'zh', etc.)
    - **target_lang**: Target language code (e.g., 'en', 'ko', 'ms', 'bn', 'ja', 'zh', etc.)
    - **model**: Model to translate with ('m2m100' or 'nllb200')

    See /api/supported-languages?model=... for the languages each model accepts.

    Errors:

    - **400**: source and target languages are identical, or the translator rejected the request
    - **500**: any other failure during translation
    """
    # Reject no-op requests before touching the translator.
    if request.source_lang == request.target_lang:
        raise HTTPException(
            status_code=400,
            detail="Source and target languages must be different"
        )

    try:
        # NOTE(review): translate() is called synchronously inside this coroutine;
        # confirm that blocking the event loop for the duration is acceptable.
        translated_text, model_used = translator.translate(
            text=request.text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
            model_type=request.model
        )

        return TranslationResponse(
            original_text=request.text,
            translated_text=translated_text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
            model_used=model_used
        )

    except ValueError as exc:
        # ValueError is reported to the client verbatim as a bad request.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        # Keep the traceback in the server log; the client only sees a generic message.
        logger.exception("Translation error: %s", exc)
        raise HTTPException(
            status_code=500,
            detail="Translation failed. Please try again."
        ) from exc
|
||
|
||
|
||
# Display names for language codes, keyed by the code the translator reports.
# Built once at import time instead of on every request.
_LANGUAGE_NAMES = {
    "en": {"name": "English", "native": "English"},
    "zh": {"name": "Chinese", "native": "中文"},
    "es": {"name": "Spanish", "native": "Español"},
    "ar": {"name": "Arabic", "native": "العربية"},
    "hi": {"name": "Hindi", "native": "हिन्दी"},
    "bn": {"name": "Bengali", "native": "বাংলা"},
    "pt": {"name": "Portuguese", "native": "Português"},
    "ru": {"name": "Russian", "native": "Русский"},
    "ja": {"name": "Japanese", "native": "日本語"},
    "de": {"name": "German", "native": "Deutsch"},
    "fr": {"name": "French", "native": "Français"},
    "ko": {"name": "Korean", "native": "한국어"},
    "it": {"name": "Italian", "native": "Italiano"},
    "tr": {"name": "Turkish", "native": "Türkçe"},
    "vi": {"name": "Vietnamese", "native": "Tiếng Việt"},
    "th": {"name": "Thai", "native": "ไทย"},
    "pl": {"name": "Polish", "native": "Polski"},
    "nl": {"name": "Dutch", "native": "Nederlands"},
    "uk": {"name": "Ukrainian", "native": "Українська"},
    "ro": {"name": "Romanian", "native": "Română"},
    "ms": {"name": "Malay", "native": "Bahasa Melayu"},
    "id": {"name": "Indonesian", "native": "Bahasa Indonesia"},
    "tl": {"name": "Tagalog", "native": "Tagalog"},
    "my": {"name": "Burmese", "native": "မြန်မာဘာသာ"},
    "km": {"name": "Khmer", "native": "ភាសាខ្មែរ"},
    "lo": {"name": "Lao", "native": "ລາວ"},
    "ur": {"name": "Urdu", "native": "اردو"},
    "ta": {"name": "Tamil", "native": "தமிழ்"},
    "te": {"name": "Telugu", "native": "తెలుగు"},
    "mr": {"name": "Marathi", "native": "मराठी"},
    "gu": {"name": "Gujarati", "native": "ગુજરાતી"},
    "kn": {"name": "Kannada", "native": "ಕನ್ನಡ"},
    "ml": {"name": "Malayalam", "native": "മലയാളം"},
    "pa": {"name": "Punjabi", "native": "ਪੰਜਾਬੀ"},
    "ne": {"name": "Nepali", "native": "नेपाली"},
    "si": {"name": "Sinhala", "native": "සිංහල"},
    "sv": {"name": "Swedish", "native": "Svenska"},
    "da": {"name": "Danish", "native": "Dansk"},
    "fi": {"name": "Finnish", "native": "Suomi"},
    "no": {"name": "Norwegian", "native": "Norsk"},
    "cs": {"name": "Czech", "native": "Čeština"},
    "sk": {"name": "Slovak", "native": "Slovenčina"},
    "hu": {"name": "Hungarian", "native": "Magyar"},
    "bg": {"name": "Bulgarian", "native": "Български"},
    "sr": {"name": "Serbian", "native": "Српски"},
    "hr": {"name": "Croatian", "native": "Hrvatski"},
    "sl": {"name": "Slovenian", "native": "Slovenščina"},
    "et": {"name": "Estonian", "native": "Eesti"},
    "lv": {"name": "Latvian", "native": "Latviešu"},
    "lt": {"name": "Lithuanian", "native": "Lietuvių"},
    "el": {"name": "Greek", "native": "Ελληνικά"},
    "he": {"name": "Hebrew", "native": "עברית"},
    "fa": {"name": "Persian", "native": "فارسی"},
    "sw": {"name": "Swahili", "native": "Kiswahili"},
    "am": {"name": "Amharic", "native": "አማርኛ"},
    "ha": {"name": "Hausa", "native": "Hausa"},
    "ig": {"name": "Igbo", "native": "Igbo"},
    "yo": {"name": "Yoruba", "native": "Yorùbá"},
    "zu": {"name": "Zulu", "native": "isiZulu"},
    "xh": {"name": "Xhosa", "native": "isiXhosa"},
    "af": {"name": "Afrikaans", "native": "Afrikaans"},
    "az": {"name": "Azerbaijani", "native": "Azərbaycan"},
    "ka": {"name": "Georgian", "native": "ქართული"},
    "kk": {"name": "Kazakh", "native": "Қазақша"},
    "uz": {"name": "Uzbek", "native": "Oʻzbekcha"},
    "mn": {"name": "Mongolian", "native": "Монгол"},
    "sq": {"name": "Albanian", "native": "Shqip"},
    "hy": {"name": "Armenian", "native": "Հայերեն"},
    "be": {"name": "Belarusian", "native": "Беларуская"},
    "bs": {"name": "Bosnian", "native": "Bosanski"},
    "ca": {"name": "Catalan", "native": "Català"},
    "ceb": {"name": "Cebuano", "native": "Cebuano"},
    "cy": {"name": "Welsh", "native": "Cymraeg"},
    "eo": {"name": "Esperanto", "native": "Esperanto"},
    "eu": {"name": "Basque", "native": "Euskara"},
    "fil": {"name": "Filipino", "native": "Filipino"},
    "fy": {"name": "Frisian", "native": "Frysk"},
    "ga": {"name": "Irish", "native": "Gaeilge"},
    "gd": {"name": "Scottish Gaelic", "native": "Gàidhlig"},
    "gl": {"name": "Galician", "native": "Galego"},
    "haw": {"name": "Hawaiian", "native": "ʻŌlelo Hawaiʻi"},
    "hmn": {"name": "Hmong", "native": "Hmong"},
    "ht": {"name": "Haitian Creole", "native": "Kreyòl ayisyen"},
    "is": {"name": "Icelandic", "native": "Íslenska"},
    "jv": {"name": "Javanese", "native": "Basa Jawa"},
    "ku": {"name": "Kurdish", "native": "Kurdî"},
    "ky": {"name": "Kyrgyz", "native": "Кыргызча"},
    "la": {"name": "Latin", "native": "Latina"},
    "lb": {"name": "Luxembourgish", "native": "Lëtzebuergesch"},
    "lg": {"name": "Luganda", "native": "Luganda"},
    "ln": {"name": "Lingala", "native": "Lingála"},
    "mg": {"name": "Malagasy", "native": "Malagasy"},
    "mi": {"name": "Maori", "native": "Te Reo Māori"},
    "mk": {"name": "Macedonian", "native": "Македонски"},
    "mt": {"name": "Maltese", "native": "Malti"},
    "ny": {"name": "Chichewa", "native": "Chichewa"},
    "ps": {"name": "Pashto", "native": "پښتو"},
    "sn": {"name": "Shona", "native": "chiShona"},
    "so": {"name": "Somali", "native": "Soomaali"},
    "st": {"name": "Sesotho", "native": "Sesotho"},
    "su": {"name": "Sundanese", "native": "Basa Sunda"},
    "tg": {"name": "Tajik", "native": "Тоҷикӣ"},
    "tk": {"name": "Turkmen", "native": "Türkmençe"},
    "ug": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
    "yi": {"name": "Yiddish", "native": "ייִדיש"},

    # Additional NLLB-200 exclusive languages
    "ace": {"name": "Acehnese", "native": "Acèh"},
    "acm": {"name": "Mesopotamian Arabic", "native": "عراقي"},
    "acq": {"name": "Ta'izzi-Adeni Arabic", "native": "تعزية-عدنية"},
    "aeb": {"name": "Tunisian Arabic", "native": "تونسي"},
    "ajp": {"name": "South Levantine Arabic", "native": "شامي"},
    "als": {"name": "Tosk Albanian", "native": "Toskë"},
    "ars": {"name": "Najdi Arabic", "native": "نجدي"},
    "ary": {"name": "Moroccan Arabic", "native": "الدارجة"},
    "arz": {"name": "Egyptian Arabic", "native": "مصري"},
    "asm": {"name": "Assamese", "native": "অসমীয়া"},
    "ast": {"name": "Asturian", "native": "Asturianu"},
    "awa": {"name": "Awadhi", "native": "अवधी"},
    "ayr": {"name": "Central Aymara", "native": "Aymar aru"},
    "azb": {"name": "South Azerbaijani", "native": "تۆرکجه"},
    "bak": {"name": "Bashkir", "native": "Башҡортса"},
    "bam": {"name": "Bambara", "native": "Bamanankan"},
    "ban": {"name": "Balinese", "native": "Basa Bali"},
    "bho": {"name": "Bhojpuri", "native": "भोजपुरी"},
    "bjn": {"name": "Banjar", "native": "Bahasa Banjar"},
    "bod": {"name": "Tibetan", "native": "བོད་སྐད་"},
    "bug": {"name": "Buginese", "native": "Basa Ugi"},
    "crh": {"name": "Crimean Tatar", "native": "Qırımtatar tili"},
    "cjk": {"name": "Chokwe", "native": "Chokwe"},
    "ckb": {"name": "Central Kurdish", "native": "کوردیی ناوەندی"},
    "dik": {"name": "Southwestern Dinka", "native": "Thuɔŋjäŋ"},
    "dyu": {"name": "Dyula", "native": "Jula"},
    "dzo": {"name": "Dzongkha", "native": "རྫོང་ཁ"},
    "fur": {"name": "Friulian", "native": "Furlan"},
    "fuv": {"name": "Nigerian Fulfulde", "native": "Fulfulde"},
    "gaz": {"name": "West Central Oromo", "native": "Oromoo"},
    "grn": {"name": "Guarani", "native": "Avañe'ẽ"},
    "hne": {"name": "Chhattisgarhi", "native": "छत्तीसगढ़ी"},
    "ilo": {"name": "Iloko", "native": "Ilokano"},
    "kab": {"name": "Kabyle", "native": "Taqbaylit"},
    "kac": {"name": "Jingpho", "native": "Jinghpaw"},
    "kam": {"name": "Kamba", "native": "Kikamba"},
    "kas": {"name": "Kashmiri", "native": "कॉशुर"},
    "kea": {"name": "Kabuverdianu", "native": "Kabuverdianu"},
    "khk": {"name": "Halh Mongolian", "native": "Монгол хэл"},
    "kin": {"name": "Kinyarwanda", "native": "Ikinyarwanda"},
    "lij": {"name": "Ligurian", "native": "Ligure"},
    "lim": {"name": "Limburgish", "native": "Limburgs"},
    "lin": {"name": "Lingala", "native": "Lingála"},
    "lmo": {"name": "Lombard", "native": "Lombard"},
    "ltg": {"name": "Latgalian", "native": "Latgalīšu"},
    "luo": {"name": "Luo", "native": "Dholuo"},
    "lus": {"name": "Mizo", "native": "Mizo ṭawng"},
    "mag": {"name": "Magahi", "native": "मगही"},
    "mai": {"name": "Maithili", "native": "मैथिली"},
    "min": {"name": "Minangkabau", "native": "Baso Minangkabau"},
    "mni": {"name": "Meitei", "native": "মৈতৈলোন্"},
    "mos": {"name": "Mossi", "native": "Mooré"},
    "mri": {"name": "Maori", "native": "Te Reo Māori"},
    "nus": {"name": "Nuer", "native": "Thok Naath"},
    "ory": {"name": "Odia", "native": "ଓଡ଼ିଆ"},
    "pag": {"name": "Pangasinan", "native": "Pangasinan"},
    "pap": {"name": "Papiamento", "native": "Papiamentu"},
    "prs": {"name": "Dari", "native": "دری"},
    "quy": {"name": "Ayacucho Quechua", "native": "Chanka Qhichwa"},
    "run": {"name": "Rundi", "native": "Ikirundi"},
    "sag": {"name": "Sango", "native": "Sängö"},
    "san": {"name": "Sanskrit", "native": "संस्कृतम्"},
    "sat": {"name": "Santali", "native": "ᱥᱟᱱᱛᱟᱲᱤ"},
    "scn": {"name": "Sicilian", "native": "Sicilianu"},
    "shn": {"name": "Shan", "native": "လိၵ်ႈတႆး"},
    "srd": {"name": "Sardinian", "native": "Sardu"},
    "szl": {"name": "Silesian", "native": "Ślōnski"},
    "taq": {"name": "Tamasheq", "native": "Tamasheq"},
    "tat": {"name": "Tatar", "native": "Татарча"},
    "tir": {"name": "Tigrinya", "native": "ትግርኛ"},
    "tpi": {"name": "Tok Pisin", "native": "Tok Pisin"},
    "tsn": {"name": "Tswana", "native": "Setswana"},
    "tso": {"name": "Tsonga", "native": "Xitsonga"},
    "tum": {"name": "Tumbuka", "native": "Chitumbuka"},
    "twi": {"name": "Twi", "native": "Twi"},
    "tzm": {"name": "Central Atlas Tamazight", "native": "ⵜⴰⵎⴰⵣⵉⵖⵜ"},
    "uig": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
    "vec": {"name": "Venetian", "native": "Vèneto"},
    "war": {"name": "Waray", "native": "Winaray"},
    "wol": {"name": "Wolof", "native": "Wolof"},
    "xho": {"name": "Xhosa", "native": "isiXhosa"},
    "ydd": {"name": "Eastern Yiddish", "native": "ייִדיش"},
    "yor": {"name": "Yoruba", "native": "Yorùbá"},
    "yue": {"name": "Cantonese", "native": "粵語"},
    "zho_hant": {"name": "Chinese (Traditional)", "native": "繁體中文"},
}

# Static metadata returned alongside the language list; its keys are also the
# set of model names the endpoint accepts.
_MODEL_INFO = {
    "m2m100": {
        "name": "M2M100",
        "languages": 105,
        "license": "Apache 2.0",
        "commercial_use": True,
        "model_id": "facebook/m2m100_418M",
    },
    "nllb200": {
        "name": "NLLB-200",
        "languages": 200,
        "license": "CC-BY-NC 4.0",
        "commercial_use": False,
        "model_id": "facebook/nllb-200-distilled-600M",
    },
}


def _language_entry(code):
    """Build the response item for one language code.

    Codes without an entry in ``_LANGUAGE_NAMES`` fall back to the
    upper-cased code for both the English and the native name.
    """
    names = _LANGUAGE_NAMES.get(code, {})
    fallback = code.upper()
    return {
        "code": code,
        "name": names.get("name", fallback),
        "native_name": names.get("native", fallback),
    }


@app.get("/api/supported-languages")
async def get_supported_languages(model: str = "m2m100"):
    """
    Get list of supported languages for specified model

    - **model**: Model type ('m2m100' or 'nllb200')

    Returns the model's metadata, its language list sorted by code, and the
    number of languages listed. Responds with 400 for an unknown model.
    """
    if model not in _MODEL_INFO:
        raise HTTPException(status_code=400, detail="Invalid model. Choose 'm2m100' or 'nllb200'")

    # The translator is the source of truth for which codes a model supports;
    # the name table above only adds display names.
    supported_codes = sorted(translator.get_supported_languages(model).keys())
    languages = [_language_entry(code) for code in supported_codes]

    return {
        "model": _MODEL_INFO[model],
        "languages": languages,
        "total_languages": len(languages),
        "note": "All language pairs are supported (any-to-any translation)",
    }
|