Files
multilingual-translation/app/main.py
jungwoo choi 28e26d19b6 Add dual model support: M2M100 and NLLB-200
- Added optional 'model' parameter to translation request (default: m2m100)
- M2M100: 105 languages, Apache 2.0 License (commercial OK)
- NLLB-200: 200 languages, CC-BY-NC 4.0 License (non-commercial only)
- Updated /api/translate endpoint to accept model selection
- Updated /api/supported-languages to show languages per model
- Added comprehensive language name mappings for all NLLB-200 languages
- Both models can be used independently with automatic model loading
- Model information includes license and commercial use status

Example usage:
- Default (M2M100): {"text": "Hello", "source_lang": "en", "target_lang": "ko"}
- NLLB-200: {"text": "Hello", "source_lang": "en", "target_lang": "ko", "model": "nllb200"}

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-11 15:57:00 +09:00

371 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import logging
from .config import settings
from .models import TranslationRequest, TranslationResponse, HealthResponse
from .translator import translator
# Configure logging: set up the root logger once at import time. Each record
# shows a timestamp, the emitting logger's name, the level, and the message.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by the handlers in this file.
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves requests and log at shutdown.

    Startup preloads every translation model through the shared
    ``translator``. A failure is logged and re-raised. Everything after the
    ``yield`` runs when the application shuts down.
    """
    # --- Startup ---
    logger.info("Starting Multilingual Translation API...")
    logger.info("Preloading translation models...")
    try:
        translator.preload_all_models()
    except Exception as startup_error:
        logger.error(f"Error during startup: {str(startup_error)}")
        raise
    else:
        logger.info("Models loaded successfully")

    yield

    # --- Shutdown ---
    logger.info("Shutting down Multilingual Translation API...")
# Create the FastAPI application; its metadata comes from settings and its
# startup/shutdown work is delegated to the lifespan handler.
app = FastAPI(
    lifespan=lifespan,
    title=settings.api_title,
    version=settings.api_version,
    description=settings.api_description,
)

# CORS policy: origins come from settings; every method and header is allowed.
# NOTE(review): allow_credentials=True is only safe with an explicit origin
# list - confirm settings.allowed_origins is never ["*"].
_cors_options = {
    "allow_origins": settings.allowed_origins,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
@app.get("/", response_model=dict)
async def root():
    """Return the API's name, version, description and main endpoint paths."""
    endpoint_paths = {
        "translate": "/api/translate",
        "health": "/health",
        "docs": "/docs",
    }
    api_info = {
        "name": settings.api_title,
        "version": settings.api_version,
        "description": settings.api_description,
        "endpoints": endpoint_paths,
    }
    return api_info
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report whether the translator says its models are ready.

    The status is "healthy" when ``translator.is_ready()`` is truthy and
    "degraded" otherwise; the readiness value itself is returned as
    ``models_loaded``.
    """
    ready = translator.is_ready()
    if ready:
        status = "healthy"
        message = "Translation service is running"
    else:
        status = "degraded"
        message = "Models not loaded"
    return HealthResponse(status=status, message=message, models_loaded=ready)
@app.post("/api/translate", response_model=TranslationResponse)
async def translate_text(request: TranslationRequest):
    """
    Translate text between languages using the M2M100 or NLLB-200 model

    - **text**: Text to translate (1-5000 characters)
    - **source_lang**: Source language code (e.g., 'en', 'ko', 'ms', 'bn', 'ja', 'zh', etc.)
    - **target_lang**: Target language code (e.g., 'en', 'ko', 'ms', 'bn', 'ja', 'zh', etc.)
    - **model**: Translation model to use: 'm2m100' (default) or 'nllb200'

    Supports any-to-any translation between the languages of the selected model.
    See /api/supported-languages for the full list per model.

    Responds with 400 when source and target languages are equal or when the
    translator raises ValueError, and with 500 for any other failure.
    """
    # Validate language pair
    if request.source_lang == request.target_lang:
        raise HTTPException(
            status_code=400,
            detail="Source and target languages must be different"
        )

    try:
        # Perform translation; the translator also reports which model it used.
        translated_text, model_used = translator.translate(
            text=request.text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
            model_type=request.model
        )
        return TranslationResponse(
            original_text=request.text,
            translated_text=translated_text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
            model_used=model_used
        )
    except ValueError as e:
        # The ValueError's message is passed to the client as the 400 detail.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Keep the full traceback in the log; the client only sees a generic
        # message so internal details are not leaked.
        logger.exception("Translation error: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Translation failed. Please try again."
        ) from e
# Display names keyed by language code: English name plus native-script name.
# Built once at import time instead of on every request.
_LANGUAGE_NAMES = {
    "en": {"name": "English", "native": "English"},
    "zh": {"name": "Chinese", "native": "中文"},
    "es": {"name": "Spanish", "native": "Español"},
    "ar": {"name": "Arabic", "native": "العربية"},
    "hi": {"name": "Hindi", "native": "हिन्दी"},
    "bn": {"name": "Bengali", "native": "বাংলা"},
    "pt": {"name": "Portuguese", "native": "Português"},
    "ru": {"name": "Russian", "native": "Русский"},
    "ja": {"name": "Japanese", "native": "日本語"},
    "de": {"name": "German", "native": "Deutsch"},
    "fr": {"name": "French", "native": "Français"},
    "ko": {"name": "Korean", "native": "한국어"},
    "it": {"name": "Italian", "native": "Italiano"},
    "tr": {"name": "Turkish", "native": "Türkçe"},
    "vi": {"name": "Vietnamese", "native": "Tiếng Việt"},
    "th": {"name": "Thai", "native": "ไทย"},
    "pl": {"name": "Polish", "native": "Polski"},
    "nl": {"name": "Dutch", "native": "Nederlands"},
    "uk": {"name": "Ukrainian", "native": "Українська"},
    "ro": {"name": "Romanian", "native": "Română"},
    "ms": {"name": "Malay", "native": "Bahasa Melayu"},
    "id": {"name": "Indonesian", "native": "Bahasa Indonesia"},
    "tl": {"name": "Tagalog", "native": "Tagalog"},
    "my": {"name": "Burmese", "native": "မြန်မာဘာသာ"},
    "km": {"name": "Khmer", "native": "ភាសាខ្មែរ"},
    "lo": {"name": "Lao", "native": "ລາວ"},
    "ur": {"name": "Urdu", "native": "اردو"},
    "ta": {"name": "Tamil", "native": "தமிழ்"},
    "te": {"name": "Telugu", "native": "తెలుగు"},
    "mr": {"name": "Marathi", "native": "मराठी"},
    "gu": {"name": "Gujarati", "native": "ગુજરાતી"},
    "kn": {"name": "Kannada", "native": "ಕನ್ನಡ"},
    "ml": {"name": "Malayalam", "native": "മലയാളം"},
    "pa": {"name": "Punjabi", "native": "ਪੰਜਾਬੀ"},
    "ne": {"name": "Nepali", "native": "नेपाली"},
    "si": {"name": "Sinhala", "native": "සිංහල"},
    "sv": {"name": "Swedish", "native": "Svenska"},
    "da": {"name": "Danish", "native": "Dansk"},
    "fi": {"name": "Finnish", "native": "Suomi"},
    "no": {"name": "Norwegian", "native": "Norsk"},
    "cs": {"name": "Czech", "native": "Čeština"},
    "sk": {"name": "Slovak", "native": "Slovenčina"},
    "hu": {"name": "Hungarian", "native": "Magyar"},
    "bg": {"name": "Bulgarian", "native": "Български"},
    "sr": {"name": "Serbian", "native": "Српски"},
    "hr": {"name": "Croatian", "native": "Hrvatski"},
    "sl": {"name": "Slovenian", "native": "Slovenščina"},
    "et": {"name": "Estonian", "native": "Eesti"},
    "lv": {"name": "Latvian", "native": "Latviešu"},
    "lt": {"name": "Lithuanian", "native": "Lietuvių"},
    "el": {"name": "Greek", "native": "Ελληνικά"},
    "he": {"name": "Hebrew", "native": "עברית"},
    "fa": {"name": "Persian", "native": "فارسی"},
    "sw": {"name": "Swahili", "native": "Kiswahili"},
    "am": {"name": "Amharic", "native": "አማርኛ"},
    "ha": {"name": "Hausa", "native": "Hausa"},
    "ig": {"name": "Igbo", "native": "Igbo"},
    "yo": {"name": "Yoruba", "native": "Yorùbá"},
    "zu": {"name": "Zulu", "native": "isiZulu"},
    "xh": {"name": "Xhosa", "native": "isiXhosa"},
    "af": {"name": "Afrikaans", "native": "Afrikaans"},
    "az": {"name": "Azerbaijani", "native": "Azərbaycan"},
    "ka": {"name": "Georgian", "native": "ქართული"},
    "kk": {"name": "Kazakh", "native": "Қазақша"},
    "uz": {"name": "Uzbek", "native": "Oʻzbekcha"},
    "mn": {"name": "Mongolian", "native": "Монгол"},
    "sq": {"name": "Albanian", "native": "Shqip"},
    "hy": {"name": "Armenian", "native": "Հայերեն"},
    "be": {"name": "Belarusian", "native": "Беларуская"},
    "bs": {"name": "Bosnian", "native": "Bosanski"},
    "ca": {"name": "Catalan", "native": "Català"},
    "ceb": {"name": "Cebuano", "native": "Cebuano"},
    "cy": {"name": "Welsh", "native": "Cymraeg"},
    "eo": {"name": "Esperanto", "native": "Esperanto"},
    "eu": {"name": "Basque", "native": "Euskara"},
    "fil": {"name": "Filipino", "native": "Filipino"},
    "fy": {"name": "Frisian", "native": "Frysk"},
    "ga": {"name": "Irish", "native": "Gaeilge"},
    "gd": {"name": "Scottish Gaelic", "native": "Gàidhlig"},
    "gl": {"name": "Galician", "native": "Galego"},
    "haw": {"name": "Hawaiian", "native": "ʻŌlelo Hawaiʻi"},
    "hmn": {"name": "Hmong", "native": "Hmong"},
    "ht": {"name": "Haitian Creole", "native": "Kreyòl ayisyen"},
    "is": {"name": "Icelandic", "native": "Íslenska"},
    "jv": {"name": "Javanese", "native": "Basa Jawa"},
    "ku": {"name": "Kurdish", "native": "Kurdî"},
    "ky": {"name": "Kyrgyz", "native": "Кыргызча"},
    "la": {"name": "Latin", "native": "Latina"},
    "lb": {"name": "Luxembourgish", "native": "Lëtzebuergesch"},
    "lg": {"name": "Luganda", "native": "Luganda"},
    "ln": {"name": "Lingala", "native": "Lingála"},
    "mg": {"name": "Malagasy", "native": "Malagasy"},
    "mi": {"name": "Maori", "native": "Te Reo Māori"},
    "mk": {"name": "Macedonian", "native": "Македонски"},
    "mt": {"name": "Maltese", "native": "Malti"},
    "ny": {"name": "Chichewa", "native": "Chichewa"},
    "ps": {"name": "Pashto", "native": "پښتو"},
    "sn": {"name": "Shona", "native": "chiShona"},
    "so": {"name": "Somali", "native": "Soomaali"},
    "st": {"name": "Sesotho", "native": "Sesotho"},
    "su": {"name": "Sundanese", "native": "Basa Sunda"},
    "tg": {"name": "Tajik", "native": "Тоҷикӣ"},
    "tk": {"name": "Turkmen", "native": "Türkmençe"},
    "ug": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
    "yi": {"name": "Yiddish", "native": "ייִדיש"},
    # Additional NLLB-200 exclusive languages
    "ace": {"name": "Acehnese", "native": "Acèh"},
    "acm": {"name": "Mesopotamian Arabic", "native": "عراقي"},
    "acq": {"name": "Ta'izzi-Adeni Arabic", "native": "تعزية-عدنية"},
    "aeb": {"name": "Tunisian Arabic", "native": "تونسي"},
    "ajp": {"name": "South Levantine Arabic", "native": "شامي"},
    "als": {"name": "Tosk Albanian", "native": "Toskë"},
    "ars": {"name": "Najdi Arabic", "native": "نجدي"},
    "ary": {"name": "Moroccan Arabic", "native": "الدارجة"},
    "arz": {"name": "Egyptian Arabic", "native": "مصري"},
    "asm": {"name": "Assamese", "native": "অসমীয়া"},
    "ast": {"name": "Asturian", "native": "Asturianu"},
    "awa": {"name": "Awadhi", "native": "अवधी"},
    "ayr": {"name": "Central Aymara", "native": "Aymar aru"},
    "azb": {"name": "South Azerbaijani", "native": "تۆرکجه"},
    "bak": {"name": "Bashkir", "native": "Башҡортса"},
    "bam": {"name": "Bambara", "native": "Bamanankan"},
    "ban": {"name": "Balinese", "native": "Basa Bali"},
    "bho": {"name": "Bhojpuri", "native": "भोजपुरी"},
    "bjn": {"name": "Banjar", "native": "Bahasa Banjar"},
    "bod": {"name": "Tibetan", "native": "བོད་སྐད་"},
    "bug": {"name": "Buginese", "native": "Basa Ugi"},
    "crh": {"name": "Crimean Tatar", "native": "Qırımtatar tili"},
    "cjk": {"name": "Chokwe", "native": "Chokwe"},
    "ckb": {"name": "Central Kurdish", "native": "کوردیی ناوەندی"},
    "dik": {"name": "Southwestern Dinka", "native": "Thuɔŋjäŋ"},
    "dyu": {"name": "Dyula", "native": "Jula"},
    "dzo": {"name": "Dzongkha", "native": "རྫོང་ཁ"},
    "fur": {"name": "Friulian", "native": "Furlan"},
    "fuv": {"name": "Nigerian Fulfulde", "native": "Fulfulde"},
    "gaz": {"name": "West Central Oromo", "native": "Oromoo"},
    # Fixed: the native name was previously truncated to "Avañe'".
    "grn": {"name": "Guarani", "native": "Avañe'ẽ"},
    "hne": {"name": "Chhattisgarhi", "native": "छत्तीसगढ़ी"},
    "ilo": {"name": "Iloko", "native": "Ilokano"},
    "kab": {"name": "Kabyle", "native": "Taqbaylit"},
    "kac": {"name": "Jingpho", "native": "Jinghpaw"},
    "kam": {"name": "Kamba", "native": "Kikamba"},
    "kas": {"name": "Kashmiri", "native": "कॉशुर"},
    "kea": {"name": "Kabuverdianu", "native": "Kabuverdianu"},
    "khk": {"name": "Halh Mongolian", "native": "Монгол хэл"},
    "kin": {"name": "Kinyarwanda", "native": "Ikinyarwanda"},
    "lij": {"name": "Ligurian", "native": "Ligure"},
    "lim": {"name": "Limburgish", "native": "Limburgs"},
    "lin": {"name": "Lingala", "native": "Lingála"},
    "lmo": {"name": "Lombard", "native": "Lombard"},
    "ltg": {"name": "Latgalian", "native": "Latgalīšu"},
    "luo": {"name": "Luo", "native": "Dholuo"},
    "lus": {"name": "Mizo", "native": "Mizo ṭawng"},
    "mag": {"name": "Magahi", "native": "मगही"},
    "mai": {"name": "Maithili", "native": "मैथिली"},
    "min": {"name": "Minangkabau", "native": "Baso Minangkabau"},
    "mni": {"name": "Meitei", "native": "মৈতৈলোন্"},
    "mos": {"name": "Mossi", "native": "Mooré"},
    "mri": {"name": "Maori", "native": "Te Reo Māori"},
    "nus": {"name": "Nuer", "native": "Thok Naath"},
    "ory": {"name": "Odia", "native": "ଓଡ଼ିଆ"},
    "pag": {"name": "Pangasinan", "native": "Pangasinan"},
    "pap": {"name": "Papiamento", "native": "Papiamentu"},
    "prs": {"name": "Dari", "native": "دری"},
    "quy": {"name": "Ayacucho Quechua", "native": "Chanka Qhichwa"},
    "run": {"name": "Rundi", "native": "Ikirundi"},
    "sag": {"name": "Sango", "native": "Sängö"},
    "san": {"name": "Sanskrit", "native": "संस्कृतम्"},
    "sat": {"name": "Santali", "native": "ᱥᱟᱱᱛᱟᱲᱤ"},
    "scn": {"name": "Sicilian", "native": "Sicilianu"},
    "shn": {"name": "Shan", "native": "လိၵ်ႈတႆး"},
    "srd": {"name": "Sardinian", "native": "Sardu"},
    "szl": {"name": "Silesian", "native": "Ślōnski"},
    "taq": {"name": "Tamasheq", "native": "Tamasheq"},
    "tat": {"name": "Tatar", "native": "Татарча"},
    "tir": {"name": "Tigrinya", "native": "ትግርኛ"},
    "tpi": {"name": "Tok Pisin", "native": "Tok Pisin"},
    "tsn": {"name": "Tswana", "native": "Setswana"},
    "tso": {"name": "Tsonga", "native": "Xitsonga"},
    "tum": {"name": "Tumbuka", "native": "Chitumbuka"},
    "twi": {"name": "Twi", "native": "Twi"},
    "tzm": {"name": "Central Atlas Tamazight", "native": "ⵜⴰⵎⴰⵣⵉⵖⵜ"},
    "uig": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
    "vec": {"name": "Venetian", "native": "Vèneto"},
    "war": {"name": "Waray", "native": "Winaray"},
    "wol": {"name": "Wolof", "native": "Wolof"},
    "xho": {"name": "Xhosa", "native": "isiXhosa"},
    "ydd": {"name": "Eastern Yiddish", "native": "ייִדיש"},
    "yor": {"name": "Yoruba", "native": "Yorùbá"},
    "yue": {"name": "Cantonese", "native": "粵語"},
    "zho_hant": {"name": "Chinese (Traditional)", "native": "繁體中文"},
}

# Static metadata per selectable model. Its keys are the only accepted values
# of the ``model`` query parameter. The "languages" figure here is a fixed
# number; the response's ``total_languages`` is counted from the translator.
_MODEL_INFO = {
    "m2m100": {
        "name": "M2M100",
        "languages": 105,
        "license": "Apache 2.0",
        "commercial_use": True,
        "model_id": "facebook/m2m100_418M"
    },
    "nllb200": {
        "name": "NLLB-200",
        "languages": 200,
        "license": "CC-BY-NC 4.0",
        "commercial_use": False,
        "model_id": "facebook/nllb-200-distilled-600M"
    }
}


@app.get("/api/supported-languages")
async def get_supported_languages(model: str = "m2m100"):
    """
    Get list of supported languages for specified model

    - **model**: Model type ('m2m100' or 'nllb200')

    Responds with 400 when **model** is not one of the known model types.
    """
    if model not in _MODEL_INFO:
        raise HTTPException(status_code=400, detail="Invalid model. Choose 'm2m100' or 'nllb200'")

    # Get all supported language codes from translator based on model type
    supported_codes = sorted(translator.get_supported_languages(model).keys())

    # Build language list; a code with no display-name entry falls back to
    # its upper-cased code for both name fields.
    languages = []
    for code in supported_codes:
        names = _LANGUAGE_NAMES.get(code, {})
        fallback = code.upper()
        languages.append({
            "code": code,
            "name": names.get("name", fallback),
            "native_name": names.get("native", fallback)
        })

    return {
        # Copy so the shared module-level metadata is never handed out directly.
        "model": dict(_MODEL_INFO[model]),
        "languages": languages,
        "total_languages": len(languages),
        "note": "All language pairs are supported (any-to-any translation)"
    }