"""FastAPI application exposing the Multilingual Translation API.

Routes defined here:

* ``GET /``                         -- API name, version and endpoint index
* ``GET /health``                   -- readiness of the translation models
* ``POST /api/translate``           -- translate a piece of text
* ``GET /api/supported-languages``  -- languages available for a model
"""

import logging
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware

from .config import settings
from .models import TranslationRequest, TranslationResponse, HealthResponse
from .translator import translator

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


# Display names for language codes, keyed by the code reported by the
# translator. Built once at import time instead of on every request.
# Codes missing from this table fall back to the upper-cased code.
_LANGUAGE_NAMES = {
    "en": {"name": "English", "native": "English"},
    "zh": {"name": "Chinese", "native": "中文"},
    "es": {"name": "Spanish", "native": "Español"},
    "ar": {"name": "Arabic", "native": "العربية"},
    "hi": {"name": "Hindi", "native": "हिन्दी"},
    "bn": {"name": "Bengali", "native": "বাংলা"},
    "pt": {"name": "Portuguese", "native": "Português"},
    "ru": {"name": "Russian", "native": "Русский"},
    "ja": {"name": "Japanese", "native": "日本語"},
    "de": {"name": "German", "native": "Deutsch"},
    "fr": {"name": "French", "native": "Français"},
    "ko": {"name": "Korean", "native": "한국어"},
    "it": {"name": "Italian", "native": "Italiano"},
    "tr": {"name": "Turkish", "native": "Türkçe"},
    "vi": {"name": "Vietnamese", "native": "Tiếng Việt"},
    "th": {"name": "Thai", "native": "ไทย"},
    "pl": {"name": "Polish", "native": "Polski"},
    "nl": {"name": "Dutch", "native": "Nederlands"},
    "uk": {"name": "Ukrainian", "native": "Українська"},
    "ro": {"name": "Romanian", "native": "Română"},
    "ms": {"name": "Malay", "native": "Bahasa Melayu"},
    "id": {"name": "Indonesian", "native": "Bahasa Indonesia"},
    "tl": {"name": "Tagalog", "native": "Tagalog"},
    "my": {"name": "Burmese", "native": "မြန်မာဘာသာ"},
    "km": {"name": "Khmer", "native": "ភាសាខ្មែរ"},
    "lo": {"name": "Lao", "native": "ລາວ"},
    "ur": {"name": "Urdu", "native": "اردو"},
    "ta": {"name": "Tamil", "native": "தமிழ்"},
    "te": {"name": "Telugu", "native": "తెలుగు"},
    "mr": {"name": "Marathi", "native": "मराठी"},
    "gu": {"name": "Gujarati", "native": "ગુજરાતી"},
    "kn": {"name": "Kannada", "native": "ಕನ್ನಡ"},
    "ml": {"name": "Malayalam", "native": "മലയാളം"},
    "pa": {"name": "Punjabi", "native": "ਪੰਜਾਬੀ"},
    "ne": {"name": "Nepali", "native": "नेपाली"},
    "si": {"name": "Sinhala", "native": "සිංහල"},
    "sv": {"name": "Swedish", "native": "Svenska"},
    "da": {"name": "Danish", "native": "Dansk"},
    "fi": {"name": "Finnish", "native": "Suomi"},
    "no": {"name": "Norwegian", "native": "Norsk"},
    "cs": {"name": "Czech", "native": "Čeština"},
    "sk": {"name": "Slovak", "native": "Slovenčina"},
    "hu": {"name": "Hungarian", "native": "Magyar"},
    "bg": {"name": "Bulgarian", "native": "Български"},
    "sr": {"name": "Serbian", "native": "Српски"},
    "hr": {"name": "Croatian", "native": "Hrvatski"},
    "sl": {"name": "Slovenian", "native": "Slovenščina"},
    "et": {"name": "Estonian", "native": "Eesti"},
    "lv": {"name": "Latvian", "native": "Latviešu"},
    "lt": {"name": "Lithuanian", "native": "Lietuvių"},
    "el": {"name": "Greek", "native": "Ελληνικά"},
    "he": {"name": "Hebrew", "native": "עברית"},
    "fa": {"name": "Persian", "native": "فارسی"},
    "sw": {"name": "Swahili", "native": "Kiswahili"},
    "am": {"name": "Amharic", "native": "አማርኛ"},
    "ha": {"name": "Hausa", "native": "Hausa"},
    "ig": {"name": "Igbo", "native": "Igbo"},
    "yo": {"name": "Yoruba", "native": "Yorùbá"},
    "zu": {"name": "Zulu", "native": "isiZulu"},
    "xh": {"name": "Xhosa", "native": "isiXhosa"},
    "af": {"name": "Afrikaans", "native": "Afrikaans"},
    "az": {"name": "Azerbaijani", "native": "Azərbaycan"},
    "ka": {"name": "Georgian", "native": "ქართული"},
    "kk": {"name": "Kazakh", "native": "Қазақша"},
    "uz": {"name": "Uzbek", "native": "Oʻzbekcha"},
    "mn": {"name": "Mongolian", "native": "Монгол"},
    "sq": {"name": "Albanian", "native": "Shqip"},
    "hy": {"name": "Armenian", "native": "Հայերեն"},
    "be": {"name": "Belarusian", "native": "Беларуская"},
    "bs": {"name": "Bosnian", "native": "Bosanski"},
    "ca": {"name": "Catalan", "native": "Català"},
    "ceb": {"name": "Cebuano", "native": "Cebuano"},
    "cy": {"name": "Welsh", "native": "Cymraeg"},
    "eo": {"name": "Esperanto", "native": "Esperanto"},
    "eu": {"name": "Basque", "native": "Euskara"},
    "fil": {"name": "Filipino", "native": "Filipino"},
    "fy": {"name": "Frisian", "native": "Frysk"},
    "ga": {"name": "Irish", "native": "Gaeilge"},
    "gd": {"name": "Scottish Gaelic", "native": "Gàidhlig"},
    "gl": {"name": "Galician", "native": "Galego"},
    "haw": {"name": "Hawaiian", "native": "ʻŌlelo Hawaiʻi"},
    "hmn": {"name": "Hmong", "native": "Hmong"},
    "ht": {"name": "Haitian Creole", "native": "Kreyòl ayisyen"},
    "is": {"name": "Icelandic", "native": "Íslenska"},
    "jv": {"name": "Javanese", "native": "Basa Jawa"},
    "ku": {"name": "Kurdish", "native": "Kurdî"},
    "ky": {"name": "Kyrgyz", "native": "Кыргызча"},
    "la": {"name": "Latin", "native": "Latina"},
    "lb": {"name": "Luxembourgish", "native": "Lëtzebuergesch"},
    "lg": {"name": "Luganda", "native": "Luganda"},
    "ln": {"name": "Lingala", "native": "Lingála"},
    "mg": {"name": "Malagasy", "native": "Malagasy"},
    "mi": {"name": "Maori", "native": "Te Reo Māori"},
    "mk": {"name": "Macedonian", "native": "Македонски"},
    "mt": {"name": "Maltese", "native": "Malti"},
    "ny": {"name": "Chichewa", "native": "Chichewa"},
    "ps": {"name": "Pashto", "native": "پښتو"},
    "sn": {"name": "Shona", "native": "chiShona"},
    "so": {"name": "Somali", "native": "Soomaali"},
    "st": {"name": "Sesotho", "native": "Sesotho"},
    "su": {"name": "Sundanese", "native": "Basa Sunda"},
    "tg": {"name": "Tajik", "native": "Тоҷикӣ"},
    "tk": {"name": "Turkmen", "native": "Türkmençe"},
    "ug": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
    "yi": {"name": "Yiddish", "native": "ייִדיש"},
    # Additional NLLB-200 exclusive languages
    "ace_arab": {"name": "Acehnese (Arabic script)", "native": "أتشيه"},
    "ace": {"name": "Acehnese", "native": "Acèh"},
    "acm": {"name": "Mesopotamian Arabic", "native": "عراقي"},
    "acq": {"name": "Ta'izzi-Adeni Arabic", "native": "تعزية-عدنية"},
    "aeb": {"name": "Tunisian Arabic", "native": "تونسي"},
    "ajp": {"name": "South Levantine Arabic", "native": "شامي"},
    "aka": {"name": "Akan", "native": "Akan"},
    "apc": {"name": "North Levantine Arabic", "native": "شامي شمالي"},
    "ar_latn": {"name": "Arabic (Latin script)", "native": "Arabic (Latin)"},
    "ars": {"name": "Najdi Arabic", "native": "نجدي"},
    "ary": {"name": "Moroccan Arabic", "native": "الدارجة"},
    "arz": {"name": "Egyptian Arabic", "native": "مصري"},
    "as": {"name": "Assamese", "native": "অসমীয়া"},
    "ast": {"name": "Asturian", "native": "Asturianu"},
    "awa": {"name": "Awadhi", "native": "अवधी"},
    "ayr": {"name": "Central Aymara", "native": "Aymar aru"},
    "azb": {"name": "South Azerbaijani", "native": "تۆرکجه"},
    "ba": {"name": "Bashkir", "native": "Башҡортса"},
    "bam": {"name": "Bambara", "native": "Bamanankan"},
    "ban": {"name": "Balinese", "native": "Basa Bali"},
    "bem": {"name": "Bemba", "native": "Ichibemba"},
    "bho": {"name": "Bhojpuri", "native": "भोजपुरी"},
    "bjn_arab": {"name": "Banjar (Arabic script)", "native": "بنجر"},
    "bjn": {"name": "Banjar", "native": "Bahasa Banjar"},
    "bo": {"name": "Tibetan", "native": "བོད་སྐད་"},
    "bug": {"name": "Buginese", "native": "Basa Ugi"},
    "crh": {"name": "Crimean Tatar", "native": "Qırımtatar tili"},
    "cjk": {"name": "Chokwe", "native": "Chokwe"},
    "ckb": {"name": "Central Kurdish", "native": "کوردیی ناوەندی"},
    "dik": {"name": "Southwestern Dinka", "native": "Thuɔŋjäŋ"},
    "dyu": {"name": "Dyula", "native": "Jula"},
    "dz": {"name": "Dzongkha", "native": "རྫོང་ཁ"},
    "ee": {"name": "Ewe", "native": "Eʋegbe"},
    "fo": {"name": "Faroese", "native": "Føroyskt"},
    "fj": {"name": "Fijian", "native": "Na Vosa Vakaviti"},
    "fon": {"name": "Fon", "native": "Fɔngbe"},
    "fur": {"name": "Friulian", "native": "Furlan"},
    "fuv": {"name": "Nigerian Fulfulde", "native": "Fulfulde"},
    "om": {"name": "West Central Oromo", "native": "Oromoo"},
    "gn": {"name": "Guarani", "native": "Avañe'ẽ"},
    "hne": {"name": "Chhattisgarhi", "native": "छत्तीसगढ़ी"},
    "ilo": {"name": "Iloko", "native": "Ilokano"},
    "kab": {"name": "Kabyle", "native": "Taqbaylit"},
    "kac": {"name": "Jingpho", "native": "Jinghpaw"},
    "kam": {"name": "Kamba", "native": "Kikamba"},
    "ks": {"name": "Kashmiri", "native": "کٲشُر"},
    "ks_deva": {"name": "Kashmiri (Devanagari)", "native": "कॉशुर"},
    "kbp": {"name": "Kabiyè", "native": "Kabɩyɛ"},
    "kea": {"name": "Kabuverdianu", "native": "Kabuverdianu"},
    "ki": {"name": "Kikuyu", "native": "Gĩkũyũ"},
    "rw": {"name": "Kinyarwanda", "native": "Ikinyarwanda"},
    "kmb": {"name": "Kimbundu", "native": "Kimbundu"},
    "knc_arab": {"name": "Kanuri (Arabic script)", "native": "كانوري"},
    "knc": {"name": "Kanuri", "native": "Kanuri"},
    "kg": {"name": "Kongo", "native": "Kikongo"},
    "lij": {"name": "Ligurian", "native": "Ligure"},
    "li": {"name": "Limburgish", "native": "Limburgs"},
    "lmo": {"name": "Lombard", "native": "Lombard"},
    "ltg": {"name": "Latgalian", "native": "Latgalīšu"},
    "lua": {"name": "Luba-Kasai", "native": "Tshiluba"},
    "luo": {"name": "Luo", "native": "Dholuo"},
    "lus": {"name": "Mizo", "native": "Mizo ṭawng"},
    "mag": {"name": "Magahi", "native": "मगही"},
    "mai": {"name": "Maithili", "native": "मैथिली"},
    "min_arab": {"name": "Minangkabau (Arabic)", "native": "مينڠكاباو"},
    "min": {"name": "Minangkabau", "native": "Baso Minangkabau"},
    "mni": {"name": "Meitei", "native": "মৈতৈলোন্"},
    "mos": {"name": "Mossi", "native": "Mooré"},
    "nn": {"name": "Norwegian Nynorsk", "native": "Nynorsk"},
    "nb": {"name": "Norwegian Bokmål", "native": "Bokmål"},
    "nso": {"name": "Northern Sotho", "native": "Sesotho sa Leboa"},
    "nus": {"name": "Nuer", "native": "Thok Naath"},
    "oc": {"name": "Occitan", "native": "Occitan"},
    "or": {"name": "Odia", "native": "ଓଡ଼ିଆ"},
    "pag": {"name": "Pangasinan", "native": "Pangasinan"},
    "pap": {"name": "Papiamento", "native": "Papiamentu"},
    "prs": {"name": "Dari", "native": "دری"},
    "qu": {"name": "Ayacucho Quechua", "native": "Chanka Qhichwa"},
    "rn": {"name": "Rundi", "native": "Ikirundi"},
    "sg": {"name": "Sango", "native": "Sängö"},
    "sa": {"name": "Sanskrit", "native": "संस्कृतम्"},
    "sat": {"name": "Santali", "native": "ᱥᱟᱱᱛᱟᱲᱤ"},
    "scn": {"name": "Sicilian", "native": "Sicilianu"},
    "shn": {"name": "Shan", "native": "လိၵ်ႈတႆး"},
    "sm": {"name": "Samoan", "native": "Gagana Sāmoa"},
    "sd": {"name": "Sindhi", "native": "سنڌي"},
    "sc": {"name": "Sardinian", "native": "Sardu"},
    "ss": {"name": "Swazi", "native": "SiSwati"},
    "szl": {"name": "Silesian", "native": "Ślōnski"},
    "taq": {"name": "Tamasheq", "native": "Tamasheq"},
    "taq_tfng": {"name": "Tamasheq (Tifinagh)", "native": "ⵜⴰⵎⴰⵛⴰⵆ"},
    "tt": {"name": "Tatar", "native": "Татарча"},
    "ti": {"name": "Tigrinya", "native": "ትግርኛ"},
    "tpi": {"name": "Tok Pisin", "native": "Tok Pisin"},
    "tn": {"name": "Tswana", "native": "Setswana"},
    "ts": {"name": "Tsonga", "native": "Xitsonga"},
    "tum": {"name": "Tumbuka", "native": "Chitumbuka"},
    "tw": {"name": "Twi", "native": "Twi"},
    "tzm": {"name": "Central Atlas Tamazight", "native": "ⵜⴰⵎⴰⵣⵉⵖⵜ"},
    "umb": {"name": "Umbundu", "native": "Umbundu"},
    "vec": {"name": "Venetian", "native": "Vèneto"},
    "war": {"name": "Waray", "native": "Winaray"},
    "wo": {"name": "Wolof", "native": "Wolof"},
    "yue": {"name": "Cantonese", "native": "粵語"},
    "zh_hant": {"name": "Chinese (Traditional)", "native": "繁體中文"},
}

# Static metadata returned by /api/supported-languages. Its keys are also the
# single source of truth for which ``model`` query values are accepted.
_MODEL_INFO = {
    "m2m100": {
        "name": "M2M100",
        "languages": 105,
        "license": "Apache 2.0",
        "commercial_use": True,
        "model_id": "facebook/m2m100_418M"
    },
    "nllb200": {
        "name": "NLLB-200 (FLORES-200)",
        "languages": 204,
        "license": "CC-BY-NC 4.0",
        "commercial_use": False,
        "model_id": "facebook/nllb-200-distilled-600M",
        "note": "Includes multiple script variants for some languages"
    }
}


def _describe_language(code):
    """Build the ``code`` / ``name`` / ``native_name`` entry for one language.

    Codes that are not in ``_LANGUAGE_NAMES`` use the upper-cased code for
    both display names.
    """
    entry = _LANGUAGE_NAMES.get(code)
    if entry is None:
        fallback = code.upper()
        return {"code": code, "name": fallback, "native_name": fallback}
    return {"code": code, "name": entry["name"], "native_name": entry["native"]}


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifecycle event handler for startup and shutdown.

    On startup the translation models are preloaded; any failure is logged
    with its traceback and re-raised so the application does not start.
    On shutdown only a log line is emitted.
    """
    # Startup
    logger.info("Starting Multilingual Translation API...")
    try:
        # Preload translation models
        logger.info("Preloading translation models...")
        translator.preload_all_models()
        logger.info("Models loaded successfully")
    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception("Error during startup: %s", e)
        raise

    yield

    # Shutdown
    logger.info("Shutting down Multilingual Translation API...")


# Create FastAPI app
app = FastAPI(
    title=settings.api_title,
    version=settings.api_version,
    description=settings.api_description,
    lifespan=lifespan
)

# Add CORS middleware
# NOTE(review): credentials are allowed here, so settings.allowed_origins
# should presumably list explicit origins rather than a wildcard -- confirm.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/", response_model=dict)
async def root():
    """Root endpoint with API information"""
    return {
        "name": settings.api_title,
        "version": settings.api_version,
        "description": settings.api_description,
        "endpoints": {
            "translate": "/api/translate",
            "health": "/health",
            "docs": "/docs"
        }
    }


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    models_ready = translator.is_ready()
    return HealthResponse(
        status="healthy" if models_ready else "degraded",
        message="Translation service is running" if models_ready else "Models not loaded",
        models_loaded=models_ready
    )


@app.post("/api/translate", response_model=TranslationResponse)
async def translate_text(request: TranslationRequest):
    """
    Translate text between 105+ languages using M2M100 model

    - **text**: Text to translate (1-5000 characters)
    - **source_lang**: Source language code (e.g., 'en', 'ko', 'ms', 'bn', 'ja', 'zh', etc.)
    - **target_lang**: Target language code (e.g., 'en', 'ko', 'ms', 'bn', 'ja', 'zh', etc.)
    - **model**: Translation model to use

    Supports any-to-any translation between 105 languages.
    See /api/supported-languages for full list.
    """
    # Validate language pair
    if request.source_lang == request.target_lang:
        raise HTTPException(
            status_code=400,
            detail="Source and target languages must be different"
        )

    try:
        # Perform translation
        # NOTE(review): this call runs synchronously inside an async handler;
        # if it is CPU-bound it will hold the event loop for its duration.
        translated_text, model_used = translator.translate(
            text=request.text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
            model_type=request.model
        )
    except ValueError as e:
        # The translator's ValueError message is surfaced to the client as-is.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Log the full traceback server-side; return a generic message.
        logger.exception("Translation error: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Translation failed. Please try again."
        ) from e

    return TranslationResponse(
        original_text=request.text,
        translated_text=translated_text,
        source_lang=request.source_lang,
        target_lang=request.target_lang,
        model_used=model_used
    )


@app.get("/api/supported-languages")
async def get_supported_languages(model: str = "m2m100"):
    """
    Get list of supported languages for specified model

    - **model**: Model type ('m2m100' or 'nllb200')
    """
    if model not in _MODEL_INFO:
        raise HTTPException(status_code=400, detail="Invalid model. Choose 'm2m100' or 'nllb200'")

    # Get all supported language codes from translator based on model type
    supported_codes = sorted(translator.get_supported_languages(model))

    # Build language list
    languages = [_describe_language(code) for code in supported_codes]

    return {
        "model": _MODEL_INFO[model],
        "languages": languages,
        "total_languages": len(languages),
        "note": "All language pairs are supported (any-to-any translation)"
    }