Files
multilingual-translation/app/main.py
jungwoo choi f586f930b6 Initial commit: Multilingual Translation API
- Implemented REST API for 105+ language translation
- Used Facebook M2M100 model (Apache 2.0 License - Commercial use allowed)
- Supports any-to-any translation between 105 languages
- Major languages: English, Chinese, Spanish, Arabic, Russian, Japanese, Korean, etc.
- Southeast Asian: Malay, Indonesian, Thai, Vietnamese, Tagalog, Burmese, Khmer, Lao
- South Asian: Bengali, Hindi, Urdu, Tamil, Telugu, Marathi, Gujarati, etc.
- European: German, French, Italian, Spanish, Portuguese, Russian, etc.
- African: Swahili, Amharic, Hausa, Igbo, Yoruba, Zulu, Xhosa
- And many more languages

Tech Stack:
- FastAPI for REST API
- Transformers (Hugging Face) for ML model
- PyTorch for inference
- Docker for containerization
- M2M100 418M parameter model

Features:
- Health check endpoint
- Supported languages listing
- Dynamic language validation
- Model caching for performance
- GPU support (auto-detection)
- CORS enabled for web clients

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-10 14:11:20 +09:00

256 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import logging
from .config import settings
from .models import TranslationRequest, TranslationResponse, HealthResponse
from .translator import translator
# Configure logging: INFO-level records formatted as
# "<timestamp> - <logger name> - <level> - <message>".
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-scoped logger used by the startup hook and request handlers below.
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifecycle event handler for startup and shutdown.

    Before yielding, preloads the translation models through
    ``translator.preload_all_models()``. If preloading raises, the error is
    logged together with its traceback and re-raised to the caller. After the
    yield, a shutdown message is logged.

    Args:
        app: The FastAPI application this handler is attached to. It is not
            used inside the handler.
    """
    # Startup. The service name is taken from settings so the log lines stay
    # consistent with the title the API advertises.
    logger.info("Starting %s...", settings.api_title)
    try:
        # Preload translation models
        logger.info("Preloading translation models...")
        translator.preload_all_models()
        logger.info("Models loaded successfully")
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error during startup: %s", e)
        raise
    yield
    # Shutdown
    logger.info("Shutting down %s...", settings.api_title)
# Create FastAPI app. Title, version and description are read from settings;
# startup and shutdown are handled by `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title=settings.api_title,
    description=settings.api_description,
    version=settings.api_version,
)

# Add CORS middleware. Allowed origins come from settings; every HTTP method
# and request header is accepted, and credentials are permitted.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=settings.allowed_origins,
)
@app.get("/", response_model=dict)
async def root():
    """Root endpoint with API information"""
    # Paths advertised to clients that land on the root URL.
    endpoint_paths = {
        "translate": "/api/translate",
        "health": "/health",
        "docs": "/docs",
    }
    # API metadata is read from settings on every call.
    api_info = {
        "name": settings.api_title,
        "version": settings.api_version,
        "description": settings.api_description,
        "endpoints": endpoint_paths,
    }
    return api_info
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    # One readiness probe decides all three fields of the response.
    ready = translator.is_ready()
    if ready:
        status = "healthy"
        message = "Translation service is running"
    else:
        status = "degraded"
        message = "Models not loaded"
    return HealthResponse(status=status, message=message, models_loaded=ready)
@app.post("/api/translate", response_model=TranslationResponse)
async def translate_text(request: TranslationRequest):
    """
    Translate text from one supported language to another

    - **text**: Text to translate (1-5000 characters)
    - **source_lang**: Source language code (e.g. 'en', 'ms'; see /api/supported-languages)
    - **target_lang**: Target language code (must differ from source_lang)
    """
    # Error mapping:
    #   * identical source/target            -> 400
    #   * ValueError raised inside the try   -> 400 with the error text
    #   * any other exception                -> 500 with a generic message
    #     (details go to the log only, not to the client)

    # Validate language pair
    if request.source_lang == request.target_lang:
        raise HTTPException(
            status_code=400,
            detail="Source and target languages must be different",
        )

    try:
        # Perform translation
        translated_text, model_used = translator.translate(
            text=request.text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
        )
        return TranslationResponse(
            original_text=request.text,
            translated_text=translated_text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
            model_used=model_used,
        )
    except ValueError as e:
        # Chain the cause so the original error is visible in tracebacks.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # logger.exception records the traceback that logger.error dropped.
        logger.exception("Translation error: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Translation failed. Please try again.",
        ) from e
# Display names for language codes: English name plus the language's own name.
# Built once at import time instead of on every request. Codes missing from
# this table fall back to the upper-cased code in get_supported_languages().
_LANG_NAMES = {
    "en": {"name": "English", "native": "English"},
    "zh": {"name": "Chinese", "native": "中文"},
    "es": {"name": "Spanish", "native": "Español"},
    "ar": {"name": "Arabic", "native": "العربية"},
    "hi": {"name": "Hindi", "native": "हिन्दी"},
    "bn": {"name": "Bengali", "native": "বাংলা"},
    "pt": {"name": "Portuguese", "native": "Português"},
    "ru": {"name": "Russian", "native": "Русский"},
    "ja": {"name": "Japanese", "native": "日本語"},
    "de": {"name": "German", "native": "Deutsch"},
    "fr": {"name": "French", "native": "Français"},
    "ko": {"name": "Korean", "native": "한국어"},
    "it": {"name": "Italian", "native": "Italiano"},
    "tr": {"name": "Turkish", "native": "Türkçe"},
    "vi": {"name": "Vietnamese", "native": "Tiếng Việt"},
    "th": {"name": "Thai", "native": "ไทย"},
    "pl": {"name": "Polish", "native": "Polski"},
    "nl": {"name": "Dutch", "native": "Nederlands"},
    "uk": {"name": "Ukrainian", "native": "Українська"},
    "ro": {"name": "Romanian", "native": "Română"},
    "ms": {"name": "Malay", "native": "Bahasa Melayu"},
    "id": {"name": "Indonesian", "native": "Bahasa Indonesia"},
    "tl": {"name": "Tagalog", "native": "Tagalog"},
    "my": {"name": "Burmese", "native": "မြန်မာဘာသာ"},
    "km": {"name": "Khmer", "native": "ភាសាខ្មែរ"},
    "lo": {"name": "Lao", "native": "ລາວ"},
    "ur": {"name": "Urdu", "native": "اردو"},
    "ta": {"name": "Tamil", "native": "தமிழ்"},
    "te": {"name": "Telugu", "native": "తెలుగు"},
    "mr": {"name": "Marathi", "native": "मराठी"},
    "gu": {"name": "Gujarati", "native": "ગુજરાતી"},
    "kn": {"name": "Kannada", "native": "ಕನ್ನಡ"},
    "ml": {"name": "Malayalam", "native": "മലയാളം"},
    "pa": {"name": "Punjabi", "native": "ਪੰਜਾਬੀ"},
    "ne": {"name": "Nepali", "native": "नेपाली"},
    "si": {"name": "Sinhala", "native": "සිංහල"},
    "sv": {"name": "Swedish", "native": "Svenska"},
    "da": {"name": "Danish", "native": "Dansk"},
    "fi": {"name": "Finnish", "native": "Suomi"},
    "no": {"name": "Norwegian", "native": "Norsk"},
    "cs": {"name": "Czech", "native": "Čeština"},
    "sk": {"name": "Slovak", "native": "Slovenčina"},
    "hu": {"name": "Hungarian", "native": "Magyar"},
    "bg": {"name": "Bulgarian", "native": "Български"},
    "sr": {"name": "Serbian", "native": "Српски"},
    "hr": {"name": "Croatian", "native": "Hrvatski"},
    "sl": {"name": "Slovenian", "native": "Slovenščina"},
    "et": {"name": "Estonian", "native": "Eesti"},
    "lv": {"name": "Latvian", "native": "Latviešu"},
    "lt": {"name": "Lithuanian", "native": "Lietuvių"},
    "el": {"name": "Greek", "native": "Ελληνικά"},
    "he": {"name": "Hebrew", "native": "עברית"},
    "fa": {"name": "Persian", "native": "فارسی"},
    "sw": {"name": "Swahili", "native": "Kiswahili"},
    "am": {"name": "Amharic", "native": "አማርኛ"},
    "ha": {"name": "Hausa", "native": "Hausa"},
    "ig": {"name": "Igbo", "native": "Igbo"},
    "yo": {"name": "Yoruba", "native": "Yorùbá"},
    "zu": {"name": "Zulu", "native": "isiZulu"},
    "xh": {"name": "Xhosa", "native": "isiXhosa"},
    "af": {"name": "Afrikaans", "native": "Afrikaans"},
    "az": {"name": "Azerbaijani", "native": "Azərbaycan"},
    "ka": {"name": "Georgian", "native": "ქართული"},
    "kk": {"name": "Kazakh", "native": "Қазақша"},
    "uz": {"name": "Uzbek", "native": "Oʻzbekcha"},
    "mn": {"name": "Mongolian", "native": "Монгол"},
    "sq": {"name": "Albanian", "native": "Shqip"},
    "hy": {"name": "Armenian", "native": "Հայերեն"},
    "be": {"name": "Belarusian", "native": "Беларуская"},
    "bs": {"name": "Bosnian", "native": "Bosanski"},
    "ca": {"name": "Catalan", "native": "Català"},
    "ceb": {"name": "Cebuano", "native": "Cebuano"},
    "cy": {"name": "Welsh", "native": "Cymraeg"},
    "eo": {"name": "Esperanto", "native": "Esperanto"},
    "eu": {"name": "Basque", "native": "Euskara"},
    "fil": {"name": "Filipino", "native": "Filipino"},
    "fy": {"name": "Frisian", "native": "Frysk"},
    "ga": {"name": "Irish", "native": "Gaeilge"},
    "gd": {"name": "Scottish Gaelic", "native": "Gàidhlig"},
    "gl": {"name": "Galician", "native": "Galego"},
    "haw": {"name": "Hawaiian", "native": "ʻŌlelo Hawaiʻi"},
    "hmn": {"name": "Hmong", "native": "Hmong"},
    "ht": {"name": "Haitian Creole", "native": "Kreyòl ayisyen"},
    "is": {"name": "Icelandic", "native": "Íslenska"},
    "jv": {"name": "Javanese", "native": "Basa Jawa"},
    "ku": {"name": "Kurdish", "native": "Kurdî"},
    "ky": {"name": "Kyrgyz", "native": "Кыргызча"},
    "la": {"name": "Latin", "native": "Latina"},
    "lb": {"name": "Luxembourgish", "native": "Lëtzebuergesch"},
    "lg": {"name": "Luganda", "native": "Luganda"},
    "ln": {"name": "Lingala", "native": "Lingála"},
    "mg": {"name": "Malagasy", "native": "Malagasy"},
    "mi": {"name": "Maori", "native": "Te Reo Māori"},
    "mk": {"name": "Macedonian", "native": "Македонски"},
    "mt": {"name": "Maltese", "native": "Malti"},
    "ny": {"name": "Chichewa", "native": "Chichewa"},
    "ps": {"name": "Pashto", "native": "پښتو"},
    "sn": {"name": "Shona", "native": "chiShona"},
    "so": {"name": "Somali", "native": "Soomaali"},
    "st": {"name": "Sesotho", "native": "Sesotho"},
    "su": {"name": "Sundanese", "native": "Basa Sunda"},
    "tg": {"name": "Tajik", "native": "Тоҷикӣ"},
    "tk": {"name": "Turkmen", "native": "Türkmençe"},
    "ug": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
    "yi": {"name": "Yiddish", "native": "ייִדיש"},
}


@app.get("/api/supported-languages")
async def get_supported_languages():
    """Get list of supported languages"""
    # Response shape:
    #   languages       - one {"code", "name", "native_name"} dict per code,
    #                     sorted by code
    #   total_languages - number of entries in `languages`
    #   note            - fixed informational string
    # The set of codes comes from translator.lang_codes; _LANG_NAMES only
    # supplies display names.
    languages = []
    for code in sorted(translator.lang_codes.keys()):
        # One table lookup per code; unknown codes display as the
        # upper-cased code for both name fields.
        fallback = code.upper()
        entry = _LANG_NAMES.get(code, {})
        languages.append(
            {
                "code": code,
                "name": entry.get("name", fallback),
                "native_name": entry.get("native", fallback),
            }
        )

    return {
        "languages": languages,
        "total_languages": len(languages),
        "note": "All language pairs are supported (any-to-any translation)",
    }