Initial commit: Multilingual Translation API
- Implemented REST API for 105+ language translation
- Used Facebook M2M100 model (Apache 2.0 License - Commercial use allowed)
- Supports any-to-any translation between 105 languages
  - Major languages: English, Chinese, Spanish, Arabic, Russian, Japanese, Korean, etc.
  - Southeast Asian: Malay, Indonesian, Thai, Vietnamese, Tagalog, Burmese, Khmer, Lao
  - South Asian: Bengali, Hindi, Urdu, Tamil, Telugu, Marathi, Gujarati, etc.
  - European: German, French, Italian, Spanish, Portuguese, Russian, etc.
  - African: Swahili, Amharic, Hausa, Igbo, Yoruba, Zulu, Xhosa
  - And many more languages

Tech Stack:
- FastAPI for REST API
- Transformers (Hugging Face) for ML model
- PyTorch for inference
- Docker for containerization
- M2M100 418M parameter model

Features:
- Health check endpoint
- Supported languages listing
- Dynamic language validation
- Model caching for performance
- GPU support (auto-detection)
- CORS enabled for web clients

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
255
app/main.py
Normal file
255
app/main.py
Normal file
@ -0,0 +1,255 @@
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
import logging
|
||||
|
||||
from .config import settings
|
||||
from .models import TranslationRequest, TranslationResponse, HealthResponse
|
||||
from .translator import translator
|
||||
|
||||
# Logging setup: emit INFO and above, each record rendered as
# "timestamp - logger name - level - message".
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Logger named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifecycle event handler for startup and shutdown.

    Before yielding, preloads the translation models through
    ``translator.preload_all_models()``. If preloading raises, the error is
    logged together with its traceback and re-raised to the caller. The code
    after the ``yield`` runs on shutdown and only logs a message.

    Args:
        app: The FastAPI application this handler is attached to. It is not
            used inside the handler.

    Raises:
        Exception: Whatever ``translator.preload_all_models()`` raised.
    """
    # Startup
    logger.info("Starting Malaysian Translation API...")
    try:
        # Preload translation models
        logger.info("Preloading translation models...")
        translator.preload_all_models()
    except Exception as e:
        # logger.exception keeps the original message text and additionally
        # records the traceback, which a plain logger.error call would drop.
        logger.exception("Error during startup: %s", e)
        raise
    else:
        logger.info("Models loaded successfully")

    yield

    # Shutdown
    logger.info("Shutting down Malaysian Translation API...")
|
||||
|
||||
|
||||
# Application instance. Title, version and description are read from
# settings; startup/shutdown behaviour is delegated to `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title=settings.api_title,
    version=settings.api_version,
    description=settings.api_description,
)
|
||||
|
||||
# CORS: allowed origins come from settings; every method and header is
# accepted and credentialed requests are permitted.
# NOTE(review): confirm settings.allowed_origins is an explicit origin list;
# a wildcard origin together with allow_credentials=True should be checked
# against the CORS middleware documentation.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=settings.allowed_origins,
)
|
||||
|
||||
|
||||
@app.get("/", response_model=dict)
async def root():
    """Return the API's name, version, description and main endpoint paths."""
    endpoint_paths = {
        "translate": "/api/translate",
        "health": "/health",
        "docs": "/docs",
    }
    api_info = {
        "name": settings.api_title,
        "version": settings.api_version,
        "description": settings.api_description,
        "endpoints": endpoint_paths,
    }
    return api_info
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service health based on ``translator.is_ready()``.

    The status is "healthy" when the translator reports ready and
    "degraded" otherwise; ``models_loaded`` carries the readiness value.
    """
    ready = translator.is_ready()

    if ready:
        status, message = "healthy", "Translation service is running"
    else:
        status, message = "degraded", "Models not loaded"

    return HealthResponse(status=status, message=message, models_loaded=ready)
|
||||
|
||||
|
||||
@app.post("/api/translate", response_model=TranslationResponse)
async def translate_text(request: TranslationRequest):
    """
    Translate text from one supported language to another

    - **text**: Text to translate (1-5000 characters)
    - **source_lang**: Source language code (for example 'en' or 'ms')
    - **target_lang**: Target language code; must differ from **source_lang**

    See `/api/supported-languages` for the language codes the service reports.

    Responds with HTTP 400 when the two language codes are equal or when the
    translator rejects the request with a `ValueError`, and with HTTP 500 for
    any other failure.
    """
    # Validate language pair
    if request.source_lang == request.target_lang:
        raise HTTPException(
            status_code=400,
            detail="Source and target languages must be different"
        )

    try:
        # Perform translation
        # NOTE(review): this call runs synchronously inside the coroutine; if
        # it is long-running model inference it occupies the event loop for
        # its whole duration. Confirm whether it should be offloaded, and
        # whether the translator is safe to call concurrently before doing so.
        translated_text, model_used = translator.translate(
            text=request.text,
            source_lang=request.source_lang,
            target_lang=request.target_lang
        )

        return TranslationResponse(
            original_text=request.text,
            translated_text=translated_text,
            source_lang=request.source_lang,
            target_lang=request.target_lang,
            model_used=model_used
        )

    except ValueError as e:
        # A ValueError is reported to the client with its own message.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Log the message plus traceback server-side; the client only sees a
        # generic failure so internal details are not exposed.
        logger.exception("Translation error: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Translation failed. Please try again."
        ) from e
|
||||
|
||||
|
||||
# Display names for language codes: English name plus native-script name.
# Built once at import time instead of on every request.
_LANGUAGE_NAMES = {
    "en": {"name": "English", "native": "English"},
    "zh": {"name": "Chinese", "native": "中文"},
    "es": {"name": "Spanish", "native": "Español"},
    "ar": {"name": "Arabic", "native": "العربية"},
    "hi": {"name": "Hindi", "native": "हिन्दी"},
    "bn": {"name": "Bengali", "native": "বাংলা"},
    "pt": {"name": "Portuguese", "native": "Português"},
    "ru": {"name": "Russian", "native": "Русский"},
    "ja": {"name": "Japanese", "native": "日本語"},
    "de": {"name": "German", "native": "Deutsch"},
    "fr": {"name": "French", "native": "Français"},
    "ko": {"name": "Korean", "native": "한국어"},
    "it": {"name": "Italian", "native": "Italiano"},
    "tr": {"name": "Turkish", "native": "Türkçe"},
    "vi": {"name": "Vietnamese", "native": "Tiếng Việt"},
    "th": {"name": "Thai", "native": "ไทย"},
    "pl": {"name": "Polish", "native": "Polski"},
    "nl": {"name": "Dutch", "native": "Nederlands"},
    "uk": {"name": "Ukrainian", "native": "Українська"},
    "ro": {"name": "Romanian", "native": "Română"},
    "ms": {"name": "Malay", "native": "Bahasa Melayu"},
    "id": {"name": "Indonesian", "native": "Bahasa Indonesia"},
    "tl": {"name": "Tagalog", "native": "Tagalog"},
    "my": {"name": "Burmese", "native": "မြန်မာဘာသာ"},
    "km": {"name": "Khmer", "native": "ភាសាខ្មែរ"},
    "lo": {"name": "Lao", "native": "ລາວ"},
    "ur": {"name": "Urdu", "native": "اردو"},
    "ta": {"name": "Tamil", "native": "தமிழ்"},
    "te": {"name": "Telugu", "native": "తెలుగు"},
    "mr": {"name": "Marathi", "native": "मराठी"},
    "gu": {"name": "Gujarati", "native": "ગુજરાતી"},
    "kn": {"name": "Kannada", "native": "ಕನ್ನಡ"},
    "ml": {"name": "Malayalam", "native": "മലയാളം"},
    "pa": {"name": "Punjabi", "native": "ਪੰਜਾਬੀ"},
    "ne": {"name": "Nepali", "native": "नेपाली"},
    "si": {"name": "Sinhala", "native": "සිංහල"},
    "sv": {"name": "Swedish", "native": "Svenska"},
    "da": {"name": "Danish", "native": "Dansk"},
    "fi": {"name": "Finnish", "native": "Suomi"},
    "no": {"name": "Norwegian", "native": "Norsk"},
    "cs": {"name": "Czech", "native": "Čeština"},
    "sk": {"name": "Slovak", "native": "Slovenčina"},
    "hu": {"name": "Hungarian", "native": "Magyar"},
    "bg": {"name": "Bulgarian", "native": "Български"},
    "sr": {"name": "Serbian", "native": "Српски"},
    "hr": {"name": "Croatian", "native": "Hrvatski"},
    "sl": {"name": "Slovenian", "native": "Slovenščina"},
    "et": {"name": "Estonian", "native": "Eesti"},
    "lv": {"name": "Latvian", "native": "Latviešu"},
    "lt": {"name": "Lithuanian", "native": "Lietuvių"},
    "el": {"name": "Greek", "native": "Ελληνικά"},
    "he": {"name": "Hebrew", "native": "עברית"},
    "fa": {"name": "Persian", "native": "فارسی"},
    "sw": {"name": "Swahili", "native": "Kiswahili"},
    "am": {"name": "Amharic", "native": "አማርኛ"},
    "ha": {"name": "Hausa", "native": "Hausa"},
    "ig": {"name": "Igbo", "native": "Igbo"},
    "yo": {"name": "Yoruba", "native": "Yorùbá"},
    "zu": {"name": "Zulu", "native": "isiZulu"},
    "xh": {"name": "Xhosa", "native": "isiXhosa"},
    "af": {"name": "Afrikaans", "native": "Afrikaans"},
    "az": {"name": "Azerbaijani", "native": "Azərbaycan"},
    "ka": {"name": "Georgian", "native": "ქართული"},
    "kk": {"name": "Kazakh", "native": "Қазақша"},
    "uz": {"name": "Uzbek", "native": "Oʻzbekcha"},
    "mn": {"name": "Mongolian", "native": "Монгол"},
    "sq": {"name": "Albanian", "native": "Shqip"},
    "hy": {"name": "Armenian", "native": "Հայերեն"},
    "be": {"name": "Belarusian", "native": "Беларуская"},
    "bs": {"name": "Bosnian", "native": "Bosanski"},
    "ca": {"name": "Catalan", "native": "Català"},
    "ceb": {"name": "Cebuano", "native": "Cebuano"},
    "cy": {"name": "Welsh", "native": "Cymraeg"},
    "eo": {"name": "Esperanto", "native": "Esperanto"},
    "eu": {"name": "Basque", "native": "Euskara"},
    "fil": {"name": "Filipino", "native": "Filipino"},
    "fy": {"name": "Frisian", "native": "Frysk"},
    "ga": {"name": "Irish", "native": "Gaeilge"},
    "gd": {"name": "Scottish Gaelic", "native": "Gàidhlig"},
    "gl": {"name": "Galician", "native": "Galego"},
    "haw": {"name": "Hawaiian", "native": "ʻŌlelo Hawaiʻi"},
    "hmn": {"name": "Hmong", "native": "Hmong"},
    "ht": {"name": "Haitian Creole", "native": "Kreyòl ayisyen"},
    "is": {"name": "Icelandic", "native": "Íslenska"},
    "jv": {"name": "Javanese", "native": "Basa Jawa"},
    "ku": {"name": "Kurdish", "native": "Kurdî"},
    "ky": {"name": "Kyrgyz", "native": "Кыргызча"},
    "la": {"name": "Latin", "native": "Latina"},
    "lb": {"name": "Luxembourgish", "native": "Lëtzebuergesch"},
    "lg": {"name": "Luganda", "native": "Luganda"},
    "ln": {"name": "Lingala", "native": "Lingála"},
    "mg": {"name": "Malagasy", "native": "Malagasy"},
    "mi": {"name": "Maori", "native": "Te Reo Māori"},
    "mk": {"name": "Macedonian", "native": "Македонски"},
    "mt": {"name": "Maltese", "native": "Malti"},
    "ny": {"name": "Chichewa", "native": "Chichewa"},
    "ps": {"name": "Pashto", "native": "پښتو"},
    "sn": {"name": "Shona", "native": "chiShona"},
    "so": {"name": "Somali", "native": "Soomaali"},
    "st": {"name": "Sesotho", "native": "Sesotho"},
    "su": {"name": "Sundanese", "native": "Basa Sunda"},
    "tg": {"name": "Tajik", "native": "Тоҷикӣ"},
    "tk": {"name": "Turkmen", "native": "Türkmençe"},
    "ug": {"name": "Uyghur", "native": "ئۇيغۇرچە"},
    "yi": {"name": "Yiddish", "native": "ייִדיש"},
}


@app.get("/api/supported-languages")
async def get_supported_languages():
    """Get list of supported languages.

    The language codes are the keys of ``translator.lang_codes``, returned
    in sorted order. Each code is paired with its English and native display
    name from ``_LANGUAGE_NAMES``; a code missing from that table falls back
    to its upper-cased code for both names.

    Returns:
        A dict with ``languages`` (a list of ``code`` / ``name`` /
        ``native_name`` entries), ``total_languages`` (the length of that
        list) and a fixed ``note`` string.
    """
    languages = []
    for code in sorted(translator.lang_codes):
        display = _LANGUAGE_NAMES.get(code, {})
        fallback = code.upper()
        languages.append(
            {
                "code": code,
                "name": display.get("name", fallback),
                "native_name": display.get("native", fallback),
            }
        )

    return {
        "languages": languages,
        "total_languages": len(languages),
        "note": "All language pairs are supported (any-to-any translation)"
    }
|
||||
Reference in New Issue
Block a user