# Docker Compose configuration for the mBART translation API.
# NOTE(review): the top-level `version` key is obsolete in the Compose
# Specification (Compose v2 ignores it with a warning); kept for
# compatibility with legacy docker-compose v1 tooling.
version: '3.8'

services:
  mbart-api:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: mbart-translation-api
    ports:
      - "8000:8000"  # quoted to avoid YAML sexagesimal parsing of host:container pairs
    environment:
      - HOST=0.0.0.0
      - PORT=8000
      - MODEL_NAME=facebook/mbart-large-50-many-to-many-mmt
      - MAX_LENGTH=512
      - DEVICE=cpu  # Change to 'cuda' for GPU support
    volumes:
      # Cache HuggingFace models to avoid re-downloading
      - huggingface-cache:/home/appuser/.cache/huggingface
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Generous start window: first boot downloads/loads the mBART model
      start_period: 60s

  # GPU support (uncomment if you have NVIDIA GPU)
  # mbart-api-gpu:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile
  #   container_name: mbart-translation-api-gpu
  #   ports:
  #     - "8000:8000"
  #   environment:
  #     - HOST=0.0.0.0
  #     - PORT=8000
  #     - MODEL_NAME=facebook/mbart-large-50-many-to-many-mmt
  #     - MAX_LENGTH=512
  #     - DEVICE=cuda
  #   volumes:
  #     - huggingface-cache:/home/appuser/.cache/huggingface
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   restart: unless-stopped

volumes:
  # Named volume so downloaded HuggingFace model weights survive container rebuilds
  huggingface-cache:
    driver: local