# Docker Compose configuration for the mBART translation API.
# NOTE(review): the top-level `version` key is obsolete in the Compose
# Specification (Compose v2 ignores it with a warning); kept for
# compatibility with legacy docker-compose v1 tooling.
version: '3.8'

services:
  mbart-api:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: mbart-translation-api
    ports:
      - "8000:8000"  # quoted to avoid YAML sexagesimal parsing of host:container pairs
    environment:
      - HOST=0.0.0.0
      - PORT=8000
      - MODEL_NAME=facebook/mbart-large-50-many-to-many-mmt
      - MAX_LENGTH=512
      - DEVICE=cpu  # Change to 'cuda' for GPU support
    volumes:
      # Cache HuggingFace models to avoid re-downloading
      - huggingface-cache:/home/appuser/.cache/huggingface
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Generous start window: first boot downloads/loads the mBART model
      start_period: 60s

  # GPU support (uncomment if you have NVIDIA GPU)
  # mbart-api-gpu:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile
  #   container_name: mbart-translation-api-gpu
  #   ports:
  #     - "8000:8000"
  #   environment:
  #     - HOST=0.0.0.0
  #     - PORT=8000
  #     - MODEL_NAME=facebook/mbart-large-50-many-to-many-mmt
  #     - MAX_LENGTH=512
  #     - DEVICE=cuda
  #   volumes:
  #     - huggingface-cache:/home/appuser/.cache/huggingface
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             count: 1
  #             capabilities: [gpu]
  #   restart: unless-stopped

volumes:
  # Named volume so downloaded HuggingFace model weights survive container rebuilds
  huggingface-cache:
    driver: local