feat: Drama Studio 프로젝트 초기 구조 설정

- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-01-26 11:39:38 +09:00
commit cc547372c0
70 changed files with 18399 additions and 0 deletions

View File

@ -0,0 +1,56 @@
# Audio Studio MusicGen - GPU Dockerfile
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# Environment variables
ENV PYTHONUNBUFFERED=1
ENV DEBIAN_FRONTEND=noninteractive
ENV CUDA_HOME=/usr/local/cuda
ENV PATH="${CUDA_HOME}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"

# System packages (curl is required by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3.11 \
    python3.11-dev \
    python3-pip \
    git \
    curl \
    libsndfile1 \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Point `python` and `python3` at Python 3.11
RUN ln -sf /usr/bin/python3.11 /usr/bin/python && \
    ln -sf /usr/bin/python3.11 /usr/bin/python3

# Upgrade pip tooling
RUN python -m pip install --upgrade pip setuptools wheel

# Working directory
WORKDIR /app

# PyTorch + CUDA wheels
RUN pip install --no-cache-dir torch torchaudio --index-url https://download.pytorch.org/whl/cu124

# AudioCraft
RUN pip install --no-cache-dir audiocraft

# Remaining dependencies.
# requirements.txt is copied only here, after the heavy torch/audiocraft
# layers, so editing it does not invalidate their build cache.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application source
COPY app/ ./app/

# Exposed port
EXPOSE 8002

# Health check against the API's /health endpoint
HEALTHCHECK --interval=30s --timeout=30s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:8002/health || exit 1

# Start the server
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8002"]

View File

View File

@ -0,0 +1,205 @@
"""Audio Studio MusicGen API
AI 음악 생성 API 서버
"""
import logging
from contextlib import asynccontextmanager
from typing import Optional
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from fastapi.responses import Response
from pydantic import BaseModel, Field
from app.services.musicgen_service import musicgen_service
# Logging configuration: INFO level; each record shows timestamp, logger name,
# level, and message.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Module-level logger used by the lifespan hook and the endpoint handlers.
logger = logging.getLogger(__name__)
# ========================================
# Pydantic 모델
# ========================================
class GenerateRequest(BaseModel):
    """Request body for text-to-music generation.

    Every field declares its validation bounds through ``Field``.
    """

    # Text description of the music (5 to 500 characters, required).
    prompt: str = Field(
        ...,
        min_length=5,
        max_length=500,
        description="음악 설명",
    )
    # Length of the generated clip in seconds (5 to 30).
    duration: int = Field(
        default=30,
        ge=5,
        le=30,
        description="생성 길이 (초)",
    )
    # Top-k sampling parameter (50 to 500).
    top_k: int = Field(
        default=250,
        ge=50,
        le=500,
        description="top-k 샘플링",
    )
    # Sampling temperature (0.5 to 2.0).
    temperature: float = Field(
        default=1.0,
        ge=0.5,
        le=2.0,
        description="생성 다양성",
    )
class HealthResponse(BaseModel):
    """Response body returned by the health-check endpoint."""

    # "healthy" once the model is loaded, otherwise "initializing".
    status: str

    # Model metadata as reported by the MusicGen service.
    model_info: dict
# ========================================
# 앱 생명주기
# ========================================
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup and shutdown hooks for the application.

    On startup the MusicGen model is initialized eagerly. A failure is logged
    together with its traceback but does not abort startup.

    Args:
        app: The FastAPI application this lifespan is attached to.
    """
    logger.info("MusicGen 서비스 시작...")
    try:
        await musicgen_service.initialize()
    except Exception as e:
        # Deliberate best-effort: keep the server running even if the model
        # failed to load (lazy loading is attempted later). Use
        # logger.exception so the traceback is not lost.
        logger.exception(f"MusicGen 초기화 실패: {e}")
    else:
        logger.info("MusicGen 서비스 준비 완료")
    yield
    logger.info("MusicGen 서비스 종료")
# ========================================
# FastAPI 앱
# ========================================
# Application instance; startup/shutdown work is delegated to `lifespan`.
app = FastAPI(
title="Audio Studio MusicGen",
description="AI 음악 생성 API (Meta AudioCraft)",
version="0.1.0",
lifespan=lifespan,
)
# ========================================
# API 엔드포인트
# ========================================
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report whether the model is loaded, along with model metadata.

    Returns:
        A ``HealthResponse`` whose status is "healthy" when the service is
        initialized and "initializing" otherwise.
    """
    if musicgen_service.is_initialized():
        current_status = "healthy"
    else:
        current_status = "initializing"

    info = musicgen_service.get_model_info()
    return HealthResponse(status=current_status, model_info=info)
@app.post("/generate")
async def generate_music(request: GenerateRequest):
    """Generate music from a text prompt.

    Example prompts:
    - "upbeat electronic music for gaming"
    - "calm piano music, peaceful, ambient"
    - "energetic rock music with drums"
    - "lo-fi hip hop beats, relaxing"

    Args:
        request: Validated generation parameters.

    Returns:
        A WAV response carrying the generated audio, with sample-rate and
        duration headers.

    Raises:
        HTTPException: 500 when generation fails. An HTTPException raised
            inside the call is propagated unchanged.
    """
    try:
        audio_bytes = await musicgen_service.generate(
            prompt=request.prompt,
            duration=request.duration,
            top_k=request.top_k,
            temperature=request.temperature,
        )
        return Response(
            content=audio_bytes,
            media_type="audio/wav",
            headers={
                "X-Sample-Rate": "32000",
                "X-Duration": str(request.duration),
                "Content-Disposition": 'attachment; filename="generated_music.wav"',
            },
        )
    except HTTPException:
        # Same policy as /generate-with-melody: never rewrap an explicit
        # HTTP error as a generic 500.
        raise
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"음악 생성 실패: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Music generation failed: {str(e)}",
        ) from e
@app.post("/generate-with-melody")
async def generate_with_melody(
    prompt: str = Form(..., min_length=5, description="음악 설명"),
    duration: int = Form(default=30, ge=5, le=30, description="생성 길이"),
    melody_audio: UploadFile = File(..., description="참조 멜로디 오디오"),
):
    """Generate music conditioned on a reference melody.

    New music is generated while keeping the melody/harmony of the uploaded
    reference audio.

    Args:
        prompt: Text description of the music.
        duration: Length of the generated clip in seconds (5 to 30).
        melody_audio: Uploaded reference melody audio file.

    Returns:
        A WAV response carrying the generated audio, with sample-rate and
        duration headers.

    Raises:
        HTTPException: 400 when the upload is smaller than 1000 bytes,
            500 when generation fails.
    """
    try:
        melody_bytes = await melody_audio.read()
        # Reject uploads too small to be usable audio.
        if len(melody_bytes) < 1000:
            raise HTTPException(status_code=400, detail="Melody audio is too small")
        audio_bytes = await musicgen_service.generate_with_melody(
            prompt=prompt,
            melody_audio=melody_bytes,
            duration=duration,
        )
        return Response(
            content=audio_bytes,
            media_type="audio/wav",
            headers={
                "X-Sample-Rate": "32000",
                "X-Duration": str(duration),
                "Content-Disposition": 'attachment; filename="melody_based_music.wav"',
            },
        )
    except HTTPException:
        # Let explicit HTTP errors (such as the 400 above) pass through.
        raise
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"멜로디 기반 생성 실패: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Generation failed: {str(e)}",
        ) from e
@app.get("/prompts")
async def get_example_prompts():
    """Return example prompts grouped by category.

    Returns:
        A dict with a single "examples" key holding a list of
        ``{"category": ..., "prompts": [...]}`` entries.
    """
    # Category name -> example prompts; insertion order is the response order.
    prompts_by_category = {
        "Electronic": [
            "upbeat electronic dance music with synthesizers",
            "chill electronic ambient music",
            "retro synthwave 80s style music",
        ],
        "Classical": [
            "calm piano solo, classical style",
            "orchestral epic cinematic music",
            "gentle string quartet, romantic",
        ],
        "Pop/Rock": [
            "energetic rock music with electric guitar",
            "upbeat pop song with catchy melody",
            "acoustic guitar folk music",
        ],
        "Ambient/Lo-fi": [
            "lo-fi hip hop beats, relaxing, study music",
            "peaceful ambient nature sounds music",
            "meditation music, calm, zen",
        ],
        "Game/Film": [
            "epic adventure game soundtrack",
            "tense suspenseful thriller music",
            "cheerful happy video game background",
        ],
    }
    example_entries = [
        {"category": category_name, "prompts": prompt_list}
        for category_name, prompt_list in prompts_by_category.items()
    ]
    return {"examples": example_entries}

View File

@ -0,0 +1,199 @@
"""MusicGen 서비스
Meta AudioCraft MusicGen을 사용한 AI 음악 생성
"""
import asyncio
import io
import logging
import os
import threading
from typing import Optional

import numpy as np
import soundfile as sf
import torch
logger = logging.getLogger(__name__)
class MusicGenService:
    """Music generation service backed by Meta AudioCraft MusicGen.

    Model loading and inference are blocking calls, so the async entry points
    run them in a worker thread via ``asyncio.to_thread``. This keeps the
    event loop (and therefore other endpoints such as the health check)
    responsive while a clip is being generated. Every use of the shared model
    is serialized with a lock, because generation parameters are set on the
    model object itself before each run.
    """

    # Output sample rate in Hz used when writing WAV data and resampling melodies.
    SAMPLE_RATE = 32000
    # Bounds (seconds) that requested durations are clamped to.
    MIN_DURATION = 5
    MAX_DURATION = 30

    def __init__(self):
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.model_name = os.getenv("MODEL_NAME", "facebook/musicgen-medium")
        self.model = None
        self._initialized = False
        # Guards model loading and every set_generation_params/generate pair.
        self._model_lock = threading.Lock()

    async def initialize(self):
        """Load the model if it is not loaded yet (called at server startup).

        Raises:
            Exception: Whatever the model loader raises; it is logged first.
        """
        if self._initialized:
            return
        await asyncio.to_thread(self._load_model)

    def _load_model(self):
        """Blocking model load; safe to call from several threads."""
        with self._model_lock:
            # Another caller may have finished loading while we waited.
            if self._initialized:
                return
            logger.info(f"MusicGen 모델 로딩 중: {self.model_name}")
            try:
                from audiocraft.models import MusicGen

                self.model = MusicGen.get_pretrained(self.model_name)
                self.model.set_generation_params(
                    use_sampling=True,
                    top_k=250,
                    duration=30,  # default: 30 seconds
                )
                self._initialized = True
                logger.info(f"MusicGen 모델 로드 완료 (device: {self.device})")
            except Exception as e:
                logger.error(f"MusicGen 모델 로드 실패: {e}")
                raise

    def _clamp_duration(self, duration: int) -> int:
        """Clamp a requested duration to the supported range."""
        return min(max(duration, self.MIN_DURATION), self.MAX_DURATION)

    def _encode_wav(self, audio_tensor) -> bytes:
        """Convert one generated audio tensor into WAV bytes."""
        audio_data = audio_tensor.cpu().numpy()
        # Average over the leading axis when the array is multi-dimensional
        # (the original comment describes this as stereo-to-mono).
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=0)
        buffer = io.BytesIO()
        sf.write(buffer, audio_data, self.SAMPLE_RATE, format='WAV')
        return buffer.getvalue()

    def _generate_blocking(
        self,
        prompt: str,
        duration: int,
        top_k: int,
        temperature: float,
    ) -> bytes:
        """Blocking text-to-music generation; runs in a worker thread."""
        with self._model_lock:
            self.model.set_generation_params(
                use_sampling=True,
                top_k=top_k,
                top_p=0.0,
                temperature=temperature,
                duration=duration,
            )
            wav = self.model.generate([prompt])
        # Only the first result is used.
        return self._encode_wav(wav[0])

    def _generate_with_melody_blocking(
        self,
        prompt: str,
        melody_audio: bytes,
        duration: int,
    ) -> bytes:
        """Blocking melody-conditioned generation; runs in a worker thread."""
        import torchaudio

        melody, sr = torchaudio.load(io.BytesIO(melody_audio))
        # Resample the reference to the service sample rate.
        if sr != self.SAMPLE_RATE:
            melody = torchaudio.functional.resample(melody, sr, self.SAMPLE_RATE)
        # Downmix multi-channel input to a single channel.
        if melody.shape[0] > 1:
            melody = melody.mean(dim=0, keepdim=True)
        # Truncate the reference to the maximum supported length.
        max_samples = self.SAMPLE_RATE * self.MAX_DURATION
        if melody.shape[1] > max_samples:
            melody = melody[:, :max_samples]

        with self._model_lock:
            self.model.set_generation_params(
                use_sampling=True,
                top_k=250,
                duration=duration,
            )
            wav = self.model.generate_with_chroma(
                descriptions=[prompt],
                melody_wavs=melody.unsqueeze(0).to(self.device),
                melody_sample_rate=self.SAMPLE_RATE,
                progress=True,
            )
        return self._encode_wav(wav[0])

    async def generate(
        self,
        prompt: str,
        duration: int = 30,
        top_k: int = 250,
        temperature: float = 1.0,
    ) -> bytes:
        """Generate music from a text prompt.

        Args:
            prompt: Music description (e.g. "upbeat electronic music for gaming").
            duration: Clip length in seconds; clamped to 5-30.
            top_k: Top-k sampling parameter.
            temperature: Sampling temperature (higher is more diverse).

        Returns:
            The generated audio as WAV bytes.

        Raises:
            Exception: Any error from model loading or generation, after logging.
        """
        if not self._initialized:
            await self.initialize()

        duration = self._clamp_duration(duration)
        logger.info(f"음악 생성 시작: prompt='{prompt[:50]}...', duration={duration}s")
        try:
            # Inference is blocking; keep it off the event loop.
            wav_bytes = await asyncio.to_thread(
                self._generate_blocking, prompt, duration, top_k, temperature
            )
        except Exception as e:
            logger.error(f"음악 생성 실패: {e}")
            raise
        logger.info(f"음악 생성 완료: {duration}")
        return wav_bytes

    async def generate_with_melody(
        self,
        prompt: str,
        melody_audio: bytes,
        duration: int = 30,
    ) -> bytes:
        """Generate music conditioned on a reference melody.

        Args:
            prompt: Music description.
            melody_audio: Encoded reference melody audio (WAV) as bytes.
            duration: Clip length in seconds; clamped to 5-30.

        Returns:
            The generated audio as WAV bytes.

        Raises:
            Exception: Any error from decoding, model loading, or generation,
                after logging.
        """
        if not self._initialized:
            await self.initialize()

        duration = self._clamp_duration(duration)
        logger.info(f"멜로디 기반 음악 생성: prompt='{prompt[:50]}...', duration={duration}s")
        try:
            # Decoding and inference are blocking; keep them off the event loop.
            wav_bytes = await asyncio.to_thread(
                self._generate_with_melody_blocking, prompt, melody_audio, duration
            )
        except Exception as e:
            logger.error(f"멜로디 기반 생성 실패: {e}")
            raise
        logger.info("멜로디 기반 음악 생성 완료")
        return wav_bytes

    def is_initialized(self) -> bool:
        """Return whether the model has been loaded."""
        return self._initialized

    def get_model_info(self) -> dict:
        """Return model metadata: name, device, state, and output limits."""
        return {
            "model_name": self.model_name,
            "device": self.device,
            "initialized": self._initialized,
            "max_duration": self.MAX_DURATION,
            "sample_rate": self.SAMPLE_RATE,
        }
# Module-level singleton instance; the API module imports this object.
musicgen_service = MusicGenService()

View File

@ -0,0 +1,24 @@
# Audio Studio MusicGen - Dependencies
# FastAPI
fastapi==0.115.6
uvicorn[standard]==0.34.0
# PyTorch (CUDA 12.x)
--extra-index-url https://download.pytorch.org/whl/cu124
torch>=2.5.0
torchaudio>=2.5.0
# AudioCraft (MusicGen)
# NOTE(review): the audiocraft release on PyPI appears to pin an older torch
# (presumably torch==2.1.0 for 1.3.0) - confirm that it resolves together with
# torch>=2.5.0 above; otherwise `pip install -r requirements.txt` will fail.
audiocraft>=1.3.0
# Audio Processing
soundfile>=0.12.1
numpy>=1.26.0
scipy>=1.14.0
# Utilities
httpx>=0.28.0
pydantic>=2.10.0
pydantic-settings>=2.7.0
python-dotenv>=1.0.1