feat: Drama Studio 프로젝트 초기 구조 설정

- FastAPI 백엔드 (audio-studio-api) - Next.js 프론트엔드 (audio-studio-ui) - Qwen3-TTS 엔진 (audio-studio-tts) - MusicGen 서비스 (audio-studio-musicgen) - Docker Compose 개발/운영 환경 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 11:39:38 +09:00
commit cc547372c0
70 changed files with 18399 additions and 0 deletions
--- a/audio-studio-api/app/init.py
+++ b/audio-studio-api/app/init.py
--- a/audio-studio-api/app/database.py
+++ b/audio-studio-api/app/database.py
@ -0,0 +1,169 @@
+"""데이터베이스 연결 설정
+
+MongoDB (motor async) + GridFS (오디오 저장)
+"""
+
+import os
+import logging
+from typing import Optional
+
+from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase, AsyncIOMotorGridFSBucket
+from redis.asyncio import Redis
+
+logger = logging.getLogger(__name__)
+
+
+class Database:
+    """데이터베이스 연결 관리"""
+
+    def __init__(self):
+        self.client: Optional[AsyncIOMotorClient] = None
+        self.db: Optional[AsyncIOMotorDatabase] = None
+        self.gridfs: Optional[AsyncIOMotorGridFSBucket] = None
+        self.redis: Optional[Redis] = None
+
+    async def connect(self):
+        """데이터베이스 연결"""
+        # MongoDB
+        mongodb_url = os.getenv("MONGODB_URL", "mongodb://localhost:27017/")
+        db_name = os.getenv("DB_NAME", "audio_studio")
+
+        logger.info(f"MongoDB 연결 중: {db_name}")
+        self.client = AsyncIOMotorClient(mongodb_url)
+        self.db = self.client[db_name]
+
+        # GridFS (오디오 파일 저장용)
+        self.gridfs = AsyncIOMotorGridFSBucket(self.db, bucket_name="audio_files")
+
+        # 연결 테스트
+        await self.client.admin.command("ping")
+        logger.info("MongoDB 연결 성공")
+
+        # Redis
+        redis_url = os.getenv("REDIS_URL", "redis://localhost:6379")
+        logger.info("Redis 연결 중...")
+        self.redis = Redis.from_url(redis_url, decode_responses=True)
+
+        # 연결 테스트
+        await self.redis.ping()
+        logger.info("Redis 연결 성공")
+
+        # 인덱스 생성
+        await self._create_indexes()
+
+    async def _create_indexes(self):
+        """컬렉션 인덱스 생성"""
+        # voices 컬렉션
+        await self.db.voices.create_index("voice_id", unique=True)
+        await self.db.voices.create_index("owner_id")
+        await self.db.voices.create_index("type")
+        await self.db.voices.create_index("language")
+        await self.db.voices.create_index("is_public")
+
+        # tts_generations 컬렉션
+        await self.db.tts_generations.create_index("generation_id", unique=True)
+        await self.db.tts_generations.create_index("user_id")
+        await self.db.tts_generations.create_index("voice_id")
+        await self.db.tts_generations.create_index("created_at")
+
+        # sound_effects 컬렉션
+        await self.db.sound_effects.create_index("source_id")
+        await self.db.sound_effects.create_index("categories")
+        await self.db.sound_effects.create_index("tags")
+
+        # music_tracks 컬렉션
+        await self.db.music_tracks.create_index("source")
+        await self.db.music_tracks.create_index("genre")
+        await self.db.music_tracks.create_index("mood")
+
+        logger.info("인덱스 생성 완료")
+
+    async def disconnect(self):
+        """데이터베이스 연결 해제"""
+        if self.client:
+            self.client.close()
+            logger.info("MongoDB 연결 해제")
+
+        if self.redis:
+            await self.redis.close()
+            logger.info("Redis 연결 해제")
+
+    # ========================================
+    # 컬렉션 접근자
+    # ========================================
+
+    @property
+    def voices(self):
+        """voices 컬렉션"""
+        return self.db.voices
+
+    @property
+    def tts_generations(self):
+        """tts_generations 컬렉션"""
+        return self.db.tts_generations
+
+    @property
+    def sound_effects(self):
+        """sound_effects 컬렉션"""
+        return self.db.sound_effects
+
+    @property
+    def music_tracks(self):
+        """music_tracks 컬렉션"""
+        return self.db.music_tracks
+
+    @property
+    def user_voice_library(self):
+        """user_voice_library 컬렉션"""
+        return self.db.user_voice_library
+
+    # ========================================
+    # GridFS 오디오 저장
+    # ========================================
+
+    async def save_audio(
+        self,
+        audio_bytes: bytes,
+        filename: str,
+        content_type: str = "audio/wav",
+        metadata: dict = None,
+    ) -> str:
+        """오디오 파일을 GridFS에 저장
+
+        Returns:
+            file_id (str)
+        """
+        file_id = await self.gridfs.upload_from_stream(
+            filename,
+            audio_bytes,
+            metadata={
+                "content_type": content_type,
+                **(metadata or {}),
+            }
+        )
+        return str(file_id)
+
+    async def get_audio(self, file_id: str) -> bytes:
+        """GridFS에서 오디오 파일 읽기"""
+        from bson import ObjectId
+        from io import BytesIO
+
+        buffer = BytesIO()
+        await self.gridfs.download_to_stream(ObjectId(file_id), buffer)
+        buffer.seek(0)
+        return buffer.read()
+
+    async def delete_audio(self, file_id: str):
+        """GridFS에서 오디오 파일 삭제"""
+        from bson import ObjectId
+        await self.gridfs.delete(ObjectId(file_id))
+
+
+# 싱글톤 인스턴스
+db = Database()
+
+
+# FastAPI 의존성
+async def get_db() -> Database:
+    """Return the module-level ``Database`` instance (for dependency injection)."""
+    return db
--- a/audio-studio-api/app/main.py
+++ b/audio-studio-api/app/main.py
@ -0,0 +1,163 @@
+"""Drama Studio API Server
+
+AI 라디오 드라마 제작 - TTS, 보이스, 효과음, 배경음악, 드라마 생성 API
+"""
+
+import logging
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+
+from app.database import db
+from app.routers import voices, tts, recordings, sound_effects, music, drama
+
+# Logging configuration
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+# ========================================
+# 앱 생명주기
+# ========================================
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """앱 시작/종료 시 실행"""
+    # 시작 시 DB 연결
+    logger.info("Drama Studio API 서버 시작...")
+    try:
+        await db.connect()
+        logger.info("데이터베이스 연결 완료")
+    except Exception as e:
+        logger.error(f"데이터베이스 연결 실패: {e}")
+        raise
+
+    yield
+
+    # 종료 시 DB 연결 해제
+    await db.disconnect()
+    logger.info("Drama Studio API 서버 종료")
+
+
+# ========================================
+# FastAPI 앱
+# ========================================
+
+# Application object; `description` is Markdown passed to FastAPI as the API description.
+app = FastAPI(
+    title="Drama Studio API",
+    description="""
+Drama Studio API - AI 라디오 드라마 제작 플랫폼
+
+## 기능
+
+### Voice (보이스 관리)
+- 프리셋 보이스 목록 조회
+- Voice Clone (목소리 복제)
+- Voice Design (AI 음성 생성)
+- 사용자 보이스 라이브러리
+
+### TTS (음성 합성)
+- 텍스트를 음성으로 변환
+- 다양한 언어 지원 (한국어, 영어, 일본어 등)
+
+### Recording (녹음)
+- 녹음 업로드 및 품질 검증
+- Voice Clone용 레퍼런스 관리
+
+### Sound Effects (효과음)
+- Freesound 검색 및 다운로드
+- 로컬 효과음 라이브러리
+
+### Drama (드라마 생성)
+- 스크립트 기반 라디오 드라마 생성
+- 자동 TTS/BGM/효과음 합성
+- 타임라인 기반 오디오 믹싱
+    """,
+    version="0.1.0",
+    lifespan=lifespan,
+)
+
+# CORS configuration
+# NOTE(review): a wildcard origin is combined with allow_credentials=True here.
+# The CORS spec does not allow a literal "*" origin on credentialed requests;
+# confirm how CORSMiddleware treats this combination before deploying.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # for development; must be restricted in production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+# ========================================
+# Router registration
+# ========================================
+
+app.include_router(voices.router)
+app.include_router(tts.router)
+app.include_router(recordings.router)
+app.include_router(sound_effects.router)
+app.include_router(music.router)
+app.include_router(drama.router)
+
+
+# ========================================
+# 기본 엔드포인트
+# ========================================
+
+@app.get("/")
+async def root():
+    """Return the API name, version and the path of the interactive docs."""
+    return {
+        "name": "Drama Studio API",
+        "version": "0.1.0",
+        "docs": "/docs",
+    }
+
+
+@app.get("/health")
+async def health_check():
+    """헬스체크"""
+    try:
+        # MongoDB 연결 확인
+        await db.client.admin.command("ping")
+        mongo_status = "healthy"
+    except Exception as e:
+        mongo_status = f"unhealthy: {str(e)}"
+
+    try:
+        # Redis 연결 확인
+        await db.redis.ping()
+        redis_status = "healthy"
+    except Exception as e:
+        redis_status = f"unhealthy: {str(e)}"
+
+    status = "healthy" if mongo_status == "healthy" and redis_status == "healthy" else "degraded"
+
+    return JSONResponse(
+        status_code=200 if status == "healthy" else 503,
+        content={
+            "status": status,
+            "services": {
+                "mongodb": mongo_status,
+                "redis": redis_status,
+            },
+        }
+    )
+
+
+# ========================================
+# 에러 핸들러
+# ========================================
+
+@app.exception_handler(Exception)
+async def global_exception_handler(request, exc):
+    """전역 예외 핸들러"""
+    logger.error(f"Unhandled exception: {exc}", exc_info=True)
+    return JSONResponse(
+        status_code=500,
+        content={"detail": "Internal server error"},
+    )
--- a/audio-studio-api/app/routers/init.py
+++ b/audio-studio-api/app/routers/init.py
--- a/audio-studio-api/app/routers/drama.py
+++ b/audio-studio-api/app/routers/drama.py
@ -0,0 +1,193 @@
+# 드라마 API 라우터
+from fastapi import APIRouter, HTTPException, BackgroundTasks
+from fastapi.responses import FileResponse
+from typing import Optional
+import os
+
+from app.models.drama import (
+    DramaCreateRequest, DramaGenerateRequest, DramaResponse,
+    ParsedScript, Character
+)
+from app.services.script_parser import script_parser
+from app.services.drama_orchestrator import drama_orchestrator
+
+router = APIRouter(prefix="/api/v1/drama", tags=["drama"])
+
+
+@router.post("/parse", response_model=ParsedScript)
+async def parse_script(script: str):
+    """
+    스크립트 파싱 (미리보기)
+
+    마크다운 형식의 스크립트를 구조화된 데이터로 변환합니다.
+    실제 프로젝트 생성 없이 파싱 결과만 확인할 수 있습니다.
+    """
+    is_valid, errors = script_parser.validate_script(script)
+    if not is_valid:
+        raise HTTPException(status_code=400, detail={"errors": errors})
+
+    return script_parser.parse(script)
+
+
+@router.post("/projects", response_model=DramaResponse)
+async def create_project(request: DramaCreateRequest):
+    """
+    새 드라마 프로젝트 생성
+
+    스크립트를 파싱하고 프로젝트를 생성합니다.
+    voice_mapping으로 캐릭터별 보이스를 지정할 수 있습니다.
+    """
+    # 스크립트 유효성 검사
+    is_valid, errors = script_parser.validate_script(request.script)
+    if not is_valid:
+        raise HTTPException(status_code=400, detail={"errors": errors})
+
+    project = await drama_orchestrator.create_project(request)
+
+    return DramaResponse(
+        project_id=project.project_id,
+        title=project.title,
+        status=project.status,
+        characters=project.script_parsed.characters if project.script_parsed else [],
+        element_count=len(project.script_parsed.elements) if project.script_parsed else 0,
+        estimated_duration=drama_orchestrator.estimate_duration(project.script_parsed) if project.script_parsed else None
+    )
+
+
+@router.get("/projects", response_model=list[DramaResponse])
+async def list_projects(skip: int = 0, limit: int = 20):
+    """프로젝트 목록 조회"""
+    projects = await drama_orchestrator.list_projects(skip=skip, limit=limit)
+
+    return [
+        DramaResponse(
+            project_id=p.project_id,
+            title=p.title,
+            status=p.status,
+            characters=p.script_parsed.characters if p.script_parsed else [],
+            element_count=len(p.script_parsed.elements) if p.script_parsed else 0,
+            output_file_id=p.output_file_id,
+            error_message=p.error_message
+        )
+        for p in projects
+    ]
+
+
+@router.get("/projects/{project_id}", response_model=DramaResponse)
+async def get_project(project_id: str):
+    """프로젝트 상세 조회"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    return DramaResponse(
+        project_id=project.project_id,
+        title=project.title,
+        status=project.status,
+        characters=project.script_parsed.characters if project.script_parsed else [],
+        element_count=len(project.script_parsed.elements) if project.script_parsed else 0,
+        estimated_duration=drama_orchestrator.estimate_duration(project.script_parsed) if project.script_parsed else None,
+        output_file_id=project.output_file_id,
+        error_message=project.error_message
+    )
+
+
+@router.post("/projects/{project_id}/render")
+async def render_project(
+    project_id: str,
+    background_tasks: BackgroundTasks,
+    output_format: str = "wav"
+):
+    """
+    드라마 렌더링 시작
+
+    백그라운드에서 TTS 생성, 효과음 검색, 믹싱을 수행합니다.
+    완료되면 status가 'completed'로 변경됩니다.
+    """
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    if project.status == "processing":
+        raise HTTPException(status_code=400, detail="이미 렌더링 중입니다")
+
+    # 백그라운드 렌더링 시작
+    background_tasks.add_task(
+        drama_orchestrator.render,
+        project_id,
+        output_format
+    )
+
+    return {
+        "project_id": project_id,
+        "status": "processing",
+        "message": "렌더링이 시작되었습니다"
+    }
+
+
+@router.get("/projects/{project_id}/download")
+async def download_project(project_id: str):
+    """렌더링된 드라마 다운로드"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    if project.status != "completed":
+        raise HTTPException(
+            status_code=400,
+            detail=f"렌더링이 완료되지 않았습니다 (현재 상태: {project.status})"
+        )
+
+    if not project.output_file_id or not os.path.exists(project.output_file_id):
+        raise HTTPException(status_code=404, detail="출력 파일을 찾을 수 없습니다")
+
+    return FileResponse(
+        project.output_file_id,
+        media_type="audio/wav",
+        filename=f"{project.title}.wav"
+    )
+
+
+@router.put("/projects/{project_id}/voices")
+async def update_voice_mapping(
+    project_id: str,
+    voice_mapping: dict[str, str]
+):
+    """캐릭터-보이스 매핑 업데이트"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    from app.database import db
+    from datetime import datetime
+
+    await db.dramas.update_one(
+        {"project_id": project_id},
+        {
+            "$set": {
+                "voice_mapping": voice_mapping,
+                "updated_at": datetime.utcnow()
+            }
+        }
+    )
+
+    return {"message": "보이스 매핑이 업데이트되었습니다"}
+
+
+@router.delete("/projects/{project_id}")
+async def delete_project(project_id: str):
+    """프로젝트 삭제"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    from app.database import db
+
+    # 출력 파일 삭제
+    if project.output_file_id and os.path.exists(project.output_file_id):
+        os.remove(project.output_file_id)
+
+    # DB에서 삭제
+    await db.dramas.delete_one({"project_id": project_id})
+
+    return {"message": "프로젝트가 삭제되었습니다"}
--- a/audio-studio-api/app/routers/music.py
+++ b/audio-studio-api/app/routers/music.py
@ -0,0 +1,278 @@
+"""배경음악 API 라우터
+
+MusicGen 연동 및 외부 음악 소스
+"""
+
+import os
+import uuid
+from datetime import datetime
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
+from fastapi.responses import Response
+from pydantic import BaseModel, Field
+import httpx
+
+from app.database import Database, get_db
+
+router = APIRouter(prefix="/api/v1/music", tags=["music"])
+
+MUSICGEN_URL = os.getenv("MUSICGEN_URL", "http://localhost:8002")
+
+
+# ========================================
+# Pydantic 모델
+# ========================================
+
+class MusicGenerateRequest(BaseModel):
+    """Request body for music generation."""
+    # Text description of the music; 5 to 500 characters.
+    prompt: str = Field(..., min_length=5, max_length=500, description="음악 설명")
+    # Length to generate in seconds; 5 to 30, default 30.
+    duration: int = Field(default=30, ge=5, le=30, description="생성 길이 (초)")
+    # Whether the generated track is also stored in the library.
+    save_to_library: bool = Field(default=True, description="라이브러리에 저장")
+
+
+class MusicTrackResponse(BaseModel):
+    """Response model describing one music track."""
+    id: str
+    name: str
+    description: Optional[str] = None
+    source: str  # musicgen | pixabay | uploaded
+    generation_prompt: Optional[str] = None
+    duration_seconds: float
+    genre: Optional[str] = None
+    mood: List[str] = []
+    license: str = ""
+    created_at: datetime
+
+
+class MusicListResponse(BaseModel):
+    """Response model for one page of the music library."""
+    tracks: List[MusicTrackResponse]
+    # Number of documents matching the filters, not just this page.
+    total: int
+    page: int
+    page_size: int
+
+
+# ========================================
+# API 엔드포인트
+# ========================================
+
+@router.post("/generate")
+async def generate_music(
+    request: MusicGenerateRequest,
+    db: Database = Depends(get_db),
+):
+    """AI로 배경음악 생성
+
+    MusicGen을 사용하여 텍스트 프롬프트 기반 음악 생성
+    """
+    try:
+        # MusicGen 서비스 호출
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            response = await client.post(
+                f"{MUSICGEN_URL}/generate",
+                json={
+                    "prompt": request.prompt,
+                    "duration": request.duration,
+                },
+            )
+            response.raise_for_status()
+            audio_bytes = response.content
+
+    except httpx.TimeoutException:
+        raise HTTPException(status_code=504, detail="Music generation timed out")
+    except httpx.HTTPStatusError as e:
+        raise HTTPException(status_code=502, detail=f"MusicGen error: {e.response.text}")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Music generation failed: {str(e)}")
+
+    # 라이브러리에 저장
+    if request.save_to_library:
+        track_id = f"music_{uuid.uuid4().hex[:12]}"
+        now = datetime.utcnow()
+
+        # GridFS에 오디오 저장
+        audio_file_id = await db.save_audio(
+            audio_bytes,
+            f"{track_id}.wav",
+            metadata={
+                "type": "generated_music",
+                "prompt": request.prompt,
+            },
+        )
+
+        # DB에 트랙 정보 저장
+        track_doc = {
+            "track_id": track_id,
+            "name": f"Generated: {request.prompt[:30]}...",
+            "description": request.prompt,
+            "source": "musicgen",
+            "generation_prompt": request.prompt,
+            "audio_file_id": audio_file_id,
+            "duration_seconds": request.duration,
+            "format": "wav",
+            "genre": None,
+            "mood": [],
+            "license": "CC-BY-NC",  # MusicGen 모델 라이센스
+            "created_at": now,
+        }
+        await db.music_tracks.insert_one(track_doc)
+
+    return Response(
+        content=audio_bytes,
+        media_type="audio/wav",
+        headers={
+            "X-Duration": str(request.duration),
+            "Content-Disposition": 'attachment; filename="generated_music.wav"',
+        },
+    )
+
+
+@router.get("/library", response_model=MusicListResponse)
+async def list_music_library(
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    source: Optional[str] = Query(None, description="소스 필터 (musicgen, pixabay, uploaded)"),
+    genre: Optional[str] = Query(None, description="장르 필터"),
+    db: Database = Depends(get_db),
+):
+    """음악 라이브러리 목록 조회"""
+    query = {}
+    if source:
+        query["source"] = source
+    if genre:
+        query["genre"] = genre
+
+    total = await db.music_tracks.count_documents(query)
+    skip = (page - 1) * page_size
+
+    cursor = db.music_tracks.find(query).sort("created_at", -1).skip(skip).limit(page_size)
+
+    tracks = []
+    async for doc in cursor:
+        tracks.append(MusicTrackResponse(
+            id=doc.get("track_id", str(doc["_id"])),
+            name=doc["name"],
+            description=doc.get("description"),
+            source=doc.get("source", "unknown"),
+            generation_prompt=doc.get("generation_prompt"),
+            duration_seconds=doc.get("duration_seconds", 0),
+            genre=doc.get("genre"),
+            mood=doc.get("mood", []),
+            license=doc.get("license", ""),
+            created_at=doc.get("created_at", datetime.utcnow()),
+        ))
+
+    return MusicListResponse(
+        tracks=tracks,
+        total=total,
+        page=page,
+        page_size=page_size,
+    )
+
+
+@router.get("/{track_id}")
+async def get_music_track(
+    track_id: str,
+    db: Database = Depends(get_db),
+):
+    """음악 트랙 상세 정보"""
+    doc = await db.music_tracks.find_one({"track_id": track_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Track not found")
+
+    return MusicTrackResponse(
+        id=doc.get("track_id", str(doc["_id"])),
+        name=doc["name"],
+        description=doc.get("description"),
+        source=doc.get("source", "unknown"),
+        generation_prompt=doc.get("generation_prompt"),
+        duration_seconds=doc.get("duration_seconds", 0),
+        genre=doc.get("genre"),
+        mood=doc.get("mood", []),
+        license=doc.get("license", ""),
+        created_at=doc.get("created_at", datetime.utcnow()),
+    )
+
+
+@router.get("/{track_id}/audio")
+async def get_music_audio(
+    track_id: str,
+    db: Database = Depends(get_db),
+):
+    """음악 오디오 스트리밍"""
+    doc = await db.music_tracks.find_one({"track_id": track_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Track not found")
+
+    audio_file_id = doc.get("audio_file_id")
+    if not audio_file_id:
+        raise HTTPException(status_code=404, detail="Audio file not found")
+
+    audio_bytes = await db.get_audio(audio_file_id)
+
+    return Response(
+        content=audio_bytes,
+        media_type="audio/wav",
+        headers={"Content-Disposition": f'inline; filename="{track_id}.wav"'},
+    )
+
+
+@router.delete("/{track_id}")
+async def delete_music_track(
+    track_id: str,
+    db: Database = Depends(get_db),
+):
+    """음악 트랙 삭제"""
+    doc = await db.music_tracks.find_one({"track_id": track_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Track not found")
+
+    # 오디오 파일 삭제
+    if doc.get("audio_file_id"):
+        await db.delete_audio(doc["audio_file_id"])
+
+    # 문서 삭제
+    await db.music_tracks.delete_one({"track_id": track_id})
+
+    return {"status": "deleted", "track_id": track_id}
+
+
+@router.get("/prompts/examples")
+async def get_example_prompts():
+    """예시 프롬프트 목록"""
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            response = await client.get(f"{MUSICGEN_URL}/prompts")
+            response.raise_for_status()
+            return response.json()
+    except Exception:
+        # MusicGen 서비스 연결 실패 시 기본 프롬프트 반환
+        return {
+            "examples": [
+                {
+                    "category": "Ambient",
+                    "prompts": [
+                        "calm piano music, peaceful, ambient",
+                        "lo-fi hip hop beats, relaxing, study music",
+                        "meditation music, calm, zen",
+                    ],
+                },
+                {
+                    "category": "Electronic",
+                    "prompts": [
+                        "upbeat electronic dance music",
+                        "retro synthwave 80s style",
+                        "chill electronic ambient",
+                    ],
+                },
+                {
+                    "category": "Cinematic",
+                    "prompts": [
+                        "epic orchestral cinematic music",
+                        "tense suspenseful thriller music",
+                        "cheerful happy video game background",
+                    ],
+                },
+            ]
+        }
--- a/audio-studio-api/app/routers/recordings.py
+++ b/audio-studio-api/app/routers/recordings.py
@ -0,0 +1,184 @@
+"""녹음 관리 API 라우터"""
+
+import uuid
+import io
+from typing import List
+
+from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
+from fastapi.responses import Response
+import soundfile as sf
+import numpy as np
+
+from app.database import Database, get_db
+from app.models.voice import RecordingValidateResponse, RecordingUploadResponse
+
+router = APIRouter(prefix="/api/v1/recordings", tags=["recordings"])
+
+
+def analyze_audio(audio_bytes: bytes) -> dict:
+    """오디오 파일 분석
+
+    Returns:
+        duration, sample_rate, quality_score, issues
+    """
+    try:
+        # 오디오 로드
+        audio_data, sample_rate = sf.read(io.BytesIO(audio_bytes))
+
+        # 모노로 변환
+        if len(audio_data.shape) > 1:
+            audio_data = audio_data.mean(axis=1)
+
+        duration = len(audio_data) / sample_rate
+
+        # 품질 분석
+        issues = []
+        quality_score = 1.0
+
+        # 길이 체크
+        if duration < 1.0:
+            issues.append("오디오가 너무 짧습니다 (최소 1초 이상)")
+            quality_score -= 0.3
+        elif duration < 3.0:
+            issues.append("Voice Clone에는 3초 이상의 오디오가 권장됩니다")
+            quality_score -= 0.1
+
+        # RMS 레벨 체크 (볼륨)
+        rms = np.sqrt(np.mean(audio_data ** 2))
+        if rms < 0.01:
+            issues.append("볼륨이 너무 낮습니다")
+            quality_score -= 0.2
+        elif rms > 0.5:
+            issues.append("볼륨이 너무 높습니다 (클리핑 가능성)")
+            quality_score -= 0.1
+
+        # 피크 체크
+        peak = np.max(np.abs(audio_data))
+        if peak > 0.99:
+            issues.append("오디오가 클리핑되었습니다")
+            quality_score -= 0.2
+
+        # 노이즈 체크 (간단한 휴리스틱)
+        # 실제로는 더 정교한 노이즈 감지 필요
+        silence_threshold = 0.01
+        silent_samples = np.sum(np.abs(audio_data) < silence_threshold)
+        silence_ratio = silent_samples / len(audio_data)
+
+        if silence_ratio > 0.7:
+            issues.append("대부분이 무음입니다")
+            quality_score -= 0.3
+        elif silence_ratio > 0.5:
+            issues.append("무음 구간이 많습니다")
+            quality_score -= 0.1
+
+        quality_score = max(0.0, min(1.0, quality_score))
+
+        return {
+            "duration": duration,
+            "sample_rate": sample_rate,
+            "quality_score": quality_score,
+            "issues": issues,
+            "rms": float(rms),
+            "peak": float(peak),
+        }
+
+    except Exception as e:
+        return {
+            "duration": 0,
+            "sample_rate": 0,
+            "quality_score": 0,
+            "issues": [f"오디오 분석 실패: {str(e)}"],
+        }
+
+
+@router.post("/validate", response_model=RecordingValidateResponse)
+async def validate_recording(
+    audio: UploadFile = File(..., description="검증할 오디오 파일"),
+):
+    """녹음 품질 검증
+
+    Voice Clone에 사용할 녹음의 품질을 검증합니다.
+    """
+    audio_bytes = await audio.read()
+
+    if len(audio_bytes) < 1000:
+        raise HTTPException(status_code=400, detail="파일이 너무 작습니다")
+
+    analysis = analyze_audio(audio_bytes)
+
+    return RecordingValidateResponse(
+        valid=analysis["quality_score"] > 0.5 and analysis["duration"] > 1.0,
+        duration=analysis["duration"],
+        sample_rate=analysis["sample_rate"],
+        quality_score=analysis["quality_score"],
+        issues=analysis["issues"],
+    )
+
+
+@router.post("/upload", response_model=RecordingUploadResponse)
+async def upload_recording(
+    audio: UploadFile = File(..., description="업로드할 오디오 파일"),
+    transcript: str = Form(None, description="오디오의 텍스트 내용"),
+    db: Database = Depends(get_db),
+):
+    """녹음 파일 업로드
+
+    Voice Clone에 사용할 녹음을 업로드합니다.
+    """
+    audio_bytes = await audio.read()
+
+    # 품질 분석
+    analysis = analyze_audio(audio_bytes)
+
+    if analysis["duration"] < 0.5:
+        raise HTTPException(status_code=400, detail="오디오가 너무 짧습니다")
+
+    # GridFS에 저장
+    file_id = await db.save_audio(
+        audio_bytes,
+        audio.filename or f"recording_{uuid.uuid4()}.wav",
+        metadata={
+            "type": "recording",
+            "transcript": transcript,
+            "duration": analysis["duration"],
+            "sample_rate": analysis["sample_rate"],
+            "quality_score": analysis["quality_score"],
+        },
+    )
+
+    return RecordingUploadResponse(
+        file_id=file_id,
+        filename=audio.filename or "recording.wav",
+        duration=analysis["duration"],
+        sample_rate=analysis["sample_rate"],
+    )
+
+
+@router.get("/{file_id}")
+async def get_recording(
+    file_id: str,
+    db: Database = Depends(get_db),
+):
+    """녹음 파일 다운로드"""
+    try:
+        audio_bytes = await db.get_audio(file_id)
+        return Response(
+            content=audio_bytes,
+            media_type="audio/wav",
+            headers={"Content-Disposition": f'attachment; filename="{file_id}.wav"'},
+        )
+    except Exception as e:
+        raise HTTPException(status_code=404, detail="Recording not found")
+
+
+@router.delete("/{file_id}")
+async def delete_recording(
+    file_id: str,
+    db: Database = Depends(get_db),
+):
+    """녹음 파일 삭제"""
+    try:
+        await db.delete_audio(file_id)
+        return {"status": "deleted", "file_id": file_id}
+    except Exception as e:
+        raise HTTPException(status_code=404, detail="Recording not found")
--- a/audio-studio-api/app/routers/sound_effects.py
+++ b/audio-studio-api/app/routers/sound_effects.py
@ -0,0 +1,340 @@
+"""효과음 API 라우터
+
+Freesound API 연동
+"""
+
+import uuid
+from datetime import datetime
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Depends, Query
+from fastapi.responses import Response
+from pydantic import BaseModel
+
+from app.database import Database, get_db
+from app.services.freesound_client import freesound_client
+
+# Router collecting the sound-effect endpoints; every route declared on it is
+# served under the /api/v1/sound-effects prefix and tagged "sound-effects".
+router = APIRouter(prefix="/api/v1/sound-effects", tags=["sound-effects"])
+
+
+# ========================================
+# Pydantic 모델
+# ========================================
+
+class SoundEffectResponse(BaseModel):
+    """Sound effect response payload.
+
+    Used both for Freesound search hits and for entries of the local library.
+    """
+    # "fs_<freesound_id>" for Freesound results, stringified MongoDB _id for local entries
+    id: str
+    freesound_id: Optional[int] = None
+    name: str
+    description: str
+    # Length of the sound (the search filters describe durations in seconds)
+    duration: float
+    tags: List[str] = []
+    # Set for Freesound results; local entries are served through a separate endpoint
+    preview_url: Optional[str] = None
+    license: str = ""
+    username: Optional[str] = None
+    source: str = "freesound"  # freesound | local
+
+
+class SoundEffectSearchResponse(BaseModel):
+    """Paginated sound effect search/listing response."""
+    # Total number of matches reported by the backend (not just this page)
+    count: int
+    # 1-based page number and page size echoed back from the request
+    page: int
+    page_size: int
+    results: List[SoundEffectResponse]
+
+
+class SoundEffectImportRequest(BaseModel):
+    """Request body for importing a sound effect from Freesound."""
+    # Numeric Freesound sound id to fetch and cache locally
+    freesound_id: int
+
+
+# ========================================
+# API 엔드포인트
+# ========================================
+
+@router.get("/search", response_model=SoundEffectSearchResponse)
+async def search_sound_effects(
+    query: str = Query(..., min_length=1, description="검색어"),
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    min_duration: Optional[float] = Query(None, ge=0, description="최소 길이 (초)"),
+    max_duration: Optional[float] = Query(None, ge=0, description="최대 길이 (초)"),
+    sort: str = Query("score", description="정렬 (score, duration_asc, duration_desc)"),
+):
+    """Search Freesound for sound effects.
+
+    Forwards the query and paging/duration/sort options to the Freesound
+    client and maps each hit to a ``SoundEffectResponse`` whose id is
+    ``fs_<freesound_id>``. Any failure (client error or malformed hit) is
+    reported as a 500 with the error text.
+    """
+    try:
+        found = await freesound_client.search(
+            query=query,
+            page=page,
+            page_size=page_size,
+            min_duration=min_duration,
+            max_duration=max_duration,
+            sort=sort,
+        )
+
+        # Convert the client's result items into the API response model
+        converted = [
+            SoundEffectResponse(
+                id=f"fs_{hit['freesound_id']}",
+                freesound_id=hit["freesound_id"],
+                name=hit["name"],
+                description=hit["description"],
+                duration=hit["duration"],
+                tags=hit["tags"],
+                preview_url=hit["preview_url"],
+                license=hit["license"],
+                username=hit.get("username"),
+                source="freesound",
+            )
+            for hit in found["results"]
+        ]
+
+        return SoundEffectSearchResponse(
+            count=found["count"],
+            page=page,
+            page_size=page_size,
+            results=converted,
+        )
+
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Search failed: {str(exc)}")
+
+
+@router.get("/library", response_model=SoundEffectSearchResponse)
+async def list_local_sound_effects(
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    category: Optional[str] = Query(None, description="카테고리 필터"),
+    db: Database = Depends(get_db),
+):
+    """List the local sound effect library.
+
+    Optionally filters by category, sorts newest first (``created_at``
+    descending) and returns one page together with the total match count.
+    """
+    filters = {"categories": category} if category else {}
+
+    matched = await db.sound_effects.count_documents(filters)
+    offset = (page - 1) * page_size
+
+    page_cursor = (
+        db.sound_effects.find(filters)
+        .sort("created_at", -1)
+        .skip(offset)
+        .limit(page_size)
+    )
+
+    entries = [
+        SoundEffectResponse(
+            id=str(record["_id"]),
+            freesound_id=record.get("source_id"),
+            name=record["name"],
+            description=record.get("description", ""),
+            duration=record.get("duration_seconds", 0),
+            tags=record.get("tags", []),
+            preview_url=None,  # local files are served by a separate endpoint
+            license=record.get("license", ""),
+            source="local",
+        )
+        async for record in page_cursor
+    ]
+
+    return SoundEffectSearchResponse(
+        count=matched,
+        page=page,
+        page_size=page_size,
+        results=entries,
+    )
+
+
+@router.post("/import", response_model=SoundEffectResponse)
+async def import_sound_effect(
+    request: SoundEffectImportRequest,
+    db: Database = Depends(get_db),
+):
+    """Import a Freesound sound effect into the local library.
+
+    Looks the sound up on Freesound, downloads its HQ MP3 preview, stores the
+    bytes through ``db.save_audio`` and inserts a metadata document into
+    ``sound_effects``. Responds 400 when the sound has no HQ MP3 preview and
+    500 (with the error text) on any other failure.
+    """
+    try:
+        fs_id = request.freesound_id
+
+        # Look up the sound's details on Freesound
+        info = await freesound_client.get_sound(fs_id)
+
+        # Only the HQ MP3 preview is imported
+        hq_preview = info.get("previews", {}).get("preview-hq-mp3", "")
+        if not hq_preview:
+            raise HTTPException(status_code=400, detail="Preview not available")
+
+        mp3_bytes = await freesound_client.download_preview(hq_preview)
+
+        # Persist the audio bytes
+        stored_id = await db.save_audio(
+            mp3_bytes,
+            f"sfx_{fs_id}.mp3",
+            content_type="audio/mpeg",
+            metadata={"freesound_id": fs_id},
+        )
+
+        # Persist the metadata document
+        timestamp = datetime.utcnow()
+        record = {
+            "name": info.get("name", ""),
+            "description": info.get("description", ""),
+            "source": "freesound",
+            "source_id": fs_id,
+            "source_url": f"https://freesound.org/s/{fs_id}/",
+            "audio_file_id": stored_id,
+            "duration_seconds": info.get("duration", 0),
+            "format": "mp3",
+            "categories": [],
+            "tags": info.get("tags", [])[:20],  # keep at most 20 tags
+            "license": info.get("license", ""),
+            "attribution": info.get("username", ""),
+            "created_at": timestamp,
+            "updated_at": timestamp,
+        }
+
+        inserted = await db.sound_effects.insert_one(record)
+
+        return SoundEffectResponse(
+            id=str(inserted.inserted_id),
+            freesound_id=fs_id,
+            name=record["name"],
+            description=record["description"],
+            duration=record["duration_seconds"],
+            tags=record["tags"],
+            license=record["license"],
+            source="local",
+        )
+
+    except HTTPException:
+        # Deliberate HTTP errors (e.g. the 400 above) pass through untouched
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Import failed: {str(exc)}")
+
+
+# NOTE: this static route must be registered BEFORE the "/{sound_id}" routes
+# below. FastAPI matches path operations in declaration order, so declaring it
+# later would let "/{sound_id}" capture "/categories" as a sound id.
+@router.get("/categories")
+async def list_categories(
+    db: Database = Depends(get_db),
+):
+    """List sound effect categories of the local library.
+
+    Aggregates the ``categories`` arrays of all local sound effects and
+    returns each category with its usage count, most used first.
+    """
+    pipeline = [
+        {"$unwind": "$categories"},
+        {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
+        {"$sort": {"count": -1}},
+    ]
+
+    categories = []
+    async for doc in db.sound_effects.aggregate(pipeline):
+        categories.append({
+            "name": doc["_id"],
+            "count": doc["count"],
+        })
+
+    return {"categories": categories}
+
+
+@router.get("/{sound_id}")
+async def get_sound_effect_info(
+    sound_id: str,
+    db: Database = Depends(get_db),
+):
+    """Return details of one sound effect.
+
+    ``sound_id`` is either ``fs_<freesound_id>`` (looked up live on Freesound)
+    or the id of a local library entry.
+
+    Raises:
+        HTTPException 400: the id is neither ``fs_<int>`` nor a valid ObjectId.
+        HTTPException 404: the sound does not exist (or the Freesound lookup failed).
+    """
+    # Freesound reference
+    if sound_id.startswith("fs_"):
+        try:
+            freesound_id = int(sound_id[3:])
+        except ValueError:
+            # e.g. "fs_abc": a client error, not a server crash
+            raise HTTPException(status_code=400, detail="Invalid sound ID") from None
+        try:
+            sound_info = await freesound_client.get_sound(freesound_id)
+            return SoundEffectResponse(
+                id=sound_id,
+                freesound_id=freesound_id,
+                name=sound_info.get("name", ""),
+                description=sound_info.get("description", ""),
+                duration=sound_info.get("duration", 0),
+                tags=sound_info.get("tags", []),
+                preview_url=sound_info.get("previews", {}).get("preview-hq-mp3", ""),
+                license=sound_info.get("license", ""),
+                source="freesound",
+            )
+        except Exception:
+            raise HTTPException(status_code=404, detail="Sound not found")
+
+    # Local library entry
+    from bson import ObjectId
+    # Validate the id explicitly so that database errors are not misreported as 400
+    if not ObjectId.is_valid(sound_id):
+        raise HTTPException(status_code=400, detail="Invalid sound ID")
+
+    doc = await db.sound_effects.find_one({"_id": ObjectId(sound_id)})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Sound not found")
+
+    return SoundEffectResponse(
+        id=str(doc["_id"]),
+        freesound_id=doc.get("source_id"),
+        name=doc["name"],
+        description=doc.get("description", ""),
+        duration=doc.get("duration_seconds", 0),
+        tags=doc.get("tags", []),
+        license=doc.get("license", ""),
+        source="local",
+    )
+
+
+@router.get("/{sound_id}/audio")
+async def get_sound_effect_audio(
+    sound_id: str,
+    db: Database = Depends(get_db),
+):
+    """Return the audio bytes of one sound effect.
+
+    For ``fs_<freesound_id>`` ids the HQ MP3 preview is downloaded from
+    Freesound and proxied back to the caller; for local ids the stored file
+    is returned with a content type derived from the document's ``format``.
+
+    Raises:
+        HTTPException 400: the id is neither ``fs_<int>`` nor a valid ObjectId.
+        HTTPException 404: no audio is available for the id.
+    """
+    # Freesound reference: proxy the preview
+    if sound_id.startswith("fs_"):
+        try:
+            freesound_id = int(sound_id[3:])
+        except ValueError:
+            raise HTTPException(status_code=400, detail="Invalid sound ID") from None
+        try:
+            sound_info = await freesound_client.get_sound(freesound_id)
+            preview_url = sound_info.get("previews", {}).get("preview-hq-mp3", "")
+            if preview_url:
+                audio_bytes = await freesound_client.download_preview(preview_url)
+                return Response(
+                    content=audio_bytes,
+                    media_type="audio/mpeg",
+                    headers={"Content-Disposition": f'inline; filename="{freesound_id}.mp3"'},
+                )
+        except Exception:
+            raise HTTPException(status_code=404, detail="Audio not found")
+        # The sound exists but has no HQ preview. Do not fall through to the
+        # local lookup, which would reject the "fs_" id with a misleading 400.
+        raise HTTPException(status_code=404, detail="Audio not found")
+
+    # Local library entry
+    from bson import ObjectId
+    if not ObjectId.is_valid(sound_id):
+        raise HTTPException(status_code=400, detail="Invalid sound ID")
+
+    doc = await db.sound_effects.find_one({"_id": ObjectId(sound_id)})
+    if not doc or not doc.get("audio_file_id"):
+        raise HTTPException(status_code=404, detail="Audio not found")
+
+    audio_bytes = await db.get_audio(doc["audio_file_id"])
+    content_type = "audio/mpeg" if doc.get("format") == "mp3" else "audio/wav"
+
+    return Response(
+        content=audio_bytes,
+        media_type=content_type,
+        headers={"Content-Disposition": f'inline; filename="{sound_id}.{doc.get("format", "wav")}"'},
+    )
+
+
+@router.delete("/{sound_id}")
+async def delete_sound_effect(
+    sound_id: str,
+    db: Database = Depends(get_db),
+):
+    """Delete a sound effect from the local library.
+
+    Removes the stored audio file (when the document references one) and then
+    the metadata document itself.
+
+    Raises:
+        HTTPException 400: the id is a Freesound reference (``fs_...``) or is
+            not a valid ObjectId.
+        HTTPException 404: no local sound effect has this id.
+    """
+    if sound_id.startswith("fs_"):
+        raise HTTPException(status_code=400, detail="Cannot delete Freesound reference")
+
+    from bson import ObjectId
+    # Validate up front instead of a bare ``except`` around the query, which
+    # also turned database failures into "Invalid sound ID".
+    if not ObjectId.is_valid(sound_id):
+        raise HTTPException(status_code=400, detail="Invalid sound ID")
+    object_id = ObjectId(sound_id)
+
+    doc = await db.sound_effects.find_one({"_id": object_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Sound not found")
+
+    # Delete the audio file first, then the document that points at it
+    if doc.get("audio_file_id"):
+        await db.delete_audio(doc["audio_file_id"])
+
+    await db.sound_effects.delete_one({"_id": object_id})
+
+    return {"status": "deleted", "sound_id": sound_id}
--- a/audio-studio-api/app/routers/tts.py
+++ b/audio-studio-api/app/routers/tts.py
@ -0,0 +1,227 @@
+"""TTS API 라우터"""
+
+import uuid
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException, Depends
+from fastapi.responses import Response, StreamingResponse
+
+from app.database import Database, get_db
+from app.models.voice import TTSSynthesizeRequest, TTSGenerationResponse, VoiceType
+from app.services.tts_client import tts_client
+from app.routers.voices import PRESET_VOICES
+
+# Router collecting the TTS endpoints; every route declared on it is served
+# under the /api/v1/tts prefix and tagged "tts".
+router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
+
+
+@router.post("/synthesize")
+async def synthesize(
+    request: TTSSynthesizeRequest,
+    db: Database = Depends(get_db),
+):
+    """Synthesize speech for a text with the given voice.
+
+    The voice is resolved in this order: built-in preset, then a voice
+    document from the database (cloned or designed). The generated audio is
+    stored, a generation record is written, and the WAV bytes are returned
+    with ``X-Sample-Rate`` / ``X-Generation-ID`` headers.
+
+    Raises:
+        HTTPException 404: the voice id is neither a preset nor in the database.
+        HTTPException 400: a cloned voice has no reference audio, or the
+            stored voice type is unknown.
+        HTTPException 500: the TTS engine call failed.
+    """
+    voice_id = request.voice_id
+
+    # Speaker name of the matching preset, if the id refers to one
+    preset_speaker = next(
+        (entry["preset_voice_id"] for entry in PRESET_VOICES if entry["voice_id"] == voice_id),
+        None,
+    )
+
+    if preset_speaker:
+        # Preset voice: plain synthesis with the preset speaker
+        try:
+            wav_bytes, sample_rate = await tts_client.synthesize(
+                text=request.text,
+                speaker=preset_speaker,
+                language="ko",
+                instruct=request.instruct,
+            )
+        except Exception as exc:
+            raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {str(exc)}")
+
+    else:
+        # User voice: load its definition from the database
+        voice_doc = await db.voices.find_one({"voice_id": voice_id})
+        if not voice_doc:
+            raise HTTPException(status_code=404, detail="Voice not found")
+
+        stored_type = voice_doc.get("type")
+
+        if stored_type == VoiceType.CLONED.value:
+            # Cloned voice: needs the stored reference audio and its transcript
+            reference_id = voice_doc.get("reference_audio_id")
+            reference_text = voice_doc.get("reference_transcript", "")
+
+            if not reference_id:
+                raise HTTPException(status_code=400, detail="Reference audio not found")
+
+            reference_bytes = await db.get_audio(reference_id)
+
+            try:
+                wav_bytes, sample_rate = await tts_client.voice_clone(
+                    text=request.text,
+                    ref_audio=reference_bytes,
+                    ref_text=reference_text,
+                    language=voice_doc.get("language", "ko"),
+                )
+            except Exception as exc:
+                raise HTTPException(status_code=500, detail=f"Voice clone synthesis failed: {str(exc)}")
+
+        elif stored_type == VoiceType.DESIGNED.value:
+            # Designed voice: the stored design prompt drives the synthesis
+            prompt = voice_doc.get("design_prompt", "")
+
+            try:
+                wav_bytes, sample_rate = await tts_client.voice_design(
+                    text=request.text,
+                    instruct=prompt,
+                    language=voice_doc.get("language", "ko"),
+                )
+            except Exception as exc:
+                raise HTTPException(status_code=500, detail=f"Voice design synthesis failed: {str(exc)}")
+
+        else:
+            raise HTTPException(status_code=400, detail=f"Unknown voice type: {stored_type}")
+
+    # Identify and timestamp this generation
+    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
+    created = datetime.utcnow()
+
+    # Store the audio (metadata keeps only the first 100 characters of the text)
+    stored_audio_id = await db.save_audio(
+        wav_bytes,
+        f"{generation_id}.wav",
+        metadata={"voice_id": voice_id, "text": request.text[:100]},
+    )
+
+    # Record the completed generation
+    await db.tts_generations.insert_one({
+        "generation_id": generation_id,
+        "voice_id": voice_id,
+        "text": request.text,
+        "audio_file_id": stored_audio_id,
+        "status": "completed",
+        "created_at": created,
+    })
+
+    response_headers = {
+        "X-Sample-Rate": str(sample_rate),
+        "X-Generation-ID": generation_id,
+        "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
+    }
+    return Response(content=wav_bytes, media_type="audio/wav", headers=response_headers)
+
+
+@router.post("/synthesize/async", response_model=TTSGenerationResponse)
+async def synthesize_async(
+    request: TTSSynthesizeRequest,
+    db: Database = Depends(get_db),
+):
+    """Register an asynchronous TTS generation (intended for long texts).
+
+    Inserts a ``pending`` generation record and returns its
+    ``generation_id``. Once a generation is completed its audio can be
+    fetched from ``/generations/{generation_id}/audio``.
+
+    NOTE: this endpoint is currently a stub. Nothing here performs the
+    synthesis or updates the record, so unless something outside this handler
+    processes it, the generation stays ``pending``.
+    """
+    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
+    now = datetime.utcnow()
+
+    gen_doc = {
+        "generation_id": generation_id,
+        "voice_id": request.voice_id,
+        "text": request.text,
+        "status": "pending",
+        "created_at": now,
+    }
+    await db.tts_generations.insert_one(gen_doc)
+
+    # TODO: hand the job to a background worker (e.g. a Redis queue) that runs
+    # the same logic as ``synthesize`` and marks the record "completed" or
+    # "failed". The former ``try: pass / except`` placeholder was removed: its
+    # error handler could never run.
+
+    return TTSGenerationResponse(
+        generation_id=generation_id,
+        voice_id=request.voice_id,
+        text=request.text,
+        status="pending",
+        created_at=now,
+    )
+
+
+@router.get("/generations/{generation_id}", response_model=TTSGenerationResponse)
+async def get_generation(
+    generation_id: str,
+    db: Database = Depends(get_db),
+):
+    """Return the status record of a TTS generation.
+
+    Responds 404 when no generation has this id.
+    """
+    record = await db.tts_generations.find_one({"generation_id": generation_id})
+    if not record:
+        raise HTTPException(status_code=404, detail="Generation not found")
+
+    # The audio id is exposed as a string, or None while no audio exists
+    stored_audio = record.get("audio_file_id")
+    audio_ref = str(stored_audio) if stored_audio else None
+
+    return TTSGenerationResponse(
+        generation_id=record["generation_id"],
+        voice_id=record["voice_id"],
+        text=record["text"],
+        status=record["status"],
+        audio_file_id=audio_ref,
+        duration_seconds=record.get("duration_seconds"),
+        created_at=record["created_at"],
+    )
+
+
+@router.get("/generations/{generation_id}/audio")
+async def get_generation_audio(
+    generation_id: str,
+    db: Database = Depends(get_db),
+):
+    """Download the audio of a completed TTS generation.
+
+    Raises:
+        HTTPException 404: unknown generation, or it has no audio file.
+        HTTPException 400: the generation is not in the ``completed`` state.
+    """
+    record = await db.tts_generations.find_one({"generation_id": generation_id})
+    if not record:
+        raise HTTPException(status_code=404, detail="Generation not found")
+
+    state = record["status"]
+    if state != "completed":
+        raise HTTPException(status_code=400, detail=f"Generation not completed: {state}")
+
+    stored_audio = record.get("audio_file_id")
+    if not stored_audio:
+        raise HTTPException(status_code=404, detail="Audio file not found")
+
+    wav_bytes = await db.get_audio(stored_audio)
+    disposition = f'attachment; filename="{generation_id}.wav"'
+
+    return Response(
+        content=wav_bytes,
+        media_type="audio/wav",
+        headers={"Content-Disposition": disposition},
+    )
+
+
+@router.get("/health")
+async def tts_health():
+    """Report the health of the TTS engine.
+
+    Always answers with a normal response: ``healthy`` plus the engine's own
+    health payload, or ``unhealthy`` plus the error text when the check raises.
+    """
+    try:
+        engine_state = await tts_client.health_check()
+    except Exception as exc:
+        return {"status": "unhealthy", "error": str(exc)}
+    return {"status": "healthy", "tts_engine": engine_state}
--- a/audio-studio-api/app/routers/voices.py
+++ b/audio-studio-api/app/routers/voices.py
@ -0,0 +1,426 @@
+"""Voice 관리 API 라우터"""
+
+import uuid
+from datetime import datetime
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
+from fastapi.responses import Response
+
+from app.database import Database, get_db
+from app.models.voice import (
+    VoiceType,
+    LanguageCode,
+    VoiceResponse,
+    VoiceListResponse,
+    VoiceCloneRequest,
+    VoiceDesignRequest,
+    VoiceUpdateRequest,
+)
+from app.services.tts_client import tts_client
+
+# Router collecting the voice-management endpoints; every route declared on it
+# is served under the /api/v1/voices prefix and tagged "voices".
+router = APIRouter(prefix="/api/v1/voices", tags=["voices"])
+
+
+# ========================================
+# Preset voice list (system defaults)
+# ========================================
+
+# Built-in voices that exist only in code, not in the database.
+# Per entry:
+#   voice_id        - public id that the endpoints in this module match against
+#   preset_voice_id - speaker name handed to the TTS client for this preset
+#   type / language - VoiceType.PRESET and the voice's LanguageCode
+#   description     - user-facing text (kept in Korean; it is runtime data)
+#   gender / style_tags - descriptive metadata returned to clients
+PRESET_VOICES = [
+    {
+        "voice_id": "preset_chelsie",
+        "name": "Chelsie",
+        "description": "밝고 활기찬 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Chelsie",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["bright", "energetic"],
+    },
+    {
+        "voice_id": "preset_ethan",
+        "name": "Ethan",
+        "description": "차분하고 신뢰감 있는 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Ethan",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["calm", "trustworthy"],
+    },
+    {
+        "voice_id": "preset_vivian",
+        "name": "Vivian",
+        "description": "부드럽고 따뜻한 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Vivian",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["soft", "warm"],
+    },
+    {
+        "voice_id": "preset_benjamin",
+        "name": "Benjamin",
+        "description": "깊고 전문적인 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Benjamin",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["deep", "professional"],
+    },
+    {
+        "voice_id": "preset_aurora",
+        "name": "Aurora",
+        "description": "우아하고 세련된 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Aurora",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["elegant", "refined"],
+    },
+    {
+        "voice_id": "preset_oliver",
+        "name": "Oliver",
+        "description": "친근하고 편안한 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Oliver",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["friendly", "casual"],
+    },
+    {
+        "voice_id": "preset_luna",
+        "name": "Luna",
+        "description": "따뜻하고 감성적인 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Luna",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["warm", "emotional"],
+    },
+    {
+        "voice_id": "preset_jasper",
+        "name": "Jasper",
+        "description": "전문적이고 명확한 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Jasper",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["professional", "clear"],
+    },
+    {
+        "voice_id": "preset_aria",
+        "name": "Aria",
+        "description": "표현력 풍부한 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Aria",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["expressive", "dynamic"],
+    },
+]
+
+
+def _voice_doc_to_response(doc: dict) -> VoiceResponse:
+    """Build a ``VoiceResponse`` from a MongoDB voice document.
+
+    ``voice_id``, ``name`` and ``type`` are required keys; everything else is
+    optional. Missing language defaults to Korean, missing visibility to
+    public, and missing timestamps to the current UTC time. Owner and sample
+    audio ids are stringified when present.
+    """
+    owner = doc.get("owner_id")
+    sample = doc.get("sample_audio_id")
+    owner_ref = str(owner) if owner else None
+    sample_ref = str(sample) if sample else None
+
+    return VoiceResponse(
+        voice_id=doc["voice_id"],
+        name=doc["name"],
+        description=doc.get("description"),
+        type=doc["type"],
+        language=doc.get("language", LanguageCode.KO),
+        preset_voice_id=doc.get("preset_voice_id"),
+        design_prompt=doc.get("design_prompt"),
+        reference_transcript=doc.get("reference_transcript"),
+        gender=doc.get("gender"),
+        age_range=doc.get("age_range"),
+        style_tags=doc.get("style_tags", []),
+        owner_id=owner_ref,
+        is_public=doc.get("is_public", True),
+        sample_audio_id=sample_ref,
+        created_at=doc.get("created_at", datetime.utcnow()),
+        updated_at=doc.get("updated_at", datetime.utcnow()),
+    )
+
+
+@router.get("", response_model=VoiceListResponse)
+async def list_voices(
+    type: Optional[VoiceType] = Query(None, description="보이스 타입 필터"),
+    language: Optional[LanguageCode] = Query(None, description="언어 필터"),
+    is_public: bool = Query(True, description="공개 보이스만"),
+    include_presets: bool = Query(True, description="프리셋 포함"),
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    db: Database = Depends(get_db),
+):
+    """List voices (built-in presets followed by database voices).
+
+    The result is paginated over the combined sequence "matching presets,
+    then database voices newest first", so that
+
+    * a page never holds more than ``page_size`` entries,
+    * presets appear once (at the head of the list) instead of on every page,
+    * ``total`` counts only voices that actually match the filters.
+
+    Args:
+        type: Optional voice type filter; applied to presets and database voices.
+        language: Optional language filter.
+        is_public: When true, only public database voices are returned.
+        include_presets: When false, built-in presets are left out.
+        page: 1-based page number.
+        page_size: Maximum number of voices per page.
+    """
+    # Presets that survive the type/language filters
+    matching_presets = []
+    if include_presets and (type is None or type == VoiceType.PRESET):
+        matching_presets = [
+            preset for preset in PRESET_VOICES
+            if not language or preset["language"] == language
+        ]
+
+    # Database filter. The type filter is applied for every requested type,
+    # so asking for presets no longer returns cloned/designed voices as well.
+    query = {"is_public": True} if is_public else {}
+    if type:
+        query["type"] = type.value
+    if language:
+        query["language"] = language.value
+
+    db_total = await db.voices.count_documents(query)
+
+    # Window of this page inside the combined (presets + database) sequence
+    start = (page - 1) * page_size
+    page_presets = matching_presets[start:start + page_size]
+
+    voices = [
+        VoiceResponse(
+            **preset,
+            is_public=True,
+            created_at=datetime.utcnow(),
+            updated_at=datetime.utcnow(),
+        )
+        for preset in page_presets
+    ]
+
+    # Fill the rest of the page from the database. A cursor limit of 0 means
+    # "no limit", so the query is skipped entirely when the page is full.
+    remaining = page_size - len(page_presets)
+    if remaining > 0:
+        skip = max(0, start - len(matching_presets))
+        cursor = db.voices.find(query).sort("created_at", -1).skip(skip).limit(remaining)
+        async for doc in cursor:
+            voices.append(_voice_doc_to_response(doc))
+
+    return VoiceListResponse(
+        voices=voices,
+        total=len(matching_presets) + db_total,
+        page=page,
+        page_size=page_size,
+    )
+
+
+@router.get("/{voice_id}", response_model=VoiceResponse)
+async def get_voice(
+    voice_id: str,
+    db: Database = Depends(get_db),
+):
+    """Return one voice by id.
+
+    Built-in presets are checked first, then the database. Responds 404 when
+    the id matches neither.
+    """
+    # Built-in preset?
+    matched_preset = next(
+        (entry for entry in PRESET_VOICES if entry["voice_id"] == voice_id),
+        None,
+    )
+    if matched_preset is not None:
+        return VoiceResponse(
+            **matched_preset,
+            is_public=True,
+            created_at=datetime.utcnow(),
+            updated_at=datetime.utcnow(),
+        )
+
+    # Otherwise look it up in the database
+    stored = await db.voices.find_one({"voice_id": voice_id})
+    if stored:
+        return _voice_doc_to_response(stored)
+
+    raise HTTPException(status_code=404, detail="Voice not found")
+
+
+@router.get("/{voice_id}/sample")
+async def get_voice_sample(
+    voice_id: str,
+    db: Database = Depends(get_db),
+):
+    """Return a sample WAV for a voice.
+
+    For a built-in preset the sample is synthesized on the fly from a fixed
+    sentence; for database voices the stored sample audio is returned.
+    Responds 404 when the voice is unknown or has no stored sample.
+    """
+    inline_headers = {"Content-Disposition": f'inline; filename="{voice_id}_sample.wav"'}
+
+    # Preset: generate the sample through the TTS engine
+    matched_preset = next(
+        (entry for entry in PRESET_VOICES if entry["voice_id"] == voice_id),
+        None,
+    )
+    if matched_preset is not None:
+        wav_bytes, _rate = await tts_client.synthesize(
+            text="안녕하세요, 저는 AI 음성입니다.",
+            speaker=matched_preset["preset_voice_id"],
+            language="ko",
+        )
+        return Response(content=wav_bytes, media_type="audio/wav", headers=inline_headers)
+
+    # Database voice: serve the stored sample
+    stored = await db.voices.find_one({"voice_id": voice_id})
+    if not stored:
+        raise HTTPException(status_code=404, detail="Voice not found")
+
+    if not stored.get("sample_audio_id"):
+        raise HTTPException(status_code=404, detail="No sample audio available")
+
+    wav_bytes = await db.get_audio(stored["sample_audio_id"])
+    return Response(content=wav_bytes, media_type="audio/wav", headers=inline_headers)
+
+
+@router.post("/clone", response_model=VoiceResponse)
+async def create_voice_clone(
+    name: str = Form(...),
+    description: Optional[str] = Form(None),
+    reference_transcript: str = Form(...),
+    language: LanguageCode = Form(LanguageCode.KO),
+    is_public: bool = Form(False),
+    reference_audio: UploadFile = File(...),
+    db: Database = Depends(get_db),
+):
+    """Create a new voice by cloning a reference recording.
+
+    A sample sentence is synthesized from the uploaded reference audio first;
+    only when that succeeds are the reference audio, the sample and the voice
+    document stored. Reference audio of at least 3 seconds is recommended.
+    Responds 500 when the clone synthesis fails.
+    """
+    # Read the uploaded reference audio
+    reference_bytes = await reference_audio.read()
+
+    # Prove the clone works by synthesizing a fixed sample sentence
+    try:
+        sample_bytes, _rate = await tts_client.voice_clone(
+            text="안녕하세요, 저는 복제된 AI 음성입니다.",
+            ref_audio=reference_bytes,
+            ref_text=reference_transcript,
+            language=language.value,
+        )
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Voice clone failed: {str(exc)}")
+
+    # Store both audio files
+    stored_reference_id = await db.save_audio(
+        reference_bytes,
+        f"ref_{uuid.uuid4()}.wav",
+        metadata={"type": "reference"},
+    )
+    stored_sample_id = await db.save_audio(
+        sample_bytes,
+        f"sample_{uuid.uuid4()}.wav",
+        metadata={"type": "sample"},
+    )
+
+    # Store the voice document
+    new_voice_id = f"clone_{uuid.uuid4().hex[:12]}"
+    timestamp = datetime.utcnow()
+
+    voice_record = {
+        "voice_id": new_voice_id,
+        "name": name,
+        "description": description,
+        "type": VoiceType.CLONED.value,
+        "language": language.value,
+        "reference_audio_id": stored_reference_id,
+        "reference_transcript": reference_transcript,
+        "sample_audio_id": stored_sample_id,
+        "is_public": is_public,
+        "created_at": timestamp,
+        "updated_at": timestamp,
+    }
+
+    await db.voices.insert_one(voice_record)
+
+    return _voice_doc_to_response(voice_record)
+
+
+@router.post("/design", response_model=VoiceResponse)
+async def create_voice_design(
+    request: VoiceDesignRequest,
+    db: Database = Depends(get_db),
+):
+    """Create a new voice from a text design prompt.
+
+    Example prompt: "30대 남성, 부드럽고 차분한 목소리". A sample sentence is
+    synthesized with the prompt first; on success the sample and the voice
+    document are stored. Responds 500 when the design synthesis fails.
+    """
+    # Synthesize a fixed sample sentence with the design prompt
+    try:
+        sample_bytes, _rate = await tts_client.voice_design(
+            text="안녕하세요, 저는 AI로 생성된 음성입니다.",
+            instruct=request.design_prompt,
+            language=request.language.value,
+        )
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"Voice design failed: {str(exc)}")
+
+    # Store the sample audio
+    stored_sample_id = await db.save_audio(
+        sample_bytes,
+        f"sample_{uuid.uuid4()}.wav",
+        metadata={"type": "sample"},
+    )
+
+    # Store the voice document
+    new_voice_id = f"design_{uuid.uuid4().hex[:12]}"
+    timestamp = datetime.utcnow()
+
+    voice_record = {
+        "voice_id": new_voice_id,
+        "name": request.name,
+        "description": request.description,
+        "type": VoiceType.DESIGNED.value,
+        "language": request.language.value,
+        "design_prompt": request.design_prompt,
+        "sample_audio_id": stored_sample_id,
+        "is_public": request.is_public,
+        "created_at": timestamp,
+        "updated_at": timestamp,
+    }
+
+    await db.voices.insert_one(voice_record)
+
+    return _voice_doc_to_response(voice_record)
+
+
+@router.patch("/{voice_id}", response_model=VoiceResponse)
+async def update_voice(
+    voice_id: str,
+    request: VoiceUpdateRequest,
+    db: Database = Depends(get_db),
+):
+    """Update the editable fields of a voice.
+
+    Only fields with a non-null value in the request are written, and
+    ``updated_at`` is refreshed.
+
+    Raises:
+        HTTPException 400: the voice is a built-in preset, or the request
+            carries no field to update.
+        HTTPException 404: no database voice has this id.
+    """
+    # Built-in presets are read-only
+    if any(entry["voice_id"] == voice_id for entry in PRESET_VOICES):
+        raise HTTPException(status_code=400, detail="Cannot modify preset voice")
+
+    # Keep only the fields the caller actually supplied
+    changes = {}
+    for field, value in request.model_dump().items():
+        if value is not None:
+            changes[field] = value
+    if not changes:
+        raise HTTPException(status_code=400, detail="No fields to update")
+
+    changes["updated_at"] = datetime.utcnow()
+
+    outcome = await db.voices.update_one(
+        {"voice_id": voice_id},
+        {"$set": changes},
+    )
+
+    if outcome.matched_count == 0:
+        raise HTTPException(status_code=404, detail="Voice not found")
+
+    # Re-read the document so the response reflects the stored state
+    refreshed = await db.voices.find_one({"voice_id": voice_id})
+    return _voice_doc_to_response(refreshed)
+
+
+@router.delete("/{voice_id}")
+async def delete_voice(
+    voice_id: str,
+    db: Database = Depends(get_db),
+):
+    """Delete a voice together with its stored audio.
+
+    Raises:
+        HTTPException 400: the voice is a built-in preset.
+        HTTPException 404: no database voice has this id.
+    """
+    # Built-in presets cannot be deleted
+    if any(entry["voice_id"] == voice_id for entry in PRESET_VOICES):
+        raise HTTPException(status_code=400, detail="Cannot delete preset voice")
+
+    # Load the voice first to learn which audio files belong to it
+    stored = await db.voices.find_one({"voice_id": voice_id})
+    if not stored:
+        raise HTTPException(status_code=404, detail="Voice not found")
+
+    # Remove the reference audio, then the sample audio, when present
+    for audio_key in ("reference_audio_id", "sample_audio_id"):
+        if stored.get(audio_key):
+            await db.delete_audio(stored[audio_key])
+
+    # Finally remove the voice document itself
+    await db.voices.delete_one({"voice_id": voice_id})
+
+    return {"status": "deleted", "voice_id": voice_id}
--- a/audio-studio-api/app/services/init.py
+++ b/audio-studio-api/app/services/init.py
--- a/audio-studio-api/app/services/audio_mixer.py
+++ b/audio-studio-api/app/services/audio_mixer.py
@ -0,0 +1,260 @@
+# 오디오 믹서 서비스
+# pydub를 사용한 오디오 합성/믹싱
+
+import os
+import tempfile
+from typing import Optional
+from pydub import AudioSegment
+from pydub.effects import normalize
+from app.models.drama import TimelineItem
+
+
+class AudioMixer:
+    """
+    Audio mixer built on pydub.
+
+    Features:
+    - combining several audio tracks
+    - volume adjustment
+    - fade in / fade out
+    - timeline-based mixing
+    """
+
+    def __init__(self, sample_rate: int = 44100):
+        # Kept on the instance only; none of the methods below read it.
+        self.sample_rate = sample_rate
+
+    def load_audio(self, file_path: str) -> AudioSegment:
+        """Load an audio file from ``file_path``."""
+        return AudioSegment.from_file(file_path)
+
+    def adjust_volume(self, audio: AudioSegment, volume: float) -> AudioSegment:
+        """Scale the loudness of ``audio`` by a linear amplitude factor.
+
+        Args:
+            audio: Segment to adjust.
+            volume: Linear gain where 1.0 means unchanged (0.5 is about
+                -6 dB, 2.0 about +6 dB). Zero or negative values apply
+                -120 dB.
+
+        Returns:
+            The adjusted segment (``audio`` itself when ``volume`` is 1.0).
+        """
+        if volume == 1.0:
+            return audio
+        import math
+
+        # Amplitude ratio -> decibels. The earlier square-root formula only
+        # came close to this near 0.5 and overshot for gains above 1.0.
+        db_change = 20 * math.log10(volume) if volume > 0 else -120
+        return audio + db_change
+
+    def apply_fade(
+        self,
+        audio: AudioSegment,
+        fade_in_ms: int = 0,
+        fade_out_ms: int = 0
+    ) -> AudioSegment:
+        """Apply a fade-in and/or fade-out; non-positive lengths are skipped."""
+        if fade_in_ms > 0:
+            audio = audio.fade_in(fade_in_ms)
+        if fade_out_ms > 0:
+            audio = audio.fade_out(fade_out_ms)
+        return audio
+
+    def concatenate(self, segments: list[AudioSegment]) -> AudioSegment:
+        """Join segments back to back; an empty list yields a zero-length segment."""
+        if not segments:
+            return AudioSegment.silent(duration=0)
+
+        result = segments[0]
+        for segment in segments[1:]:
+            result += segment
+        return result
+
+    def overlay(
+        self,
+        base: AudioSegment,
+        overlay_audio: AudioSegment,
+        position_ms: int = 0
+    ) -> AudioSegment:
+        """Overlay ``overlay_audio`` on ``base`` (e.g. a voice over background music)."""
+        return base.overlay(overlay_audio, position=position_ms)
+
+    def create_silence(self, duration_ms: int) -> AudioSegment:
+        """Create ``duration_ms`` milliseconds of silence."""
+        return AudioSegment.silent(duration=duration_ms)
+
+    def mix_timeline(
+        self,
+        timeline: list[TimelineItem],
+        audio_files: dict[str, str]  # audio_path -> actual file path
+    ) -> AudioSegment:
+        """
+        Mix audio according to a timeline.
+
+        Args:
+            timeline: Timeline items to place.
+            audio_files: Maps each item's ``audio_path`` to a file on disk.
+
+        Returns:
+            The mixed audio; one second of silence when ``timeline`` is empty.
+        """
+        if not timeline:
+            return AudioSegment.silent(duration=1000)
+
+        # Total length is the latest end time of any item.
+        total_duration_ms = max(
+            int((item.start_time + item.duration) * 1000)
+            for item in timeline
+        )
+
+        # One silent base track per item type (voice, music, sfx).
+        voice_track = AudioSegment.silent(duration=total_duration_ms)
+        music_track = AudioSegment.silent(duration=total_duration_ms)
+        sfx_track = AudioSegment.silent(duration=total_duration_ms)
+
+        for item in timeline:
+            # Items without a known, existing file are skipped silently.
+            if not item.audio_path or item.audio_path not in audio_files:
+                continue
+
+            file_path = audio_files[item.audio_path]
+            if not os.path.exists(file_path):
+                continue
+
+            # Load and process the audio.
+            audio = self.load_audio(file_path)
+
+            # Volume
+            audio = self.adjust_volume(audio, item.volume)
+
+            # Fades (item values are in seconds)
+            fade_in_ms = int(item.fade_in * 1000)
+            fade_out_ms = int(item.fade_out * 1000)
+            audio = self.apply_fade(audio, fade_in_ms, fade_out_ms)
+
+            # Position on the track
+            position_ms = int(item.start_time * 1000)
+
+            # Overlay onto the track for this type; other types are ignored.
+            if item.type == "voice":
+                voice_track = voice_track.overlay(audio, position=position_ms)
+            elif item.type == "music":
+                music_track = music_track.overlay(audio, position=position_ms)
+            elif item.type == "sfx":
+                sfx_track = sfx_track.overlay(audio, position=position_ms)
+
+        # Layer the tracks: music at the bottom, then sfx, then voice.
+        mixed = music_track.overlay(sfx_track).overlay(voice_track)
+
+        return mixed
+
+    def auto_duck(
+        self,
+        music: AudioSegment,
+        voice: AudioSegment,
+        duck_amount_db: float = -10,
+        attack_ms: int = 100,
+        release_ms: int = 300
+    ) -> AudioSegment:
+        """
+        Auto-ducking: lower the music while the voice is audible.
+
+        Simple implementation: the voice is scanned in 50 ms chunks and the
+        matching music chunk gets ``duck_amount_db`` added whenever the voice
+        chunk's RMS exceeds a fixed threshold. ``attack_ms`` and
+        ``release_ms`` are accepted but not used by this implementation.
+        Only the chunks that line up with the voice are returned.
+        """
+        # Pad the music with silence when it is shorter than the voice.
+        if len(music) < len(voice):
+            music = music + AudioSegment.silent(duration=len(voice) - len(music))
+
+        # Detect silent / voiced stretches with a simple RMS check.
+        chunk_ms = 50
+        ducked_music = AudioSegment.silent(duration=0)
+
+        for i in range(0, len(voice), chunk_ms):
+            voice_chunk = voice[i:i + chunk_ms]
+            music_chunk = music[i:i + chunk_ms]
+
+            # Duck when the voice RMS is above the threshold.
+            if voice_chunk.rms > 100:  # threshold can be tuned
+                music_chunk = music_chunk + duck_amount_db
+
+            ducked_music += music_chunk
+
+        return ducked_music
+
+    def export(
+        self,
+        audio: AudioSegment,
+        output_path: str,
+        format: str = "wav",
+        normalize_audio: bool = True
+    ) -> str:
+        """
+        Export audio to a file.
+
+        Args:
+            audio: Segment to write.
+            output_path: Destination file path.
+            format: ``"mp3"`` exports MP3 at 192k; any other value exports WAV.
+            normalize_audio: Whether to normalize before exporting.
+
+        Returns:
+            ``output_path``.
+        """
+        if normalize_audio:
+            audio = normalize(audio)
+
+        # Per-format export settings.
+        export_params = {}
+        if format == "mp3":
+            export_params = {"format": "mp3", "bitrate": "192k"}
+        else:
+            export_params = {"format": "wav"}
+
+        audio.export(output_path, **export_params)
+        return output_path
+
+    def create_with_background(
+        self,
+        voice_segments: list[tuple[AudioSegment, float]],  # (audio, start_time)
+        background_music: Optional[AudioSegment] = None,
+        music_volume: float = 0.3,
+        gap_between_lines_ms: int = 500
+    ) -> AudioSegment:
+        """
+        Simple voice + background music composition.
+
+        Args:
+            voice_segments: ``(audio, start_time_seconds)`` tuples.
+            background_music: Background music; when absent (or empty) only
+                the voice track is returned.
+            music_volume: Linear volume applied to the background music.
+            gap_between_lines_ms: Silence appended after every voice segment.
+
+        Returns:
+            The composed audio; one second of silence when
+            ``voice_segments`` is empty.
+        """
+        if not voice_segments:
+            return AudioSegment.silent(duration=1000)
+
+        # Build the full voice track.
+        voice_track = AudioSegment.silent(duration=0)
+        for audio, start_time in voice_segments:
+            # Pad with silence up to the requested start position; a segment
+            # whose start is already passed is simply appended.
+            current_pos = len(voice_track)
+            target_pos = int(start_time * 1000)
+            if target_pos > current_pos:
+                voice_track += AudioSegment.silent(duration=target_pos - current_pos)
+            voice_track += audio
+            voice_track += AudioSegment.silent(duration=gap_between_lines_ms)
+
+        total_duration = len(voice_track)
+
+        # Background music handling.
+        if background_music:
+            # Fit the music to the voice track length.
+            if len(background_music) < total_duration:
+                # Loop the music until it is long enough.
+                loops_needed = (total_duration // len(background_music)) + 1
+                background_music = background_music * loops_needed
+            background_music = background_music[:total_duration]
+
+            # Volume
+            background_music = self.adjust_volume(background_music, music_volume)
+
+            # Auto-ducking
+            background_music = self.auto_duck(background_music, voice_track)
+
+            # Mix
+            return background_music.overlay(voice_track)
+        else:
+            return voice_track
+
+
+# Singleton instance
+audio_mixer = AudioMixer()
--- a/audio-studio-api/app/services/drama_orchestrator.py
+++ b/audio-studio-api/app/services/drama_orchestrator.py
@ -0,0 +1,362 @@
+# 드라마 오케스트레이터
+# 스크립트 파싱 → 에셋 생성 → 타임라인 구성 → 믹싱 조율
+
+import os
+import uuid
+import asyncio
+import tempfile
+from datetime import datetime
+from typing import Optional
+from pydub import AudioSegment
+
+from app.models.drama import (
+    ParsedScript, ScriptElement, ElementType, Character,
+    TimelineItem, DramaProject, DramaCreateRequest
+)
+from app.services.script_parser import script_parser
+from app.services.audio_mixer import audio_mixer
+from app.services.tts_client import tts_client
+from app.services.freesound_client import freesound_client
+from app.database import db
+
+
+class DramaOrchestrator:
+    """
+    Drama generation orchestrator.
+
+    Workflow:
+    1. Parse the script
+    2. Map characters to voices
+    3. Generate assets (TTS, music, sound effects)
+    4. Build the timeline
+    5. Mix the audio
+    6. Write the final file
+    """
+
+    # Default gap between dialogue lines (seconds)
+    DEFAULT_DIALOGUE_GAP = 0.5
+    # Default sound-effect length (seconds)
+    DEFAULT_SFX_DURATION = 2.0
+    # Assumed TTS speed (characters per second)
+    TTS_CHARS_PER_SECOND = 5
+
+    async def create_project(
+        self,
+        request: DramaCreateRequest
+    ) -> DramaProject:
+        """Parse the request's script, apply its voice mapping and store a new draft project."""
+        project_id = str(uuid.uuid4())
+
+        # Parse the script.
+        parsed = script_parser.parse(request.script)
+
+        # Apply the requested character -> voice mapping.
+        voice_mapping = request.voice_mapping or {}
+        for char in parsed.characters:
+            if char.name in voice_mapping:
+                char.voice_id = voice_mapping[char.name]
+
+        project = DramaProject(
+            project_id=project_id,
+            title=request.title or parsed.title or "Untitled Drama",
+            script_raw=request.script,
+            script_parsed=parsed,
+            voice_mapping=voice_mapping,
+            status="draft"
+        )
+
+        # Persist to the database.
+        await db.dramas.insert_one(project.model_dump())
+
+        return project
+
+    async def get_project(self, project_id: str) -> Optional[DramaProject]:
+        """Return the project with ``project_id``, or ``None`` when it does not exist."""
+        doc = await db.dramas.find_one({"project_id": project_id})
+        if doc:
+            return DramaProject(**doc)
+        return None
+
+    async def update_project_status(
+        self,
+        project_id: str,
+        status: str,
+        error_message: Optional[str] = None
+    ):
+        """Set the project's status (and ``error_message`` when given) and refresh ``updated_at``."""
+        update = {
+            "status": status,
+            "updated_at": datetime.utcnow()
+        }
+        if error_message:
+            update["error_message"] = error_message
+
+        await db.dramas.update_one(
+            {"project_id": project_id},
+            {"$set": update}
+        )
+
+    def estimate_duration(self, parsed: ParsedScript) -> float:
+        """Estimate the playback time in seconds from the parsed script alone."""
+        total = 0.0
+
+        for element in parsed.elements:
+            if element.type == ElementType.DIALOGUE:
+                # Dialogue length is estimated from the character count.
+                text_len = len(element.text or "")
+                total += text_len / self.TTS_CHARS_PER_SECOND
+                total += self.DEFAULT_DIALOGUE_GAP
+            elif element.type == ElementType.PAUSE:
+                total += element.duration or 1.0
+            elif element.type == ElementType.SFX:
+                total += self.DEFAULT_SFX_DURATION
+
+        return total
+
+    async def generate_assets(
+        self,
+        project: DramaProject,
+        temp_dir: str
+    ) -> dict[str, str]:
+        """
+        Generate the audio assets (TTS lines and sound effects) into ``temp_dir``.
+
+        Failures are best-effort: a dialogue line whose synthesis fails is
+        replaced by silence of the estimated length, and a sound effect that
+        cannot be fetched is left out.
+
+        Returns:
+            Mapping of asset id (``dialogue_<index>`` or
+            ``sfx_<description>``) to the file path written.
+        """
+        assets: dict[str, str] = {}
+        parsed = project.script_parsed
+
+        if not parsed:
+            return assets
+
+        dialogue_index = 0
+
+        for element in parsed.elements:
+            if element.type == ElementType.DIALOGUE:
+                # TTS generation
+                audio_id = f"dialogue_{dialogue_index}"
+                file_path = os.path.join(temp_dir, f"{audio_id}.wav")
+
+                # The TTS client's synthesize() takes ``speaker`` and has no
+                # ``voice_id`` parameter. When the character has no mapped
+                # voice the keyword is omitted so the client's own default
+                # speaker is used.
+                # NOTE(review): assumes mapped voice ids are speaker names
+                # the TTS engine accepts - confirm against the TTS service.
+                tts_kwargs = {
+                    "text": element.text or "",
+                    "instruct": element.emotion,
+                }
+                voice_id = project.voice_mapping.get(element.character)
+                if voice_id:
+                    tts_kwargs["speaker"] = voice_id
+
+                try:
+                    # Call the TTS engine.
+                    tts_result = await tts_client.synthesize(**tts_kwargs)
+
+                    # synthesize() is declared to return
+                    # (audio_bytes, sample_rate); only the bytes are written.
+                    if isinstance(tts_result, tuple):
+                        audio_data = tts_result[0]
+                    else:
+                        audio_data = tts_result
+
+                    # Save to file.
+                    with open(file_path, "wb") as f:
+                        f.write(audio_data)
+
+                    assets[audio_id] = file_path
+
+                except Exception as e:
+                    print(f"TTS 생성 실패 ({element.character}): {e}")
+                    # Substitute silence of the estimated spoken length.
+                    silence_duration = len(element.text or "") / self.TTS_CHARS_PER_SECOND
+                    silence = AudioSegment.silent(duration=int(silence_duration * 1000))
+                    silence.export(file_path, format="wav")
+                    assets[audio_id] = file_path
+
+                dialogue_index += 1
+
+            elif element.type == ElementType.SFX:
+                # Look the sound effect up on Freesound.
+                audio_id = f"sfx_{element.description}"
+
+                try:
+                    response = await freesound_client.search(
+                        query=element.description,
+                        page_size=1
+                    )
+
+                    # search() returns a dict whose "results" list holds the
+                    # hits; each hit is keyed by "freesound_id".
+                    hits = response.get("results") or []
+                    if hits:
+                        sound = hits[0]
+                        # Download the preview.
+                        if sound.get("preview_url"):
+                            audio_data = await freesound_client.download_preview(
+                                sound["preview_url"]
+                            )
+                            file_path = os.path.join(
+                                temp_dir, f"sfx_{sound['freesound_id']}.mp3"
+                            )
+                            with open(file_path, "wb") as f:
+                                f.write(audio_data)
+                            assets[audio_id] = file_path
+
+                except Exception as e:
+                    print(f"SFX 검색 실패 ({element.description}): {e}")
+
+            elif element.type == ElementType.MUSIC:
+                # MusicGen needs a GPU, so this is a placeholder: no music
+                # asset is produced yet.
+                # TODO: integrate MusicGen (a music client has to be added).
+                pass
+
+        return assets
+
+    def build_timeline(
+        self,
+        parsed: ParsedScript,
+        assets: dict[str, str]
+    ) -> list[TimelineItem]:
+        """Lay the generated assets out on a timeline.
+
+        Dialogue lines and pauses advance the clock; sound effects are placed
+        at the current time without advancing it. Only the music that is
+        still active when the script ends is emitted (from its start to the
+        end of the timeline); a "stop" action discards the pending music and
+        music actions other than stop/play/change/fade_in are ignored.
+        """
+        timeline: list[TimelineItem] = []
+        current_time = 0.0
+        dialogue_index = 0
+        current_music: Optional[dict] = None
+
+        for element in parsed.elements:
+            if element.type == ElementType.DIALOGUE:
+                audio_id = f"dialogue_{dialogue_index}"
+
+                if audio_id in assets:
+                    # Measure the real audio length; fall back to the
+                    # text-based estimate when the file cannot be decoded.
+                    try:
+                        audio = AudioSegment.from_file(assets[audio_id])
+                        duration = len(audio) / 1000.0
+                    except Exception:
+                        duration = len(element.text or "") / self.TTS_CHARS_PER_SECOND
+
+                    timeline.append(TimelineItem(
+                        start_time=current_time,
+                        duration=duration,
+                        type="voice",
+                        audio_path=audio_id,
+                        volume=1.0
+                    ))
+
+                    current_time += duration + self.DEFAULT_DIALOGUE_GAP
+
+                # The index advances even when the asset is missing so it
+                # stays aligned with the ids used in generate_assets.
+                dialogue_index += 1
+
+            elif element.type == ElementType.PAUSE:
+                current_time += element.duration or 1.0
+
+            elif element.type == ElementType.SFX:
+                audio_id = f"sfx_{element.description}"
+
+                if audio_id in assets:
+                    try:
+                        audio = AudioSegment.from_file(assets[audio_id])
+                        duration = len(audio) / 1000.0
+                    except Exception:
+                        duration = self.DEFAULT_SFX_DURATION
+
+                    timeline.append(TimelineItem(
+                        start_time=current_time,
+                        duration=duration,
+                        type="sfx",
+                        audio_path=audio_id,
+                        volume=element.volume or 1.0
+                    ))
+
+            elif element.type == ElementType.MUSIC:
+                audio_id = f"music_{element.description}"
+
+                if element.action == "stop":
+                    current_music = None
+                elif element.action in ("play", "change", "fade_in"):
+                    if audio_id in assets:
+                        # Music runs from now until the end (adjusted later).
+                        current_music = {
+                            "audio_id": audio_id,
+                            "start_time": current_time,
+                            "volume": element.volume or 0.3,
+                            "fade_in": element.fade_duration if element.action == "fade_in" else 0
+                        }
+
+        # Add the background music item, spanning up to the end.
+        if current_music:
+            timeline.append(TimelineItem(
+                start_time=current_music["start_time"],
+                duration=current_time - current_music["start_time"],
+                type="music",
+                audio_path=current_music["audio_id"],
+                volume=current_music["volume"],
+                fade_in=current_music.get("fade_in", 0)
+            ))
+
+        return timeline
+
+    async def render(
+        self,
+        project_id: str,
+        output_format: str = "wav"
+    ) -> Optional[str]:
+        """
+        Render a drama project to an audio file.
+
+        The project status is set to "processing" while rendering,
+        "completed" on success and "error" (with the exception text) on
+        failure, in which case the exception is re-raised.
+
+        Returns:
+            Path of the output file, or ``None`` when the project does not
+            exist or has no parsed script.
+        """
+        project = await self.get_project(project_id)
+        if not project or not project.script_parsed:
+            return None
+
+        await self.update_project_status(project_id, "processing")
+
+        try:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                # 1. Generate assets
+                assets = await self.generate_assets(project, temp_dir)
+
+                # 2. Build the timeline
+                timeline = self.build_timeline(project.script_parsed, assets)
+
+                # 3. Mix
+                mixed_audio = audio_mixer.mix_timeline(timeline, assets)
+
+                # 4. Export
+                output_path = os.path.join(temp_dir, f"drama_{project_id}.{output_format}")
+                audio_mixer.export(mixed_audio, output_path, format=output_format)
+
+                # 5. Store in GridFS (TODO: real implementation)
+                # file_id = await save_to_gridfs(output_path)
+
+                # Temporary: copy the file out of the temp directory, which
+                # is removed when this ``with`` block exits.
+                final_path = f"/tmp/drama_{project_id}.{output_format}"
+                import shutil
+                shutil.copy(output_path, final_path)
+
+                # Record the result; ``output_file_id`` currently holds the
+                # file path.
+                await db.dramas.update_one(
+                    {"project_id": project_id},
+                    {
+                        "$set": {
+                            "status": "completed",
+                            "timeline": [t.model_dump() for t in timeline],
+                            "output_file_id": final_path,
+                            "updated_at": datetime.utcnow()
+                        }
+                    }
+                )
+
+                return final_path
+
+        except Exception as e:
+            await self.update_project_status(project_id, "error", str(e))
+            raise
+
+    async def list_projects(
+        self,
+        skip: int = 0,
+        limit: int = 20
+    ) -> list[DramaProject]:
+        """List projects, newest ``created_at`` first, with skip/limit paging."""
+        cursor = db.dramas.find().sort("created_at", -1).skip(skip).limit(limit)
+        projects = []
+        async for doc in cursor:
+            projects.append(DramaProject(**doc))
+        return projects
+
+
+# Singleton instance
+drama_orchestrator = DramaOrchestrator()
--- a/audio-studio-api/app/services/freesound_client.py
+++ b/audio-studio-api/app/services/freesound_client.py
@ -0,0 +1,165 @@
+"""Freesound API 클라이언트
+
+효과음 검색 및 다운로드
+https://freesound.org/docs/api/
+"""
+
+import os
+import logging
+from typing import Optional, List, Dict
+
+import httpx
+
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+
+
+class FreesoundClient:
+    """Freesound API client."""
+
+    BASE_URL = "https://freesound.org/apiv2"
+
+    def __init__(self):
+        # An empty key disables the authenticated calls (see each method).
+        self.api_key = os.getenv("FREESOUND_API_KEY", "")
+        self.timeout = httpx.Timeout(30.0, connect=10.0)
+
+    def _get_headers(self) -> dict:
+        """Return the token authentication header."""
+        return {"Authorization": f"Token {self.api_key}"}
+
+    @staticmethod
+    def _format_sound(sound: dict, include_username: bool = True) -> dict:
+        """Reduce one raw API sound entry to the fields this client exposes."""
+        entry = {
+            "freesound_id": sound["id"],
+            "name": sound.get("name", ""),
+            "description": sound.get("description", ""),
+            "duration": sound.get("duration", 0),
+            "tags": sound.get("tags", []),
+            "preview_url": sound.get("previews", {}).get("preview-hq-mp3", ""),
+            "license": sound.get("license", ""),
+        }
+        if include_username:
+            entry["username"] = sound.get("username", "")
+        return entry
+
+    async def search(
+        self,
+        query: str,
+        page: int = 1,
+        page_size: int = 20,
+        filter_fields: Optional[str] = None,
+        sort: str = "score",
+        min_duration: Optional[float] = None,
+        max_duration: Optional[float] = None,
+    ) -> Dict:
+        """Search for sound effects.
+
+        Args:
+            query: Search text.
+            page: Page number.
+            page_size: Results per page; values above 150 are clamped to 150.
+            filter_fields: Extra filter expression (e.g. "duration:[1 TO 5]").
+            sort: Sort order (score, duration_asc, duration_desc,
+                created_desc, ...).
+            min_duration: Minimum length in seconds.
+            max_duration: Maximum length in seconds.
+
+        Returns:
+            Dict with ``count``, ``page``, ``page_size`` (the value actually
+            requested, after clamping) and ``results``. Without an API key
+            only ``{"count": 0, "results": []}`` is returned and no request
+            is made.
+
+        Raises:
+            httpx.HTTPStatusError: When the API answers with an error status.
+        """
+        if not self.api_key:
+            logger.warning("Freesound API 키가 설정되지 않음")
+            return {"count": 0, "results": []}
+
+        # Build the filter expression.
+        filters = []
+        if min_duration is not None or max_duration is not None:
+            min_d = min_duration if min_duration is not None else 0
+            max_d = max_duration if max_duration is not None else "*"
+            filters.append(f"duration:[{min_d} TO {max_d}]")
+
+        if filter_fields:
+            filters.append(filter_fields)
+
+        # Freesound allows at most 150 results per page.
+        effective_page_size = min(page_size, 150)
+
+        params = {
+            "query": query,
+            "page": page,
+            "page_size": effective_page_size,
+            "sort": sort,
+            "fields": "id,name,description,duration,tags,previews,license,username",
+        }
+
+        if filters:
+            params["filter"] = " ".join(filters)
+
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                f"{self.BASE_URL}/search/text/",
+                params=params,
+                headers=self._get_headers(),
+            )
+            response.raise_for_status()
+            data = response.json()
+
+        # Report the page size that was really sent so callers can paginate
+        # against it.
+        return {
+            "count": data.get("count", 0),
+            "page": page,
+            "page_size": effective_page_size,
+            "results": [self._format_sound(sound) for sound in data.get("results", [])],
+        }
+
+    async def get_sound(self, sound_id: int) -> Dict:
+        """Fetch the raw detail record of one sound.
+
+        Raises:
+            ValueError: When no API key is configured.
+            httpx.HTTPStatusError: When the API answers with an error status.
+        """
+        if not self.api_key:
+            raise ValueError("Freesound API 키 필요")
+
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                f"{self.BASE_URL}/sounds/{sound_id}/",
+                headers=self._get_headers(),
+            )
+            response.raise_for_status()
+            return response.json()
+
+    async def download_preview(self, preview_url: str) -> bytes:
+        """Download preview audio; the request is sent without the auth header."""
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(preview_url)
+            response.raise_for_status()
+            return response.content
+
+    async def get_similar_sounds(
+        self,
+        sound_id: int,
+        page_size: int = 10,
+    ) -> List[Dict]:
+        """Find sounds similar to ``sound_id``.
+
+        Returns:
+            List of simplified sound dicts (same shape as ``search`` results
+            but without ``username``); an empty list when no API key is set.
+
+        Raises:
+            httpx.HTTPStatusError: When the API answers with an error status.
+        """
+        if not self.api_key:
+            return []
+
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                f"{self.BASE_URL}/sounds/{sound_id}/similar/",
+                params={
+                    "page_size": page_size,
+                    "fields": "id,name,description,duration,tags,previews,license",
+                },
+                headers=self._get_headers(),
+            )
+            response.raise_for_status()
+            data = response.json()
+
+        # ``username`` is not among the requested fields, so it is left out.
+        return [
+            self._format_sound(sound, include_username=False)
+            for sound in data.get("results", [])
+        ]
+
+
+# Singleton instance
+freesound_client = FreesoundClient()
--- a/audio-studio-api/app/services/script_parser.py
+++ b/audio-studio-api/app/services/script_parser.py
@ -0,0 +1,174 @@
+# 드라마 스크립트 파서
+# 마크다운 형식의 대본을 구조화된 데이터로 변환
+
+import re
+from typing import Optional
+from app.models.drama import (
+    ParsedScript, ScriptElement, Character, ElementType
+)
+
+
+class ScriptParser:
+    """
+    Drama script parser.
+
+    Recognized line formats:
+    - ``# title``
+    - ``[장소: ...]``, ``[지문: ...]`` or ``[장면: ...]`` (stage direction)
+    - ``[효과음: ...]`` (sound effect)
+    - ``[음악: ...]`` or ``[음악 시작/중지/변경/페이드인/페이드아웃: ...]`` (music)
+    - ``[쉼: 2초]`` (pause)
+    - ``name(description, emotion): line``
+    - ``name: line``
+    """
+
+    # Regular expression patterns
+    TITLE_PATTERN = re.compile(r'^#\s+(.+)$')
+    DIRECTION_PATTERN = re.compile(r'^\[(?:장소|지문|장면):\s*(.+)\]$')
+    SFX_PATTERN = re.compile(r'^\[효과음:\s*(.+)\]$')
+    MUSIC_PATTERN = re.compile(r'^\[음악(?:\s+(시작|중지|변경|페이드인|페이드아웃))?:\s*(.+)\]$')
+    PAUSE_PATTERN = re.compile(r'^\[쉼:\s*(\d+(?:\.\d+)?)\s*초?\]$')
+    DIALOGUE_PATTERN = re.compile(r'^([^(\[:]+?)(?:\(([^)]*)\))?:\s*(.+)$')
+
+    # Music keyword -> action name
+    MUSIC_ACTIONS = {
+        None: "play",
+        "시작": "play",
+        "중지": "stop",
+        "변경": "change",
+        "페이드인": "fade_in",
+        "페이드아웃": "fade_out",
+    }
+
+    @staticmethod
+    def _split_character_info(char_info):
+        """Split the parenthesised part of a dialogue line into (description, emotion)."""
+        if not char_info:
+            return None, None
+        parts = [piece.strip() for piece in char_info.split(',')]
+        if len(parts) >= 2:
+            return parts[0], parts[1]
+        # A single value is treated as the emotion.
+        return None, parts[0]
+
+    def parse(self, script: str) -> ParsedScript:
+        """Parse a script into its title, characters and ordered elements.
+
+        Blank lines are skipped. Lines matching none of the patterns become
+        stage directions only when they do not start with ``[`` or ``#`` and
+        contain no colon; other unmatched lines are dropped.
+        """
+        title: Optional[str] = None
+        characters: dict[str, Character] = {}
+        elements: list[ScriptElement] = []
+
+        for raw_line in script.strip().split('\n'):
+            text = raw_line.strip()
+            if not text:
+                continue
+
+            if (found := self.TITLE_PATTERN.match(text)) is not None:
+                # Title
+                title = found.group(1)
+
+            elif (found := self.DIRECTION_PATTERN.match(text)) is not None:
+                # Stage direction / scene
+                elements.append(ScriptElement(
+                    type=ElementType.DIRECTION,
+                    text=found.group(1)
+                ))
+
+            elif (found := self.SFX_PATTERN.match(text)) is not None:
+                # Sound effect
+                elements.append(ScriptElement(
+                    type=ElementType.SFX,
+                    description=found.group(1),
+                    volume=1.0
+                ))
+
+            elif (found := self.MUSIC_PATTERN.match(text)) is not None:
+                # Music
+                elements.append(ScriptElement(
+                    type=ElementType.MUSIC,
+                    description=found.group(2),
+                    action=self.MUSIC_ACTIONS.get(found.group(1), "play"),
+                    volume=0.3,
+                    fade_duration=2.0
+                ))
+
+            elif (found := self.PAUSE_PATTERN.match(text)) is not None:
+                # Pause
+                elements.append(ScriptElement(
+                    type=ElementType.PAUSE,
+                    duration=float(found.group(1))
+                ))
+
+            elif (found := self.DIALOGUE_PATTERN.match(text)) is not None:
+                # Dialogue
+                speaker = found.group(1).strip()
+                spoken = found.group(3).strip()
+                description, emotion = self._split_character_info(found.group(2))
+
+                # Register the character, or fill in a description that was
+                # missing so far.
+                known = characters.get(speaker)
+                if known is None:
+                    characters[speaker] = Character(
+                        name=speaker,
+                        description=description
+                    )
+                elif description and not known.description:
+                    known.description = description
+
+                elements.append(ScriptElement(
+                    type=ElementType.DIALOGUE,
+                    character=speaker,
+                    text=spoken,
+                    emotion=emotion
+                ))
+
+            else:
+                # Unmatched plain text (no leading bracket or hash, no
+                # colon) is treated as a stage direction.
+                looks_like_markup = text.startswith('[') or text.startswith('#')
+                if not looks_like_markup and ':' not in text:
+                    elements.append(ScriptElement(
+                        type=ElementType.DIRECTION,
+                        text=text
+                    ))
+
+        return ParsedScript(
+            title=title,
+            characters=list(characters.values()),
+            elements=elements
+        )
+
+    def validate_script(self, script: str) -> tuple[bool, list[str]]:
+        """
+        Validate a script.
+
+        Returns: (is_valid, error_messages)
+        """
+        if not script or not script.strip():
+            return False, ["스크립트가 비어있습니다"]
+
+        parsed = self.parse(script)
+        errors = []
+
+        if not parsed.elements:
+            errors.append("파싱된 요소가 없습니다")
+
+        # At least one dialogue line is required.
+        has_dialogue = any(e.type == ElementType.DIALOGUE for e in parsed.elements)
+        if not has_dialogue:
+            errors.append("대사가 없습니다")
+
+        return not errors, errors
+
+
+# Singleton instance
+script_parser = ScriptParser()
--- a/audio-studio-api/app/services/tts_client.py
+++ b/audio-studio-api/app/services/tts_client.py
@ -0,0 +1,135 @@
+"""TTS 엔진 클라이언트
+
+audio-studio-tts 서비스와 통신
+"""
+
+import os
+import logging
+from typing import Optional, Tuple, List
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+class TTSClient:
+    """HTTP client for the TTS engine (the audio-studio-tts service).
+
+    Each public method opens a short-lived ``httpx.AsyncClient``, sends a
+    single request and calls ``raise_for_status()`` on the response, so a
+    4xx/5xx reply surfaces as ``httpx.HTTPStatusError``.
+
+    Attributes:
+        base_url: Engine root URL, read from ``TTS_ENGINE_URL`` (default
+            ``http://localhost:8001``).
+        timeout: ``httpx.Timeout`` applied to every request.
+    """
+
+    # Sample rate assumed when the response has no X-Sample-Rate header.
+    DEFAULT_SAMPLE_RATE = 24000
+
+    def __init__(self):
+        self.base_url = os.getenv("TTS_ENGINE_URL", "http://localhost:8001")
+        # Synthesis can take a while, hence the long overall timeout.
+        self.timeout = httpx.Timeout(120.0, connect=10.0)
+
+    def _url(self, path: str) -> str:
+        """Join ``base_url`` and ``path`` (which must start with ``/``).
+
+        Trailing slashes on ``base_url`` are dropped so that a value such as
+        ``http://host:8001/`` does not produce ``http://host:8001//health``.
+        """
+        return f"{self.base_url.rstrip('/')}{path}"
+
+    @classmethod
+    def _sample_rate_from(cls, headers) -> int:
+        """Read ``X-Sample-Rate`` from ``headers``, defaulting when absent."""
+        return int(headers.get("X-Sample-Rate", cls.DEFAULT_SAMPLE_RATE))
+
+    async def _get_json(self, path: str) -> dict:
+        """GET ``path`` on the engine and return the decoded JSON body."""
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(self._url(path))
+            response.raise_for_status()
+            return response.json()
+
+    async def _post_audio(self, path: str, **request_kwargs) -> Tuple[bytes, int]:
+        """POST to ``path`` and return ``(audio_bytes, sample_rate)``.
+
+        ``request_kwargs`` are forwarded to ``AsyncClient.post`` unchanged
+        (``json=...`` or ``files=...``/``data=...``).
+        """
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.post(self._url(path), **request_kwargs)
+            response.raise_for_status()
+            return response.content, self._sample_rate_from(response.headers)
+
+    async def health_check(self) -> dict:
+        """Return the JSON body of the engine's ``/health`` endpoint."""
+        return await self._get_json("/health")
+
+    async def get_speakers(self) -> List[str]:
+        """Return the preset speaker list (``speakers`` key of ``/speakers``)."""
+        body = await self._get_json("/speakers")
+        return body["speakers"]
+
+    async def get_languages(self) -> dict:
+        """Return the supported languages (``languages`` key of ``/languages``)."""
+        body = await self._get_json("/languages")
+        return body["languages"]
+
+    async def synthesize(
+        self,
+        text: str,
+        speaker: str = "Chelsie",
+        language: str = "ko",
+        instruct: Optional[str] = None,
+    ) -> Tuple[bytes, int]:
+        """Synthesize speech with a preset voice via ``POST /synthesize``.
+
+        Args:
+            text: Text to synthesize.
+            speaker: Preset speaker name.
+            language: Language code sent to the engine.
+            instruct: Optional instruction string; only included in the
+                request when it is non-empty.
+
+        Returns:
+            (audio_bytes, sample_rate)
+        """
+        payload = {
+            "text": text,
+            "speaker": speaker,
+            "language": language,
+        }
+        if instruct:
+            payload["instruct"] = instruct
+
+        return await self._post_audio("/synthesize", json=payload)
+
+    async def voice_clone(
+        self,
+        text: str,
+        ref_audio: bytes,
+        ref_text: str,
+        language: str = "ko",
+    ) -> Tuple[bytes, int]:
+        """Synthesize speech with voice cloning via ``POST /voice-clone``.
+
+        Args:
+            text: Text to synthesize.
+            ref_audio: Reference audio bytes, uploaded as ``reference.wav``
+                with content type ``audio/wav``.
+            ref_text: Sent as the ``ref_text`` form field (presumably the
+                transcript of the reference audio; confirm against the engine).
+            language: Language code sent to the engine.
+
+        Returns:
+            (audio_bytes, sample_rate)
+        """
+        # Sent as multipart/form-data: one file part plus plain form fields.
+        files = {"ref_audio": ("reference.wav", ref_audio, "audio/wav")}
+        data = {
+            "text": text,
+            "ref_text": ref_text,
+            "language": language,
+        }
+
+        return await self._post_audio("/voice-clone", files=files, data=data)
+
+    async def voice_design(
+        self,
+        text: str,
+        instruct: str,
+        language: str = "ko",
+    ) -> Tuple[bytes, int]:
+        """Synthesize speech with voice design via ``POST /voice-design``.
+
+        Args:
+            text: Text to synthesize.
+            instruct: Instruction string sent as the ``instruct`` field.
+            language: Language code sent to the engine.
+
+        Returns:
+            (audio_bytes, sample_rate)
+        """
+        payload = {
+            "text": text,
+            "instruct": instruct,
+            "language": language,
+        }
+
+        return await self._post_audio("/voice-design", json=payload)
+
+
+# Singleton instance (created once when this module is imported)
+tts_client = TTSClient()