feat: Drama Studio 프로젝트 초기 구조 설정

- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 11:39:38 +09:00
commit cc547372c0
70 changed files with 18399 additions and 0 deletions
--- a/audio-studio-api/app/routers/__init__.py
+++ b/audio-studio-api/app/routers/__init__.py
--- a/audio-studio-api/app/routers/drama.py
+++ b/audio-studio-api/app/routers/drama.py
@ -0,0 +1,193 @@
+# 드라마 API 라우터
+from fastapi import APIRouter, HTTPException, BackgroundTasks
+from fastapi.responses import FileResponse
+from typing import Optional
+import os
+
+from app.models.drama import (
+    DramaCreateRequest, DramaGenerateRequest, DramaResponse,
+    ParsedScript, Character
+)
+from app.services.script_parser import script_parser
+from app.services.drama_orchestrator import drama_orchestrator
+
+router = APIRouter(prefix="/api/v1/drama", tags=["drama"])
+
+
+@router.post("/parse", response_model=ParsedScript)
+async def parse_script(script: str):
+    """
+    스크립트 파싱 (미리보기)
+
+    마크다운 형식의 스크립트를 구조화된 데이터로 변환합니다.
+    실제 프로젝트 생성 없이 파싱 결과만 확인할 수 있습니다.
+    """
+    is_valid, errors = script_parser.validate_script(script)
+    if not is_valid:
+        raise HTTPException(status_code=400, detail={"errors": errors})
+
+    return script_parser.parse(script)
+
+
+@router.post("/projects", response_model=DramaResponse)
+async def create_project(request: DramaCreateRequest):
+    """
+    새 드라마 프로젝트 생성
+
+    스크립트를 파싱하고 프로젝트를 생성합니다.
+    voice_mapping으로 캐릭터별 보이스를 지정할 수 있습니다.
+    """
+    # 스크립트 유효성 검사
+    is_valid, errors = script_parser.validate_script(request.script)
+    if not is_valid:
+        raise HTTPException(status_code=400, detail={"errors": errors})
+
+    project = await drama_orchestrator.create_project(request)
+
+    return DramaResponse(
+        project_id=project.project_id,
+        title=project.title,
+        status=project.status,
+        characters=project.script_parsed.characters if project.script_parsed else [],
+        element_count=len(project.script_parsed.elements) if project.script_parsed else 0,
+        estimated_duration=drama_orchestrator.estimate_duration(project.script_parsed) if project.script_parsed else None
+    )
+
+
+@router.get("/projects", response_model=list[DramaResponse])
+async def list_projects(skip: int = 0, limit: int = 20):
+    """프로젝트 목록 조회"""
+    projects = await drama_orchestrator.list_projects(skip=skip, limit=limit)
+
+    return [
+        DramaResponse(
+            project_id=p.project_id,
+            title=p.title,
+            status=p.status,
+            characters=p.script_parsed.characters if p.script_parsed else [],
+            element_count=len(p.script_parsed.elements) if p.script_parsed else 0,
+            output_file_id=p.output_file_id,
+            error_message=p.error_message
+        )
+        for p in projects
+    ]
+
+
+@router.get("/projects/{project_id}", response_model=DramaResponse)
+async def get_project(project_id: str):
+    """프로젝트 상세 조회"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    return DramaResponse(
+        project_id=project.project_id,
+        title=project.title,
+        status=project.status,
+        characters=project.script_parsed.characters if project.script_parsed else [],
+        element_count=len(project.script_parsed.elements) if project.script_parsed else 0,
+        estimated_duration=drama_orchestrator.estimate_duration(project.script_parsed) if project.script_parsed else None,
+        output_file_id=project.output_file_id,
+        error_message=project.error_message
+    )
+
+
+@router.post("/projects/{project_id}/render")
+async def render_project(
+    project_id: str,
+    background_tasks: BackgroundTasks,
+    output_format: str = "wav"
+):
+    """
+    드라마 렌더링 시작
+
+    백그라운드에서 TTS 생성, 효과음 검색, 믹싱을 수행합니다.
+    완료되면 status가 'completed'로 변경됩니다.
+    """
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    if project.status == "processing":
+        raise HTTPException(status_code=400, detail="이미 렌더링 중입니다")
+
+    # 백그라운드 렌더링 시작
+    background_tasks.add_task(
+        drama_orchestrator.render,
+        project_id,
+        output_format
+    )
+
+    return {
+        "project_id": project_id,
+        "status": "processing",
+        "message": "렌더링이 시작되었습니다"
+    }
+
+
+@router.get("/projects/{project_id}/download")
+async def download_project(project_id: str):
+    """렌더링된 드라마 다운로드"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    if project.status != "completed":
+        raise HTTPException(
+            status_code=400,
+            detail=f"렌더링이 완료되지 않았습니다 (현재 상태: {project.status})"
+        )
+
+    if not project.output_file_id or not os.path.exists(project.output_file_id):
+        raise HTTPException(status_code=404, detail="출력 파일을 찾을 수 없습니다")
+
+    return FileResponse(
+        project.output_file_id,
+        media_type="audio/wav",
+        filename=f"{project.title}.wav"
+    )
+
+
+@router.put("/projects/{project_id}/voices")
+async def update_voice_mapping(
+    project_id: str,
+    voice_mapping: dict[str, str]
+):
+    """캐릭터-보이스 매핑 업데이트"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    from app.database import db
+    from datetime import datetime
+
+    await db.dramas.update_one(
+        {"project_id": project_id},
+        {
+            "$set": {
+                "voice_mapping": voice_mapping,
+                "updated_at": datetime.utcnow()
+            }
+        }
+    )
+
+    return {"message": "보이스 매핑이 업데이트되었습니다"}
+
+
+@router.delete("/projects/{project_id}")
+async def delete_project(project_id: str):
+    """프로젝트 삭제"""
+    project = await drama_orchestrator.get_project(project_id)
+    if not project:
+        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
+
+    from app.database import db
+
+    # 출력 파일 삭제
+    if project.output_file_id and os.path.exists(project.output_file_id):
+        os.remove(project.output_file_id)
+
+    # DB에서 삭제
+    await db.dramas.delete_one({"project_id": project_id})
+
+    return {"message": "프로젝트가 삭제되었습니다"}
--- a/audio-studio-api/app/routers/music.py
+++ b/audio-studio-api/app/routers/music.py
@ -0,0 +1,278 @@
+"""배경음악 API 라우터
+
+MusicGen 연동 및 외부 음악 소스
+"""
+
+import os
+import uuid
+from datetime import datetime
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
+from fastapi.responses import Response
+from pydantic import BaseModel, Field
+import httpx
+
+from app.database import Database, get_db
+
+router = APIRouter(prefix="/api/v1/music", tags=["music"])
+
+MUSICGEN_URL = os.getenv("MUSICGEN_URL", "http://localhost:8002")
+
+
+# ========================================
+# Pydantic 모델
+# ========================================
+
+class MusicGenerateRequest(BaseModel):
+    """Request body for generating music."""
+    # Text description of the music; must be 5 to 500 characters long.
+    prompt: str = Field(..., min_length=5, max_length=500, description="음악 설명")
+    # Length to generate, in seconds; limited to the range 5..30.
+    duration: int = Field(default=30, ge=5, le=30, description="생성 길이 (초)")
+    # Whether the generated audio is also saved to the library.
+    save_to_library: bool = Field(default=True, description="라이브러리에 저장")
+
+
+class MusicTrackResponse(BaseModel):
+    """Response model describing one music track."""
+    id: str
+    name: str
+    description: Optional[str] = None
+    source: str  # musicgen | pixabay | uploaded
+    # Prompt the track was generated from, when one was recorded.
+    generation_prompt: Optional[str] = None
+    duration_seconds: float
+    genre: Optional[str] = None
+    mood: List[str] = []
+    license: str = ""
+    created_at: datetime
+
+
+class MusicListResponse(BaseModel):
+    """Response model for one page of music tracks."""
+    # Tracks on the requested page.
+    tracks: List[MusicTrackResponse]
+    # Number of tracks matching the query across all pages.
+    total: int
+    page: int
+    page_size: int
+
+
+# ========================================
+# API 엔드포인트
+# ========================================
+
+@router.post("/generate")
+async def generate_music(
+    request: MusicGenerateRequest,
+    db: Database = Depends(get_db),
+):
+    """AI로 배경음악 생성
+
+    MusicGen을 사용하여 텍스트 프롬프트 기반 음악 생성
+    """
+    try:
+        # MusicGen 서비스 호출
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            response = await client.post(
+                f"{MUSICGEN_URL}/generate",
+                json={
+                    "prompt": request.prompt,
+                    "duration": request.duration,
+                },
+            )
+            response.raise_for_status()
+            audio_bytes = response.content
+
+    except httpx.TimeoutException:
+        raise HTTPException(status_code=504, detail="Music generation timed out")
+    except httpx.HTTPStatusError as e:
+        raise HTTPException(status_code=502, detail=f"MusicGen error: {e.response.text}")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Music generation failed: {str(e)}")
+
+    # 라이브러리에 저장
+    if request.save_to_library:
+        track_id = f"music_{uuid.uuid4().hex[:12]}"
+        now = datetime.utcnow()
+
+        # GridFS에 오디오 저장
+        audio_file_id = await db.save_audio(
+            audio_bytes,
+            f"{track_id}.wav",
+            metadata={
+                "type": "generated_music",
+                "prompt": request.prompt,
+            },
+        )
+
+        # DB에 트랙 정보 저장
+        track_doc = {
+            "track_id": track_id,
+            "name": f"Generated: {request.prompt[:30]}...",
+            "description": request.prompt,
+            "source": "musicgen",
+            "generation_prompt": request.prompt,
+            "audio_file_id": audio_file_id,
+            "duration_seconds": request.duration,
+            "format": "wav",
+            "genre": None,
+            "mood": [],
+            "license": "CC-BY-NC",  # MusicGen 모델 라이센스
+            "created_at": now,
+        }
+        await db.music_tracks.insert_one(track_doc)
+
+    return Response(
+        content=audio_bytes,
+        media_type="audio/wav",
+        headers={
+            "X-Duration": str(request.duration),
+            "Content-Disposition": 'attachment; filename="generated_music.wav"',
+        },
+    )
+
+
+@router.get("/library", response_model=MusicListResponse)
+async def list_music_library(
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    source: Optional[str] = Query(None, description="소스 필터 (musicgen, pixabay, uploaded)"),
+    genre: Optional[str] = Query(None, description="장르 필터"),
+    db: Database = Depends(get_db),
+):
+    """음악 라이브러리 목록 조회"""
+    query = {}
+    if source:
+        query["source"] = source
+    if genre:
+        query["genre"] = genre
+
+    total = await db.music_tracks.count_documents(query)
+    skip = (page - 1) * page_size
+
+    cursor = db.music_tracks.find(query).sort("created_at", -1).skip(skip).limit(page_size)
+
+    tracks = []
+    async for doc in cursor:
+        tracks.append(MusicTrackResponse(
+            id=doc.get("track_id", str(doc["_id"])),
+            name=doc["name"],
+            description=doc.get("description"),
+            source=doc.get("source", "unknown"),
+            generation_prompt=doc.get("generation_prompt"),
+            duration_seconds=doc.get("duration_seconds", 0),
+            genre=doc.get("genre"),
+            mood=doc.get("mood", []),
+            license=doc.get("license", ""),
+            created_at=doc.get("created_at", datetime.utcnow()),
+        ))
+
+    return MusicListResponse(
+        tracks=tracks,
+        total=total,
+        page=page,
+        page_size=page_size,
+    )
+
+
+@router.get("/{track_id}")
+async def get_music_track(
+    track_id: str,
+    db: Database = Depends(get_db),
+):
+    """음악 트랙 상세 정보"""
+    doc = await db.music_tracks.find_one({"track_id": track_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Track not found")
+
+    return MusicTrackResponse(
+        id=doc.get("track_id", str(doc["_id"])),
+        name=doc["name"],
+        description=doc.get("description"),
+        source=doc.get("source", "unknown"),
+        generation_prompt=doc.get("generation_prompt"),
+        duration_seconds=doc.get("duration_seconds", 0),
+        genre=doc.get("genre"),
+        mood=doc.get("mood", []),
+        license=doc.get("license", ""),
+        created_at=doc.get("created_at", datetime.utcnow()),
+    )
+
+
+@router.get("/{track_id}/audio")
+async def get_music_audio(
+    track_id: str,
+    db: Database = Depends(get_db),
+):
+    """음악 오디오 스트리밍"""
+    doc = await db.music_tracks.find_one({"track_id": track_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Track not found")
+
+    audio_file_id = doc.get("audio_file_id")
+    if not audio_file_id:
+        raise HTTPException(status_code=404, detail="Audio file not found")
+
+    audio_bytes = await db.get_audio(audio_file_id)
+
+    return Response(
+        content=audio_bytes,
+        media_type="audio/wav",
+        headers={"Content-Disposition": f'inline; filename="{track_id}.wav"'},
+    )
+
+
+@router.delete("/{track_id}")
+async def delete_music_track(
+    track_id: str,
+    db: Database = Depends(get_db),
+):
+    """음악 트랙 삭제"""
+    doc = await db.music_tracks.find_one({"track_id": track_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Track not found")
+
+    # 오디오 파일 삭제
+    if doc.get("audio_file_id"):
+        await db.delete_audio(doc["audio_file_id"])
+
+    # 문서 삭제
+    await db.music_tracks.delete_one({"track_id": track_id})
+
+    return {"status": "deleted", "track_id": track_id}
+
+
+@router.get("/prompts/examples")
+async def get_example_prompts():
+    """예시 프롬프트 목록"""
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            response = await client.get(f"{MUSICGEN_URL}/prompts")
+            response.raise_for_status()
+            return response.json()
+    except Exception:
+        # MusicGen 서비스 연결 실패 시 기본 프롬프트 반환
+        return {
+            "examples": [
+                {
+                    "category": "Ambient",
+                    "prompts": [
+                        "calm piano music, peaceful, ambient",
+                        "lo-fi hip hop beats, relaxing, study music",
+                        "meditation music, calm, zen",
+                    ],
+                },
+                {
+                    "category": "Electronic",
+                    "prompts": [
+                        "upbeat electronic dance music",
+                        "retro synthwave 80s style",
+                        "chill electronic ambient",
+                    ],
+                },
+                {
+                    "category": "Cinematic",
+                    "prompts": [
+                        "epic orchestral cinematic music",
+                        "tense suspenseful thriller music",
+                        "cheerful happy video game background",
+                    ],
+                },
+            ]
+        }
--- a/audio-studio-api/app/routers/recordings.py
+++ b/audio-studio-api/app/routers/recordings.py
@ -0,0 +1,184 @@
+"""녹음 관리 API 라우터"""
+
+import uuid
+import io
+from typing import List
+
+from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
+from fastapi.responses import Response
+import soundfile as sf
+import numpy as np
+
+from app.database import Database, get_db
+from app.models.voice import RecordingValidateResponse, RecordingUploadResponse
+
+router = APIRouter(prefix="/api/v1/recordings", tags=["recordings"])
+
+
+def analyze_audio(audio_bytes: bytes) -> dict:
+    """오디오 파일 분석
+
+    Returns:
+        duration, sample_rate, quality_score, issues
+    """
+    try:
+        # 오디오 로드
+        audio_data, sample_rate = sf.read(io.BytesIO(audio_bytes))
+
+        # 모노로 변환
+        if len(audio_data.shape) > 1:
+            audio_data = audio_data.mean(axis=1)
+
+        duration = len(audio_data) / sample_rate
+
+        # 품질 분석
+        issues = []
+        quality_score = 1.0
+
+        # 길이 체크
+        if duration < 1.0:
+            issues.append("오디오가 너무 짧습니다 (최소 1초 이상)")
+            quality_score -= 0.3
+        elif duration < 3.0:
+            issues.append("Voice Clone에는 3초 이상의 오디오가 권장됩니다")
+            quality_score -= 0.1
+
+        # RMS 레벨 체크 (볼륨)
+        rms = np.sqrt(np.mean(audio_data ** 2))
+        if rms < 0.01:
+            issues.append("볼륨이 너무 낮습니다")
+            quality_score -= 0.2
+        elif rms > 0.5:
+            issues.append("볼륨이 너무 높습니다 (클리핑 가능성)")
+            quality_score -= 0.1
+
+        # 피크 체크
+        peak = np.max(np.abs(audio_data))
+        if peak > 0.99:
+            issues.append("오디오가 클리핑되었습니다")
+            quality_score -= 0.2
+
+        # 노이즈 체크 (간단한 휴리스틱)
+        # 실제로는 더 정교한 노이즈 감지 필요
+        silence_threshold = 0.01
+        silent_samples = np.sum(np.abs(audio_data) < silence_threshold)
+        silence_ratio = silent_samples / len(audio_data)
+
+        if silence_ratio > 0.7:
+            issues.append("대부분이 무음입니다")
+            quality_score -= 0.3
+        elif silence_ratio > 0.5:
+            issues.append("무음 구간이 많습니다")
+            quality_score -= 0.1
+
+        quality_score = max(0.0, min(1.0, quality_score))
+
+        return {
+            "duration": duration,
+            "sample_rate": sample_rate,
+            "quality_score": quality_score,
+            "issues": issues,
+            "rms": float(rms),
+            "peak": float(peak),
+        }
+
+    except Exception as e:
+        return {
+            "duration": 0,
+            "sample_rate": 0,
+            "quality_score": 0,
+            "issues": [f"오디오 분석 실패: {str(e)}"],
+        }
+
+
+@router.post("/validate", response_model=RecordingValidateResponse)
+async def validate_recording(
+    audio: UploadFile = File(..., description="검증할 오디오 파일"),
+):
+    """녹음 품질 검증
+
+    Voice Clone에 사용할 녹음의 품질을 검증합니다.
+    """
+    audio_bytes = await audio.read()
+
+    if len(audio_bytes) < 1000:
+        raise HTTPException(status_code=400, detail="파일이 너무 작습니다")
+
+    analysis = analyze_audio(audio_bytes)
+
+    return RecordingValidateResponse(
+        valid=analysis["quality_score"] > 0.5 and analysis["duration"] > 1.0,
+        duration=analysis["duration"],
+        sample_rate=analysis["sample_rate"],
+        quality_score=analysis["quality_score"],
+        issues=analysis["issues"],
+    )
+
+
+@router.post("/upload", response_model=RecordingUploadResponse)
+async def upload_recording(
+    audio: UploadFile = File(..., description="업로드할 오디오 파일"),
+    transcript: str = Form(None, description="오디오의 텍스트 내용"),
+    db: Database = Depends(get_db),
+):
+    """녹음 파일 업로드
+
+    Voice Clone에 사용할 녹음을 업로드합니다.
+    """
+    audio_bytes = await audio.read()
+
+    # 품질 분석
+    analysis = analyze_audio(audio_bytes)
+
+    if analysis["duration"] < 0.5:
+        raise HTTPException(status_code=400, detail="오디오가 너무 짧습니다")
+
+    # GridFS에 저장
+    file_id = await db.save_audio(
+        audio_bytes,
+        audio.filename or f"recording_{uuid.uuid4()}.wav",
+        metadata={
+            "type": "recording",
+            "transcript": transcript,
+            "duration": analysis["duration"],
+            "sample_rate": analysis["sample_rate"],
+            "quality_score": analysis["quality_score"],
+        },
+    )
+
+    return RecordingUploadResponse(
+        file_id=file_id,
+        filename=audio.filename or "recording.wav",
+        duration=analysis["duration"],
+        sample_rate=analysis["sample_rate"],
+    )
+
+
+@router.get("/{file_id}")
+async def get_recording(
+    file_id: str,
+    db: Database = Depends(get_db),
+):
+    """녹음 파일 다운로드"""
+    try:
+        audio_bytes = await db.get_audio(file_id)
+        return Response(
+            content=audio_bytes,
+            media_type="audio/wav",
+            headers={"Content-Disposition": f'attachment; filename="{file_id}.wav"'},
+        )
+    except Exception as e:
+        raise HTTPException(status_code=404, detail="Recording not found")
+
+
+@router.delete("/{file_id}")
+async def delete_recording(
+    file_id: str,
+    db: Database = Depends(get_db),
+):
+    """녹음 파일 삭제"""
+    try:
+        await db.delete_audio(file_id)
+        return {"status": "deleted", "file_id": file_id}
+    except Exception as e:
+        raise HTTPException(status_code=404, detail="Recording not found")
--- a/audio-studio-api/app/routers/sound_effects.py
+++ b/audio-studio-api/app/routers/sound_effects.py
@ -0,0 +1,340 @@
+"""효과음 API 라우터
+
+Freesound API 연동
+"""
+
+import uuid
+from datetime import datetime
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Depends, Query
+from fastapi.responses import Response
+from pydantic import BaseModel
+
+from app.database import Database, get_db
+from app.services.freesound_client import freesound_client
+
+router = APIRouter(prefix="/api/v1/sound-effects", tags=["sound-effects"])
+
+
+# ========================================
+# Pydantic 모델
+# ========================================
+
+class SoundEffectResponse(BaseModel):
+    """Response model describing one sound effect."""
+    # "fs_<freesound id>" for Freesound results, the database id for local ones.
+    id: str
+    freesound_id: Optional[int] = None
+    name: str
+    description: str
+    # Length of the sound in seconds.
+    duration: float
+    tags: List[str] = []
+    # Left unset for local sounds, whose audio is served by a separate endpoint.
+    preview_url: Optional[str] = None
+    license: str = ""
+    username: Optional[str] = None
+    source: str = "freesound"  # freesound | local
+
+
+class SoundEffectSearchResponse(BaseModel):
+    """Response model for one page of sound-effect results."""
+    # Total number of matches, not just the size of this page.
+    count: int
+    page: int
+    page_size: int
+    results: List[SoundEffectResponse]
+
+
+class SoundEffectImportRequest(BaseModel):
+    """Request body for importing a sound effect."""
+    # Id of the Freesound sound to import.
+    freesound_id: int
+
+
+# ========================================
+# API 엔드포인트
+# ========================================
+
+@router.get("/search", response_model=SoundEffectSearchResponse)
+async def search_sound_effects(
+    query: str = Query(..., min_length=1, description="검색어"),
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    min_duration: Optional[float] = Query(None, ge=0, description="최소 길이 (초)"),
+    max_duration: Optional[float] = Query(None, ge=0, description="최대 길이 (초)"),
+    sort: str = Query("score", description="정렬 (score, duration_asc, duration_desc)"),
+):
+    """Freesound에서 효과음 검색"""
+    try:
+        result = await freesound_client.search(
+            query=query,
+            page=page,
+            page_size=page_size,
+            min_duration=min_duration,
+            max_duration=max_duration,
+            sort=sort,
+        )
+
+        # 응답 형식 변환
+        sounds = []
+        for item in result["results"]:
+            sounds.append(SoundEffectResponse(
+                id=f"fs_{item['freesound_id']}",
+                freesound_id=item["freesound_id"],
+                name=item["name"],
+                description=item["description"],
+                duration=item["duration"],
+                tags=item["tags"],
+                preview_url=item["preview_url"],
+                license=item["license"],
+                username=item.get("username"),
+                source="freesound",
+            ))
+
+        return SoundEffectSearchResponse(
+            count=result["count"],
+            page=page,
+            page_size=page_size,
+            results=sounds,
+        )
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")
+
+
+@router.get("/library", response_model=SoundEffectSearchResponse)
+async def list_local_sound_effects(
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    category: Optional[str] = Query(None, description="카테고리 필터"),
+    db: Database = Depends(get_db),
+):
+    """로컬 효과음 라이브러리 조회"""
+    query = {}
+    if category:
+        query["categories"] = category
+
+    total = await db.sound_effects.count_documents(query)
+    skip = (page - 1) * page_size
+
+    cursor = db.sound_effects.find(query).sort("created_at", -1).skip(skip).limit(page_size)
+
+    sounds = []
+    async for doc in cursor:
+        sounds.append(SoundEffectResponse(
+            id=str(doc["_id"]),
+            freesound_id=doc.get("source_id"),
+            name=doc["name"],
+            description=doc.get("description", ""),
+            duration=doc.get("duration_seconds", 0),
+            tags=doc.get("tags", []),
+            preview_url=None,  # 로컬 파일은 별도 엔드포인트로 제공
+            license=doc.get("license", ""),
+            source="local",
+        ))
+
+    return SoundEffectSearchResponse(
+        count=total,
+        page=page,
+        page_size=page_size,
+        results=sounds,
+    )
+
+
+@router.post("/import", response_model=SoundEffectResponse)
+async def import_sound_effect(
+    request: SoundEffectImportRequest,
+    db: Database = Depends(get_db),
+):
+    """Freesound에서 효과음 가져오기 (로컬 캐시)"""
+    try:
+        # Freesound에서 상세 정보 조회
+        sound_info = await freesound_client.get_sound(request.freesound_id)
+
+        # 프리뷰 다운로드
+        preview_url = sound_info.get("previews", {}).get("preview-hq-mp3", "")
+        if not preview_url:
+            raise HTTPException(status_code=400, detail="Preview not available")
+
+        audio_bytes = await freesound_client.download_preview(preview_url)
+
+        # GridFS에 저장
+        file_id = await db.save_audio(
+            audio_bytes,
+            f"sfx_{request.freesound_id}.mp3",
+            content_type="audio/mpeg",
+            metadata={"freesound_id": request.freesound_id},
+        )
+
+        # DB에 메타데이터 저장
+        now = datetime.utcnow()
+        doc = {
+            "name": sound_info.get("name", ""),
+            "description": sound_info.get("description", ""),
+            "source": "freesound",
+            "source_id": request.freesound_id,
+            "source_url": f"https://freesound.org/s/{request.freesound_id}/",
+            "audio_file_id": file_id,
+            "duration_seconds": sound_info.get("duration", 0),
+            "format": "mp3",
+            "categories": [],
+            "tags": sound_info.get("tags", [])[:20],  # 최대 20개
+            "license": sound_info.get("license", ""),
+            "attribution": sound_info.get("username", ""),
+            "created_at": now,
+            "updated_at": now,
+        }
+
+        result = await db.sound_effects.insert_one(doc)
+
+        return SoundEffectResponse(
+            id=str(result.inserted_id),
+            freesound_id=request.freesound_id,
+            name=doc["name"],
+            description=doc["description"],
+            duration=doc["duration_seconds"],
+            tags=doc["tags"],
+            license=doc["license"],
+            source="local",
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")
+
+
+@router.get("/{sound_id}")
+async def get_sound_effect_info(
+    sound_id: str,
+    db: Database = Depends(get_db),
+):
+    """효과음 상세 정보 조회"""
+    # Freesound ID인 경우
+    if sound_id.startswith("fs_"):
+        freesound_id = int(sound_id[3:])
+        try:
+            sound_info = await freesound_client.get_sound(freesound_id)
+            return SoundEffectResponse(
+                id=sound_id,
+                freesound_id=freesound_id,
+                name=sound_info.get("name", ""),
+                description=sound_info.get("description", ""),
+                duration=sound_info.get("duration", 0),
+                tags=sound_info.get("tags", []),
+                preview_url=sound_info.get("previews", {}).get("preview-hq-mp3", ""),
+                license=sound_info.get("license", ""),
+                source="freesound",
+            )
+        except Exception as e:
+            raise HTTPException(status_code=404, detail="Sound not found")
+
+    # 로컬 ID인 경우
+    from bson import ObjectId
+    try:
+        doc = await db.sound_effects.find_one({"_id": ObjectId(sound_id)})
+    except:
+        raise HTTPException(status_code=400, detail="Invalid sound ID")
+
+    if not doc:
+        raise HTTPException(status_code=404, detail="Sound not found")
+
+    return SoundEffectResponse(
+        id=str(doc["_id"]),
+        freesound_id=doc.get("source_id"),
+        name=doc["name"],
+        description=doc.get("description", ""),
+        duration=doc.get("duration_seconds", 0),
+        tags=doc.get("tags", []),
+        license=doc.get("license", ""),
+        source="local",
+    )
+
+
+@router.get("/{sound_id}/audio")
+async def get_sound_effect_audio(
+    sound_id: str,
+    db: Database = Depends(get_db),
+):
+    """효과음 오디오 스트리밍"""
+    # Freesound ID인 경우 프리뷰 리다이렉트
+    if sound_id.startswith("fs_"):
+        freesound_id = int(sound_id[3:])
+        try:
+            sound_info = await freesound_client.get_sound(freesound_id)
+            preview_url = sound_info.get("previews", {}).get("preview-hq-mp3", "")
+            if preview_url:
+                audio_bytes = await freesound_client.download_preview(preview_url)
+                return Response(
+                    content=audio_bytes,
+                    media_type="audio/mpeg",
+                    headers={"Content-Disposition": f'inline; filename="{freesound_id}.mp3"'},
+                )
+        except Exception as e:
+            raise HTTPException(status_code=404, detail="Audio not found")
+
+    # 로컬 ID인 경우
+    from bson import ObjectId
+    try:
+        doc = await db.sound_effects.find_one({"_id": ObjectId(sound_id)})
+    except:
+        raise HTTPException(status_code=400, detail="Invalid sound ID")
+
+    if not doc or not doc.get("audio_file_id"):
+        raise HTTPException(status_code=404, detail="Audio not found")
+
+    audio_bytes = await db.get_audio(doc["audio_file_id"])
+    content_type = "audio/mpeg" if doc.get("format") == "mp3" else "audio/wav"
+
+    return Response(
+        content=audio_bytes,
+        media_type=content_type,
+        headers={"Content-Disposition": f'inline; filename="{sound_id}.{doc.get("format", "wav")}"'},
+    )
+
+
+@router.get("/categories")
+async def list_categories(
+    db: Database = Depends(get_db),
+):
+    """효과음 카테고리 목록"""
+    # 로컬 라이브러리의 카테고리 집계
+    pipeline = [
+        {"$unwind": "$categories"},
+        {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
+        {"$sort": {"count": -1}},
+    ]
+
+    categories = []
+    async for doc in db.sound_effects.aggregate(pipeline):
+        categories.append({
+            "name": doc["_id"],
+            "count": doc["count"],
+        })
+
+    return {"categories": categories}
+
+
+@router.delete("/{sound_id}")
+async def delete_sound_effect(
+    sound_id: str,
+    db: Database = Depends(get_db),
+):
+    """로컬 효과음 삭제"""
+    if sound_id.startswith("fs_"):
+        raise HTTPException(status_code=400, detail="Cannot delete Freesound reference")
+
+    from bson import ObjectId
+    try:
+        doc = await db.sound_effects.find_one({"_id": ObjectId(sound_id)})
+    except:
+        raise HTTPException(status_code=400, detail="Invalid sound ID")
+
+    if not doc:
+        raise HTTPException(status_code=404, detail="Sound not found")
+
+    # 오디오 파일 삭제
+    if doc.get("audio_file_id"):
+        await db.delete_audio(doc["audio_file_id"])
+
+    # 문서 삭제
+    await db.sound_effects.delete_one({"_id": ObjectId(sound_id)})
+
+    return {"status": "deleted", "sound_id": sound_id}
--- a/audio-studio-api/app/routers/tts.py
+++ b/audio-studio-api/app/routers/tts.py
@ -0,0 +1,227 @@
+"""TTS API 라우터"""
+
+import uuid
+from datetime import datetime
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException, Depends
+from fastapi.responses import Response, StreamingResponse
+
+from app.database import Database, get_db
+from app.models.voice import TTSSynthesizeRequest, TTSGenerationResponse, VoiceType
+from app.services.tts_client import tts_client
+from app.routers.voices import PRESET_VOICES
+
+# Router for the TTS endpoints: every route registered below is served under
+# the /api/v1/tts prefix and carries the "tts" tag.
+router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
+
+
+@router.post("/synthesize")
+async def synthesize(
+    request: TTSSynthesizeRequest,
+    db: Database = Depends(get_db),
+):
+    """TTS 음성 합성
+
+    지정된 보이스로 텍스트를 음성으로 변환합니다.
+    """
+    voice_id = request.voice_id
+
+    # 프리셋 보이스 확인
+    preset_speaker = None
+    for preset in PRESET_VOICES:
+        if preset["voice_id"] == voice_id:
+            preset_speaker = preset["preset_voice_id"]
+            break
+
+    if preset_speaker:
+        # 프리셋 음성 합성
+        try:
+            audio_bytes, sr = await tts_client.synthesize(
+                text=request.text,
+                speaker=preset_speaker,
+                language="ko",
+                instruct=request.instruct,
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {str(e)}")
+
+    else:
+        # DB에서 보이스 정보 조회
+        voice_doc = await db.voices.find_one({"voice_id": voice_id})
+        if not voice_doc:
+            raise HTTPException(status_code=404, detail="Voice not found")
+
+        voice_type = voice_doc.get("type")
+
+        if voice_type == VoiceType.CLONED.value:
+            # Voice Clone 합성 (레퍼런스 오디오 필요)
+            ref_audio_id = voice_doc.get("reference_audio_id")
+            ref_transcript = voice_doc.get("reference_transcript", "")
+
+            if not ref_audio_id:
+                raise HTTPException(status_code=400, detail="Reference audio not found")
+
+            ref_audio = await db.get_audio(ref_audio_id)
+
+            try:
+                audio_bytes, sr = await tts_client.voice_clone(
+                    text=request.text,
+                    ref_audio=ref_audio,
+                    ref_text=ref_transcript,
+                    language=voice_doc.get("language", "ko"),
+                )
+            except Exception as e:
+                raise HTTPException(status_code=500, detail=f"Voice clone synthesis failed: {str(e)}")
+
+        elif voice_type == VoiceType.DESIGNED.value:
+            # Voice Design 합성
+            design_prompt = voice_doc.get("design_prompt", "")
+
+            try:
+                audio_bytes, sr = await tts_client.voice_design(
+                    text=request.text,
+                    instruct=design_prompt,
+                    language=voice_doc.get("language", "ko"),
+                )
+            except Exception as e:
+                raise HTTPException(status_code=500, detail=f"Voice design synthesis failed: {str(e)}")
+
+        else:
+            raise HTTPException(status_code=400, detail=f"Unknown voice type: {voice_type}")
+
+    # 생성 기록 저장
+    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
+    now = datetime.utcnow()
+
+    # 오디오 저장
+    audio_file_id = await db.save_audio(
+        audio_bytes,
+        f"{generation_id}.wav",
+        metadata={"voice_id": voice_id, "text": request.text[:100]},
+    )
+
+    # 생성 기록 저장
+    gen_doc = {
+        "generation_id": generation_id,
+        "voice_id": voice_id,
+        "text": request.text,
+        "audio_file_id": audio_file_id,
+        "status": "completed",
+        "created_at": now,
+    }
+    await db.tts_generations.insert_one(gen_doc)
+
+    return Response(
+        content=audio_bytes,
+        media_type="audio/wav",
+        headers={
+            "X-Sample-Rate": str(sr),
+            "X-Generation-ID": generation_id,
+            "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
+        },
+    )
+
+
+@router.post("/synthesize/async", response_model=TTSGenerationResponse)
+async def synthesize_async(
+    request: TTSSynthesizeRequest,
+    db: Database = Depends(get_db),
+):
+    """비동기 TTS 음성 합성 (긴 텍스트용)
+
+    생성 작업을 큐에 등록하고 generation_id를 반환합니다.
+    완료 후 /generations/{generation_id}/audio로 다운로드 가능합니다.
+    """
+    # 긴 텍스트 처리를 위한 비동기 방식
+    # 현재는 동기 방식과 동일하게 처리 (추후 Redis 큐 연동)
+
+    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
+    now = datetime.utcnow()
+
+    gen_doc = {
+        "generation_id": generation_id,
+        "voice_id": request.voice_id,
+        "text": request.text,
+        "status": "pending",
+        "created_at": now,
+    }
+    await db.tts_generations.insert_one(gen_doc)
+
+    # 실제로는 백그라운드 워커에서 처리해야 함
+    # 여기서는 바로 처리
+    try:
+        # synthesize 로직과 동일...
+        # (간소화를 위해 생략, 실제 구현 시 비동기 워커 사용)
+        pass
+    except Exception as e:
+        await db.tts_generations.update_one(
+            {"generation_id": generation_id},
+            {"$set": {"status": "failed", "error_message": str(e)}},
+        )
+
+    return TTSGenerationResponse(
+        generation_id=generation_id,
+        voice_id=request.voice_id,
+        text=request.text,
+        status="pending",
+        created_at=now,
+    )
+
+
+@router.get("/generations/{generation_id}", response_model=TTSGenerationResponse)
+async def get_generation(
+    generation_id: str,
+    db: Database = Depends(get_db),
+):
+    """TTS 생성 상태 조회"""
+    doc = await db.tts_generations.find_one({"generation_id": generation_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Generation not found")
+
+    return TTSGenerationResponse(
+        generation_id=doc["generation_id"],
+        voice_id=doc["voice_id"],
+        text=doc["text"],
+        status=doc["status"],
+        audio_file_id=str(doc.get("audio_file_id")) if doc.get("audio_file_id") else None,
+        duration_seconds=doc.get("duration_seconds"),
+        created_at=doc["created_at"],
+    )
+
+
+@router.get("/generations/{generation_id}/audio")
+async def get_generation_audio(
+    generation_id: str,
+    db: Database = Depends(get_db),
+):
+    """생성된 오디오 다운로드"""
+    doc = await db.tts_generations.find_one({"generation_id": generation_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Generation not found")
+
+    if doc["status"] != "completed":
+        raise HTTPException(status_code=400, detail=f"Generation not completed: {doc['status']}")
+
+    audio_file_id = doc.get("audio_file_id")
+    if not audio_file_id:
+        raise HTTPException(status_code=404, detail="Audio file not found")
+
+    audio_bytes = await db.get_audio(audio_file_id)
+
+    return Response(
+        content=audio_bytes,
+        media_type="audio/wav",
+        headers={
+            "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
+        },
+    )
+
+
+@router.get("/health")
+async def tts_health():
+    """TTS 엔진 헬스체크"""
+    try:
+        health = await tts_client.health_check()
+        return {"status": "healthy", "tts_engine": health}
+    except Exception as e:
+        return {"status": "unhealthy", "error": str(e)}
--- a/audio-studio-api/app/routers/voices.py
+++ b/audio-studio-api/app/routers/voices.py
@ -0,0 +1,426 @@
+"""Voice 관리 API 라우터"""
+
+import uuid
+from datetime import datetime
+from typing import Optional, List
+
+from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
+from fastapi.responses import Response
+
+from app.database import Database, get_db
+from app.models.voice import (
+    VoiceType,
+    LanguageCode,
+    VoiceResponse,
+    VoiceListResponse,
+    VoiceCloneRequest,
+    VoiceDesignRequest,
+    VoiceUpdateRequest,
+)
+from app.services.tts_client import tts_client
+
+# Router for the voice-management endpoints: every route registered below is
+# served under the /api/v1/voices prefix and carries the "voices" tag.
+router = APIRouter(prefix="/api/v1/voices", tags=["voices"])
+
+
+# ========================================
+# Preset voice catalogue (system defaults)
+# ========================================
+#
+# These entries are defined in code rather than in the `voices` collection.
+# `voice_id` is the identifier the API endpoints match against, and
+# `preset_voice_id` is the value passed to the TTS client as `speaker`.
+# The remaining keys are expanded directly into VoiceResponse.
+
+PRESET_VOICES = [
+    {
+        "voice_id": "preset_chelsie",
+        "name": "Chelsie",
+        "description": "밝고 활기찬 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Chelsie",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["bright", "energetic"],
+    },
+    {
+        "voice_id": "preset_ethan",
+        "name": "Ethan",
+        "description": "차분하고 신뢰감 있는 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Ethan",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["calm", "trustworthy"],
+    },
+    {
+        "voice_id": "preset_vivian",
+        "name": "Vivian",
+        "description": "부드럽고 따뜻한 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Vivian",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["soft", "warm"],
+    },
+    {
+        "voice_id": "preset_benjamin",
+        "name": "Benjamin",
+        "description": "깊고 전문적인 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Benjamin",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["deep", "professional"],
+    },
+    {
+        "voice_id": "preset_aurora",
+        "name": "Aurora",
+        "description": "우아하고 세련된 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Aurora",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["elegant", "refined"],
+    },
+    {
+        "voice_id": "preset_oliver",
+        "name": "Oliver",
+        "description": "친근하고 편안한 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Oliver",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["friendly", "casual"],
+    },
+    {
+        "voice_id": "preset_luna",
+        "name": "Luna",
+        "description": "따뜻하고 감성적인 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Luna",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["warm", "emotional"],
+    },
+    {
+        "voice_id": "preset_jasper",
+        "name": "Jasper",
+        "description": "전문적이고 명확한 남성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Jasper",
+        "language": LanguageCode.EN,
+        "gender": "male",
+        "style_tags": ["professional", "clear"],
+    },
+    {
+        "voice_id": "preset_aria",
+        "name": "Aria",
+        "description": "표현력 풍부한 여성 목소리",
+        "type": VoiceType.PRESET,
+        "preset_voice_id": "Aria",
+        "language": LanguageCode.EN,
+        "gender": "female",
+        "style_tags": ["expressive", "dynamic"],
+    },
+]
+
+
+def _voice_doc_to_response(doc: dict) -> VoiceResponse:
+    """MongoDB 문서를 VoiceResponse로 변환"""
+    return VoiceResponse(
+        voice_id=doc["voice_id"],
+        name=doc["name"],
+        description=doc.get("description"),
+        type=doc["type"],
+        language=doc.get("language", LanguageCode.KO),
+        preset_voice_id=doc.get("preset_voice_id"),
+        design_prompt=doc.get("design_prompt"),
+        reference_transcript=doc.get("reference_transcript"),
+        gender=doc.get("gender"),
+        age_range=doc.get("age_range"),
+        style_tags=doc.get("style_tags", []),
+        owner_id=str(doc.get("owner_id")) if doc.get("owner_id") else None,
+        is_public=doc.get("is_public", True),
+        sample_audio_id=str(doc.get("sample_audio_id")) if doc.get("sample_audio_id") else None,
+        created_at=doc.get("created_at", datetime.utcnow()),
+        updated_at=doc.get("updated_at", datetime.utcnow()),
+    )
+
+
+@router.get("", response_model=VoiceListResponse)
+async def list_voices(
+    type: Optional[VoiceType] = Query(None, description="보이스 타입 필터"),
+    language: Optional[LanguageCode] = Query(None, description="언어 필터"),
+    is_public: bool = Query(True, description="공개 보이스만"),
+    include_presets: bool = Query(True, description="프리셋 포함"),
+    page: int = Query(1, ge=1),
+    page_size: int = Query(20, ge=1, le=100),
+    db: Database = Depends(get_db),
+):
+    """보이스 목록 조회"""
+    voices = []
+
+    # 프리셋 보이스 추가
+    if include_presets and (type is None or type == VoiceType.PRESET):
+        for preset in PRESET_VOICES:
+            if language and preset["language"] != language:
+                continue
+            voices.append(VoiceResponse(
+                **preset,
+                is_public=True,
+                created_at=datetime.utcnow(),
+                updated_at=datetime.utcnow(),
+            ))
+
+    # DB에서 사용자 보이스 조회
+    query = {"is_public": True} if is_public else {}
+    if type and type != VoiceType.PRESET:
+        query["type"] = type.value
+    if language:
+        query["language"] = language.value
+
+    cursor = db.voices.find(query).sort("created_at", -1)
+    skip = (page - 1) * page_size
+    cursor = cursor.skip(skip).limit(page_size)
+
+    async for doc in cursor:
+        voices.append(_voice_doc_to_response(doc))
+
+    total = len(PRESET_VOICES) + await db.voices.count_documents(query)
+
+    return VoiceListResponse(
+        voices=voices,
+        total=total,
+        page=page,
+        page_size=page_size,
+    )
+
+
+@router.get("/{voice_id}", response_model=VoiceResponse)
+async def get_voice(
+    voice_id: str,
+    db: Database = Depends(get_db),
+):
+    """보이스 상세 조회"""
+    # 프리셋 체크
+    for preset in PRESET_VOICES:
+        if preset["voice_id"] == voice_id:
+            return VoiceResponse(
+                **preset,
+                is_public=True,
+                created_at=datetime.utcnow(),
+                updated_at=datetime.utcnow(),
+            )
+
+    # DB 조회
+    doc = await db.voices.find_one({"voice_id": voice_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Voice not found")
+
+    return _voice_doc_to_response(doc)
+
+
+@router.get("/{voice_id}/sample")
+async def get_voice_sample(
+    voice_id: str,
+    db: Database = Depends(get_db),
+):
+    """보이스 샘플 오디오 스트리밍"""
+    # 프리셋인 경우 TTS로 샘플 생성
+    for preset in PRESET_VOICES:
+        if preset["voice_id"] == voice_id:
+            sample_text = "안녕하세요, 저는 AI 음성입니다."
+            audio_bytes, sr = await tts_client.synthesize(
+                text=sample_text,
+                speaker=preset["preset_voice_id"],
+                language="ko",
+            )
+            return Response(
+                content=audio_bytes,
+                media_type="audio/wav",
+                headers={"Content-Disposition": f'inline; filename="{voice_id}_sample.wav"'},
+            )
+
+    # DB에서 조회
+    doc = await db.voices.find_one({"voice_id": voice_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Voice not found")
+
+    if not doc.get("sample_audio_id"):
+        raise HTTPException(status_code=404, detail="No sample audio available")
+
+    audio_bytes = await db.get_audio(doc["sample_audio_id"])
+    return Response(
+        content=audio_bytes,
+        media_type="audio/wav",
+        headers={"Content-Disposition": f'inline; filename="{voice_id}_sample.wav"'},
+    )
+
+
+@router.post("/clone", response_model=VoiceResponse)
+async def create_voice_clone(
+    name: str = Form(...),
+    description: Optional[str] = Form(None),
+    reference_transcript: str = Form(...),
+    language: LanguageCode = Form(LanguageCode.KO),
+    is_public: bool = Form(False),
+    reference_audio: UploadFile = File(...),
+    db: Database = Depends(get_db),
+):
+    """Voice Clone으로 새 보이스 생성
+
+    레퍼런스 오디오를 기반으로 목소리를 복제합니다.
+    3초 이상의 오디오가 권장됩니다.
+    """
+    # 오디오 파일 읽기
+    audio_content = await reference_audio.read()
+
+    # Voice Clone으로 샘플 생성
+    sample_text = "안녕하세요, 저는 복제된 AI 음성입니다."
+    try:
+        sample_audio, sr = await tts_client.voice_clone(
+            text=sample_text,
+            ref_audio=audio_content,
+            ref_text=reference_transcript,
+            language=language.value,
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Voice clone failed: {str(e)}")
+
+    # GridFS에 오디오 저장
+    ref_audio_id = await db.save_audio(
+        audio_content,
+        f"ref_{uuid.uuid4()}.wav",
+        metadata={"type": "reference"},
+    )
+    sample_audio_id = await db.save_audio(
+        sample_audio,
+        f"sample_{uuid.uuid4()}.wav",
+        metadata={"type": "sample"},
+    )
+
+    # DB에 보이스 저장
+    voice_id = f"clone_{uuid.uuid4().hex[:12]}"
+    now = datetime.utcnow()
+
+    doc = {
+        "voice_id": voice_id,
+        "name": name,
+        "description": description,
+        "type": VoiceType.CLONED.value,
+        "language": language.value,
+        "reference_audio_id": ref_audio_id,
+        "reference_transcript": reference_transcript,
+        "sample_audio_id": sample_audio_id,
+        "is_public": is_public,
+        "created_at": now,
+        "updated_at": now,
+    }
+
+    await db.voices.insert_one(doc)
+
+    return _voice_doc_to_response(doc)
+
+
+@router.post("/design", response_model=VoiceResponse)
+async def create_voice_design(
+    request: VoiceDesignRequest,
+    db: Database = Depends(get_db),
+):
+    """Voice Design으로 새 보이스 생성
+
+    텍스트 프롬프트를 기반으로 새로운 음성을 생성합니다.
+    예: "30대 남성, 부드럽고 차분한 목소리"
+    """
+    # Voice Design으로 샘플 생성
+    sample_text = "안녕하세요, 저는 AI로 생성된 음성입니다."
+    try:
+        sample_audio, sr = await tts_client.voice_design(
+            text=sample_text,
+            instruct=request.design_prompt,
+            language=request.language.value,
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Voice design failed: {str(e)}")
+
+    # GridFS에 샘플 저장
+    sample_audio_id = await db.save_audio(
+        sample_audio,
+        f"sample_{uuid.uuid4()}.wav",
+        metadata={"type": "sample"},
+    )
+
+    # DB에 보이스 저장
+    voice_id = f"design_{uuid.uuid4().hex[:12]}"
+    now = datetime.utcnow()
+
+    doc = {
+        "voice_id": voice_id,
+        "name": request.name,
+        "description": request.description,
+        "type": VoiceType.DESIGNED.value,
+        "language": request.language.value,
+        "design_prompt": request.design_prompt,
+        "sample_audio_id": sample_audio_id,
+        "is_public": request.is_public,
+        "created_at": now,
+        "updated_at": now,
+    }
+
+    await db.voices.insert_one(doc)
+
+    return _voice_doc_to_response(doc)
+
+
+@router.patch("/{voice_id}", response_model=VoiceResponse)
+async def update_voice(
+    voice_id: str,
+    request: VoiceUpdateRequest,
+    db: Database = Depends(get_db),
+):
+    """보이스 정보 수정"""
+    # 프리셋은 수정 불가
+    for preset in PRESET_VOICES:
+        if preset["voice_id"] == voice_id:
+            raise HTTPException(status_code=400, detail="Cannot modify preset voice")
+
+    # 업데이트할 필드만 추출
+    update_data = {k: v for k, v in request.model_dump().items() if v is not None}
+    if not update_data:
+        raise HTTPException(status_code=400, detail="No fields to update")
+
+    update_data["updated_at"] = datetime.utcnow()
+
+    result = await db.voices.update_one(
+        {"voice_id": voice_id},
+        {"$set": update_data},
+    )
+
+    if result.matched_count == 0:
+        raise HTTPException(status_code=404, detail="Voice not found")
+
+    doc = await db.voices.find_one({"voice_id": voice_id})
+    return _voice_doc_to_response(doc)
+
+
+@router.delete("/{voice_id}")
+async def delete_voice(
+    voice_id: str,
+    db: Database = Depends(get_db),
+):
+    """보이스 삭제"""
+    # 프리셋은 삭제 불가
+    for preset in PRESET_VOICES:
+        if preset["voice_id"] == voice_id:
+            raise HTTPException(status_code=400, detail="Cannot delete preset voice")
+
+    # 먼저 조회
+    doc = await db.voices.find_one({"voice_id": voice_id})
+    if not doc:
+        raise HTTPException(status_code=404, detail="Voice not found")
+
+    # 관련 오디오 파일 삭제
+    if doc.get("reference_audio_id"):
+        await db.delete_audio(doc["reference_audio_id"])
+    if doc.get("sample_audio_id"):
+        await db.delete_audio(doc["sample_audio_id"])
+
+    # 보이스 삭제
+    await db.voices.delete_one({"voice_id": voice_id})
+
+    return {"status": "deleted", "voice_id": voice_id}