feat: Drama Studio 프로젝트 초기 구조 설정

- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-01-26 11:39:38 +09:00
commit cc547372c0
70 changed files with 18399 additions and 0 deletions

View File

View File

@ -0,0 +1,193 @@
# 드라마 API 라우터
from fastapi import APIRouter, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from typing import Optional
import os
from app.models.drama import (
DramaCreateRequest, DramaGenerateRequest, DramaResponse,
ParsedScript, Character
)
from app.services.script_parser import script_parser
from app.services.drama_orchestrator import drama_orchestrator
router = APIRouter(prefix="/api/v1/drama", tags=["drama"])
@router.post("/parse", response_model=ParsedScript)
async def parse_script(script: str):
    """Parse a script for preview without creating a project.

    The script text is checked with ``script_parser.validate_script``; when it
    is valid, the structured result of ``script_parser.parse`` is returned.

    Raises:
        HTTPException: 400 with ``{"errors": [...]}`` when validation fails.
    """
    ok, problems = script_parser.validate_script(script)
    if ok:
        return script_parser.parse(script)
    raise HTTPException(status_code=400, detail={"errors": problems})
@router.post("/projects", response_model=DramaResponse)
async def create_project(request: DramaCreateRequest):
    """Create a new drama project from a script.

    The script is validated first; the request is then handed to
    ``drama_orchestrator.create_project`` and the resulting project is
    summarised as a ``DramaResponse``.

    Raises:
        HTTPException: 400 with ``{"errors": [...]}`` when the script is
            invalid.
    """
    # Reject invalid scripts before anything is created.
    ok, problems = script_parser.validate_script(request.script)
    if not ok:
        raise HTTPException(status_code=400, detail={"errors": problems})

    project = await drama_orchestrator.create_project(request)

    # A project without a parsed script yields empty/zero/None summary fields.
    parsed = project.script_parsed
    if parsed:
        characters = parsed.characters
        element_count = len(parsed.elements)
        estimated = drama_orchestrator.estimate_duration(parsed)
    else:
        characters = []
        element_count = 0
        estimated = None

    return DramaResponse(
        project_id=project.project_id,
        title=project.title,
        status=project.status,
        characters=characters,
        element_count=element_count,
        estimated_duration=estimated,
    )
@router.get("/projects", response_model=list[DramaResponse])
async def list_projects(skip: int = 0, limit: int = 20):
    """Return a page of drama projects as summaries.

    ``skip`` and ``limit`` are forwarded unchanged to
    ``drama_orchestrator.list_projects``. No ``estimated_duration`` is passed
    for list entries.
    """
    summaries = []
    for project in await drama_orchestrator.list_projects(skip=skip, limit=limit):
        parsed = project.script_parsed
        summaries.append(
            DramaResponse(
                project_id=project.project_id,
                title=project.title,
                status=project.status,
                characters=parsed.characters if parsed else [],
                element_count=len(parsed.elements) if parsed else 0,
                output_file_id=project.output_file_id,
                error_message=project.error_message,
            )
        )
    return summaries
@router.get("/projects/{project_id}", response_model=DramaResponse)
async def get_project(project_id: str):
    """Return the details of a single drama project.

    Raises:
        HTTPException: 404 when the orchestrator returns no project for
            ``project_id``.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")

    # Script-derived fields fall back to empty/zero/None without a parsed script.
    parsed = project.script_parsed
    if parsed:
        characters = parsed.characters
        element_count = len(parsed.elements)
        estimated = drama_orchestrator.estimate_duration(parsed)
    else:
        characters = []
        element_count = 0
        estimated = None

    return DramaResponse(
        project_id=project.project_id,
        title=project.title,
        status=project.status,
        characters=characters,
        element_count=element_count,
        estimated_duration=estimated,
        output_file_id=project.output_file_id,
        error_message=project.error_message,
    )
@router.post("/projects/{project_id}/render")
async def render_project(
    project_id: str,
    background_tasks: BackgroundTasks,
    output_format: str = "wav"
):
    """Start rendering a drama project in the background.

    Schedules ``drama_orchestrator.render(project_id, output_format)`` as a
    background task and returns immediately. This handler does not modify the
    stored project itself; the response simply reports ``"processing"``.

    Raises:
        HTTPException: 404 when the project does not exist, 400 when its
            status is already ``"processing"``.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")

    already_running = project.status == "processing"
    if already_running:
        raise HTTPException(status_code=400, detail="이미 렌더링 중입니다")

    # Hand the actual work to the background task runner.
    background_tasks.add_task(drama_orchestrator.render, project_id, output_format)

    return dict(
        project_id=project_id,
        status="processing",
        message="렌더링이 시작되었습니다",
    )
@router.get("/projects/{project_id}/download")
async def download_project(project_id: str):
    """Download the rendered audio of a drama project.

    The media type and the extension of the suggested download filename are
    derived from the extension of the stored output file, so a render that
    produced something other than WAV is not mislabelled. Unknown or missing
    extensions fall back to WAV.

    Raises:
        HTTPException: 404 when the project or its output file is missing,
            400 when the project status is not ``"completed"``.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
    if project.status != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"렌더링이 완료되지 않았습니다 (현재 상태: {project.status})"
        )
    if not project.output_file_id or not os.path.exists(project.output_file_id):
        raise HTTPException(status_code=404, detail="출력 파일을 찾을 수 없습니다")

    # output_file_id is used as a filesystem path here, so its extension
    # tells us which format was actually written.
    media_types = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
    }
    extension = os.path.splitext(project.output_file_id)[1].lower()
    media_type = media_types.get(extension)
    if media_type is None:
        extension, media_type = ".wav", "audio/wav"

    return FileResponse(
        project.output_file_id,
        media_type=media_type,
        filename=f"{project.title}{extension}"
    )
@router.put("/projects/{project_id}/voices")
async def update_voice_mapping(
    project_id: str,
    voice_mapping: dict[str, str]
):
    """Replace the character-to-voice mapping of a project.

    Sets ``voice_mapping`` and ``updated_at`` on the matching document in the
    ``dramas`` collection.

    Raises:
        HTTPException: 404 when the project does not exist.
    """
    from datetime import datetime

    from app.database import db

    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")

    selector = {"project_id": project_id}
    changes = {
        "$set": {
            "voice_mapping": voice_mapping,
            "updated_at": datetime.utcnow(),
        }
    }
    await db.dramas.update_one(selector, changes)
    return {"message": "보이스 매핑이 업데이트되었습니다"}
@router.delete("/projects/{project_id}")
async def delete_project(project_id: str):
    """Delete a drama project and its rendered output file.

    The output file (if any) is removed first, then the project document is
    deleted from the ``dramas`` collection.

    Raises:
        HTTPException: 404 when the project does not exist.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
    from app.database import db

    # Remove the output file. Attempt the removal directly instead of
    # checking for existence first, so a file that disappears in between
    # cannot fail the request.
    if project.output_file_id:
        try:
            os.remove(project.output_file_id)
        except FileNotFoundError:
            # Already gone; nothing left to clean up.
            pass

    # Remove the project document.
    await db.dramas.delete_one({"project_id": project_id})
    return {"message": "프로젝트가 삭제되었습니다"}

View File

@ -0,0 +1,278 @@
"""배경음악 API 라우터
MusicGen 연동 및 외부 음악 소스
"""
import os
import uuid
from datetime import datetime
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
from fastapi.responses import Response
from pydantic import BaseModel, Field
import httpx
from app.database import Database, get_db
router = APIRouter(prefix="/api/v1/music", tags=["music"])
MUSICGEN_URL = os.getenv("MUSICGEN_URL", "http://localhost:8002")
# ========================================
# Pydantic 모델
# ========================================
class MusicGenerateRequest(BaseModel):
    """Request body for music generation."""
    # Text description of the music to generate; 5 to 500 characters.
    prompt: str = Field(..., min_length=5, max_length=500, description="음악 설명")
    # Length to generate, in seconds; allowed range 5-30, default 30.
    duration: int = Field(default=30, ge=5, le=30, description="생성 길이 (초)")
    # Whether the generated track is also stored in the library; default True.
    save_to_library: bool = Field(default=True, description="라이브러리에 저장")
class MusicTrackResponse(BaseModel):
    """Response model describing a single music track."""
    id: str
    name: str
    description: Optional[str] = None
    source: str  # musicgen | pixabay | uploaded
    # Prompt used to generate the track, when one was recorded.
    generation_prompt: Optional[str] = None
    duration_seconds: float
    genre: Optional[str] = None
    mood: List[str] = []
    license: str = ""
    created_at: datetime
class MusicListResponse(BaseModel):
    """Response model for a paginated list of music tracks."""
    # Tracks on the requested page.
    tracks: List[MusicTrackResponse]
    # Number of tracks matching the filter across all pages.
    total: int
    page: int
    page_size: int
# ========================================
# API 엔드포인트
# ========================================
@router.post("/generate")
async def generate_music(
    request: MusicGenerateRequest,
    db: Database = Depends(get_db),
):
    """Generate background music from a text prompt via the MusicGen service.

    Posts the prompt and duration to ``{MUSICGEN_URL}/generate`` and returns
    the resulting audio bytes as an ``audio/wav`` attachment. When
    ``request.save_to_library`` is set, the audio is also stored through
    ``db.save_audio`` and a track document is inserted into ``music_tracks``.

    Raises:
        HTTPException: 504 when the MusicGen call times out, 502 when it
            answers with an error status, 500 for any other failure while
            calling it.
    """
    try:
        # Call the MusicGen service.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{MUSICGEN_URL}/generate",
                json={
                    "prompt": request.prompt,
                    "duration": request.duration,
                },
            )
            response.raise_for_status()
            audio_bytes = response.content
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="Music generation timed out") from exc
    except httpx.HTTPStatusError as exc:
        raise HTTPException(
            status_code=502, detail=f"MusicGen error: {exc.response.text}"
        ) from exc
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"Music generation failed: {str(exc)}"
        ) from exc

    # Store in the library.
    if request.save_to_library:
        track_id = f"music_{uuid.uuid4().hex[:12]}"
        now = datetime.utcnow()

        # Save the audio bytes.
        audio_file_id = await db.save_audio(
            audio_bytes,
            f"{track_id}.wav",
            metadata={
                "type": "generated_music",
                "prompt": request.prompt,
            },
        )

        # Only mark the name with an ellipsis when the prompt was really cut.
        short_prompt = request.prompt[:30]
        if len(request.prompt) > 30:
            short_prompt += "..."

        # Save the track metadata.
        track_doc = {
            "track_id": track_id,
            "name": f"Generated: {short_prompt}",
            "description": request.prompt,
            "source": "musicgen",
            "generation_prompt": request.prompt,
            "audio_file_id": audio_file_id,
            "duration_seconds": request.duration,
            "format": "wav",
            "genre": None,
            "mood": [],
            "license": "CC-BY-NC",  # MusicGen model license
            "created_at": now,
        }
        await db.music_tracks.insert_one(track_doc)

    return Response(
        content=audio_bytes,
        media_type="audio/wav",
        headers={
            "X-Duration": str(request.duration),
            "Content-Disposition": 'attachment; filename="generated_music.wav"',
        },
    )
@router.get("/library", response_model=MusicListResponse)
async def list_music_library(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    source: Optional[str] = Query(None, description="소스 필터 (musicgen, pixabay, uploaded)"),
    genre: Optional[str] = Query(None, description="장르 필터"),
    db: Database = Depends(get_db),
):
    """List tracks in the music library, newest first.

    ``source`` and ``genre`` are applied as exact-match filters when given.
    ``total`` in the response counts every matching track, not just the ones
    on the returned page.
    """
    # Only non-empty filters become part of the query.
    query = {
        field: value
        for field, value in (("source", source), ("genre", genre))
        if value
    }

    total = await db.music_tracks.count_documents(query)
    offset = (page - 1) * page_size
    cursor = (
        db.music_tracks.find(query)
        .sort("created_at", -1)
        .skip(offset)
        .limit(page_size)
    )

    tracks = [
        MusicTrackResponse(
            id=doc.get("track_id", str(doc["_id"])),
            name=doc["name"],
            description=doc.get("description"),
            source=doc.get("source", "unknown"),
            generation_prompt=doc.get("generation_prompt"),
            duration_seconds=doc.get("duration_seconds", 0),
            genre=doc.get("genre"),
            mood=doc.get("mood", []),
            license=doc.get("license", ""),
            created_at=doc.get("created_at", datetime.utcnow()),
        )
        async for doc in cursor
    ]

    return MusicListResponse(tracks=tracks, total=total, page=page, page_size=page_size)
@router.get("/{track_id}")
async def get_music_track(
    track_id: str,
    db: Database = Depends(get_db),
):
    """Return the metadata of one music track, looked up by ``track_id``.

    Raises:
        HTTPException: 404 when no track document matches.
    """
    track = await db.music_tracks.find_one({"track_id": track_id})
    if not track:
        raise HTTPException(status_code=404, detail="Track not found")

    # Missing optional fields fall back to neutral defaults.
    fields = {
        "id": track.get("track_id", str(track["_id"])),
        "name": track["name"],
        "description": track.get("description"),
        "source": track.get("source", "unknown"),
        "generation_prompt": track.get("generation_prompt"),
        "duration_seconds": track.get("duration_seconds", 0),
        "genre": track.get("genre"),
        "mood": track.get("mood", []),
        "license": track.get("license", ""),
        "created_at": track.get("created_at", datetime.utcnow()),
    }
    return MusicTrackResponse(**fields)
@router.get("/{track_id}/audio")
async def get_music_audio(
    track_id: str,
    db: Database = Depends(get_db),
):
    """Return the stored audio of a music track as an inline WAV response.

    Raises:
        HTTPException: 404 when the track is unknown or has no
            ``audio_file_id``.
    """
    track = await db.music_tracks.find_one({"track_id": track_id})
    if not track:
        raise HTTPException(status_code=404, detail="Track not found")

    file_id = track.get("audio_file_id")
    if not file_id:
        raise HTTPException(status_code=404, detail="Audio file not found")

    payload = await db.get_audio(file_id)
    disposition = f'inline; filename="{track_id}.wav"'
    return Response(
        content=payload,
        media_type="audio/wav",
        headers={"Content-Disposition": disposition},
    )
@router.delete("/{track_id}")
async def delete_music_track(
    track_id: str,
    db: Database = Depends(get_db),
):
    """Delete a music track and its stored audio.

    Raises:
        HTTPException: 404 when no track document matches ``track_id``.
    """
    selector = {"track_id": track_id}
    track = await db.music_tracks.find_one(selector)
    if not track:
        raise HTTPException(status_code=404, detail="Track not found")

    # Remove the audio payload first, then the metadata document.
    file_id = track.get("audio_file_id")
    if file_id:
        await db.delete_audio(file_id)
    await db.music_tracks.delete_one({"track_id": track_id})

    return {"status": "deleted", "track_id": track_id}
@router.get("/prompts/examples")
async def get_example_prompts():
    """Return example prompts for music generation.

    The list is fetched from ``{MUSICGEN_URL}/prompts``. If that request fails
    for any reason, a built-in set of examples is returned instead.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            upstream = await client.get(f"{MUSICGEN_URL}/prompts")
            upstream.raise_for_status()
            return upstream.json()
    except Exception:
        # The MusicGen service is unreachable or errored: serve defaults.
        defaults = (
            (
                "Ambient",
                [
                    "calm piano music, peaceful, ambient",
                    "lo-fi hip hop beats, relaxing, study music",
                    "meditation music, calm, zen",
                ],
            ),
            (
                "Electronic",
                [
                    "upbeat electronic dance music",
                    "retro synthwave 80s style",
                    "chill electronic ambient",
                ],
            ),
            (
                "Cinematic",
                [
                    "epic orchestral cinematic music",
                    "tense suspenseful thriller music",
                    "cheerful happy video game background",
                ],
            ),
        )
        return {
            "examples": [
                {"category": category, "prompts": prompts}
                for category, prompts in defaults
            ]
        }

View File

@ -0,0 +1,184 @@
"""녹음 관리 API 라우터"""
import uuid
import io
from typing import List
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
from fastapi.responses import Response
import soundfile as sf
import numpy as np
from app.database import Database, get_db
from app.models.voice import RecordingValidateResponse, RecordingUploadResponse
router = APIRouter(prefix="/api/v1/recordings", tags=["recordings"])
def analyze_audio(audio_bytes: bytes) -> dict:
    """Analyze an audio file and score its recording quality.

    The bytes are decoded with ``soundfile``, mixed down to mono by averaging
    channels, and checked for duration, RMS level, peak level and the share
    of near-silent samples. Each finding appends a message to ``issues`` and
    lowers ``quality_score``, which starts at 1.0 and is clamped to
    [0.0, 1.0].

    Args:
        audio_bytes: Raw contents of the audio file.

    Returns:
        A dict with ``duration`` (seconds), ``sample_rate``,
        ``quality_score``, ``issues``, ``rms`` and ``peak``. When the audio
        cannot be decoded or contains no samples, the numeric fields are 0
        and ``issues`` holds a single failure message; the same keys are
        present in both cases.
    """
    try:
        # Load the audio.
        audio_data, sample_rate = sf.read(io.BytesIO(audio_bytes))

        # Mix down to mono.
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        # Fail with a clear message instead of an obscure reduction error
        # when the file decodes to zero samples.
        if len(audio_data) == 0:
            raise ValueError("오디오에 샘플이 없습니다")

        duration = len(audio_data) / sample_rate

        issues = []
        quality_score = 1.0

        # Duration check.
        if duration < 1.0:
            issues.append("오디오가 너무 짧습니다 (최소 1초 이상)")
            quality_score -= 0.3
        elif duration < 3.0:
            issues.append("Voice Clone에는 3초 이상의 오디오가 권장됩니다")
            quality_score -= 0.1

        # RMS level check (volume).
        rms = np.sqrt(np.mean(audio_data ** 2))
        if rms < 0.01:
            issues.append("볼륨이 너무 낮습니다")
            quality_score -= 0.2
        elif rms > 0.5:
            issues.append("볼륨이 너무 높습니다 (클리핑 가능성)")
            quality_score -= 0.1

        # Peak check.
        peak = np.max(np.abs(audio_data))
        if peak > 0.99:
            issues.append("오디오가 클리핑되었습니다")
            quality_score -= 0.2

        # Silence check: a simple heuristic based on the share of samples
        # below a fixed amplitude threshold, not real noise detection.
        silence_threshold = 0.01
        silent_samples = np.sum(np.abs(audio_data) < silence_threshold)
        silence_ratio = silent_samples / len(audio_data)
        if silence_ratio > 0.7:
            issues.append("대부분이 무음입니다")
            quality_score -= 0.3
        elif silence_ratio > 0.5:
            issues.append("무음 구간이 많습니다")
            quality_score -= 0.1

        quality_score = max(0.0, min(1.0, quality_score))
        return {
            "duration": duration,
            "sample_rate": sample_rate,
            "quality_score": quality_score,
            "issues": issues,
            "rms": float(rms),
            "peak": float(peak),
        }
    except Exception as e:
        # Best effort: any decoding/analysis failure becomes a zero-score
        # result rather than an exception for the caller.
        return {
            "duration": 0,
            "sample_rate": 0,
            "quality_score": 0,
            "issues": [f"오디오 분석 실패: {str(e)}"],
            "rms": 0.0,
            "peak": 0.0,
        }
@router.post("/validate", response_model=RecordingValidateResponse)
async def validate_recording(
    audio: UploadFile = File(..., description="검증할 오디오 파일"),
):
    """Validate the quality of a recording without storing it.

    The upload is analysed with ``analyze_audio``. It is reported as valid
    when its quality score is above 0.5 and it is longer than one second.

    Raises:
        HTTPException: 400 when the upload is smaller than 1000 bytes.
    """
    payload = await audio.read()
    if len(payload) < 1000:
        raise HTTPException(status_code=400, detail="파일이 너무 작습니다")

    report = analyze_audio(payload)
    score = report["quality_score"]
    length = report["duration"]
    is_valid = score > 0.5 and length > 1.0

    return RecordingValidateResponse(
        valid=is_valid,
        duration=length,
        sample_rate=report["sample_rate"],
        quality_score=score,
        issues=report["issues"],
    )
@router.post("/upload", response_model=RecordingUploadResponse)
async def upload_recording(
    audio: UploadFile = File(..., description="업로드할 오디오 파일"),
    transcript: str = Form(None, description="오디오의 텍스트 내용"),
    db: Database = Depends(get_db),
):
    """Upload a recording and store it together with its analysis results.

    The upload is analysed with ``analyze_audio`` and saved through
    ``db.save_audio`` with the transcript, duration, sample rate and quality
    score as metadata. The filename reported in the response is the same one
    the file was stored under.

    Raises:
        HTTPException: 400 when the analysed duration is below 0.5 seconds
            (this includes audio that could not be analysed, which reports a
            duration of 0).
    """
    audio_bytes = await audio.read()

    # Quality analysis.
    analysis = analyze_audio(audio_bytes)
    if analysis["duration"] < 0.5:
        raise HTTPException(status_code=400, detail="오디오가 너무 짧습니다")

    # Decide the filename once so storage and response cannot disagree when
    # the client did not send one.
    filename = audio.filename or f"recording_{uuid.uuid4()}.wav"

    # Store the audio.
    file_id = await db.save_audio(
        audio_bytes,
        filename,
        metadata={
            "type": "recording",
            "transcript": transcript,
            "duration": analysis["duration"],
            "sample_rate": analysis["sample_rate"],
            "quality_score": analysis["quality_score"],
        },
    )
    return RecordingUploadResponse(
        file_id=file_id,
        filename=filename,
        duration=analysis["duration"],
        sample_rate=analysis["sample_rate"],
    )
@router.get("/{file_id}")
async def get_recording(
    file_id: str,
    db: Database = Depends(get_db),
):
    """Download a stored recording as a WAV attachment.

    Raises:
        HTTPException: 404 when loading the audio fails for any reason.
    """
    try:
        payload = await db.get_audio(file_id)
        disposition = f'attachment; filename="{file_id}.wav"'
        return Response(
            content=payload,
            media_type="audio/wav",
            headers={"Content-Disposition": disposition},
        )
    except Exception:
        # Every failure is reported to the client as "not found".
        raise HTTPException(status_code=404, detail="Recording not found")
@router.delete("/{file_id}")
async def delete_recording(
    file_id: str,
    db: Database = Depends(get_db),
):
    """Delete a stored recording.

    Raises:
        HTTPException: 404 when deleting the audio fails for any reason.
    """
    try:
        await db.delete_audio(file_id)
    except Exception:
        # Every failure is reported to the client as "not found".
        raise HTTPException(status_code=404, detail="Recording not found")
    return {"status": "deleted", "file_id": file_id}

View File

@ -0,0 +1,340 @@
"""효과음 API 라우터
Freesound API 연동
"""
import uuid
from datetime import datetime
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends, Query
from fastapi.responses import Response
from pydantic import BaseModel
from app.database import Database, get_db
from app.services.freesound_client import freesound_client
router = APIRouter(prefix="/api/v1/sound-effects", tags=["sound-effects"])
# ========================================
# Pydantic 모델
# ========================================
class SoundEffectResponse(BaseModel):
    """Response model describing a single sound effect."""
    # "fs_<freesound id>" for Freesound results, a database id for local ones.
    id: str
    freesound_id: Optional[int] = None
    name: str
    description: str
    # Length in seconds.
    duration: float
    tags: List[str] = []
    preview_url: Optional[str] = None
    license: str = ""
    username: Optional[str] = None
    source: str = "freesound"  # freesound | local
class SoundEffectSearchResponse(BaseModel):
    """Response model for a paginated sound-effect search or listing."""
    # Total number of matches, not just those on this page.
    count: int
    page: int
    page_size: int
    results: List[SoundEffectResponse]
class SoundEffectImportRequest(BaseModel):
    """Request body for importing a sound effect from Freesound."""
    # Numeric Freesound id of the sound to import.
    freesound_id: int
# ========================================
# API 엔드포인트
# ========================================
@router.get("/search", response_model=SoundEffectSearchResponse)
async def search_sound_effects(
    query: str = Query(..., min_length=1, description="검색어"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    min_duration: Optional[float] = Query(None, ge=0, description="최소 길이 (초)"),
    max_duration: Optional[float] = Query(None, ge=0, description="최대 길이 (초)"),
    sort: str = Query("score", description="정렬 (score, duration_asc, duration_desc)"),
):
    """Search Freesound for sound effects.

    All parameters are forwarded to ``freesound_client.search``; each hit is
    converted to a ``SoundEffectResponse`` whose id is ``fs_<freesound id>``.

    Raises:
        HTTPException: 500 when the search or the conversion fails.
    """
    try:
        found = await freesound_client.search(
            query=query,
            page=page,
            page_size=page_size,
            min_duration=min_duration,
            max_duration=max_duration,
            sort=sort,
        )

        # Convert the client's result entries into the response format.
        sounds = [
            SoundEffectResponse(
                id=f"fs_{hit['freesound_id']}",
                freesound_id=hit["freesound_id"],
                name=hit["name"],
                description=hit["description"],
                duration=hit["duration"],
                tags=hit["tags"],
                preview_url=hit["preview_url"],
                license=hit["license"],
                username=hit.get("username"),
                source="freesound",
            )
            for hit in found["results"]
        ]

        return SoundEffectSearchResponse(
            count=found["count"],
            page=page,
            page_size=page_size,
            results=sounds,
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Search failed: {str(exc)}")
@router.get("/library", response_model=SoundEffectSearchResponse)
async def list_local_sound_effects(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    category: Optional[str] = Query(None, description="카테고리 필터"),
    db: Database = Depends(get_db),
):
    """List sound effects stored in the local library, newest first.

    ``category`` is matched against the ``categories`` field when given.
    ``count`` in the response covers every matching document.
    """
    query = {"categories": category} if category else {}

    total = await db.sound_effects.count_documents(query)
    offset = (page - 1) * page_size
    cursor = (
        db.sound_effects.find(query)
        .sort("created_at", -1)
        .skip(offset)
        .limit(page_size)
    )

    sounds = [
        SoundEffectResponse(
            id=str(doc["_id"]),
            freesound_id=doc.get("source_id"),
            name=doc["name"],
            description=doc.get("description", ""),
            duration=doc.get("duration_seconds", 0),
            tags=doc.get("tags", []),
            preview_url=None,  # local audio is served by a separate endpoint
            license=doc.get("license", ""),
            source="local",
        )
        async for doc in cursor
    ]

    return SoundEffectSearchResponse(
        count=total,
        page=page,
        page_size=page_size,
        results=sounds,
    )
@router.post("/import", response_model=SoundEffectResponse)
async def import_sound_effect(
    request: SoundEffectImportRequest,
    db: Database = Depends(get_db),
):
    """Import a Freesound sound into the local library.

    Fetches the sound's details, downloads its ``preview-hq-mp3`` preview,
    stores the audio through ``db.save_audio`` and inserts a metadata
    document into ``sound_effects``.

    Raises:
        HTTPException: 400 when the sound has no preview URL, 500 when any
            other step fails.
    """
    fs_id = request.freesound_id
    try:
        # Look up the sound's details on Freesound.
        info = await freesound_client.get_sound(fs_id)

        # Download the preview audio.
        preview_url = info.get("previews", {}).get("preview-hq-mp3", "")
        if not preview_url:
            raise HTTPException(status_code=400, detail="Preview not available")
        preview_bytes = await freesound_client.download_preview(preview_url)

        # Store the audio.
        file_id = await db.save_audio(
            preview_bytes,
            f"sfx_{fs_id}.mp3",
            content_type="audio/mpeg",
            metadata={"freesound_id": fs_id},
        )

        # Store the metadata document.
        timestamp = datetime.utcnow()
        record = {
            "name": info.get("name", ""),
            "description": info.get("description", ""),
            "source": "freesound",
            "source_id": fs_id,
            "source_url": f"https://freesound.org/s/{fs_id}/",
            "audio_file_id": file_id,
            "duration_seconds": info.get("duration", 0),
            "format": "mp3",
            "categories": [],
            "tags": info.get("tags", [])[:20],  # keep at most 20 tags
            "license": info.get("license", ""),
            "attribution": info.get("username", ""),
            "created_at": timestamp,
            "updated_at": timestamp,
        }
        inserted = await db.sound_effects.insert_one(record)

        return SoundEffectResponse(
            id=str(inserted.inserted_id),
            freesound_id=fs_id,
            name=record["name"],
            description=record["description"],
            duration=record["duration_seconds"],
            tags=record["tags"],
            license=record["license"],
            source="local",
        )
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) pass through unchanged.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Import failed: {str(exc)}")
def _parse_freesound_id(sound_id: str) -> int:
    """Return the numeric part of an ``fs_<number>`` id, or raise a 400."""
    try:
        return int(sound_id[3:])
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Invalid sound ID") from exc


def _parse_object_id(sound_id: str):
    """Convert a local sound id to an ``ObjectId``, or raise a 400."""
    from bson import ObjectId
    from bson.errors import InvalidId

    try:
        return ObjectId(sound_id)
    except (InvalidId, TypeError) as exc:
        raise HTTPException(status_code=400, detail="Invalid sound ID") from exc


# Registered before the "/{sound_id}" routes: routes are matched in
# declaration order, so a later "/categories" would be captured as a sound id.
@router.get("/categories")
async def list_categories(
    db: Database = Depends(get_db),
):
    """List the categories used in the local library with their counts.

    Aggregates the ``categories`` arrays of all ``sound_effects`` documents
    and returns ``{"categories": [{"name", "count"}, ...]}`` sorted by count,
    highest first.
    """
    pipeline = [
        {"$unwind": "$categories"},
        {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
    ]
    categories = []
    async for doc in db.sound_effects.aggregate(pipeline):
        categories.append({
            "name": doc["_id"],
            "count": doc["count"],
        })
    return {"categories": categories}


@router.get("/{sound_id}")
async def get_sound_effect_info(
    sound_id: str,
    db: Database = Depends(get_db),
):
    """Return the details of a sound effect.

    Ids starting with ``fs_`` are looked up on Freesound; any other id is
    treated as the database id of a locally stored sound.

    Raises:
        HTTPException: 400 when the id is malformed, 404 when the sound
            cannot be found or the Freesound lookup fails.
    """
    # Freesound id.
    if sound_id.startswith("fs_"):
        freesound_id = _parse_freesound_id(sound_id)
        try:
            sound_info = await freesound_client.get_sound(freesound_id)
            return SoundEffectResponse(
                id=sound_id,
                freesound_id=freesound_id,
                name=sound_info.get("name", ""),
                description=sound_info.get("description", ""),
                duration=sound_info.get("duration", 0),
                tags=sound_info.get("tags", []),
                preview_url=sound_info.get("previews", {}).get("preview-hq-mp3", ""),
                license=sound_info.get("license", ""),
                source="freesound",
            )
        except Exception as exc:
            raise HTTPException(status_code=404, detail="Sound not found") from exc

    # Local id. Only the id conversion maps to a 400; database errors are
    # no longer disguised as "Invalid sound ID".
    object_id = _parse_object_id(sound_id)
    doc = await db.sound_effects.find_one({"_id": object_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Sound not found")
    return SoundEffectResponse(
        id=str(doc["_id"]),
        freesound_id=doc.get("source_id"),
        name=doc["name"],
        description=doc.get("description", ""),
        duration=doc.get("duration_seconds", 0),
        tags=doc.get("tags", []),
        license=doc.get("license", ""),
        source="local",
    )


@router.get("/{sound_id}/audio")
async def get_sound_effect_audio(
    sound_id: str,
    db: Database = Depends(get_db),
):
    """Return the audio of a sound effect.

    For ``fs_`` ids the Freesound ``preview-hq-mp3`` preview is downloaded
    and returned as ``audio/mpeg``. For local ids the stored audio is
    returned, as ``audio/mpeg`` when the document's format is ``mp3`` and as
    ``audio/wav`` otherwise.

    Raises:
        HTTPException: 400 when the id is malformed, 404 when no audio is
            available.
    """
    # Freesound id: proxy the preview audio.
    if sound_id.startswith("fs_"):
        freesound_id = _parse_freesound_id(sound_id)
        try:
            sound_info = await freesound_client.get_sound(freesound_id)
            preview_url = sound_info.get("previews", {}).get("preview-hq-mp3", "")
            if preview_url:
                audio_bytes = await freesound_client.download_preview(preview_url)
                return Response(
                    content=audio_bytes,
                    media_type="audio/mpeg",
                    headers={"Content-Disposition": f'inline; filename="{freesound_id}.mp3"'},
                )
        except Exception as exc:
            raise HTTPException(status_code=404, detail="Audio not found") from exc
        # The sound exists but has no preview; do not fall through to the
        # local lookup, where an "fs_" id can never be valid.
        raise HTTPException(status_code=404, detail="Audio not found")

    # Local id.
    object_id = _parse_object_id(sound_id)
    doc = await db.sound_effects.find_one({"_id": object_id})
    if not doc or not doc.get("audio_file_id"):
        raise HTTPException(status_code=404, detail="Audio not found")
    audio_bytes = await db.get_audio(doc["audio_file_id"])
    content_type = "audio/mpeg" if doc.get("format") == "mp3" else "audio/wav"
    return Response(
        content=audio_bytes,
        media_type=content_type,
        headers={"Content-Disposition": f'inline; filename="{sound_id}.{doc.get("format", "wav")}"'},
    )
@router.delete("/{sound_id}")
async def delete_sound_effect(
    sound_id: str,
    db: Database = Depends(get_db),
):
    """Delete a locally stored sound effect and its audio.

    Raises:
        HTTPException: 400 when ``sound_id`` is a Freesound reference
            (``fs_`` prefix) or not a valid database id, 404 when no
            matching document exists.
    """
    if sound_id.startswith("fs_"):
        raise HTTPException(status_code=400, detail="Cannot delete Freesound reference")

    from bson import ObjectId
    from bson.errors import InvalidId

    # Only the id conversion maps to a 400. A bare ``except`` around the
    # awaited lookup would also swallow task cancellation and report
    # database errors as an invalid id.
    try:
        object_id = ObjectId(sound_id)
    except (InvalidId, TypeError) as exc:
        raise HTTPException(status_code=400, detail="Invalid sound ID") from exc

    doc = await db.sound_effects.find_one({"_id": object_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Sound not found")

    # Remove the audio payload.
    if doc.get("audio_file_id"):
        await db.delete_audio(doc["audio_file_id"])

    # Remove the metadata document.
    await db.sound_effects.delete_one({"_id": object_id})
    return {"status": "deleted", "sound_id": sound_id}

View File

@ -0,0 +1,227 @@
"""TTS API 라우터"""
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import Response, StreamingResponse
from app.database import Database, get_db
from app.models.voice import TTSSynthesizeRequest, TTSGenerationResponse, VoiceType
from app.services.tts_client import tts_client
from app.routers.voices import PRESET_VOICES
router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
@router.post("/synthesize")
async def synthesize(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Synthesize speech for the requested text and voice.

    The voice is resolved in this order: a preset from ``PRESET_VOICES``,
    otherwise a voice document from the database whose type is either cloned
    (needs reference audio) or designed (uses a design prompt). The audio is
    stored, a generation record is inserted into ``tts_generations`` and the
    audio is returned as a WAV attachment.

    Raises:
        HTTPException: 404 when the voice is unknown, 400 when a cloned voice
            has no reference audio or the voice type is not recognised, 500
            when the TTS call fails.
    """
    voice_id = request.voice_id

    # Look for a matching preset voice first.
    preset_speaker = next(
        (
            preset["preset_voice_id"]
            for preset in PRESET_VOICES
            if preset["voice_id"] == voice_id
        ),
        None,
    )

    if preset_speaker:
        # Preset synthesis.
        # NOTE(review): the language is fixed to "ko" on this path - confirm
        # this is intended for every preset.
        try:
            audio_bytes, sr = await tts_client.synthesize(
                text=request.text,
                speaker=preset_speaker,
                language="ko",
                instruct=request.instruct,
            )
        except Exception as exc:
            raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {str(exc)}")
    else:
        # Custom voice stored in the database.
        voice_doc = await db.voices.find_one({"voice_id": voice_id})
        if not voice_doc:
            raise HTTPException(status_code=404, detail="Voice not found")

        voice_type = voice_doc.get("type")
        is_cloned = voice_type == VoiceType.CLONED.value
        is_designed = (not is_cloned) and voice_type == VoiceType.DESIGNED.value
        if not (is_cloned or is_designed):
            raise HTTPException(status_code=400, detail=f"Unknown voice type: {voice_type}")

        if is_cloned:
            # Voice clone synthesis requires reference audio.
            ref_audio_id = voice_doc.get("reference_audio_id")
            ref_transcript = voice_doc.get("reference_transcript", "")
            if not ref_audio_id:
                raise HTTPException(status_code=400, detail="Reference audio not found")
            ref_audio = await db.get_audio(ref_audio_id)
            try:
                audio_bytes, sr = await tts_client.voice_clone(
                    text=request.text,
                    ref_audio=ref_audio,
                    ref_text=ref_transcript,
                    language=voice_doc.get("language", "ko"),
                )
            except Exception as exc:
                raise HTTPException(
                    status_code=500,
                    detail=f"Voice clone synthesis failed: {str(exc)}",
                )
        else:
            # Voice design synthesis.
            design_prompt = voice_doc.get("design_prompt", "")
            try:
                audio_bytes, sr = await tts_client.voice_design(
                    text=request.text,
                    instruct=design_prompt,
                    language=voice_doc.get("language", "ko"),
                )
            except Exception as exc:
                raise HTTPException(
                    status_code=500,
                    detail=f"Voice design synthesis failed: {str(exc)}",
                )

    # Record the generation.
    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    created = datetime.utcnow()

    # Store the audio; only the first 100 characters of text go into metadata.
    audio_file_id = await db.save_audio(
        audio_bytes,
        f"{generation_id}.wav",
        metadata={"voice_id": voice_id, "text": request.text[:100]},
    )

    record = {
        "generation_id": generation_id,
        "voice_id": voice_id,
        "text": request.text,
        "audio_file_id": audio_file_id,
        "status": "completed",
        "created_at": created,
    }
    await db.tts_generations.insert_one(record)

    response_headers = {
        "X-Sample-Rate": str(sr),
        "X-Generation-ID": generation_id,
        "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
    }
    return Response(content=audio_bytes, media_type="audio/wav", headers=response_headers)
@router.post("/synthesize/async", response_model=TTSGenerationResponse)
async def synthesize_async(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Register an asynchronous TTS generation (intended for long texts).

    Inserts a generation record with status ``"pending"`` into
    ``tts_generations`` and returns its ``generation_id``. Once a generation
    is completed, its audio is meant to be downloaded from
    ``/generations/{generation_id}/audio``.

    NOTE(review): this function performs no synthesis and schedules no work;
    the ``try`` block below is a placeholder, so the record stays "pending"
    unless something outside this function processes it - confirm.
    """
    # Asynchronous flow for handling long texts.
    # Placeholder for now (a Redis queue integration is planned).
    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()
    gen_doc = {
        "generation_id": generation_id,
        "voice_id": request.voice_id,
        "text": request.text,
        "status": "pending",
        "created_at": now,
    }
    await db.tts_generations.insert_one(gen_doc)
    # This should really be processed by a background worker.
    # The placeholder below marks where that processing would go.
    try:
        # Same logic as synthesize...
        # (omitted for brevity; a real implementation would use an async worker)
        pass
    except Exception as e:
        # Unreachable while the body above is only ``pass``.
        await db.tts_generations.update_one(
            {"generation_id": generation_id},
            {"$set": {"status": "failed", "error_message": str(e)}},
        )
    return TTSGenerationResponse(
        generation_id=generation_id,
        voice_id=request.voice_id,
        text=request.text,
        status="pending",
        created_at=now,
    )
@router.get("/generations/{generation_id}", response_model=TTSGenerationResponse)
async def get_generation(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Return the stored state of a TTS generation.

    Raises:
        HTTPException: 404 when no generation record matches.
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    # The file id is exposed as a string; a missing/empty id becomes None.
    stored_file_id = record.get("audio_file_id")
    if stored_file_id:
        audio_file_id = str(stored_file_id)
    else:
        audio_file_id = None

    return TTSGenerationResponse(
        generation_id=record["generation_id"],
        voice_id=record["voice_id"],
        text=record["text"],
        status=record["status"],
        audio_file_id=audio_file_id,
        duration_seconds=record.get("duration_seconds"),
        created_at=record["created_at"],
    )
@router.get("/generations/{generation_id}/audio")
async def get_generation_audio(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Download the audio of a completed TTS generation as a WAV attachment.

    Raises:
        HTTPException: 404 when the generation or its audio file id is
            missing, 400 when the generation status is not ``"completed"``.
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    status = record["status"]
    if status != "completed":
        raise HTTPException(status_code=400, detail=f"Generation not completed: {status}")

    file_id = record.get("audio_file_id")
    if not file_id:
        raise HTTPException(status_code=404, detail="Audio file not found")

    payload = await db.get_audio(file_id)
    disposition = f'attachment; filename="{generation_id}.wav"'
    return Response(
        content=payload,
        media_type="audio/wav",
        headers={"Content-Disposition": disposition},
    )
@router.get("/health")
async def tts_health():
    """Report whether the TTS engine answers its health check.

    Returns ``{"status": "healthy", "tts_engine": ...}`` with the engine's
    own health payload, or ``{"status": "unhealthy", "error": ...}`` when the
    check raises. The endpoint itself never raises for a failed check.
    """
    try:
        engine_status = await tts_client.health_check()
    except Exception as exc:
        return {"status": "unhealthy", "error": str(exc)}
    return {"status": "healthy", "tts_engine": engine_status}

View File

@ -0,0 +1,426 @@
"""Voice 관리 API 라우터"""
import uuid
from datetime import datetime
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
from fastapi.responses import Response
from app.database import Database, get_db
from app.models.voice import (
VoiceType,
LanguageCode,
VoiceResponse,
VoiceListResponse,
VoiceCloneRequest,
VoiceDesignRequest,
VoiceUpdateRequest,
)
from app.services.tts_client import tts_client
router = APIRouter(
    prefix="/api/v1/voices",
    tags=["voices"],
)

# ========================================
# Preset voice catalogue (system defaults)
# ========================================
# Each row is (voice_id, speaker, description, gender, style_tags).
# The speaker string is used both as the display name and as
# "preset_voice_id"; every preset shares VoiceType.PRESET and LanguageCode.EN.
PRESET_VOICES = [
    {
        "voice_id": voice_id,
        "name": speaker,
        "description": description,
        "type": VoiceType.PRESET,
        "preset_voice_id": speaker,
        "language": LanguageCode.EN,
        "gender": gender,
        "style_tags": style_tags,
    }
    for voice_id, speaker, description, gender, style_tags in (
        ("preset_chelsie", "Chelsie", "밝고 활기찬 여성 목소리", "female", ["bright", "energetic"]),
        ("preset_ethan", "Ethan", "차분하고 신뢰감 있는 남성 목소리", "male", ["calm", "trustworthy"]),
        ("preset_vivian", "Vivian", "부드럽고 따뜻한 여성 목소리", "female", ["soft", "warm"]),
        ("preset_benjamin", "Benjamin", "깊고 전문적인 남성 목소리", "male", ["deep", "professional"]),
        ("preset_aurora", "Aurora", "우아하고 세련된 여성 목소리", "female", ["elegant", "refined"]),
        ("preset_oliver", "Oliver", "친근하고 편안한 남성 목소리", "male", ["friendly", "casual"]),
        ("preset_luna", "Luna", "따뜻하고 감성적인 여성 목소리", "female", ["warm", "emotional"]),
        ("preset_jasper", "Jasper", "전문적이고 명확한 남성 목소리", "male", ["professional", "clear"]),
        ("preset_aria", "Aria", "표현력 풍부한 여성 목소리", "female", ["expressive", "dynamic"]),
    )
]
def _voice_doc_to_response(doc: dict) -> VoiceResponse:
    """Build a ``VoiceResponse`` from a stored voice document.

    ``voice_id``, ``name`` and ``type`` are required keys. Every other field
    falls back to a default when absent: ``LanguageCode.KO`` for the language,
    an empty list for style tags, ``True`` for ``is_public`` and the current
    UTC time for both timestamps.
    """
    # Ids are stringified only when present so a missing id stays None.
    owner = doc.get("owner_id")
    sample = doc.get("sample_audio_id")
    owner_id = str(owner) if owner else None
    sample_audio_id = str(sample) if sample else None

    return VoiceResponse(
        voice_id=doc["voice_id"],
        name=doc["name"],
        description=doc.get("description"),
        type=doc["type"],
        language=doc.get("language", LanguageCode.KO),
        preset_voice_id=doc.get("preset_voice_id"),
        design_prompt=doc.get("design_prompt"),
        reference_transcript=doc.get("reference_transcript"),
        gender=doc.get("gender"),
        age_range=doc.get("age_range"),
        style_tags=doc.get("style_tags", []),
        owner_id=owner_id,
        is_public=doc.get("is_public", True),
        sample_audio_id=sample_audio_id,
        created_at=doc.get("created_at", datetime.utcnow()),
        updated_at=doc.get("updated_at", datetime.utcnow()),
    )
@router.get("", response_model=VoiceListResponse)
async def list_voices(
    type: Optional[VoiceType] = Query(None, description="보이스 타입 필터"),
    language: Optional[LanguageCode] = Query(None, description="언어 필터"),
    is_public: bool = Query(True, description="공개 보이스만"),
    include_presets: bool = Query(True, description="프리셋 포함"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    db: Database = Depends(get_db),
):
    """List voices as one paginated sequence: presets first, then stored voices.

    The in-memory presets that match the filters come first, followed by the
    voices stored in the database sorted by ``created_at`` descending. Paging
    is applied to that combined sequence, so a page never holds more than
    ``page_size`` entries and presets are not repeated on later pages.

    Args:
        type: Optional voice type filter. Presets are only considered when it
            is unset or ``VoiceType.PRESET``; stored voices are always
            filtered by it when it is set.
        language: Optional language filter applied to presets and stored voices.
        is_public: When true, only stored voices flagged ``is_public`` are
            returned; when false, no visibility filter is applied.
        include_presets: Whether the built-in presets take part in the listing.
        page: 1-based page number.
        page_size: Maximum number of voices per page.
        db: Database handle injected by FastAPI.

    Returns:
        A ``VoiceListResponse`` with the requested page and ``total`` equal to
        the number of voices matching the filters (presets plus stored).
    """
    offset = (page - 1) * page_size

    # Only the presets that actually pass the filters count towards paging
    # and towards `total`.
    matching_presets = []
    if include_presets and (type is None or type == VoiceType.PRESET):
        matching_presets = [
            preset
            for preset in PRESET_VOICES
            if not language or preset["language"] == language
        ]

    voices = [
        VoiceResponse(
            **preset,
            is_public=True,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )
        for preset in matching_presets[offset:offset + page_size]
    ]

    query = {"is_public": True} if is_public else {}
    if type:
        # Applied for every type, PRESET included, so that asking for presets
        # does not fall through to an unfiltered scan of stored voices.
        query["type"] = type.value
    if language:
        query["language"] = language.value

    stored_total = await db.voices.count_documents(query)

    # Fill the rest of the page from the database. Skip the query entirely
    # when the page is already full: a limit of 0 means "no limit" in MongoDB.
    remaining = page_size - len(voices)
    if remaining > 0:
        stored_skip = max(0, offset - len(matching_presets))
        cursor = (
            db.voices.find(query)
            .sort("created_at", -1)
            .skip(stored_skip)
            .limit(remaining)
        )
        async for doc in cursor:
            voices.append(_voice_doc_to_response(doc))

    return VoiceListResponse(
        voices=voices,
        total=len(matching_presets) + stored_total,
        page=page,
        page_size=page_size,
    )
@router.get("/{voice_id}", response_model=VoiceResponse)
async def get_voice(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Return the details of a single voice.

    Built-in presets are checked first; anything else is looked up in the
    ``voices`` collection by ``voice_id``.

    Raises:
        HTTPException: 404 when the id matches neither a preset nor a stored
            voice.
    """
    preset = next(
        (entry for entry in PRESET_VOICES if entry["voice_id"] == voice_id),
        None,
    )
    if preset is not None:
        # Presets carry no stored timestamps, so the current time is reported.
        return VoiceResponse(
            **preset,
            is_public=True,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )

    stored = await db.voices.find_one({"voice_id": voice_id})
    if not stored:
        raise HTTPException(status_code=404, detail="Voice not found")
    return _voice_doc_to_response(stored)
@router.get("/{voice_id}/sample")
async def get_voice_sample(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Return sample audio for a voice as an inline WAV response.

    For a preset the sample is synthesized on each request with the preset's
    speaker. For a stored voice the previously saved sample audio is returned.

    Raises:
        HTTPException: 404 when the voice does not exist or has no sample
            audio id.
    """
    preset = next(
        (entry for entry in PRESET_VOICES if entry["voice_id"] == voice_id),
        None,
    )

    if preset is not None:
        # Presets have no stored sample; generate one from a fixed sentence.
        sample_text = "안녕하세요, 저는 AI 음성입니다."
        wav_bytes, _sample_rate = await tts_client.synthesize(
            text=sample_text,
            speaker=preset["preset_voice_id"],
            language="ko",
        )
    else:
        stored = await db.voices.find_one({"voice_id": voice_id})
        if not stored:
            raise HTTPException(status_code=404, detail="Voice not found")
        if not stored.get("sample_audio_id"):
            raise HTTPException(status_code=404, detail="No sample audio available")
        wav_bytes = await db.get_audio(stored["sample_audio_id"])

    inline_headers = {"Content-Disposition": f'inline; filename="{voice_id}_sample.wav"'}
    return Response(content=wav_bytes, media_type="audio/wav", headers=inline_headers)
@router.post("/clone", response_model=VoiceResponse)
async def create_voice_clone(
    name: str = Form(...),
    description: Optional[str] = Form(None),
    reference_transcript: str = Form(...),
    language: LanguageCode = Form(LanguageCode.KO),
    is_public: bool = Form(False),
    reference_audio: UploadFile = File(...),
    db: Database = Depends(get_db),
):
    """Create a new voice by cloning the speaker of a reference recording.

    The uploaded audio and its transcript are sent to the TTS engine to
    synthesize a fixed sample sentence. The reference audio and the generated
    sample are then saved, and a voice document pointing at both is inserted.
    At least 3 seconds of reference audio is recommended.

    Args:
        name: Display name of the new voice.
        description: Optional free-text description.
        reference_transcript: Transcript of the reference audio.
        language: Language passed to the engine and stored on the voice.
        is_public: Whether the voice is flagged as public.
        reference_audio: Uploaded reference recording.
        db: Database handle injected by FastAPI.

    Returns:
        The ``VoiceResponse`` for the newly created voice.

    Raises:
        HTTPException: 400 when the uploaded audio is empty, 500 when the TTS
            engine fails to produce the cloned sample.
    """
    audio_content = await reference_audio.read()
    # Reject empty uploads up front instead of sending them to the engine
    # and persisting a zero-byte reference file.
    if not audio_content:
        raise HTTPException(status_code=400, detail="Reference audio is empty")

    # Synthesize a short sample with the cloned voice; this also validates
    # that the engine accepts the reference material.
    sample_text = "안녕하세요, 저는 복제된 AI 음성입니다."
    try:
        sample_audio, _sample_rate = await tts_client.voice_clone(
            text=sample_text,
            ref_audio=audio_content,
            ref_text=reference_transcript,
            language=language.value,
        )
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=f"Voice clone failed: {str(e)}") from e

    # Persist both audio blobs before creating the voice document.
    ref_audio_id = await db.save_audio(
        audio_content,
        f"ref_{uuid.uuid4()}.wav",
        metadata={"type": "reference"},
    )
    sample_audio_id = await db.save_audio(
        sample_audio,
        f"sample_{uuid.uuid4()}.wav",
        metadata={"type": "sample"},
    )

    voice_id = f"clone_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()
    doc = {
        "voice_id": voice_id,
        "name": name,
        "description": description,
        "type": VoiceType.CLONED.value,
        "language": language.value,
        "reference_audio_id": ref_audio_id,
        "reference_transcript": reference_transcript,
        "sample_audio_id": sample_audio_id,
        "is_public": is_public,
        "created_at": now,
        "updated_at": now,
    }
    await db.voices.insert_one(doc)
    return _voice_doc_to_response(doc)
@router.post("/design", response_model=VoiceResponse)
async def create_voice_design(
    request: VoiceDesignRequest,
    db: Database = Depends(get_db),
):
    """Create a new voice from a text description of how it should sound.

    The design prompt (for example "a man in his 30s with a soft, calm
    voice") is sent to the TTS engine to synthesize a fixed sample sentence.
    The sample is saved and a voice document referencing it is inserted.

    Raises:
        HTTPException: 500 when the TTS engine fails to produce the sample.
    """
    language_code = request.language.value
    greeting = "안녕하세요, 저는 AI로 생성된 음성입니다."

    try:
        designed_audio, _sample_rate = await tts_client.voice_design(
            text=greeting,
            instruct=request.design_prompt,
            language=language_code,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Voice design failed: {str(e)}")

    # Store the generated sample so it can be served later without the engine.
    stored_sample_id = await db.save_audio(
        designed_audio,
        f"sample_{uuid.uuid4()}.wav",
        metadata={"type": "sample"},
    )

    new_voice_id = f"design_{uuid.uuid4().hex[:12]}"
    timestamp = datetime.utcnow()
    voice_doc = {
        "voice_id": new_voice_id,
        "name": request.name,
        "description": request.description,
        "type": VoiceType.DESIGNED.value,
        "language": language_code,
        "design_prompt": request.design_prompt,
        "sample_audio_id": stored_sample_id,
        "is_public": request.is_public,
        "created_at": timestamp,
        "updated_at": timestamp,
    }
    await db.voices.insert_one(voice_doc)
    return _voice_doc_to_response(voice_doc)
@router.patch("/{voice_id}", response_model=VoiceResponse)
async def update_voice(
    voice_id: str,
    request: VoiceUpdateRequest,
    db: Database = Depends(get_db),
):
    """Update the editable fields of a stored voice.

    Only request fields whose value is not ``None`` are written, and
    ``updated_at`` is refreshed on every successful update.

    Raises:
        HTTPException: 400 when the id belongs to a preset or the request
            carries no non-null field, 404 when no stored voice matches.
    """
    # Presets live in code, not in the database, and are read-only.
    if any(preset["voice_id"] == voice_id for preset in PRESET_VOICES):
        raise HTTPException(status_code=400, detail="Cannot modify preset voice")

    changes = {}
    for field, value in request.model_dump().items():
        if value is not None:
            changes[field] = value
    if not changes:
        raise HTTPException(status_code=400, detail="No fields to update")
    changes["updated_at"] = datetime.utcnow()

    selector = {"voice_id": voice_id}
    outcome = await db.voices.update_one(selector, {"$set": changes})
    if outcome.matched_count == 0:
        raise HTTPException(status_code=404, detail="Voice not found")

    # Re-read the document so the response reflects the stored state.
    refreshed = await db.voices.find_one({"voice_id": voice_id})
    return _voice_doc_to_response(refreshed)
@router.delete("/{voice_id}")
async def delete_voice(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Delete a stored voice together with its audio files.

    The reference and sample audio are removed first (when the document
    references them), then the voice document itself.

    Returns:
        ``{"status": "deleted", "voice_id": <voice_id>}``.

    Raises:
        HTTPException: 400 when the id belongs to a preset, 404 when no
            stored voice matches.
    """
    # Presets live in code, not in the database, and cannot be removed.
    if any(preset["voice_id"] == voice_id for preset in PRESET_VOICES):
        raise HTTPException(status_code=400, detail="Cannot delete preset voice")

    selector = {"voice_id": voice_id}
    stored = await db.voices.find_one(selector)
    if not stored:
        raise HTTPException(status_code=404, detail="Voice not found")

    # Remove the audio blobs before the document that points at them.
    for audio_key in ("reference_audio_id", "sample_audio_id"):
        audio_id = stored.get(audio_key)
        if audio_id:
            await db.delete_audio(audio_id)

    await db.voices.delete_one({"voice_id": voice_id})
    return {"status": "deleted", "voice_id": voice_id}