feat: Drama Studio 프로젝트 초기 구조 설정

- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-01-26 11:39:38 +09:00
commit cc547372c0
70 changed files with 18399 additions and 0 deletions

View File

View File

@ -0,0 +1,169 @@
"""데이터베이스 연결 설정
MongoDB (motor async) + GridFS (오디오 저장)
"""
import os
import logging
from typing import Optional
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase, AsyncIOMotorGridFSBucket
from redis.asyncio import Redis
logger = logging.getLogger(__name__)
class Database:
    """Manage the MongoDB (Motor), GridFS and Redis handles used by the API.

    Every handle is ``None`` until :meth:`connect` has completed.
    """

    def __init__(self):
        self.client: Optional[AsyncIOMotorClient] = None
        self.db: Optional[AsyncIOMotorDatabase] = None
        self.gridfs: Optional[AsyncIOMotorGridFSBucket] = None
        self.redis: Optional[Redis] = None

    async def connect(self):
        """Connect to MongoDB and Redis, ping both, then create indexes.

        Settings come from the ``MONGODB_URL``, ``DB_NAME`` and ``REDIS_URL``
        environment variables, each with a localhost default.

        Raises:
            Exception: Whatever the drivers raise when a connection, ping or
                index creation fails. Handles opened so far are closed
                before the exception is re-raised.
        """
        try:
            # MongoDB
            mongodb_url = os.getenv("MONGODB_URL", "mongodb://localhost:27017/")
            db_name = os.getenv("DB_NAME", "audio_studio")

            logger.info(f"MongoDB 연결 중: {db_name}")
            self.client = AsyncIOMotorClient(mongodb_url)
            self.db = self.client[db_name]

            # GridFS bucket that stores the audio files
            self.gridfs = AsyncIOMotorGridFSBucket(self.db, bucket_name="audio_files")

            # Connection test
            await self.client.admin.command("ping")
            logger.info("MongoDB 연결 성공")

            # Redis
            redis_url = os.getenv("REDIS_URL", "redis://localhost:6379")
            logger.info("Redis 연결 중...")
            self.redis = Redis.from_url(redis_url, decode_responses=True)

            # Connection test
            await self.redis.ping()
            logger.info("Redis 연결 성공")

            # Index creation
            await self._create_indexes()
        except Exception:
            # Do not leave half-open clients behind when startup fails.
            await self.disconnect()
            raise

    async def _create_indexes(self):
        """Create the indexes for each collection."""
        # voices collection
        await self.db.voices.create_index("voice_id", unique=True)
        await self.db.voices.create_index("owner_id")
        await self.db.voices.create_index("type")
        await self.db.voices.create_index("language")
        await self.db.voices.create_index("is_public")

        # tts_generations collection
        await self.db.tts_generations.create_index("generation_id", unique=True)
        await self.db.tts_generations.create_index("user_id")
        await self.db.tts_generations.create_index("voice_id")
        await self.db.tts_generations.create_index("created_at")

        # sound_effects collection
        await self.db.sound_effects.create_index("source_id")
        await self.db.sound_effects.create_index("categories")
        await self.db.sound_effects.create_index("tags")

        # music_tracks collection
        await self.db.music_tracks.create_index("source")
        await self.db.music_tracks.create_index("genre")
        await self.db.music_tracks.create_index("mood")

        logger.info("인덱스 생성 완료")

    async def disconnect(self):
        """Close the MongoDB and Redis clients that were opened."""
        # Compare against None rather than relying on driver truthiness.
        if self.client is not None:
            self.client.close()
            logger.info("MongoDB 연결 해제")

        if self.redis is not None:
            # Prefer ``aclose`` when the installed redis client provides it,
            # falling back to ``close`` otherwise.
            close = getattr(self.redis, "aclose", None) or self.redis.close
            await close()
            logger.info("Redis 연결 해제")

    # ========================================
    # Collection accessors
    # ========================================

    @property
    def voices(self):
        """The ``voices`` collection."""
        return self.db.voices

    @property
    def tts_generations(self):
        """The ``tts_generations`` collection."""
        return self.db.tts_generations

    @property
    def sound_effects(self):
        """The ``sound_effects`` collection."""
        return self.db.sound_effects

    @property
    def music_tracks(self):
        """The ``music_tracks`` collection."""
        return self.db.music_tracks

    @property
    def user_voice_library(self):
        """The ``user_voice_library`` collection."""
        return self.db.user_voice_library

    @property
    def dramas(self):
        """The ``dramas`` collection (accessed as ``db.dramas`` by the drama router)."""
        return self.db.dramas

    # ========================================
    # GridFS audio storage
    # ========================================

    async def save_audio(
        self,
        audio_bytes: bytes,
        filename: str,
        content_type: str = "audio/wav",
        metadata: Optional[dict] = None,
    ) -> str:
        """Store an audio file in GridFS.

        Args:
            audio_bytes: Raw audio content.
            filename: Name recorded for the GridFS file.
            content_type: Stored under the ``content_type`` metadata key.
            metadata: Extra metadata merged over ``content_type``.

        Returns:
            The GridFS file id as a string.
        """
        file_id = await self.gridfs.upload_from_stream(
            filename,
            audio_bytes,
            metadata={
                "content_type": content_type,
                **(metadata or {}),
            }
        )
        return str(file_id)

    async def get_audio(self, file_id: str) -> bytes:
        """Read an audio file from GridFS and return its bytes.

        Args:
            file_id: String form of the GridFS ObjectId.
        """
        from bson import ObjectId
        from io import BytesIO

        buffer = BytesIO()
        await self.gridfs.download_to_stream(ObjectId(file_id), buffer)
        return buffer.getvalue()

    async def delete_audio(self, file_id: str):
        """Delete an audio file from GridFS.

        Args:
            file_id: String form of the GridFS ObjectId.
        """
        from bson import ObjectId

        await self.gridfs.delete(ObjectId(file_id))
# Singleton instance
db = Database()


# FastAPI dependency
async def get_db() -> Database:
    """Return the module-level Database instance (for dependency injection)."""
    return db

View File

@ -0,0 +1,163 @@
"""Drama Studio API Server
AI 라디오 드라마 제작 - TTS, 보이스, 효과음, 배경음악, 드라마 생성 API
"""
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from app.database import db
from app.routers import voices, tts, recordings, sound_effects, music, drama
# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
# ========================================
# 앱 생명주기
# ========================================
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Connect to the databases on startup and disconnect on shutdown.

    Raises:
        Exception: Re-raised unchanged when the database connection fails,
            after the failure has been logged.
    """
    # Connect to the databases on startup
    logger.info("Drama Studio API 서버 시작...")
    try:
        await db.connect()
        logger.info("데이터베이스 연결 완료")
    except Exception as e:
        logger.error(f"데이터베이스 연결 실패: {e}")
        raise

    try:
        yield
    finally:
        # Disconnect on shutdown, even when an exception is thrown into the
        # generator at the yield point.
        await db.disconnect()
        logger.info("Drama Studio API 서버 종료")
# ========================================
# FastAPI 앱
# ========================================
# FastAPI application instance; startup and shutdown are handled by `lifespan`.
app = FastAPI(
    title="Drama Studio API",
    description="""
Drama Studio API - AI 라디오 드라마 제작 플랫폼
## 기능
### Voice (보이스 관리)
- 프리셋 보이스 목록 조회
- Voice Clone (목소리 복제)
- Voice Design (AI 음성 생성)
- 사용자 보이스 라이브러리
### TTS (음성 합성)
- 텍스트를 음성으로 변환
- 다양한 언어 지원 (한국어, 영어, 일본어 등)
### Recording (녹음)
- 녹음 업로드 및 품질 검증
- Voice Clone용 레퍼런스 관리
### Sound Effects (효과음)
- Freesound 검색 및 다운로드
- 로컬 효과음 라이브러리
### Drama (드라마 생성)
- 스크립트 기반 라디오 드라마 생성
- 자동 TTS/BGM/효과음 합성
- 타임라인 기반 오디오 믹싱
""",
    version="0.1.0",
    lifespan=lifespan,
)
# CORS configuration
# NOTE(review): wildcard origins together with allow_credentials=True looks
# acceptable only for development — confirm and restrict before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # for development; must be restricted in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ========================================
# Router registration
# ========================================
app.include_router(voices.router)
app.include_router(tts.router)
app.include_router(recordings.router)
app.include_router(sound_effects.router)
app.include_router(music.router)
app.include_router(drama.router)
# ========================================
# 기본 엔드포인트
# ========================================
@app.get("/")
async def root():
    """API root: return the service name, version and docs path."""
    service_info = dict(
        name="Drama Studio API",
        version="0.1.0",
        docs="/docs",
    )
    return service_info
@app.get("/health")
async def health_check():
    """Health check: ping MongoDB and Redis and report their status.

    Responds with 200 when both services answer, otherwise 503 with the
    overall status set to "degraded".
    """

    async def _probe(ping):
        # Run one ping and turn the outcome into a status string.
        try:
            await ping()
        except Exception as e:
            return f"unhealthy: {str(e)}"
        return "healthy"

    # The attribute lookups happen inside the lambdas, so a handle that was
    # never connected is reported as unhealthy instead of raising here.
    mongo_status = await _probe(lambda: db.client.admin.command("ping"))
    redis_status = await _probe(lambda: db.redis.ping())

    if mongo_status == "healthy" and redis_status == "healthy":
        status, http_code = "healthy", 200
    else:
        status, http_code = "degraded", 503

    payload = {
        "status": status,
        "services": {
            "mongodb": mongo_status,
            "redis": redis_status,
        },
    }
    return JSONResponse(status_code=http_code, content=payload)
# ========================================
# 에러 핸들러
# ========================================
@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Global exception handler: log the error and answer with a generic 500."""
    logger.error(f"Unhandled exception: {exc}", exc_info=True)
    error_body = {"detail": "Internal server error"}
    return JSONResponse(content=error_body, status_code=500)

View File

View File

@ -0,0 +1,193 @@
# 드라마 API 라우터
from fastapi import APIRouter, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from typing import Optional
import os
from app.models.drama import (
DramaCreateRequest, DramaGenerateRequest, DramaResponse,
ParsedScript, Character
)
from app.services.script_parser import script_parser
from app.services.drama_orchestrator import drama_orchestrator
router = APIRouter(prefix="/api/v1/drama", tags=["drama"])
@router.post("/parse", response_model=ParsedScript)
async def parse_script(script: str):
    """
    Parse a script (preview).

    Converts a Markdown-formatted script into structured data.
    Only the parsing result is returned; no project is created.
    """
    ok, problems = script_parser.validate_script(script)
    if ok:
        return script_parser.parse(script)
    raise HTTPException(status_code=400, detail={"errors": problems})
@router.post("/projects", response_model=DramaResponse)
async def create_project(request: DramaCreateRequest):
    """
    Create a new drama project.

    Validates the script and creates the project.
    voice_mapping can assign a voice to each character.
    """
    # Script validation
    ok, problems = script_parser.validate_script(request.script)
    if not ok:
        raise HTTPException(status_code=400, detail={"errors": problems})

    project = await drama_orchestrator.create_project(request)
    parsed = project.script_parsed

    summary = {
        "project_id": project.project_id,
        "title": project.title,
        "status": project.status,
    }
    if parsed:
        summary["characters"] = parsed.characters
        summary["element_count"] = len(parsed.elements)
        summary["estimated_duration"] = drama_orchestrator.estimate_duration(parsed)
    else:
        summary["characters"] = []
        summary["element_count"] = 0
        summary["estimated_duration"] = None
    return DramaResponse(**summary)
@router.get("/projects", response_model=list[DramaResponse])
async def list_projects(skip: int = 0, limit: int = 20):
    """List projects, paginated with ``skip`` and ``limit``."""
    projects = await drama_orchestrator.list_projects(skip=skip, limit=limit)

    responses = []
    for project in projects:
        parsed = project.script_parsed
        responses.append(
            DramaResponse(
                project_id=project.project_id,
                title=project.title,
                status=project.status,
                characters=parsed.characters if parsed else [],
                element_count=len(parsed.elements) if parsed else 0,
                output_file_id=project.output_file_id,
                error_message=project.error_message,
            )
        )
    return responses
@router.get("/projects/{project_id}", response_model=DramaResponse)
async def get_project(project_id: str):
    """Return the details of one project, or 404 when it does not exist."""
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")

    parsed = project.script_parsed
    if parsed:
        characters = parsed.characters
        element_count = len(parsed.elements)
        estimated = drama_orchestrator.estimate_duration(parsed)
    else:
        characters, element_count, estimated = [], 0, None

    return DramaResponse(
        project_id=project.project_id,
        title=project.title,
        status=project.status,
        characters=characters,
        element_count=element_count,
        estimated_duration=estimated,
        output_file_id=project.output_file_id,
        error_message=project.error_message,
    )
@router.post("/projects/{project_id}/render")
async def render_project(
    project_id: str,
    background_tasks: BackgroundTasks,
    output_format: str = "wav"
):
    """
    Start rendering a drama.

    TTS generation, sound-effect lookup and mixing run in the background.
    The status changes to 'completed' when rendering finishes.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")
    if project.status == "processing":
        raise HTTPException(status_code=400, detail="이미 렌더링 중입니다")

    # Queue the render as a background task
    background_tasks.add_task(drama_orchestrator.render, project_id, output_format)

    return dict(
        project_id=project_id,
        status="processing",
        message="렌더링이 시작되었습니다",
    )
@router.get("/projects/{project_id}/download")
async def download_project(project_id: str):
    """Download a rendered drama.

    The media type and download filename follow the extension of the stored
    output file, because the render endpoint accepts an ``output_format``.
    Unknown or missing extensions are served as ``audio/wav`` with a ``.wav``
    name, as before.

    Raises:
        HTTPException: 404 when the project or its output file is missing,
            400 when rendering has not completed.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")

    if project.status != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"렌더링이 완료되지 않았습니다 (현재 상태: {project.status})"
        )

    output_path = project.output_file_id
    if not output_path or not os.path.exists(output_path):
        raise HTTPException(status_code=404, detail="출력 파일을 찾을 수 없습니다")

    media_types = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
    }
    extension = os.path.splitext(output_path)[1].lower()
    if extension not in media_types:
        # Keep the previous behaviour for anything unrecognised.
        extension = ".wav"

    return FileResponse(
        output_path,
        media_type=media_types[extension],
        filename=f"{project.title}{extension}"
    )
@router.put("/projects/{project_id}/voices")
async def update_voice_mapping(
    project_id: str,
    voice_mapping: dict[str, str]
):
    """Update the character-to-voice mapping of a project.

    Raises:
        HTTPException: 404 when the project does not exist.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")

    from app.database import db
    from datetime import datetime

    # The Database wrapper defines no ``dramas`` accessor, so go through the
    # underlying Motor database handle.
    await db.db.dramas.update_one(
        {"project_id": project_id},
        {
            "$set": {
                "voice_mapping": voice_mapping,
                "updated_at": datetime.utcnow()
            }
        }
    )

    return {"message": "보이스 매핑이 업데이트되었습니다"}
@router.delete("/projects/{project_id}")
async def delete_project(project_id: str):
    """Delete a project together with its rendered output file.

    Raises:
        HTTPException: 404 when the project does not exist.
    """
    project = await drama_orchestrator.get_project(project_id)
    if not project:
        raise HTTPException(status_code=404, detail="프로젝트를 찾을 수 없습니다")

    from app.database import db

    # Remove the output file
    if project.output_file_id:
        try:
            os.remove(project.output_file_id)
        except FileNotFoundError:
            # Already gone; nothing to clean up.
            pass

    # Remove from the database. The Database wrapper defines no ``dramas``
    # accessor, so go through the underlying Motor database handle.
    await db.db.dramas.delete_one({"project_id": project_id})

    return {"message": "프로젝트가 삭제되었습니다"}

View File

@ -0,0 +1,278 @@
"""배경음악 API 라우터
MusicGen 연동 및 외부 음악 소스
"""
import os
import uuid
from datetime import datetime
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
from fastapi.responses import Response
from pydantic import BaseModel, Field
import httpx
from app.database import Database, get_db
router = APIRouter(prefix="/api/v1/music", tags=["music"])
MUSICGEN_URL = os.getenv("MUSICGEN_URL", "http://localhost:8002")
# ========================================
# Pydantic 모델
# ========================================
class MusicGenerateRequest(BaseModel):
    """Music generation request."""
    # Text prompt describing the music (5-500 characters)
    prompt: str = Field(..., min_length=5, max_length=500, description="음악 설명")
    # Length to generate in seconds (5-30, default 30)
    duration: int = Field(default=30, ge=5, le=30, description="생성 길이 (초)")
    # When true, the generated track is also stored in the library
    save_to_library: bool = Field(default=True, description="라이브러리에 저장")
class MusicTrackResponse(BaseModel):
    """Music track response."""
    id: str
    name: str
    description: Optional[str] = None
    source: str  # musicgen | pixabay | uploaded
    generation_prompt: Optional[str] = None
    duration_seconds: float
    genre: Optional[str] = None
    mood: List[str] = []
    license: str = ""
    created_at: datetime
class MusicListResponse(BaseModel):
    """Music list response (one page of tracks plus paging information)."""
    tracks: List[MusicTrackResponse]
    total: int  # number of documents matching the filter
    page: int
    page_size: int
# ========================================
# API 엔드포인트
# ========================================
@router.post("/generate")
async def generate_music(
    request: MusicGenerateRequest,
    db: Database = Depends(get_db),
):
    """Generate background music with AI.

    Calls the MusicGen service with the text prompt, optionally stores the
    result in the library, and returns the audio as a WAV attachment.
    """
    generation_payload = {
        "prompt": request.prompt,
        "duration": request.duration,
    }
    try:
        # Call the MusicGen service
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                f"{MUSICGEN_URL}/generate",
                json=generation_payload,
            )
            response.raise_for_status()
            audio_bytes = response.content
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Music generation timed out")
    except httpx.HTTPStatusError as e:
        raise HTTPException(status_code=502, detail=f"MusicGen error: {e.response.text}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Music generation failed: {str(e)}")

    # Save to the library
    if request.save_to_library:
        track_id = f"music_{uuid.uuid4().hex[:12]}"
        now = datetime.utcnow()

        # Store the audio in GridFS
        gridfs_metadata = {
            "type": "generated_music",
            "prompt": request.prompt,
        }
        audio_file_id = await db.save_audio(
            audio_bytes,
            f"{track_id}.wav",
            metadata=gridfs_metadata,
        )

        # Store the track information in the database
        await db.music_tracks.insert_one({
            "track_id": track_id,
            "name": f"Generated: {request.prompt[:30]}...",
            "description": request.prompt,
            "source": "musicgen",
            "generation_prompt": request.prompt,
            "audio_file_id": audio_file_id,
            "duration_seconds": request.duration,
            "format": "wav",
            "genre": None,
            "mood": [],
            "license": "CC-BY-NC",  # MusicGen model license
            "created_at": now,
        })

    response_headers = {
        "X-Duration": str(request.duration),
        "Content-Disposition": 'attachment; filename="generated_music.wav"',
    }
    return Response(
        content=audio_bytes,
        media_type="audio/wav",
        headers=response_headers,
    )
@router.get("/library", response_model=MusicListResponse)
async def list_music_library(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    source: Optional[str] = Query(None, description="소스 필터 (musicgen, pixabay, uploaded)"),
    genre: Optional[str] = Query(None, description="장르 필터"),
    db: Database = Depends(get_db),
):
    """List the music library, newest first, with optional filters."""
    # Only non-empty filters become part of the query
    candidate_filters = (("source", source), ("genre", genre))
    query = {field: value for field, value in candidate_filters if value}

    total = await db.music_tracks.count_documents(query)

    offset = (page - 1) * page_size
    cursor = db.music_tracks.find(query).sort("created_at", -1).skip(offset).limit(page_size)

    tracks = [
        MusicTrackResponse(
            id=doc.get("track_id", str(doc["_id"])),
            name=doc["name"],
            description=doc.get("description"),
            source=doc.get("source", "unknown"),
            generation_prompt=doc.get("generation_prompt"),
            duration_seconds=doc.get("duration_seconds", 0),
            genre=doc.get("genre"),
            mood=doc.get("mood", []),
            license=doc.get("license", ""),
            created_at=doc.get("created_at", datetime.utcnow()),
        )
        async for doc in cursor
    ]

    return MusicListResponse(
        tracks=tracks,
        total=total,
        page=page,
        page_size=page_size,
    )
@router.get("/{track_id}")
async def get_music_track(
    track_id: str,
    db: Database = Depends(get_db),
):
    """Return the details of one music track, or 404 when it is unknown."""
    doc = await db.music_tracks.find_one({"track_id": track_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Track not found")

    fields = {
        "id": doc.get("track_id", str(doc["_id"])),
        "name": doc["name"],
        "description": doc.get("description"),
        "source": doc.get("source", "unknown"),
        "generation_prompt": doc.get("generation_prompt"),
        "duration_seconds": doc.get("duration_seconds", 0),
        "genre": doc.get("genre"),
        "mood": doc.get("mood", []),
        "license": doc.get("license", ""),
        "created_at": doc.get("created_at", datetime.utcnow()),
    }
    return MusicTrackResponse(**fields)
@router.get("/{track_id}/audio")
async def get_music_audio(
    track_id: str,
    db: Database = Depends(get_db),
):
    """Return the stored audio of a music track as an inline WAV response."""
    doc = await db.music_tracks.find_one({"track_id": track_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Track not found")

    stored_file_id = doc.get("audio_file_id")
    if not stored_file_id:
        raise HTTPException(status_code=404, detail="Audio file not found")

    content = await db.get_audio(stored_file_id)
    disposition = f'inline; filename="{track_id}.wav"'
    return Response(
        content=content,
        media_type="audio/wav",
        headers={"Content-Disposition": disposition},
    )
@router.delete("/{track_id}")
async def delete_music_track(
    track_id: str,
    db: Database = Depends(get_db),
):
    """Delete a music track and, when present, its stored audio file."""
    doc = await db.music_tracks.find_one({"track_id": track_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Track not found")

    # Delete the audio file first
    stored_file_id = doc.get("audio_file_id")
    if stored_file_id:
        await db.delete_audio(stored_file_id)

    # Then delete the document
    await db.music_tracks.delete_one({"track_id": track_id})

    return dict(status="deleted", track_id=track_id)
@router.get("/prompts/examples")
async def get_example_prompts():
    """Return example prompts, from MusicGen when reachable, else built-in ones."""
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(f"{MUSICGEN_URL}/prompts")
            response.raise_for_status()
            return response.json()
    except Exception:
        # Fall back to the default prompts when the MusicGen request fails
        defaults = {
            "Ambient": [
                "calm piano music, peaceful, ambient",
                "lo-fi hip hop beats, relaxing, study music",
                "meditation music, calm, zen",
            ],
            "Electronic": [
                "upbeat electronic dance music",
                "retro synthwave 80s style",
                "chill electronic ambient",
            ],
            "Cinematic": [
                "epic orchestral cinematic music",
                "tense suspenseful thriller music",
                "cheerful happy video game background",
            ],
        }
        return {
            "examples": [
                {"category": category, "prompts": prompts}
                for category, prompts in defaults.items()
            ]
        }

View File

@ -0,0 +1,184 @@
"""녹음 관리 API 라우터"""
import uuid
import io
from typing import List
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
from fastapi.responses import Response
import soundfile as sf
import numpy as np
from app.database import Database, get_db
from app.models.voice import RecordingValidateResponse, RecordingUploadResponse
router = APIRouter(prefix="/api/v1/recordings", tags=["recordings"])
def analyze_audio(audio_bytes: bytes) -> dict:
    """Analyse an audio file and score its quality.

    The audio is decoded with ``soundfile``, averaged down to one channel and
    checked for length, RMS level, peak level and the share of near-silent
    samples. Each failed check appends a message to ``issues`` and lowers
    the score.

    Args:
        audio_bytes: Raw bytes of the audio file.

    Returns:
        A dict with ``duration`` (seconds), ``sample_rate``,
        ``quality_score`` (clamped to 0.0-1.0), ``issues``, ``rms`` and
        ``peak``. When the audio cannot be analysed, the numeric fields are
        zero and ``issues`` holds a single failure message.
    """
    try:
        # Load the audio
        audio_data, sample_rate = sf.read(io.BytesIO(audio_bytes))

        # Convert to mono
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        if audio_data.size == 0:
            # Fail explicitly rather than reducing over an empty array.
            raise ValueError("audio contains no samples")

        duration = len(audio_data) / sample_rate

        # Quality analysis
        issues = []
        quality_score = 1.0

        # Length check
        if duration < 1.0:
            issues.append("오디오가 너무 짧습니다 (최소 1초 이상)")
            quality_score -= 0.3
        elif duration < 3.0:
            issues.append("Voice Clone에는 3초 이상의 오디오가 권장됩니다")
            quality_score -= 0.1

        # RMS level check (volume)
        rms = np.sqrt(np.mean(audio_data ** 2))
        if rms < 0.01:
            issues.append("볼륨이 너무 낮습니다")
            quality_score -= 0.2
        elif rms > 0.5:
            issues.append("볼륨이 너무 높습니다 (클리핑 가능성)")
            quality_score -= 0.1

        # Peak check; the absolute values are reused by the silence check
        magnitude = np.abs(audio_data)
        peak = np.max(magnitude)
        if peak > 0.99:
            issues.append("오디오가 클리핑되었습니다")
            quality_score -= 0.2

        # Noise check (simple heuristic); a real implementation would need
        # more elaborate noise detection
        silence_threshold = 0.01
        silent_samples = np.count_nonzero(magnitude < silence_threshold)
        silence_ratio = silent_samples / len(audio_data)
        if silence_ratio > 0.7:
            issues.append("대부분이 무음입니다")
            quality_score -= 0.3
        elif silence_ratio > 0.5:
            issues.append("무음 구간이 많습니다")
            quality_score -= 0.1

        quality_score = max(0.0, min(1.0, quality_score))

        return {
            "duration": duration,
            "sample_rate": sample_rate,
            "quality_score": quality_score,
            "issues": issues,
            "rms": float(rms),
            "peak": float(peak),
        }
    except Exception as e:
        # Best effort: any decoding or analysis failure yields a zero score.
        # The result carries the same keys as the success result.
        return {
            "duration": 0,
            "sample_rate": 0,
            "quality_score": 0,
            "issues": [f"오디오 분석 실패: {str(e)}"],
            "rms": 0.0,
            "peak": 0.0,
        }
@router.post("/validate", response_model=RecordingValidateResponse)
async def validate_recording(
    audio: UploadFile = File(..., description="검증할 오디오 파일"),
):
    """Validate recording quality.

    Checks the quality of a recording intended for Voice Clone.
    """
    payload = await audio.read()
    if len(payload) < 1000:
        raise HTTPException(status_code=400, detail="파일이 너무 작습니다")

    report = analyze_audio(payload)
    score = report["quality_score"]
    seconds = report["duration"]
    is_valid = score > 0.5 and seconds > 1.0

    return RecordingValidateResponse(
        valid=is_valid,
        duration=seconds,
        sample_rate=report["sample_rate"],
        quality_score=score,
        issues=report["issues"],
    )
@router.post("/upload", response_model=RecordingUploadResponse)
async def upload_recording(
    audio: UploadFile = File(..., description="업로드할 오디오 파일"),
    transcript: str = Form(None, description="오디오의 텍스트 내용"),
    db: Database = Depends(get_db),
):
    """Upload a recording file.

    Stores a recording intended for Voice Clone in GridFS.
    """
    payload = await audio.read()

    # Quality analysis
    report = analyze_audio(payload)
    seconds = report["duration"]
    if seconds < 0.5:
        raise HTTPException(status_code=400, detail="오디오가 너무 짧습니다")

    # Store in GridFS
    stored_name = audio.filename or f"recording_{uuid.uuid4()}.wav"
    recording_metadata = {
        "type": "recording",
        "transcript": transcript,
        "duration": seconds,
        "sample_rate": report["sample_rate"],
        "quality_score": report["quality_score"],
    }
    file_id = await db.save_audio(payload, stored_name, metadata=recording_metadata)

    return RecordingUploadResponse(
        file_id=file_id,
        filename=audio.filename or "recording.wav",
        duration=seconds,
        sample_rate=report["sample_rate"],
    )
@router.get("/{file_id}")
async def get_recording(
    file_id: str,
    db: Database = Depends(get_db),
):
    """Download a recording file.

    Raises:
        HTTPException: 404 when ``file_id`` is not a valid id or no such file
            is stored. Other failures propagate instead of being reported
            as "not found".
    """
    from bson.errors import InvalidId
    from gridfs.errors import NoFile

    try:
        audio_bytes = await db.get_audio(file_id)
    except (InvalidId, NoFile):
        raise HTTPException(status_code=404, detail="Recording not found") from None

    return Response(
        content=audio_bytes,
        media_type="audio/wav",
        headers={"Content-Disposition": f'attachment; filename="{file_id}.wav"'},
    )
@router.delete("/{file_id}")
async def delete_recording(
    file_id: str,
    db: Database = Depends(get_db),
):
    """Delete a recording file.

    Raises:
        HTTPException: 404 when ``file_id`` is not a valid id or no such file
            is stored. Other failures propagate instead of being reported
            as "not found".
    """
    from bson.errors import InvalidId
    from gridfs.errors import NoFile

    try:
        await db.delete_audio(file_id)
    except (InvalidId, NoFile):
        raise HTTPException(status_code=404, detail="Recording not found") from None

    return {"status": "deleted", "file_id": file_id}

View File

@ -0,0 +1,340 @@
"""효과음 API 라우터
Freesound API 연동
"""
import uuid
from datetime import datetime
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends, Query
from fastapi.responses import Response
from pydantic import BaseModel
from app.database import Database, get_db
from app.services.freesound_client import freesound_client
router = APIRouter(prefix="/api/v1/sound-effects", tags=["sound-effects"])
# ========================================
# Pydantic 모델
# ========================================
class SoundEffectResponse(BaseModel):
    """Sound effect response."""
    id: str
    freesound_id: Optional[int] = None
    name: str
    description: str
    duration: float
    tags: List[str] = []
    preview_url: Optional[str] = None
    license: str = ""
    username: Optional[str] = None
    source: str = "freesound"  # freesound | local
class SoundEffectSearchResponse(BaseModel):
    """Sound effect search response (one page of results)."""
    count: int
    page: int
    page_size: int
    results: List[SoundEffectResponse]
class SoundEffectImportRequest(BaseModel):
    """Sound effect import request."""
    # Freesound id of the sound to import
    freesound_id: int
# ========================================
# API 엔드포인트
# ========================================
@router.get("/search", response_model=SoundEffectSearchResponse)
async def search_sound_effects(
    query: str = Query(..., min_length=1, description="검색어"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    min_duration: Optional[float] = Query(None, ge=0, description="최소 길이 (초)"),
    max_duration: Optional[float] = Query(None, ge=0, description="최대 길이 (초)"),
    sort: str = Query("score", description="정렬 (score, duration_asc, duration_desc)"),
):
    """Search Freesound for sound effects."""
    try:
        found = await freesound_client.search(
            query=query,
            page=page,
            page_size=page_size,
            min_duration=min_duration,
            max_duration=max_duration,
            sort=sort,
        )

        # Convert each hit to the response format
        sounds = [
            SoundEffectResponse(
                id=f"fs_{hit['freesound_id']}",
                freesound_id=hit["freesound_id"],
                name=hit["name"],
                description=hit["description"],
                duration=hit["duration"],
                tags=hit["tags"],
                preview_url=hit["preview_url"],
                license=hit["license"],
                username=hit.get("username"),
                source="freesound",
            )
            for hit in found["results"]
        ]

        return SoundEffectSearchResponse(
            count=found["count"],
            page=page,
            page_size=page_size,
            results=sounds,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")
@router.get("/library", response_model=SoundEffectSearchResponse)
async def list_local_sound_effects(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    category: Optional[str] = Query(None, description="카테고리 필터"),
    db: Database = Depends(get_db),
):
    """List the local sound effect library, newest first."""
    query = {"categories": category} if category else {}

    total = await db.sound_effects.count_documents(query)

    offset = (page - 1) * page_size
    cursor = db.sound_effects.find(query).sort("created_at", -1).skip(offset).limit(page_size)

    sounds = [
        SoundEffectResponse(
            id=str(doc["_id"]),
            freesound_id=doc.get("source_id"),
            name=doc["name"],
            description=doc.get("description", ""),
            duration=doc.get("duration_seconds", 0),
            tags=doc.get("tags", []),
            preview_url=None,  # local files are served by a separate endpoint
            license=doc.get("license", ""),
            source="local",
        )
        async for doc in cursor
    ]

    return SoundEffectSearchResponse(
        count=total,
        page=page,
        page_size=page_size,
        results=sounds,
    )
@router.post("/import", response_model=SoundEffectResponse)
async def import_sound_effect(
    request: SoundEffectImportRequest,
    db: Database = Depends(get_db),
):
    """Import a sound effect from Freesound (local cache)."""
    fs_id = request.freesound_id
    try:
        # Fetch the details from Freesound
        info = await freesound_client.get_sound(fs_id)

        # Download the preview
        preview_url = info.get("previews", {}).get("preview-hq-mp3", "")
        if not preview_url:
            raise HTTPException(status_code=400, detail="Preview not available")
        preview_bytes = await freesound_client.download_preview(preview_url)

        # Store the audio in GridFS
        stored_file_id = await db.save_audio(
            preview_bytes,
            f"sfx_{fs_id}.mp3",
            content_type="audio/mpeg",
            metadata={"freesound_id": fs_id},
        )

        # Store the metadata in the database
        timestamp = datetime.utcnow()
        doc = {
            "name": info.get("name", ""),
            "description": info.get("description", ""),
            "source": "freesound",
            "source_id": fs_id,
            "source_url": f"https://freesound.org/s/{fs_id}/",
            "audio_file_id": stored_file_id,
            "duration_seconds": info.get("duration", 0),
            "format": "mp3",
            "categories": [],
            "tags": info.get("tags", [])[:20],  # at most 20 tags
            "license": info.get("license", ""),
            "attribution": info.get("username", ""),
            "created_at": timestamp,
            "updated_at": timestamp,
        }
        inserted = await db.sound_effects.insert_one(doc)

        return SoundEffectResponse(
            id=str(inserted.inserted_id),
            freesound_id=fs_id,
            name=doc["name"],
            description=doc["description"],
            duration=doc["duration_seconds"],
            tags=doc["tags"],
            license=doc["license"],
            source="local",
        )
    except HTTPException:
        # Pass deliberate HTTP errors through unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Import failed: {str(e)}")
# NOTE: "/categories" must be registered before "/{sound_id}"; otherwise the
# parameterised route captures the request with sound_id="categories".
@router.get("/categories")
async def list_categories(
    db: Database = Depends(get_db),
):
    """List sound effect categories with the number of sounds in each."""
    # Aggregate the categories of the local library
    pipeline = [
        {"$unwind": "$categories"},
        {"$group": {"_id": "$categories", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},
    ]

    categories = [
        {"name": doc["_id"], "count": doc["count"]}
        async for doc in db.sound_effects.aggregate(pipeline)
    ]

    return {"categories": categories}


@router.get("/{sound_id}")
async def get_sound_effect_info(
    sound_id: str,
    db: Database = Depends(get_db),
):
    """Return the details of a sound effect.

    Ids starting with ``fs_`` are looked up on Freesound; any other id is
    treated as the ObjectId of a local sound effect.

    Raises:
        HTTPException: 400 for a malformed id, 404 when the sound is not found.
    """
    # Freesound id
    if sound_id.startswith("fs_"):
        try:
            freesound_id = int(sound_id[3:])
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid sound ID") from None

        try:
            sound_info = await freesound_client.get_sound(freesound_id)
            return SoundEffectResponse(
                id=sound_id,
                freesound_id=freesound_id,
                name=sound_info.get("name", ""),
                description=sound_info.get("description", ""),
                duration=sound_info.get("duration", 0),
                tags=sound_info.get("tags", []),
                preview_url=sound_info.get("previews", {}).get("preview-hq-mp3", ""),
                license=sound_info.get("license", ""),
                source="freesound",
            )
        except Exception:
            raise HTTPException(status_code=404, detail="Sound not found")

    # Local id
    from bson import ObjectId

    # Validate the id on its own so database errors are not reported as 400.
    if not ObjectId.is_valid(sound_id):
        raise HTTPException(status_code=400, detail="Invalid sound ID")

    doc = await db.sound_effects.find_one({"_id": ObjectId(sound_id)})
    if not doc:
        raise HTTPException(status_code=404, detail="Sound not found")

    return SoundEffectResponse(
        id=str(doc["_id"]),
        freesound_id=doc.get("source_id"),
        name=doc["name"],
        description=doc.get("description", ""),
        duration=doc.get("duration_seconds", 0),
        tags=doc.get("tags", []),
        license=doc.get("license", ""),
        source="local",
    )


@router.get("/{sound_id}/audio")
async def get_sound_effect_audio(
    sound_id: str,
    db: Database = Depends(get_db),
):
    """Return the audio of a sound effect.

    For ``fs_`` ids the Freesound preview is downloaded and returned; for
    local ids the audio stored in GridFS is returned.

    Raises:
        HTTPException: 400 for a malformed id, 404 when no audio is available.
    """
    # Freesound id: serve the preview
    if sound_id.startswith("fs_"):
        try:
            freesound_id = int(sound_id[3:])
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid sound ID") from None

        try:
            sound_info = await freesound_client.get_sound(freesound_id)
            preview_url = sound_info.get("previews", {}).get("preview-hq-mp3", "")
            audio_bytes = (
                await freesound_client.download_preview(preview_url)
                if preview_url
                else None
            )
        except Exception:
            raise HTTPException(status_code=404, detail="Audio not found")

        if audio_bytes is None:
            # No preview: report it here instead of falling through to the
            # local-id branch, which would reject the "fs_" id as malformed.
            raise HTTPException(status_code=404, detail="Audio not found")

        return Response(
            content=audio_bytes,
            media_type="audio/mpeg",
            headers={"Content-Disposition": f'inline; filename="{freesound_id}.mp3"'},
        )

    # Local id
    from bson import ObjectId

    if not ObjectId.is_valid(sound_id):
        raise HTTPException(status_code=400, detail="Invalid sound ID")

    doc = await db.sound_effects.find_one({"_id": ObjectId(sound_id)})
    if not doc or not doc.get("audio_file_id"):
        raise HTTPException(status_code=404, detail="Audio not found")

    audio_bytes = await db.get_audio(doc["audio_file_id"])
    content_type = "audio/mpeg" if doc.get("format") == "mp3" else "audio/wav"

    return Response(
        content=audio_bytes,
        media_type=content_type,
        headers={"Content-Disposition": f'inline; filename="{sound_id}.{doc.get("format", "wav")}"'},
    )
@router.delete("/{sound_id}")
async def delete_sound_effect(
    sound_id: str,
    db: Database = Depends(get_db),
):
    """Delete a local sound effect and, when present, its stored audio.

    Raises:
        HTTPException: 400 for a Freesound reference or a malformed id,
            404 when the sound does not exist.
    """
    if sound_id.startswith("fs_"):
        raise HTTPException(status_code=400, detail="Cannot delete Freesound reference")

    from bson import ObjectId

    # Validate the id on its own so database errors are not reported as 400.
    if not ObjectId.is_valid(sound_id):
        raise HTTPException(status_code=400, detail="Invalid sound ID")
    object_id = ObjectId(sound_id)

    doc = await db.sound_effects.find_one({"_id": object_id})
    if not doc:
        raise HTTPException(status_code=404, detail="Sound not found")

    # Delete the audio file
    if doc.get("audio_file_id"):
        await db.delete_audio(doc["audio_file_id"])

    # Delete the document
    await db.sound_effects.delete_one({"_id": object_id})

    return {"status": "deleted", "sound_id": sound_id}

View File

@ -0,0 +1,227 @@
"""TTS API 라우터"""
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import Response, StreamingResponse
from app.database import Database, get_db
from app.models.voice import TTSSynthesizeRequest, TTSGenerationResponse, VoiceType
from app.services.tts_client import tts_client
from app.routers.voices import PRESET_VOICES
router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
@router.post("/synthesize")
async def synthesize(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Synthesize speech for the requested text and voice.

    Preset voices are rendered with their preset speaker. Voices stored in
    the database are rendered through voice cloning or voice design,
    depending on the document's ``type``. The resulting audio is saved, a
    record is inserted into ``tts_generations`` and the audio is returned
    as a WAV response.

    Raises:
        HTTPException: 404 for an unknown voice; 400 for a cloned voice
            without reference audio or for an unknown voice type; 500 when
            the TTS engine call fails.
    """
    voice_id = request.voice_id

    # Presets live in code, not in the database.
    preset_speaker = next(
        (p["preset_voice_id"] for p in PRESET_VOICES if p["voice_id"] == voice_id),
        None,
    )

    if preset_speaker:
        try:
            audio_bytes, sr = await tts_client.synthesize(
                text=request.text,
                speaker=preset_speaker,
                language="ko",
                instruct=request.instruct,
            )
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {str(e)}")
    else:
        voice_doc = await db.voices.find_one({"voice_id": voice_id})
        if not voice_doc:
            raise HTTPException(status_code=404, detail="Voice not found")

        voice_type = voice_doc.get("type")
        if voice_type not in (VoiceType.CLONED.value, VoiceType.DESIGNED.value):
            raise HTTPException(status_code=400, detail=f"Unknown voice type: {voice_type}")

        if voice_type == VoiceType.CLONED.value:
            # Cloning needs the stored reference audio and its transcript.
            ref_audio_id = voice_doc.get("reference_audio_id")
            if not ref_audio_id:
                raise HTTPException(status_code=400, detail="Reference audio not found")
            reference_audio = await db.get_audio(ref_audio_id)
            try:
                audio_bytes, sr = await tts_client.voice_clone(
                    text=request.text,
                    ref_audio=reference_audio,
                    ref_text=voice_doc.get("reference_transcript", ""),
                    language=voice_doc.get("language", "ko"),
                )
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Voice clone synthesis failed: {str(e)}")
        else:
            # Designed voices are driven by their stored design prompt.
            try:
                audio_bytes, sr = await tts_client.voice_design(
                    text=request.text,
                    instruct=voice_doc.get("design_prompt", ""),
                    language=voice_doc.get("language", "ko"),
                )
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Voice design synthesis failed: {str(e)}")

    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    created_at = datetime.utcnow()
    wav_name = f"{generation_id}.wav"

    # Persist the audio, then the record that points at it.
    audio_file_id = await db.save_audio(
        audio_bytes,
        wav_name,
        metadata={"voice_id": voice_id, "text": request.text[:100]},
    )
    await db.tts_generations.insert_one({
        "generation_id": generation_id,
        "voice_id": voice_id,
        "text": request.text,
        "audio_file_id": audio_file_id,
        "status": "completed",
        "created_at": created_at,
    })

    response_headers = {
        "X-Sample-Rate": str(sr),
        "X-Generation-ID": generation_id,
        "Content-Disposition": f'attachment; filename="{wav_name}"',
    }
    return Response(
        content=audio_bytes,
        media_type="audio/wav",
        headers=response_headers,
    )
@router.post("/synthesize/async", response_model=TTSGenerationResponse)
async def synthesize_async(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Register an asynchronous TTS job (intended for long texts).

    Inserts a ``pending`` record into ``tts_generations`` and returns its
    ``generation_id``. The audio is meant to be downloadable from
    ``/generations/{generation_id}/audio`` once the job has completed.

    NOTE: this handler does not run or enqueue the synthesis itself, so the
    record stays ``pending`` until something else processes it.
    """
    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    gen_doc = {
        "generation_id": generation_id,
        "voice_id": request.voice_id,
        "text": request.text,
        "status": "pending",
        "created_at": now,
    }
    await db.tts_generations.insert_one(gen_doc)

    # TODO: hand the job to a background worker (e.g. a Redis queue). On
    # failure the worker should set status="failed" and "error_message" on
    # the record. The previous `try: pass / except` placeholder could never
    # reach its error branch and has been removed.

    return TTSGenerationResponse(
        generation_id=generation_id,
        voice_id=request.voice_id,
        text=request.text,
        status="pending",
        created_at=now,
    )
@router.get("/generations/{generation_id}", response_model=TTSGenerationResponse)
async def get_generation(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Return the stored state of a TTS generation.

    Raises:
        HTTPException: 404 when no record has this ``generation_id``.
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    # The stored file id is exposed as a string, or None when absent.
    stored_file_id = record.get("audio_file_id")
    audio_file_id = str(stored_file_id) if stored_file_id else None

    return TTSGenerationResponse(
        generation_id=record["generation_id"],
        voice_id=record["voice_id"],
        text=record["text"],
        status=record["status"],
        audio_file_id=audio_file_id,
        duration_seconds=record.get("duration_seconds"),
        created_at=record["created_at"],
    )
@router.get("/generations/{generation_id}/audio")
async def get_generation_audio(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Download the audio produced by a completed generation.

    Raises:
        HTTPException: 404 when the generation or its audio file id is
            missing; 400 when the generation is not in ``completed`` state.
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    status = record["status"]
    if status != "completed":
        raise HTTPException(status_code=400, detail=f"Generation not completed: {status}")

    stored_file_id = record.get("audio_file_id")
    if not stored_file_id:
        raise HTTPException(status_code=404, detail="Audio file not found")

    payload = await db.get_audio(stored_file_id)
    disposition = f'attachment; filename="{generation_id}.wav"'
    return Response(
        content=payload,
        media_type="audio/wav",
        headers={"Content-Disposition": disposition},
    )
@router.get("/health")
async def tts_health():
    """Report TTS engine health.

    Any exception raised by the engine health check is reported in the
    response body as ``unhealthy`` rather than propagated.
    """
    try:
        engine_status = await tts_client.health_check()
    except Exception as e:
        return {"status": "unhealthy", "error": str(e)}
    return {"status": "healthy", "tts_engine": engine_status}

View File

@ -0,0 +1,426 @@
"""Voice 관리 API 라우터"""
import uuid
from datetime import datetime
from typing import Optional, List
from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
from fastapi.responses import Response
from app.database import Database, get_db
from app.models.voice import (
VoiceType,
LanguageCode,
VoiceResponse,
VoiceListResponse,
VoiceCloneRequest,
VoiceDesignRequest,
VoiceUpdateRequest,
)
from app.services.tts_client import tts_client
router = APIRouter(prefix="/api/v1/voices", tags=["voices"])
# ========================================
# Preset voice catalogue (system defaults)
# ========================================
# One row per preset: (speaker name, description, gender, style tags).
# The speaker name doubles as the display name and the preset speaker id;
# the public voice_id is "preset_" + the lower-cased speaker name.
_PRESET_SPECS = (
    ("Chelsie", "밝고 활기찬 여성 목소리", "female", ("bright", "energetic")),
    ("Ethan", "차분하고 신뢰감 있는 남성 목소리", "male", ("calm", "trustworthy")),
    ("Vivian", "부드럽고 따뜻한 여성 목소리", "female", ("soft", "warm")),
    ("Benjamin", "깊고 전문적인 남성 목소리", "male", ("deep", "professional")),
    ("Aurora", "우아하고 세련된 여성 목소리", "female", ("elegant", "refined")),
    ("Oliver", "친근하고 편안한 남성 목소리", "male", ("friendly", "casual")),
    ("Luna", "따뜻하고 감성적인 여성 목소리", "female", ("warm", "emotional")),
    ("Jasper", "전문적이고 명확한 남성 목소리", "male", ("professional", "clear")),
    ("Aria", "표현력 풍부한 여성 목소리", "female", ("expressive", "dynamic")),
)

PRESET_VOICES = [
    {
        "voice_id": f"preset_{speaker.lower()}",
        "name": speaker,
        "description": description,
        "type": VoiceType.PRESET,
        "preset_voice_id": speaker,
        "language": LanguageCode.EN,
        "gender": gender,
        "style_tags": list(style_tags),
    }
    for speaker, description, gender, style_tags in _PRESET_SPECS
]
def _voice_doc_to_response(doc: dict) -> VoiceResponse:
    """Convert a MongoDB voice document into a ``VoiceResponse``.

    ``voice_id``, ``name`` and ``type`` must be present. Missing optional
    fields fall back to defaults: Korean language, public visibility, no
    style tags and the current UTC time for both timestamps. Owner and
    sample-audio ids are exposed as strings, or ``None`` when absent.
    """
    owner = doc.get("owner_id")
    sample = doc.get("sample_audio_id")

    fields = {
        "voice_id": doc["voice_id"],
        "name": doc["name"],
        "description": doc.get("description"),
        "type": doc["type"],
        "language": doc.get("language", LanguageCode.KO),
        "preset_voice_id": doc.get("preset_voice_id"),
        "design_prompt": doc.get("design_prompt"),
        "reference_transcript": doc.get("reference_transcript"),
        "gender": doc.get("gender"),
        "age_range": doc.get("age_range"),
        "style_tags": doc.get("style_tags", []),
        "owner_id": str(owner) if owner else None,
        "is_public": doc.get("is_public", True),
        "sample_audio_id": str(sample) if sample else None,
        "created_at": doc.get("created_at", datetime.utcnow()),
        "updated_at": doc.get("updated_at", datetime.utcnow()),
    }
    return VoiceResponse(**fields)
@router.get("", response_model=VoiceListResponse)
async def list_voices(
    type: Optional[VoiceType] = Query(None, description="보이스 타입 필터"),
    language: Optional[LanguageCode] = Query(None, description="언어 필터"),
    is_public: bool = Query(True, description="공개 보이스만"),
    include_presets: bool = Query(True, description="프리셋 포함"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    db: Database = Depends(get_db),
):
    """List voices: matching presets first, then database voices, newest first.

    Presets and database voices are paged as one combined list, so a page
    never holds more than ``page_size`` entries and presets appear only on
    the page(s) they fall on. ``total`` counts only the presets that pass
    the filters, plus the matching database documents.

    Args:
        type: Restrict results to one voice type.
        language: Restrict results to one language.
        is_public: When true only public database voices are returned.
        include_presets: Whether built-in presets take part in the listing.
        page: 1-based page number.
        page_size: Maximum number of voices per page.
    """
    # Presets that survive the type/language filters.
    matching_presets = []
    if include_presets and (type is None or type == VoiceType.PRESET):
        matching_presets = [
            preset for preset in PRESET_VOICES
            if language is None or preset["language"] == language
        ]

    # Database filter. The type filter is applied for every requested type,
    # so asking for presets no longer returns cloned/designed voices too.
    query = {"is_public": True} if is_public else {}
    if type is not None:
        query["type"] = type.value
    if language is not None:
        query["language"] = language.value

    # Page window over the combined [presets..., db voices...] sequence.
    skip = (page - 1) * page_size
    now = datetime.utcnow()
    voices = [
        VoiceResponse(**preset, is_public=True, created_at=now, updated_at=now)
        for preset in matching_presets[skip:skip + page_size]
    ]

    remaining = page_size - len(voices)
    # limit(0) means "no limit" for a MongoDB cursor, so skip the query
    # entirely when presets already fill the page.
    if remaining > 0:
        db_skip = max(0, skip - len(matching_presets))
        cursor = (
            db.voices.find(query)
            .sort("created_at", -1)
            .skip(db_skip)
            .limit(remaining)
        )
        async for doc in cursor:
            voices.append(_voice_doc_to_response(doc))

    total = len(matching_presets) + await db.voices.count_documents(query)

    return VoiceListResponse(
        voices=voices,
        total=total,
        page=page,
        page_size=page_size,
    )
@router.get("/{voice_id}", response_model=VoiceResponse)
async def get_voice(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Return one voice, looking at presets first and then the database.

    Raises:
        HTTPException: 404 when neither a preset nor a stored voice matches.
    """
    preset = next((p for p in PRESET_VOICES if p["voice_id"] == voice_id), None)
    if preset is not None:
        # Presets carry no stored timestamps; report the current time.
        return VoiceResponse(
            **preset,
            is_public=True,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )

    stored = await db.voices.find_one({"voice_id": voice_id})
    if not stored:
        raise HTTPException(status_code=404, detail="Voice not found")
    return _voice_doc_to_response(stored)
@router.get("/{voice_id}/sample")
async def get_voice_sample(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Return a sample WAV for a voice.

    For a preset the sample is synthesized on each request from a fixed
    sentence. For a stored voice the saved sample audio is returned.

    Raises:
        HTTPException: 404 when the voice is unknown or has no sample.
    """
    preset = next((p for p in PRESET_VOICES if p["voice_id"] == voice_id), None)

    if preset is not None:
        # The sample rate returned alongside the audio is not needed here.
        audio_bytes, _ = await tts_client.synthesize(
            text="안녕하세요, 저는 AI 음성입니다.",
            speaker=preset["preset_voice_id"],
            language="ko",
        )
    else:
        stored = await db.voices.find_one({"voice_id": voice_id})
        if not stored:
            raise HTTPException(status_code=404, detail="Voice not found")

        sample_id = stored.get("sample_audio_id")
        if not sample_id:
            raise HTTPException(status_code=404, detail="No sample audio available")
        audio_bytes = await db.get_audio(sample_id)

    return Response(
        content=audio_bytes,
        media_type="audio/wav",
        headers={"Content-Disposition": f'inline; filename="{voice_id}_sample.wav"'},
    )
@router.post("/clone", response_model=VoiceResponse)
async def create_voice_clone(
    name: str = Form(...),
    description: Optional[str] = Form(None),
    reference_transcript: str = Form(...),
    language: LanguageCode = Form(LanguageCode.KO),
    is_public: bool = Form(False),
    reference_audio: UploadFile = File(...),
    db: Database = Depends(get_db),
):
    """Create a new voice by cloning an uploaded reference recording.

    A sample sentence is synthesized with the cloned voice first, so a
    failing clone stores nothing. The reference audio and the sample are
    then saved and a ``cloned`` voice document is inserted. Reference audio
    of three seconds or more is recommended.

    Raises:
        HTTPException: 400 when the uploaded file is empty; 500 when the
            TTS engine fails to clone the voice.
    """
    audio_content = await reference_audio.read()
    # An empty upload can never be cloned; fail fast with a client error
    # instead of sending it to the TTS engine and storing it.
    if not audio_content:
        raise HTTPException(status_code=400, detail="Reference audio is empty")

    sample_text = "안녕하세요, 저는 복제된 AI 음성입니다."
    try:
        # The returned sample rate is not stored.
        sample_audio, _ = await tts_client.voice_clone(
            text=sample_text,
            ref_audio=audio_content,
            ref_text=reference_transcript,
            language=language.value,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Voice clone failed: {str(e)}") from e

    # Store both recordings under random file names.
    ref_audio_id = await db.save_audio(
        audio_content,
        f"ref_{uuid.uuid4()}.wav",
        metadata={"type": "reference"},
    )
    sample_audio_id = await db.save_audio(
        sample_audio,
        f"sample_{uuid.uuid4()}.wav",
        metadata={"type": "sample"},
    )

    voice_id = f"clone_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    doc = {
        "voice_id": voice_id,
        "name": name,
        "description": description,
        "type": VoiceType.CLONED.value,
        "language": language.value,
        "reference_audio_id": ref_audio_id,
        "reference_transcript": reference_transcript,
        "sample_audio_id": sample_audio_id,
        "is_public": is_public,
        "created_at": now,
        "updated_at": now,
    }
    await db.voices.insert_one(doc)

    return _voice_doc_to_response(doc)
@router.post("/design", response_model=VoiceResponse)
async def create_voice_design(
    request: VoiceDesignRequest,
    db: Database = Depends(get_db),
):
    """Create a new voice from a text design prompt.

    A sample sentence is synthesized from the prompt (for example a
    description such as "male in his 30s, soft and calm voice"), the sample
    is saved and a ``designed`` voice document is inserted.

    Raises:
        HTTPException: 500 when the TTS engine fails to design the voice.
    """
    language_code = request.language.value

    try:
        # The returned sample rate is not stored.
        sample_audio, _ = await tts_client.voice_design(
            text="안녕하세요, 저는 AI로 생성된 음성입니다.",
            instruct=request.design_prompt,
            language=language_code,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Voice design failed: {str(e)}")

    sample_name = f"sample_{uuid.uuid4()}.wav"
    sample_audio_id = await db.save_audio(
        sample_audio,
        sample_name,
        metadata={"type": "sample"},
    )

    voice_id = f"design_{uuid.uuid4().hex[:12]}"
    timestamp = datetime.utcnow()
    voice_doc = {
        "voice_id": voice_id,
        "name": request.name,
        "description": request.description,
        "type": VoiceType.DESIGNED.value,
        "language": language_code,
        "design_prompt": request.design_prompt,
        "sample_audio_id": sample_audio_id,
        "is_public": request.is_public,
        "created_at": timestamp,
        "updated_at": timestamp,
    }
    await db.voices.insert_one(voice_doc)

    return _voice_doc_to_response(voice_doc)
@router.patch("/{voice_id}", response_model=VoiceResponse)
async def update_voice(
    voice_id: str,
    request: VoiceUpdateRequest,
    db: Database = Depends(get_db),
):
    """Update the supplied fields of a stored voice.

    Only request fields that are not ``None`` are written; ``updated_at``
    is refreshed on every successful update.

    Raises:
        HTTPException: 400 for a preset voice or when no field was given;
            404 when the voice does not exist.
    """
    # Presets are defined in code and are read-only.
    if any(p["voice_id"] == voice_id for p in PRESET_VOICES):
        raise HTTPException(status_code=400, detail="Cannot modify preset voice")

    changes = {}
    for field, value in request.model_dump().items():
        if value is not None:
            changes[field] = value

    if not changes:
        raise HTTPException(status_code=400, detail="No fields to update")

    changes["updated_at"] = datetime.utcnow()

    selector = {"voice_id": voice_id}
    outcome = await db.voices.update_one(selector, {"$set": changes})
    if outcome.matched_count == 0:
        raise HTTPException(status_code=404, detail="Voice not found")

    # Re-read so the response reflects the stored document.
    refreshed = await db.voices.find_one({"voice_id": voice_id})
    return _voice_doc_to_response(refreshed)
@router.delete("/{voice_id}")
async def delete_voice(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Delete a stored voice and the audio files it references.

    Raises:
        HTTPException: 400 for a preset voice; 404 when the voice does not
            exist.
    """
    # Presets are defined in code and cannot be removed.
    if any(p["voice_id"] == voice_id for p in PRESET_VOICES):
        raise HTTPException(status_code=400, detail="Cannot delete preset voice")

    stored = await db.voices.find_one({"voice_id": voice_id})
    if not stored:
        raise HTTPException(status_code=404, detail="Voice not found")

    # Reference audio first, then the sample, then the document itself.
    for audio_key in ("reference_audio_id", "sample_audio_id"):
        if stored.get(audio_key):
            await db.delete_audio(stored[audio_key])

    await db.voices.delete_one({"voice_id": voice_id})

    return {"status": "deleted", "voice_id": voice_id}

View File

@ -0,0 +1,260 @@
# 오디오 믹서 서비스
# pydub를 사용한 오디오 합성/믹싱
import os
import tempfile
from typing import Optional
from pydub import AudioSegment
from pydub.effects import normalize
from app.models.drama import TimelineItem
class AudioMixer:
    """
    Audio mixer built on pydub ``AudioSegment`` objects.

    Features:
    - combining several audio tracks
    - volume adjustment
    - fade in / fade out
    - timeline-based mixing
    """

    def __init__(self, sample_rate: int = 44100):
        # Stored for callers; no method of this class reads it.
        self.sample_rate = sample_rate

    def load_audio(self, file_path: str) -> AudioSegment:
        """Load an audio file from disk."""
        return AudioSegment.from_file(file_path)

    def adjust_volume(self, audio: AudioSegment, volume: float) -> AudioSegment:
        """Scale loudness by a linear factor (1.0 = unchanged).

        The factor is converted to a gain in decibels with
        ``20 * log10(volume)``, so 0.5 is about -6 dB and 2.0 about +6 dB.
        A factor of zero or less applies -120 dB, which is effectively
        silence.
        """
        import math

        if volume == 1.0:
            return audio

        # log10 is undefined for non-positive factors; treat them as mute.
        db_change = 20 * math.log10(volume) if volume > 0 else -120
        return audio + db_change

    def apply_fade(
        self,
        audio: AudioSegment,
        fade_in_ms: int = 0,
        fade_out_ms: int = 0
    ) -> AudioSegment:
        """Apply a fade-in and/or fade-out; non-positive durations are skipped."""
        if fade_in_ms > 0:
            audio = audio.fade_in(fade_in_ms)
        if fade_out_ms > 0:
            audio = audio.fade_out(fade_out_ms)
        return audio

    def concatenate(self, segments: list[AudioSegment]) -> AudioSegment:
        """Join segments back to back; an empty list yields zero-length silence."""
        if not segments:
            return AudioSegment.silent(duration=0)

        result = segments[0]
        for segment in segments[1:]:
            result += segment
        return result

    def overlay(
        self,
        base: AudioSegment,
        overlay_audio: AudioSegment,
        position_ms: int = 0
    ) -> AudioSegment:
        """Overlay ``overlay_audio`` on ``base`` (e.g. a voice over music)."""
        return base.overlay(overlay_audio, position=position_ms)

    def create_silence(self, duration_ms: int) -> AudioSegment:
        """Create silence of the given length in milliseconds."""
        return AudioSegment.silent(duration=duration_ms)

    def mix_timeline(
        self,
        timeline: list[TimelineItem],
        audio_files: dict[str, str]  # audio_path -> actual file path
    ) -> AudioSegment:
        """
        Mix a timeline into a single audio segment.

        Args:
            timeline: timeline items to place
            audio_files: maps each item's ``audio_path`` to a file on disk

        Returns:
            The mixed audio. An empty timeline yields one second of silence.
            Items whose audio is not mapped or not on disk are skipped.
        """
        if not timeline:
            return AudioSegment.silent(duration=1000)

        # The mix is as long as the latest item end.
        total_duration_ms = max(
            int((item.start_time + item.duration) * 1000)
            for item in timeline
        )

        # One silent bed per track kind (voice, music, sfx).
        voice_track = AudioSegment.silent(duration=total_duration_ms)
        music_track = AudioSegment.silent(duration=total_duration_ms)
        sfx_track = AudioSegment.silent(duration=total_duration_ms)

        for item in timeline:
            if not item.audio_path or item.audio_path not in audio_files:
                continue

            file_path = audio_files[item.audio_path]
            if not os.path.exists(file_path):
                continue

            audio = self.load_audio(file_path)
            audio = self.adjust_volume(audio, item.volume)
            audio = self.apply_fade(
                audio,
                int(item.fade_in * 1000),
                int(item.fade_out * 1000),
            )

            position_ms = int(item.start_time * 1000)

            # Items of any other type are ignored.
            if item.type == "voice":
                voice_track = voice_track.overlay(audio, position=position_ms)
            elif item.type == "music":
                music_track = music_track.overlay(audio, position=position_ms)
            elif item.type == "sfx":
                sfx_track = sfx_track.overlay(audio, position=position_ms)

        # Layer the tracks: music at the bottom, then sfx, then voice.
        mixed = music_track.overlay(sfx_track).overlay(voice_track)

        return mixed

    def auto_duck(
        self,
        music: AudioSegment,
        voice: AudioSegment,
        duck_amount_db: float = -10,
        attack_ms: int = 100,
        release_ms: int = 300
    ) -> AudioSegment:
        """
        Auto-ducking: lower the music while the voice is audible.

        Simple implementation: the voice is scanned in 50 ms chunks and the
        matching music chunk gets ``duck_amount_db`` applied whenever the
        voice chunk's RMS exceeds a fixed threshold. ``attack_ms`` and
        ``release_ms`` are accepted but not used, so gain changes are
        abrupt. The result covers only the span of the voice: shorter
        music is padded with silence, and music past the last voice chunk
        is not included.
        """
        # Pad the music so every voice chunk has a music counterpart.
        if len(music) < len(voice):
            music = music + AudioSegment.silent(duration=len(voice) - len(music))

        chunk_ms = 50
        ducked_music = AudioSegment.silent(duration=0)

        for i in range(0, len(voice), chunk_ms):
            voice_chunk = voice[i:i + chunk_ms]
            music_chunk = music[i:i + chunk_ms]

            # Duck when the voice RMS is above the (tunable) threshold.
            if voice_chunk.rms > 100:
                music_chunk = music_chunk + duck_amount_db

            ducked_music += music_chunk

        return ducked_music

    def export(
        self,
        audio: AudioSegment,
        output_path: str,
        format: str = "wav",
        normalize_audio: bool = True
    ) -> str:
        """
        Write audio to a file.

        Args:
            audio: audio segment to write
            output_path: destination file path
            format: ``"mp3"`` exports MP3 at 192k; any other value exports WAV
            normalize_audio: normalize the audio before exporting

        Returns:
            The path the audio was written to.
        """
        if normalize_audio:
            audio = normalize(audio)

        if format == "mp3":
            export_params = {"format": "mp3", "bitrate": "192k"}
        else:
            export_params = {"format": "wav"}

        audio.export(output_path, **export_params)
        return output_path

    def create_with_background(
        self,
        voice_segments: list[tuple[AudioSegment, float]],  # (audio, start_time)
        background_music: Optional[AudioSegment] = None,
        music_volume: float = 0.3,
        gap_between_lines_ms: int = 500
    ) -> AudioSegment:
        """
        Simple voice + background-music composition.

        Args:
            voice_segments: list of (audio, start time in seconds) tuples
            background_music: background music; without it only the voice
                track is returned
            music_volume: linear volume factor for the background music
            gap_between_lines_ms: silence appended after every line

        Returns:
            The composed audio. No voice segments yields one second of
            silence.
        """
        if not voice_segments:
            return AudioSegment.silent(duration=1000)

        # Build the voice track sequentially.
        voice_track = AudioSegment.silent(duration=0)

        for audio, start_time in voice_segments:
            # Pad with silence up to the requested start. A segment whose
            # start lies before the current end is simply appended.
            current_pos = len(voice_track)
            target_pos = int(start_time * 1000)

            if target_pos > current_pos:
                voice_track += AudioSegment.silent(duration=target_pos - current_pos)

            voice_track += audio
            voice_track += AudioSegment.silent(duration=gap_between_lines_ms)

        total_duration = len(voice_track)

        # A missing or zero-length music segment is falsy and skips mixing.
        if background_music:
            # Loop the music when it is shorter than the voice track.
            if len(background_music) < total_duration:
                loops_needed = (total_duration // len(background_music)) + 1
                background_music = background_music * loops_needed

            background_music = background_music[:total_duration]
            background_music = self.adjust_volume(background_music, music_volume)
            background_music = self.auto_duck(background_music, voice_track)

            return background_music.overlay(voice_track)
        else:
            return voice_track
# 싱글톤 인스턴스
audio_mixer = AudioMixer()

View File

@ -0,0 +1,362 @@
# 드라마 오케스트레이터
# 스크립트 파싱 → 에셋 생성 → 타임라인 구성 → 믹싱 조율
import os
import uuid
import asyncio
import tempfile
from datetime import datetime
from typing import Optional
from pydub import AudioSegment
from app.models.drama import (
ParsedScript, ScriptElement, ElementType, Character,
TimelineItem, DramaProject, DramaCreateRequest
)
from app.services.script_parser import script_parser
from app.services.audio_mixer import audio_mixer
from app.services.tts_client import tts_client
from app.services.freesound_client import freesound_client
from app.database import db
class DramaOrchestrator:
    """
    Orchestrates drama generation.

    Workflow:
    1. Parse the script
    2. Map characters to voices
    3. Generate assets (TTS, music, sound effects)
    4. Build the timeline
    5. Mix the audio
    6. Write the final file
    """

    # Default gap between dialogue lines (seconds)
    DEFAULT_DIALOGUE_GAP = 0.5

    # Fallback length of a sound effect (seconds)
    DEFAULT_SFX_DURATION = 2.0

    # Assumed TTS speaking rate (characters per second)
    TTS_CHARS_PER_SECOND = 5

    async def create_project(
        self,
        request: DramaCreateRequest
    ) -> DramaProject:
        """Parse the script, apply the voice mapping and store a draft project."""
        project_id = str(uuid.uuid4())

        parsed = script_parser.parse(request.script)

        # Attach the requested voice to every character that has a mapping.
        voice_mapping = request.voice_mapping or {}
        for char in parsed.characters:
            if char.name in voice_mapping:
                char.voice_id = voice_mapping[char.name]

        project = DramaProject(
            project_id=project_id,
            title=request.title or parsed.title or "Untitled Drama",
            script_raw=request.script,
            script_parsed=parsed,
            voice_mapping=voice_mapping,
            status="draft"
        )

        await db.dramas.insert_one(project.model_dump())

        return project

    async def get_project(self, project_id: str) -> Optional[DramaProject]:
        """Load a project by id; returns ``None`` when it does not exist."""
        doc = await db.dramas.find_one({"project_id": project_id})
        if doc:
            return DramaProject(**doc)
        return None

    async def update_project_status(
        self,
        project_id: str,
        status: str,
        error_message: Optional[str] = None
    ):
        """Set the project's status (and error message, when one is given)."""
        update = {
            "status": status,
            "updated_at": datetime.utcnow()
        }
        if error_message:
            update["error_message"] = error_message

        await db.dramas.update_one(
            {"project_id": project_id},
            {"$set": update}
        )

    def estimate_duration(self, parsed: ParsedScript) -> float:
        """Estimate the playback time in seconds.

        Dialogue counts its text length at ``TTS_CHARS_PER_SECOND`` plus the
        dialogue gap, pauses count their duration (1 s when unset) and each
        sound effect counts ``DEFAULT_SFX_DURATION``.
        """
        total = 0.0

        for element in parsed.elements:
            if element.type == ElementType.DIALOGUE:
                text_len = len(element.text or "")
                total += text_len / self.TTS_CHARS_PER_SECOND
                total += self.DEFAULT_DIALOGUE_GAP
            elif element.type == ElementType.PAUSE:
                total += element.duration or 1.0
            elif element.type == ElementType.SFX:
                total += self.DEFAULT_SFX_DURATION

        return total

    async def generate_assets(
        self,
        project: DramaProject,
        temp_dir: str
    ) -> dict[str, str]:
        """
        Generate the audio assets (TTS, SFX) for a project.

        Dialogue is synthesized per line; when synthesis fails the line is
        replaced by silence of the estimated length so the timeline keeps
        its shape. Sound effects use the first Freesound search hit that
        has a preview. Music is not generated yet.

        Returns:
            Mapping of audio id -> file path inside ``temp_dir``.
        """
        assets: dict[str, str] = {}
        parsed = project.script_parsed

        if not parsed:
            return assets

        dialogue_index = 0

        for element in parsed.elements:
            if element.type == ElementType.DIALOGUE:
                audio_id = f"dialogue_{dialogue_index}"
                file_path = os.path.join(temp_dir, f"{audio_id}.wav")

                # Fall back to the "default" voice for unmapped characters.
                voice_id = project.voice_mapping.get(element.character) or "default"

                try:
                    # NOTE(review): other call sites in this codebase call
                    # tts_client.synthesize(text=, speaker=, language=) and
                    # unpack (audio_bytes, sample_rate). Confirm the client
                    # accepts voice_id=; if it does not, every line falls
                    # through to the silence fallback below.
                    result = await tts_client.synthesize(
                        text=element.text or "",
                        voice_id=voice_id,
                        instruct=element.emotion
                    )
                    # Accept both raw bytes and an (audio, sample_rate) tuple.
                    audio_data = result[0] if isinstance(result, tuple) else result

                    with open(file_path, "wb") as f:
                        f.write(audio_data)
                except Exception as e:
                    print(f"TTS 생성 실패 ({element.character}): {e}")
                    # Substitute silence of the estimated spoken length.
                    silence_duration = len(element.text or "") / self.TTS_CHARS_PER_SECOND
                    silence = AudioSegment.silent(duration=int(silence_duration * 1000))
                    silence.export(file_path, format="wav")

                assets[audio_id] = file_path
                dialogue_index += 1

            elif element.type == ElementType.SFX:
                audio_id = f"sfx_{element.description}"

                try:
                    response = await freesound_client.search(
                        query=element.description,
                        page_size=1
                    )
                    # search() returns {"count": ..., "results": [...]} whose
                    # entries are keyed by "freesound_id" / "preview_url".
                    hits = response.get("results", []) if response else []

                    if hits:
                        sound = hits[0]
                        if sound.get("preview_url"):
                            audio_data = await freesound_client.download_preview(
                                sound["preview_url"]
                            )

                            file_path = os.path.join(
                                temp_dir, f"sfx_{sound['freesound_id']}.mp3"
                            )
                            with open(file_path, "wb") as f:
                                f.write(audio_data)

                            assets[audio_id] = file_path
                except Exception as e:
                    # Best effort: a missing sound effect must not fail the render.
                    print(f"SFX 검색 실패 ({element.description}): {e}")

            elif element.type == ElementType.MUSIC:
                # TODO: integrate MusicGen (needs a GPU and a music client).
                # Until then no "music_<description>" asset is produced.
                pass

        return assets

    def build_timeline(
        self,
        parsed: ParsedScript,
        assets: dict[str, str]
    ) -> list[TimelineItem]:
        """Lay the generated assets out on a timeline.

        Dialogue advances the clock by its audio length plus the dialogue
        gap, pauses advance it by their duration, and sound effects are
        placed at the current time without advancing it.
        """
        timeline: list[TimelineItem] = []
        current_time = 0.0
        dialogue_index = 0
        current_music: Optional[dict] = None

        for element in parsed.elements:
            if element.type == ElementType.DIALOGUE:
                audio_id = f"dialogue_{dialogue_index}"

                if audio_id in assets:
                    # Prefer the real audio length; estimate if unreadable.
                    try:
                        audio = AudioSegment.from_file(assets[audio_id])
                        duration = len(audio) / 1000.0
                    except Exception:
                        duration = len(element.text or "") / self.TTS_CHARS_PER_SECOND

                    timeline.append(TimelineItem(
                        start_time=current_time,
                        duration=duration,
                        type="voice",
                        audio_path=audio_id,
                        volume=1.0
                    ))

                    current_time += duration + self.DEFAULT_DIALOGUE_GAP

                # Indices must stay aligned with generate_assets().
                dialogue_index += 1

            elif element.type == ElementType.PAUSE:
                current_time += element.duration or 1.0

            elif element.type == ElementType.SFX:
                audio_id = f"sfx_{element.description}"

                if audio_id in assets:
                    try:
                        audio = AudioSegment.from_file(assets[audio_id])
                        duration = len(audio) / 1000.0
                    except Exception:
                        duration = self.DEFAULT_SFX_DURATION

                    timeline.append(TimelineItem(
                        start_time=current_time,
                        duration=duration,
                        type="sfx",
                        audio_path=audio_id,
                        volume=element.volume or 1.0
                    ))

            elif element.type == ElementType.MUSIC:
                audio_id = f"music_{element.description}"

                # NOTE(review): only the music still active at the end of
                # the script becomes a timeline item; music that is stopped
                # or replaced earlier is dropped. Confirm this is intended.
                if element.action == "stop":
                    current_music = None
                elif element.action in ("play", "change", "fade_in"):
                    if audio_id in assets:
                        # Music runs from now to the end (adjusted later).
                        current_music = {
                            "audio_id": audio_id,
                            "start_time": current_time,
                            "volume": element.volume or 0.3,
                            "fade_in": element.fade_duration if element.action == "fade_in" else 0
                        }

        # Add the active background music, spanning up to the final time.
        if current_music:
            timeline.append(TimelineItem(
                start_time=current_music["start_time"],
                duration=current_time - current_music["start_time"],
                type="music",
                audio_path=current_music["audio_id"],
                volume=current_music["volume"],
                fade_in=current_music.get("fade_in", 0)
            ))

        return timeline

    async def render(
        self,
        project_id: str,
        output_format: str = "wav"
    ) -> Optional[str]:
        """
        Render a drama to an audio file.

        Marks the project ``processing``, generates assets, builds and
        mixes the timeline, writes the output and marks the project
        ``completed``. Any failure marks it ``error`` and is re-raised.

        Returns:
            Path of the output file, or ``None`` when the project is
            missing or has no parsed script.
        """
        project = await self.get_project(project_id)
        if not project or not project.script_parsed:
            return None

        await self.update_project_status(project_id, "processing")

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                # 1. Generate assets
                assets = await self.generate_assets(project, temp_dir)

                # 2. Build the timeline
                timeline = self.build_timeline(project.script_parsed, assets)

                # 3. Mix
                mixed_audio = audio_mixer.mix_timeline(timeline, assets)

                # 4. Export into the temporary directory
                output_path = os.path.join(temp_dir, f"drama_{project_id}.{output_format}")
                audio_mixer.export(mixed_audio, output_path, format=output_format)

                # 5. TODO: store the result in GridFS. For now the file is
                # copied out before the temporary directory is removed.
                final_path = f"/tmp/drama_{project_id}.{output_format}"
                import shutil
                shutil.copy(output_path, final_path)

                await db.dramas.update_one(
                    {"project_id": project_id},
                    {
                        "$set": {
                            "status": "completed",
                            "timeline": [t.model_dump() for t in timeline],
                            "output_file_id": final_path,
                            "updated_at": datetime.utcnow()
                        }
                    }
                )

                return final_path

        except Exception as e:
            await self.update_project_status(project_id, "error", str(e))
            raise

    async def list_projects(
        self,
        skip: int = 0,
        limit: int = 20
    ) -> list[DramaProject]:
        """List projects, newest first, with skip/limit paging."""
        cursor = db.dramas.find().sort("created_at", -1).skip(skip).limit(limit)
        projects = []
        async for doc in cursor:
            projects.append(DramaProject(**doc))
        return projects
# 싱글톤 인스턴스
drama_orchestrator = DramaOrchestrator()

View File

@ -0,0 +1,165 @@
"""Freesound API 클라이언트
효과음 검색 및 다운로드
https://freesound.org/docs/api/
"""
import os
import logging
from typing import Optional, List, Dict
import httpx
logger = logging.getLogger(__name__)
class FreesoundClient:
"""Freesound API 클라이언트"""
BASE_URL = "https://freesound.org/apiv2"
def __init__(self):
self.api_key = os.getenv("FREESOUND_API_KEY", "")
self.timeout = httpx.Timeout(30.0, connect=10.0)
def _get_headers(self) -> dict:
"""인증 헤더 반환"""
return {"Authorization": f"Token {self.api_key}"}
async def search(
self,
query: str,
page: int = 1,
page_size: int = 20,
filter_fields: Optional[str] = None,
sort: str = "score",
min_duration: Optional[float] = None,
max_duration: Optional[float] = None,
) -> Dict:
"""효과음 검색
Args:
query: 검색어
page: 페이지 번호
page_size: 페이지당 결과 수
filter_fields: 필터 (예: "duration:[1 TO 5]")
sort: 정렬 (score, duration_asc, duration_desc, created_desc 등)
min_duration: 최소 길이 (초)
max_duration: 최대 길이 (초)
Returns:
검색 결과 딕셔너리
"""
if not self.api_key:
logger.warning("Freesound API 키가 설정되지 않음")
return {"count": 0, "results": []}
# 필터 구성
filters = []
if min_duration is not None or max_duration is not None:
min_d = min_duration if min_duration is not None else 0
max_d = max_duration if max_duration is not None else "*"
filters.append(f"duration:[{min_d} TO {max_d}]")
if filter_fields:
filters.append(filter_fields)
params = {
"query": query,
"page": page,
"page_size": min(page_size, 150), # Freesound 최대 150
"sort": sort,
"fields": "id,name,description,duration,tags,previews,license,username",
}
if filters:
params["filter"] = " ".join(filters)
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(
f"{self.BASE_URL}/search/text/",
params=params,
headers=self._get_headers(),
)
response.raise_for_status()
data = response.json()
# 결과 정리
results = []
for sound in data.get("results", []):
results.append({
"freesound_id": sound["id"],
"name": sound.get("name", ""),
"description": sound.get("description", ""),
"duration": sound.get("duration", 0),
"tags": sound.get("tags", []),
"preview_url": sound.get("previews", {}).get("preview-hq-mp3", ""),
"license": sound.get("license", ""),
"username": sound.get("username", ""),
})
return {
"count": data.get("count", 0),
"page": page,
"page_size": page_size,
"results": results,
}
async def get_sound(self, sound_id: int) -> Dict:
"""사운드 상세 정보 조회"""
if not self.api_key:
raise ValueError("Freesound API 키 필요")
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.get(
f"{self.BASE_URL}/sounds/{sound_id}/",
headers=self._get_headers(),
)
response.raise_for_status()
return response.json()
async def download_preview(self, preview_url: str) -> bytes:
    """Download a preview audio file and return its raw bytes.

    The request is sent without the client's auth headers (per the original
    note, previews do not require authentication).

    Args:
        preview_url: Absolute URL of the preview file.

    Returns:
        The response body as bytes.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    session = httpx.AsyncClient(timeout=self.timeout)
    async with session:
        reply = await session.get(preview_url)
        reply.raise_for_status()
        audio_bytes = reply.content
    return audio_bytes
async def get_similar_sounds(
    self,
    sound_id: int,
    page_size: int = 10,
) -> List[Dict]:
    """Look up sounds that Freesound reports as similar to a given sound.

    Args:
        sound_id: Freesound identifier of the reference sound.
        page_size: Number of results to request. Clamped to 150, the same
            upper bound the text search of this client applies.

    Returns:
        A list of normalized sound dictionaries with the keys
        ``freesound_id``, ``name``, ``description``, ``duration``, ``tags``,
        ``preview_url``, ``license`` and ``username``. An empty list is
        returned when no API key is configured.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status
            (raised by ``raise_for_status``).
        KeyError: If a returned sound carries no ``id`` field.
    """
    if not self.api_key:
        return []

    params = {
        # Keep the request within the page-size limit used by search().
        "page_size": min(page_size, 150),
        # "username" is requested so similar results expose the same author
        # attribution as search results.
        "fields": "id,name,description,duration,tags,previews,license,username",
    }

    async with httpx.AsyncClient(timeout=self.timeout) as client:
        response = await client.get(
            f"{self.BASE_URL}/sounds/{sound_id}/similar/",
            params=params,
            headers=self._get_headers(),
        )
        response.raise_for_status()
        data = response.json()

    # Flatten each raw API record into the shape used by search results.
    return [
        {
            "freesound_id": sound["id"],
            "name": sound.get("name", ""),
            "description": sound.get("description", ""),
            "duration": sound.get("duration", 0),
            "tags": sound.get("tags", []),
            "preview_url": sound.get("previews", {}).get("preview-hq-mp3", ""),
            "license": sound.get("license", ""),
            "username": sound.get("username", ""),
        }
        for sound in data.get("results", [])
    ]
# Module-level shared instance, intended to be used as a singleton
freesound_client = FreesoundClient()

View File

@ -0,0 +1,174 @@
# 드라마 스크립트 파서
# 마크다운 형식의 대본을 구조화된 데이터로 변환
import re
from typing import Optional
from app.models.drama import (
ParsedScript, ScriptElement, Character, ElementType
)
class ScriptParser:
    """Parser that turns a markdown-style drama script into structured data.

    Supported line formats (keywords are matched literally, in Korean):
    - ``# Title``
    - ``[장소: text]``, ``[지문: text]`` or ``[장면: text]`` -> stage direction
    - ``[효과음: description]`` -> sound effect
    - ``[음악: description]`` or
      ``[음악 시작|중지|변경|페이드인|페이드아웃: description]`` -> music cue
    - ``[쉼: 2초]`` -> pause, duration in seconds
    - ``Name(description, emotion): line`` -> dialogue
    - ``Name: line`` -> dialogue
    - any other line that contains no colon and does not start with ``[`` or
      ``#`` -> stage direction

    Lines that fit none of the above (unrecognized bracket tags, headings
    other than the ``# `` title) are skipped without an error.
    """

    # Regular-expression patterns, tried in the order used by parse()
    TITLE_PATTERN = re.compile(r'^#\s+(.+)$')
    DIRECTION_PATTERN = re.compile(r'^\[(?:장소|지문|장면):\s*(.+)\]$')
    SFX_PATTERN = re.compile(r'^\[효과음:\s*(.+)\]$')
    MUSIC_PATTERN = re.compile(r'^\[음악(?:\s+(시작|중지|변경|페이드인|페이드아웃))?:\s*(.+)\]$')
    PAUSE_PATTERN = re.compile(r'^\[쉼:\s*(\d+(?:\.\d+)?)\s*초?\]$')
    # The (?!#) lookahead keeps markdown headings such as "## 1막: 시작" from
    # being read as dialogue spoken by a character named "## 1막".
    DIALOGUE_PATTERN = re.compile(r'^(?!#)([^(\[:]+?)(?:\(([^)]*)\))?:\s*(.+)$')

    # Maps the optional Korean action keyword of a music tag to an action name
    MUSIC_ACTIONS = {
        None: "play",
        "시작": "play",
        "중지": "stop",
        "변경": "change",
        "페이드인": "fade_in",
        "페이드아웃": "fade_out",
    }

    @staticmethod
    def _split_character_info(char_info: Optional[str]) -> tuple[Optional[str], Optional[str]]:
        """Split the parenthesized part of a dialogue line.

        Returns:
            ``(description, emotion)``. With two or more comma-separated
            values the first is the description and the second the emotion
            (further values are ignored); a single value is treated as the
            emotion; empty or missing input yields ``(None, None)``.
        """
        if not char_info:
            return None, None
        parts = [part.strip() for part in char_info.split(',')]
        if len(parts) >= 2:
            return parts[0], parts[1]
        return None, parts[0]

    def parse(self, script: str) -> ParsedScript:
        """Parse a script into a title, a character list and ordered elements.

        Each non-empty line is stripped and matched against the class
        patterns in a fixed order: title, direction, sound effect, music,
        pause, dialogue. The first pattern that matches wins.

        Args:
            script: Full script text, one element per line.

        Returns:
            A ``ParsedScript`` holding the title (``None`` if no title line
            was found; the last title line wins if there are several), the
            characters in order of first appearance, and the parsed elements
            in script order.
        """
        title: Optional[str] = None
        characters: dict[str, Character] = {}
        elements: list[ScriptElement] = []

        for raw_line in script.strip().split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Title
            if match := self.TITLE_PATTERN.match(line):
                title = match.group(1)
                continue

            # Stage direction / scene
            if match := self.DIRECTION_PATTERN.match(line):
                elements.append(ScriptElement(
                    type=ElementType.DIRECTION,
                    text=match.group(1)
                ))
                continue

            # Sound effect
            if match := self.SFX_PATTERN.match(line):
                elements.append(ScriptElement(
                    type=ElementType.SFX,
                    description=match.group(1),
                    volume=1.0
                ))
                continue

            # Music cue
            if match := self.MUSIC_PATTERN.match(line):
                action = self.MUSIC_ACTIONS.get(match.group(1), "play")
                elements.append(ScriptElement(
                    type=ElementType.MUSIC,
                    description=match.group(2),
                    action=action,
                    volume=0.3,
                    fade_duration=2.0
                ))
                continue

            # Pause
            if match := self.PAUSE_PATTERN.match(line):
                elements.append(ScriptElement(
                    type=ElementType.PAUSE,
                    duration=float(match.group(1))
                ))
                continue

            # Dialogue
            if match := self.DIALOGUE_PATTERN.match(line):
                char_name = match.group(1).strip()
                description, emotion = self._split_character_info(match.group(2))

                # Register the character on first appearance; a later line may
                # still supply a description if none has been recorded yet.
                if char_name not in characters:
                    characters[char_name] = Character(
                        name=char_name,
                        description=description
                    )
                elif description and not characters[char_name].description:
                    characters[char_name].description = description

                elements.append(ScriptElement(
                    type=ElementType.DIALOGUE,
                    character=char_name,
                    text=match.group(3).strip(),
                    emotion=emotion
                ))
                continue

            # Fallback: plain text (no bracket tag, no heading, no colon) is
            # kept as a stage direction; everything else is dropped.
            if not line.startswith(('[', '#')) and ':' not in line:
                elements.append(ScriptElement(
                    type=ElementType.DIRECTION,
                    text=line
                ))

        return ParsedScript(
            title=title,
            characters=list(characters.values()),
            elements=elements
        )

    def validate_script(self, script: str) -> tuple[bool, list[str]]:
        """Check that a script is non-empty and contains dialogue.

        Args:
            script: Full script text.

        Returns:
            ``(is_valid, error_messages)``. A blank script is rejected
            without being parsed. Otherwise the script is parsed, and an
            error is recorded if it yields no elements and/or no dialogue
            elements.
        """
        errors = []
        if not script or not script.strip():
            errors.append("스크립트가 비어있습니다")
            return False, errors

        parsed = self.parse(script)
        if not parsed.elements:
            errors.append("파싱된 요소가 없습니다")

        # At least one dialogue line is required
        dialogue_count = sum(1 for e in parsed.elements if e.type == ElementType.DIALOGUE)
        if dialogue_count == 0:
            errors.append("대사가 없습니다")

        return len(errors) == 0, errors
# Module-level shared instance, intended to be used as a singleton
script_parser = ScriptParser()

View File

@ -0,0 +1,135 @@
"""TTS 엔진 클라이언트
audio-studio-tts 서비스와 통신
"""
import os
import logging
from typing import Optional, Tuple, List
import httpx
logger = logging.getLogger(__name__)
class TTSClient:
    """HTTP client for the TTS engine service.

    The engine location is read from the ``TTS_ENGINE_URL`` environment
    variable (default ``http://localhost:8001``). Every call opens its own
    ``httpx.AsyncClient`` and raises ``httpx.HTTPStatusError`` on an error
    status via ``raise_for_status``.
    """

    # Sample rate assumed when a response carries no X-Sample-Rate header.
    DEFAULT_SAMPLE_RATE = 24000

    def __init__(self):
        # Drop trailing slashes so a configured "http://host:8001/" does not
        # produce request paths such as "//health".
        self.base_url = os.getenv("TTS_ENGINE_URL", "http://localhost:8001").rstrip("/")
        # Generous overall timeout because synthesis can take a while.
        self.timeout = httpx.Timeout(120.0, connect=10.0)

    @classmethod
    def _sample_rate(cls, headers) -> int:
        """Read the sample rate from response headers.

        Args:
            headers: Mapping-like response headers supporting ``get``.

        Returns:
            The integer value of ``X-Sample-Rate``, or
            ``DEFAULT_SAMPLE_RATE`` when the header is absent.

        Raises:
            ValueError: If the header value is not an integer.
        """
        return int(headers.get("X-Sample-Rate", cls.DEFAULT_SAMPLE_RATE))

    async def health_check(self) -> dict:
        """Return the JSON body of the engine's ``/health`` endpoint."""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(f"{self.base_url}/health")
            response.raise_for_status()
            return response.json()

    async def get_speakers(self) -> List[str]:
        """Return the ``speakers`` entry of the ``/speakers`` response."""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(f"{self.base_url}/speakers")
            response.raise_for_status()
            return response.json()["speakers"]

    async def get_languages(self) -> dict:
        """Return the ``languages`` entry of the ``/languages`` response."""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(f"{self.base_url}/languages")
            response.raise_for_status()
            return response.json()["languages"]

    async def synthesize(
        self,
        text: str,
        speaker: str = "Chelsie",
        language: str = "ko",
        instruct: Optional[str] = None,
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a preset speaker via ``POST /synthesize``.

        Args:
            text: Text to speak.
            speaker: Preset speaker name.
            language: Language code sent to the engine.
            instruct: Optional instruction string; only included in the
                request payload when non-empty.

        Returns:
            ``(audio_bytes, sample_rate)``.
        """
        payload = {
            "text": text,
            "speaker": speaker,
            "language": language,
        }
        if instruct:
            payload["instruct"] = instruct

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(
                f"{self.base_url}/synthesize",
                json=payload,
            )
            response.raise_for_status()
            return response.content, self._sample_rate(response.headers)

    async def voice_clone(
        self,
        text: str,
        ref_audio: bytes,
        ref_text: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a cloned voice via ``POST /voice-clone``.

        Args:
            text: Text to speak.
            ref_audio: Reference audio bytes, uploaded as ``reference.wav``
                with content type ``audio/wav``.
            ref_text: Text sent alongside the reference audio as ``ref_text``.
            language: Language code sent to the engine.

        Returns:
            ``(audio_bytes, sample_rate)``.
        """
        # Sent as multipart/form-data: one file part plus plain form fields.
        files = {"ref_audio": ("reference.wav", ref_audio, "audio/wav")}
        data = {
            "text": text,
            "ref_text": ref_text,
            "language": language,
        }

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(
                f"{self.base_url}/voice-clone",
                files=files,
                data=data,
            )
            response.raise_for_status()
            return response.content, self._sample_rate(response.headers)

    async def voice_design(
        self,
        text: str,
        instruct: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a designed voice via ``POST /voice-design``.

        Args:
            text: Text to speak.
            instruct: Instruction string sent as ``instruct``.
            language: Language code sent to the engine.

        Returns:
            ``(audio_bytes, sample_rate)``.
        """
        payload = {
            "text": text,
            "instruct": instruct,
            "language": language,
        }

        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(
                f"{self.base_url}/voice-design",
                json=payload,
            )
            response.raise_for_status()
            return response.content, self._sample_rate(response.headers)
# Module-level shared instance, intended to be used as a singleton
tts_client = TTSClient()