- FastAPI 백엔드 (audio-studio-api) - Next.js 프론트엔드 (audio-studio-ui) - Qwen3-TTS 엔진 (audio-studio-tts) - MusicGen 서비스 (audio-studio-musicgen) - Docker Compose 개발/운영 환경 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
427 lines
13 KiB
Python
427 lines
13 KiB
Python
"""Voice 관리 API 라우터"""
|
|
|
|
import uuid
|
|
from datetime import datetime
|
|
from typing import Optional, List
|
|
|
|
from fastapi import APIRouter, HTTPException, Depends, Query, UploadFile, File, Form
|
|
from fastapi.responses import Response
|
|
|
|
from app.database import Database, get_db
|
|
from app.models.voice import (
|
|
VoiceType,
|
|
LanguageCode,
|
|
VoiceResponse,
|
|
VoiceListResponse,
|
|
VoiceCloneRequest,
|
|
VoiceDesignRequest,
|
|
VoiceUpdateRequest,
|
|
)
|
|
from app.services.tts_client import tts_client
|
|
|
|
# Router that groups every voice endpoint of this module under /api/v1/voices.
router = APIRouter(prefix="/api/v1/voices", tags=["voices"])
|
|
|
|
|
|
# ========================================
# Preset voice catalogue (system defaults)
# ========================================

# Each row is (speaker name, description, gender, style tags). The speaker
# name is used verbatim as the preset_voice_id, and its lowercase form
# prefixed with "preset_" is the public voice_id.
PRESET_VOICES = [
    {
        "voice_id": f"preset_{speaker.lower()}",
        "name": speaker,
        "description": blurb,
        "type": VoiceType.PRESET,
        "preset_voice_id": speaker,
        "language": LanguageCode.EN,
        "gender": gender,
        "style_tags": list(tags),
    }
    for speaker, blurb, gender, tags in (
        ("Chelsie", "밝고 활기찬 여성 목소리", "female", ("bright", "energetic")),
        ("Ethan", "차분하고 신뢰감 있는 남성 목소리", "male", ("calm", "trustworthy")),
        ("Vivian", "부드럽고 따뜻한 여성 목소리", "female", ("soft", "warm")),
        ("Benjamin", "깊고 전문적인 남성 목소리", "male", ("deep", "professional")),
        ("Aurora", "우아하고 세련된 여성 목소리", "female", ("elegant", "refined")),
        ("Oliver", "친근하고 편안한 남성 목소리", "male", ("friendly", "casual")),
        ("Luna", "따뜻하고 감성적인 여성 목소리", "female", ("warm", "emotional")),
        ("Jasper", "전문적이고 명확한 남성 목소리", "male", ("professional", "clear")),
        ("Aria", "표현력 풍부한 여성 목소리", "female", ("expressive", "dynamic")),
    )
]
|
|
|
|
|
|
def _voice_doc_to_response(doc: dict) -> VoiceResponse:
    """Convert a MongoDB voice document into a ``VoiceResponse``.

    ``voice_id``, ``name`` and ``type`` are required keys (a missing one
    raises ``KeyError``). Every other field is optional: absent values fall
    back to ``None``, an empty tag list, Korean as the language, public
    visibility, or the current UTC time for the timestamps.
    """
    owner = doc.get("owner_id")
    sample = doc.get("sample_audio_id")

    fields = {
        "voice_id": doc["voice_id"],
        "name": doc["name"],
        "description": doc.get("description"),
        "type": doc["type"],
        "language": doc.get("language", LanguageCode.KO),
        "preset_voice_id": doc.get("preset_voice_id"),
        "design_prompt": doc.get("design_prompt"),
        "reference_transcript": doc.get("reference_transcript"),
        "gender": doc.get("gender"),
        "age_range": doc.get("age_range"),
        "style_tags": doc.get("style_tags", []),
        # Identifiers are exposed as strings; falsy ids are reported as None.
        "owner_id": str(owner) if owner else None,
        "is_public": doc.get("is_public", True),
        "sample_audio_id": str(sample) if sample else None,
        "created_at": doc.get("created_at", datetime.utcnow()),
        "updated_at": doc.get("updated_at", datetime.utcnow()),
    }
    return VoiceResponse(**fields)
|
|
|
|
|
|
@router.get("", response_model=VoiceListResponse)
async def list_voices(
    type: Optional[VoiceType] = Query(None, description="보이스 타입 필터"),
    language: Optional[LanguageCode] = Query(None, description="언어 필터"),
    is_public: bool = Query(True, description="공개 보이스만"),
    include_presets: bool = Query(True, description="프리셋 포함"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    db: Database = Depends(get_db),
):
    """List preset voices together with voices stored in the database.

    Args:
        type: Optional voice-type filter.
        language: Optional language filter.
        is_public: When true, only stored voices flagged ``is_public`` are
            returned; when false no visibility filter is applied.
        include_presets: Whether the built-in presets are part of the result.
        page: 1-based page number applied to the stored voices.
        page_size: Maximum number of stored voices per page.
        db: Database handle injected by FastAPI.

    Returns:
        A ``VoiceListResponse`` whose ``total`` is the number of presets that
        matched the filters plus the number of stored voices that matched.
    """
    # Presets live in memory, so the type/language filters are applied here.
    matched_presets = []
    if include_presets and (type is None or type == VoiceType.PRESET):
        matched_presets = [
            preset
            for preset in PRESET_VOICES
            if language is None or preset["language"] == language
        ]

    # NOTE(review): matching presets are prepended to every page; only the
    # stored voices below are actually paginated.
    voices = [
        VoiceResponse(
            **preset,
            is_public=True,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )
        for preset in matched_presets
    ]

    # Stored voices: always honour the type filter, so asking for presets
    # only does not pull in every cloned/designed voice as well.
    query = {"is_public": True} if is_public else {}
    if type is not None:
        query["type"] = type.value
    if language is not None:
        query["language"] = language.value

    skip = (page - 1) * page_size
    cursor = db.voices.find(query).sort("created_at", -1).skip(skip).limit(page_size)
    async for doc in cursor:
        voices.append(_voice_doc_to_response(doc))

    # Count only the presets that were actually included, not the whole table.
    total = len(matched_presets) + await db.voices.count_documents(query)

    return VoiceListResponse(
        voices=voices,
        total=total,
        page=page,
        page_size=page_size,
    )
|
|
|
|
|
|
@router.get("/{voice_id}", response_model=VoiceResponse)
async def get_voice(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Return the details of a single voice.

    Presets are looked up first; otherwise the voice is fetched from the
    database. Responds with 404 when neither source knows the id.
    """
    # Preset lookup takes priority over the database.
    builtin = next(
        (entry for entry in PRESET_VOICES if entry["voice_id"] == voice_id),
        None,
    )
    if builtin is not None:
        return VoiceResponse(
            **builtin,
            is_public=True,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )

    stored = await db.voices.find_one({"voice_id": voice_id})
    if stored:
        return _voice_doc_to_response(stored)

    raise HTTPException(status_code=404, detail="Voice not found")
|
|
|
|
|
|
@router.get("/{voice_id}/sample")
async def get_voice_sample(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Return the sample audio of a voice as a WAV response.

    For a preset the sample is synthesized on demand through the TTS client;
    for any other voice the stored sample is loaded from the database.
    Responds with 404 when the voice is unknown or has no stored sample.
    """
    builtin = next(
        (entry for entry in PRESET_VOICES if entry["voice_id"] == voice_id),
        None,
    )

    if builtin is not None:
        # Presets have no stored sample, so one is generated per request.
        wav_bytes, _rate = await tts_client.synthesize(
            text="안녕하세요, 저는 AI 음성입니다.",
            speaker=builtin["preset_voice_id"],
            language="ko",
        )
    else:
        stored = await db.voices.find_one({"voice_id": voice_id})
        if not stored:
            raise HTTPException(status_code=404, detail="Voice not found")
        if not stored.get("sample_audio_id"):
            raise HTTPException(status_code=404, detail="No sample audio available")
        wav_bytes = await db.get_audio(stored["sample_audio_id"])

    return Response(
        content=wav_bytes,
        media_type="audio/wav",
        headers={"Content-Disposition": f'inline; filename="{voice_id}_sample.wav"'},
    )
|
|
|
|
|
|
@router.post("/clone", response_model=VoiceResponse)
async def create_voice_clone(
    name: str = Form(...),
    description: Optional[str] = Form(None),
    reference_transcript: str = Form(...),
    language: LanguageCode = Form(LanguageCode.KO),
    is_public: bool = Form(False),
    reference_audio: UploadFile = File(...),
    db: Database = Depends(get_db),
):
    """Create a new voice by cloning a reference recording.

    The voice is cloned from the uploaded reference audio; at least
    3 seconds of audio is recommended.

    Args:
        name: Display name of the new voice.
        description: Optional free-text description.
        reference_transcript: Transcript of the reference audio.
        language: Language of the voice.
        is_public: Whether the voice is publicly visible.
        reference_audio: Uploaded reference recording.
        db: Database handle injected by FastAPI.

    Raises:
        HTTPException: 400 when the uploaded audio is empty, 500 when the
            TTS client fails to clone the voice.
    """
    # Read the uploaded audio and reject empty uploads before calling the
    # TTS engine, so the client gets a 400 instead of an opaque 500.
    audio_content = await reference_audio.read()
    if not audio_content:
        raise HTTPException(status_code=400, detail="Reference audio is empty")

    # Generate a sample with the cloned voice.
    sample_text = "안녕하세요, 저는 복제된 AI 음성입니다."
    try:
        sample_audio, _sample_rate = await tts_client.voice_clone(
            text=sample_text,
            ref_audio=audio_content,
            ref_text=reference_transcript,
            language=language.value,
        )
    except Exception as e:
        # Chain the original error so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=f"Voice clone failed: {e}") from e

    # Persist both the reference recording and the generated sample.
    ref_audio_id = await db.save_audio(
        audio_content,
        f"ref_{uuid.uuid4()}.wav",
        metadata={"type": "reference"},
    )
    sample_audio_id = await db.save_audio(
        sample_audio,
        f"sample_{uuid.uuid4()}.wav",
        metadata={"type": "sample"},
    )

    # Store the voice document itself.
    voice_id = f"clone_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    doc = {
        "voice_id": voice_id,
        "name": name,
        "description": description,
        "type": VoiceType.CLONED.value,
        "language": language.value,
        "reference_audio_id": ref_audio_id,
        "reference_transcript": reference_transcript,
        "sample_audio_id": sample_audio_id,
        "is_public": is_public,
        "created_at": now,
        "updated_at": now,
    }

    await db.voices.insert_one(doc)

    return _voice_doc_to_response(doc)
|
|
|
|
|
|
@router.post("/design", response_model=VoiceResponse)
async def create_voice_design(
    request: VoiceDesignRequest,
    db: Database = Depends(get_db),
):
    """Create a new voice from a text design prompt.

    A new voice is generated from a textual description, for example
    "male in his 30s, soft and calm voice".

    Args:
        request: Name, description, language, design prompt and visibility
            of the voice to create.
        db: Database handle injected by FastAPI.

    Raises:
        HTTPException: 500 when the TTS client fails to generate the voice.
    """
    # Generate a sample with the designed voice.
    sample_text = "안녕하세요, 저는 AI로 생성된 음성입니다."
    try:
        sample_audio, _sample_rate = await tts_client.voice_design(
            text=sample_text,
            instruct=request.design_prompt,
            language=request.language.value,
        )
    except Exception as e:
        # Chain the original error so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=f"Voice design failed: {e}") from e

    # Persist the generated sample.
    sample_audio_id = await db.save_audio(
        sample_audio,
        f"sample_{uuid.uuid4()}.wav",
        metadata={"type": "sample"},
    )

    # Store the voice document itself.
    voice_id = f"design_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    doc = {
        "voice_id": voice_id,
        "name": request.name,
        "description": request.description,
        "type": VoiceType.DESIGNED.value,
        "language": request.language.value,
        "design_prompt": request.design_prompt,
        "sample_audio_id": sample_audio_id,
        "is_public": request.is_public,
        "created_at": now,
        "updated_at": now,
    }

    await db.voices.insert_one(doc)

    return _voice_doc_to_response(doc)
|
|
|
|
|
|
@router.patch("/{voice_id}", response_model=VoiceResponse)
async def update_voice(
    voice_id: str,
    request: VoiceUpdateRequest,
    db: Database = Depends(get_db),
):
    """Update the editable fields of a stored voice.

    Presets cannot be modified (400). Only request fields that are not
    ``None`` are written; a request with nothing to write is rejected (400).
    Responds with 404 when no stored voice has the given id.
    """
    if any(entry["voice_id"] == voice_id for entry in PRESET_VOICES):
        raise HTTPException(status_code=400, detail="Cannot modify preset voice")

    # Keep only the fields the caller actually supplied.
    changes = {}
    for field, value in request.model_dump().items():
        if value is not None:
            changes[field] = value

    if not changes:
        raise HTTPException(status_code=400, detail="No fields to update")

    changes["updated_at"] = datetime.utcnow()

    selector = {"voice_id": voice_id}
    outcome = await db.voices.update_one(selector, {"$set": changes})
    if outcome.matched_count == 0:
        raise HTTPException(status_code=404, detail="Voice not found")

    # Re-read the document so the response reflects the stored state.
    refreshed = await db.voices.find_one({"voice_id": voice_id})
    return _voice_doc_to_response(refreshed)
|
|
|
|
|
|
@router.delete("/{voice_id}")
async def delete_voice(
    voice_id: str,
    db: Database = Depends(get_db),
):
    """Delete a stored voice together with its audio files.

    Presets cannot be deleted (400). Responds with 404 when no stored voice
    has the given id. On success returns the deleted id with a status marker.
    """
    if any(entry["voice_id"] == voice_id for entry in PRESET_VOICES):
        raise HTTPException(status_code=400, detail="Cannot delete preset voice")

    # Look the voice up first so its audio references are known.
    stored = await db.voices.find_one({"voice_id": voice_id})
    if not stored:
        raise HTTPException(status_code=404, detail="Voice not found")

    # Remove the associated audio (reference first, then sample).
    for audio_key in ("reference_audio_id", "sample_audio_id"):
        audio_id = stored.get(audio_key)
        if audio_id:
            await db.delete_audio(audio_id)

    # Finally remove the voice document itself.
    await db.voices.delete_one({"voice_id": voice_id})

    return {"status": "deleted", "voice_id": voice_id}
|