Files
drama-studio/audio-studio-api/app/routers/tts.py
jungwoo choi cc547372c0 feat: Drama Studio 프로젝트 초기 구조 설정
- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 11:39:38 +09:00

228 lines
7.2 KiB
Python

"""TTS API 라우터"""
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import Response, StreamingResponse
from app.database import Database, get_db
from app.models.voice import TTSSynthesizeRequest, TTSGenerationResponse, VoiceType
from app.services.tts_client import tts_client
from app.routers.voices import PRESET_VOICES
# Router that groups every TTS endpoint in this module under the
# "/api/v1/tts" URL prefix and the "tts" tag.
router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
@router.post("/synthesize")
async def synthesize(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Synthesize speech for the requested voice and return it as WAV.

    The voice is resolved in this order: a preset listed in
    ``PRESET_VOICES``, otherwise a voice document looked up in the database
    whose ``type`` selects voice-clone or voice-design synthesis. The
    generated audio is stored with ``db.save_audio`` and a "completed"
    record is inserted into ``db.tts_generations`` before responding.

    Args:
        request: Synthesis request; ``voice_id``, ``text`` and ``instruct``
            are read from it.
        db: Database handle injected by FastAPI.

    Returns:
        An ``audio/wav`` response containing the audio bytes, with the
        ``X-Sample-Rate`` and ``X-Generation-ID`` headers set.

    Raises:
        HTTPException: 404 when the voice is neither a preset nor stored in
            the database; 400 when a cloned voice has no reference audio or
            the stored voice type is unknown; 500 when the TTS engine call
            fails (the engine error is chained as ``__cause__``).
    """
    voice_id = request.voice_id

    # First matching preset wins; None means "not a preset voice".
    preset_speaker = next(
        (
            preset["preset_voice_id"]
            for preset in PRESET_VOICES
            if preset["voice_id"] == voice_id
        ),
        None,
    )

    if preset_speaker:
        # Preset voice synthesis.
        try:
            audio_bytes, sr = await tts_client.synthesize(
                text=request.text,
                speaker=preset_speaker,
                language="ko",
                instruct=request.instruct,
            )
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"TTS synthesis failed: {e!s}"
            ) from e
    else:
        # Not a preset: look the voice up in the database.
        voice_doc = await db.voices.find_one({"voice_id": voice_id})
        if not voice_doc:
            raise HTTPException(status_code=404, detail="Voice not found")

        voice_type = voice_doc.get("type")

        if voice_type == VoiceType.CLONED.value:
            # Voice-clone synthesis requires the stored reference audio.
            ref_audio_id = voice_doc.get("reference_audio_id")
            ref_transcript = voice_doc.get("reference_transcript", "")
            if not ref_audio_id:
                raise HTTPException(
                    status_code=400, detail="Reference audio not found"
                )

            ref_audio = await db.get_audio(ref_audio_id)
            try:
                audio_bytes, sr = await tts_client.voice_clone(
                    text=request.text,
                    ref_audio=ref_audio,
                    ref_text=ref_transcript,
                    language=voice_doc.get("language", "ko"),
                )
            except Exception as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Voice clone synthesis failed: {e!s}",
                ) from e
        elif voice_type == VoiceType.DESIGNED.value:
            # Voice-design synthesis is driven by the stored design prompt.
            design_prompt = voice_doc.get("design_prompt", "")
            try:
                audio_bytes, sr = await tts_client.voice_design(
                    text=request.text,
                    instruct=design_prompt,
                    language=voice_doc.get("language", "ko"),
                )
            except Exception as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Voice design synthesis failed: {e!s}",
                ) from e
        else:
            raise HTTPException(
                status_code=400, detail=f"Unknown voice type: {voice_type}"
            )

    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    # Persist the audio; only the first 100 characters of the text are kept
    # in the file metadata.
    audio_file_id = await db.save_audio(
        audio_bytes,
        f"{generation_id}.wav",
        metadata={"voice_id": voice_id, "text": request.text[:100]},
    )

    # Record the generation so it can be queried later by generation_id.
    gen_doc = {
        "generation_id": generation_id,
        "voice_id": voice_id,
        "text": request.text,
        "audio_file_id": audio_file_id,
        "status": "completed",
        "created_at": now,
    }
    await db.tts_generations.insert_one(gen_doc)

    return Response(
        content=audio_bytes,
        media_type="audio/wav",
        headers={
            "X-Sample-Rate": str(sr),
            "X-Generation-ID": generation_id,
            "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
        },
    )
@router.post("/synthesize/async", response_model=TTSGenerationResponse)
async def synthesize_async(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Register an asynchronous TTS generation (intended for long text).

    Inserts a "pending" record into ``db.tts_generations`` and returns its
    ``generation_id``.

    NOTE: no synthesis is performed or scheduled by this endpoint yet, so
    the record stays "pending". Processing is meant to move to a background
    worker (a Redis queue is planned); once a generation is completed its
    audio is served by ``/generations/{generation_id}/audio``.

    Args:
        request: Synthesis request; ``voice_id`` and ``text`` are read.
        db: Database handle injected by FastAPI.

    Returns:
        A ``TTSGenerationResponse`` describing the newly created pending
        generation.
    """
    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    gen_doc = {
        "generation_id": generation_id,
        "voice_id": request.voice_id,
        "text": request.text,
        "status": "pending",
        "created_at": now,
    }
    await db.tts_generations.insert_one(gen_doc)

    # TODO: hand the job to a background worker. When that exists, a failure
    # should mark the record as "failed" and store the error message. The
    # previous placeholder `try: pass / except` could never execute its
    # handler, so it was removed rather than kept as dead code.

    return TTSGenerationResponse(
        generation_id=generation_id,
        voice_id=request.voice_id,
        text=request.text,
        status="pending",
        created_at=now,
    )
@router.get("/generations/{generation_id}", response_model=TTSGenerationResponse)
async def get_generation(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Return the stored state of a TTS generation.

    Args:
        generation_id: Identifier of the generation to look up.
        db: Database handle injected by FastAPI.

    Returns:
        A ``TTSGenerationResponse`` built from the stored record.

    Raises:
        HTTPException: 404 when no record matches ``generation_id``.
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    # The stored file id is exposed as a string; a missing/empty id maps to None.
    stored_file_id = record.get("audio_file_id")
    if stored_file_id:
        audio_file_id = str(stored_file_id)
    else:
        audio_file_id = None

    fields = {
        "generation_id": record["generation_id"],
        "voice_id": record["voice_id"],
        "text": record["text"],
        "status": record["status"],
        "audio_file_id": audio_file_id,
        "duration_seconds": record.get("duration_seconds"),
        "created_at": record["created_at"],
    }
    return TTSGenerationResponse(**fields)
@router.get("/generations/{generation_id}/audio")
async def get_generation_audio(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Download the audio produced by a completed generation.

    Args:
        generation_id: Identifier of the generation whose audio is wanted.
        db: Database handle injected by FastAPI.

    Returns:
        An ``audio/wav`` attachment response named ``<generation_id>.wav``.

    Raises:
        HTTPException: 404 when the generation record or its audio file id
            is missing; 400 when the generation status is not "completed".
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    status = record["status"]
    if status != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"Generation not completed: {status}",
        )

    file_id = record.get("audio_file_id")
    if not file_id:
        raise HTTPException(status_code=404, detail="Audio file not found")

    payload = await db.get_audio(file_id)
    download_headers = {
        "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
    }
    return Response(
        content=payload,
        media_type="audio/wav",
        headers=download_headers,
    )
@router.get("/health")
async def tts_health():
    """Report whether the TTS engine answers its health check.

    Returns:
        ``{"status": "healthy", "tts_engine": <engine reply>}`` when
        ``tts_client.health_check()`` succeeds, otherwise
        ``{"status": "unhealthy", "error": <message>}``. A failing check is
        reported in the body rather than raised.
    """
    try:
        engine_report = await tts_client.health_check()
    except Exception as exc:
        return {"status": "unhealthy", "error": str(exc)}
    else:
        return {"status": "healthy", "tts_engine": engine_report}