feat: Drama Studio 프로젝트 초기 구조 설정

- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2026-01-26 11:39:38 +09:00
commit cc547372c0
70 changed files with 18399 additions and 0 deletions

View File

@@ -0,0 +1,227 @@
"""TTS API 라우터"""
import uuid
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, HTTPException, Depends
from fastapi.responses import Response, StreamingResponse
from app.database import Database, get_db
from app.models.voice import TTSSynthesizeRequest, TTSGenerationResponse, VoiceType
from app.services.tts_client import tts_client
from app.routers.voices import PRESET_VOICES
# Shared router for the TTS endpoints: routes registered on it are served
# under the "/api/v1/tts" prefix and carry the "tts" tag.
router = APIRouter(prefix="/api/v1/tts", tags=["tts"])
@router.post("/synthesize")
async def synthesize(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Synthesize speech for ``request.text`` with the requested voice.

    The voice is resolved in two steps: first against the ``PRESET_VOICES``
    table, then against the ``voices`` collection, where the stored ``type``
    selects voice-clone or voice-design synthesis. The produced audio is
    stored through ``db.save_audio`` and a "completed" record is inserted
    into ``tts_generations`` before the audio is returned.

    Returns:
        A ``Response`` with the audio bytes as ``audio/wav`` plus the
        ``X-Sample-Rate``, ``X-Generation-ID`` and ``Content-Disposition``
        headers.

    Raises:
        HTTPException: 404 when the voice is unknown; 400 when a cloned
            voice has no usable reference audio or the stored voice type
            is not supported; 500 when the TTS engine call fails.
    """
    voice_id = request.voice_id

    preset_speaker = _find_preset_speaker(voice_id)
    if preset_speaker:
        # Preset voices go straight to the engine; the language is fixed
        # to "ko" on this path.
        try:
            audio_bytes, sr = await tts_client.synthesize(
                text=request.text,
                speaker=preset_speaker,
                language="ko",
                instruct=request.instruct,
            )
        except Exception as e:
            # Chain the cause so the original traceback stays visible in logs.
            raise HTTPException(
                status_code=500, detail=f"TTS synthesis failed: {e}"
            ) from e
    else:
        audio_bytes, sr = await _synthesize_stored_voice(request, db)

    # Persist the audio first, then the generation record that points at it.
    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    audio_file_id = await db.save_audio(
        audio_bytes,
        f"{generation_id}.wav",
        # Only the first 100 characters of the text go into the file metadata.
        metadata={"voice_id": voice_id, "text": request.text[:100]},
    )

    gen_doc = {
        "generation_id": generation_id,
        "voice_id": voice_id,
        "text": request.text,
        "audio_file_id": audio_file_id,
        "status": "completed",
        "created_at": now,
    }
    await db.tts_generations.insert_one(gen_doc)

    return Response(
        content=audio_bytes,
        media_type="audio/wav",
        headers={
            "X-Sample-Rate": str(sr),
            "X-Generation-ID": generation_id,
            "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
        },
    )


def _find_preset_speaker(voice_id):
    """Return the ``preset_voice_id`` of the first preset matching ``voice_id``, else ``None``."""
    return next(
        (
            preset["preset_voice_id"]
            for preset in PRESET_VOICES
            if preset["voice_id"] == voice_id
        ),
        None,
    )


async def _synthesize_stored_voice(request, db):
    """Synthesize with a voice stored in the ``voices`` collection.

    Returns the ``(audio_bytes, sample_rate)`` pair produced by the TTS
    client. Raises ``HTTPException`` with the status codes documented on
    ``synthesize``.
    """
    voice_doc = await db.voices.find_one({"voice_id": request.voice_id})
    if not voice_doc:
        raise HTTPException(status_code=404, detail="Voice not found")

    voice_type = voice_doc.get("type")
    language = voice_doc.get("language", "ko")

    if voice_type == VoiceType.CLONED.value:
        # Voice cloning needs the stored reference recording and its transcript.
        ref_audio_id = voice_doc.get("reference_audio_id")
        ref_transcript = voice_doc.get("reference_transcript", "")
        if not ref_audio_id:
            raise HTTPException(status_code=400, detail="Reference audio not found")

        ref_audio = await db.get_audio(ref_audio_id)
        if not ref_audio:
            # The id is recorded but no audio came back: fail with the same
            # error as a missing id instead of sending empty audio to the engine.
            raise HTTPException(status_code=400, detail="Reference audio not found")

        try:
            return await tts_client.voice_clone(
                text=request.text,
                ref_audio=ref_audio,
                ref_text=ref_transcript,
                language=language,
            )
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Voice clone synthesis failed: {e}"
            ) from e

    if voice_type == VoiceType.DESIGNED.value:
        # Designed voices are driven by the stored design prompt.
        design_prompt = voice_doc.get("design_prompt", "")
        try:
            return await tts_client.voice_design(
                text=request.text,
                instruct=design_prompt,
                language=language,
            )
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Voice design synthesis failed: {e}"
            ) from e

    raise HTTPException(status_code=400, detail=f"Unknown voice type: {voice_type}")
@router.post("/synthesize/async", response_model=TTSGenerationResponse)
async def synthesize_async(
    request: TTSSynthesizeRequest,
    db: Database = Depends(get_db),
):
    """Register an asynchronous TTS generation (intended for long texts).

    Inserts a ``tts_generations`` record with status "pending" and returns
    its ``generation_id`` so the caller can poll
    ``/generations/{generation_id}`` and later download the result from
    ``/generations/{generation_id}/audio``.

    NOTE: this endpoint is a placeholder. No synthesis is started here, so
    the record stays "pending" until background processing is implemented.

    Returns:
        A ``TTSGenerationResponse`` describing the pending generation.
    """
    generation_id = f"gen_{uuid.uuid4().hex[:12]}"
    now = datetime.utcnow()

    gen_doc = {
        "generation_id": generation_id,
        "voice_id": request.voice_id,
        "text": request.text,
        "status": "pending",
        "created_at": now,
    }
    await db.tts_generations.insert_one(gen_doc)

    # TODO: hand the job to a background worker (a Redis queue was the
    # original plan). When that exists, a failed job should be recorded with
    # {"$set": {"status": "failed", "error_message": <reason>}} on this record.
    # The former inline `try: pass / except` could never fire and was removed.

    return TTSGenerationResponse(
        generation_id=generation_id,
        voice_id=request.voice_id,
        text=request.text,
        status="pending",
        created_at=now,
    )
@router.get("/generations/{generation_id}", response_model=TTSGenerationResponse)
async def get_generation(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Return the stored state of one TTS generation.

    Raises:
        HTTPException: 404 when no record has the given ``generation_id``.
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    # The stored file id is exposed as a string; a missing or falsy id is
    # reported as None.
    stored_file_id = record.get("audio_file_id")
    if stored_file_id:
        file_id_text = str(stored_file_id)
    else:
        file_id_text = None

    return TTSGenerationResponse(
        generation_id=record["generation_id"],
        voice_id=record["voice_id"],
        text=record["text"],
        status=record["status"],
        audio_file_id=file_id_text,
        duration_seconds=record.get("duration_seconds"),
        created_at=record["created_at"],
    )
@router.get("/generations/{generation_id}/audio")
async def get_generation_audio(
    generation_id: str,
    db: Database = Depends(get_db),
):
    """Download the audio produced by a completed generation.

    Raises:
        HTTPException: 404 when the generation record or its audio file id
            is missing; 400 when the generation status is not "completed".
    """
    record = await db.tts_generations.find_one({"generation_id": generation_id})
    if not record:
        raise HTTPException(status_code=404, detail="Generation not found")

    current_status = record["status"]
    if current_status != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"Generation not completed: {current_status}",
        )

    stored_file_id = record.get("audio_file_id")
    if not stored_file_id:
        raise HTTPException(status_code=404, detail="Audio file not found")

    payload = await db.get_audio(stored_file_id)
    download_headers = {
        "Content-Disposition": f'attachment; filename="{generation_id}.wav"',
    }
    return Response(
        content=payload,
        media_type="audio/wav",
        headers=download_headers,
    )
@router.get("/health")
async def tts_health():
    """Report whether the TTS engine answers its health check.

    Any exception from the client is reported in the response body as
    "unhealthy" instead of being raised.
    """
    try:
        engine_status = await tts_client.health_check()
    except Exception as exc:
        return {"status": "unhealthy", "error": str(exc)}
    return {"status": "healthy", "tts_engine": engine_status}