"""TTS API 라우터""" import uuid from datetime import datetime from typing import Optional from fastapi import APIRouter, HTTPException, Depends from fastapi.responses import Response, StreamingResponse from app.database import Database, get_db from app.models.voice import TTSSynthesizeRequest, TTSGenerationResponse, VoiceType from app.services.tts_client import tts_client from app.routers.voices import PRESET_VOICES router = APIRouter(prefix="/api/v1/tts", tags=["tts"]) @router.post("/synthesize") async def synthesize( request: TTSSynthesizeRequest, db: Database = Depends(get_db), ): """TTS 음성 합성 지정된 보이스로 텍스트를 음성으로 변환합니다. """ voice_id = request.voice_id # 프리셋 보이스 확인 preset_speaker = None for preset in PRESET_VOICES: if preset["voice_id"] == voice_id: preset_speaker = preset["preset_voice_id"] break if preset_speaker: # 프리셋 음성 합성 try: audio_bytes, sr = await tts_client.synthesize( text=request.text, speaker=preset_speaker, language="ko", instruct=request.instruct, ) except Exception as e: raise HTTPException(status_code=500, detail=f"TTS synthesis failed: {str(e)}") else: # DB에서 보이스 정보 조회 voice_doc = await db.voices.find_one({"voice_id": voice_id}) if not voice_doc: raise HTTPException(status_code=404, detail="Voice not found") voice_type = voice_doc.get("type") if voice_type == VoiceType.CLONED.value: # Voice Clone 합성 (레퍼런스 오디오 필요) ref_audio_id = voice_doc.get("reference_audio_id") ref_transcript = voice_doc.get("reference_transcript", "") if not ref_audio_id: raise HTTPException(status_code=400, detail="Reference audio not found") ref_audio = await db.get_audio(ref_audio_id) try: audio_bytes, sr = await tts_client.voice_clone( text=request.text, ref_audio=ref_audio, ref_text=ref_transcript, language=voice_doc.get("language", "ko"), ) except Exception as e: raise HTTPException(status_code=500, detail=f"Voice clone synthesis failed: {str(e)}") elif voice_type == VoiceType.DESIGNED.value: # Voice Design 합성 design_prompt = voice_doc.get("design_prompt", "") try: audio_bytes, sr = await tts_client.voice_design( text=request.text, instruct=design_prompt, language=voice_doc.get("language", "ko"), ) except Exception as e: raise HTTPException(status_code=500, detail=f"Voice design synthesis failed: {str(e)}") else: raise HTTPException(status_code=400, detail=f"Unknown voice type: {voice_type}") # 생성 기록 저장 generation_id = f"gen_{uuid.uuid4().hex[:12]}" now = datetime.utcnow() # 오디오 저장 audio_file_id = await db.save_audio( audio_bytes, f"{generation_id}.wav", metadata={"voice_id": voice_id, "text": request.text[:100]}, ) # 생성 기록 저장 gen_doc = { "generation_id": generation_id, "voice_id": voice_id, "text": request.text, "audio_file_id": audio_file_id, "status": "completed", "created_at": now, } await db.tts_generations.insert_one(gen_doc) return Response( content=audio_bytes, media_type="audio/wav", headers={ "X-Sample-Rate": str(sr), "X-Generation-ID": generation_id, "Content-Disposition": f'attachment; filename="{generation_id}.wav"', }, ) @router.post("/synthesize/async", response_model=TTSGenerationResponse) async def synthesize_async( request: TTSSynthesizeRequest, db: Database = Depends(get_db), ): """비동기 TTS 음성 합성 (긴 텍스트용) 생성 작업을 큐에 등록하고 generation_id를 반환합니다. 완료 후 /generations/{generation_id}/audio로 다운로드 가능합니다. """ # 긴 텍스트 처리를 위한 비동기 방식 # 현재는 동기 방식과 동일하게 처리 (추후 Redis 큐 연동) generation_id = f"gen_{uuid.uuid4().hex[:12]}" now = datetime.utcnow() gen_doc = { "generation_id": generation_id, "voice_id": request.voice_id, "text": request.text, "status": "pending", "created_at": now, } await db.tts_generations.insert_one(gen_doc) # 실제로는 백그라운드 워커에서 처리해야 함 # 여기서는 바로 처리 try: # synthesize 로직과 동일... # (간소화를 위해 생략, 실제 구현 시 비동기 워커 사용) pass except Exception as e: await db.tts_generations.update_one( {"generation_id": generation_id}, {"$set": {"status": "failed", "error_message": str(e)}}, ) return TTSGenerationResponse( generation_id=generation_id, voice_id=request.voice_id, text=request.text, status="pending", created_at=now, ) @router.get("/generations/{generation_id}", response_model=TTSGenerationResponse) async def get_generation( generation_id: str, db: Database = Depends(get_db), ): """TTS 생성 상태 조회""" doc = await db.tts_generations.find_one({"generation_id": generation_id}) if not doc: raise HTTPException(status_code=404, detail="Generation not found") return TTSGenerationResponse( generation_id=doc["generation_id"], voice_id=doc["voice_id"], text=doc["text"], status=doc["status"], audio_file_id=str(doc.get("audio_file_id")) if doc.get("audio_file_id") else None, duration_seconds=doc.get("duration_seconds"), created_at=doc["created_at"], ) @router.get("/generations/{generation_id}/audio") async def get_generation_audio( generation_id: str, db: Database = Depends(get_db), ): """생성된 오디오 다운로드""" doc = await db.tts_generations.find_one({"generation_id": generation_id}) if not doc: raise HTTPException(status_code=404, detail="Generation not found") if doc["status"] != "completed": raise HTTPException(status_code=400, detail=f"Generation not completed: {doc['status']}") audio_file_id = doc.get("audio_file_id") if not audio_file_id: raise HTTPException(status_code=404, detail="Audio file not found") audio_bytes = await db.get_audio(audio_file_id) return Response( content=audio_bytes, media_type="audio/wav", headers={ "Content-Disposition": f'attachment; filename="{generation_id}.wav"', }, ) @router.get("/health") async def tts_health(): """TTS 엔진 헬스체크""" try: health = await tts_client.health_check() return {"status": "healthy", "tts_engine": health} except Exception as e: return {"status": "unhealthy", "error": str(e)}