Files
drama-studio/audio-studio-api/app/services/tts_client.py
jungwoo choi cc547372c0 feat: Drama Studio 프로젝트 초기 구조 설정
- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 11:39:38 +09:00

136 lines
4.0 KiB
Python

"""TTS engine client.
Communicates with the audio-studio-tts service.
"""
import os
import logging
from typing import Optional, Tuple, List
import httpx
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class TTSClient:
    """HTTP client for the TTS engine.

    Every public method opens a short-lived ``httpx.AsyncClient`` configured
    with ``self.timeout``, sends a single request to the engine at
    ``self.base_url`` and calls ``raise_for_status()`` on the response, so a
    4xx/5xx answer surfaces as ``httpx.HTTPStatusError``.
    """

    # Sample rate reported when a response carries no X-Sample-Rate header.
    DEFAULT_SAMPLE_RATE = 24000

    def __init__(self):
        """Read the engine URL from ``TTS_ENGINE_URL`` and set the timeouts."""
        self.base_url = os.getenv("TTS_ENGINE_URL", "http://localhost:8001")
        # TTS can take a while: 120 s general timeout, 10 s to connect.
        self.timeout = httpx.Timeout(120.0, connect=10.0)

    def _url(self, path: str) -> str:
        """Join ``base_url`` and an endpoint path with exactly one slash.

        A configured URL such as ``http://host:8001/`` would otherwise yield
        ``http://host:8001//health``.
        """
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    @classmethod
    def _parse_sample_rate(cls, headers) -> int:
        """Return the integer ``X-Sample-Rate`` header value.

        Args:
            headers: Mapping-like object exposing ``get`` (for example the
                ``headers`` attribute of an httpx response).

        Returns:
            The header value converted with ``int``, or
            ``DEFAULT_SAMPLE_RATE`` when the header is absent.

        Raises:
            ValueError: If the header is present but is not an integer.
        """
        raw_rate = headers.get("X-Sample-Rate")
        if raw_rate is None:
            return cls.DEFAULT_SAMPLE_RATE
        return int(raw_rate)

    async def _get_json(self, path: str):
        """GET ``path`` on the engine and return the decoded JSON body."""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(self._url(path))
            response.raise_for_status()
            return response.json()

    async def _post_audio(self, path: str, **request_kwargs) -> Tuple[bytes, int]:
        """POST to ``path`` and return ``(audio_bytes, sample_rate)``.

        ``request_kwargs`` are forwarded unchanged to ``AsyncClient.post``
        (``json=...`` for JSON bodies, ``files=...``/``data=...`` for
        multipart uploads).
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(self._url(path), **request_kwargs)
            response.raise_for_status()
            return response.content, self._parse_sample_rate(response.headers)

    async def health_check(self) -> dict:
        """Query the engine's ``/health`` endpoint and return its JSON body."""
        return await self._get_json("/health")

    async def get_speakers(self) -> List[str]:
        """Return the ``"speakers"`` entry of the ``/speakers`` response.

        Raises:
            KeyError: If the response JSON has no ``"speakers"`` key.
        """
        body = await self._get_json("/speakers")
        return body["speakers"]

    async def get_languages(self) -> dict:
        """Return the ``"languages"`` entry of the ``/languages`` response.

        Raises:
            KeyError: If the response JSON has no ``"languages"`` key.
        """
        body = await self._get_json("/languages")
        return body["languages"]

    async def synthesize(
        self,
        text: str,
        speaker: str = "Chelsie",
        language: str = "ko",
        instruct: Optional[str] = None,
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a preset speaker via ``/synthesize``.

        Args:
            text: Text to synthesize.
            speaker: Preset speaker name.
            language: Language code sent to the engine.
            instruct: Optional instruction string; only sent when non-empty.

        Returns:
            ``(audio_bytes, sample_rate)``.
        """
        payload = {
            "text": text,
            "speaker": speaker,
            "language": language,
        }
        # Omit the key entirely for None or "" rather than sending a blank.
        if instruct:
            payload["instruct"] = instruct
        return await self._post_audio("/synthesize", json=payload)

    async def voice_clone(
        self,
        text: str,
        ref_audio: bytes,
        ref_text: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech via ``/voice-clone`` using reference audio.

        Args:
            text: Text to synthesize.
            ref_audio: Reference audio bytes, uploaded as ``reference.wav``
                with content type ``audio/wav``.
            ref_text: Text sent alongside the reference audio.
            language: Language code sent to the engine.

        Returns:
            ``(audio_bytes, sample_rate)``.
        """
        # Sent as multipart/form-data: the audio as a file part, the rest as
        # ordinary form fields.
        files = {"ref_audio": ("reference.wav", ref_audio, "audio/wav")}
        data = {
            "text": text,
            "ref_text": ref_text,
            "language": language,
        }
        return await self._post_audio("/voice-clone", files=files, data=data)

    async def voice_design(
        self,
        text: str,
        instruct: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech via ``/voice-design`` from an instruction string.

        Args:
            text: Text to synthesize.
            instruct: Instruction string sent to the engine.
            language: Language code sent to the engine.

        Returns:
            ``(audio_bytes, sample_rate)``.
        """
        payload = {
            "text": text,
            "instruct": instruct,
            "language": language,
        }
        return await self._post_audio("/voice-design", json=payload)
# Singleton instance, created when this module is imported.
tts_client = TTSClient()