- FastAPI 백엔드 (audio-studio-api) - Next.js 프론트엔드 (audio-studio-ui) - Qwen3-TTS 엔진 (audio-studio-tts) - MusicGen 서비스 (audio-studio-musicgen) - Docker Compose 개발/운영 환경 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
136 lines
4.0 KiB
Python
136 lines
4.0 KiB
Python
"""TTS 엔진 클라이언트
|
|
|
|
audio-studio-tts 서비스와 통신
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
from typing import Optional, Tuple, List
|
|
|
|
import httpx
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TTSClient:
    """HTTP client for the TTS engine.

    Every public coroutine opens a short-lived ``httpx.AsyncClient``, performs
    one request against ``base_url`` and raises ``httpx.HTTPStatusError`` (via
    ``raise_for_status``) when the engine answers with a 4xx/5xx status.
    """

    # Sample rate used when the engine omits X-Sample-Rate or sends a value
    # that cannot be interpreted as a positive integer.
    DEFAULT_SAMPLE_RATE = 24000

    # Same logger name as the module-level logger; kept on the class so the
    # helpers below do not depend on a module global.
    _log = logging.getLogger(__name__)

    def __init__(self):
        # Engine location is read from the environment once, at construction.
        self.base_url = os.getenv("TTS_ENGINE_URL", "http://localhost:8001")
        # Synthesis can be slow, so allow a long overall timeout while still
        # failing fast when the engine cannot be reached at all.
        self.timeout = httpx.Timeout(120.0, connect=10.0)

    def _url(self, path: str) -> str:
        """Join ``base_url`` and ``path`` with exactly one slash between them.

        Tolerates a trailing slash in ``TTS_ENGINE_URL`` so that requests are
        not sent to ``//health`` and the like.
        """
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    @classmethod
    def _parse_sample_rate(cls, headers) -> int:
        """Read the ``X-Sample-Rate`` header as a positive integer.

        Args:
            headers: Mapping-like object exposing ``get`` (response headers).

        Returns:
            The parsed sample rate, or ``DEFAULT_SAMPLE_RATE`` when the header
            is missing, non-numeric or not positive.
        """
        raw = headers.get("X-Sample-Rate")
        if raw is None:
            return cls.DEFAULT_SAMPLE_RATE
        try:
            rate = int(raw)
        except (TypeError, ValueError):
            rate = 0
        if rate <= 0:
            # The audio itself was produced successfully; do not throw it away
            # because of a garbled header. Log so the problem stays visible.
            cls._log.warning(
                "Invalid X-Sample-Rate header %r; falling back to %d",
                raw,
                cls.DEFAULT_SAMPLE_RATE,
            )
            return cls.DEFAULT_SAMPLE_RATE
        return rate

    async def _get_json(self, path: str) -> dict:
        """GET ``path`` on the engine and return the decoded JSON body."""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(self._url(path))
            response.raise_for_status()
            return response.json()

    async def _post_audio(self, path: str, **request_kwargs) -> Tuple[bytes, int]:
        """POST to ``path`` and return ``(audio_bytes, sample_rate)``.

        ``request_kwargs`` are forwarded unchanged to ``AsyncClient.post``
        (``json=...`` or ``files=...``/``data=...``).
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(self._url(path), **request_kwargs)
            response.raise_for_status()
            return response.content, self._parse_sample_rate(response.headers)

    async def health_check(self) -> dict:
        """Return the JSON body of the engine's ``/health`` endpoint."""
        return await self._get_json("/health")

    async def get_speakers(self) -> List[str]:
        """Return the ``speakers`` entry of the ``/speakers`` response.

        Raises:
            KeyError: If the response JSON has no ``speakers`` key.
        """
        body = await self._get_json("/speakers")
        return body["speakers"]

    async def get_languages(self) -> dict:
        """Return the ``languages`` entry of the ``/languages`` response.

        Raises:
            KeyError: If the response JSON has no ``languages`` key.
        """
        body = await self._get_json("/languages")
        return body["languages"]

    async def synthesize(
        self,
        text: str,
        speaker: str = "Chelsie",
        language: str = "ko",
        instruct: Optional[str] = None,
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a preset speaker via ``/synthesize``.

        Args:
            text: Text to synthesize.
            speaker: Preset speaker name.
            language: Language code sent to the engine.
            instruct: Optional instruction; sent only when truthy.

        Returns:
            (audio_bytes, sample_rate)
        """
        payload = {
            "text": text,
            "speaker": speaker,
            "language": language,
        }
        if instruct:
            payload["instruct"] = instruct
        return await self._post_audio("/synthesize", json=payload)

    async def voice_clone(
        self,
        text: str,
        ref_audio: bytes,
        ref_text: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a cloned voice via ``/voice-clone``.

        Args:
            text: Text to synthesize.
            ref_audio: Reference audio bytes, uploaded as ``reference.wav``
                with content type ``audio/wav``.
            ref_text: Text accompanying the reference audio.
            language: Language code sent to the engine.

        Returns:
            (audio_bytes, sample_rate)
        """
        # Sent as multipart/form-data: the audio as a file part, the rest as
        # ordinary form fields.
        files = {"ref_audio": ("reference.wav", ref_audio, "audio/wav")}
        data = {
            "text": text,
            "ref_text": ref_text,
            "language": language,
        }
        return await self._post_audio("/voice-clone", files=files, data=data)

    async def voice_design(
        self,
        text: str,
        instruct: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a designed voice via ``/voice-design``.

        Args:
            text: Text to synthesize.
            instruct: Instruction sent to the engine describing the voice.
            language: Language code sent to the engine.

        Returns:
            (audio_bytes, sample_rate)
        """
        payload = {
            "text": text,
            "instruct": instruct,
            "language": language,
        }
        return await self._post_audio("/voice-design", json=payload)
|
|
|
|
|
|
# Singleton instance
|
|
# Constructed at import time, so TTS_ENGINE_URL is read from the environment
# when this module is first imported.
tts_client = TTSClient()
|