feat: Drama Studio 프로젝트 초기 구조 설정
- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
135
audio-studio-api/app/services/tts_client.py
Normal file
135
audio-studio-api/app/services/tts_client.py
Normal file
@ -0,0 +1,135 @@
|
||||
"""TTS 엔진 클라이언트
|
||||
|
||||
audio-studio-tts 서비스와 통신
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from typing import Optional, Tuple, List
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TTSClient:
    """HTTP client for the TTS engine (the audio-studio-tts service).

    Every public method opens a short-lived ``httpx.AsyncClient`` for a single
    request and closes it again before returning.
    """

    # Sample rate assumed when the engine's X-Sample-Rate response header is
    # missing or cannot be used.
    DEFAULT_SAMPLE_RATE = 24000

    def __init__(self):
        """Read the engine URL from ``TTS_ENGINE_URL`` and set the timeouts."""
        # Base URL of the TTS engine; defaults to a local instance.
        self.base_url = os.getenv("TTS_ENGINE_URL", "http://localhost:8001")
        # Synthesis can take a while, so allow 120 s by default while still
        # giving up after 10 s if the connection cannot be established.
        self.timeout = httpx.Timeout(120.0, connect=10.0)

    def _url(self, path: str) -> str:
        """Join ``base_url`` and ``path`` with exactly one slash between them."""
        # A base URL configured with a trailing slash must not yield "//path".
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    @classmethod
    def _parse_sample_rate(cls, headers) -> int:
        """Extract the sample rate from the ``X-Sample-Rate`` response header.

        Args:
            headers: Mapping-like object exposing ``get`` (e.g. response headers).

        Returns:
            The header value as a positive int, or ``DEFAULT_SAMPLE_RATE`` when
            the header is absent, non-numeric, or not positive.
        """
        raw = headers.get("X-Sample-Rate")
        if raw is None:
            return cls.DEFAULT_SAMPLE_RATE
        try:
            rate = int(raw)
        except (TypeError, ValueError):
            rate = 0
        if rate <= 0:
            # A garbled header should not discard audio that was already
            # synthesized; treat it like a missing header, but leave a trace.
            logging.getLogger(__name__).warning(
                "Invalid X-Sample-Rate header %r; falling back to %d",
                raw,
                cls.DEFAULT_SAMPLE_RATE,
            )
            return cls.DEFAULT_SAMPLE_RATE
        return rate

    async def _get_json(self, path: str):
        """GET ``path`` on the engine and return the decoded JSON body.

        Raises:
            httpx.HTTPStatusError: If the engine answers with an error status.
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(self._url(path))
            response.raise_for_status()
            return response.json()

    async def _post_audio(self, path: str, **request_kwargs) -> Tuple[bytes, int]:
        """POST to ``path`` and return ``(audio_bytes, sample_rate)``.

        ``request_kwargs`` are forwarded unchanged to ``AsyncClient.post``
        (``json=...`` or ``files=...``/``data=...``).

        Raises:
            httpx.HTTPStatusError: If the engine answers with an error status.
        """
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.post(self._url(path), **request_kwargs)
            response.raise_for_status()
            return response.content, self._parse_sample_rate(response.headers)

    async def health_check(self) -> dict:
        """Return the JSON body of the engine's ``/health`` endpoint."""
        return await self._get_json("/health")

    async def get_speakers(self) -> List[str]:
        """Return the ``"speakers"`` entry of the ``/speakers`` response."""
        body = await self._get_json("/speakers")
        return body["speakers"]

    async def get_languages(self) -> dict:
        """Return the ``"languages"`` entry of the ``/languages`` response."""
        body = await self._get_json("/languages")
        return body["languages"]

    async def synthesize(
        self,
        text: str,
        speaker: str = "Chelsie",
        language: str = "ko",
        instruct: Optional[str] = None,
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a preset speaker via ``/synthesize``.

        Args:
            text: Text to synthesize.
            speaker: Preset speaker name.
            language: Language code sent to the engine.
            instruct: Optional instruction; omitted from the request when
                empty or ``None``.

        Returns:
            (audio_bytes, sample_rate)

        Raises:
            httpx.HTTPStatusError: If the engine answers with an error status.
        """
        payload = {
            "text": text,
            "speaker": speaker,
            "language": language,
        }
        if instruct:
            payload["instruct"] = instruct
        return await self._post_audio("/synthesize", json=payload)

    async def voice_clone(
        self,
        text: str,
        ref_audio: bytes,
        ref_text: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech by cloning a reference voice via ``/voice-clone``.

        Args:
            text: Text to synthesize.
            ref_audio: Reference audio bytes, uploaded as ``reference.wav``.
            ref_text: Text sent alongside the reference audio.
            language: Language code sent to the engine.

        Returns:
            (audio_bytes, sample_rate)

        Raises:
            httpx.HTTPStatusError: If the engine answers with an error status.
        """
        # Sent as multipart/form-data: the audio as a file part, the rest as
        # ordinary form fields.
        files = {"ref_audio": ("reference.wav", ref_audio, "audio/wav")}
        data = {
            "text": text,
            "ref_text": ref_text,
            "language": language,
        }
        return await self._post_audio("/voice-clone", files=files, data=data)

    async def voice_design(
        self,
        text: str,
        instruct: str,
        language: str = "ko",
    ) -> Tuple[bytes, int]:
        """Synthesize speech with a designed voice via ``/voice-design``.

        Args:
            text: Text to synthesize.
            instruct: Instruction sent to the engine with the text.
            language: Language code sent to the engine.

        Returns:
            (audio_bytes, sample_rate)

        Raises:
            httpx.HTTPStatusError: If the engine answers with an error status.
        """
        payload = {
            "text": text,
            "instruct": instruct,
            "language": language,
        }
        return await self._post_audio("/voice-design", json=payload)
|
||||
|
||||
|
||||
# Singleton instance
|
||||
tts_client = TTSClient()  # built once at import; TTS_ENGINE_URL is read at that moment
|
||||
Reference in New Issue
Block a user