feat: Drama Studio 프로젝트 초기 구조 설정

- FastAPI 백엔드 (audio-studio-api)
- Next.js 프론트엔드 (audio-studio-ui)
- Qwen3-TTS 엔진 (audio-studio-tts)
- MusicGen 서비스 (audio-studio-musicgen)
- Docker Compose 개발/운영 환경

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 11:39:38 +09:00
commit cc547372c0
70 changed files with 18399 additions and 0 deletions
--- a/audio-studio-api/app/services/tts_client.py
+++ b/audio-studio-api/app/services/tts_client.py
@ -0,0 +1,135 @@
+"""TTS 엔진 클라이언트
+
+audio-studio-tts 서비스와 통신
+"""
+
+import os
+import logging
+from typing import Optional, Tuple, List
+
+import httpx
+
+# Module-level logger, named after this module's import path.
+logger = logging.getLogger(__name__)
+
+
+class TTSClient:
+    """TTS 엔진 HTTP 클라이언트"""
+
+    def __init__(self):
+        self.base_url = os.getenv("TTS_ENGINE_URL", "http://localhost:8001")
+        self.timeout = httpx.Timeout(120.0, connect=10.0)  # TTS는 시간이 걸릴 수 있음
+
+    async def health_check(self) -> dict:
+        """TTS 엔진 헬스체크"""
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(f"{self.base_url}/health")
+            response.raise_for_status()
+            return response.json()
+
+    async def get_speakers(self) -> List[str]:
+        """프리셋 스피커 목록 조회"""
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(f"{self.base_url}/speakers")
+            response.raise_for_status()
+            return response.json()["speakers"]
+
+    async def get_languages(self) -> dict:
+        """지원 언어 목록 조회"""
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(f"{self.base_url}/languages")
+            response.raise_for_status()
+            return response.json()["languages"]
+
+    async def synthesize(
+        self,
+        text: str,
+        speaker: str = "Chelsie",
+        language: str = "ko",
+        instruct: Optional[str] = None,
+    ) -> Tuple[bytes, int]:
+        """프리셋 음성으로 TTS 합성
+
+        Returns:
+            (audio_bytes, sample_rate)
+        """
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            payload = {
+                "text": text,
+                "speaker": speaker,
+                "language": language,
+            }
+            if instruct:
+                payload["instruct"] = instruct
+
+            response = await client.post(
+                f"{self.base_url}/synthesize",
+                json=payload,
+            )
+            response.raise_for_status()
+
+            # 샘플레이트 추출
+            sample_rate = int(response.headers.get("X-Sample-Rate", "24000"))
+
+            return response.content, sample_rate
+
+    async def voice_clone(
+        self,
+        text: str,
+        ref_audio: bytes,
+        ref_text: str,
+        language: str = "ko",
+    ) -> Tuple[bytes, int]:
+        """Voice Clone으로 TTS 합성
+
+        Returns:
+            (audio_bytes, sample_rate)
+        """
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            # multipart/form-data로 전송
+            files = {"ref_audio": ("reference.wav", ref_audio, "audio/wav")}
+            data = {
+                "text": text,
+                "ref_text": ref_text,
+                "language": language,
+            }
+
+            response = await client.post(
+                f"{self.base_url}/voice-clone",
+                files=files,
+                data=data,
+            )
+            response.raise_for_status()
+
+            sample_rate = int(response.headers.get("X-Sample-Rate", "24000"))
+            return response.content, sample_rate
+
+    async def voice_design(
+        self,
+        text: str,
+        instruct: str,
+        language: str = "ko",
+    ) -> Tuple[bytes, int]:
+        """Voice Design으로 TTS 합성
+
+        Returns:
+            (audio_bytes, sample_rate)
+        """
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            payload = {
+                "text": text,
+                "instruct": instruct,
+                "language": language,
+            }
+
+            response = await client.post(
+                f"{self.base_url}/voice-design",
+                json=payload,
+            )
+            response.raise_for_status()
+
+            sample_rate = int(response.headers.get("X-Sample-Rate", "24000"))
+            return response.content, sample_rate
+
+
+# Singleton instance shared by importers of this module
+tts_client = TTSClient()