# drama-studio/audio-studio-api/app/services/script_parser.py

# 드라마 스크립트 파서
# 마크다운 형식의 대본을 구조화된 데이터로 변환

import re
from typing import Optional
from app.models.drama import (
    ParsedScript, ScriptElement, Character, ElementType
)


class ScriptParser:
    """
    Parse a markdown-style drama script into structured data.

    Each non-empty line is matched, in this order, against the forms below
    (the Korean keywords are part of the script syntax):

    - ``# 제목``: script title; a later title line replaces an earlier one
    - ``[장소: ...]``, ``[지문: ...]``, ``[장면: ...]``: stage direction
    - ``[효과음: ...]``: sound effect
    - ``[음악: ...]`` or ``[음악 시작|중지|변경|페이드인|페이드아웃: ...]``:
      music cue
    - ``[쉼: 2초]``: pause in seconds (the ``초`` suffix is optional)
    - ``캐릭터명(설명, 감정): 대사`` or ``캐릭터명: 대사``: dialogue
    - ``[...]`` with no colon inside: stage direction
    - any other line that has no colon and does not start with ``[`` or
      ``#``: stage direction

    Lines that fit none of these forms are ignored.
    """

    # Compiled line patterns; each is applied to a whitespace-stripped line.
    TITLE_PATTERN = re.compile(r'^#\s+(.+)$')
    DIRECTION_PATTERN = re.compile(r'^\[(?:장소|지문|장면):\s*(.+)\]$')
    SFX_PATTERN = re.compile(r'^\[효과음:\s*(.+)\]$')
    MUSIC_PATTERN = re.compile(r'^\[음악(?:\s+(시작|중지|변경|페이드인|페이드아웃))?:\s*(.+)\]$')
    PAUSE_PATTERN = re.compile(r'^\[쉼:\s*(\d+(?:\.\d+)?)\s*초?\]$')
    DIALOGUE_PATTERN = re.compile(r'^([^(\[:]+?)(?:\(([^)]*)\))?:\s*(.+)$')
    # Bracketed text without any colon, e.g. "[문이 열린다]". Every tagged
    # pattern above requires a colon, so this can never shadow one of them.
    BARE_DIRECTION_PATTERN = re.compile(r'^\[([^\]:]+)\]$')

    # Maps the optional Korean music keyword to the action stored on the
    # element; the None key covers a plain "[음악: ...]" line.
    MUSIC_ACTIONS = {
        None: "play",
        "시작": "play",
        "중지": "stop",
        "변경": "change",
        "페이드인": "fade_in",
        "페이드아웃": "fade_out",
    }

    @staticmethod
    def _split_character_info(
        char_info: Optional[str],
    ) -> tuple[Optional[str], Optional[str]]:
        """Split the parenthesised part of a dialogue line into (description, emotion)."""
        if not char_info:
            return None, None

        parts = [part.strip() for part in char_info.split(',')]
        if len(parts) >= 2:
            # "description, emotion"; values after the second are ignored.
            # Blank values become None instead of empty strings.
            return parts[0] or None, parts[1] or None

        # A single value is treated as the emotion.
        return None, parts[0] or None

    def parse(self, script: str) -> ParsedScript:
        """
        Parse ``script`` line by line into a ParsedScript.

        Args:
            script: Full script text.

        Returns:
            A ParsedScript holding the title (None when no title line is
            present), the characters in order of first appearance, and the
            parsed elements in script order.
        """
        lines = script.strip().split('\n')

        title: Optional[str] = None
        characters: dict[str, Character] = {}
        elements: list[ScriptElement] = []

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Title
            if match := self.TITLE_PATTERN.match(line):
                title = match.group(1)
                continue

            # Tagged stage direction / scene
            if match := self.DIRECTION_PATTERN.match(line):
                elements.append(ScriptElement(
                    type=ElementType.DIRECTION,
                    text=match.group(1)
                ))
                continue

            # Sound effect
            if match := self.SFX_PATTERN.match(line):
                elements.append(ScriptElement(
                    type=ElementType.SFX,
                    description=match.group(1),
                    volume=1.0
                ))
                continue

            # Music cue
            if match := self.MUSIC_PATTERN.match(line):
                action_kr = match.group(1)
                action = self.MUSIC_ACTIONS.get(action_kr, "play")
                elements.append(ScriptElement(
                    type=ElementType.MUSIC,
                    description=match.group(2),
                    action=action,
                    volume=0.3,
                    fade_duration=2.0
                ))
                continue

            # Pause
            if match := self.PAUSE_PATTERN.match(line):
                elements.append(ScriptElement(
                    type=ElementType.PAUSE,
                    duration=float(match.group(1))
                ))
                continue

            # Dialogue
            if match := self.DIALOGUE_PATTERN.match(line):
                char_name = match.group(1).strip()
                dialogue_text = match.group(3).strip()
                description, emotion = self._split_character_info(match.group(2))

                # Register the character on first appearance; afterwards only
                # fill in a description that is still missing.
                if char_name not in characters:
                    characters[char_name] = Character(
                        name=char_name,
                        description=description
                    )
                elif description and not characters[char_name].description:
                    characters[char_name].description = description

                elements.append(ScriptElement(
                    type=ElementType.DIALOGUE,
                    character=char_name,
                    text=dialogue_text,
                    emotion=emotion
                ))
                continue

            # Bare bracketed text is the "[지문]" form from the class
            # docstring; without this branch such lines were silently dropped.
            if match := self.BARE_DIRECTION_PATTERN.match(line):
                direction_text = match.group(1).strip()
                if direction_text:
                    elements.append(ScriptElement(
                        type=ElementType.DIRECTION,
                        text=direction_text
                    ))
                continue

            # Plain text with no colon is treated as a stage direction.
            # Remaining lines (unknown bracket tags, '#'-prefixed lines that
            # are not titles, colon lines that are not dialogue) are ignored.
            if not line.startswith(('[', '#')) and ':' not in line:
                elements.append(ScriptElement(
                    type=ElementType.DIRECTION,
                    text=line
                ))

        return ParsedScript(
            title=title,
            characters=list(characters.values()),
            elements=elements
        )

    def validate_script(self, script: str) -> tuple[bool, list[str]]:
        """
        Check that ``script`` is non-empty and parses to at least one dialogue.

        Args:
            script: Full script text; None or blank text is reported as empty.

        Returns:
            ``(is_valid, error_messages)``; ``is_valid`` is True only when
            ``error_messages`` is empty.
        """
        errors: list[str] = []

        # An empty script cannot be parsed meaningfully, so report and stop.
        if not script or not script.strip():
            errors.append("스크립트가 비어있습니다")
            return False, errors

        parsed = self.parse(script)

        if not parsed.elements:
            errors.append("파싱된 요소가 없습니다")

        # A script needs at least one dialogue line.
        if not any(e.type == ElementType.DIALOGUE for e in parsed.elements):
            errors.append("대사가 없습니다")

        return not errors, errors


# Shared module-level ScriptParser instance (singleton).
script_parser = ScriptParser()