Initial commit - cleaned repository

This commit is contained in:
jungwoo choi
2025-09-28 20:41:57 +09:00
commit e3c28f796a
188 changed files with 28102 additions and 0 deletions

View File

@ -0,0 +1,358 @@
"""
고급 이벤트 컨슈머 with DLQ and Retry
"""
import asyncio
import json
import logging
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
from redis import asyncio as aioredis
from aiokafka import AIOKafkaProducer
import sys
sys.path.append('/app')
from shared.kafka import KafkaConsumer, Event, EventType
from event_handlers import EventHandlers
logger = logging.getLogger(__name__)
class RetryPolicy:
    """Exponential-backoff retry policy.

    The delay for attempt *n* is ``initial_delay * exponential_base ** n``,
    capped at ``max_delay``.
    """

    def __init__(
        self,
        max_retries: int = 3,
        initial_delay: float = 1.0,
        max_delay: float = 60.0,
        exponential_base: float = 2.0
    ):
        self.max_retries = max_retries
        self.initial_delay = initial_delay
        self.max_delay = max_delay
        self.exponential_base = exponential_base

    def get_delay(self, retry_count: int) -> float:
        """Return the backoff delay in seconds for the given attempt number."""
        uncapped = self.initial_delay * self.exponential_base ** retry_count
        # Never wait longer than the configured ceiling.
        return uncapped if uncapped < self.max_delay else self.max_delay
class AdvancedEventConsumer:
    """Kafka event consumer with Redis-backed retry tracking and a DLQ.

    Each registered event handler is wrapped with exponential-backoff retry
    logic driven by per-event-type :class:`RetryPolicy` instances. Events that
    exhaust their retries are forwarded to a dead-letter-queue topic, and a
    short-lived failure record is written to Redis for later inspection.
    """

    def __init__(
        self,
        topics: List[str],
        group_id: str,
        redis_url: str = "redis://redis:6379",
        bootstrap_servers: str = "kafka:9092",
        enable_dlq: bool = True,
        dlq_topic: str = "dead-letter-queue"
    ):
        self.topics = topics
        self.group_id = group_id
        self.bootstrap_servers = bootstrap_servers
        self.enable_dlq = enable_dlq
        self.dlq_topic = dlq_topic

        # Kafka consumer for the subscribed topics.
        self.consumer = KafkaConsumer(
            topics=topics,
            group_id=group_id,
            bootstrap_servers=bootstrap_servers
        )

        # Producer used only for publishing failed events to the DLQ;
        # created lazily in start() when enable_dlq is set.
        self.dlq_producer: Optional[AIOKafkaProducer] = None

        # Redis connection: retry counters, DLQ records, and stats.
        self.redis: Optional[aioredis.Redis] = None
        self.redis_url = redis_url

        # Business-logic handlers; instantiated in start() once Redis is up.
        self.handlers: Optional[EventHandlers] = None

        # Per-event-type retry policies.
        self.retry_policies = {
            EventType.USER_CREATED: RetryPolicy(max_retries=3),
            EventType.USER_UPDATED: RetryPolicy(max_retries=2),
            EventType.USER_DELETED: RetryPolicy(max_retries=5),  # critical event
            EventType.OAUTH_APP_CREATED: RetryPolicy(max_retries=3),
            EventType.OAUTH_TOKEN_ISSUED: RetryPolicy(max_retries=1),
        }

        # Processing statistics, reported periodically by _report_stats().
        self.stats = {
            "processed": 0,
            "failed": 0,
            "retried": 0,
            "dlq_sent": 0
        }

        # Handle to the background stats task. Keeping a reference prevents
        # the task from being garbage-collected and lets stop() cancel it.
        self._stats_task: Optional[asyncio.Task] = None

    async def start(self):
        """Connect to Redis and Kafka, register handlers, begin consuming.

        Raises:
            Exception: re-raised after logging if any dependency fails to start.
        """
        try:
            # Redis connection
            self.redis = await aioredis.from_url(
                self.redis_url,
                encoding="utf-8",
                decode_responses=True
            )

            # Initialize event handlers
            self.handlers = EventHandlers(redis_client=self.redis)

            # Initialize DLQ producer
            if self.enable_dlq:
                self.dlq_producer = AIOKafkaProducer(
                    bootstrap_servers=self.bootstrap_servers,
                    value_serializer=lambda v: json.dumps(v).encode()
                )
                await self.dlq_producer.start()
                logger.info(f"DLQ Producer started for topic: {self.dlq_topic}")

            # Register event handlers
            self._register_event_handlers()

            # Start the Kafka consumer
            await self.consumer.start()
            logger.info(f"Advanced Event Consumer started: {self.topics}")

            # Start the stats-reporting task. The reference must be kept:
            # a bare create_task() result may be garbage-collected, silently
            # killing the reporter.
            self._stats_task = asyncio.create_task(self._report_stats())
        except Exception as e:
            logger.error(f"Failed to start Advanced Event Consumer: {e}")
            raise

    async def stop(self):
        """Shut down the stats task, consumer, DLQ producer, and Redis."""
        if self._stats_task:
            self._stats_task.cancel()
        await self.consumer.stop()
        if self.dlq_producer:
            await self.dlq_producer.stop()
        if self.redis:
            await self.redis.close()
        logger.info("Advanced Event Consumer stopped")

    def _register_event_handlers(self):
        """Register each event type's handler wrapped with retry logic."""
        self.consumer.register_handler(
            EventType.USER_CREATED,
            self._create_handler_with_retry(
                self.handlers.handle_user_created,
                EventType.USER_CREATED
            )
        )
        self.consumer.register_handler(
            EventType.USER_UPDATED,
            self._create_handler_with_retry(
                self.handlers.handle_user_updated,
                EventType.USER_UPDATED
            )
        )
        self.consumer.register_handler(
            EventType.USER_DELETED,
            self._create_handler_with_retry(
                self.handlers.handle_user_deleted,
                EventType.USER_DELETED
            )
        )
        self.consumer.register_handler(
            EventType.OAUTH_APP_CREATED,
            self._create_handler_with_retry(
                self.handlers.handle_oauth_app_created,
                EventType.OAUTH_APP_CREATED
            )
        )
        self.consumer.register_handler(
            EventType.OAUTH_TOKEN_ISSUED,
            self._create_handler_with_retry(
                self.handlers.handle_oauth_token_issued,
                EventType.OAUTH_TOKEN_ISSUED
            )
        )

    def _create_handler_with_retry(self, handler_func, event_type: EventType):
        """Return an async wrapper around *handler_func* adding retry/DLQ logic.

        On failure the wrapper either schedules a retry (while the event's
        RetryPolicy allows) or forwards the event to the DLQ.
        """
        async def wrapper(event: Event):
            event_id = f"{event.event_id}:{event.event_type}"
            retry_key = f"retry:{event_id}"
            # Initialize outside the try so the except branch can always
            # read a defined retry_count, even if the Redis lookup fails.
            retry_count = 0
            try:
                # Look up how many times this event has already been retried.
                if self.redis:
                    retry_count_str = await self.redis.get(retry_key)
                    retry_count = int(retry_count_str) if retry_count_str else 0

                # Run the actual handler.
                await handler_func(event.dict())

                # On success, clear any retry counter left from earlier attempts.
                if self.redis and retry_count > 0:
                    await self.redis.delete(retry_key)
                self.stats["processed"] += 1
            except Exception as e:
                logger.error(f"Error processing {event_type}: {e}")
                self.stats["failed"] += 1

                # Retry while the policy allows; otherwise dead-letter it.
                retry_policy = self.retry_policies.get(event_type)
                if retry_policy and retry_count < retry_policy.max_retries:
                    await self._handle_retry(event, retry_count, retry_policy, retry_key)
                else:
                    await self._send_to_dlq(event, str(e), retry_count)
        return wrapper

    async def _handle_retry(
        self,
        event: Event,
        retry_count: int,
        retry_policy: RetryPolicy,
        retry_key: str
    ):
        """Record a retry attempt, back off, then (optionally) republish.

        Args:
            event: the event that failed processing.
            retry_count: number of retries already performed (pre-increment).
            retry_policy: policy providing max retries and backoff delays.
            retry_key: Redis key holding the retry counter for this event.
        """
        retry_count += 1
        delay = retry_policy.get_delay(retry_count)
        logger.warning(
            f"Retrying event {event.event_id} "
            f"(attempt {retry_count}/{retry_policy.max_retries}) "
            f"after {delay}s"
        )

        # Persist the retry counter (auto-expires after 24 hours).
        if self.redis:
            await self.redis.setex(
                retry_key,
                timedelta(hours=24),
                retry_count
            )

        # NOTE: in production a dedicated retry topic is recommended instead
        # of sleeping in the consumer, which blocks this handler's progress.
        self.stats["retried"] += 1

        # Delayed execution before the (placeholder) republish below.
        await asyncio.sleep(delay)

        # Republish the event to a retry topic, if a producer is configured.
        if hasattr(self, 'retry_producer'):
            await self._republish_for_retry(event, retry_count)

    async def _send_to_dlq(self, event: Event, error: str, retry_count: int):
        """Forward a permanently failed event to the dead-letter queue.

        Also records a 7-day failure entry in Redis for inspection. If the
        DLQ is disabled or unavailable, the failure is only logged.
        """
        if not self.enable_dlq or not self.dlq_producer:
            logger.error(f"Failed to process event {event.event_id} after {retry_count} retries")
            return
        try:
            dlq_message = {
                "original_event": event.dict(),
                "error": error,
                "retry_count": retry_count,
                "failed_at": datetime.now().isoformat(),
                "consumer_group": self.group_id,
                "topic": self.topics[0] if self.topics else None
            }
            await self.dlq_producer.send(
                self.dlq_topic,
                value=dlq_message
            )
            self.stats["dlq_sent"] += 1
            logger.error(
                f"Event {event.event_id} sent to DLQ after {retry_count} retries. "
                f"Error: {error}"
            )

            # Record the DLQ delivery in Redis (kept for 7 days).
            if self.redis:
                dlq_key = f"dlq:{event.event_id}"
                await self.redis.setex(
                    dlq_key,
                    timedelta(days=7),
                    json.dumps({
                        "error": error,
                        "retry_count": retry_count,
                        "sent_at": datetime.now().isoformat()
                    })
                )
        except Exception as e:
            # Losing an event here is the worst case -> critical severity.
            logger.critical(f"Failed to send event to DLQ: {e}")

    async def _republish_for_retry(self, event: Event, retry_count: int):
        """Republish an event for retry (placeholder implementation).

        A real implementation would publish to a dedicated retry topic;
        this stub only logs the intent.
        """
        logger.info(f"Would republish event {event.event_id} for retry #{retry_count}")

    async def _report_stats(self):
        """Log and persist processing statistics once per minute.

        Runs until cancelled by stop().
        """
        while True:
            await asyncio.sleep(60)
            logger.info(
                f"Event Consumer Stats - "
                f"Processed: {self.stats['processed']}, "
                f"Failed: {self.stats['failed']}, "
                f"Retried: {self.stats['retried']}, "
                f"DLQ: {self.stats['dlq_sent']}"
            )

            # Mirror the stats into Redis for external monitoring.
            if self.redis:
                stats_key = f"consumer:stats:{self.group_id}"
                await self.redis.hset(
                    stats_key,
                    mapping={
                        **self.stats,
                        "updated_at": datetime.now().isoformat()
                    }
                )

    async def get_dlq_messages(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Return up to *limit* recorded DLQ failure entries (admin use).

        NOTE: uses KEYS, which scans the whole keyspace; acceptable for an
        admin endpoint, but SCAN would be preferable on large databases.
        """
        if not self.redis:
            return []
        dlq_keys = await self.redis.keys("dlq:*")
        messages = []
        for key in dlq_keys[:limit]:
            data = await self.redis.get(key)
            if data:
                event_id = key.replace("dlq:", "")
                message = json.loads(data)
                message["event_id"] = event_id
                messages.append(message)
        return messages

    async def retry_dlq_message(self, event_id: str) -> bool:
        """Manually re-enable retries for a dead-lettered event.

        Resets the event's retry counters in Redis so it can be reprocessed.
        Returns True if any counter was cleared, False otherwise.
        """
        logger.info(f"Manual retry requested for event: {event_id}")
        if self.redis:
            retry_key = f"retry:{event_id}:*"
            keys = await self.redis.keys(retry_key)
            if keys:
                await self.redis.delete(*keys)
                return True
        return False