Step 10: 데이터 분석 및 통계 시스템 구현

주요 기능:
- Statistics Service 마이크로서비스 구축
- 실시간 메트릭 수집 시스템 (Kafka 연동)
- 시계열 데이터베이스 인터페이스 구현
- 데이터 집계 및 분석 엔진
- 사용자/시스템/이벤트 분석 API
- WebSocket 기반 실시간 대시보드
- 알림 규칙 및 임계값 설정
- CSV 데이터 내보내기

구현된 컴포넌트:
- MetricsCollector: Kafka 이벤트 메트릭 수집
- DataAggregator: 시간별/일별 데이터 집계
- TimeSeriesDB: 시계열 데이터 저장 인터페이스
- WebSocketManager: 실시간 데이터 스트리밍
- Analytics APIs: 다양한 분석 엔드포인트

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
jungwoo choi
2025-09-11 15:01:15 +09:00
parent 1ca9ca1b5d
commit fad4bffdd9
9 changed files with 1670 additions and 0 deletions

View File

@ -0,0 +1,165 @@
"""
Time Series Database Interface (Simplified for InfluxDB)
"""
import logging
from typing import List, Dict, Any, Optional
from datetime import datetime
from models import Metric, AggregatedMetric
# Module-level logger named after this module (stdlib convention).
logger = logging.getLogger(__name__)
class TimeSeriesDB:
"""Time series database interface"""
def __init__(self, host: str, port: int, database: str):
self.host = host
self.port = port
self.database = database
self.is_connected = False
# In production, would use actual InfluxDB client
self.data_store = [] # Simplified in-memory storage
async def connect(self):
"""Connect to database"""
# Simplified connection
self.is_connected = True
logger.info(f"Connected to time series database at {self.host}:{self.port}")
async def close(self):
"""Close database connection"""
self.is_connected = False
logger.info("Disconnected from time series database")
async def write_metrics(self, metrics: List[Metric]):
"""Write metrics to database"""
for metric in metrics:
self.data_store.append({
"name": metric.name,
"value": metric.value,
"timestamp": metric.timestamp,
"tags": metric.tags,
"service": metric.service
})
async def query_metrics(
self,
metric_type: str,
start_time: datetime,
end_time: datetime
) -> List[Dict[str, Any]]:
"""Query metrics from database"""
results = []
for data in self.data_store:
if (data["name"].startswith(metric_type) and
start_time <= data["timestamp"] <= end_time):
results.append(data)
return results
async def get_time_series(
self,
metric_name: str,
start_time: datetime,
end_time: datetime,
interval: str
) -> List[Dict[str, Any]]:
"""Get time series data"""
return await self.query_metrics(metric_name, start_time, end_time)
async def store_aggregated_metric(self, metric: AggregatedMetric):
"""Store aggregated metric"""
self.data_store.append({
"name": f"agg.{metric.metric_name}",
"value": metric.value,
"timestamp": metric.end_time,
"tags": {"aggregation": metric.aggregation_type},
"service": "statistics"
})
async def count_metrics(
self,
metric_type: str,
start_time: datetime,
end_time: datetime
) -> int:
"""Count metrics"""
metrics = await self.query_metrics(metric_type, start_time, end_time)
return len(metrics)
async def get_average(
self,
metric_name: str,
start_time: datetime,
end_time: datetime
) -> Optional[float]:
"""Get average value"""
metrics = await self.query_metrics(metric_name, start_time, end_time)
if not metrics:
return None
values = [m["value"] for m in metrics]
return sum(values) / len(values)
async def count_distinct_tags(
self,
metric_type: str,
tag_name: str,
start_time: datetime,
end_time: datetime
) -> int:
"""Count distinct tag values"""
metrics = await self.query_metrics(metric_type, start_time, end_time)
unique_values = set()
for metric in metrics:
if tag_name in metric.get("tags", {}):
unique_values.add(metric["tags"][tag_name])
return len(unique_values)
async def get_top_metrics(
self,
metric_type: str,
group_by: str,
start_time: datetime,
end_time: datetime,
limit: int = 10
) -> List[Dict[str, Any]]:
"""Get top metrics grouped by tag"""
metrics = await self.query_metrics(metric_type, start_time, end_time)
grouped = {}
for metric in metrics:
key = metric.get("tags", {}).get(group_by, "unknown")
grouped[key] = grouped.get(key, 0) + 1
sorted_items = sorted(grouped.items(), key=lambda x: x[1], reverse=True)
return [{"name": k, "count": v} for k, v in sorted_items[:limit]]
async def count_metrics_with_value(
self,
metric_name: str,
value: float,
start_time: datetime,
end_time: datetime
) -> int:
"""Count metrics with specific value"""
metrics = await self.query_metrics(metric_name, start_time, end_time)
return sum(1 for m in metrics if m["value"] == value)
async def get_metric_distribution(
self,
metric_type: str,
tag_name: str,
start_time: datetime,
end_time: datetime
) -> Dict[str, int]:
"""Get metric distribution by tag"""
metrics = await self.query_metrics(metric_type, start_time, end_time)
distribution = {}
for metric in metrics:
key = metric.get("tags", {}).get(tag_name, "unknown")
distribution[key] = distribution.get(key, 0) + 1
return distribution
async def delete_old_data(self, cutoff_date: datetime):
"""Delete old data"""
self.data_store = [
d for d in self.data_store
if d["timestamp"] >= cutoff_date
]