Initial commit - cleaned repository
This commit is contained in:
362
services/search/backend/main.py
Normal file
362
services/search/backend/main.py
Normal file
@ -0,0 +1,362 @@
|
||||
"""
|
||||
Search Service with Apache Solr
|
||||
"""
|
||||
from fastapi import FastAPI, Query, HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
from contextlib import asynccontextmanager
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
from solr_client import SolrClient
|
||||
from indexer import DataIndexer
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger for this service.
logger = logging.getLogger(__name__)

# Global instances, populated by `lifespan` at startup; endpoints treat a
# value of None as "service unavailable" and answer 503.
solr_client = None
data_indexer = None
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifecycle.

    Startup: wait for Solr to become reachable (bounded retries), then
    start the data indexer and launch an initial full sync in the
    background. Shutdown: stop the indexer.
    """
    global solr_client, data_indexer

    # Startup
    logger.info("Starting Search Service...")

    # Wait for Solr to be ready
    solr_url = os.getenv("SOLR_URL", "http://solr:8983/solr")
    max_retries = 30

    for attempt in range(1, max_retries + 1):
        try:
            solr_client = SolrClient(solr_url=solr_url, core_name="site11")
            logger.info("Connected to Solr")
            break
        except Exception as e:
            # Include the cause so connection problems are diagnosable.
            logger.warning(f"Waiting for Solr... ({attempt}/{max_retries}): {e}")
            if attempt < max_retries:
                # Don't sleep after the final failed attempt.
                await asyncio.sleep(2)
    else:
        # Loop exhausted without a successful connection; make the failure
        # explicit instead of starting up silently degraded.
        logger.error(
            "Could not connect to Solr after %d attempts; "
            "search endpoints will return 503", max_retries
        )

    if solr_client:
        # Initialize data indexer
        mongodb_url = os.getenv("MONGODB_URL", "mongodb://mongodb:27017")
        kafka_servers = os.getenv("KAFKA_BOOTSTRAP_SERVERS", "kafka:9092")

        data_indexer = DataIndexer(solr_client, mongodb_url, kafka_servers)
        await data_indexer.start()

        # Initial data sync runs in the background so startup isn't blocked.
        asyncio.create_task(data_indexer.sync_all_data())

    yield

    # Shutdown
    if data_indexer:
        await data_indexer.stop()

    logger.info("Search Service stopped")
|
||||
|
||||
# FastAPI application; `lifespan` handles Solr/indexer setup and teardown.
app = FastAPI(
    title="Search Service",
    description="Full-text search with Apache Solr",
    version="1.0.0",
    lifespan=lifespan
)
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Liveness probe: reports service identity, current time, and whether
    a Solr client has been created."""
    payload = {
        "status": "healthy",
        "service": "search",
        "timestamp": datetime.utcnow().isoformat(),
        "solr_connected": solr_client is not None,
    }
    return payload
|
||||
|
||||
@app.get("/api/search")
async def search(
    q: str = Query(..., description="Search query"),
    doc_type: Optional[str] = Query(None, description="Filter by document type"),
    start: int = Query(0, ge=0, description="Starting offset"),
    rows: int = Query(10, ge=1, le=100, description="Number of results"),
    sort: Optional[str] = Query(None, description="Sort order (e.g., 'created_at desc')"),
    facet: bool = Query(False, description="Enable faceting"),
    facet_field: Optional[List[str]] = Query(None, description="Fields to facet on")
):
    """
    Search documents across all indexed content.

    Builds an optional `doc_type` filter query plus paging/sort/facet
    parameters and delegates to the Solr client. Raises 503 when Solr is
    not connected and 500 when the search itself fails.
    """
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        # Build filter query. Quote the user-supplied value so Solr
        # query-syntax characters (':', spaces, parentheses, ...) cannot
        # break out of — or inject into — the doc_type filter.
        fq = []
        if doc_type:
            safe_type = doc_type.replace('\\', '\\\\').replace('"', '\\"')
            fq.append(f'doc_type:"{safe_type}"')

        # Prepare search parameters; optional ones are passed only when supplied.
        search_params = {
            'start': start,
            'rows': rows,
            'facet': facet
        }

        if fq:
            search_params['fq'] = fq

        if sort:
            search_params['sort'] = sort

        if facet_field:
            search_params['facet_field'] = facet_field

        # Execute search
        results = solr_client.search(q, **search_params)

        return {
            "query": q,
            "total": results['total'],
            "start": start,
            "rows": rows,
            "documents": results['documents'],
            "facets": results.get('facets', {}),
            "highlighting": results.get('highlighting', {})
        }

    except Exception as e:
        logger.error(f"Search failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/api/search/suggest")
async def suggest(
    q: str = Query(..., min_length=1, description="Query prefix"),
    field: str = Query("title", description="Field to search in"),
    limit: int = Query(10, ge=1, le=50, description="Maximum suggestions")
):
    """Autocomplete: return up to `limit` suggestions matching the prefix `q`."""
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        matches = solr_client.suggest(q, field, limit)
        return {"query": q, "suggestions": matches, "count": len(matches)}
    except Exception as e:
        logger.error(f"Suggest failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/api/search/similar/{doc_id}")
async def find_similar(
    doc_id: str,
    rows: int = Query(5, ge=1, le=20, description="Number of similar documents")
):
    """Return up to `rows` documents related to `doc_id`, via the Solr
    client's more-like-this lookup."""
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        neighbours = solr_client.more_like_this(doc_id, rows=rows)
        return {
            "source_document": doc_id,
            "similar_documents": neighbours,
            "count": len(neighbours),
        }
    except Exception as e:
        logger.error(f"Similar search failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/api/search/index")
async def index_document(document: Dict[str, Any]):
    """
    Index a single document.

    Uses the document's `doc_type` field (default "general") to choose the
    index type. Raises 503 when Solr is not connected, 500 on failure.
    """
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        doc_type = document.get('doc_type', 'general')
        success = solr_client.index_document(document, doc_type)

        if success:
            return {
                "status": "success",
                "message": "Document indexed",
                "document_id": document.get('id')
            }
        else:
            raise HTTPException(status_code=500, detail="Failed to index document")

    except HTTPException:
        # Re-raise as-is: without this, the generic handler below catches
        # the 500 above and re-wraps it with a mangled detail string.
        raise
    except Exception as e:
        logger.error(f"Indexing failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/api/search/bulk-index")
async def bulk_index(documents: List[Dict[str, Any]]):
    """Index a batch of documents and report how many were indexed."""
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        count = solr_client.bulk_index(documents)
        return {
            "status": "success",
            "message": f"Indexed {count} documents",
            "count": count,
        }
    except Exception as e:
        logger.error(f"Bulk indexing failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.delete("/api/search/document/{doc_id}")
async def delete_document(doc_id: str):
    """
    Delete a document from the index.

    Raises 503 when Solr is not connected, 500 when the delete fails.
    """
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        success = solr_client.delete_document(doc_id)

        if success:
            return {
                "status": "success",
                "message": "Document deleted",
                "document_id": doc_id
            }
        else:
            raise HTTPException(status_code=500, detail="Failed to delete document")

    except HTTPException:
        # Re-raise as-is: without this, the generic handler below catches
        # the 500 above and re-wraps it with a mangled detail string.
        raise
    except Exception as e:
        logger.error(f"Deletion failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/api/search/stats")
async def get_stats():
    """Expose search index statistics reported by the Solr client."""
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        snapshot = solr_client.get_stats()
        return {
            "status": "success",
            "statistics": snapshot,
            "timestamp": datetime.utcnow().isoformat(),
        }
    except Exception as e:
        logger.error(f"Failed to get stats: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/api/search/reindex/{collection}")
async def reindex_collection(
    collection: str,
    doc_type: Optional[str] = Query(None, description="Document type for the collection")
):
    """Kick off a background reindex of one source collection.

    When `doc_type` is not given, known collection names map to their
    document types; any other collection uses its own name as the type.
    """
    if not data_indexer:
        raise HTTPException(status_code=503, detail="Indexer service unavailable")

    try:
        known_types = {
            'users': 'user',
            'files': 'file',
            'content': 'content',
        }
        effective_type = doc_type or known_types.get(collection, collection)

        # Fire-and-forget: the reindex itself runs in the background.
        asyncio.create_task(data_indexer.reindex_collection(collection, effective_type))

        return {
            "status": "success",
            "message": f"Reindexing {collection} started",
            "collection": collection,
            "doc_type": effective_type,
        }
    except Exception as e:
        logger.error(f"Reindex failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/api/search/optimize")
async def optimize_index():
    """
    Optimize the search index.

    Raises 503 when Solr is not connected, 500 when the optimize fails.
    """
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        success = solr_client.optimize_index()

        if success:
            return {
                "status": "success",
                "message": "Index optimization started"
            }
        else:
            raise HTTPException(status_code=500, detail="Failed to optimize index")

    except HTTPException:
        # Re-raise as-is: without this, the generic handler below catches
        # the 500 above and re-wraps it with a mangled detail string.
        raise
    except Exception as e:
        logger.error(f"Optimization failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.post("/api/search/clear")
async def clear_index():
    """
    Clear all documents from the index (DANGER!).

    NOTE(review): this destructive endpoint has no authentication or
    confirmation visible in this file — verify it is protected upstream
    (gateway/auth middleware) before exposing it.
    """
    if not solr_client:
        raise HTTPException(status_code=503, detail="Search service unavailable")

    try:
        success = solr_client.clear_index()

        if success:
            return {
                "status": "success",
                "message": "Index cleared",
                "warning": "All documents have been deleted!"
            }
        else:
            raise HTTPException(status_code=500, detail="Failed to clear index")

    except HTTPException:
        # Re-raise as-is: without this, the generic handler below catches
        # the 500 above and re-wraps it with a mangled detail string.
        raise
    except Exception as e:
        logger.error(f"Clear index failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
if __name__ == "__main__":
    # Dev/standalone entry point: serve the app on all interfaces, port 8000.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
Reference in New Issue
Block a user