feat: Add Step 13 - Search System with Apache Solr and Data Persistence
- Implemented search service with Apache Solr instead of Elasticsearch
- Added full-text search, faceted search, and autocomplete capabilities
- Created data indexer for synchronizing data from MongoDB/Kafka to Solr
- Configured external volume mounts for all data services:
  - MongoDB, Redis, Kafka, Zookeeper, MinIO, Solr
  - All data now persists in ./data/ directory
- Added comprehensive search API endpoints
- Created documentation for data persistence and backup strategies

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
292
services/search/backend/test_search.py
Normal file
292
services/search/backend/test_search.py
Normal file
@ -0,0 +1,292 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for Search Service with Apache Solr
|
||||
"""
|
||||
import asyncio
|
||||
import httpx
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
BASE_URL = "http://localhost:8015"
|
||||
|
||||
async def test_search_api():
    """Smoke-test the search service's REST endpoints against ``BASE_URL``.

    Steps, in order: health check, single and bulk indexing of sample
    user/file/content documents, basic search, filtered search, faceted
    search, suggestions, similar documents, highlighting, index statistics,
    a sorted boolean query, and deletion of one document followed by a
    lookup for it.

    Results are printed, not asserted. A response whose JSON lacks an
    expected key raises ``KeyError``; transport failures raise the
    corresponding ``httpx`` exception.
    """
    # Function-scope import: the module only imports ``datetime`` itself.
    from datetime import timezone

    def utc_timestamp():
        """Return the current UTC time as a naive ISO-8601 string."""
        # datetime.utcnow() is deprecated since Python 3.12. Taking an aware
        # "now" and dropping tzinfo yields the same naive value, so the string
        # sent to the service keeps its original format (no "+00:00" suffix).
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    search_url = f"{BASE_URL}/api/search"
    bulk_index_url = f"{BASE_URL}/api/search/bulk-index"

    async with httpx.AsyncClient() as client:
        print("\n🔍 Testing Search Service API...")

        # 1. Health check
        print("\n1. Testing health check...")
        response = await client.get(f"{BASE_URL}/health")
        print(f"Health check: {response.json()}")

        # 2. Index sample documents
        print("\n2. Indexing sample documents...")

        # Single user document through the one-document endpoint.
        user_doc = {
            "id": "user_test_001",
            "doc_type": "user",
            "user_id": "test_001",
            "username": "john_doe",
            "email": "john@example.com",
            "name": "John Doe",
            "bio": "Software developer passionate about Python and microservices",
            "tags": ["python", "developer", "backend"],
            "created_at": utc_timestamp(),
        }

        response = await client.post(f"{BASE_URL}/api/search/index", json=user_doc)
        print(f"Indexed user: {response.json()}")

        # File documents through the bulk endpoint.
        file_docs = [
            {
                "id": "file_test_001",
                "doc_type": "file",
                "file_id": "test_file_001",
                "filename": "architecture_diagram.png",
                "content_type": "image/png",
                "size": 1024000,
                "user_id": "test_001",
                "tags": ["architecture", "design", "documentation"],
                "description": "System architecture diagram showing microservices",
                "created_at": utc_timestamp(),
            },
            {
                "id": "file_test_002",
                "doc_type": "file",
                "file_id": "test_file_002",
                "filename": "user_manual.pdf",
                "content_type": "application/pdf",
                "size": 2048000,
                "user_id": "test_001",
                "tags": ["documentation", "manual", "guide"],
                "description": "Complete user manual for the application",
                "created_at": utc_timestamp(),
            },
        ]

        response = await client.post(bulk_index_url, json=file_docs)
        print(f"Bulk indexed files: {response.json()}")

        # Content documents through the bulk endpoint.
        content_docs = [
            {
                "id": "content_test_001",
                "doc_type": "content",
                "content_id": "test_content_001",
                "title": "Getting Started with Microservices",
                "content": "Microservices architecture is a method of developing software applications as a suite of independently deployable services.",
                "summary": "Introduction to microservices architecture patterns",
                "author_id": "test_001",
                "tags": ["microservices", "architecture", "tutorial"],
                "category": "technology",
                "status": "published",
                "created_at": utc_timestamp(),
            },
            {
                "id": "content_test_002",
                "doc_type": "content",
                "content_id": "test_content_002",
                "title": "Python Best Practices",
                "content": "Learn the best practices for writing clean, maintainable Python code including PEP 8 style guide.",
                "summary": "Essential Python coding standards and practices",
                "author_id": "test_001",
                "tags": ["python", "programming", "best-practices"],
                "category": "programming",
                "status": "published",
                "created_at": utc_timestamp(),
            },
        ]

        response = await client.post(bulk_index_url, json=content_docs)
        print(f"Bulk indexed content: {response.json()}")

        # Pause before querying so the freshly indexed documents can become
        # searchable.
        await asyncio.sleep(2)

        # 3. Basic search
        print("\n3. Testing basic search...")
        response = await client.get(search_url, params={"q": "microservices"})
        results = response.json()
        print(f"Search for 'microservices': Found {results['total']} results")
        if results['documents']:
            first = results['documents'][0]
            # Content documents carry a title, file documents a filename.
            print(f"First result: {first.get('title', first.get('filename', 'N/A'))}")

        # 4. Search restricted to one document type
        print("\n4. Testing filtered search...")
        response = await client.get(
            search_url,
            params={
                "q": "*:*",
                "doc_type": "file",
                "rows": 5,
            },
        )
        results = response.json()
        print(f"Files search: Found {results['total']} files")

        # 5. Faceted search
        print("\n5. Testing faceted search...")
        response = await client.get(
            search_url,
            params={
                "q": "*:*",
                "facet": "true",
                "facet_field": ["doc_type", "tags", "category", "status"],
            },
        )
        results = response.json()
        print(f"Facets: {json.dumps(results['facets'], indent=2)}")

        # 6. Autocomplete / suggest
        print("\n6. Testing autocomplete...")
        response = await client.get(
            f"{BASE_URL}/api/search/suggest",
            params={
                "q": "micro",
                "field": "title",
                "limit": 5,
            },
        )
        suggestions = response.json()
        print(f"Suggestions for 'micro': {suggestions['suggestions']}")

        # 7. Similar documents
        print("\n7. Testing similar documents...")
        response = await client.get(f"{BASE_URL}/api/search/similar/content_test_001")
        if response.status_code == 200:
            similar = response.json()
            print(f"Found {similar['count']} similar documents")
        else:
            print(f"Similar search: {response.status_code}")

        # 8. Highlighting
        print("\n8. Testing search with highlighting...")
        response = await client.get(search_url, params={"q": "Python"})
        results = response.json()
        if results['highlighting']:
            print(f"Highlighting results: {len(results['highlighting'])} documents highlighted")

        # 9. Index statistics
        print("\n9. Testing search statistics...")
        response = await client.get(f"{BASE_URL}/api/search/stats")
        if response.status_code == 200:
            stats = response.json()
            print(f"Index stats: {stats['statistics']}")
        else:
            # Report the failure instead of skipping it silently, as step 7 does.
            print(f"Stats request failed: {response.status_code}")

        # 10. Boolean query with a type filter and sort order
        print("\n10. Testing complex query...")
        response = await client.get(
            search_url,
            params={
                "q": "architecture OR python",
                "doc_type": "content",
                "sort": "created_at desc",
                "rows": 10,
            },
        )
        results = response.json()
        print(f"Complex query: Found {results['total']} results")

        # 11. Delete a document
        print("\n11. Testing document deletion...")
        response = await client.delete(f"{BASE_URL}/api/search/document/content_test_002")
        if response.status_code == 200:
            print(f"Deleted document: {response.json()}")
        else:
            # Report the failure instead of skipping it silently, as step 7 does.
            print(f"Delete failed: {response.status_code}")

        # Look the deleted id up again after a short pause; the count is only
        # printed, not asserted.
        await asyncio.sleep(1)
        response = await client.get(search_url, params={"q": "id:content_test_002"})
        results = response.json()
        print(f"Verify deletion: Found {results['total']} results (should be 0)")
|
||||
|
||||
async def test_performance():
    """Bulk-index 100 generated documents, then time five search queries.

    Prints the indexed count reported by the service and, for each query,
    the number of hits and the elapsed request time in seconds. Nothing is
    asserted; the service at ``BASE_URL`` must be reachable.
    """
    # Function-scope imports: the module does not import these names at the top.
    import time
    from datetime import timezone

    print("\n\n⚡ Testing Search Performance...")

    async with httpx.AsyncClient(timeout=30.0) as client:
        print("Indexing 100 test documents...")
        docs = [
            {
                "id": f"perf_test_{i}",
                "doc_type": "content",
                "title": f"Test Document {i}",
                "content": f"This is test content for document {i} with various keywords like search, Solr, Python, microservices",
                "tags": [f"tag{i%10}", f"category{i%5}"],
                # datetime.utcnow() is deprecated since Python 3.12; dropping
                # tzinfo from an aware "now" keeps the same naive ISO format.
                "created_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            }
            for i in range(100)
        ]

        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=docs)
        print(f"Indexed {response.json().get('count', 0)} documents")

        # Pause before querying so the new documents can become searchable.
        await asyncio.sleep(2)

        print("\nTesting search response times...")

        queries = ["search", "Python", "document", "test", "microservices"]
        for query in queries:
            # perf_counter() is monotonic and high-resolution; time.time()
            # follows the wall clock and can jump if the clock is adjusted.
            start = time.perf_counter()
            response = await client.get(
                f"{BASE_URL}/api/search",
                params={"q": query, "rows": 20},
            )
            elapsed = time.perf_counter() - start
            results = response.json()
            print(f"Query '{query}': {results['total']} results in {elapsed:.3f}s")
|
||||
|
||||
async def test_reindex():
    """Trigger a reindex of the ``users`` collection, then an index optimize.

    Both requests are POSTs to the service at ``BASE_URL``. The JSON reply is
    printed on HTTP 200, and the status code is printed otherwise. Nothing is
    asserted.
    """
    print("\n\n🔄 Testing Reindex Functionality...")

    async with httpx.AsyncClient() as client:
        # Reindex request for the users collection, tagged as doc_type "user".
        print("Triggering reindex for users collection...")
        response = await client.post(
            f"{BASE_URL}/api/search/reindex/users",
            params={"doc_type": "user"},
        )
        if response.status_code == 200:
            print(f"Reindex started: {response.json()}")
        else:
            print(f"Reindex failed: {response.status_code}")

        # Index optimization request.
        print("\nTesting index optimization...")
        response = await client.post(f"{BASE_URL}/api/search/optimize")
        if response.status_code == 200:
            print(f"Optimization: {response.json()}")
        else:
            # Mirror the reindex branch so a failed optimize is not silent.
            print(f"Optimization failed: {response.status_code}")
|
||||
|
||||
async def main():
    """Run the three test coroutines in sequence between banner lines.

    Prints a header with the local start time, awaits ``test_search_api``,
    ``test_performance`` and ``test_reindex`` in that order, then prints a
    footer with the local finish time.
    """
    rule = "=" * 60

    for header_line in (rule, "SEARCH SERVICE TEST SUITE (Apache Solr)", rule):
        print(header_line)
    print(f"Started at: {datetime.now().isoformat()}")

    # Awaited one after another, in this fixed order.
    for suite in (test_search_api, test_performance, test_reindex):
        await suite()

    print("\n" + rule)
    print("✅ All search tests completed!")
    print(f"Finished at: {datetime.now().isoformat()}")
    print(rule)


# Script entry point: run the whole suite on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user