- Implemented search service with Apache Solr instead of Elasticsearch - Added full-text search, faceted search, and autocomplete capabilities - Created data indexer for synchronizing data from MongoDB/Kafka to Solr - Configured external volume mounts for all data services: - MongoDB, Redis, Kafka, Zookeeper, MinIO, Solr - All data now persists in ./data/ directory - Added comprehensive search API endpoints - Created documentation for data persistence and backup strategies 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
292 lines
11 KiB
Python
292 lines
11 KiB
Python
#!/usr/bin/env python3
"""
Test script for Search Service with Apache Solr
"""

import asyncio
import json
import os
import time
from datetime import datetime

import httpx

# Root URL of the search service under test. Defaults to the local instance;
# set SEARCH_SERVICE_URL to point the script at another deployment.
BASE_URL = os.environ.get("SEARCH_SERVICE_URL", "http://localhost:8015")


async def test_search_api():
    """Exercise the search service's HTTP API end to end.

    Runs against the service at ``BASE_URL``: a health check, single and
    bulk indexing of sample user/file/content documents, then basic,
    filtered, faceted, suggest, similar-document, highlighting, statistics
    and boolean ("complex") queries, and finally deletes one document and
    searches for it again.

    Outcomes are printed rather than asserted, so this is a smoke test: a
    human reads the output. Non-200 responses from the optional endpoints
    (similar, stats, delete) are reported instead of being skipped silently.
    """
    async with httpx.AsyncClient() as client:
        print("\n🔍 Testing Search Service API...")

        # Test health check
        print("\n1. Testing health check...")
        response = await client.get(f"{BASE_URL}/health")
        print(f"Health check: {response.json()}")

        # Test index sample documents
        print("\n2. Indexing sample documents...")

        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12. It
        # is kept here because switching to an aware datetime changes the
        # ISO string (adds "+00:00") - confirm what the indexer accepts first.

        # Index user document
        user_doc = {
            "id": "user_test_001",
            "doc_type": "user",
            "user_id": "test_001",
            "username": "john_doe",
            "email": "john@example.com",
            "name": "John Doe",
            "bio": "Software developer passionate about Python and microservices",
            "tags": ["python", "developer", "backend"],
            "created_at": datetime.utcnow().isoformat(),
        }
        response = await client.post(f"{BASE_URL}/api/search/index", json=user_doc)
        print(f"Indexed user: {response.json()}")

        # Index file documents
        file_docs = [
            {
                "id": "file_test_001",
                "doc_type": "file",
                "file_id": "test_file_001",
                "filename": "architecture_diagram.png",
                "content_type": "image/png",
                "size": 1024000,
                "user_id": "test_001",
                "tags": ["architecture", "design", "documentation"],
                "description": "System architecture diagram showing microservices",
                "created_at": datetime.utcnow().isoformat(),
            },
            {
                "id": "file_test_002",
                "doc_type": "file",
                "file_id": "test_file_002",
                "filename": "user_manual.pdf",
                "content_type": "application/pdf",
                "size": 2048000,
                "user_id": "test_001",
                "tags": ["documentation", "manual", "guide"],
                "description": "Complete user manual for the application",
                "created_at": datetime.utcnow().isoformat(),
            },
        ]
        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=file_docs)
        print(f"Bulk indexed files: {response.json()}")

        # Index content documents
        content_docs = [
            {
                "id": "content_test_001",
                "doc_type": "content",
                "content_id": "test_content_001",
                "title": "Getting Started with Microservices",
                "content": "Microservices architecture is a method of developing software applications as a suite of independently deployable services.",
                "summary": "Introduction to microservices architecture patterns",
                "author_id": "test_001",
                "tags": ["microservices", "architecture", "tutorial"],
                "category": "technology",
                "status": "published",
                "created_at": datetime.utcnow().isoformat(),
            },
            {
                "id": "content_test_002",
                "doc_type": "content",
                "content_id": "test_content_002",
                "title": "Python Best Practices",
                "content": "Learn the best practices for writing clean, maintainable Python code including PEP 8 style guide.",
                "summary": "Essential Python coding standards and practices",
                "author_id": "test_001",
                "tags": ["python", "programming", "best-practices"],
                "category": "programming",
                "status": "published",
                "created_at": datetime.utcnow().isoformat(),
            },
        ]
        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=content_docs)
        print(f"Bulk indexed content: {response.json()}")

        # Wait for indexing
        await asyncio.sleep(2)

        # Test basic search
        print("\n3. Testing basic search...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={"q": "microservices"},
        )
        results = response.json()
        print(f"Search for 'microservices': Found {results['total']} results")
        if results['documents']:
            # Content documents carry a title, file documents a filename.
            first = results['documents'][0]
            label = first.get('title', first.get('filename', 'N/A'))
            print(f"First result: {label}")

        # Test search with filters
        print("\n4. Testing filtered search...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={
                "q": "*:*",
                "doc_type": "file",
                "rows": 5,
            },
        )
        results = response.json()
        print(f"Files search: Found {results['total']} files")

        # Test faceted search
        print("\n5. Testing faceted search...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={
                "q": "*:*",
                "facet": "true",
                "facet_field": ["doc_type", "tags", "category", "status"],
            },
        )
        results = response.json()
        print(f"Facets: {json.dumps(results['facets'], indent=2)}")

        # Test autocomplete/suggest
        print("\n6. Testing autocomplete...")
        response = await client.get(
            f"{BASE_URL}/api/search/suggest",
            params={
                "q": "micro",
                "field": "title",
                "limit": 5,
            },
        )
        suggestions = response.json()
        print(f"Suggestions for 'micro': {suggestions['suggestions']}")

        # Test similar documents
        print("\n7. Testing similar documents...")
        response = await client.get(f"{BASE_URL}/api/search/similar/content_test_001")
        if response.status_code == 200:
            similar = response.json()
            print(f"Found {similar['count']} similar documents")
        else:
            print(f"Similar search: {response.status_code}")

        # Test search with highlighting
        print("\n8. Testing search with highlighting...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={"q": "Python"},
        )
        results = response.json()
        # .get(): a response without a 'highlighting' key is treated the same
        # as an empty one instead of aborting the remaining steps.
        highlighting = results.get('highlighting')
        if highlighting:
            print(f"Highlighting results: {len(highlighting)} documents highlighted")

        # Test search statistics
        print("\n9. Testing search statistics...")
        response = await client.get(f"{BASE_URL}/api/search/stats")
        if response.status_code == 200:
            stats = response.json()
            print(f"Index stats: {stats['statistics']}")
        else:
            print(f"Stats failed: {response.status_code}")

        # Test complex query
        print("\n10. Testing complex query...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={
                "q": "architecture OR python",
                "doc_type": "content",
                "sort": "created_at desc",
                "rows": 10,
            },
        )
        results = response.json()
        print(f"Complex query: Found {results['total']} results")

        # Test delete document
        print("\n11. Testing document deletion...")
        response = await client.delete(f"{BASE_URL}/api/search/document/content_test_002")
        if response.status_code == 200:
            print(f"Deleted document: {response.json()}")
        else:
            print(f"Delete failed: {response.status_code}")

        # Verify deletion
        await asyncio.sleep(1)
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={"q": "id:content_test_002"},
        )
        results = response.json()
        print(f"Verify deletion: Found {results['total']} results (should be 0)")


async def test_performance():
    """Bulk-index 100 synthetic documents and time a handful of searches.

    Posts the documents to the bulk-index endpoint under ``BASE_URL``,
    pauses two seconds, then issues one search per sample query and prints
    the reported hit count together with the client-side round-trip time.
    """
    print("\n\n⚡ Testing Search Performance...")

    async with httpx.AsyncClient(timeout=30.0) as client:
        # Index many documents
        print("Indexing 100 test documents...")
        docs = [
            {
                "id": f"perf_test_{i}",
                "doc_type": "content",
                "title": f"Test Document {i}",
                "content": f"This is test content for document {i} with various keywords like search, Solr, Python, microservices",
                "tags": [f"tag{i % 10}", f"category{i % 5}"],
                "created_at": datetime.utcnow().isoformat(),
            }
            for i in range(100)
        ]

        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=docs)
        print(f"Indexed {response.json().get('count', 0)} documents")

        # Wait for indexing
        await asyncio.sleep(2)

        # Test search speed
        print("\nTesting search response times...")

        queries = ["search", "Python", "document", "test", "microservices"]
        for query in queries:
            # perf_counter() is monotonic and high-resolution; time.time()
            # can jump when the system clock is adjusted mid-measurement.
            start = time.perf_counter()
            response = await client.get(
                f"{BASE_URL}/api/search",
                params={"q": query, "rows": 20},
            )
            elapsed = time.perf_counter() - start
            results = response.json()
            print(f"Query '{query}': {results['total']} results in {elapsed:.3f}s")


async def test_reindex():
    """Trigger a reindex of the users collection, then an index optimize.

    Both calls are POSTs against the service at ``BASE_URL``. A 200 response
    prints the returned JSON; any other status prints the status code so a
    failed call is visible in the output.
    """
    # NOTE(review): the original docstring says the reindex source is
    # MongoDB; that is not visible from this script - confirm server-side.
    print("\n\n🔄 Testing Reindex Functionality...")

    async with httpx.AsyncClient() as client:
        # Trigger reindex for users collection
        print("Triggering reindex for users collection...")
        response = await client.post(
            f"{BASE_URL}/api/search/reindex/users",
            params={"doc_type": "user"},
        )
        if response.status_code == 200:
            print(f"Reindex started: {response.json()}")
        else:
            print(f"Reindex failed: {response.status_code}")

        # Test index optimization
        print("\nTesting index optimization...")
        response = await client.post(f"{BASE_URL}/api/search/optimize")
        if response.status_code == 200:
            print(f"Optimization: {response.json()}")
        else:
            print(f"Optimization failed: {response.status_code}")


async def main():
    """Run every test coroutine in order, framed by a start/finish banner."""
    rule = "=" * 60

    print(rule)
    print("SEARCH SERVICE TEST SUITE (Apache Solr)")
    print(rule)
    print(f"Started at: {datetime.now().isoformat()}")

    # The suites run sequentially, each one awaited before the next starts.
    for suite in (test_search_api, test_performance, test_reindex):
        await suite()

    print("\n" + rule)
    print("✅ All search tests completed!")
    print(f"Finished at: {datetime.now().isoformat()}")
    print(rule)


# Script entry point: run the async test suite when executed directly.
if __name__ == "__main__":
    asyncio.run(main())