Files
site11/services/search/backend/test_search.py
jungwoo choi dd165454f0 feat: Add Step 13 - Search System with Apache Solr and Data Persistence
- Implemented search service with Apache Solr instead of Elasticsearch
- Added full-text search, faceted search, and autocomplete capabilities
- Created data indexer for synchronizing data from MongoDB/Kafka to Solr
- Configured external volume mounts for all data services:
  - MongoDB, Redis, Kafka, Zookeeper, MinIO, Solr
  - All data now persists in ./data/ directory
- Added comprehensive search API endpoints
- Created documentation for data persistence and backup strategies

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-11 20:27:02 +09:00

292 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Test script for Search Service with Apache Solr
"""
import asyncio
import httpx
import json
from datetime import datetime
# Root URL of the search service under test; every request in this script is built from it.
BASE_URL = "http://localhost:8015"
async def test_search_api():
    """Walk through the search service's HTTP endpoints one by one.

    The coroutine indexes a sample user, two files and two content
    documents, then issues basic, filtered, faceted, suggest, similar,
    highlighting, statistics and complex queries, and finally deletes one
    document and re-queries for it. Every outcome is printed; nothing is
    asserted, so a failure surfaces only as an exception raised by httpx
    or by a missing key in a response body.
    """
    async with httpx.AsyncClient() as client:
        # Shared prefix for every /api/search endpoint used below.
        search_url = f"{BASE_URL}/api/search"

        print("\n🔍 Testing Search Service API...")

        # --- 1. Health check -------------------------------------------
        print("\n1. Testing health check...")
        resp = await client.get(f"{BASE_URL}/health")
        print(f"Health check: {resp.json()}")

        # --- 2. Index sample documents ---------------------------------
        print("\n2. Indexing sample documents...")

        # Single user document goes through the one-document endpoint.
        sample_user = {
            "id": "user_test_001",
            "doc_type": "user",
            "user_id": "test_001",
            "username": "john_doe",
            "email": "john@example.com",
            "name": "John Doe",
            "bio": "Software developer passionate about Python and microservices",
            "tags": ["python", "developer", "backend"],
            "created_at": datetime.utcnow().isoformat(),
        }
        resp = await client.post(f"{search_url}/index", json=sample_user)
        print(f"Indexed user: {resp.json()}")

        # File documents are sent together through the bulk endpoint.
        sample_files = [
            {
                "id": "file_test_001",
                "doc_type": "file",
                "file_id": "test_file_001",
                "filename": "architecture_diagram.png",
                "content_type": "image/png",
                "size": 1024000,
                "user_id": "test_001",
                "tags": ["architecture", "design", "documentation"],
                "description": "System architecture diagram showing microservices",
                "created_at": datetime.utcnow().isoformat(),
            },
            {
                "id": "file_test_002",
                "doc_type": "file",
                "file_id": "test_file_002",
                "filename": "user_manual.pdf",
                "content_type": "application/pdf",
                "size": 2048000,
                "user_id": "test_001",
                "tags": ["documentation", "manual", "guide"],
                "description": "Complete user manual for the application",
                "created_at": datetime.utcnow().isoformat(),
            },
        ]
        resp = await client.post(f"{search_url}/bulk-index", json=sample_files)
        print(f"Bulk indexed files: {resp.json()}")

        # Content documents, also bulk-indexed.
        sample_content = [
            {
                "id": "content_test_001",
                "doc_type": "content",
                "content_id": "test_content_001",
                "title": "Getting Started with Microservices",
                "content": "Microservices architecture is a method of developing software applications as a suite of independently deployable services.",
                "summary": "Introduction to microservices architecture patterns",
                "author_id": "test_001",
                "tags": ["microservices", "architecture", "tutorial"],
                "category": "technology",
                "status": "published",
                "created_at": datetime.utcnow().isoformat(),
            },
            {
                "id": "content_test_002",
                "doc_type": "content",
                "content_id": "test_content_002",
                "title": "Python Best Practices",
                "content": "Learn the best practices for writing clean, maintainable Python code including PEP 8 style guide.",
                "summary": "Essential Python coding standards and practices",
                "author_id": "test_001",
                "tags": ["python", "programming", "best-practices"],
                "category": "programming",
                "status": "published",
                "created_at": datetime.utcnow().isoformat(),
            },
        ]
        resp = await client.post(f"{search_url}/bulk-index", json=sample_content)
        print(f"Bulk indexed content: {resp.json()}")

        # Pause before querying; presumably gives the index time to commit.
        await asyncio.sleep(2)

        # --- 3. Basic search -------------------------------------------
        print("\n3. Testing basic search...")
        resp = await client.get(search_url, params={"q": "microservices"})
        hits = resp.json()
        print(f"Search for 'microservices': Found {hits['total']} results")
        if hits['documents']:
            top_hit = hits['documents'][0]
            # Files have no title, so fall back to the filename, then 'N/A'.
            fallback_label = top_hit.get('filename', 'N/A')
            print(f"First result: {top_hit.get('title', fallback_label)}")

        # --- 4. Filtered search ----------------------------------------
        print("\n4. Testing filtered search...")
        file_filter = {"q": "*:*", "doc_type": "file", "rows": 5}
        resp = await client.get(search_url, params=file_filter)
        hits = resp.json()
        print(f"Files search: Found {hits['total']} files")

        # --- 5. Faceted search -----------------------------------------
        print("\n5. Testing faceted search...")
        facet_query = {
            "q": "*:*",
            "facet": "true",
            "facet_field": ["doc_type", "tags", "category", "status"],
        }
        resp = await client.get(search_url, params=facet_query)
        hits = resp.json()
        facet_dump = json.dumps(hits['facets'], indent=2)
        print(f"Facets: {facet_dump}")

        # --- 6. Autocomplete / suggest ---------------------------------
        print("\n6. Testing autocomplete...")
        suggest_query = {"q": "micro", "field": "title", "limit": 5}
        resp = await client.get(f"{search_url}/suggest", params=suggest_query)
        suggest_body = resp.json()
        print(f"Suggestions for 'micro': {suggest_body['suggestions']}")

        # --- 7. Similar documents --------------------------------------
        print("\n7. Testing similar documents...")
        resp = await client.get(f"{search_url}/similar/content_test_001")
        if resp.status_code != 200:
            print(f"Similar search: {resp.status_code}")
        else:
            similar_body = resp.json()
            print(f"Found {similar_body['count']} similar documents")

        # --- 8. Highlighting -------------------------------------------
        print("\n8. Testing search with highlighting...")
        resp = await client.get(search_url, params={"q": "Python"})
        hits = resp.json()
        highlighted = hits['highlighting']
        if highlighted:
            print(f"Highlighting results: {len(highlighted)} documents highlighted")

        # --- 9. Index statistics ---------------------------------------
        print("\n9. Testing search statistics...")
        resp = await client.get(f"{search_url}/stats")
        if resp.status_code == 200:
            stats_body = resp.json()
            print(f"Index stats: {stats_body['statistics']}")

        # --- 10. Complex query -----------------------------------------
        print("\n10. Testing complex query...")
        complex_query = {
            "q": "architecture OR python",
            "doc_type": "content",
            "sort": "created_at desc",
            "rows": 10,
        }
        resp = await client.get(search_url, params=complex_query)
        hits = resp.json()
        print(f"Complex query: Found {hits['total']} results")

        # --- 11. Delete a document and confirm it is gone --------------
        print("\n11. Testing document deletion...")
        resp = await client.delete(f"{search_url}/document/content_test_002")
        if resp.status_code == 200:
            print(f"Deleted document: {resp.json()}")

        # Short pause before re-querying for the deleted id.
        await asyncio.sleep(1)
        resp = await client.get(search_url, params={"q": "id:content_test_002"})
        hits = resp.json()
        print(f"Verify deletion: Found {hits['total']} results (should be 0)")
async def test_performance():
    """Bulk-index 100 synthetic documents and time a handful of searches.

    Each query's result count and elapsed time are printed. Timing uses
    ``time.perf_counter()`` rather than ``time.time()``: the wall clock
    can jump (NTP, manual adjustment) and has coarser resolution, whereas
    the performance counter is monotonic and intended for measuring
    short durations. Nothing is asserted; the output is informational.
    """
    # Kept as a function-local import so the module's import block is untouched.
    import time

    print("\n\n⚡ Testing Search Performance...")

    # The client is given a 30-second timeout instead of httpx's default.
    async with httpx.AsyncClient(timeout=30.0) as client:
        # Build the whole batch first so it can go out in one bulk request.
        print("Indexing 100 test documents...")
        docs = [
            {
                "id": f"perf_test_{i}",
                "doc_type": "content",
                "title": f"Test Document {i}",
                "content": f"This is test content for document {i} with various keywords like search, Solr, Python, microservices",
                "tags": [f"tag{i%10}", f"category{i%5}"],
                "created_at": datetime.utcnow().isoformat(),
            }
            for i in range(100)
        ]

        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=docs)
        # 'count' is read defensively; 0 is printed when the key is absent.
        print(f"Indexed {response.json().get('count', 0)} documents")

        # Pause before querying; presumably gives the index time to commit.
        await asyncio.sleep(2)

        print("\nTesting search response times...")
        queries = ["search", "Python", "document", "test", "microservices"]

        for query in queries:
            # Only the HTTP round trip is timed; JSON decoding is excluded.
            start = time.perf_counter()
            response = await client.get(
                f"{BASE_URL}/api/search",
                params={"q": query, "rows": 20},
            )
            elapsed = time.perf_counter() - start

            results = response.json()
            print(f"Query '{query}': {results['total']} results in {elapsed:.3f}s")
async def test_reindex():
    """Call the reindex and optimize endpoints and print what they return.

    A reindex of the ``users`` collection is requested with
    ``doc_type=user``, followed by an index-optimization request.
    Non-200 replies are reported (reindex) or silently skipped
    (optimize); nothing is asserted.
    """
    print("\n\n🔄 Testing Reindex Functionality...")

    async with httpx.AsyncClient() as client:
        # Ask the service to reindex the users collection.
        print("Triggering reindex for users collection...")
        reindex_resp = await client.post(
            f"{BASE_URL}/api/search/reindex/users",
            params={"doc_type": "user"},
        )
        if reindex_resp.status_code != 200:
            print(f"Reindex failed: {reindex_resp.status_code}")
        else:
            print(f"Reindex started: {reindex_resp.json()}")

        # Ask the service to optimize the index; only success is printed.
        print("\nTesting index optimization...")
        optimize_resp = await client.post(f"{BASE_URL}/api/search/optimize")
        if optimize_resp.status_code == 200:
            print(f"Optimization: {optimize_resp.json()}")
async def main():
    """Run the three test coroutines in order, framed by a printed banner.

    Start and finish timestamps (local time, ISO format) are printed
    around the run. The suites execute sequentially, never concurrently.
    """
    rule = "=" * 60

    print(rule)
    print("SEARCH SERVICE TEST SUITE (Apache Solr)")
    print(rule)
    print(f"Started at: {datetime.now().isoformat()}")

    # API walkthrough first, then performance, then reindex/optimize.
    for suite in (test_search_api, test_performance, test_reindex):
        await suite()

    print("\n" + rule)
    print("✅ All search tests completed!")
    print(f"Finished at: {datetime.now().isoformat()}")
    print(rule)
# Script entry point: run the whole suite on a fresh event loop when executed directly.
if __name__ == "__main__":
    asyncio.run(main())