#!/usr/bin/env python3
"""
Test script for Search Service with Apache Solr

Runs a sequence of HTTP requests against the search service at BASE_URL and
prints each response. Nothing is asserted; the output is meant to be read by
a person.
"""
import asyncio
import json
import time
from datetime import datetime

import httpx

BASE_URL = "http://localhost:8015"

# NOTE(review): the symbols at the start of the section banners printed below
# look like mis-encoded emoji. They are reproduced byte-for-byte here; confirm
# the intended characters and the file encoding before changing them.


def _utc_timestamp() -> str:
    """Return the current UTC time as a naive ISO-8601 string.

    NOTE(review): ``datetime.utcnow()`` is deprecated since Python 3.12. It is
    kept because a timezone-aware value would add a ``+00:00`` suffix to the
    string sent to the service; confirm the service accepts that before
    switching.
    """
    return datetime.utcnow().isoformat()


async def test_search_api() -> None:
    """Test search API endpoints.

    Indexes one user, two file and two content documents, then walks through
    search, filtered search, facets, suggest, similar documents, highlighting,
    statistics, a complex query and a delete, printing each result.
    """
    async with httpx.AsyncClient() as client:
        print("\nšŸ” Testing Search Service API...")

        # 1. Health check
        print("\n1. Testing health check...")
        response = await client.get(f"{BASE_URL}/health")
        print(f"Health check: {response.json()}")

        # 2. Index sample documents
        print("\n2. Indexing sample documents...")

        # Single user document
        user_doc = {
            "id": "user_test_001",
            "doc_type": "user",
            "user_id": "test_001",
            "username": "john_doe",
            "email": "john@example.com",
            "name": "John Doe",
            "bio": "Software developer passionate about Python and microservices",
            "tags": ["python", "developer", "backend"],
            "created_at": _utc_timestamp(),
        }
        response = await client.post(f"{BASE_URL}/api/search/index", json=user_doc)
        print(f"Indexed user: {response.json()}")

        # File documents, sent through the bulk endpoint
        file_docs = [
            {
                "id": "file_test_001",
                "doc_type": "file",
                "file_id": "test_file_001",
                "filename": "architecture_diagram.png",
                "content_type": "image/png",
                "size": 1024000,
                "user_id": "test_001",
                "tags": ["architecture", "design", "documentation"],
                "description": "System architecture diagram showing microservices",
                "created_at": _utc_timestamp(),
            },
            {
                "id": "file_test_002",
                "doc_type": "file",
                "file_id": "test_file_002",
                "filename": "user_manual.pdf",
                "content_type": "application/pdf",
                "size": 2048000,
                "user_id": "test_001",
                "tags": ["documentation", "manual", "guide"],
                "description": "Complete user manual for the application",
                "created_at": _utc_timestamp(),
            },
        ]
        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=file_docs)
        print(f"Bulk indexed files: {response.json()}")

        # Content documents, sent through the bulk endpoint
        content_docs = [
            {
                "id": "content_test_001",
                "doc_type": "content",
                "content_id": "test_content_001",
                "title": "Getting Started with Microservices",
                "content": "Microservices architecture is a method of developing software applications as a suite of independently deployable services.",
                "summary": "Introduction to microservices architecture patterns",
                "author_id": "test_001",
                "tags": ["microservices", "architecture", "tutorial"],
                "category": "technology",
                "status": "published",
                "created_at": _utc_timestamp(),
            },
            {
                "id": "content_test_002",
                "doc_type": "content",
                "content_id": "test_content_002",
                "title": "Python Best Practices",
                "content": "Learn the best practices for writing clean, maintainable Python code including PEP 8 style guide.",
                "summary": "Essential Python coding standards and practices",
                "author_id": "test_001",
                "tags": ["python", "programming", "best-practices"],
                "category": "programming",
                "status": "published",
                "created_at": _utc_timestamp(),
            },
        ]
        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=content_docs)
        print(f"Bulk indexed content: {response.json()}")

        # Wait for indexing before querying
        await asyncio.sleep(2)

        # 3. Basic search
        print("\n3. Testing basic search...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={"q": "microservices"},
        )
        results = response.json()
        print(f"Search for 'microservices': Found {results['total']} results")
        if results['documents']:
            first = results['documents'][0]
            # Content docs carry a title, file docs a filename; fall back to N/A.
            print(f"First result: {first.get('title', first.get('filename', 'N/A'))}")

        # 4. Search with filters
        print("\n4. Testing filtered search...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={
                "q": "*:*",
                "doc_type": "file",
                "rows": 5,
            },
        )
        results = response.json()
        print(f"Files search: Found {results['total']} files")

        # 5. Faceted search
        print("\n5. Testing faceted search...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={
                "q": "*:*",
                "facet": "true",
                "facet_field": ["doc_type", "tags", "category", "status"],
            },
        )
        results = response.json()
        print(f"Facets: {json.dumps(results['facets'], indent=2)}")

        # 6. Autocomplete / suggest
        print("\n6. Testing autocomplete...")
        response = await client.get(
            f"{BASE_URL}/api/search/suggest",
            params={
                "q": "micro",
                "field": "title",
                "limit": 5,
            },
        )
        suggestions = response.json()
        print(f"Suggestions for 'micro': {suggestions['suggestions']}")

        # 7. Similar documents
        print("\n7. Testing similar documents...")
        response = await client.get(f"{BASE_URL}/api/search/similar/content_test_001")
        if response.status_code == 200:
            similar = response.json()
            print(f"Found {similar['count']} similar documents")
        else:
            print(f"Similar search: {response.status_code}")

        # 8. Search with highlighting
        print("\n8. Testing search with highlighting...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={"q": "Python"},
        )
        results = response.json()
        # .get() so a response without a 'highlighting' key does not abort
        # the remaining steps with a KeyError.
        highlighting = results.get('highlighting')
        if highlighting:
            print(f"Highlighting results: {len(highlighting)} documents highlighted")

        # 9. Search statistics
        print("\n9. Testing search statistics...")
        response = await client.get(f"{BASE_URL}/api/search/stats")
        if response.status_code == 200:
            stats = response.json()
            print(f"Index stats: {stats['statistics']}")

        # 10. Complex query
        print("\n10. Testing complex query...")
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={
                "q": "architecture OR python",
                "doc_type": "content",
                "sort": "created_at desc",
                "rows": 10,
            },
        )
        results = response.json()
        print(f"Complex query: Found {results['total']} results")

        # 11. Delete a document
        print("\n11. Testing document deletion...")
        response = await client.delete(f"{BASE_URL}/api/search/document/content_test_002")
        if response.status_code == 200:
            print(f"Deleted document: {response.json()}")

        # Verify deletion after a short pause
        await asyncio.sleep(1)
        response = await client.get(
            f"{BASE_URL}/api/search",
            params={"q": "id:content_test_002"},
        )
        results = response.json()
        print(f"Verify deletion: Found {results['total']} results (should be 0)")


async def test_performance() -> None:
    """Test search performance.

    Bulk-indexes 100 generated documents, then runs five queries and prints
    the result count and elapsed time of each request.
    """
    print("\n\n⚔ Testing Search Performance...")

    async with httpx.AsyncClient(timeout=30.0) as client:
        # Index many documents
        print("Indexing 100 test documents...")
        docs = [
            {
                "id": f"perf_test_{i}",
                "doc_type": "content",
                "title": f"Test Document {i}",
                "content": f"This is test content for document {i} with various keywords like search, Solr, Python, microservices",
                "tags": [f"tag{i%10}", f"category{i%5}"],
                "created_at": _utc_timestamp(),
            }
            for i in range(100)
        ]

        response = await client.post(f"{BASE_URL}/api/search/bulk-index", json=docs)
        print(f"Indexed {response.json().get('count', 0)} documents")

        # Wait for indexing before querying
        await asyncio.sleep(2)

        # Test search speed
        print("\nTesting search response times...")

        queries = ["search", "Python", "document", "test", "microservices"]
        for query in queries:
            # perf_counter() is monotonic, so the interval is not affected
            # by system clock adjustments.
            start = time.perf_counter()
            response = await client.get(
                f"{BASE_URL}/api/search",
                params={"q": query, "rows": 20},
            )
            elapsed = time.perf_counter() - start
            results = response.json()
            print(f"Query '{query}': {results['total']} results in {elapsed:.3f}s")


async def test_reindex() -> None:
    """Test reindexing from MongoDB.

    Posts a reindex request for the ``users`` collection, then an index
    optimization request, printing the outcome of each.
    """
    print("\n\nšŸ”„ Testing Reindex Functionality...")

    async with httpx.AsyncClient() as client:
        # Trigger reindex for users collection
        print("Triggering reindex for users collection...")
        response = await client.post(
            f"{BASE_URL}/api/search/reindex/users",
            params={"doc_type": "user"},
        )
        if response.status_code == 200:
            print(f"Reindex started: {response.json()}")
        else:
            print(f"Reindex failed: {response.status_code}")

        # Test index optimization
        print("\nTesting index optimization...")
        response = await client.post(f"{BASE_URL}/api/search/optimize")
        if response.status_code == 200:
            print(f"Optimization: {response.json()}")


async def main() -> None:
    """Run all tests in order, framed by start and finish banners."""
    print("=" * 60)
    print("SEARCH SERVICE TEST SUITE (Apache Solr)")
    print("=" * 60)
    print(f"Started at: {datetime.now().isoformat()}")

    # Run tests sequentially; later suites query data indexed by earlier ones.
    await test_search_api()
    await test_performance()
    await test_reindex()

    print("\n" + "=" * 60)
    print("āœ… All search tests completed!")
    print(f"Finished at: {datetime.now().isoformat()}")
    print("=" * 60)


if __name__ == "__main__":
    asyncio.run(main())