Basic MongoDB Vector Search
This example demonstrates how to perform basic vector search operations with MongoDB Atlas Vector Search.
Prerequisites
- MongoDB Atlas cluster with Vector Search enabled
- Python 3.8+
- RecoAgent installed
Basic Setup
from packages.rag.stores import MongoDBAtlasVectorStore, VectorDocument
from packages.rag.mongodb_retrievers import MongoDBVectorRetriever
# Connection settings for the Atlas-backed store (the URI holds placeholder
# credentials; substitute your own cluster details)
store_settings = {
    "uri": "mongodb+srv://username:password@cluster.mongodb.net/",
    "database": "recoagent",
    "collection": "documents",
    "vector_search_index": "vector_index",
}

# Build the vector store from the settings above
vector_store = MongoDBAtlasVectorStore(**store_settings)

# Wrap the store in a retriever
retriever = MongoDBVectorRetriever(vector_store)
Adding Documents
# Create sample documents.
# Each embedding has to be a flat list of numbers. The constant 384-element
# vectors below are demo stand-ins (same approach as the other snippets in
# this example) so the code runs as written; replace them with real output
# from your embedding model.
# NOTE(review): 384 is assumed to match the dimension configured on the
# Atlas vector index - confirm against your index definition.
documents = [
    VectorDocument(
        id="doc1",
        content="Machine learning is a subset of artificial intelligence that focuses on algorithms and statistical models.",
        embedding=[0.1] * 384,  # Your 384-dimensional embedding
        metadata={"category": "AI", "year": 2023, "difficulty": "beginner"},
    ),
    VectorDocument(
        id="doc2",
        content="Neural networks are computing systems inspired by biological neural networks.",
        embedding=[0.2] * 384,  # Your 384-dimensional embedding
        metadata={"category": "AI", "year": 2023, "difficulty": "intermediate"},
    ),
    VectorDocument(
        id="doc3",
        content="Deep learning uses neural networks with multiple layers to model complex patterns.",
        embedding=[0.3] * 384,  # Your 384-dimensional embedding
        metadata={"category": "AI", "year": 2024, "difficulty": "advanced"},
    ),
]

# Add documents to MongoDB and report what add_documents returned
success = vector_store.add_documents(documents)
print(f"Documents added successfully: {success}")
Basic Vector Search
# Run a plain similarity search for the query text, asking for 5 results
query = "artificial intelligence and machine learning"
results = retriever.retrieve(query, k=5)

# Report the hit count, then one entry per hit numbered from 1
print(f"Found {len(results)} results:")
for rank, hit in enumerate(results, start=1):
    print(f"\n{rank}. Score: {hit.score:.3f}")
    chunk = hit.chunk
    print(f" Content: {chunk.content}")
    print(f" Metadata: {chunk.metadata}")
Search with Metadata Filtering
# Metadata filter: category must equal "AI"; the year entry pairs a "$gte"
# operator with the value 2023.
# NOTE(review): presumably the retriever reads this as year >= 2023 -
# confirm against the retriever's filter syntax.
year_condition = {"operator": "$gte", "value": 2023}
filter_metadata = {"category": "AI", "year": year_condition}

results = retriever.retrieve(
    query="machine learning algorithms",
    k=5,
    filter_metadata=filter_metadata,
)

# Print score, a 100-character content preview and the year for each hit,
# followed by a blank separator line
print(f"Found {len(results)} filtered results:")
for hit in results:
    print(f"Score: {hit.score:.3f}")
    chunk = hit.chunk
    preview = chunk.content[:100]
    print(f"Content: {preview}...")
    year = chunk.metadata.get('year')
    print(f"Year: {year}")
    print()
Async Vector Search
import asyncio
async def async_search_example():
    """Run one asynchronous search for "deep learning" and print each hit.

    Returns:
        Whatever ``retriever.retrieve_async`` produced for the query.
    """
    hits = await retriever.retrieve_async("deep learning", k=5)

    print(f"Async search found {len(hits)} results:")
    for hit in hits:
        print(f"Score: {hit.score:.3f}")
        # Only the first 100 characters of the content are shown
        preview = hit.chunk.content[:100]
        print(f"Content: {preview}...")

    return hits


# Drive the coroutine to completion with asyncio.run and keep its results
search_coroutine = async_search_example()
results = asyncio.run(search_coroutine)
Batch Operations
# Batch document addition
def add_documents_in_batches(documents, batch_size=100):
    """Add documents to the vector store in consecutive fixed-size batches.

    Each batch is passed to ``vector_store.add_documents`` and a one-line
    progress message with the batch number, batch length and the returned
    status is printed.

    Args:
        documents: Sliceable sequence of documents to add.
        batch_size: Maximum number of documents per batch. Must be >= 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A negative step would make range() empty and silently add nothing,
    # and zero fails with an unrelated "range() arg 3" message, so reject
    # both up front with a clear error.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    batch_starts = range(0, len(documents), batch_size)
    for batch_number, start in enumerate(batch_starts, start=1):
        batch = documents[start:start + batch_size]
        success = vector_store.add_documents(batch)
        print(f"Batch {batch_number}: {len(batch)} documents added - {success}")
# Build 1000 synthetic documents for the batch example
large_document_set = []
for doc_index in range(1000):
    # Constant-valued 384-element vector; the value repeats every 100 docs
    demo_value = 0.1 + (doc_index % 100) * 0.01
    large_document_set.append(
        VectorDocument(
            id=f"batch_doc_{doc_index}",
            content=f"Document {doc_index} about various AI topics including machine learning, neural networks, and deep learning.",
            embedding=[demo_value] * 384,  # Simple embedding for demo
            metadata={
                "category": "AI",
                "batch": doc_index // 100,
                "index": doc_index,
            },
        )
    )

# Push the whole set through the batching helper, 100 documents per call
add_documents_in_batches(large_document_set, batch_size=100)
Performance Monitoring
import time
def search_with_timing(query, k=5):
    """Run a search and report how long the retrieve call took.

    Args:
        query: Query text passed to ``retriever.retrieve``.
        k: Number of results requested from the retriever.

    Returns:
        A ``(results, search_time)`` tuple, where ``search_time`` is the
        elapsed time of the retrieve call in seconds.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot go negative or jump when the system clock is
    # adjusted, which time.time() is subject to.
    start_time = time.perf_counter()
    results = retriever.retrieve(query, k=k)
    search_time = time.perf_counter() - start_time

    print(f"Search completed in {search_time:.3f} seconds")
    print(f"Found {len(results)} results")
    return results, search_time
# Sample queries used to exercise the timed search
queries = [
    "machine learning algorithms",
    "neural network architectures",
    "deep learning applications",
    "artificial intelligence ethics",
]

# Time each query in turn, requesting three results per search
for query in queries:
    query_header = f"\nQuery: '{query}'"
    print(query_header)
    results, timing = search_with_timing(query, k=3)
Error Handling
def safe_search(query, k=5, max_retries=3, retry_delay=1):
    """Search with error handling and retries.

    Args:
        query: Query text passed to ``retriever.retrieve``.
        k: Number of results requested from the retriever.
        max_retries: Maximum number of attempts. With zero or a negative
            value no attempt is made and an empty list is returned.
        retry_delay: Seconds to sleep between a failed attempt and the
            next one. No sleep follows the final attempt.

    Returns:
        The retriever's results from the first successful attempt, or an
        empty list when every attempt failed (or none was made).
    """
    for attempt in range(max_retries):
        try:
            return retriever.retrieve(query, k=k)
        except Exception as e:  # broad on purpose: best-effort example boundary
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)  # Wait before retry

    # Reached only when no attempt succeeded. Placing the fallback after
    # the loop also covers max_retries <= 0, which would otherwise fall
    # through and return None.
    print("All retry attempts failed")
    return []
# Exercise the retry wrapper and report how many results came back
results = safe_search("machine learning", k=5)
result_count = len(results)
print(f"Safe search returned {result_count} results")
Collection Statistics
# Fetch the statistics mapping from the store
stats = vector_store.get_stats()

# One row per printed line: (label, stats key, fallback value, unit suffix)
stat_rows = (
    ("Total documents", "total_documents", 0, ""),
    ("Database", "database", "unknown", ""),
    ("Collection", "collection", "unknown", ""),
    ("Storage size", "storage_size", 0, " bytes"),
    ("Index size", "index_size", 0, " bytes"),
)

print("Collection Statistics:")
for label, key, fallback, suffix in stat_rows:
    print(f" {label}: {stats.get(key, fallback)}{suffix}")
Cleanup
# Remove the three sample documents ("doc1", "doc2", "doc3") by id
doc_ids_to_delete = [f"doc{number}" for number in range(1, 4)]
success = vector_store.delete_documents(doc_ids_to_delete)
print(f"Documents deleted: {success}")

# Close the store once it is no longer needed
vector_store.close()
print("Connections closed")
Complete Example
def complete_basic_example():
    """Complete basic vector search example.

    Creates its own store and retriever, adds two demo documents, searches
    for "machine learning" and prints each hit. Whatever happens, the demo
    documents are deleted and the store is closed before returning.

    Returns:
        The search results, or an empty list if adding or searching raised.
    """
    # Initialize
    vector_store = MongoDBAtlasVectorStore(
        uri="mongodb+srv://username:password@cluster.mongodb.net/",
        database="recoagent",
        collection="documents",
        vector_search_index="vector_index",
    )
    retriever = MongoDBVectorRetriever(vector_store)

    try:
        # Add documents
        documents = [
            VectorDocument(
                id="example_doc1",
                content="Machine learning enables computers to learn without being explicitly programmed.",
                embedding=[0.1] * 384,  # Simplified for demo
                metadata={"topic": "ML", "level": "beginner"},
            ),
            VectorDocument(
                id="example_doc2",
                content="Neural networks are inspired by the human brain and can recognize patterns.",
                embedding=[0.2] * 384,  # Simplified for demo
                metadata={"topic": "NN", "level": "intermediate"},
            ),
        ]
        vector_store.add_documents(documents)
        print("✅ Documents added")

        # Search
        results = retriever.retrieve("machine learning", k=2)
        print(f"✅ Found {len(results)} results")
        for result in results:
            print(f" Score: {result.score:.3f}")
            print(f" Content: {result.chunk.content}")
        return results
    except Exception as e:  # broad on purpose: example-level boundary
        print(f"❌ Error: {e}")
        return []
    finally:
        # Cleanup. close() sits in its own finally so the store is closed
        # even when deleting the demo documents raises.
        try:
            vector_store.delete_documents(["example_doc1", "example_doc2"])
        finally:
            vector_store.close()


# Run complete example
results = complete_basic_example()
This basic example demonstrates the core MongoDB Atlas Vector Search functionality. For more advanced features like hybrid search and faceted search, see the other MongoDB examples.