Skip to main content

Basic MongoDB Vector Search

This example demonstrates how to perform basic vector search operations with MongoDB Atlas Vector Search.

Prerequisites

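  • MongoDB Atlas cluster with Vector Search enabled, and a Vector Search index on the target collection (the examples below use an index named `vector_index` and 384-dimensional embeddings)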
  • Python 3.8+
  • RecoAgent installed

Basic Setup

from packages.rag.stores import MongoDBAtlasVectorStore, VectorDocument
from packages.rag.mongodb_retrievers import MongoDBVectorRetriever

import os

# Initialize vector store.
# The connection string is read from the MONGODB_URI environment variable so
# real credentials never have to be pasted into source code; the placeholder
# below is used only when that variable is unset.
vector_store = MongoDBAtlasVectorStore(
    uri=os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://username:password@cluster.mongodb.net/",
    ),
    database="recoagent",
    collection="documents",
    vector_search_index="vector_index",
)

# Initialize retriever
retriever = MongoDBVectorRetriever(vector_store)

Adding Documents

# Create sample documents.
# NOTE: the constant vectors are stand-ins so the snippet runs as written
# (a literal `...` inside a list is the Ellipsis object, not elided numbers).
# Replace them with real 384-dimensional embeddings from your model.
documents = [
    VectorDocument(
        id="doc1",
        content="Machine learning is a subset of artificial intelligence that focuses on algorithms and statistical models.",
        embedding=[0.1] * 384,  # Placeholder 384-dimensional embedding
        metadata={"category": "AI", "year": 2023, "difficulty": "beginner"},
    ),
    VectorDocument(
        id="doc2",
        content="Neural networks are computing systems inspired by biological neural networks.",
        embedding=[0.2] * 384,  # Placeholder 384-dimensional embedding
        metadata={"category": "AI", "year": 2023, "difficulty": "intermediate"},
    ),
    VectorDocument(
        id="doc3",
        content="Deep learning uses neural networks with multiple layers to model complex patterns.",
        embedding=[0.3] * 384,  # Placeholder 384-dimensional embedding
        metadata={"category": "AI", "year": 2024, "difficulty": "advanced"},
    ),
]

# Add documents to MongoDB
success = vector_store.add_documents(documents)
print(f"Documents added successfully: {success}")

Basic Search

# Search for similar documents
query = "artificial intelligence and machine learning"
results = retriever.retrieve(query, k=5)

# Report the hit count first, then each hit numbered from 1.
print(f"Found {len(results)} results:")
for i, result in enumerate(results, start=1):
    print(f"\n{i}. Score: {result.score:.3f}")
    print(f" Content: {result.chunk.content}")
    print(f" Metadata: {result.chunk.metadata}")

Search with Metadata Filtering

# Search with metadata filters.
# The nested operator/value form is the retriever's own filter syntax;
# presumably this one keeps documents with year >= 2023 (confirm against the
# retriever's documentation).
year_filter = {"operator": "$gte", "value": 2023}
filter_metadata = {"category": "AI", "year": year_filter}

results = retriever.retrieve(
    query="machine learning algorithms",
    k=5,
    filter_metadata=filter_metadata,
)

# Show score, a 100-character content preview, and the year of each hit.
print(f"Found {len(results)} filtered results:")
for result in results:
    print(f"Score: {result.score:.3f}")
    print(f"Content: {result.chunk.content[:100]}...")
    print(f"Year: {result.chunk.metadata.get('year')}")
    print()
import asyncio

async def async_search_example():
    """Run one query through the retriever's async API and print the hits.

    Returns:
        Whatever ``retriever.retrieve_async`` resolves to for the query.
    """
    # Async search
    pending_search = retriever.retrieve_async("deep learning", k=5)
    results = await pending_search

    print(f"Async search found {len(results)} results:")
    for result in results:
        print(f"Score: {result.score:.3f}")
        print(f"Content: {result.chunk.content[:100]}...")

    return results


# Run async example
results = asyncio.run(async_search_example())

Batch Operations

# Batch document addition
def add_documents_in_batches(documents, batch_size=100):
    """Add documents in batches for better performance.

    Args:
        documents: Sequence of documents to insert; it is sliced into
            consecutive chunks of at most ``batch_size`` items.
        batch_size: Maximum number of documents per ``add_documents`` call.
            Must be a positive integer.

    Returns:
        A list holding the value returned by ``vector_store.add_documents``
        for each batch, in order. Empty when ``documents`` is empty.

    Raises:
        ValueError: If ``batch_size`` is not positive. Without this check a
            negative size would silently insert nothing.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    batch_results = []
    batch_starts = range(0, len(documents), batch_size)
    for batch_number, start in enumerate(batch_starts, start=1):
        batch = documents[start:start + batch_size]
        success = vector_store.add_documents(batch)
        batch_results.append(success)
        print(f"Batch {batch_number}: {len(batch)} documents added - {success}")
    return batch_results

def _make_batch_document(i):
    """Build the i-th synthetic document used by the batch example."""
    return VectorDocument(
        id=f"batch_doc_{i}",
        content=f"Document {i} about various AI topics including machine learning, neural networks, and deep learning.",
        embedding=[0.1 + (i % 100) * 0.01] * 384,  # Simple embedding for demo
        metadata={"category": "AI", "batch": i // 100, "index": i},
    )


# Create more documents for batch example
large_document_set = [_make_batch_document(i) for i in range(1000)]

# Add in batches
add_documents_in_batches(large_document_set, batch_size=100)

Performance Monitoring

import time

def search_with_timing(query, k=5):
    """Search with performance timing.

    Args:
        query: Query text passed to ``retriever.retrieve``.
        k: Number of results requested from the retriever.

    Returns:
        A ``(results, search_time)`` tuple, where ``search_time`` is the
        elapsed time of the retrieve call in seconds.
    """
    # perf_counter() is a monotonic, high-resolution clock, so the interval
    # cannot be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    results = retriever.retrieve(query, k=k)
    search_time = time.perf_counter() - start_time

    print(f"Search completed in {search_time:.3f} seconds")
    print(f"Found {len(results)} results")

    return results, search_time

# Test search performance: time the same top-3 search for several queries.
queries = [
    "machine learning algorithms",
    "neural network architectures",
    "deep learning applications",
    "artificial intelligence ethics",
]

for query in queries:
    print(f"\nQuery: '{query}'")
    timed_search = search_with_timing(query, k=3)
    results, timing = timed_search

Error Handling

def safe_search(query, k=5, max_retries=3, retry_delay=1):
    """Search with error handling and retries.

    Args:
        query: Query text passed to ``retriever.retrieve``.
        k: Number of results requested from the retriever.
        max_retries: Maximum number of attempts. With zero or a negative
            value no search is attempted and an empty list is returned.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        The retriever's results from the first successful attempt, or an
        empty list when every attempt failed (or none was made).
    """
    for attempt in range(max_retries):
        try:
            return retriever.retrieve(query, k=k)
        except Exception as e:
            # Broad catch is deliberate: this wrapper is best-effort and the
            # retriever's exception types are not known here.
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)  # Wait before retry
            else:
                print("All retry attempts failed")

    # Reached when all attempts failed or max_retries <= 0; callers always
    # receive a list, so len(results) is safe.
    return []

# Test error handling: safe_search returns a list even when retrieval fails.
results = safe_search(query="machine learning", k=5)
print(f"Safe search returned {len(results)} results")

Collection Statistics

# Get collection statistics
# get_stats() is read with .get(), so each line below falls back to a default
# (0 or 'unknown') when the store does not report that key.
stats = vector_store.get_stats()
print("Collection Statistics:")
print(f" Total documents: {stats.get('total_documents', 0)}")
print(f" Database: {stats.get('database', 'unknown')}")
print(f" Collection: {stats.get('collection', 'unknown')}")
# Both sizes are printed with a "bytes" label.
print(f" Storage size: {stats.get('storage_size', 0)} bytes")
print(f" Index size: {stats.get('index_size', 0)} bytes")

Cleanup

# Delete specific documents
# These are the ids of the three sample documents; whatever
# delete_documents() returns is printed as the outcome.
doc_ids_to_delete = ["doc1", "doc2", "doc3"]
success = vector_store.delete_documents(doc_ids_to_delete)
print(f"Documents deleted: {success}")

# Close connections
# Done last: no further vector_store calls follow in this section.
vector_store.close()
print("Connections closed")

Complete Example

def complete_basic_example():
    """Complete basic vector search example.

    Opens the vector store, inserts two demo documents, runs one search and
    prints the hits. Afterwards it attempts to delete the demo documents and
    always closes the store.

    Returns:
        The search results, or an empty list if any step raised.
    """
    import os  # Local import keeps this example self-contained.

    # Initialize. MONGODB_URI from the environment is preferred so real
    # credentials stay out of source code; the placeholder is the fallback.
    vector_store = MongoDBAtlasVectorStore(
        uri=os.environ.get(
            "MONGODB_URI",
            "mongodb+srv://username:password@cluster.mongodb.net/",
        ),
        database="recoagent",
        collection="documents",
        vector_search_index="vector_index",
    )

    try:
        # Built inside the try so a failure here still closes the store.
        retriever = MongoDBVectorRetriever(vector_store)

        # Add documents
        documents = [
            VectorDocument(
                id="example_doc1",
                content="Machine learning enables computers to learn without being explicitly programmed.",
                embedding=[0.1] * 384,  # Simplified for demo
                metadata={"topic": "ML", "level": "beginner"},
            ),
            VectorDocument(
                id="example_doc2",
                content="Neural networks are inspired by the human brain and can recognize patterns.",
                embedding=[0.2] * 384,  # Simplified for demo
                metadata={"topic": "NN", "level": "intermediate"},
            ),
        ]

        vector_store.add_documents(documents)
        print("✅ Documents added")

        # Search
        results = retriever.retrieve("machine learning", k=2)
        print(f"✅ Found {len(results)} results")

        for result in results:
            print(f" Score: {result.score:.3f}")
            print(f" Content: {result.chunk.content}")

        return results

    except Exception as e:
        print(f"❌ Error: {e}")
        return []

    finally:
        # Cleanup is best-effort: a failed delete must not replace the value
        # returned above, and close() must run even if the delete raises.
        try:
            vector_store.delete_documents(["example_doc1", "example_doc2"])
        except Exception as cleanup_error:
            print(f"⚠️ Cleanup failed: {cleanup_error}")
        finally:
            vector_store.close()


# Run complete example
results = complete_basic_example()

This basic example demonstrates the core MongoDB Atlas Vector Search functionality. For more advanced features like hybrid search and faceted search, see the other MongoDB examples.

Next Steps