MongoDB Atlas Vector Search Configuration
This guide covers detailed configuration options for MongoDB Atlas Vector Search integration with RecoAgent.
Environment Variables
Required Configuration
# MongoDB Atlas Connection
MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/
MONGODB_DATABASE=recoagent
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index
# Vector Store Type
VECTOR_STORE_TYPE=mongodb_atlas
EMBEDDING_DIMENSION=3072
Connection Pool Configuration
# Connection Pool Settings
MONGODB_MAX_POOL_SIZE=100
MONGODB_MIN_POOL_SIZE=10
MONGODB_MAX_IDLE_TIME_MS=30000
MONGODB_CONNECT_TIMEOUT_MS=10000
MONGODB_SERVER_SELECTION_TIMEOUT_MS=10000
Advanced Configuration
# Performance Tuning
MONGODB_READ_PREFERENCE=secondaryPreferred
MONGODB_READ_CONCERN=available
MONGODB_WRITE_CONCERN=1
# Retry Configuration
MONGODB_RETRY_READS=true
MONGODB_RETRY_WRITES=true
MONGODB_MAX_RETRY_TIME_MS=30000
Configuration in Code
Basic Configuration
from packages.rag.stores import MongoDBAtlasVectorStore
vector_store = MongoDBAtlasVectorStore(
uri="mongodb+srv://username:password@cluster.mongodb.net/",
database="recoagent",
collection="documents",
vector_search_index="vector_index",
embedding_dim=3072
)
Advanced Configuration
vector_store = MongoDBAtlasVectorStore(
uri=uri,
database=database,
collection=collection,
vector_search_index=index_name,
embedding_dim=embedding_dim,
# Connection Pool Settings
max_pool_size=100,
min_pool_size=10,
max_idle_time_ms=30000,
connect_timeout_ms=10000,
server_selection_timeout_ms=10000,
# Additional Options
read_preference="secondaryPreferred",
read_concern="available",
write_concern=1,
retry_reads=True,
retry_writes=True,
max_retry_time_ms=30000
)
Using Configuration Settings
from config.settings import get_config
config = get_config()
vector_store = MongoDBAtlasVectorStore(
uri=config.vector_store.mongodb_uri,
database=config.vector_store.mongodb_database,
collection=config.vector_store.mongodb_collection,
vector_search_index=config.vector_store.mongodb_vector_search_index,
embedding_dim=config.llm.embedding_dimension,
max_pool_size=config.vector_store.mongodb_max_pool_size,
min_pool_size=config.vector_store.mongodb_min_pool_size,
max_idle_time_ms=config.vector_store.mongodb_max_idle_time_ms,
connect_timeout_ms=config.vector_store.mongodb_connect_timeout_ms,
server_selection_timeout_ms=config.vector_store.mongodb_server_selection_timeout_ms
)
Vector Search Index Configuration
Index Definition
index_definition = {
"name": "vector_index",
"type": "vectorSearch",
"definition": {
"fields": [
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "cosine"
},
{
"type": "filter",
"path": "metadata"
}
]
}
}
Similarity Metrics
Supported similarity metrics:
- cosine: Cosine similarity (default)
- dotProduct: Dot product similarity
- euclidean: Euclidean distance
# Cosine similarity (default)
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "cosine"
}
# Dot product similarity
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "dotProduct"
}
# Euclidean distance
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "euclidean"
}
Text Index for Hybrid Search
# Create text index
vector_store.create_text_index(['content', 'title', 'description'])
# Custom text index configuration
text_index_definition = {
"content": "text",
"title": "text",
"description": "text",
"metadata.tags": "text"
}
Production Configuration
High-Availability Setup
# Multi-region configuration
# A mongodb+srv:// URI accepts exactly one hostname (and no port): the driver
# discovers every cluster member, in every region, from that host's DNS SRV
# records. Listing several comma-separated hosts after mongodb+srv:// is
# rejected as an invalid connection string.
uri = "mongodb+srv://username:password@cluster0.abc123.mongodb.net/"
vector_store = MongoDBAtlasVectorStore(
    uri=uri,
    database="recoagent",
    collection="documents",
    vector_search_index="vector_index",
    # High availability settings
    max_pool_size=200,
    min_pool_size=50,
    max_idle_time_ms=60000,
    connect_timeout_ms=5000,
    server_selection_timeout_ms=5000,
    # Read preferences for HA
    read_preference="secondaryPreferred",
    read_concern="majority",
    write_concern=2
)
Performance Optimization
# High-performance configuration
vector_store = MongoDBAtlasVectorStore(
uri=uri,
database="recoagent",
collection="documents",
vector_search_index="vector_index",
# Performance settings
max_pool_size=500,
min_pool_size=100,
max_idle_time_ms=300000, # 5 minutes
connect_timeout_ms=2000,
server_selection_timeout_ms=2000,
# Optimized read/write settings
read_preference="primary",
read_concern="local",
write_concern=1,
# Retry settings
retry_reads=True,
retry_writes=True,
max_retry_time_ms=10000
)
Security Configuration
# Secure configuration
vector_store = MongoDBAtlasVectorStore(
uri=uri,
database="recoagent",
collection="documents",
vector_search_index="vector_index",
# Security settings
tls=True,
tls_allow_invalid_certificates=False,
tls_allow_invalid_hostnames=False,
# Authentication
auth_source="admin",
auth_mechanism="SCRAM-SHA-256",
# Connection security
max_pool_size=50,
min_pool_size=5,
max_idle_time_ms=30000
)
Environment-Specific Configuration
Development Environment
# Development settings
MONGODB_URI=mongodb://localhost:27017
MONGODB_DATABASE=recoagent_dev
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index_dev
MONGODB_MAX_POOL_SIZE=10
MONGODB_MIN_POOL_SIZE=2
Staging Environment
# Staging settings
MONGODB_URI=mongodb+srv://username:password@staging-cluster.mongodb.net/
MONGODB_DATABASE=recoagent_staging
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index_staging
MONGODB_MAX_POOL_SIZE=50
MONGODB_MIN_POOL_SIZE=10
Production Environment
# Production settings
MONGODB_URI=mongodb+srv://username:password@production-cluster.mongodb.net/
MONGODB_DATABASE=recoagent
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index
MONGODB_MAX_POOL_SIZE=200
MONGODB_MIN_POOL_SIZE=50
Configuration Validation
Validate Configuration
from config.settings import validate_config, get_config
# Load the configuration and collect any validation issues
config = get_config()
issues = validate_config(config)
if not issues:
    print("Configuration is valid!")
else:
    # List each reported issue on its own line
    print("Configuration issues found:")
    for problem in issues:
        print(f" - {problem}")
Test Connection
def test_mongodb_connection():
    """Smoke-test the MongoDB connection and index setup.

    Builds a ``MongoDBAtlasVectorStore`` from the surrounding ``uri`` and
    ``database`` names, reads its stats, then calls
    ``_create_vector_search_index()`` and ``create_text_index(['content'])``,
    printing a progress line after each step.

    Returns:
        True when every step completes, False if any step raises.
    """
    try:
        store = MongoDBAtlasVectorStore(uri=uri, database=database)

        # Connection check: read the store's stats
        store_stats = store.get_stats()
        print(f"✅ Connected to MongoDB: {store_stats['database']}")

        # Vector search index check
        store._create_vector_search_index()
        print("✅ Vector search index ready")

        # Text index check
        store.create_text_index(['content'])
        print("✅ Text index created")
    except Exception as exc:
        # Diagnostic helper: report the failure rather than propagate it
        print(f"❌ Connection failed: {exc}")
        return False
    return True
# Run the connection test and report the outcome
connection_ok = test_mongodb_connection()
if not connection_ok:
    print("Please check your MongoDB configuration.")
else:
    print("MongoDB configuration is working correctly!")
Troubleshooting Configuration
Common Configuration Issues
- Invalid URI Format
  Error: Invalid connection string
  Solution: Ensure the URI follows the format mongodb+srv://username:password@cluster.mongodb.net/
- Authentication Failed
  Error: Authentication failed
  Solution: Check the username, password, and database permissions
- Network Timeout
  Error: ServerSelectionTimeoutError
  Solution: Check network connectivity and firewall settings (including the Atlas IP access list)
- Index Not Found
  Error: Vector search index not found
  Solution: Create the index manually or ensure auto-creation is enabled
Configuration Debugging
import logging
# Enable debug logging
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('pymongo').setLevel(logging.DEBUG)
logging.getLogger('motor').setLevel(logging.DEBUG)
# Test with debug logging
vector_store = MongoDBAtlasVectorStore(uri=uri, database=database)
This configuration guide provides comprehensive setup options for MongoDB Atlas Vector Search integration with RecoAgent.