Skip to main content

MongoDB Atlas Vector Search Configuration

This guide covers detailed configuration options for MongoDB Atlas Vector Search integration with RecoAgent.

Environment Variables

Required Configuration

# MongoDB Atlas Connection
MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/
MONGODB_DATABASE=recoagent
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index

# Vector Store Type
VECTOR_STORE_TYPE=mongodb_atlas
EMBEDDING_DIMENSION=3072

Connection Pool Configuration

# Connection Pool Settings
MONGODB_MAX_POOL_SIZE=100
MONGODB_MIN_POOL_SIZE=10
MONGODB_MAX_IDLE_TIME_MS=30000
MONGODB_CONNECT_TIMEOUT_MS=10000
MONGODB_SERVER_SELECTION_TIMEOUT_MS=10000

Advanced Configuration

# Performance Tuning
MONGODB_READ_PREFERENCE=secondaryPreferred
MONGODB_READ_CONCERN=available
MONGODB_WRITE_CONCERN=1

# Retry Configuration
MONGODB_RETRY_READS=true
MONGODB_RETRY_WRITES=true
MONGODB_MAX_RETRY_TIME_MS=30000

Configuration in Code

Basic Configuration

from packages.rag.stores import MongoDBAtlasVectorStore

vector_store = MongoDBAtlasVectorStore(
uri="mongodb+srv://username:password@cluster.mongodb.net/",
database="recoagent",
collection="documents",
vector_search_index="vector_index",
embedding_dim=3072
)

Advanced Configuration

vector_store = MongoDBAtlasVectorStore(
uri=uri,
database=database,
collection=collection,
vector_search_index=index_name,
embedding_dim=embedding_dim,

# Connection Pool Settings
max_pool_size=100,
min_pool_size=10,
max_idle_time_ms=30000,
connect_timeout_ms=10000,
server_selection_timeout_ms=10000,

# Additional Options
read_preference="secondaryPreferred",
read_concern="available",
write_concern=1,
retry_reads=True,
retry_writes=True,
max_retry_time_ms=30000
)

Using Configuration Settings

from config.settings import get_config

config = get_config()

vector_store = MongoDBAtlasVectorStore(
uri=config.vector_store.mongodb_uri,
database=config.vector_store.mongodb_database,
collection=config.vector_store.mongodb_collection,
vector_search_index=config.vector_store.mongodb_vector_search_index,
embedding_dim=config.llm.embedding_dimension,
max_pool_size=config.vector_store.mongodb_max_pool_size,
min_pool_size=config.vector_store.mongodb_min_pool_size,
max_idle_time_ms=config.vector_store.mongodb_max_idle_time_ms,
connect_timeout_ms=config.vector_store.mongodb_connect_timeout_ms,
server_selection_timeout_ms=config.vector_store.mongodb_server_selection_timeout_ms
)

Vector Search Index Configuration

Index Definition

index_definition = {
"name": "vector_index",
"type": "vectorSearch",
"definition": {
"fields": [
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "cosine"
},
{
"type": "filter",
"path": "metadata"
}
]
}
}

Similarity Metrics

Supported similarity metrics:

  • cosine: Cosine similarity (default)
  • dotProduct: Dot product similarity
  • euclidean: Euclidean distance

Example vector field definitions for each metric:

# Cosine similarity (default)
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "cosine"
}

# Dot product similarity
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "dotProduct"
}

# Euclidean distance
{
"type": "vector",
"path": "embedding",
"numDimensions": 3072,
"similarity": "euclidean"
}

Text Index Configuration

# Create text index
vector_store.create_text_index(['content', 'title', 'description'])

# Custom text index configuration
text_index_definition = {
"content": "text",
"title": "text",
"description": "text",
"metadata.tags": "text"
}

Production Configuration

High-Availability Setup

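# Multi-region configuration
# A mongodb+srv:// URI accepts exactly one hostname (no comma-separated host list);
# the driver discovers every node of a multi-region cluster from that host's DNS SRV records.
uri = "mongodb+srv://username:password@cluster0.abc123.mongodb.net/"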

vector_store = MongoDBAtlasVectorStore(
uri=uri,
database="recoagent",
collection="documents",
vector_search_index="vector_index",

# High availability settings
max_pool_size=200,
min_pool_size=50,
max_idle_time_ms=60000,
connect_timeout_ms=5000,
server_selection_timeout_ms=5000,

# Read preferences for HA
read_preference="secondaryPreferred",
read_concern="majority",
write_concern=2
)

Performance Optimization

# High-performance configuration
vector_store = MongoDBAtlasVectorStore(
uri=uri,
database="recoagent",
collection="documents",
vector_search_index="vector_index",

# Performance settings
max_pool_size=500,
min_pool_size=100,
max_idle_time_ms=300000, # 5 minutes
connect_timeout_ms=2000,
server_selection_timeout_ms=2000,

# Optimized read/write settings
read_preference="primary",
read_concern="local",
write_concern=1,

# Retry settings
retry_reads=True,
retry_writes=True,
max_retry_time_ms=10000
)

Security Configuration

# Secure configuration
vector_store = MongoDBAtlasVectorStore(
uri=uri,
database="recoagent",
collection="documents",
vector_search_index="vector_index",

# Security settings
tls=True,
tls_allow_invalid_certificates=False,
tls_allow_invalid_hostnames=False,

# Authentication
auth_source="admin",
auth_mechanism="SCRAM-SHA-256",

# Connection security
max_pool_size=50,
min_pool_size=5,
max_idle_time_ms=30000
)

Environment-Specific Configuration

Development Environment

# Development settings
MONGODB_URI=mongodb://localhost:27017
MONGODB_DATABASE=recoagent_dev
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index_dev
MONGODB_MAX_POOL_SIZE=10
MONGODB_MIN_POOL_SIZE=2

Staging Environment

# Staging settings
MONGODB_URI=mongodb+srv://username:password@staging-cluster.mongodb.net/
MONGODB_DATABASE=recoagent_staging
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index_staging
MONGODB_MAX_POOL_SIZE=50
MONGODB_MIN_POOL_SIZE=10

Production Environment

# Production settings
MONGODB_URI=mongodb+srv://username:password@production-cluster.mongodb.net/
MONGODB_DATABASE=recoagent
MONGODB_COLLECTION=documents
MONGODB_VECTOR_SEARCH_INDEX=vector_index
MONGODB_MAX_POOL_SIZE=200
MONGODB_MIN_POOL_SIZE=50

Configuration Validation

Validate Configuration

from config.settings import validate_config, get_config

# Validate configuration
config = get_config()
issues = validate_config(config)

if issues:
print("Configuration issues found:")
for issue in issues:
print(f" - {issue}")
else:
print("Configuration is valid!")

Test Connection

def test_mongodb_connection():
"""Test MongoDB connection and configuration."""
try:
vector_store = MongoDBAtlasVectorStore(uri=uri, database=database)

# Test connection
stats = vector_store.get_stats()
print(f"✅ Connected to MongoDB: {stats['database']}")

# Test vector search index
vector_store._create_vector_search_index()
print("✅ Vector search index ready")

# Test text index
vector_store.create_text_index(['content'])
print("✅ Text index created")

return True

except Exception as e:
print(f"❌ Connection failed: {e}")
return False

# Run test
if test_mongodb_connection():
print("MongoDB configuration is working correctly!")
else:
print("Please check your MongoDB configuration.")

Troubleshooting Configuration

Common Configuration Issues

  1. Invalid URI Format

    Error: Invalid connection string
    Solution: Ensure the URI follows the format mongodb+srv://username:password@cluster.mongodb.net/ (or mongodb://host:port for a local instance), and percent-encode any special characters in the username or password.

  2. Authentication Failed

    Error: Authentication failed
    Solution: Check the username, password, and database permissions.

  3. Network Timeout

    Error: ServerSelectionTimeoutError
    Solution: Check network connectivity and firewall settings, and confirm that the client's IP address is on the Atlas IP access list.

  4. Index Not Found

    Error: Vector search index not found
    Solution: Create the index manually, or ensure auto-creation is enabled.

Configuration Debugging

import logging

# Enable debug logging
logging.basicConfig(level=logging.DEBUG)
logging.getLogger('pymongo').setLevel(logging.DEBUG)
logging.getLogger('motor').setLevel(logging.DEBUG)

# Test with debug logging
vector_store = MongoDBAtlasVectorStore(uri=uri, database=database)

This configuration guide provides comprehensive setup options for MongoDB Atlas Vector Search integration with RecoAgent.