MongoDB Faceted Search
This example demonstrates how to perform faceted search operations with MongoDB Atlas Vector Search, enabling users to filter and explore results by metadata facets.
Prerequisites
- MongoDB Atlas cluster with Vector Search enabled
- Documents with rich metadata
- Python 3.8+
- RecoAgent installed
Basic Faceted Search Setup
from packages.rag.stores import MongoDBAtlasVectorStore, VectorDocument
from packages.rag.mongodb_retrievers import MongoDBFacetedRetriever
import os

# Initialize vector store.
# The connection string carries credentials, so it is taken from the
# MONGODB_URI environment variable when that is set. The literal below is only
# a placeholder fallback and must be replaced before connecting to a cluster.
vector_store = MongoDBAtlasVectorStore(
    uri=os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://username:password@cluster.mongodb.net/",
    ),
    database="recoagent",
    collection="documents",
    vector_search_index="vector_index",
)

# Initialize faceted retriever on top of the vector store
retriever = MongoDBFacetedRetriever(vector_store)
Prepare Rich Metadata Sample Data
# Create sample documents with diverse metadata for faceting.
#
# The embeddings are constant placeholder vectors so the sample can be run as
# written; replace them with the output of your embedding model.
# EMBEDDING_DIM is assumed to match the dimension count configured on the
# Atlas vector search index -- adjust it to your index.
EMBEDDING_DIM = 384

documents = [
    VectorDocument(
        id="facet_doc1",
        content="Machine learning algorithms can automatically improve their performance through experience and training data.",
        embedding=[0.1] * EMBEDDING_DIM,  # Placeholder: your embedding
        metadata={
            "title": "Introduction to Machine Learning",
            "category": "AI",
            "subcategory": "Machine Learning",
            "difficulty": "beginner",
            "year": 2023,
            "author": "Dr. Smith",
            "language": "English",
            "tags": ["algorithms", "training", "performance"],
            "rating": 4.5,
            "read_time": 15,
        },
    ),
    VectorDocument(
        id="facet_doc2",
        content="Neural networks are computing systems inspired by biological neural networks, consisting of interconnected nodes.",
        embedding=[0.2] * EMBEDDING_DIM,  # Placeholder: your embedding
        metadata={
            "title": "Understanding Neural Networks",
            "category": "AI",
            "subcategory": "Neural Networks",
            "difficulty": "intermediate",
            "year": 2023,
            "author": "Prof. Johnson",
            "language": "English",
            "tags": ["neural networks", "computing", "biology"],
            "rating": 4.8,
            "read_time": 25,
        },
    ),
    VectorDocument(
        id="facet_doc3",
        content="Deep learning uses artificial neural networks with multiple layers to model complex patterns in data.",
        embedding=[0.3] * EMBEDDING_DIM,  # Placeholder: your embedding
        metadata={
            "title": "Deep Learning Fundamentals",
            "category": "AI",
            "subcategory": "Deep Learning",
            "difficulty": "advanced",
            "year": 2024,
            "author": "Dr. Williams",
            "language": "English",
            "tags": ["deep learning", "neural networks", "patterns"],
            "rating": 4.9,
            "read_time": 35,
        },
    ),
    VectorDocument(
        id="facet_doc4",
        content="Natural language processing combines computational linguistics with machine learning for language understanding.",
        embedding=[0.4] * EMBEDDING_DIM,  # Placeholder: your embedding
        metadata={
            "title": "NLP Guide",
            "category": "NLP",
            "subcategory": "Language Processing",
            "difficulty": "intermediate",
            "year": 2024,
            "author": "Dr. Brown",
            "language": "English",
            "tags": ["NLP", "linguistics", "language"],
            "rating": 4.6,
            "read_time": 20,
        },
    ),
    VectorDocument(
        id="facet_doc5",
        content="Computer vision enables machines to interpret and understand visual information from the world.",
        embedding=[0.5] * EMBEDDING_DIM,  # Placeholder: your embedding
        metadata={
            "title": "Computer Vision Basics",
            "category": "Computer Vision",
            "subcategory": "Image Processing",
            "difficulty": "beginner",
            "year": 2023,
            "author": "Dr. Davis",
            "language": "English",
            "tags": ["computer vision", "images", "visual"],
            "rating": 4.3,
            "read_time": 18,
        },
    ),
    VectorDocument(
        id="facet_doc6",
        content="Reinforcement learning is an area of machine learning concerned with how agents take actions in an environment.",
        embedding=[0.6] * EMBEDDING_DIM,  # Placeholder: your embedding
        metadata={
            "title": "Reinforcement Learning",
            "category": "AI",
            "subcategory": "Reinforcement Learning",
            "difficulty": "advanced",
            "year": 2024,
            "author": "Prof. Wilson",
            "language": "English",
            "tags": ["reinforcement", "agents", "environment"],
            "rating": 4.7,
            "read_time": 30,
        },
    ),
]

# Add documents to MongoDB and report what the store returned
success = vector_store.add_documents(documents)
print(f"✅ Documents added: {success}")
Basic Faceted Search
# Run a faceted search: the retriever is given the query text together with
# the metadata fields to facet on.
query = "artificial intelligence machine learning"
facets = ["category", "difficulty", "year", "author"]
results = retriever.retrieve(query=query, k=10, facets=facets)

print(f"Faceted search results for: '{query}'")
print(f"Found {len(results)} results")
print()

# Metadata fields printed for every hit, as (label, metadata key) pairs
summary_fields = (
    ("Title", "title"),
    ("Category", "category"),
    ("Difficulty", "difficulty"),
    ("Year", "year"),
)

# Display results, numbered from 1
for rank, hit in enumerate(results, start=1):
    print(f"{rank}. Score: {hit.score:.3f}")
    hit_metadata = hit.chunk.metadata
    for label, key in summary_fields:
        print(f" {label}: {hit_metadata.get(key, 'N/A')}")
    print(f" Content: {hit.chunk.content[:100]}...")
    print()

# Fetch the per-facet breakdown for the same query and facet list
facet_info = retriever.get_facets(query, facets)

print("Facet Information:")
print("=" * 40)
for name, buckets in facet_info.items():
    print(f"\n{name.upper()}:")
    # Each bucket exposes the facet value under '_id' and its tally under 'count'
    for bucket in buckets:
        print(f" {bucket['_id']}: {bucket['count']} documents")
Advanced Faceted Search with Filtering
# Faceted search restricted by metadata filters. A filter is either a plain
# value or an {"operator": ..., "value": ...} pair.
filter_metadata = {
    "category": "AI",
    "year": {"operator": "$gte", "value": 2023},
    "difficulty": {"operator": "$in", "value": ["beginner", "intermediate"]},
}

# Query text and facet names shared by the search and the facet lookup below
filtered_query = "machine learning neural networks"
filtered_facets = ("category", "subcategory", "difficulty", "rating")

results = retriever.retrieve(
    query=filtered_query,
    k=10,
    facets=list(filtered_facets),
    filter_metadata=filter_metadata,
)

print(f"Filtered faceted search results:")
print(f"Found {len(results)} results matching filters")
print()

# Metadata fields printed under each hit title, as (label, metadata key) pairs
detail_fields = (
    ("Category", "category"),
    ("Subcategory", "subcategory"),
    ("Difficulty", "difficulty"),
    ("Rating", "rating"),
)

for hit in results:
    hit_metadata = hit.chunk.metadata
    print(f"• {hit_metadata.get('title', 'N/A')}")
    for label, key in detail_fields:
        print(f" {label}: {hit_metadata.get(key, 'N/A')}")
    print(f" Score: {hit.score:.3f}")
    print()

# Get facet information computed with the same filters applied
facet_info = retriever.get_facets(
    filtered_query,
    list(filtered_facets),
    filter_metadata,
)

print("Filtered Facet Information:")
print("=" * 40)
for name, buckets in facet_info.items():
    print(f"\n{name.upper()}:")
    for bucket in buckets:
        print(f" {bucket['_id']}: {bucket['count']} documents")
Interactive Faceted Search
def interactive_faceted_search():
    """Run one faceted search driven by console input and print the outcome.

    Prompts for a query (blank input falls back to "artificial intelligence")
    and for a comma-separated list of facets (blank input falls back to
    category, difficulty and year). The hits and the per-facet counts are then
    fetched from the module-level ``retriever`` and printed.

    Returns:
        The result list returned by ``retriever.retrieve``.
    """
    print("Interactive Faceted Search")
    print("=" * 30)

    # Get user input; an empty query is replaced by a default one
    query = input("Enter your search query: ").strip()
    if not query:
        query = "artificial intelligence"

    # Available facets (shown to the user as a hint)
    available_facets = ["category", "subcategory", "difficulty", "year", "author", "language", "rating"]
    print(f"\nAvailable facets: {', '.join(available_facets)}")
    facet_input = input("Enter facets to include (comma-separated): ").strip()

    # Drop blank entries left by stray or trailing commas ("category,,year")
    # and repeated names; dict.fromkeys keeps the order the user typed.
    facets = list(
        dict.fromkeys(
            name.strip() for name in facet_input.split(",") if name.strip()
        )
    )
    if not facets:
        # Nothing usable was entered: fall back to the default facets
        facets = ["category", "difficulty", "year"]

    # Perform search
    results = retriever.retrieve(query, k=10, facets=facets)
    print(f"\nSearch Results for: '{query}'")
    print(f"Found {len(results)} results")
    print()

    # Display results, showing the value of every requested facet per hit
    for i, result in enumerate(results, 1):
        print(f"{i}. {result.chunk.metadata.get('title', 'N/A')}")
        print(f" Score: {result.score:.3f}")
        for facet in facets:
            value = result.chunk.metadata.get(facet, 'N/A')
            print(f" {facet.capitalize()}: {value}")
        print(f" Content: {result.chunk.content[:80]}...")
        print()

    # Display facets
    facet_info = retriever.get_facets(query, facets)
    print("Facet Breakdown:")
    print("-" * 20)
    for facet_name, facet_values in facet_info.items():
        print(f"\n{facet_name.upper()}:")
        for value in facet_values:
            print(f" {value['_id']}: {value['count']} documents")

    return results
# Run interactive search (uncomment to use)
# results = interactive_faceted_search()
Faceted Search with Range Filters
# Faceted search narrowed by comparison filters on numeric metadata fields
range_filters = {
    "year": {"operator": "$gte", "value": 2023},
    "rating": {"operator": "$gte", "value": 4.5},
    "read_time": {"operator": "$lte", "value": 30},
}

results = retriever.retrieve(
    query="machine learning artificial intelligence",
    k=10,
    facets=["category", "difficulty", "year", "rating"],
    filter_metadata=range_filters,
)

print(f"Range-filtered faceted search results:")
print(f"Found {len(results)} results with filters:")
# Human-readable summary of the filters defined above
for filter_summary in (
    " Year >= 2023",
    " Rating >= 4.5",
    " Read time <= 30 minutes",
):
    print(filter_summary)
print()

for hit in results:
    hit_metadata = hit.chunk.metadata
    print(f"• {hit_metadata.get('title', 'N/A')}")
    print(f" Year: {hit_metadata.get('year', 'N/A')}")
    print(f" Rating: {hit_metadata.get('rating', 'N/A')}")
    print(f" Read Time: {hit_metadata.get('read_time', 'N/A')} min")
    print(f" Score: {hit.score:.3f}")
    print()
Multi-Faceted Search with Complex Filters
from collections import defaultdict

# Complex multi-faceted search: several operator filters plus one exact match
complex_filters = {
    "category": {"operator": "$in", "value": ["AI", "NLP"]},
    "difficulty": {"operator": "$in", "value": ["intermediate", "advanced"]},
    "year": {"operator": "$gte", "value": 2023},
    "rating": {"operator": "$gte", "value": 4.5},
    "language": "English",
}

results = retriever.retrieve(
    query="machine learning neural networks deep learning",
    k=15,
    facets=["category", "subcategory", "difficulty", "year", "author", "rating"],
    filter_metadata=complex_filters,
)

print(f"Complex multi-faceted search results:")
print(f"Found {len(results)} results with complex filters")
print()

# Group results by category; hits without a category go under 'Unknown'
grouped_results = defaultdict(list)
for hit in results:
    grouped_results[hit.chunk.metadata.get('category', 'Unknown')].append(hit)

# Print one section per category, in the order categories were first seen
for group_name, group_hits in grouped_results.items():
    print(f"\n{group_name.upper()} ({len(group_hits)} results):")
    for hit in group_hits:
        hit_metadata = hit.chunk.metadata
        print(f" • {hit_metadata.get('title', 'N/A')}")
        print(f" Difficulty: {hit_metadata.get('difficulty', 'N/A')}")
        print(f" Rating: {hit_metadata.get('rating', 'N/A')}")
        print(f" Score: {hit.score:.3f}")
Faceted Search Analytics
def analyze_faceted_search_performance():
    """Run a fixed set of queries and tally facet counts across all of them.

    For every query the module-level ``retriever`` is asked for the hits and
    for the facet breakdown. The per-query top three values of each facet are
    printed, followed by the five largest accumulated totals per facet.

    Returns:
        A dict mapping facet name -> {facet value -> summed count}.
    """
    queries = (
        "machine learning",
        "neural networks",
        "artificial intelligence",
        "deep learning",
        "natural language processing",
    )
    facets = ["category", "difficulty", "year", "rating"]

    def bucket_count(bucket):
        # Sort key: the tally reported for one facet value
        return bucket['count']

    def total_count(item):
        # Sort key: the accumulated total of a (facet value, total) pair
        return item[1]

    print("Faceted Search Analytics")
    print("=" * 30)

    all_facet_data = {}
    for query in queries:
        print(f"\nAnalyzing query: '{query}'")

        # Hits and facet breakdown for this query
        results = retriever.retrieve(query, k=10, facets=facets)
        facet_info = retriever.get_facets(query, facets)
        print(f" Results: {len(results)}")

        # Fold this query's counts into the running totals
        for facet_name, buckets in facet_info.items():
            totals = all_facet_data.setdefault(facet_name, {})
            for bucket in buckets:
                facet_key = bucket['_id']
                totals[facet_key] = totals.get(facet_key, 0) + bucket['count']

        # Show the three most frequent values of each facet for this query
        for facet_name, buckets in facet_info.items():
            leaders = sorted(buckets, key=bucket_count, reverse=True)[:3]
            print(f" Top {facet_name}: {[b['_id'] for b in leaders]}")

    # Overall facet analysis: five largest totals per facet
    print("\nOverall Facet Analysis:")
    print("-" * 25)
    for facet_name, totals in all_facet_data.items():
        ranked = sorted(totals.items(), key=total_count, reverse=True)
        print(f"\n{facet_name.upper()}:")
        for facet_value, total in ranked[:5]:
            print(f" {facet_value}: {total} total occurrences")

    return all_facet_data


# Run analytics
facet_analytics = analyze_faceted_search_performance()
Advanced Faceted Search with MongoDB Advanced Retriever
from packages.rag.mongodb_retrievers import MongoDBAdvancedRetriever
# The advanced retriever selects the strategy through `search_type`
advanced_retriever = MongoDBAdvancedRetriever(vector_store)

# One query is run through every strategy so the result counts can be compared
query = "machine learning artificial intelligence"
comparison_facets = ("category", "difficulty", "year")


def _run_strategy(search_type, **extra):
    """Retrieve the top 5 hits for `query` using the given search type."""
    return advanced_retriever.retrieve(query, k=5, search_type=search_type, **extra)


print("Comparing search strategies with faceting:")
print("=" * 50)

# Plain vector search (no facets are passed)
vector_results = _run_strategy("vector")
print(f"\nVector Search: {len(vector_results)} results")

# Hybrid search (no facets are passed)
hybrid_results = _run_strategy("hybrid")
print(f"Hybrid Search: {len(hybrid_results)} results")

# Faceted search over the comparison facets
faceted_results = _run_strategy("faceted", facets=list(comparison_facets))
print(f"Faceted Search: {len(faceted_results)} results")

# Facet breakdown for the same query and facets
facets = advanced_retriever.get_facets(query, list(comparison_facets))

print("\nFacet Information:")
for name, buckets in facets.items():
    print(f"\n{name.upper()}:")
    for bucket in buckets:
        print(f" {bucket['_id']}: {bucket['count']} documents")
Complete Faceted Search Example
def complete_faceted_example():
    """Run the whole faceted-search flow end to end against a fresh store.

    Connects to the cluster, inserts two sample documents, runs a faceted
    search, prints the hits and the facet breakdown, and finally removes the
    sample documents and closes the connection.

    The connection string is read from the ``MONGODB_URI`` environment
    variable; when that is unset the placeholder URI is used and must be
    replaced before this can reach a real cluster.

    Returns:
        ``(results, facets)`` on success, or ``([], {})`` when any step inside
        the search flow raises (the error is printed).
    """
    import os

    sample_ids = ["complete_facet1", "complete_facet2"]

    # Initialize
    vector_store = MongoDBAtlasVectorStore(
        uri=os.environ.get(
            "MONGODB_URI",
            "mongodb+srv://username:password@cluster.mongodb.net/",
        ),
        database="recoagent",
        collection="documents",
        vector_search_index="vector_index",
    )
    retriever = MongoDBFacetedRetriever(vector_store)

    try:
        # Add sample documents (constant placeholder vectors of 384 floats)
        sample_docs = [
            VectorDocument(
                id="complete_facet1",
                content="Machine learning enables computers to learn and improve from experience without being explicitly programmed.",
                embedding=[0.1] * 384,
                metadata={
                    "title": "ML Basics",
                    "category": "AI",
                    "difficulty": "beginner",
                    "year": 2023,
                    "rating": 4.5,
                },
            ),
            VectorDocument(
                id="complete_facet2",
                content="Neural networks are computing systems inspired by biological neural networks that can learn complex patterns.",
                embedding=[0.2] * 384,
                metadata={
                    "title": "Neural Networks",
                    "category": "AI",
                    "difficulty": "intermediate",
                    "year": 2024,
                    "rating": 4.8,
                },
            ),
        ]
        vector_store.add_documents(sample_docs)
        print("✅ Sample documents added")

        # Perform faceted search
        results = retriever.retrieve(
            query="machine learning neural networks",
            k=5,
            facets=["category", "difficulty", "year", "rating"],
        )
        print(f"✅ Faceted search completed: {len(results)} results")

        # Display results
        for i, result in enumerate(results, 1):
            print(f"{i}. {result.chunk.metadata.get('title', 'N/A')}")
            print(f" Score: {result.score:.3f}")
            print(f" Category: {result.chunk.metadata.get('category', 'N/A')}")
            print(f" Difficulty: {result.chunk.metadata.get('difficulty', 'N/A')}")

        # Get facets
        facets = retriever.get_facets(
            "machine learning neural networks",
            ["category", "difficulty", "year", "rating"],
        )
        print(f"\nFacet Information:")
        for facet_name, facet_values in facets.items():
            print(f"\n{facet_name.upper()}:")
            for value in facet_values:
                print(f" {value['_id']}: {value['count']} documents")

        return results, facets
    except Exception as e:
        # Top-level boundary of the demo: report the failure, return empties
        print(f"❌ Error: {e}")
        return [], {}
    finally:
        # Cleanup is best-effort: a failing delete must neither skip close()
        # nor replace the value being returned above with a new exception.
        try:
            vector_store.delete_documents(sample_ids)
        except Exception as cleanup_error:
            print(f"⚠️ Cleanup failed: {cleanup_error}")
        finally:
            vector_store.close()


# Run complete example
results, facets = complete_faceted_example()
This faceted search example demonstrates how to leverage MongoDB's aggregation capabilities to provide rich, interactive search experiences with metadata-based filtering and exploration.