Vector Search

This example shows how to use vector search for semantic similarity, demonstrating the core retrieval mechanism behind RAG systems.

Overview

This example demonstrates:

  • Creating vector embeddings from text
  • Performing similarity search
  • Comparing different embedding models
  • Inspecting search results and similarity scores

Prerequisites

  • Python 3.8+
  • RecoAgent installed (pip install recoagent)
  • OpenAI API key
  • Basic understanding of vector embeddings

Code Implementation

import os
import numpy as np
from typing import List, Dict, Tuple
from dotenv import load_dotenv
from recoagent import RecoAgent
from recoagent.retrievers import VectorRetriever
from recoagent.embeddings import OpenAIEmbeddings

# Load environment variables
load_dotenv()

class VectorSearchDemo:
    def __init__(self):
        """Initialize the vector search demonstration."""
        self.documents = [
            "Machine learning is a subset of artificial intelligence that focuses on algorithms.",
            "Deep learning uses neural networks with multiple layers to learn complex patterns.",
            "Natural language processing helps computers understand and generate human language.",
            "Computer vision enables machines to interpret and analyze visual information.",
            "Data science combines statistics, programming, and domain expertise to extract insights.",
            "Artificial intelligence aims to create machines that can perform tasks requiring human intelligence.",
            "Supervised learning uses labeled data to train models to make predictions.",
            "Unsupervised learning finds hidden patterns in data without labeled examples.",
            "Reinforcement learning teaches agents to make decisions through trial and error.",
            "Feature engineering involves selecting and transforming variables for machine learning models."
        ]

        # Initialize different retrievers for comparison
        self.retrievers = {
            'openai': VectorRetriever(
                embedding_model="text-embedding-ada-002",
                similarity_threshold=0.7
            ),
            'openai_large': VectorRetriever(
                embedding_model="text-embedding-3-large",
                similarity_threshold=0.7
            )
        }

        # Initialize embeddings for direct comparison
        self.embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

    def setup_retrievers(self):
        """Set up retrievers with sample documents."""
        print("šŸ”§ Setting up vector retrievers...")

        for name, retriever in self.retrievers.items():
            retriever.add_documents(self.documents)
            print(f"āœ… {name} retriever ready with {len(self.documents)} documents")

    def search_with_retriever(self, query: str, retriever_name: str, top_k: int = 3):
        """Search using a specific retriever."""
        retriever = self.retrievers[retriever_name]
        results = retriever.search(query, top_k=top_k)

        print(f"\nšŸ” Search results with {retriever_name}:")
        print(f"Query: '{query}'")
        print(f"Found {len(results)} results")

        for i, result in enumerate(results, 1):
            print(f"\n{i}. Score: {result.score:.3f}")
            print(f" Document: {result.document[:80]}...")

        return results

    def compare_retrievers(self, query: str):
        """Compare results from different retrievers."""
        print(f"\nšŸ“Š Comparing retrievers for query: '{query}'")
        print("=" * 60)

        results_comparison = {}

        for retriever_name in self.retrievers.keys():
            results = self.search_with_retriever(query, retriever_name, top_k=3)
            results_comparison[retriever_name] = results

        # Show side-by-side comparison
        print(f"\nšŸ“‹ Side-by-side comparison:")
        print("-" * 60)

        max_results = max(len(results) for results in results_comparison.values())

        for i in range(max_results):
            print(f"\nResult {i+1}:")
            for retriever_name, results in results_comparison.items():
                if i < len(results):
                    result = results[i]
                    print(f" {retriever_name:12}: {result.score:.3f} - {result.document[:50]}...")
                else:
                    print(f" {retriever_name:12}: No result")

        return results_comparison

    def analyze_embeddings(self, query: str):
        """Analyze embeddings directly."""
        print(f"\n🧮 Embedding Analysis for: '{query}'")
        print("=" * 50)

        # Get query embedding
        query_embedding = self.embeddings.embed_query(query)
        print(f"Query embedding shape: {len(query_embedding)}")
        print(f"Query embedding (first 5 values): {query_embedding[:5]}")

        # Get document embeddings
        doc_embeddings = self.embeddings.embed_documents(self.documents)
        print(f"Document embeddings shape: {len(doc_embeddings)} x {len(doc_embeddings[0])}")

        # Calculate similarities
        similarities = []
        for i, doc_emb in enumerate(doc_embeddings):
            similarity = np.dot(query_embedding, doc_emb) / (
                np.linalg.norm(query_embedding) * np.linalg.norm(doc_emb)
            )
            similarities.append((i, similarity, self.documents[i]))

        # Sort by similarity
        similarities.sort(key=lambda x: x[1], reverse=True)

        print(f"\nšŸ“ˆ Top 5 most similar documents:")
        for i, (doc_idx, similarity, document) in enumerate(similarities[:5], 1):
            print(f"{i}. Doc {doc_idx}: {similarity:.3f}")
            print(f" {document[:70]}...")

        return similarities

    def semantic_search_demo(self):
        """Demonstrate semantic search capabilities."""
        print("\nšŸŽÆ Semantic Search Demonstration")
        print("=" * 50)

        test_queries = [
            "What is deep learning?",
            "How do neural networks work?",
            "What are the different types of machine learning?",
            "How do computers understand language?",
            "What is feature engineering in ML?"
        ]

        for query in test_queries:
            print(f"\nšŸ” Query: '{query}'")
            print("-" * 40)

            # Search with main retriever
            results = self.search_with_retriever(query, 'openai', top_k=2)

            # Show why results were selected
            if results:
                print(f"\nšŸ’” Why these results were selected:")
                for result in results:
                    # Find which document this is
                    doc_idx = self.documents.index(result.document)
                    print(f" Document {doc_idx}: High semantic similarity to query")

    def similarity_threshold_demo(self):
        """Demonstrate the effect of similarity thresholds."""
        print("\nšŸŽšļø Similarity Threshold Demonstration")
        print("=" * 50)

        query = "artificial intelligence and machine learning"

        # Test different thresholds
        thresholds = [0.5, 0.7, 0.8, 0.9]

        for threshold in thresholds:
            retriever = VectorRetriever(
                embedding_model="text-embedding-ada-002",
                similarity_threshold=threshold
            )
            retriever.add_documents(self.documents)

            results = retriever.search(query, top_k=5)

            print(f"\nThreshold {threshold}: {len(results)} results")
            for result in results:
                print(f" {result.score:.3f}: {result.document[:50]}...")

    def interactive_search(self):
        """Interactive search session."""
        print("\nšŸ’¬ Interactive Vector Search")
        print("=" * 40)
        print("Type 'quit' to exit, 'compare' to compare retrievers")
        print("Type 'analyze' to see embedding analysis")

        while True:
            query = input("\nšŸ” Enter search query: ").strip()

            if query.lower() == 'quit':
                break
            elif query.lower() == 'compare':
                self.compare_retrievers(query)
            elif query.lower() == 'analyze':
                self.analyze_embeddings(query)
            elif query:
                # Default search
                self.search_with_retriever(query, 'openai', top_k=3)

        print("\nšŸ‘‹ Thanks for exploring vector search!")

def main():
    """Main function to run the vector search demo."""
    print("šŸš€ Vector Search Example")
    print("=" * 50)

    # Initialize demo
    demo = VectorSearchDemo()

    # Set up retrievers
    demo.setup_retrievers()

    # Run demonstrations
    print("\n" + "=" * 50)
    print("šŸŽ“ Educational Demonstrations")
    print("=" * 50)

    # Semantic search demo
    demo.semantic_search_demo()

    # Threshold demo
    demo.similarity_threshold_demo()

    # Interactive session
    demo.interactive_search()

if __name__ == "__main__":
    main()

Running the Example

1. Setup

# Create project directory
mkdir vector-search-example
cd vector-search-example

# Install dependencies
pip install recoagent python-dotenv numpy

# Create .env file
echo "OPENAI_API_KEY=your_api_key_here" > .env

2. Run the Example

python vector_search.py

3. Expected Output

šŸš€ Vector Search Example
==================================================

šŸ”§ Setting up vector retrievers...
āœ… openai retriever ready with 10 documents
āœ… openai_large retriever ready with 10 documents

==================================================
šŸŽ“ Educational Demonstrations
==================================================

šŸŽÆ Semantic Search Demonstration
==================================================

šŸ” Query: 'What is deep learning?'
----------------------------------------

šŸ” Search results with openai:
Query: 'What is deep learning?'
Found 2 results

1. Score: 0.847
Document: Deep learning uses neural networks with multiple layers to learn complex patterns.

2. Score: 0.723
Document: Machine learning is a subset of artificial intelligence that focuses on algorithms.

šŸ’” Why these results were selected:
Document 1: High semantic similarity to query
Document 0: High semantic similarity to query

šŸ” Query: 'How do neural networks work?'
----------------------------------------

šŸ” Search results with openai:
Query: 'How do neural networks work?'
Found 2 results

1. Score: 0.856
Document: Deep learning uses neural networks with multiple layers to learn complex patterns.

2. Score: 0.734
Document: Machine learning is a subset of artificial intelligence that focuses on algorithms.

šŸŽšļø Similarity Threshold Demonstration
==================================================

Threshold 0.5: 5 results
0.847: Deep learning uses neural networks with multiple layers to learn complex patterns.
0.723: Machine learning is a subset of artificial intelligence that focuses on algorithms.
0.689: Artificial intelligence aims to create machines that can perform tasks requiring human intelligence.
0.645: Supervised learning uses labeled data to train models to make predictions.
0.598: Unsupervised learning finds hidden patterns in data without labeled examples.

Threshold 0.7: 3 results
0.847: Deep learning uses neural networks with multiple layers to learn complex patterns.
0.723: Machine learning is a subset of artificial intelligence that focuses on algorithms.
0.689: Artificial intelligence aims to create machines that can perform tasks requiring human intelligence.

Threshold 0.8: 1 results
0.847: Deep learning uses neural networks with multiple layers to learn complex patterns.

Threshold 0.9: 0 results

šŸ’¬ Interactive Vector Search
========================================
Type 'quit' to exit, 'compare' to compare retrievers
Type 'analyze' to see embedding analysis

šŸ” Enter search query: What is NLP?

Key Concepts

  1. Embeddings: High-dimensional vectors that represent the meaning of a piece of text
  2. Similarity: Cosine similarity measures how closely two vectors point in the same direction, regardless of their length
  3. Threshold: The minimum similarity score a document must reach to be returned
  4. Semantic Understanding: Vector search finds conceptually related content even when the query and the documents share no exact keywords
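
To make these ideas concrete, here is a minimal, self-contained sketch. Toy 3-dimensional vectors stand in for real embeddings (OpenAI embeddings have 1536 or 3072 dimensions depending on the model): a query vector is compared against two document vectors with cosine similarity, and a threshold decides which documents are returned.

import numpy as np

# Toy "embeddings" standing in for real ones
query_vec = np.array([0.9, 0.1, 0.0])
doc_vecs = {
    "doc about deep learning": np.array([0.8, 0.2, 0.1]),
    "doc about cooking": np.array([0.0, 0.1, 0.9]),
}

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

threshold = 0.7
for name, vec in doc_vecs.items():
    score = cosine(query_vec, vec)
    verdict = "kept" if score >= threshold else "filtered out"
    print(f"{name}: {score:.3f} ({verdict})")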

Embedding Models

# Different embedding models have different strengths
embeddings = {
    'text-embedding-ada-002': 'Good general purpose, fast',
    'text-embedding-3-large': 'Higher quality, slower',
    'text-embedding-3-small': 'Fast, smaller dimensions'
}
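
Switching models only requires changing the embedding_model argument when constructing the retriever; the sketch below mirrors the constructor used in the example above. Keep in mind that documents indexed with one model must also be searched with the same model, since different models produce incompatible vector spaces.

from recoagent.retrievers import VectorRetriever

# Use the smaller model for faster, cheaper indexing
fast_retriever = VectorRetriever(
    embedding_model="text-embedding-3-small",
    similarity_threshold=0.7
)
fast_retriever.add_documents(documents)  # `documents` as defined in the example above
results = fast_retriever.search("What is deep learning?", top_k=3)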

Similarity Scoring

import numpy as np

# Cosine similarity formula
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Range: -1 to 1, where:
#  1 = identical meaning
#  0 = no relationship (orthogonal vectors)
# -1 = opposite meaning

Advanced Features

Custom Similarity Metrics

import numpy as np
from recoagent.retrievers import VectorRetriever

# Custom similarity function
def custom_similarity(query_emb, doc_emb):
    # Your custom similarity calculation (cosine similarity shown here)
    return np.dot(query_emb, doc_emb) / (np.linalg.norm(query_emb) * np.linalg.norm(doc_emb))

retriever = VectorRetriever(
    embedding_model="text-embedding-ada-002",
    similarity_function=custom_similarity
)
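
The function above reproduces cosine similarity; the same hook can also plug in a genuinely different metric. Below is a sketch that derives a similarity from Euclidean distance. Note that its values fall in (0, 1] rather than [-1, 1], so similarity_threshold would need to be re-tuned accordingly.

import numpy as np
from recoagent.retrievers import VectorRetriever

def euclidean_similarity(query_emb, doc_emb):
    # Convert a distance (lower is better) into a similarity (higher is better)
    distance = np.linalg.norm(np.asarray(query_emb) - np.asarray(doc_emb))
    return 1.0 / (1.0 + distance)

retriever = VectorRetriever(
    embedding_model="text-embedding-ada-002",
    similarity_function=euclidean_similarity
)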

Batch Processing

# Process multiple queries efficiently
queries = ["What is ML?", "How does AI work?", "What is deep learning?"]
results = retriever.batch_search(queries, top_k=3)
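
Assuming batch_search returns one result list per query, in the same order as the input, the results can be paired back with their queries like this:

# Pair each query with its own list of results
for query, query_results in zip(queries, results):
    print(f"Query: {query}")
    for result in query_results:
        print(f"  {result.score:.3f}: {result.document[:50]}...")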

Performance Optimization

Caching Embeddings

# Cache embeddings to avoid recomputation
retriever = VectorRetriever(
    embedding_model="text-embedding-ada-002",
    cache_embeddings=True,
    cache_dir="./embeddings_cache"
)
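
If you manage embeddings yourself rather than through the retriever, the same idea can be applied by hand. A minimal sketch that memoizes query embeddings in a dictionary, using the OpenAIEmbeddings wrapper from the example above:

query_cache = {}

def embed_query_cached(embeddings, query):
    # Only call the embedding API for queries we have not seen before
    if query not in query_cache:
        query_cache[query] = embeddings.embed_query(query)
    return query_cache[query]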

Batch Embedding

# Process documents in batches for efficiency
embeddings = self.embeddings.embed_documents(
    documents,
    batch_size=100  # Process 100 documents at a time
)

Troubleshooting

Common Issues

Low similarity scores:

  • Check if documents contain relevant information
  • Try different embedding models
  • Adjust similarity threshold

Slow performance:

  • Use smaller embedding models
  • Implement caching
  • Process documents in batches

Inconsistent results:

  • Ensure consistent text preprocessing (see the sketch below)
  • Use the same embedding model for all operations
  • Check for encoding issues
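
Inconsistent results often come down to queries and documents being cleaned differently before they are embedded. A minimal normalization step, applied identically to both sides, removes one common source of drift (the exact rules here are illustrative, not required):

import unicodedata

def normalize_text(text):
    """Apply the same cleanup to queries and documents before embedding."""
    text = unicodedata.normalize("NFC", text)  # normalize Unicode representations
    text = " ".join(text.split())              # collapse runs of whitespace
    return text

query = normalize_text("What  is   deep learning? ")
documents = [normalize_text(doc) for doc in documents]  # `documents` as defined earlier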

Next Steps

This example shows the foundation of vector search. You can extend it by:

  1. Adding more embedding models for comparison
  2. Implementing hybrid search (combining vector + keyword; see the sketch after this list)
  3. Adding reranking to improve results
  4. Creating visualizations of embedding spaces
  5. Building a web interface for interactive search
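
As a starting point for item 2, here is a sketch of hybrid scoring that blends the retriever's vector score with a simple keyword-overlap score. Production systems typically use BM25 for the keyword side; the overlap function below is only a stand-in, and `retriever` refers to the one set up earlier in this example.

def keyword_overlap(query, document):
    """Crude keyword score: fraction of query terms that appear in the document."""
    q_terms = set(query.lower().split())
    d_terms = set(document.lower().split())
    return len(q_terms & d_terms) / max(len(q_terms), 1)

def hybrid_score(vector_score, query, document, alpha=0.7):
    """Blend vector and keyword scores; alpha weights the vector side."""
    return alpha * vector_score + (1 - alpha) * keyword_overlap(query, document)

# Re-rank the vector results with the blended score
query = "What is deep learning?"
results = retriever.search(query, top_k=5)
reranked = sorted(results, key=lambda r: hybrid_score(r.score, query, r.document), reverse=True)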

Ready for more? Check out the Basic Agent Example to see how vector search integrates with agents!