LlamaIndex Vector Stores
Unified interface to 40+ vector databases through LlamaIndex with automatic connection management and query optimization.
Features
- 40+ Vector Databases: Qdrant, Pinecone, Weaviate, Chroma, Milvus, and more
- Unified Interface: Consistent API across all vector stores
- Hybrid Search: Vector + keyword search capabilities
- Automatic Connection Management: Built-in connection pooling
- Query Optimization: Intelligent query routing and caching
Quick Start
from packages.rag import LlamaIndexVectorStoreAdapter, VectorStoreConfig, VectorStoreType
# Configure vector store
config = VectorStoreConfig(
store_type=VectorStoreType.QDRANT,
connection_params={"url": "http://localhost:6333"},
index_name="documents"
)
# Create adapter
adapter = LlamaIndexVectorStoreAdapter(config)
# Add documents
documents = [
{"content": "Hello world", "metadata": {"source": "test"}},
{"content": "Vector search", "metadata": {"source": "test"}}
]
await adapter.add_documents(documents)
# Search
results = await adapter.search("hello", k=5)
Supported Vector Stores
Qdrant
config = VectorStoreConfig(
store_type=VectorStoreType.QDRANT,
connection_params={
"url": "http://localhost:6333",
"api_key": "your-api-key"
},
index_name="documents"
)
Pinecone
config = VectorStoreConfig(
store_type=VectorStoreType.PINECONE,
connection_params={
"api_key": "your-pinecone-key",
"environment": "your-environment"
},
index_name="documents"
)
Weaviate
config = VectorStoreConfig(
store_type=VectorStoreType.WEAVIATE,
connection_params={
"url": "http://localhost:8080",
"api_key": "your-weaviate-key"
},
index_name="documents"
)
Chroma
config = VectorStoreConfig(
store_type=VectorStoreType.CHROMA,
connection_params={
"persist_directory": "./chroma_db"
},
index_name="documents"
)
Milvus
config = VectorStoreConfig(
store_type=VectorStoreType.MILVUS,
connection_params={
"host": "localhost",
"port": 19530
},
index_name="documents"
)
OpenSearch
config = VectorStoreConfig(
store_type=VectorStoreType.OPENSEARCH,
connection_params={
"endpoint": "https://your-domain.us-east-1.es.amazonaws.com",
"username": "your-username",
"password": "your-password"
},
index_name="documents"
)
Advanced Configuration
Hybrid Search
config = VectorStoreConfig(
store_type=VectorStoreType.QDRANT,
connection_params={"url": "http://localhost:6333"},
index_name="documents",
enable_hybrid_search=True,
similarity_threshold=0.7,
max_results=10
)
# Perform hybrid search
results = await adapter.hybrid_search(
query="machine learning",
k=5,
alpha=0.5 # 50% vector, 50% keyword
)
Metadata Filtering
# Search with metadata filters
results = await adapter.search(
query="machine learning",
k=5,
filters={
"source": "research_papers",
"year": {"$gte": 2020}
}
)
Batch Operations
# Add documents in batches
documents = [{"content": f"Document {i}", "metadata": {"id": i}} for i in range(1000)]
await adapter.add_documents(documents, batch_size=100)
Factory Methods
Quick Setup
from packages.rag import VectorStoreFactory
# Qdrant
qdrant_store = VectorStoreFactory.create_qdrant_store(
url="http://localhost:6333",
collection_name="documents"
)
# Pinecone
pinecone_store = VectorStoreFactory.create_pinecone_store(
api_key="your-api-key",
environment="your-environment",
index_name="documents"
)
# Weaviate
weaviate_store = VectorStoreFactory.create_weaviate_store(
url="http://localhost:8080",
index_name="documents"
)
# Chroma
chroma_store = VectorStoreFactory.create_chroma_store(
persist_directory="./chroma_db",
collection_name="documents"
)
# OpenSearch
opensearch_store = VectorStoreFactory.create_opensearch_store(
endpoint="https://your-domain.us-east-1.es.amazonaws.com",
username="your-username",
password="your-password",
index_name="documents"
)
Document Management
Adding Documents
# Single document
document = {
"content": "This is a test document",
"metadata": {
"source": "test",
"category": "example"
}
}
await adapter.add_documents([document])
# Multiple documents
documents = [
{"content": "Document 1", "metadata": {"id": 1}},
{"content": "Document 2", "metadata": {"id": 2}},
{"content": "Document 3", "metadata": {"id": 3}}
]
await adapter.add_documents(documents)
Updating Documents
# Update document
await adapter.update_document(
document_id="doc_123",
content="Updated content",
metadata={"updated": True}
)
Deleting Documents
# Delete a single document (the API always takes a list of IDs)
await adapter.delete_documents(["doc_123"])
# Delete multiple documents
await adapter.delete_documents(["doc_123", "doc_456", "doc_789"])
Retrieving Documents
# Get document by ID
document = await adapter.get_document("doc_123")
if document:
print(f"Content: {document['content']}")
print(f"Metadata: {document['metadata']}")
Search Operations
Basic Search
# Simple search
results = await adapter.search("machine learning", k=5)
for result in results:
print(f"Content: {result['content']}")
print(f"Score: {result['score']}")
print(f"Metadata: {result['metadata']}")
Advanced Search
# Search with filters and threshold
results = await adapter.search(
query="artificial intelligence",
k=10,
filters={"category": "research"},
similarity_threshold=0.8
)
Hybrid Search
# Combine vector and keyword search
results = await adapter.hybrid_search(
query="deep learning neural networks",
k=5,
alpha=0.6, # 60% vector, 40% keyword
filters={"year": 2023}
)
Performance Optimization
Connection Pooling
config = VectorStoreConfig(
store_type=VectorStoreType.QDRANT,
connection_params={
"url": "http://localhost:6333",
"pool_size": 10, # Connection pool size
"timeout": 30 # Connection timeout
},
index_name="documents"
)
Caching
# Enable caching for frequently accessed documents
config = VectorStoreConfig(
store_type=VectorStoreType.QDRANT,
connection_params={"url": "http://localhost:6333"},
index_name="documents",
enable_caching=True,
cache_ttl=3600 # 1 hour cache TTL
)
Batch Processing
# Process large datasets in batches
async def process_large_dataset(documents):
batch_size = 100
for i in range(0, len(documents), batch_size):
batch = documents[i:i + batch_size]
await adapter.add_documents(batch)
print(f"Processed batch {i//batch_size + 1}")
Monitoring and Statistics
Get Statistics
# Get vector store statistics
stats = await adapter.get_stats()
print(f"Store type: {stats['store_type']}")
print(f"Index name: {stats['index_name']}")
print(f"Embedding dimension: {stats['embedding_dimension']}")
print(f"Hybrid search enabled: {stats['hybrid_search_enabled']}")
Performance Metrics
# Monitor performance
import time
start_time = time.time()
results = await adapter.search("test query", k=5)
search_time = time.time() - start_time
print(f"Search completed in {search_time:.2f} seconds")
print(f"Found {len(results)} results")
Error Handling
Connection Errors
try:
adapter = LlamaIndexVectorStoreAdapter(config)
except ConnectionError as e:
print(f"Failed to connect to vector store: {e}")
# Fallback to local storage
config.store_type = VectorStoreType.CHROMA
adapter = LlamaIndexVectorStoreAdapter(config)
Search Errors
try:
results = await adapter.search("query", k=5)
except Exception as e:
print(f"Search failed: {e}")
# Return empty results or fallback
results = []
Batch Operation Errors
# Handle batch operation errors
try:
await adapter.add_documents(documents)
except Exception as e:
print(f"Batch operation failed: {e}")
# Retry with smaller batches
for doc in documents:
try:
await adapter.add_documents([doc])
except Exception as doc_error:
print(f"Failed to add document: {doc_error}")
Best Practices
- Choose Right Store: Select based on your scale and requirements
- Use Hybrid Search: Combine vector and keyword search for better results
- Implement Caching: Cache frequently accessed documents
- Monitor Performance: Track search times and accuracy
- Handle Errors: Implement comprehensive error handling
- Batch Operations: Use batch operations for large datasets
- Connection Management: Configure appropriate connection pools
Migration Guide
From Custom Vector Stores
# Old way
from packages.rag import get_vector_store
vector_store = get_vector_store("qdrant", **params)
# New way
from packages.rag import LlamaIndexVectorStoreAdapter, VectorStoreConfig
config = VectorStoreConfig(
store_type=VectorStoreType.QDRANT,
connection_params=params
)
adapter = LlamaIndexVectorStoreAdapter(config)
From Direct LlamaIndex
# Old way
from llama_index.vector_stores import QdrantVectorStore
vector_store = QdrantVectorStore(**params)
# New way
from packages.rag import LlamaIndexVectorStoreAdapter, VectorStoreConfig
config = VectorStoreConfig(
store_type=VectorStoreType.QDRANT,
connection_params=params
)
adapter = LlamaIndexVectorStoreAdapter(config)
API Reference
VectorStoreConfig
| Parameter | Type | Description |
|---|---|---|
| store_type | VectorStoreType | Type of vector store |
| connection_params | Dict | Connection parameters |
| index_name | str | Index/collection name |
| embedding_dimension | int | Embedding dimension |
| similarity_threshold | float | Similarity threshold |
| max_results | int | Maximum results to return |
| enable_hybrid_search | bool | Enable hybrid search |
VectorStoreType
| Value | Description |
|---|---|
| QDRANT | Qdrant vector database |
| PINECONE | Pinecone vector database |
| WEAVIATE | Weaviate vector database |
| CHROMA | Chroma vector database |
| MILVUS | Milvus vector database |
| OPENSEARCH | OpenSearch vector database |
| ELASTICSEARCH | Elasticsearch vector database |
| POSTGRES | PostgreSQL with pgvector |
| MONGODB | MongoDB Atlas Vector Search |
| REDIS | Redis vector database |
| SUPABASE | Supabase vector database |
LlamaIndexVectorStoreAdapter
| Method | Description |
|---|---|
| add_documents(documents, batch_size) | Add documents to store |
| search(query, k, filters, threshold) | Search documents |
| hybrid_search(query, k, alpha, filters) | Hybrid search |
| update_document(doc_id, content, metadata) | Update document |
| delete_documents(doc_ids) | Delete documents |
| get_document(doc_id) | Get document by ID |
| get_stats() | Get store statistics |