Skip to main content

Basic Query Expansion

This example demonstrates the fundamental capabilities of the Query Expansion System, including synonym management and basic expansion strategies.

Problem Statement

Enterprise users search using terminology that may not match how documents are written, causing relevant content to be missed. We need to expand queries to include synonyms and related terms.

Solution Approach

  1. Set up a synonym database with domain-specific terminology
  2. Create expansion contexts for different user types
  3. Apply multiple expansion strategies
  4. Demonstrate the results

Code Implementation

import asyncio
from packages.rag.query_expansion import (
QueryExpansionSystem, ExpansionContext, ExpansionType, SynonymSource,
create_query_expansion_system
)

async def _add_synonyms(system, entries, context: str) -> None:
    """Register each synonym entry as manually curated and echo it.

    Args:
        system: Expansion system exposing an awaitable ``add_synonym``.
        entries: Iterable of ``(term, synonym, domain, confidence)`` tuples.
        context: Free-text context label stored with every entry.
    """
    for term, synonym, domain, confidence in entries:
        await system.add_synonym(
            term=term,
            synonym=synonym,
            domain=domain,
            source=SynonymSource.MANUAL_CURATION,
            confidence=confidence,
            context=context,
        )
        print(f" Added: {term} -> {synonym}")


def _print_expansions(expansions) -> None:
    """Print a numbered summary of each expansion, or a notice if there are none."""
    if not expansions:
        print(" No expansions found")
        return

    for i, expansion in enumerate(expansions, 1):
        print(f" {i}. {expansion.expanded_query}")
        print(f" Type: {expansion.expansion_type.value}")
        print(f" Confidence: {expansion.confidence_score:.2f}")
        print(f" Relevance: {expansion.relevance_score:.2f}")

        if expansion.synonyms_used:
            synonyms = [f"{s.term}->{s.synonym}" for s in expansion.synonyms_used]
            print(f" Synonyms: {', '.join(synonyms)}")


async def basic_query_expansion_example(db_path: str = "basic_example.db"):
    """Demonstrate basic query expansion functionality.

    Runs the whole demo end to end: seeds technical and business synonyms,
    builds one expansion context per user type, runs every test query through
    each strategy combination, records one positive and one negative feedback
    entry, and prints the analytics metrics.

    Args:
        db_path: Path handed to both ``create_query_expansion_system`` and
            ``create_synonym_analytics``. Defaults to ``"basic_example.db"``.
    """
    print("🚀 Basic Query Expansion Example")
    print("=" * 40)

    # 1. Create expansion system
    system = create_query_expansion_system(db_path)

    # 2. Add some basic synonyms
    print("\n📚 Adding synonyms to database...")

    # Technical synonyms
    technical_synonyms = [
        ("API", "Application Programming Interface", "technical", 0.95),
        ("SQL", "Structured Query Language", "technical", 0.95),
        ("HTTP", "Hypertext Transfer Protocol", "technical", 0.9),
        ("JSON", "JavaScript Object Notation", "technical", 0.9),
        ("code", "programming", "technical", 0.8),
        ("software", "application", "technical", 0.7),
    ]
    await _add_synonyms(system, technical_synonyms, context="software development")

    # Business synonyms
    business_synonyms = [
        ("KPI", "Key Performance Indicator", "business", 0.95),
        ("ROI", "Return on Investment", "business", 0.95),
        ("SLA", "Service Level Agreement", "business", 0.9),
        ("revenue", "income", "business", 0.8),
        ("customer", "client", "business", 0.7),
    ]
    await _add_synonyms(system, business_synonyms, context="business operations")

    # 3. Create expansion contexts
    print("\n👥 Creating expansion contexts...")

    # Developer context
    developer_context = ExpansionContext(
        user_id="dev_001",
        user_role="developer",
        department="engineering",
        domain="technical",
        session_id="session_dev_001",
        conversation_history=[],
        available_documents=[],
        user_preferences={},
        expansion_settings={},
    )

    # Manager context
    manager_context = ExpansionContext(
        user_id="mgr_001",
        user_role="manager",
        department="business",
        domain="business",
        session_id="session_mgr_001",
        conversation_history=[],
        available_documents=[],
        user_preferences={},
        expansion_settings={},
    )

    print(" Created developer context (technical domain)")
    print(" Created manager context (business domain)")

    # 4. Test queries
    test_queries = [
        {
            "query": "API documentation",
            "context": developer_context,
            "description": "Technical query with acronym",
        },
        {
            "query": "KPI analysis report",
            "context": manager_context,
            "description": "Business query with acronym",
        },
        {
            "query": "code review process",
            "context": developer_context,
            "description": "Technical query with synonym",
        },
        {
            "query": "customer satisfaction metrics",
            "context": manager_context,
            "description": "Business query with synonym",
        },
    ]

    # 5. Demonstrate different expansion strategies
    strategies_to_test = [
        ([ExpansionType.SYNONYM_EXPANSION], "Synonym Expansion Only"),
        ([ExpansionType.ACRONYM_EXPANSION], "Acronym Expansion Only"),
        (
            [ExpansionType.SYNONYM_EXPANSION, ExpansionType.ACRONYM_EXPANSION],
            "Combined Strategies",
        ),
    ]

    for test_case in test_queries:
        print(f"\n📝 Testing Query: '{test_case['query']}'")
        print(f" Context: {test_case['description']}")

        for strategies, strategy_name in strategies_to_test:
            print(f"\n 🔧 {strategy_name}:")

            # Deliberately broad: one failing strategy is reported and the
            # demo moves on to the next combination instead of aborting.
            try:
                expansions = await system.expand_query(
                    query=test_case["query"],
                    context=test_case["context"],
                    enabled_strategies=strategies,
                )
                _print_expansions(expansions)
            except Exception as e:
                print(f" Error: {e}")

    # 6. Demonstrate user feedback
    print("\n💬 Demonstrating User Feedback...")

    # Create a sample expansion result (imported at the point of use, as the
    # example only needs this type for the feedback demonstration).
    from packages.rag.query_expansion import ExpansionResult

    sample_expansion = ExpansionResult(
        original_query="API documentation",
        expanded_query="API documentation OR Application Programming Interface documentation",
        expansion_type=ExpansionType.SYNONYM_EXPANSION,
        synonyms_used=[],
        confidence_score=0.9,
        relevance_score=0.8,
        expansion_metadata={},
    )

    # Record positive feedback
    await system.record_feedback(
        expansion_result=sample_expansion,
        user_id="dev_001",
        was_helpful=True,
        rating=5,
        comment="Very helpful expansion!",
    )

    print(" Recorded positive feedback for 'API documentation' expansion")
    print(" Rating: 5/5 - Very helpful expansion!")

    # Record negative feedback
    negative_expansion = ExpansionResult(
        original_query="code review",
        expanded_query="code review OR programming review",
        expansion_type=ExpansionType.SYNONYM_EXPANSION,
        synonyms_used=[],
        confidence_score=0.6,
        relevance_score=0.5,
        expansion_metadata={},
    )

    await system.record_feedback(
        expansion_result=negative_expansion,
        user_id="dev_001",
        was_helpful=False,
        rating=2,
        comment="Expansion was too generic",
    )

    print(" Recorded negative feedback for 'code review' expansion")
    print(" Rating: 2/5 - Expansion was too generic")

    # 7. Get analytics
    print("\n📊 Getting Analytics...")

    from packages.rag.synonym_analytics import create_synonym_analytics

    # Same path as the expansion system created in step 1.
    analytics = create_synonym_analytics(db_path)

    # Get expansion metrics
    expansion_metrics = await analytics.get_expansion_metrics()
    print(f" Total Expansions: {expansion_metrics.total_expansions}")
    print(f" Success Rate: {expansion_metrics.success_rate:.2%}")
    print(f" Average Confidence: {expansion_metrics.average_confidence:.2f}")

    # Get synonym metrics
    synonym_metrics = await analytics.get_synonym_metrics()
    print(f" Total Synonyms: {synonym_metrics.total_synonyms}")
    print(f" Active Synonyms: {synonym_metrics.active_synonyms}")
    print(f" Domain Distribution: {synonym_metrics.domain_distribution}")

    print("\n✅ Basic Query Expansion Example Complete!")
    print("\nKey Learnings:")
    print(" • Synonym expansion improves query coverage")
    print(" • Acronym expansion resolves technical abbreviations")
    print(" • Context matters for expansion quality")
    print(" • User feedback helps improve the system")
    print(" • Analytics provide insights into system performance")

# Script entry point: drive the demo coroutine to completion when this file
# is executed directly (not when it is imported).
if __name__ == "__main__":
    demo = basic_query_expansion_example()
    asyncio.run(demo)

Expected Output

🚀 Basic Query Expansion Example
========================================

📚 Adding synonyms to database...
Added: API -> Application Programming Interface
Added: SQL -> Structured Query Language
Added: HTTP -> Hypertext Transfer Protocol
Added: JSON -> JavaScript Object Notation
Added: code -> programming
Added: software -> application
Added: KPI -> Key Performance Indicator
Added: ROI -> Return on Investment
Added: SLA -> Service Level Agreement
Added: revenue -> income
Added: customer -> client

👥 Creating expansion contexts...
Created developer context (technical domain)
Created manager context (business domain)

šŸ“ Testing Query: 'API documentation'
Context: Technical query with acronym

šŸ”§ Synonym Expansion Only:
1. API documentation OR Application Programming Interface documentation
Type: synonym_expansion
Confidence: 0.95
Relevance: 0.80
Synonyms: API->Application Programming Interface

šŸ”§ Acronym Expansion Only:
1. API documentation OR Application Programming Interface documentation
Type: acronym_expansion
Confidence: 0.80
Relevance: 0.90

šŸ”§ Combined Strategies:
1. API documentation OR Application Programming Interface documentation
Type: synonym_expansion
Confidence: 0.95
Relevance: 0.80
Synonyms: API->Application Programming Interface

šŸ“ Testing Query: 'KPI analysis report'
Context: Business query with acronym

šŸ”§ Synonym Expansion Only:
No expansions found

šŸ”§ Acronym Expansion Only:
1. KPI analysis report OR Key Performance Indicator analysis report
Type: acronym_expansion
Confidence: 0.80
Relevance: 0.90

šŸ”§ Combined Strategies:
1. KPI analysis report OR Key Performance Indicator analysis report
Type: acronym_expansion
Confidence: 0.80
Relevance: 0.90

💬 Demonstrating User Feedback...
Recorded positive feedback for 'API documentation' expansion
Rating: 5/5 - Very helpful expansion!
Recorded negative feedback for 'code review' expansion
Rating: 2/5 - Expansion was too generic

📊 Getting Analytics...
Total Expansions: 4
Success Rate: 75.00%
Average Confidence: 0.85
Total Synonyms: 11
Active Synonyms: 11
Domain Distribution: {'technical': 6, 'business': 5}

✅ Basic Query Expansion Example Complete!

Key Learnings:
• Synonym expansion improves query coverage
• Acronym expansion resolves technical abbreviations
• Context matters for expansion quality
• User feedback helps improve the system
• Analytics provide insights into system performance

Key Learnings

  1. Synonym Expansion: Adds domain-specific synonyms alongside the original terms (joined with OR) to improve recall
  2. Acronym Expansion: Resolves technical abbreviations automatically
  3. Context Awareness: Different user roles and domains get different expansions
  4. User Feedback: System learns from user interactions to improve quality
  5. Analytics: Provides insights into system performance and usage patterns

Next Steps

  1. Try Different Queries: Test with your own domain-specific terminology
  2. Add More Synonyms: Expand the synonym database with your specific terms
  3. Experiment with Strategies: Try different combinations of expansion strategies
  4. Monitor Performance: Use analytics to track system effectiveness
  5. Collect Feedback: Implement user feedback collection in your application

Extending the Example

  • Custom Domains: Add synonyms for your specific domain
  • Advanced Strategies: Implement semantic and contextual expansion
  • Bulk Operations: Use management interface for large-scale operations
  • Performance Optimization: Implement caching and optimization strategies