
A/B Testing Guide

The A/B Testing Framework enables you to test different search interface configurations and features to optimize user experience and search effectiveness. This guide covers configuration, usage, and advanced features.

Overview

The A/B Testing Framework offers several testing capabilities:

  • Interface Testing: Test different UI layouts and designs
  • Feature Testing: Test new features and functionality
  • Algorithm Testing: Test different search algorithms and ranking methods
  • Personalization Testing: Test different personalization strategies
  • Performance Testing: Test different performance optimizations
  • Statistical Analysis: Analyze test results with statistical significance

Basic Usage

1. Initialize the Framework

from search_interface.ab_testing import SearchABTestingFramework, ABTestConfig

# Create configuration
config = ABTestConfig(
    enable_automatic_tests=True,
    default_test_duration_days=7,
    minimum_sample_size=1000,
    statistical_significance_threshold=0.95
)

# Initialize framework
ab_testing = SearchABTestingFramework(config)

2. Create an A/B Test

# Create a new A/B test
test = await ab_testing.create_test(
test_name="search_interface_improvement",
description="Test new search interface design",
variants={
"control": {
"name": "Current Interface",
"description": "Current search interface",
"config": {"max_results": 10, "enable_highlighting": True}
},
"treatment": {
"name": "New Interface",
"description": "New search interface with improved design",
"config": {"max_results": 15, "enable_highlighting": True, "enable_snippets": True}
}
},
success_metrics=["click_through_rate", "user_satisfaction", "search_success_rate"],
start_date=datetime.utcnow(),
end_date=datetime.utcnow() + timedelta(days=7)
)

print(f"Created A/B test: {test['test_id']}")

3. Assign Users to Variants

# Assign user to test variant
variant = await ab_testing.assign_user_to_variant(
test_id=test['test_id'],
user_id="user123"
)

print(f"User assigned to variant: {variant}")

4. Track Test Events

# Track test events
await ab_testing.track_event(
test_id=test['test_id'],
user_id="user123",
event_type="search_query",
event_data={
"query": "python tutorial",
"variant": variant,
"timestamp": datetime.utcnow()
}
)

await ab_testing.track_event(
test_id=test['test_id'],
user_id="user123",
event_type="result_click",
event_data={
"result_id": "result789",
"position": 1,
"variant": variant,
"timestamp": datetime.utcnow()
}
)

5. Analyze Test Results

# Get test results
results = await ab_testing.get_test_results(test['test_id'])

print("A/B test results:")
print(f"Test status: {results['status']}")
print(f"Total participants: {results['total_participants']}")
print(f"Control performance: {results['control']['performance']:.2f}")
print(f"Treatment performance: {results['treatment']['performance']:.2f}")
print(f"Statistical significance: {results['statistical_significance']:.2f}")
print(f"Winner: {results['winner']}")

Configuration Options

ABTestConfig

config = ABTestConfig(
    # Test settings
    enable_automatic_tests=True,              # Enable automatic test creation
    default_test_duration_days=7,             # Default test duration
    minimum_sample_size=1000,                 # Minimum sample size for tests
    maximum_test_duration_days=30,            # Maximum test duration

    # Statistical settings
    statistical_significance_threshold=0.95,  # Statistical significance threshold
    confidence_interval=0.95,                 # Confidence interval
    minimum_effect_size=0.05,                 # Minimum effect size to detect

    # User assignment
    assignment_strategy="random",             # User assignment strategy
    enable_user_sticky_assignment=True,       # Keep users in same variant
    assignment_weights={"control": 0.5, "treatment": 0.5},  # Assignment weights

    # Performance settings
    cache_enabled=True,                       # Enable caching
    cache_ttl_seconds=300,                    # Cache time-to-live
    batch_processing_enabled=True,            # Enable batch processing

    # Monitoring
    enable_real_time_monitoring=True,         # Enable real-time monitoring
    monitoring_interval_minutes=5,            # Monitoring interval
    alert_thresholds={"error_rate": 0.05, "performance_degradation": 0.1},  # Alert thresholds

    # Privacy settings
    anonymize_user_data=False,                # Anonymize user data
    data_retention_days=90,                   # Data retention period
    enable_data_export=True,                  # Enable data export
)

Advanced Features

1. Multi-Variant Testing

# Create multi-variant test
test = await ab_testing.create_test(
test_name="search_algorithm_comparison",
description="Compare different search algorithms",
variants={
"control": {
"name": "Current Algorithm",
"config": {"algorithm": "bm25", "reranking": False}
},
"treatment_a": {
"name": "BM25 + Reranking",
"config": {"algorithm": "bm25", "reranking": True}
},
"treatment_b": {
"name": "Neural Search",
"config": {"algorithm": "neural", "reranking": True}
}
},
success_metrics=["click_through_rate", "search_success_rate", "user_satisfaction"],
assignment_weights={"control": 0.33, "treatment_a": 0.33, "treatment_b": 0.34}
)

2. Sequential Testing

# Create sequential test
test = await ab_testing.create_sequential_test(
test_name="search_interface_rollout",
description="Sequential rollout of new search interface",
variants={
"control": {"name": "Current Interface", "config": {"version": "1.0"}},
"treatment": {"name": "New Interface", "config": {"version": "2.0"}}
},
success_metrics=["click_through_rate", "user_satisfaction"],
stopping_rules={
"max_duration_days": 14,
"min_sample_size": 5000,
"significance_threshold": 0.95
}
)

3. Bayesian Testing

# Create Bayesian test
test = await ab_testing.create_bayesian_test(
test_name="personalization_strategy",
description="Test different personalization strategies",
variants={
"control": {"name": "No Personalization", "config": {"personalization": False}},
"treatment": {"name": "Full Personalization", "config": {"personalization": True}}
},
success_metrics=["click_through_rate", "user_satisfaction"],
prior_beliefs={
"control": {"mean": 0.15, "variance": 0.01},
"treatment": {"mean": 0.18, "variance": 0.01}
}
)

4. Custom Metrics

# Define custom metrics
custom_metrics = {
"search_quality_score": {
"description": "Overall search quality score",
"calculation": "weighted_average(relevance, freshness, completeness)",
"weights": {"relevance": 0.5, "freshness": 0.3, "completeness": 0.2}
},
"user_engagement_score": {
"description": "User engagement with search results",
"calculation": "sum(clicks, time_spent, satisfaction)",
"normalization": "min_max"
}
}

# Create test with custom metrics
test = await ab_testing.create_test(
test_name="search_quality_improvement",
description="Test search quality improvements",
variants={
"control": {"name": "Current Search", "config": {"quality_version": "1.0"}},
"treatment": {"name": "Improved Search", "config": {"quality_version": "2.0"}}
},
success_metrics=["search_quality_score", "user_engagement_score"],
custom_metrics=custom_metrics
)

Integration Examples

1. Search Interface Integration

class ABTestEnabledSearchInterface:
    """Search interface that applies each active A/B test's variant config per user."""

    def __init__(self):
        self.ab_testing = SearchABTestingFramework(ABTestConfig())
        self.search_engine = SearchEngine()
        self.active_tests = {}

    async def search(self, query: str, user_id: str):
        """Perform search with A/B testing."""
        # Get active tests for user
        user_tests = await self.ab_testing.get_user_tests(user_id)

        # Apply test configurations (later tests override earlier keys)
        search_config = self.get_base_search_config()
        for test in user_tests:
            variant = await self.ab_testing.get_user_variant(test['test_id'], user_id)
            test_config = test['variants'][variant]['config']
            search_config.update(test_config)

        # Perform search with test configuration
        results = await self.search_engine.search(query, config=search_config)

        # Track one search event per active test
        for test in user_tests:
            await self.ab_testing.track_event(
                test_id=test['test_id'],
                user_id=user_id,
                event_type="search_query",
                event_data={
                    "query": query,
                    "variant": await self.ab_testing.get_user_variant(test['test_id'], user_id),
                    "result_count": len(results)
                }
            )

        return results

    def get_base_search_config(self):
        """Get base search configuration."""
        return {
            "max_results": 10,
            "enable_highlighting": True,
            "enable_snippets": False
        }

2. Real-time Monitoring Integration

class RealTimeABTestMonitoring:
    """Polls active A/B tests, raises alerts, and pushes status to WebSocket clients."""

    def __init__(self):
        self.ab_testing = SearchABTestingFramework(ABTestConfig())
        self.websocket_clients = []

    async def start_monitoring(self):
        """Start real-time A/B test monitoring."""
        while True:
            # Get active tests
            active_tests = await self.ab_testing.get_active_tests()

            # Monitor each test
            for test in active_tests:
                test_status = await self.ab_testing.get_test_status(test['test_id'])

                # Check for alerts
                alerts = await self.check_test_alerts(test['test_id'], test_status)
                if alerts:
                    await self.send_alerts(test['test_id'], alerts)

                # Send updates to WebSocket clients
                await self.send_test_updates(test['test_id'], test_status)

            # Wait for next monitoring cycle
            await asyncio.sleep(300)  # Monitor every 5 minutes

    async def check_test_alerts(self, test_id: str, test_status: dict):
        """Check for test alerts.

        Returns a (possibly empty) list of alert dicts with "type" and "message".
        """
        alerts = []

        # Check error rate
        if test_status['error_rate'] > 0.05:
            alerts.append({
                "type": "error_rate_high",
                "message": f"Error rate {test_status['error_rate']:.2%} exceeds threshold"
            })

        # Check performance degradation
        if test_status['performance_degradation'] > 0.1:
            alerts.append({
                "type": "performance_degradation",
                "message": f"Performance degraded by {test_status['performance_degradation']:.2%}"
            })

        return alerts

3. Mobile App Integration

class MobileABTestInterface:
    """Serves per-user A/B test variants with configs trimmed for mobile clients."""

    def __init__(self):
        self.ab_testing = SearchABTestingFramework(ABTestConfig())
        self.cache = {}

    async def get_user_variants(self, user_id: str):
        """Get user variants optimized for mobile."""
        # Check cache first
        cache_key = f"user_variants:{user_id}"
        if cache_key in self.cache:
            return self.cache[cache_key]

        # Get user variants
        variants = await self.ab_testing.get_user_variants(user_id)

        # Optimize for mobile
        # NOTE(review): 'test' is unbound inside this comprehension in the
        # original; presumably the test object should be looked up by
        # test_id via the framework — confirm against the framework API.
        mobile_variants = {
            test_id: {
                "variant": variant,
                "config": self.optimize_config_for_mobile(test['variants'][variant]['config'])
            }
            for test_id, variant in variants.items()
        }

        # Cache for performance
        self.cache[cache_key] = mobile_variants
        return mobile_variants

    def optimize_config_for_mobile(self, config: dict):
        """Return a copy of *config* with mobile-friendly limits applied."""
        mobile_config = config.copy()

        # Reduce max results for mobile
        if 'max_results' in mobile_config:
            mobile_config['max_results'] = min(mobile_config['max_results'], 10)

        # Disable heavy features on mobile
        mobile_config['enable_real_time'] = False
        mobile_config['enable_advanced_analytics'] = False

        return mobile_config

Performance Optimization

1. Caching

# Enable caching for better performance
config = ABTestConfig(
    cache_enabled=True,
    cache_ttl_seconds=300,  # 5 minutes
    cache_max_size=1000     # Maximum cached entries
)

ab_testing = SearchABTestingFramework(config)

2. Batch Processing

# Enable batch processing for better performance
config = ABTestConfig(
    batch_processing_enabled=True,
    batch_size=1000,
    processing_interval_minutes=5
)

ab_testing = SearchABTestingFramework(config)

3. Async Processing

# Process A/B test data asynchronously
async def process_test_data_async(test_id: str, events: list[dict]):
    """Process A/B test events concurrently.

    Creates one track_event coroutine per event and awaits them all with
    asyncio.gather, returning the gathered results in input order.
    (Annotation uses builtin ``list[dict]``; the original referenced
    ``List`` without importing it from ``typing``.)
    """
    tasks = [
        ab_testing.track_event(test_id, event['user_id'], event['event_type'], event['event_data'])
        for event in events
    ]

    results = await asyncio.gather(*tasks)
    return results

Statistical Analysis

1. Statistical Significance Testing

# Test statistical significance
significance_test = await ab_testing.test_statistical_significance(
test_id=test['test_id'],
metric="click_through_rate",
confidence_level=0.95
)

print("Statistical significance test:")
print(f"P-value: {significance_test['p_value']:.4f}")
print(f"Significant: {significance_test['is_significant']}")
print(f"Effect size: {significance_test['effect_size']:.4f}")

2. Power Analysis

# Perform power analysis
power_analysis = await ab_testing.perform_power_analysis(
test_id=test['test_id'],
metric="click_through_rate",
effect_size=0.05,
alpha=0.05
)

print("Power analysis:")
print(f"Statistical power: {power_analysis['power']:.2f}")
print(f"Required sample size: {power_analysis['required_sample_size']}")
print(f"Current sample size: {power_analysis['current_sample_size']}")

3. Confidence Intervals

# Calculate confidence intervals
confidence_intervals = await ab_testing.calculate_confidence_intervals(
test_id=test['test_id'],
metric="click_through_rate",
confidence_level=0.95
)

print("Confidence intervals:")
print(f"Control: {confidence_intervals['control']}")
print(f"Treatment: {confidence_intervals['treatment']}")
print(f"Difference: {confidence_intervals['difference']}")

Troubleshooting

Common Issues

  1. Test Not Reaching Significance

    • Increase sample size
    • Extend test duration
    • Check for data quality issues
  2. Slow Performance

    • Enable caching
    • Use batch processing
    • Optimize database queries
  3. Memory Usage

    • Reduce cache size
    • Use streaming processing
    • Implement data archiving

Debug Mode

# Enable debug mode for troubleshooting
config = ABTestConfig(debug=True)
ab_testing = SearchABTestingFramework(config)

# Get detailed debug information
test_results = await ab_testing.get_test_results(test_id, debug=True)
print(f"Debug info: {test_results['debug_info']}")

Best Practices

  1. Start Simple: Begin with basic tests and add complexity gradually
  2. Monitor Performance: Track test performance and system impact
  3. Test Regularly: Run tests consistently to gather data
  4. Update Models: Keep statistical models current and relevant
  5. Handle Errors: Implement proper error handling and fallbacks
  6. Cache Strategically: Use caching to improve performance
  7. Respect Privacy: Implement proper data privacy and retention policies

Next Steps