Error Handling System
This example demonstrates the complete error handling system including error classification, user-friendly messaging, recovery suggestions, escalation, analytics, and A/B testing.
Overview
The error handling system transforms technical errors into user-friendly experiences with guided resolution paths. This example shows how to:
- Classify technical errors into user-understandable categories
- Generate contextual error messages
- Provide intelligent recovery suggestions
- Handle escalation to human support
- Track analytics and user feedback
- Perform A/B testing on error messages
Prerequisites
pip install packages.rag
Basic Error Handling
import asyncio
from packages.rag.error_handling import create_error_handler, ErrorContext
async def basic_error_handling():
    """Show the minimal flow: build a handler, fail once, print the result."""
    error_handler = create_error_handler()

    # Describe who triggered the failing request and from which component.
    error_context = ErrorContext(
        user_id="user123",
        session_id="session456",
        query="How to configure the API?",
        component="api_configuration",
    )

    try:
        # Placeholder for the real operation that may fail.
        raise Exception("Vector store connection failed")
    except Exception as exc:
        # Hand the raw exception and its context to the handler.
        friendly = await error_handler.handle_error(exc, error_context)

        print(f"Error Category: {friendly.category}")
        print(f"Severity: {friendly.severity}")
        print(f"Title: {friendly.title}")
        print(f"Message: {friendly.message}")
        print(f"Explanation: {friendly.explanation}")
        action_values = [action.value for action in friendly.suggested_actions]
        print(f"Suggested Actions: {action_values}")
        print(f"Recovery Suggestions: {friendly.recovery_suggestions}")


# Run the example
asyncio.run(basic_error_handling())
Advanced Error Handling with User Context
async def advanced_error_handling():
    """Handle a simulated timeout while supplying extra user context."""
    error_handler = create_error_handler()

    # Context of the failing request.
    error_context = ErrorContext(
        user_id="developer1",
        session_id="session789",
        query="How to configure complex API settings with multiple endpoints?",
        component="vector_store",
    )

    # Details about the user, passed to handle_error as its third argument
    # so that recovery suggestions can take them into account.
    profile = dict(
        user_id="developer1",
        role="developer",
        experience_level="intermediate",
        preferred_language="en",
    )

    try:
        # Simulated timeout.
        raise Exception("Request timed out after 30 seconds")
    except Exception as exc:
        friendly = await error_handler.handle_error(exc, error_context, profile)

        print(f"Original Query: {error_context.query}")
        print(f"Error Category: {friendly.category}")
        print("Recovery Suggestions:")
        for position, tip in enumerate(friendly.recovery_suggestions, start=1):
            print(f" {position}. {tip}")


asyncio.run(advanced_error_handling())
Error Analytics and Reporting
from packages.rag.error_analytics_dashboard import create_error_analytics_dashboard
async def error_analytics_example():
    """Feed sample errors into the analytics dashboard and print its output.

    Registers two hand-built ``UserFriendlyError`` records together with user
    feedback and resolution data, then prints the metrics, insights, and
    report returned by the dashboard for a 24-hour window.
    """
    from packages.rag.error_handling import (
        ErrorCategory,
        ErrorSeverity,
        ResolutionAction,
        UserFriendlyError,
    )

    # Create analytics dashboard
    dashboard = create_error_analytics_dashboard()

    # Sample errors standing in for real traffic.
    errors = [
        UserFriendlyError(
            error_id="error_1",
            category=ErrorCategory.RETRIEVAL_FAILURE,
            severity=ErrorSeverity.MEDIUM,
            title="Unable to Find Information",
            message="I'm having trouble accessing the knowledge base.",
            explanation="The search system is temporarily unavailable.",
            suggested_actions=[ResolutionAction.RETRY],
            workarounds=["Wait and retry", "Try different keywords"],
            escalation_required=False,
            recovery_suggestions=["Check your internet connection"],
            context=ErrorContext(user_id="user1", component="vector_store"),
        ),
        UserFriendlyError(
            error_id="error_2",
            category=ErrorCategory.GENERATION_TIMEOUT,
            severity=ErrorSeverity.MEDIUM,
            title="Response Taking Too Long",
            message="Your question is taking longer than expected.",
            explanation="Complex questions can cause delays.",
            suggested_actions=[ResolutionAction.WAIT_AND_RETRY],
            workarounds=["Simplify your question", "Wait and retry"],
            escalation_required=False,
            recovery_suggestions=["Try breaking down your question"],
            context=ErrorContext(user_id="user2", component="generation"),
        ),
    ]

    # Add error data to analytics
    for error in errors:
        await dashboard.add_error_data(error, "resolved")

    # Add user feedback
    await dashboard.add_user_feedback("error_1", "user1", True, "Very helpful message")
    await dashboard.add_user_feedback("error_2", "user2", False, "Could be clearer")

    # Add resolution data
    await dashboard.add_resolution_data("error_1", "retry_with_different_parameters", 2.5, True)
    await dashboard.add_resolution_data("error_2", "simplified_query", 1.0, True)

    # Get analytics
    print("=== Error Analytics ===")
    metrics = await dashboard.get_error_metrics(time_window_hours=24)
    print(f"Total Errors: {metrics.total_errors}")
    print(f"Errors by Category: {metrics.errors_by_category}")
    print(f"User Satisfaction: {metrics.user_satisfaction}")
    print(f"Average Resolution Time: {metrics.average_resolution_time:.1f} minutes")

    # Generate insights
    print("\n=== Error Insights ===")
    insights = await dashboard.generate_insights(time_window_hours=24)
    for insight in insights:
        # The bullet character was mis-encoded in the published example.
        print(f"• {insight.title}")
        print(f" Description: {insight.description}")
        print(f" Recommended Actions: {insight.recommended_actions}")
        print()

    # Generate report
    print("=== Error Report ===")
    report = await dashboard.generate_error_report("daily", 24)
    print(f"Report ID: {report.report_id}")
    print(f"Summary: {report.summary}")
    print(f"Recommendations: {report.recommendations}")


asyncio.run(error_analytics_example())
A/B Testing Error Messages
from packages.rag.error_message_testing import (
create_error_message_testing_framework,
TestType,
ErrorMessageVariant
)
async def ab_testing_example():
    """Set up an A/B test of two error-message variants and print its metrics.

    Creates a test with a control and a treatment variant, enrolls ten
    participants, submits one simulated result per participant, and prints
    the aggregate and per-variant metrics computed by the framework.
    """
    # The example calls datetime.utcnow() but never imported datetime;
    # import it here so the snippet runs on its own.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12. Switch to
    # datetime.now(timezone.utc) once it is confirmed that ErrorMessageVariant
    # accepts timezone-aware values.
    from datetime import datetime

    # Create testing framework
    testing_framework = create_error_message_testing_framework()

    # Create A/B test
    test = testing_framework.create_test(
        TestType.A_B_TEST,
        "RETRIEVAL_FAILURE",
        "Test different error messages for retrieval failures",
    )

    # Create message variants
    control_variant = ErrorMessageVariant(
        variant_id="control",
        title="Unable to Find Information",
        message="I'm having trouble accessing the knowledge base to answer your question.",
        explanation="This usually happens when the search system is temporarily unavailable.",
        suggested_actions=["Try again", "Contact support"],
        workarounds=["Wait a moment and retry", "Try different keywords"],
        recovery_suggestions=["Check your internet connection", "Try a simpler question"],
        metadata={"version": "current"},
        created_at=datetime.utcnow(),
        is_control=True,
    )
    treatment_variant = ErrorMessageVariant(
        variant_id="treatment",
        title="Search Temporarily Unavailable",
        message="I'm currently unable to search our knowledge base, but I'm working to fix this.",
        explanation="Our search system is experiencing issues. This is temporary and should be resolved soon.",
        suggested_actions=["Try again in a few minutes", "Contact support if urgent"],
        workarounds=["Wait 2-3 minutes before retrying", "Try rephrasing your question"],
        recovery_suggestions=["Check system status page", "Try during off-peak hours"],
        metadata={"version": "improved"},
        created_at=datetime.utcnow(),
        is_control=False,
    )

    # Add variants to test
    test.add_variant(control_variant)
    test.add_variant(treatment_variant)

    # Set traffic split (50/50)
    test.set_traffic_split({"control": 0.5, "treatment": 0.5})

    # Create participants and assign each one to the test
    participants = []
    for i in range(10):
        participant = testing_framework.add_participant(
            user_id=f"test_user_{i}",
            user_profile={
                "role": "developer",
                "experience_level": "intermediate",
                "department": "engineering",
            },
        )
        participants.append(participant)

        variant_id = testing_framework.assign_participant_to_test(
            participant.participant_id,
            test.test_id,
        )
        print(f"Participant {i} assigned to variant: {variant_id}")

    # Simulate test results
    print("\n=== Simulating Test Results ===")
    for i, participant in enumerate(participants):
        variant = test.get_variant_for_participant(participant.participant_id)

        # Control ratings alternate 3/4; treatment ratings alternate 4/5.
        base_rating = 3 if variant.is_control else 4
        rating = base_rating + (i % 2)
        helpful = rating >= 4

        # Submit test result
        testing_framework.submit_test_result(
            test.test_id,
            participant.participant_id,
            f"error_{i}",
            {
                "rating": rating,
                "helpful": helpful,
                "clear": rating >= 4,
                "actionable": rating >= 3,
                "comments": f"Test comment for variant {variant.variant_id}",
                "response_time_seconds": 30 + (i % 20),
            },
        )
        print(f"Participant {i} (variant {variant.variant_id}): rating={rating}, helpful={helpful}")

    # Calculate test metrics
    print("\n=== Test Results ===")
    metrics = testing_framework.calculate_test_metrics(test.test_id)
    print(f"Total Participants: {metrics.total_participants}")
    print(f"Total Responses: {metrics.total_responses}")
    print(f"Completion Rate: {metrics.completion_rate:.1%}")
    print(f"Average Rating: {metrics.average_rating:.2f}")
    print(f"Helpfulness Rate: {metrics.helpfulness_rate:.1%}")

    print("\n=== Variant Performance ===")
    for variant_id, performance in metrics.variant_performance.items():
        print(f"Variant {variant_id}:")
        print(f" Average Rating: {performance['average_rating']:.2f}")
        print(f" Helpfulness Rate: {performance['helpfulness_rate']:.1%}")
        print(f" Response Count: {performance['response_count']}")


asyncio.run(ab_testing_example())
Complete Integration Example
async def complete_integration_example():
    """Run three simulated failures through the handler and the dashboard.

    Each scenario's exception is passed to ``handler.handle_error``, the
    resulting error is recorded on the analytics dashboard, and its main
    fields are printed. Aggregate metrics are printed at the end.
    """
    # NOTE(review): the banner emoji was mis-encoded in the published example;
    # the rocket is a best-guess restoration. Confirm against the original.
    print("🚀 Complete Error Handling System Demo")
    print("=" * 50)

    # Create error handler
    handler = create_error_handler()

    # Create analytics dashboard
    dashboard = create_error_analytics_dashboard()

    # Test various error scenarios
    test_scenarios = [
        {
            "error": Exception("Vector store connection failed"),
            "context": ErrorContext(
                user_id="user1",
                session_id="session1",
                query="How to configure the API?",
                component="vector_store",
            ),
            "description": "Retrieval failure",
        },
        {
            "error": Exception("Request timed out after 30 seconds"),
            "context": ErrorContext(
                user_id="user2",
                session_id="session2",
                query="Complex technical question about database optimization",
                component="generation",
            ),
            "description": "Generation timeout",
        },
        {
            "error": Exception("Permission denied: insufficient access rights"),
            "context": ErrorContext(
                user_id="user3",
                session_id="session3",
                query="How to access admin settings?",
                component="authorization",
            ),
            "description": "Permission denied",
        },
    ]

    for i, scenario in enumerate(test_scenarios, 1):
        print(f"\n--- Scenario {i}: {scenario['description']} ---")

        # Handle the error
        user_error = await handler.handle_error(
            scenario["error"],
            scenario["context"],
            user_context={"user_id": scenario["context"].user_id, "role": "developer"},
        )

        # Add to analytics
        await dashboard.add_error_data(user_error, "resolved")

        # Display results
        print(f"Category: {user_error.category}")
        print(f"Severity: {user_error.severity}")
        print(f"Title: {user_error.title}")
        print(f"Message: {user_error.message}")
        print(f"Escalation Required: {user_error.escalation_required}")
        if user_error.support_ticket_id:
            print(f"Support Ticket: {user_error.support_ticket_id}")

    # Get final analytics
    print("\n=== Final Analytics ===")
    metrics = await dashboard.get_error_metrics(time_window_hours=24)
    print(f"Total Errors Processed: {metrics.total_errors}")
    print(f"Error Categories: {list(metrics.errors_by_category.keys())}")
    print(f"Peak Error Times: {metrics.peak_error_times}")

    # The original literal was split across two lines by a mis-decoded
    # check-mark character, which made the snippet a syntax error.
    print("\n✅ Complete error handling system demo finished!")


asyncio.run(complete_integration_example())
Running the Examples
Save the code above to a file (e.g., error_handling_example.py) and run:
python error_handling_example.py
Expected Output
The examples will demonstrate:
- Error Classification: How technical errors are mapped to user-friendly categories
- User-Friendly Messages: Clear explanations of what went wrong
- Recovery Suggestions: Actionable steps users can take
- Escalation Handling: Automatic routing of critical errors to support
- Analytics: Comprehensive tracking and reporting
- A/B Testing: Testing different error message approaches
Key Benefits
- Improved User Experience: Clear, helpful error messages
- Reduced Support Load: Self-service error resolution
- Proactive Monitoring: Early detection of system issues
- Data-Driven Decisions: Analytics guide system improvements
- Continuous Optimization: A/B testing improves error handling
This comprehensive error handling system will significantly enhance your RAG system's user experience during failures while providing valuable insights for continuous improvement.