Error Handling System

This example demonstrates the complete error handling system including error classification, user-friendly messaging, recovery suggestions, escalation, analytics, and A/B testing.

Overview

The error handling system transforms technical errors into user-friendly experiences with guided resolution paths. This example shows how to:

Classify technical errors into user-understandable categories
Generate contextual error messages
Provide intelligent recovery suggestions
Handle escalation to human support
Track analytics and user feedback
Perform A/B testing on error messages

Prerequisites

pip install packages.rag

Basic Error Handling

import asyncio
from packages.rag.error_handling import create_error_handler, ErrorContext

async def basic_error_handling():
    """Walk through the minimal flow: build a handler, fail, print the result.

    A generic exception is raised and caught locally, handed to the handler
    together with an ``ErrorContext``, and the fields of the object returned
    by ``handle_error`` are printed one per line.
    """
    error_handler = create_error_handler()

    # Describe who hit the error and what they were doing at the time.
    request_context = ErrorContext(
        user_id="user123",
        session_id="session456",
        query="How to configure the API?",
        component="api_configuration",
    )

    try:
        # Stand-in for the real operation that may fail.
        raise Exception("Vector store connection failed")
    except Exception as exc:
        friendly = await error_handler.handle_error(exc, request_context)

        # Plain attributes are printed verbatim, in this order.
        simple_fields = (
            ("Error Category", "category"),
            ("Severity", "severity"),
            ("Title", "title"),
            ("Message", "message"),
            ("Explanation", "explanation"),
        )
        for label, attribute in simple_fields:
            print(f"{label}: {getattr(friendly, attribute)}")

        # Suggested actions are shown by their ``.value`` rather than their repr.
        action_values = [action.value for action in friendly.suggested_actions]
        print(f"Suggested Actions: {action_values}")
        print(f"Recovery Suggestions: {friendly.recovery_suggestions}")

# Run the example
asyncio.run(basic_error_handling())

Advanced Error Handling with User Context

async def advanced_error_handling():
    """Show error handling when extra user details accompany the error.

    Besides the ``ErrorContext``, a plain dict describing the user is passed
    as the third argument to ``handle_error``. The original query, the error
    category, and a numbered list of recovery suggestions are then printed.
    """
    error_handler = create_error_handler()

    request_context = ErrorContext(
        user_id="developer1",
        session_id="session789",
        query="How to configure complex API settings with multiple endpoints?",
        component="vector_store",
    )

    # Supplementary user details; the example supplies them so the handler
    # can tailor its recovery suggestions.
    user_profile = dict(
        user_id="developer1",
        role="developer",
        experience_level="intermediate",
        preferred_language="en",
    )

    try:
        # Simulated timeout.
        raise Exception("Request timed out after 30 seconds")
    except Exception as exc:
        friendly = await error_handler.handle_error(
            exc, request_context, user_profile
        )

        print(f"Original Query: {request_context.query}")
        print(f"Error Category: {friendly.category}")
        print("Recovery Suggestions:")
        numbered = enumerate(friendly.recovery_suggestions, start=1)
        for position, suggestion in numbered:
            print(f"  {position}. {suggestion}")

asyncio.run(advanced_error_handling())

Error Analytics and Reporting

from packages.rag.error_analytics_dashboard import create_error_analytics_dashboard

async def error_analytics_example():
    """Feed sample errors into the analytics dashboard and print its output.

    Two hand-built ``UserFriendlyError`` records are registered with the
    outcome "resolved", followed by one feedback entry and one resolution
    entry per error. The function then prints the 24-hour metrics, the
    generated insights, and a "daily" report.
    """
    analytics = create_error_analytics_dashboard()

    # Only this example needs these names, so they are imported locally.
    from packages.rag.error_handling import (
        UserFriendlyError,
        ErrorCategory,
        ErrorSeverity,
        ResolutionAction,
    )

    # Sample error 1: a retrieval failure in the vector store.
    retrieval_error = UserFriendlyError(
        error_id="error_1",
        category=ErrorCategory.RETRIEVAL_FAILURE,
        severity=ErrorSeverity.MEDIUM,
        title="Unable to Find Information",
        message="I'm having trouble accessing the knowledge base.",
        explanation="The search system is temporarily unavailable.",
        suggested_actions=[ResolutionAction.RETRY],
        workarounds=["Wait and retry", "Try different keywords"],
        escalation_required=False,
        recovery_suggestions=["Check your internet connection"],
        context=ErrorContext(user_id="user1", component="vector_store"),
    )

    # Sample error 2: a generation timeout.
    timeout_error = UserFriendlyError(
        error_id="error_2",
        category=ErrorCategory.GENERATION_TIMEOUT,
        severity=ErrorSeverity.MEDIUM,
        title="Response Taking Too Long",
        message="Your question is taking longer than expected.",
        explanation="Complex questions can cause delays.",
        suggested_actions=[ResolutionAction.WAIT_AND_RETRY],
        workarounds=["Simplify your question", "Wait and retry"],
        escalation_required=False,
        recovery_suggestions=["Try breaking down your question"],
        context=ErrorContext(user_id="user2", component="generation"),
    )

    # Register both errors with the outcome "resolved".
    for sample in (retrieval_error, timeout_error):
        await analytics.add_error_data(sample, "resolved")

    # One feedback entry per error, passed positionally as in the tuples.
    feedback_entries = (
        ("error_1", "user1", True, "Very helpful message"),
        ("error_2", "user2", False, "Could be clearer"),
    )
    for entry in feedback_entries:
        await analytics.add_user_feedback(*entry)

    # One resolution entry per error, passed positionally as in the tuples.
    resolution_entries = (
        ("error_1", "retry_with_different_parameters", 2.5, True),
        ("error_2", "simplified_query", 1.0, True),
    )
    for entry in resolution_entries:
        await analytics.add_resolution_data(*entry)

    # Aggregate metrics over the last 24 hours.
    print("=== Error Analytics ===")
    summary = await analytics.get_error_metrics(time_window_hours=24)
    print(f"Total Errors: {summary.total_errors}")
    print(f"Errors by Category: {summary.errors_by_category}")
    print(f"User Satisfaction: {summary.user_satisfaction}")
    print(f"Average Resolution Time: {summary.average_resolution_time:.1f} minutes")

    # Insights generated for the same window.
    print("\n=== Error Insights ===")
    for finding in await analytics.generate_insights(time_window_hours=24):
        print(f"• {finding.title}")
        print(f"  Description: {finding.description}")
        print(f"  Recommended Actions: {finding.recommended_actions}")
        print()

    # Report generated with the arguments "daily" and 24.
    print("=== Error Report ===")
    daily_report = await analytics.generate_error_report("daily", 24)
    print(f"Report ID: {daily_report.report_id}")
    print(f"Summary: {daily_report.summary}")
    print(f"Recommendations: {daily_report.recommendations}")

asyncio.run(error_analytics_example())

A/B Testing Error Messages

from packages.rag.error_message_testing import (
    create_error_message_testing_framework,
    TestType,
    ErrorMessageVariant
)

async def ab_testing_example():
    """Run a simulated A/B test comparing two retrieval-failure messages.

    The example creates a test with a control and a treatment variant,
    splits traffic 50/50, enrolls ten participants, submits one synthetic
    result per participant, and prints the overall and per-variant metrics.
    """
    # Imported locally so the example runs as written: the variants below
    # call ``datetime.utcnow()`` and nothing else in this file imports
    # ``datetime``, which would otherwise raise NameError.
    from datetime import datetime

    # Create testing framework
    testing_framework = create_error_message_testing_framework()

    # Create A/B test
    test = testing_framework.create_test(
        TestType.A_B_TEST,
        "RETRIEVAL_FAILURE",
        "Test different error messages for retrieval failures"
    )

    # NOTE(review): utcnow() returns a naive timestamp and is deprecated
    # since Python 3.12. It is kept here because what the framework expects
    # for ``created_at`` is not visible in this file - confirm before
    # switching to a timezone-aware value.
    control_variant = ErrorMessageVariant(
        variant_id="control",
        title="Unable to Find Information",
        message="I'm having trouble accessing the knowledge base to answer your question.",
        explanation="This usually happens when the search system is temporarily unavailable.",
        suggested_actions=["Try again", "Contact support"],
        workarounds=["Wait a moment and retry", "Try different keywords"],
        recovery_suggestions=["Check your internet connection", "Try a simpler question"],
        metadata={"version": "current"},
        created_at=datetime.utcnow(),
        is_control=True
    )

    treatment_variant = ErrorMessageVariant(
        variant_id="treatment",
        title="Search Temporarily Unavailable",
        message="I'm currently unable to search our knowledge base, but I'm working to fix this.",
        explanation="Our search system is experiencing issues. This is temporary and should be resolved soon.",
        suggested_actions=["Try again in a few minutes", "Contact support if urgent"],
        workarounds=["Wait 2-3 minutes before retrying", "Try rephrasing your question"],
        recovery_suggestions=["Check system status page", "Try during off-peak hours"],
        metadata={"version": "improved"},
        created_at=datetime.utcnow(),
        is_control=False
    )

    # Add variants to test
    test.add_variant(control_variant)
    test.add_variant(treatment_variant)

    # Set traffic split (50/50)
    test.set_traffic_split({"control": 0.5, "treatment": 0.5})

    # Enroll participants and assign each one to the test
    participants = []
    for i in range(10):
        participant = testing_framework.add_participant(
            user_id=f"test_user_{i}",
            user_profile={
                "role": "developer",
                "experience_level": "intermediate",
                "department": "engineering"
            }
        )
        participants.append(participant)

        variant_id = testing_framework.assign_participant_to_test(
            participant.participant_id,
            test.test_id
        )
        print(f"Participant {i} assigned to variant: {variant_id}")

    # Simulate test results
    print("\n=== Simulating Test Results ===")
    for i, participant in enumerate(participants):
        variant = test.get_variant_for_participant(participant.participant_id)

        # Synthetic ratings: control scores 3 or 4, treatment scores 4 or 5.
        base_rating = 3 if variant.is_control else 4
        rating = base_rating + (i % 2)
        helpful = rating >= 4

        # Submit the result; the return value is not needed here.
        testing_framework.submit_test_result(
            test.test_id,
            participant.participant_id,
            f"error_{i}",
            {
                "rating": rating,
                "helpful": helpful,
                "clear": rating >= 4,
                "actionable": rating >= 3,
                "comments": f"Test comment for variant {variant.variant_id}",
                "response_time_seconds": 30 + (i % 20)
            }
        )

        print(f"Participant {i} (variant {variant.variant_id}): rating={rating}, helpful={helpful}")

    # Calculate test metrics
    print("\n=== Test Results ===")
    metrics = testing_framework.calculate_test_metrics(test.test_id)
    print(f"Total Participants: {metrics.total_participants}")
    print(f"Total Responses: {metrics.total_responses}")
    print(f"Completion Rate: {metrics.completion_rate:.1%}")
    print(f"Average Rating: {metrics.average_rating:.2f}")
    print(f"Helpfulness Rate: {metrics.helpfulness_rate:.1%}")

    print("\n=== Variant Performance ===")
    for variant_id, performance in metrics.variant_performance.items():
        print(f"Variant {variant_id}:")
        print(f"  Average Rating: {performance['average_rating']:.2f}")
        print(f"  Helpfulness Rate: {performance['helpfulness_rate']:.1%}")
        print(f"  Response Count: {performance['response_count']}")

asyncio.run(ab_testing_example())

Complete Integration Example

async def complete_integration_example():
    """Run three error scenarios through the handler and the dashboard.

    Each scenario pairs a pre-built exception with an ``ErrorContext``. Every
    exception is handled, the result is recorded in the analytics dashboard
    with the outcome "resolved", and its key fields are printed. The 24-hour
    metrics are printed at the end.
    """
    print("🚀 Complete Error Handling System Demo")
    print("=" * 50)

    error_handler = create_error_handler()
    analytics = create_error_analytics_dashboard()

    # Scenario 1: vector store connection failure.
    retrieval_scenario = {
        "error": Exception("Vector store connection failed"),
        "context": ErrorContext(
            user_id="user1",
            session_id="session1",
            query="How to configure the API?",
            component="vector_store",
        ),
        "description": "Retrieval failure",
    }

    # Scenario 2: request timeout during generation.
    timeout_scenario = {
        "error": Exception("Request timed out after 30 seconds"),
        "context": ErrorContext(
            user_id="user2",
            session_id="session2",
            query="Complex technical question about database optimization",
            component="generation",
        ),
        "description": "Generation timeout",
    }

    # Scenario 3: insufficient access rights.
    permission_scenario = {
        "error": Exception("Permission denied: insufficient access rights"),
        "context": ErrorContext(
            user_id="user3",
            session_id="session3",
            query="How to access admin settings?",
            component="authorization",
        ),
        "description": "Permission denied",
    }

    test_scenarios = [retrieval_scenario, timeout_scenario, permission_scenario]

    for number, scenario in enumerate(test_scenarios, start=1):
        failure = scenario["error"]
        scenario_context = scenario["context"]
        print(f"\n--- Scenario {number}: {scenario['description']} ---")

        # The user context repeats the scenario's user id and a fixed role.
        friendly = await error_handler.handle_error(
            failure,
            scenario_context,
            user_context={"user_id": scenario_context.user_id, "role": "developer"},
        )

        # Record the handled error in the dashboard.
        await analytics.add_error_data(friendly, "resolved")

        print(f"Category: {friendly.category}")
        print(f"Severity: {friendly.severity}")
        print(f"Title: {friendly.title}")
        print(f"Message: {friendly.message}")
        print(f"Escalation Required: {friendly.escalation_required}")
        # The ticket line is printed only when a ticket id is set.
        ticket_id = friendly.support_ticket_id
        if ticket_id:
            print(f"Support Ticket: {ticket_id}")

    print("\n=== Final Analytics ===")
    summary = await analytics.get_error_metrics(time_window_hours=24)
    print(f"Total Errors Processed: {summary.total_errors}")
    print(f"Error Categories: {list(summary.errors_by_category.keys())}")
    print(f"Peak Error Times: {summary.peak_error_times}")

    print("\n✅ Complete error handling system demo finished!")

asyncio.run(complete_integration_example())

Running the Examples

Save the code above to a file (e.g., error_handling_example.py) and run:

python error_handling_example.py

Expected Output

The examples will demonstrate:

Error Classification: How technical errors are mapped to user-friendly categories
User-Friendly Messages: Clear explanations of what went wrong
Recovery Suggestions: Actionable steps users can take
Escalation Handling: Automatic routing of critical errors to support
Analytics: Comprehensive tracking and reporting
A/B Testing: Testing different error message approaches

Key Benefits

Improved User Experience: Clear, helpful error messages
Reduced Support Load: Self-service error resolution
Proactive Monitoring: Early detection of system issues
Data-Driven Decisions: Analytics guide system improvements
Continuous Optimization: A/B testing improves error handling

This comprehensive error handling system will significantly enhance your RAG system's user experience during failures while providing valuable insights for continuous improvement.

Overview

Prerequisites

Basic Error Handling

Advanced Error Handling with User Context

Error Analytics and Reporting

A/B Testing Error Messages

Complete Integration Example

Running the Examples

Expected Output

Key Benefits