#!/usr/bin/env python3
"""
Deferral Text Parser

Parses customer deferral phrases into structured follow-up dates.

Returns:
    {
        "due_at": "2025-12-25T09:00:00+00:00",  # ISO 8601 in UTC; 09:00 UK local time
        "reason": "matched phrase",
        "confidence": 0.8,  # 1.0=anchor, 0.8=numeric, 0.6=vague, 0.5=default fallback
        "source": "parser"
    }
"""
import logging
import os
import re
from datetime import datetime, time, timedelta
from zoneinfo import ZoneInfo

from dateutil.relativedelta import relativedelta
from supabase import create_client


def _uk_now():
    """Get current time in Europe/London timezone."""
    return datetime.now(ZoneInfo("Europe/London"))


def _get_seasonal_anchors():
    """Fetch seasonal anchor dates from Supabase."""
    url = os.getenv('SUPABASE_URL')
    key = os.getenv('SUPABASE_SERVICE_KEY')
    
    if not url or not key:
        return {}
    
    try:
        client = create_client(url, key)
        result = client.table('seasonal_anchor').select('keyword,anchor_date,description').execute()
        
        # Build lookup dict: {keyword: anchor_date}
        anchors = {}
        for row in result.data:
            keyword = row.get('keyword', '').lower()
            anchor_date = row.get('anchor_date')
            if keyword and anchor_date:
                # Store earliest if multiple
                if keyword not in anchors or anchor_date < anchors[keyword]:
                    anchors[keyword] = anchor_date
        
        return anchors
    except Exception:
        return {}


def parse_deferral_text(text: str, now_uk=None):
    """
    Parse deferral text into structured follow-up date.

    Patterns are tried in this order and the first hit wins:
      1. "in N day(s)/week(s)/month(s)"        -> confidence 0.8
      2. "couple of months" / "couple months"  -> +60 days, confidence 0.6
      3. "few months"                          -> +90 days, confidence 0.6
      4. seasonal keyword with an anchor date  -> confidence 1.0
      5. nothing usable                        -> +30 days, confidence 0.5

    Args:
        text: Customer deferral phrase (e.g., "in 3 months", "after christmas").
            None or empty text falls through to the +30 day default.
        now_uk: Optional datetime in Europe/London (defaults to now). A naive
            value is taken to be UK local time; an aware value in another
            zone is converted to Europe/London first.

    Returns:
        dict: {
            "due_at": ISO 8601 UTC datetime corresponding to 09:00 UK time,
            "reason": matched phrase (or "default: +30 days"),
            "confidence": float (1.0=anchor, 0.8=numeric, 0.6=vague, 0.5=default),
            "source": "parser"
        }
    """
    london = ZoneInfo("Europe/London")

    if now_uk is None:
        now_uk = _uk_now()

    # Work in UK local time so that the 09:00 snap below really is 09:00 UK,
    # whatever zone the caller supplied.
    if now_uk.tzinfo is None:
        now_uk = now_uk.replace(tzinfo=london)
    else:
        now_uk = now_uk.astimezone(london)

    # Normalize text
    text_lower = (text or "").lower().strip()

    due_date = None
    reason = None
    confidence = None

    # Pattern 1: "in X day(s)|week(s)|month(s)"
    # `days?` (rather than `day|days`) makes the plural part of the capture,
    # so the reported reason reads "in 6 weeks" instead of "in 6 week".
    match = re.search(r'in\s+(\d+)\s+(days?|weeks?|months?)', text_lower)

    if match:
        unit = match.group(2)
        try:
            quantity = int(match.group(1))
            if unit.startswith('day'):
                due_date = now_uk + timedelta(days=quantity)
            elif unit.startswith('week'):
                due_date = now_uk + timedelta(weeks=quantity)
            else:
                # Calendar-month arithmetic needs relativedelta.
                due_date = now_uk + relativedelta(months=quantity)
        except (OverflowError, ValueError):
            # Absurdly large quantity: treat as unparseable, use the default.
            due_date = None
        else:
            confidence = 0.8
            reason = f"in {quantity} {unit}"

    # Pattern 2: "couple of months" → +60 days
    elif 'couple of month' in text_lower or 'couple months' in text_lower:
        due_date = now_uk + timedelta(days=60)
        confidence = 0.6
        reason = "couple of months"

    # Pattern 3: "few months" → +90 days
    elif 'few month' in text_lower:
        due_date = now_uk + timedelta(days=90)
        confidence = 0.6
        reason = "few months"

    # Pattern 4: Seasonal anchors
    else:
        # Specific phrases come before the generic word they contain, so
        # "after summer" is not shadowed by "summer" (same for tax year).
        seasonal_keywords = (
            'after christmas', 'christmas',
            'after easter', 'easter',
            'after summer', 'summer',
            'next quarter', 'quarter',
            'new tax year', 'after tax year', 'tax year',
        )
        candidates = [kw for kw in seasonal_keywords if kw in text_lower]

        # Only hit Supabase when the text actually mentions a season.
        anchors = _get_seasonal_anchors() if candidates else {}

        for keyword in candidates:
            anchor_date_str = anchors.get(keyword)
            if not anchor_date_str:
                continue
            try:
                # Parse anchor date (YYYY-MM-DD)
                anchor_date = datetime.fromisoformat(anchor_date_str)
            except (TypeError, ValueError):
                # Malformed anchor row: try the next candidate keyword.
                continue
            # NOTE(review): an anchor date already in the past is returned
            # as-is — confirm the seasonal_anchor table only holds upcoming
            # dates.
            due_date = anchor_date.replace(tzinfo=london)
            confidence = 1.0
            reason = keyword
            break

    # If no match, default to +30 days
    if due_date is None:
        due_date = now_uk + timedelta(days=30)
        confidence = 0.5
        reason = "default: +30 days"

    # Snap to 09:00 UK time
    due_date = due_date.replace(hour=9, minute=0, second=0, microsecond=0)

    # Convert to UTC
    due_date_utc = due_date.astimezone(ZoneInfo("UTC"))

    return {
        "due_at": due_date_utc.isoformat(),
        "reason": reason,
        "confidence": confidence,
        "source": "parser"
    }


def parse_appointment_when(text: str, now_uk=None):
    """
    Parse appointment date/time from UK phrasing into structured DATE and TIME.

    The date and the time are parsed independently; either may be missing.

    Date rules: "tomorrow", "today", or a weekday name. A weekday always
    resolves to its next occurrence strictly after today (naming today's
    weekday yields the date one week ahead).

    Time rules, in priority order: "HH:MM" (with optional am/pm suffix),
    "2pm" / "2 am", "half ten" / "half eleven", then the vague words
    afternoon (14:00), midday/noon (12:00), morning (09:00) and
    evening / after work (17:00).

    Args:
        text: Customer phrase (e.g., "Friday 10:30", "tomorrow morning", "next week").
            None or empty text yields an unparsed result.
        now_uk: Optional datetime in Europe/London (defaults to now). A naive
            value is taken to be UK local time; an aware value in another
            zone is converted to Europe/London first.

    Returns:
        dict: {
            "date": "YYYY-MM-DD" or None,
            "time": "HH:MM:SS" or None,
            "parsed": bool (True when at least one of date/time was found)
        }
    """
    london = ZoneInfo("Europe/London")

    if now_uk is None:
        now_uk = _uk_now()

    if now_uk.tzinfo is None:
        now_uk = now_uk.replace(tzinfo=london)
    else:
        now_uk = now_uk.astimezone(london)

    text_lower = (text or "").lower().strip()

    parsed_date = None
    parsed_time = None

    # Parse DATE component
    if 'tomorrow' in text_lower:
        parsed_date = (now_uk + timedelta(days=1)).date()
    elif 'today' in text_lower:
        parsed_date = now_uk.date()
    else:
        # Index in this tuple equals datetime.weekday() (Monday == 0).
        weekdays = (
            'monday', 'tuesday', 'wednesday', 'thursday',
            'friday', 'saturday', 'sunday',
        )
        for index, name in enumerate(weekdays):
            if name in text_lower:
                # 0 would mean "today"; push that to the same day next week.
                days_ahead = (index - now_uk.weekday()) % 7 or 7
                parsed_date = (now_uk + timedelta(days=days_ahead)).date()
                break

    # Parse TIME component
    # First try HH:MM format (takes priority); honour a trailing am/pm so
    # that "2:30pm" is 14:30 rather than 02:30.
    time_match = re.search(r'\b(\d{1,2}):(\d{2})(?:\s*([ap]m)\b)?', text_lower)
    if time_match:
        hour = int(time_match.group(1))
        minute = int(time_match.group(2))
        meridiem = time_match.group(3)
        if meridiem == 'pm' and 1 <= hour <= 11:
            hour += 12
        elif meridiem == 'am' and hour == 12:
            hour = 0
        if 0 <= hour <= 23 and 0 <= minute <= 59:
            parsed_time = time(hour, minute, 0)

    # Then try "2pm" or "2 pm" format (but not if already parsed)
    if parsed_time is None:
        # The lookbehind stops the minutes of a rejected HH:MM from being
        # re-read as an hour.
        pm_match = re.search(r'(?<![:\d])(\d{1,2})\s*pm\b', text_lower)
        am_match = re.search(r'(?<![:\d])(\d{1,2})\s*am\b', text_lower)

        if pm_match:
            hour = int(pm_match.group(1))
            if 1 <= hour <= 11:
                hour += 12
            if 0 <= hour <= 23:
                parsed_time = time(hour, 0, 0)
        elif am_match:
            hour = int(am_match.group(1))
            if hour == 12:
                hour = 0
            if 0 <= hour <= 23:
                parsed_time = time(hour, 0, 0)
        elif 'half ten' in text_lower:
            parsed_time = time(10, 30, 0)
        elif 'half eleven' in text_lower:
            parsed_time = time(11, 30, 0)
        # 'afternoon' must be tested before 'noon', which it contains.
        elif 'afternoon' in text_lower:
            parsed_time = time(14, 0, 0)
        elif 'midday' in text_lower or 'noon' in text_lower:
            parsed_time = time(12, 0, 0)
        elif 'morning' in text_lower:
            parsed_time = time(9, 0, 0)
        elif 'evening' in text_lower or 'after work' in text_lower:
            parsed_time = time(17, 0, 0)

    # Format results
    return {
        'date': parsed_date.isoformat() if parsed_date is not None else None,
        'time': parsed_time.isoformat() if parsed_time is not None else None,
        'parsed': parsed_date is not None or parsed_time is not None,
    }


if __name__ == '__main__':
    # Manual smoke harness: runs a handful of sample phrases through both
    # parsers and prints the structured results for eyeballing.
    rule = "=" * 80

    print(rule)
    print("DEFERRAL PARSER TEST HARNESS")
    print(rule)

    deferral_samples = (
        "in 6 weeks",
        "after christmas",
        "couple of months",
        "after summer",
        "next quarter",
        "few months",
        "in 3 months",
    )

    for phrase in deferral_samples:
        outcome = parse_deferral_text(phrase)
        print(f"\nInput: '{phrase}'")
        print(f"  → due_at:     {outcome['due_at']}")
        print(f"  → reason:     {outcome['reason']}")
        print(f"  → confidence: {outcome['confidence']}")
        print(f"  → source:     {outcome['source']}")

    print("\n" + rule)
    print("APPOINTMENT PARSER TEST HARNESS")
    print(rule)

    appointment_samples = (
        "Friday 10:30",
        "tomorrow morning",
        "next week",
        "Monday afternoon",
        "Tuesday at half ten",
    )

    for phrase in appointment_samples:
        outcome = parse_appointment_when(phrase)
        print(f"\nInput: '{phrase}'")
        print(f"  → date:   {outcome['date']}")
        print(f"  → time:   {outcome['time']}")
        print(f"  → parsed: {outcome['parsed']}")

    print("\n" + rule)
    print("TEST COMPLETE")
    print(rule)
