#!/usr/bin/env python3
"""
Holiday Park Research Script
Searches Eurocamp, Siblu, Al Fresco and Yelloh! Village for family holiday parks with:
- Mobile homes (not camping)
- Pool/water play areas
- Kids activity programmes
- Evening entertainment
"""

import json
import time
from datetime import datetime, timedelta
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout

# Configuration
CHECKIN = "2026-07-18"   # arrival date, ISO format (YYYY-MM-DD)
CHECKOUT = "2026-08-02"  # departure date, ISO format (YYYY-MM-DD)
# Nights are derived from the two dates above so the value shown in the banner
# cannot drift out of sync when either date is edited.
DURATION = (
    datetime.strptime(CHECKOUT, "%Y-%m-%d") - datetime.strptime(CHECKIN, "%Y-%m-%d")
).days  # nights
PARTY = {"adults": 2, "children": 1, "infants": 0}
BUDGET = 2000  # £ total

# Module-level accumulator; nothing in the code visible here appends to it.
RESULTS = []

def save_results(data, filename):
    """Write *data* as indented JSON to ``holiday-planning/<filename>``.

    Args:
        data: A JSON-serialisable object (callers in this file pass dicts).
        filename: File name relative to the ``holiday-planning`` directory.

    Raises:
        OSError: If the target directory or file cannot be created or written.
        TypeError: If *data* contains a value ``json`` cannot serialise.
    """
    import os  # local import: this module only imports os inside main()

    path = f"holiday-planning/{filename}"
    # Create the target directory on demand so the function does not depend on
    # main() having prepared it first.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Scraped text contains accented names and currency symbols: write UTF-8
    # explicitly and keep those characters readable instead of \uXXXX escapes.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"✅ Saved to holiday-planning/{filename}")

def take_screenshot(page, name):
    """Save a debugging screenshot to ``holiday-planning/screenshots/<name>.png``.

    Best-effort: a failed screenshot is reported on stdout and otherwise
    ignored, so it never interrupts a scrape.

    Args:
        page: Object exposing a ``screenshot(path=...)`` method (callers in
            this file pass a Playwright page).
        name: File stem for the PNG, without extension.
    """
    try:
        page.screenshot(path=f"holiday-planning/screenshots/{name}.png")
    except Exception as e:
        # Deliberately not a bare ``except``: Ctrl-C and SystemExit must still
        # propagate; only ordinary failures are tolerated here.
        print(f"  ⚠️ Screenshot '{name}' failed: {e}")

def search_eurocamp(playwright):
    """Load Eurocamp pages and dump raw page text for later analysis.

    Visits the homepage, the France destinations page and a dated search URL,
    screenshots each one, and writes a summary dict to ``eurocamp_raw.json``
    via ``save_results``. Errors raised after the browser has launched are
    printed (with a screenshot attempt) instead of being re-raised.

    Args:
        playwright: The object yielded by ``sync_playwright()``.

    Returns:
        list: Always empty at present; no structured park extraction is
        implemented, only the raw text dump.
    """
    print("\n🏨 Searching Eurocamp...")
    browser = playwright.chromium.launch(headless=False)
    page = None  # stays None if context/page creation fails
    parks = []

    try:
        # Context and page are created inside the try so the browser is always
        # closed by the finally clause, even if their creation fails.
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        def wait_until_idle():
            """Wait for network idle, treating a timeout as non-fatal."""
            # goto() has already returned by the time this runs; a page that
            # keeps polling in the background should not abort the scrape.
            try:
                page.wait_for_load_state("networkidle", timeout=30000)
            except PlaywrightTimeout:
                print("  ⚠️ Network did not go idle within 30s; continuing")

        print("  Loading Eurocamp homepage...")
        page.goto("https://www.eurocamp.co.uk", timeout=60000)
        wait_until_idle()
        time.sleep(2)
        take_screenshot(page, "eurocamp_home")

        # Dismiss the cookie banner if present; it is optional, so a failure
        # here is reported but does not stop the run.
        try:
            accept_btn = page.query_selector("button:has-text('Accept'), button:has-text('accept'), #onetrust-accept-btn-handler")
            if accept_btn:
                accept_btn.click()
                time.sleep(1)
        except Exception as e:
            print(f"  ⚠️ Could not dismiss cookie banner: {e}")

        print("  Navigating to France destinations...")
        page.goto("https://www.eurocamp.co.uk/destinations/france", timeout=60000)
        wait_until_idle()
        take_screenshot(page, "eurocamp_france")

        parks_info = {
            "source": "Eurocamp",
            "url": "https://www.eurocamp.co.uk",
            "scraped_at": datetime.now().isoformat(),
            "parks": []
        }

        # Broad class-name match; only the element count is reported.
        park_cards = page.query_selector_all("[class*='park'], [class*='card'], [class*='listing']")
        print(f"  Found {len(park_cards)} potential park elements")

        print("  Trying search with dates...")
        search_url = f"https://www.eurocamp.co.uk/search?checkin={CHECKIN}&checkout={CHECKOUT}&adults={PARTY['adults']}&children={PARTY['children']}"
        page.goto(search_url, timeout=60000)
        wait_until_idle()
        take_screenshot(page, "eurocamp_search")

        # Keep only the first 2000 characters of visible text as a sample.
        page_text = page.inner_text("body")
        parks_info["page_text_sample"] = page_text[:2000]
        parks_info["search_url"] = search_url

        save_results(parks_info, "eurocamp_raw.json")

    except Exception as e:
        print(f"  ❌ Error: {e}")
        if page is not None:
            take_screenshot(page, "eurocamp_error")

    finally:
        browser.close()

    return parks

def search_siblu(playwright):
    """Load Siblu pages and dump raw page text for later analysis.

    Visits the homepage and then the ``/our-villages`` page, screenshots
    both, and writes a summary dict to ``siblu_raw.json`` via
    ``save_results``. Errors raised after the browser has launched are
    printed (with a screenshot attempt) instead of being re-raised.

    Args:
        playwright: The object yielded by ``sync_playwright()``.

    Returns:
        list: Always empty at present; no structured village extraction is
        implemented, only the raw text dump.
    """
    print("\n🏨 Searching Siblu...")
    browser = playwright.chromium.launch(headless=False)
    page = None  # stays None if context/page creation fails
    parks = []

    try:
        # Context and page are created inside the try so the browser is always
        # closed by the finally clause, even if their creation fails.
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        def wait_until_idle():
            """Wait for network idle, treating a timeout as non-fatal."""
            # goto() has already returned by the time this runs; a page that
            # keeps polling in the background should not abort the scrape.
            try:
                page.wait_for_load_state("networkidle", timeout=30000)
            except PlaywrightTimeout:
                print("  ⚠️ Network did not go idle within 30s; continuing")

        print("  Loading Siblu homepage...")
        page.goto("https://www.siblu.co.uk", timeout=60000)
        wait_until_idle()
        time.sleep(2)

        take_screenshot(page, "siblu_home")

        # Dismiss the cookie banner if present; it is optional, so a failure
        # here is reported but does not stop the run.
        try:
            accept_btn = page.query_selector("button:has-text('Accept'), #onetrust-accept-btn-handler")
            if accept_btn:
                accept_btn.click()
                time.sleep(1)
        except Exception as e:
            print(f"  ⚠️ Could not dismiss cookie banner: {e}")

        villages_info = {
            "source": "Siblu",
            "url": "https://www.siblu.co.uk",
            "scraped_at": datetime.now().isoformat(),
            "villages": []
        }

        # Sample of the homepage's visible text.
        page_text = page.inner_text("body")
        villages_info["page_text_sample"] = page_text[:2000]

        # The villages page is a bonus: if it fails, the homepage sample
        # collected above is still saved below.
        try:
            print("  Looking for villages list...")
            page.goto("https://www.siblu.co.uk/our-villages", timeout=60000)
            wait_until_idle()
            take_screenshot(page, "siblu_villages")

            page_text = page.inner_text("body")
            villages_info["villages_page_text"] = page_text[:3000]
        except Exception as e:
            print(f"  ⚠️ Could not load villages page: {e}")

        save_results(villages_info, "siblu_raw.json")

    except Exception as e:
        print(f"  ❌ Error: {e}")
        if page is not None:
            take_screenshot(page, "siblu_error")

    finally:
        browser.close()

    return parks

def search_alfresco(playwright):
    """Load the Al Fresco Holidays homepage and dump its raw page text.

    Screenshots the homepage and writes a summary dict (source, URL,
    timestamp and the first 2000 characters of visible text) to
    ``alfresco_raw.json`` via ``save_results``. Errors raised after the
    browser has launched are printed (with a screenshot attempt) instead of
    being re-raised.

    Args:
        playwright: The object yielded by ``sync_playwright()``.

    Returns:
        None.
    """
    print("\n🏨 Searching Al Fresco Holidays...")
    browser = playwright.chromium.launch(headless=False)
    page = None  # stays None if context/page creation fails

    try:
        # Context and page are created inside the try so the browser is always
        # closed by the finally clause, even if their creation fails.
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        print("  Loading Al Fresco homepage...")
        page.goto("https://www.alfresco-holidays.com", timeout=60000)
        # goto() has already returned; a page that never goes network-idle
        # should not abort the scrape, so a timeout here is tolerated.
        try:
            page.wait_for_load_state("networkidle", timeout=30000)
        except PlaywrightTimeout:
            print("  ⚠️ Network did not go idle within 30s; continuing")
        time.sleep(2)

        take_screenshot(page, "alfresco_home")

        info = {
            "source": "Al Fresco Holidays",
            "url": "https://www.alfresco-holidays.com",
            "scraped_at": datetime.now().isoformat(),
        }

        page_text = page.inner_text("body")
        info["page_text_sample"] = page_text[:2000]

        save_results(info, "alfresco_raw.json")

    except Exception as e:
        print(f"  ❌ Error: {e}")
        if page is not None:
            take_screenshot(page, "alfresco_error")

    finally:
        browser.close()

def search_yelloh(playwright):
    """Load the Yelloh! Village homepage and dump its raw page text.

    Screenshots the homepage and writes a summary dict (source, URL,
    timestamp and the first 2000 characters of visible text) to
    ``yelloh_raw.json`` via ``save_results``. Errors raised after the
    browser has launched are printed (with a screenshot attempt) instead of
    being re-raised.

    Args:
        playwright: The object yielded by ``sync_playwright()``.

    Returns:
        None.
    """
    print("\n🏨 Searching Yelloh! Village...")
    browser = playwright.chromium.launch(headless=False)
    page = None  # stays None if context/page creation fails

    try:
        # Context and page are created inside the try so the browser is always
        # closed by the finally clause, even if their creation fails.
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        print("  Loading Yelloh! Village homepage...")
        page.goto("https://www.yellohvillage.co.uk", timeout=60000)
        # goto() has already returned; a page that never goes network-idle
        # should not abort the scrape, so a timeout here is tolerated.
        try:
            page.wait_for_load_state("networkidle", timeout=30000)
        except PlaywrightTimeout:
            print("  ⚠️ Network did not go idle within 30s; continuing")
        time.sleep(2)

        take_screenshot(page, "yelloh_home")

        info = {
            "source": "Yelloh! Village",
            "url": "https://www.yellohvillage.co.uk",
            "scraped_at": datetime.now().isoformat(),
        }

        page_text = page.inner_text("body")
        info["page_text_sample"] = page_text[:2000]

        save_results(info, "yelloh_raw.json")

    except Exception as e:
        print(f"  ❌ Error: {e}")
        if page is not None:
            take_screenshot(page, "yelloh_error")

    finally:
        browser.close()

def main():
    """Print the search parameters, then run each provider scrape in turn."""
    import os

    rule = "=" * 60

    # Header banner summarising the configured trip.
    print(rule)
    print("HOLIDAY PARK RESEARCH")
    print(f"Dates: {CHECKIN} to {CHECKOUT} ({DURATION} nights)")
    print(f"Party: {PARTY['adults']} adults, {PARTY['children']} child(ren)")
    print(f"Budget: £{BUDGET}")
    print(rule)

    # Creating the nested screenshots folder also creates holiday-planning/,
    # which is where the JSON dumps are written.
    os.makedirs("holiday-planning/screenshots", exist_ok=True)

    providers = (search_eurocamp, search_siblu, search_alfresco, search_yelloh)
    with sync_playwright() as playwright:
        # Providers run sequentially, each launching its own browser.
        for run_search in providers:
            run_search(playwright)

    print("\n" + rule)
    print("SCRAPING COMPLETE")
    print(rule)

# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()