Honest pricing review with disclaimers

What Changed:
- Added VERIFIED-PRICES.md with honest assessment
- Added BUDGET-REALITY.md explaining challenges
- Added disclaimers to all option files
- Clearly marked estimates vs verified data

Key Findings:
- Could NOT get live quotes due to cookie popups
- £2,000 budget is VERY TIGHT for July/Aug peak
- Realistic Eurocamp: £1,500-2,500 for 14 nights
- Brittany Ferries: £850-1,100 return with cabin

Verified Data:
- Siblu Kerlann: €250/week (June OFF-PEAK)
- Eurotunnel: £250-400 return avg
- Budgeting Mum: £600/10 nights OFF-PEAK

User action needed:
- Manually check Eurocamp.co.uk
- Consider shorter duration
- Consider gîte instead of mobile home
2026-03-15 23:18:43 +00:00
commit a27fcfef61
640 changed files with 179624 additions and 0 deletions
--- a/scrape_holiday_parks.py
+++ b/scrape_holiday_parks.py
@@ -0,0 +1,281 @@
+#!/usr/bin/env python3
+"""
+Holiday Park Research Script
+Searches Eurocamp, Siblu, Al Fresco and Yelloh! Village for family holiday parks with:
+- Mobile homes (not camping)
+- Pool/water play areas
+- Kids activity programmes
+- Evening entertainment
+"""
+
+import json
+import time
+from datetime import datetime, timedelta
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
+
+# Configuration
+# Stay dates as ISO "YYYY-MM-DD" strings; search_eurocamp() interpolates them
+# verbatim into its search URL.
+CHECKIN = "2026-07-18"
+CHECKOUT = "2026-08-02"
+# The night count is derived from the two dates so that the banner printed by
+# main() cannot drift out of sync when the dates are edited.
+_DATE_FORMAT = "%Y-%m-%d"
+DURATION = (
+    datetime.strptime(CHECKOUT, _DATE_FORMAT)
+    - datetime.strptime(CHECKIN, _DATE_FORMAT)
+).days  # nights (15 for the dates above)
+PARTY = {"adults": 2, "children": 1, "infants": 0}
+BUDGET = 2000  # £ total
+
+# Module-level accumulator; not referenced by any function visible in this file.
+RESULTS = []
+
+def save_results(data, filename):
+    """Write ``data`` as pretty-printed JSON to ``holiday-planning/<filename>``.
+
+    Args:
+        data: JSON-serialisable object (the callers in this file pass dicts).
+        filename: File name relative to the ``holiday-planning`` directory.
+
+    Raises:
+        OSError: If the directory or file cannot be created or written.
+        TypeError: If ``data`` contains values ``json`` cannot serialise.
+    """
+    import os
+
+    path = f"holiday-planning/{filename}"
+    # Create the target directory on demand so the function does not depend on
+    # main() having prepared the directory tree first.
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    # Scraped text may contain non-ASCII characters (£, €, accents). Writing
+    # real UTF-8 instead of \uXXXX escapes keeps the dump readable, and the
+    # explicit encoding avoids depending on the platform default.
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
+    print(f"✅ Saved to holiday-planning/{filename}")
+
+def take_screenshot(page, name):
+    """Save a debugging screenshot of ``page``; failures are reported, not raised.
+
+    Args:
+        page: Object exposing ``screenshot(path=...)``; the callers in this
+            file pass a Playwright page.
+        name: Base file name. The image is written to
+            ``holiday-planning/screenshots/<name>.png``.
+    """
+    try:
+        page.screenshot(path=f"holiday-planning/screenshots/{name}.png")
+    except Exception as e:
+        # Screenshots are best-effort, so a failure must not abort the scrape.
+        # Catching Exception (not a bare except) still lets Ctrl-C and
+        # SystemExit propagate, and the message keeps the failure visible.
+        print(f"  ⚠️ Screenshot '{name}' failed: {e}")
+
+def search_eurocamp(playwright):
+    """Capture raw Eurocamp page data for the configured stay.
+
+    Launches a non-headless Chromium, visits the Eurocamp homepage, the France
+    destinations page and a search URL built from CHECKIN, CHECKOUT and PARTY,
+    taking a screenshot of each. The first 2000 characters of the search
+    page's body text, together with the search URL, are saved to
+    ``holiday-planning/eurocamp_raw.json``.
+
+    Errors raised after the browser has launched are printed rather than
+    propagated, so one failing provider does not stop the others.
+
+    Args:
+        playwright: The object yielded by ``sync_playwright()``.
+
+    Returns:
+        list: Always empty at present; no park records are parsed yet.
+    """
+    print("\n🏨 Searching Eurocamp...")
+    browser = playwright.chromium.launch(headless=False)
+    # Stays None until the page exists, so the error handler knows whether
+    # there is anything to screenshot.
+    page = None
+    parks = []
+
+    try:
+        # Context and page creation sit inside the try so that a failure here
+        # is reported like any other and the browser is still closed.
+        context = browser.new_context(
+            viewport={"width": 1920, "height": 1080},
+            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
+        )
+        page = context.new_page()
+
+        # Go to Eurocamp
+        print("  Loading Eurocamp homepage...")
+        page.goto("https://www.eurocamp.co.uk", timeout=60000)
+        page.wait_for_load_state("networkidle", timeout=30000)
+        time.sleep(2)
+
+        take_screenshot(page, "eurocamp_home")
+
+        # Dismiss the cookie consent banner if one is found. This is
+        # best-effort: a failure is reported and the scrape carries on.
+        try:
+            accept_btn = page.query_selector("button:has-text('Accept'), button:has-text('accept'), #onetrust-accept-btn-handler")
+            if accept_btn:
+                accept_btn.click()
+                time.sleep(1)
+        except Exception as e:
+            print(f"  ⚠️ Could not dismiss cookie banner: {e}")
+
+        # Navigate to destinations to find parks with pools
+        print("  Navigating to France destinations...")
+        page.goto("https://www.eurocamp.co.uk/destinations/france", timeout=60000)
+        page.wait_for_load_state("networkidle", timeout=30000)
+        take_screenshot(page, "eurocamp_france")
+
+        # Container for everything written to eurocamp_raw.json. "parks" is
+        # left empty because no per-park parsing is implemented yet.
+        parks_info = {
+            "source": "Eurocamp",
+            "url": "https://www.eurocamp.co.uk",
+            "scraped_at": datetime.now().isoformat(),
+            "parks": []
+        }
+
+        # Count elements whose class name hints at a park listing; the count
+        # is only printed as a diagnostic.
+        park_cards = page.query_selector_all("[class*='park'], [class*='card'], [class*='listing']")
+        print(f"  Found {len(park_cards)} potential park elements")
+
+        # Navigate to a search results page for the configured dates and party
+        print("  Trying search with dates...")
+        search_url = f"https://www.eurocamp.co.uk/search?checkin={CHECKIN}&checkout={CHECKOUT}&adults={PARTY['adults']}&children={PARTY['children']}"
+        page.goto(search_url, timeout=60000)
+        page.wait_for_load_state("networkidle", timeout=30000)
+        take_screenshot(page, "eurocamp_search")
+
+        # Keep a sample of the visible text for offline analysis
+        page_text = page.inner_text("body")
+
+        parks_info["page_text_sample"] = page_text[:2000]
+        parks_info["search_url"] = search_url
+
+        save_results(parks_info, "eurocamp_raw.json")
+
+    except Exception as e:
+        print(f"  ❌ Error: {e}")
+        if page is not None:
+            take_screenshot(page, "eurocamp_error")
+
+    finally:
+        browser.close()
+
+    return parks
+
+def search_siblu(playwright):
+    """Capture raw Siblu page data.
+
+    Launches a non-headless Chromium, loads the Siblu homepage and then the
+    "our-villages" page, taking a screenshot of each. The first 2000
+    characters of the homepage body text and the first 3000 characters of the
+    villages page body text are saved to ``holiday-planning/siblu_raw.json``.
+
+    Errors raised after the browser has launched are printed rather than
+    propagated, so one failing provider does not stop the others.
+
+    Args:
+        playwright: The object yielded by ``sync_playwright()``.
+
+    Returns:
+        list: Always empty at present; no village records are parsed yet.
+    """
+    print("\n🏨 Searching Siblu...")
+    browser = playwright.chromium.launch(headless=False)
+    # Stays None until the page exists, so the error handler knows whether
+    # there is anything to screenshot.
+    page = None
+    parks = []
+
+    try:
+        # Context and page creation sit inside the try so that a failure here
+        # is reported like any other and the browser is still closed.
+        context = browser.new_context(
+            viewport={"width": 1920, "height": 1080},
+            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
+        )
+        page = context.new_page()
+
+        print("  Loading Siblu homepage...")
+        page.goto("https://www.siblu.co.uk", timeout=60000)
+        page.wait_for_load_state("networkidle", timeout=30000)
+        time.sleep(2)
+
+        take_screenshot(page, "siblu_home")
+
+        # Dismiss the cookie consent banner if one is found. This is
+        # best-effort: a failure is reported and the scrape carries on.
+        try:
+            accept_btn = page.query_selector("button:has-text('Accept'), #onetrust-accept-btn-handler")
+            if accept_btn:
+                accept_btn.click()
+                time.sleep(1)
+        except Exception as e:
+            print(f"  ⚠️ Could not dismiss cookie banner: {e}")
+
+        # Container for everything written to siblu_raw.json. "villages" is
+        # left empty because no per-village parsing is implemented yet.
+        villages_info = {
+            "source": "Siblu",
+            "url": "https://www.siblu.co.uk",
+            "scraped_at": datetime.now().isoformat(),
+            "villages": []
+        }
+
+        # Sample of the homepage text
+        page_text = page.inner_text("body")
+        villages_info["page_text_sample"] = page_text[:2000]
+
+        # The villages page is optional extra data: if it fails, report the
+        # problem and still save what was collected from the homepage.
+        try:
+            print("  Looking for villages list...")
+            page.goto("https://www.siblu.co.uk/our-villages", timeout=60000)
+            page.wait_for_load_state("networkidle", timeout=30000)
+            take_screenshot(page, "siblu_villages")
+
+            page_text = page.inner_text("body")
+            villages_info["villages_page_text"] = page_text[:3000]
+        except Exception as e:
+            print(f"  ⚠️ Could not load villages page: {e}")
+
+        save_results(villages_info, "siblu_raw.json")
+
+    except Exception as e:
+        print(f"  ❌ Error: {e}")
+        if page is not None:
+            take_screenshot(page, "siblu_error")
+
+    finally:
+        browser.close()
+
+    return parks
+
+def search_alfresco(playwright):
+    """Capture raw Al Fresco Holidays homepage data.
+
+    Launches a non-headless Chromium, loads the Al Fresco homepage, takes a
+    screenshot and saves the first 2000 characters of the body text to
+    ``holiday-planning/alfresco_raw.json``.
+
+    Errors raised after the browser has launched are printed rather than
+    propagated, so one failing provider does not stop the others.
+
+    Args:
+        playwright: The object yielded by ``sync_playwright()``.
+    """
+    print("\n🏨 Searching Al Fresco Holidays...")
+    browser = playwright.chromium.launch(headless=False)
+    # Stays None until the page exists, so the error handler knows whether
+    # there is anything to screenshot.
+    page = None
+
+    try:
+        # Context and page creation sit inside the try so that a failure here
+        # is reported like any other and the browser is still closed.
+        context = browser.new_context(
+            viewport={"width": 1920, "height": 1080},
+            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
+        )
+        page = context.new_page()
+
+        print("  Loading Al Fresco homepage...")
+        page.goto("https://www.alfresco-holidays.com", timeout=60000)
+        page.wait_for_load_state("networkidle", timeout=30000)
+        time.sleep(2)
+
+        take_screenshot(page, "alfresco_home")
+
+        info = {
+            "source": "Al Fresco Holidays",
+            "url": "https://www.alfresco-holidays.com",
+            "scraped_at": datetime.now().isoformat(),
+        }
+
+        # Keep a sample of the visible text for offline analysis
+        page_text = page.inner_text("body")
+        info["page_text_sample"] = page_text[:2000]
+
+        save_results(info, "alfresco_raw.json")
+
+    except Exception as e:
+        print(f"  ❌ Error: {e}")
+        if page is not None:
+            take_screenshot(page, "alfresco_error")
+
+    finally:
+        browser.close()
+
+def search_yelloh(playwright):
+    """Capture raw Yelloh! Village homepage data.
+
+    Launches a non-headless Chromium, loads the Yelloh! Village homepage,
+    takes a screenshot and saves the first 2000 characters of the body text to
+    ``holiday-planning/yelloh_raw.json``.
+
+    Errors raised after the browser has launched are printed rather than
+    propagated, so one failing provider does not stop the others.
+
+    Args:
+        playwright: The object yielded by ``sync_playwright()``.
+    """
+    print("\n🏨 Searching Yelloh! Village...")
+    browser = playwright.chromium.launch(headless=False)
+    # Stays None until the page exists, so the error handler knows whether
+    # there is anything to screenshot.
+    page = None
+
+    try:
+        # Context and page creation sit inside the try so that a failure here
+        # is reported like any other and the browser is still closed.
+        context = browser.new_context(
+            viewport={"width": 1920, "height": 1080},
+            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
+        )
+        page = context.new_page()
+
+        print("  Loading Yelloh! Village homepage...")
+        page.goto("https://www.yellohvillage.co.uk", timeout=60000)
+        page.wait_for_load_state("networkidle", timeout=30000)
+        time.sleep(2)
+
+        take_screenshot(page, "yelloh_home")
+
+        info = {
+            "source": "Yelloh! Village",
+            "url": "https://www.yellohvillage.co.uk",
+            "scraped_at": datetime.now().isoformat(),
+        }
+
+        # Keep a sample of the visible text for offline analysis
+        page_text = page.inner_text("body")
+        info["page_text_sample"] = page_text[:2000]
+
+        save_results(info, "yelloh_raw.json")
+
+    except Exception as e:
+        print(f"  ❌ Error: {e}")
+        if page is not None:
+            take_screenshot(page, "yelloh_error")
+
+    finally:
+        browser.close()
+
+def main():
+    """Print the search parameters, then scrape every provider in turn."""
+    rule = "=" * 60
+
+    # Banner summarising the configured stay
+    print(rule)
+    print("HOLIDAY PARK RESEARCH")
+    print(f"Dates: {CHECKIN} to {CHECKOUT} ({DURATION} nights)")
+    print(f"Party: {PARTY['adults']} adults, {PARTY['children']} child(ren)")
+    print(f"Budget: £{BUDGET}")
+    print(rule)
+
+    # Make sure the screenshot directory (and its parent) exists
+    import os
+    os.makedirs("holiday-planning/screenshots", exist_ok=True)
+
+    # One Playwright session is shared; each provider opens its own browser
+    with sync_playwright() as playwright:
+        search_eurocamp(playwright)
+        search_siblu(playwright)
+        search_alfresco(playwright)
+        search_yelloh(playwright)
+
+    # Closing banner
+    print("\n" + rule)
+    print("SCRAPING COMPLETE")
+    print(rule)
+
+# Run the scrape only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()