#!/usr/bin/env python3
"""
Holiday Park Research Script

Searches Eurocamp, Siblu, Al Fresco and Yelloh! Village for family holiday
parks with:
- Mobile homes (not camping)
- Pool/water play areas
- Kids activity programmes
- Evening entertainment

Each provider is opened in its own Chromium window; screenshots and a raw
text sample of the visited pages are written under ``holiday-planning/``.
"""

import json
import os
import time
from datetime import datetime, timedelta

from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout

# Configuration
CHECKIN = "2026-07-18"
CHECKOUT = "2026-08-02"
DURATION = 15  # nights
PARTY = {"adults": 2, "children": 1, "infants": 0}
BUDGET = 2000  # £ total

RESULTS = []

# Settings shared by every provider search
OUTPUT_DIR = "holiday-planning"
SCREENSHOT_DIR = f"{OUTPUT_DIR}/screenshots"
VIEWPORT = {"width": 1920, "height": 1080}
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36"
NAV_TIMEOUT_MS = 60000
IDLE_TIMEOUT_MS = 30000


def save_results(data, filename):
    """Save results to a JSON file inside the output directory.

    Args:
        data: JSON-serialisable object to write.
        filename: File name (not a full path) created under ``OUTPUT_DIR``.
    """
    # Do not rely on main() having created the directory already.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(f"{OUTPUT_DIR}/{filename}", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"✅ Saved to holiday-planning/{filename}")


def take_screenshot(page, name):
    """Take a screenshot for debugging.

    Best-effort: a failed screenshot is reported but never raised, because it
    is also called from error handlers where the page may already be broken.

    Args:
        page: Playwright page to capture.
        name: Base name of the PNG written to ``SCREENSHOT_DIR``.
    """
    try:
        page.screenshot(path=f"{SCREENSHOT_DIR}/{name}.png")
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C still stops the script.
        print(f" ⚠️ Screenshot '{name}' failed: {exc}")


def _open_page(playwright):
    """Launch a visible Chromium browser and return ``(browser, page)``."""
    browser = playwright.chromium.launch(headless=False)
    try:
        context = browser.new_context(viewport=VIEWPORT, user_agent=USER_AGENT)
        page = context.new_page()
    except Exception:
        # Do not leave a browser process behind if setup fails half-way.
        browser.close()
        raise
    return browser, page


def _visit(page, url):
    """Navigate to *url* and give the page a chance to settle."""
    page.goto(url, timeout=NAV_TIMEOUT_MS)
    try:
        page.wait_for_load_state("networkidle", timeout=IDLE_TIMEOUT_MS)
    except PlaywrightTimeout:
        # goto() has already returned, so navigation itself succeeded; a
        # network that never goes quiet should not abort the whole provider.
        print(f" ⚠️ Network did not go idle for {url}; continuing")


def _accept_cookies(page, selector):
    """Click the cookie-consent button matching *selector*, if one exists."""
    try:
        accept_btn = page.query_selector(selector)
        if accept_btn:
            accept_btn.click()
            time.sleep(1)
    except Exception as exc:
        # The banner is optional; report the problem and carry on.
        print(f" ⚠️ Could not dismiss cookie banner: {exc}")


def _capture_homepage(playwright, source, url, slug):
    """Load a provider homepage and save a screenshot plus a text sample.

    Args:
        playwright: Active sync Playwright instance.
        source: Provider name stored in the JSON output.
        url: Homepage URL to load.
        slug: Prefix used for the screenshot and JSON file names.
    """
    browser, page = _open_page(playwright)
    try:
        _visit(page, url)
        time.sleep(2)
        take_screenshot(page, f"{slug}_home")

        info = {
            "source": source,
            "url": url,
            "scraped_at": datetime.now().isoformat(),
        }
        page_text = page.inner_text("body")
        info["page_text_sample"] = page_text[:2000]

        save_results(info, f"{slug}_raw.json")
    except Exception as e:
        print(f" ❌ Error: {e}")
        take_screenshot(page, f"{slug}_error")
    finally:
        browser.close()


def search_eurocamp(playwright):
    """Search Eurocamp for suitable parks.

    Visits the homepage, the France destinations page and a dated search URL,
    then writes a raw text sample to ``eurocamp_raw.json``.

    Args:
        playwright: Active sync Playwright instance.

    Returns:
        list: Structured park records. Nothing populates it yet, so it is
        currently always empty.
    """
    print("\n🏨 Searching Eurocamp...")
    browser, page = _open_page(playwright)
    parks = []

    try:
        print(" Loading Eurocamp homepage...")
        _visit(page, "https://www.eurocamp.co.uk")
        time.sleep(2)
        take_screenshot(page, "eurocamp_home")

        _accept_cookies(
            page,
            "button:has-text('Accept'), button:has-text('accept'), #onetrust-accept-btn-handler",
        )

        print(" Navigating to France destinations...")
        _visit(page, "https://www.eurocamp.co.uk/destinations/france")
        take_screenshot(page, "eurocamp_france")

        parks_info = {
            "source": "Eurocamp",
            "url": "https://www.eurocamp.co.uk",
            "scraped_at": datetime.now().isoformat(),
            "parks": [],
        }

        # Only the element count is reported; the cards are not parsed.
        park_cards = page.query_selector_all("[class*='park'], [class*='card'], [class*='listing']")
        print(f" Found {len(park_cards)} potential park elements")

        print(" Trying search with dates...")
        search_url = f"https://www.eurocamp.co.uk/search?checkin={CHECKIN}&checkout={CHECKOUT}&adults={PARTY['adults']}&children={PARTY['children']}"
        _visit(page, search_url)
        take_screenshot(page, "eurocamp_search")

        page_text = page.inner_text("body")
        parks_info["page_text_sample"] = page_text[:2000]
        parks_info["search_url"] = search_url

        save_results(parks_info, "eurocamp_raw.json")
    except Exception as e:
        print(f" ❌ Error: {e}")
        take_screenshot(page, "eurocamp_error")
    finally:
        browser.close()

    return parks


def search_siblu(playwright):
    """Search Siblu for holiday villages.

    Visits the homepage and the villages listing, then writes raw text
    samples to ``siblu_raw.json``.

    Args:
        playwright: Active sync Playwright instance.

    Returns:
        list: Structured park records. Nothing populates it yet, so it is
        currently always empty.
    """
    print("\n🏨 Searching Siblu...")
    browser, page = _open_page(playwright)
    parks = []

    try:
        print(" Loading Siblu homepage...")
        _visit(page, "https://www.siblu.co.uk")
        time.sleep(2)
        take_screenshot(page, "siblu_home")

        _accept_cookies(page, "button:has-text('Accept'), #onetrust-accept-btn-handler")

        villages_info = {
            "source": "Siblu",
            "url": "https://www.siblu.co.uk",
            "scraped_at": datetime.now().isoformat(),
            "villages": [],
        }

        page_text = page.inner_text("body")
        villages_info["page_text_sample"] = page_text[:2000]

        # The villages page is optional: if it fails, the homepage sample
        # gathered above is still saved.
        try:
            print(" Looking for villages list...")
            _visit(page, "https://www.siblu.co.uk/our-villages")
            take_screenshot(page, "siblu_villages")
            page_text = page.inner_text("body")
            villages_info["villages_page_text"] = page_text[:3000]
        except Exception as exc:
            print(f" ⚠️ Could not read villages page: {exc}")

        save_results(villages_info, "siblu_raw.json")
    except Exception as e:
        print(f" ❌ Error: {e}")
        take_screenshot(page, "siblu_error")
    finally:
        browser.close()

    return parks


def search_alfresco(playwright):
    """Search Al Fresco Holidays.

    Saves a homepage screenshot and text sample to ``alfresco_raw.json``.

    Args:
        playwright: Active sync Playwright instance.
    """
    print("\n🏨 Searching Al Fresco Holidays...")
    print(" Loading Al Fresco homepage...")
    _capture_homepage(
        playwright,
        "Al Fresco Holidays",
        "https://www.alfresco-holidays.com",
        "alfresco",
    )


def search_yelloh(playwright):
    """Search Yelloh! Village.

    Saves a homepage screenshot and text sample to ``yelloh_raw.json``.

    Args:
        playwright: Active sync Playwright instance.
    """
    print("\n🏨 Searching Yelloh! Village...")
    print(" Loading Yelloh! Village homepage...")
    _capture_homepage(
        playwright,
        "Yelloh! Village",
        "https://www.yellohvillage.co.uk",
        "yelloh",
    )


def main():
    """Main scraping function: print the trip summary and run every provider."""
    print("=" * 60)
    print("HOLIDAY PARK RESEARCH")
    print(f"Dates: {CHECKIN} to {CHECKOUT} ({DURATION} nights)")
    print(f"Party: {PARTY['adults']} adults, {PARTY['children']} child(ren)")
    print(f"Budget: £{BUDGET}")
    print("=" * 60)

    # Create screenshots directory (this also creates the output directory)
    os.makedirs(SCREENSHOT_DIR, exist_ok=True)

    with sync_playwright() as playwright:
        # Search each provider
        search_eurocamp(playwright)
        search_siblu(playwright)
        search_alfresco(playwright)
        search_yelloh(playwright)

    print("\n" + "=" * 60)
    print("SCRAPING COMPLETE")
    print("=" * 60)


if __name__ == "__main__":
    main()