Options researched: Northern Spain (Cantabria/Asturias), France – Dordogne, France – Brittany, Eurocamp Domaine des Ormes, Eurocamp La Grande Métairie, and Siblu Domaine de Kerlann. Includes detailed itineraries and costings for a family of 3. Dates: 18 July – 2 August 2026. Budget: £2,000.
282 lines
8.8 KiB
Python
282 lines
8.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Holiday Park Research Script
|
|
Searches Eurocamp, Siblu, Al Fresco and Yelloh! Village for family holiday parks with:
|
|
- Mobile homes (not camping)
|
|
- Pool/water play areas
|
|
- Kids activity programmes
|
|
- Evening entertainment
|
|
"""
|
|
|
|
import json
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
|
|
|
|
# Configuration
CHECKIN = "2026-07-18"  # arrival date (ISO format, used in search URLs)
CHECKOUT = "2026-08-02"  # departure date (ISO format, used in search URLs)
# Number of nights, derived from the two dates above so the values cannot
# drift apart when the dates are edited (evaluates to 15 for these dates).
DURATION = (datetime.fromisoformat(CHECKOUT) - datetime.fromisoformat(CHECKIN)).days
PARTY = {"adults": 2, "children": 1, "infants": 0}
BUDGET = 2000  # £ total

# Module-level list; nothing in the visible part of the file appends to it.
RESULTS = []
|
|
|
|
def save_results(data, filename):
    """Write ``data`` as indented JSON to ``holiday-planning/<filename>``.

    Args:
        data: JSON-serialisable object to store.
        filename: Name of the file to create inside ``holiday-planning``.

    The output directory is created on demand, so the function also works
    when it is called before anything else has set the folder up.
    """
    # Local import: ``os`` is not among the imports visible at the top of
    # the file.
    import os

    out_dir = "holiday-planning"
    os.makedirs(out_dir, exist_ok=True)

    with open(f"{out_dir}/{filename}", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"✅ Saved to holiday-planning/{filename}")
|
|
|
|
def take_screenshot(page, name):
    """Save a debugging screenshot of ``page`` on a best-effort basis.

    Args:
        page: Object exposing ``screenshot(path=...)``.
        name: Base name of the image; it is written to
            ``holiday-planning/screenshots/<name>.png``.

    A failing screenshot never aborts the caller: ordinary errors are
    reported on stdout and ignored. ``KeyboardInterrupt`` and ``SystemExit``
    are deliberately not caught, so the script can still be interrupted.
    """
    try:
        page.screenshot(path=f"holiday-planning/screenshots/{name}.png")
    except Exception as exc:
        print(f" Screenshot '{name}' skipped: {exc}")
|
|
|
|
def search_eurocamp(playwright, *, headless=False):
    """Collect raw Eurocamp page data for the configured dates and party.

    Visits the homepage, dismisses a cookie banner if one is found, opens the
    France destinations page and finally a search URL built from ``CHECKIN``,
    ``CHECKOUT`` and ``PARTY``. Screenshots are taken along the way and the
    first 2000 characters of the search page's body text are written to
    ``eurocamp_raw.json`` via ``save_results``.

    Args:
        playwright: Object whose ``chromium.launch`` starts a browser (main
            passes the value yielded by ``sync_playwright()``).
        headless: Passed to ``chromium.launch``. Defaults to ``False``, the
            value that was previously hard-coded.

    Returns:
        list: Currently always empty; no park records are parsed yet.
    """
    print("\n🏨 Searching Eurocamp...")
    browser = playwright.chromium.launch(headless=headless)
    parks = []

    # The outer try guarantees the browser is closed even when creating the
    # context or page fails.
    try:
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        try:
            # Go to Eurocamp
            print(" Loading Eurocamp homepage...")
            page.goto("https://www.eurocamp.co.uk", timeout=60000)
            page.wait_for_load_state("networkidle", timeout=30000)
            # Use Playwright's own wait instead of time.sleep, as the sync
            # API documentation recommends.
            page.wait_for_timeout(2000)

            take_screenshot(page, "eurocamp_home")

            # Cookie consent is best effort: a failure here must not stop
            # the scrape, but it is reported rather than silently dropped.
            try:
                accept_btn = page.query_selector(
                    "button:has-text('Accept'), button:has-text('accept'), #onetrust-accept-btn-handler"
                )
                if accept_btn:
                    accept_btn.click()
                    page.wait_for_timeout(1000)
            except Exception as exc:
                print(f" Cookie banner not dismissed: {exc}")

            # Navigate to destinations to find parks with pools
            print(" Navigating to France destinations...")
            page.goto("https://www.eurocamp.co.uk/destinations/france", timeout=60000)
            page.wait_for_load_state("networkidle", timeout=30000)
            take_screenshot(page, "eurocamp_france")

            parks_info = {
                "source": "Eurocamp",
                "url": "https://www.eurocamp.co.uk",
                "scraped_at": datetime.now().isoformat(),
                "parks": [],
            }

            # Only the number of candidate elements is reported for now;
            # their contents are not extracted.
            park_cards = page.query_selector_all(
                "[class*='park'], [class*='card'], [class*='listing']"
            )
            print(f" Found {len(park_cards)} potential park elements")

            # Navigate to a search results page for the configured stay
            print(" Trying search with dates...")
            search_url = f"https://www.eurocamp.co.uk/search?checkin={CHECKIN}&checkout={CHECKOUT}&adults={PARTY['adults']}&children={PARTY['children']}"
            page.goto(search_url, timeout=60000)
            page.wait_for_load_state("networkidle", timeout=30000)
            take_screenshot(page, "eurocamp_search")

            # Keep a sample of the visible text for later analysis
            page_text = page.inner_text("body")
            parks_info["page_text_sample"] = page_text[:2000]
            parks_info["search_url"] = search_url

            save_results(parks_info, "eurocamp_raw.json")

        except Exception as e:
            print(f" ❌ Error: {e}")
            take_screenshot(page, "eurocamp_error")

    finally:
        browser.close()

    return parks
|
|
|
|
def search_siblu(playwright, *, headless=False):
    """Collect raw Siblu page data.

    Visits the homepage, dismisses a cookie banner if one is found, stores
    the first 2000 characters of the homepage body text, then tries the
    ``/our-villages`` page and stores the first 3000 characters of its body
    text. The result is written to ``siblu_raw.json`` via ``save_results``.

    Args:
        playwright: Object whose ``chromium.launch`` starts a browser (main
            passes the value yielded by ``sync_playwright()``).
        headless: Passed to ``chromium.launch``. Defaults to ``False``, the
            value that was previously hard-coded.

    Returns:
        list: Currently always empty; no village records are parsed yet.
    """
    print("\n🏨 Searching Siblu...")
    browser = playwright.chromium.launch(headless=headless)
    parks = []

    # The outer try guarantees the browser is closed even when creating the
    # context or page fails.
    try:
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        try:
            print(" Loading Siblu homepage...")
            page.goto("https://www.siblu.co.uk", timeout=60000)
            page.wait_for_load_state("networkidle", timeout=30000)
            # Use Playwright's own wait instead of time.sleep, as the sync
            # API documentation recommends.
            page.wait_for_timeout(2000)

            take_screenshot(page, "siblu_home")

            # Cookie handling is best effort: report a failure, carry on.
            try:
                accept_btn = page.query_selector(
                    "button:has-text('Accept'), #onetrust-accept-btn-handler"
                )
                if accept_btn:
                    accept_btn.click()
                    page.wait_for_timeout(1000)
            except Exception as exc:
                print(f" Cookie banner not dismissed: {exc}")

            villages_info = {
                "source": "Siblu",
                "url": "https://www.siblu.co.uk",
                "scraped_at": datetime.now().isoformat(),
                "villages": [],
            }

            # Sample of the homepage text
            page_text = page.inner_text("body")
            villages_info["page_text_sample"] = page_text[:2000]

            # The villages page is optional: if it cannot be loaded, the
            # homepage data is still saved below.
            try:
                print(" Looking for villages list...")
                page.goto("https://www.siblu.co.uk/our-villages", timeout=60000)
                page.wait_for_load_state("networkidle", timeout=30000)
                take_screenshot(page, "siblu_villages")

                page_text = page.inner_text("body")
                villages_info["villages_page_text"] = page_text[:3000]
            except Exception as exc:
                print(f" Villages page not captured: {exc}")

            save_results(villages_info, "siblu_raw.json")

        except Exception as e:
            print(f" ❌ Error: {e}")
            take_screenshot(page, "siblu_error")

    finally:
        browser.close()

    return parks
|
|
|
|
def search_alfresco(playwright, *, headless=False):
    """Collect raw Al Fresco Holidays homepage data.

    Loads the homepage, takes a screenshot and writes the first 2000
    characters of the body text to ``alfresco_raw.json`` via
    ``save_results``.

    Args:
        playwright: Object whose ``chromium.launch`` starts a browser (main
            passes the value yielded by ``sync_playwright()``).
        headless: Passed to ``chromium.launch``. Defaults to ``False``, the
            value that was previously hard-coded.

    Returns:
        None.
    """
    print("\n🏨 Searching Al Fresco Holidays...")
    browser = playwright.chromium.launch(headless=headless)

    # The outer try guarantees the browser is closed even when creating the
    # context or page fails.
    try:
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        try:
            print(" Loading Al Fresco homepage...")
            page.goto("https://www.alfresco-holidays.com", timeout=60000)
            page.wait_for_load_state("networkidle", timeout=30000)
            # Use Playwright's own wait instead of time.sleep, as the sync
            # API documentation recommends.
            page.wait_for_timeout(2000)

            take_screenshot(page, "alfresco_home")

            info = {
                "source": "Al Fresco Holidays",
                "url": "https://www.alfresco-holidays.com",
                "scraped_at": datetime.now().isoformat(),
            }

            page_text = page.inner_text("body")
            info["page_text_sample"] = page_text[:2000]

            save_results(info, "alfresco_raw.json")

        except Exception as e:
            print(f" ❌ Error: {e}")
            take_screenshot(page, "alfresco_error")

    finally:
        browser.close()
|
|
|
|
def search_yelloh(playwright, *, headless=False):
    """Collect raw Yelloh! Village homepage data.

    Loads the homepage, takes a screenshot and writes the first 2000
    characters of the body text to ``yelloh_raw.json`` via ``save_results``.

    Args:
        playwright: Object whose ``chromium.launch`` starts a browser (main
            passes the value yielded by ``sync_playwright()``).
        headless: Passed to ``chromium.launch``. Defaults to ``False``, the
            value that was previously hard-coded.

    Returns:
        None.
    """
    print("\n🏨 Searching Yelloh! Village...")
    browser = playwright.chromium.launch(headless=headless)

    # The outer try guarantees the browser is closed even when creating the
    # context or page fails.
    try:
        context = browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        )
        page = context.new_page()

        try:
            print(" Loading Yelloh! Village homepage...")
            page.goto("https://www.yellohvillage.co.uk", timeout=60000)
            page.wait_for_load_state("networkidle", timeout=30000)
            # Use Playwright's own wait instead of time.sleep, as the sync
            # API documentation recommends.
            page.wait_for_timeout(2000)

            take_screenshot(page, "yelloh_home")

            info = {
                "source": "Yelloh! Village",
                "url": "https://www.yellohvillage.co.uk",
                "scraped_at": datetime.now().isoformat(),
            }

            page_text = page.inner_text("body")
            info["page_text_sample"] = page_text[:2000]

            save_results(info, "yelloh_raw.json")

        except Exception as e:
            print(f" ❌ Error: {e}")
            take_screenshot(page, "yelloh_error")

    finally:
        browser.close()
|
|
|
|
def main():
    """Print the trip summary, then run each provider search in turn."""
    import os

    rule = "=" * 60
    summary = (
        rule,
        "HOLIDAY PARK RESEARCH",
        f"Dates: {CHECKIN} to {CHECKOUT} ({DURATION} nights)",
        f"Party: {PARTY['adults']} adults, {PARTY['children']} child(ren)",
        f"Budget: £{BUDGET}",
        rule,
    )
    for line in summary:
        print(line)

    # The screenshot folder must exist before any provider is visited.
    os.makedirs("holiday-planning/screenshots", exist_ok=True)

    providers = (search_eurocamp, search_siblu, search_alfresco, search_yelloh)
    with sync_playwright() as playwright:
        # One provider after another, each with its own browser instance.
        for run_search in providers:
            run_search(playwright)

    print(f"\n{rule}")
    print("SCRAPING COMPLETE")
    print(rule)
|
|
|
|
# Run the research only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|