Honest pricing review with disclaimers
What Changed:
- Added VERIFIED-PRICES.md with honest assessment
- Added BUDGET-REALITY.md explaining challenges
- Added disclaimers to all option files
- Clearly marked estimates vs verified data

Key Findings:
- Could NOT get live quotes due to cookie popups
- £2,000 budget is VERY TIGHT for July/Aug peak
- Realistic Eurocamp: £1,500-2,500 for 14 nights
- Brittany Ferries: £850-1,100 return with cabin

Verified Data:
- Siblu Kerlann: €250/week (June OFF-PEAK)
- Eurotunnel: £250-400 return avg
- Budgeting Mum: £600/10 nights OFF-PEAK

User action needed:
- Manually check Eurocamp.co.uk
- Consider shorter duration
- Consider gîte instead of mobile home
This commit is contained in:
281
scrape_holiday_parks.py
Normal file
281
scrape_holiday_parks.py
Normal file
@@ -0,0 +1,281 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Holiday Park Research Script
|
||||
Searches Eurocamp, Siblu, Al Fresco and Yelloh! Village for family holiday parks with:
|
||||
- Mobile homes (not camping)
|
||||
- Pool/water play areas
|
||||
- Kids activity programmes
|
||||
- Evening entertainment
|
||||
"""
|
||||
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
|
||||
|
||||
# Configuration
CHECKIN = "2026-07-18"  # arrival date, ISO format (YYYY-MM-DD)
CHECKOUT = "2026-08-02"  # departure date, ISO format (YYYY-MM-DD)
# Number of nights, derived from the two dates above so it cannot drift out of
# sync when either date is edited (evaluates to 15 for the current dates).
DURATION = (datetime.fromisoformat(CHECKOUT) - datetime.fromisoformat(CHECKIN)).days
PARTY = {"adults": 2, "children": 1, "infants": 0}
BUDGET = 2000  # £ total

# Module-level list; nothing in the visible code appends to it.
RESULTS = []
|
||||
|
||||
def save_results(data, filename):
    """Save results to a JSON file under ``holiday-planning/``.

    Args:
        data: JSON-serialisable object to write.
        filename: Name of the file to create inside the ``holiday-planning``
            directory (relative to the current working directory).

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialise.
        OSError: If the directory or the file cannot be created.
    """
    import os  # local import: the module itself only imports os inside main()

    path = os.path.join("holiday-planning", filename)
    # Create the target directory on demand so the function also works when
    # it is called without main() having created the output tree first.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Explicit encoding keeps the output independent of the platform locale.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"✅ Saved to holiday-planning/{filename}")
|
||||
|
||||
def take_screenshot(page, name):
    """Take a screenshot for debugging (best effort).

    The image is written to ``holiday-planning/screenshots/<name>.png``.
    A failure is reported on stdout and otherwise ignored, so a broken
    screenshot never aborts the calling scrape.

    Args:
        page: Object exposing ``screenshot(path=...)`` (a Playwright page in
            this script).
        name: Base file name of the screenshot, without extension.
    """
    try:
        page.screenshot(path=f"holiday-planning/screenshots/{name}.png")
    except Exception as exc:
        # Only ordinary errors are swallowed; KeyboardInterrupt / SystemExit
        # still propagate so the user can stop the run.
        print(f" ⚠️ Screenshot '{name}' failed: {exc}")
|
||||
|
||||
def search_eurocamp(playwright):
    """Search Eurocamp for suitable parks.

    Visits the Eurocamp homepage, the France destinations page and a search
    URL built from ``CHECKIN``, ``CHECKOUT`` and ``PARTY``, taking a
    screenshot of each. The first 2000 characters of the search page's body
    text, plus the search URL, are saved to ``eurocamp_raw.json`` via
    ``save_results``.

    Any exception during the visit is printed, an ``eurocamp_error``
    screenshot is attempted, and the browser is closed in all cases.

    Args:
        playwright: The object yielded by ``sync_playwright()``.

    Returns:
        list: Always empty at present. No structured park data is extracted;
        the raw capture is written to disk instead.
    """
    print("\n🏨 Searching Eurocamp...")
    # headless=False launches a visible browser window.
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context(
        viewport={"width": 1920, "height": 1080},
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    )
    page = context.new_page()

    parks = []  # never populated below; returned as-is

    try:
        # Go to Eurocamp
        print(" Loading Eurocamp homepage...")
        page.goto("https://www.eurocamp.co.uk", timeout=60000)
        page.wait_for_load_state("networkidle", timeout=30000)
        time.sleep(2)

        take_screenshot(page, "eurocamp_home")

        # Dismiss the cookie consent banner if one is present. Best effort:
        # a failure is reported but must not abort the scrape.
        try:
            accept_btn = page.query_selector("button:has-text('Accept'), button:has-text('accept'), #onetrust-accept-btn-handler")
            if accept_btn:
                accept_btn.click()
                time.sleep(1)
        except Exception as exc:
            print(f" ⚠️ Could not dismiss cookie banner: {exc}")

        # Navigate to destinations to find parks with pools
        print(" Navigating to France destinations...")
        page.goto("https://www.eurocamp.co.uk/destinations/france", timeout=60000)
        page.wait_for_load_state("networkidle", timeout=30000)
        take_screenshot(page, "eurocamp_france")

        parks_info = {
            "source": "Eurocamp",
            "url": "https://www.eurocamp.co.uk",
            "scraped_at": datetime.now().isoformat(),
            "parks": [],
        }

        # Count elements whose class name hints at a park listing; only the
        # count is reported, the elements themselves are not parsed.
        park_cards = page.query_selector_all("[class*='park'], [class*='card'], [class*='listing']")
        print(f" Found {len(park_cards)} potential park elements")

        # Navigate to a search results page for the configured dates/party
        print(" Trying search with dates...")
        search_url = f"https://www.eurocamp.co.uk/search?checkin={CHECKIN}&checkout={CHECKOUT}&adults={PARTY['adults']}&children={PARTY['children']}"
        page.goto(search_url, timeout=60000)
        page.wait_for_load_state("networkidle", timeout=30000)
        take_screenshot(page, "eurocamp_search")

        # Keep a sample of the visible text for manual analysis
        page_text = page.inner_text("body")

        parks_info["page_text_sample"] = page_text[:2000]
        parks_info["search_url"] = search_url

        save_results(parks_info, "eurocamp_raw.json")

    except Exception as e:
        print(f" ❌ Error: {e}")
        take_screenshot(page, "eurocamp_error")

    finally:
        browser.close()

    return parks
|
||||
|
||||
def search_siblu(playwright):
    """Search Siblu for holiday villages.

    Visits the Siblu homepage and the ``/our-villages`` page, taking a
    screenshot of each. The first 2000 characters of the homepage body text
    and, when the villages page loads, the first 3000 characters of its body
    text are saved to ``siblu_raw.json`` via ``save_results``.

    Any exception during the visit is printed, a ``siblu_error`` screenshot
    is attempted, and the browser is closed in all cases.

    Args:
        playwright: The object yielded by ``sync_playwright()``.

    Returns:
        list: Always empty at present. No structured village data is
        extracted; the raw capture is written to disk instead.
    """
    print("\n🏨 Searching Siblu...")
    # headless=False launches a visible browser window.
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context(
        viewport={"width": 1920, "height": 1080},
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    )
    page = context.new_page()

    parks = []  # never populated below; returned as-is

    try:
        print(" Loading Siblu homepage...")
        page.goto("https://www.siblu.co.uk", timeout=60000)
        page.wait_for_load_state("networkidle", timeout=30000)
        time.sleep(2)

        take_screenshot(page, "siblu_home")

        # Dismiss the cookie consent banner if one is present. Best effort:
        # a failure is reported but must not abort the scrape.
        try:
            accept_btn = page.query_selector("button:has-text('Accept'), #onetrust-accept-btn-handler")
            if accept_btn:
                accept_btn.click()
                time.sleep(1)
        except Exception as exc:
            print(f" ⚠️ Could not dismiss cookie banner: {exc}")

        villages_info = {
            "source": "Siblu",
            "url": "https://www.siblu.co.uk",
            "scraped_at": datetime.now().isoformat(),
            "villages": [],
        }

        # Keep a sample of the homepage text
        page_text = page.inner_text("body")
        villages_info["page_text_sample"] = page_text[:2000]

        # Try to capture the villages page too. It is optional, so a failure
        # here is reported and the homepage capture is still saved.
        try:
            print(" Looking for villages list...")
            page.goto("https://www.siblu.co.uk/our-villages", timeout=60000)
            page.wait_for_load_state("networkidle", timeout=30000)
            take_screenshot(page, "siblu_villages")

            page_text = page.inner_text("body")
            villages_info["villages_page_text"] = page_text[:3000]
        except Exception as exc:
            print(f" ⚠️ Could not capture villages page: {exc}")

        save_results(villages_info, "siblu_raw.json")

    except Exception as e:
        print(f" ❌ Error: {e}")
        take_screenshot(page, "siblu_error")

    finally:
        browser.close()

    return parks
|
||||
|
||||
def search_alfresco(playwright):
    """Capture the Al Fresco Holidays homepage.

    Loads the homepage in a Chromium browser, takes an ``alfresco_home``
    screenshot and saves the first 2000 characters of the body text to
    ``alfresco_raw.json``. Errors are printed and followed by an
    ``alfresco_error`` screenshot attempt; the browser is always closed.

    Args:
        playwright: The object yielded by ``sync_playwright()``.
    """
    print("\n🏨 Searching Al Fresco Holidays...")
    browser = playwright.chromium.launch(headless=False)
    page = browser.new_context(
        viewport={"width": 1920, "height": 1080},
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    ).new_page()

    try:
        print(" Loading Al Fresco homepage...")
        page.goto("https://www.alfresco-holidays.com", timeout=60000)
        page.wait_for_load_state("networkidle", timeout=30000)
        time.sleep(2)

        take_screenshot(page, "alfresco_home")

        # Build the whole record in one literal; the timestamp is still taken
        # before the body text is read, as dict items evaluate in order.
        snapshot = {
            "source": "Al Fresco Holidays",
            "url": "https://www.alfresco-holidays.com",
            "scraped_at": datetime.now().isoformat(),
            "page_text_sample": page.inner_text("body")[:2000],
        }
        save_results(snapshot, "alfresco_raw.json")
    except Exception as err:
        print(f" ❌ Error: {err}")
        take_screenshot(page, "alfresco_error")
    finally:
        browser.close()
|
||||
|
||||
def search_yelloh(playwright):
    """Capture the Yelloh! Village homepage.

    Loads the homepage in a Chromium browser, takes a ``yelloh_home``
    screenshot and saves the first 2000 characters of the body text to
    ``yelloh_raw.json``. Errors are printed and followed by a
    ``yelloh_error`` screenshot attempt; the browser is always closed.

    Args:
        playwright: The object yielded by ``sync_playwright()``.
    """
    print("\n🏨 Searching Yelloh! Village...")
    browser = playwright.chromium.launch(headless=False)
    browser_context = browser.new_context(
        viewport={"width": 1920, "height": 1080},
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    )
    tab = browser_context.new_page()

    try:
        print(" Loading Yelloh! Village homepage...")
        tab.goto("https://www.yellohvillage.co.uk", timeout=60000)
        tab.wait_for_load_state("networkidle", timeout=30000)
        time.sleep(2)

        take_screenshot(tab, "yelloh_home")

        # Timestamp first, then the body text, matching the record layout
        # used for the other providers.
        record = {
            "source": "Yelloh! Village",
            "url": "https://www.yellohvillage.co.uk",
            "scraped_at": datetime.now().isoformat(),
        }
        body_text = tab.inner_text("body")
        record["page_text_sample"] = body_text[:2000]

        save_results(record, "yelloh_raw.json")
    except Exception as err:
        print(f" ❌ Error: {err}")
        take_screenshot(tab, "yelloh_error")
    finally:
        browser.close()
|
||||
|
||||
def main():
    """Run every provider search and print a start/finish banner.

    Prints the configured dates, party and budget, makes sure the
    ``holiday-planning/screenshots`` directory exists, then calls each
    provider search in turn inside a single ``sync_playwright()`` session.
    """
    import os

    rule = "=" * 60

    print(rule)
    print("HOLIDAY PARK RESEARCH")
    print(f"Dates: {CHECKIN} to {CHECKOUT} ({DURATION} nights)")
    print(f"Party: {PARTY['adults']} adults, {PARTY['children']} child(ren)")
    print(f"Budget: £{BUDGET}")
    print(rule)

    # Screenshots (and, via the parent directory, the JSON output) land here.
    os.makedirs("holiday-planning/screenshots", exist_ok=True)

    with sync_playwright() as playwright:
        # Providers are visited one after another, in this order.
        for search in (search_eurocamp, search_siblu, search_alfresco, search_yelloh):
            search(playwright)

    print("\n" + rule)
    print("SCRAPING COMPLETE")
    print(rule)
|
||||
|
||||
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user