From 4691a6eb52e9d04e04d157d5186dc86797eefd54 Mon Sep 17 00:00:00 2001 From: Mateusz Krainski Date: Wed, 27 Nov 2024 21:11:23 -0500 Subject: [PATCH] Fix change of terminology for hiding separate tickets. Add option to log screenshots and launch the app in non-headless mode. --- yafs/.gitignore | 2 ++ yafs/yafs/flights.py | 73 +++++++++++++++++++++++++++++++++++++++++--- yafs/yafs/main.py | 11 +++++-- 3 files changed, 80 insertions(+), 6 deletions(-) diff --git a/yafs/.gitignore b/yafs/.gitignore index 0845884..3761c8b 100644 --- a/yafs/.gitignore +++ b/yafs/.gitignore @@ -162,3 +162,5 @@ cython_debug/ #.idea/ test-page.png + +.screenshots/ diff --git a/yafs/yafs/flights.py b/yafs/yafs/flights.py index 3e3da1f..c8b9125 100644 --- a/yafs/yafs/flights.py +++ b/yafs/yafs/flights.py @@ -1,4 +1,6 @@ -from playwright.async_api import async_playwright +from functools import wraps + +from playwright.async_api import TimeoutError, async_playwright from yafs.utils import extract_numeric_and_non_numeric, insert_space_before_capital @@ -11,7 +13,10 @@ async def _get_flights( destination_airport_code, departure_date, return_date, + verbose=False, ): + _configure_verbose(verbose) + page = await browser.new_page() await _go_to_google_flights(page) @@ -41,10 +46,39 @@ async def _get_flights( return data +verbose = False + + +def _configure_verbose(verbose_setting): + global verbose + verbose = verbose_setting + + +def capture_result_screenshot(f): + global verbose + + @wraps(f) + async def wrapper(page, *args, **kwargs): + await _screenshot(page, f"before_{f.__name__}", verbose) + result = await f(page, *args, **kwargs) + await _screenshot(page, f"after_{f.__name__}", verbose) + return result + + return wrapper + + +async def _screenshot(page, screenshot_name, verbose=False): + if not verbose: + return + await page.screenshot(path=f".screenshots/{screenshot_name}.png") + + +@capture_result_screenshot async def _go_to_google_flights(page): await page.goto("https://www.google.com/travel/flights/search") # noqa: SC200 +@capture_result_screenshot async def _input_location(page, label, location_name, location_code): location = page.get_by_label(label, exact=True) await location.fill(location_name) @@ -53,14 +87,17 @@ async def _input_location(page, label, location_name, location_code): await airport.click() +@capture_result_screenshot async def _input_where_to(page, destination_name, destination_code): await _input_location(page, "Where to?", destination_name, destination_code) +@capture_result_screenshot async def _input_where_from(page, origin_name, origin_code): await _input_location(page, "Where from?", origin_name, origin_code) +@capture_result_screenshot async def _input_date(page, placeholder, date): date_input = page.get_by_placeholder(placeholder).nth(0) await page.wait_for_timeout(500) @@ -70,40 +107,65 @@ async def _input_date(page, placeholder, date): await page.wait_for_timeout(100) +@capture_result_screenshot async def _input_departure_date(page, date): await _input_date(page, "Departure", date) +@capture_result_screenshot async def _input_return_date(page, date): await _input_date(page, "Return", date) +@capture_result_screenshot async def _wait(page, timeout=1000): await page.wait_for_timeout(timeout) +@capture_result_screenshot async def _press_escape(page): await page.keyboard.press("Escape") +@capture_result_screenshot async def _click_search(page): search = page.get_by_label("Search", exact=True) await page.wait_for_timeout(100) await search.click() +@capture_result_screenshot async def _hide_separate_tickets_filter(page): filters = page.get_by_label("All filters") await filters.click() await page.wait_for_timeout(1000) - hide_separate = page.get_by_text("Hide separate tickets") - await hide_separate.scroll_into_view_if_needed() + # Apparently, the exact phrasing can differ based on the browser or some other + # factor. I saw the first string in Chromium and the second one in Chrome. + # This is using Chromium to run the scraper, but I am leaving the other one in case + # they decide to align the terminology. + values_to_try = [ + "Hide separate & self-transfer tickets", + "Hide separate and self-transfer tickets", + ] + for description in values_to_try: + try: + hide_separate = page.get_by_text(description) + await hide_separate.scroll_into_view_if_needed() + break + except TimeoutError: + continue + else: + raise Exception( + "When trying to disable separate tickets, none of the values to try " + "have found a match." + ) await hide_separate.click() await page.wait_for_timeout(1000) await page.keyboard.press("Escape") +@capture_result_screenshot async def _parse_results(page, **kwargs): data = [] for row in await page.get_by_role("listitem").all(): @@ -139,6 +201,7 @@ async def get_flights( destination_airport_code: str, departure_date: str, return_date: str, + verbose: bool = False, ): """Get flights for the selected connection. @@ -149,12 +212,13 @@ async def get_flights( destination_airport_code (str): code of the destination airport departure_date (str): date of departure, e.g. "21 Jul" or "21 July" return_date (str): return date, e.g. "28 Jul" or "28 July" + verbose (bool): turn on verbose logging Returns: list: List of dicts containing metadata of discovered connections """ async with async_playwright() as p: - browser = await p.chromium.launch() + browser = await p.chromium.launch(headless=(not verbose)) data = await _get_flights( browser, origin_airport_name, @@ -163,6 +227,7 @@ async def get_flights( destination_airport_code, departure_date, return_date, + verbose, ) await browser.close() return data diff --git a/yafs/yafs/main.py b/yafs/yafs/main.py index 1f1a840..9971a2a 100644 --- a/yafs/yafs/main.py +++ b/yafs/yafs/main.py @@ -49,6 +49,11 @@ def wrapper(*args, **kwargs): "parameter names will be used" ), ) +@click.option( + "--verbose/--no-verbose", + default=False, + help="Turns on verbose logging. Warning, this will also save screenshots", +) @as_coroutine async def yafs( origin_airport, @@ -56,6 +61,7 @@ async def yafs( departure_date, return_date, result_filename, + verbose, ): for i in range(MAX_RETRIES): try: @@ -66,11 +72,12 @@ async def yafs( destination_airport[1], departure_date, return_date, + verbose, ) except Exception as e: print( - f"Run failed due to {e}. This was attempt {i + 1}/{MAX_RETRIES}. " - "Restarting..." + f"Run failed due to ({type(e)}) {e}. This was attempt " + f"{i + 1}/{MAX_RETRIES}. Restarting..." ) else: break