From b577a05102b93bd28cbdcb082f477b158dc6291d Mon Sep 17 00:00:00 2001 From: Hiroshi Nishio Date: Tue, 7 Jan 2025 15:05:09 +0900 Subject: [PATCH 1/2] Improve Google rate limit error handling: google_search encountered an KeyError: 'x-ratelimit-limit' in handle_exceptions() --- utils/handle_exceptions.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/utils/handle_exceptions.py b/utils/handle_exceptions.py index b600ca25..5ac47fb5 100644 --- a/utils/handle_exceptions.py +++ b/utils/handle_exceptions.py @@ -13,21 +13,28 @@ def handle_exceptions( - default_return_value: Any = None, raise_on_error: bool = False + default_return_value: Any = None, + raise_on_error: bool = False, + api_type: str = "github", # "github" or "google" ) -> Callable[[F], F]: """https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit""" def decorator(func: F) -> F: @wraps(wrapped=func) def wrapper(*args: Tuple[Any, ...], **kwargs: Any): - truncated_kwargs = str({k: str(v)[:50] + '...' if len(str(v)) > 50 else v for k, v in kwargs.items()}) + truncated_kwargs = str( + { + k: str(v)[:50] + "..." 
if len(str(v)) > 50 else v + for k, v in kwargs.items() + } + ) try: return func(*args, **kwargs) except requests.exceptions.HTTPError as err: reason: str | Any = err.response.reason text: str | Any = err.response.text - if err.response.status_code in {403, 429}: + if api_type == "github" and err.response.status_code in {403, 429}: limit = int(err.response.headers["X-RateLimit-Limit"]) remaining = int(err.response.headers["X-RateLimit-Remaining"]) used = int(err.response.headers["X-RateLimit-Used"]) @@ -56,6 +63,13 @@ def wrapper(*args: Tuple[Any, ...], **kwargs: Any): if raise_on_error: raise + elif api_type == "google" and err.response.status_code == 429: + retry_after = int(err.response.headers.get("Retry-After", 60)) + err_msg = f"Google Search Rate Limit: {func.__name__} will retry after {retry_after} seconds" + logging.warning(msg=err_msg) + time.sleep(retry_after) + return wrapper(*args, **kwargs) + # Ex) 409: Conflict, 422: Unprocessable Entity (No changes made), and etc. else: err_msg = f"{func.__name__} encountered an HTTPError: {err}\nArgs: {args}\nKwargs: {truncated_kwargs}. Reason: {reason}. Text: {text}\n" From 5caa8fba3d5ac4030feda9f11246474abc4d9fbb Mon Sep 17 00:00:00 2001 From: Hiroshi Nishio Date: Tue, 7 Jan 2025 15:06:36 +0900 Subject: [PATCH 2/2] Add type annotations to google_search local variables --- services/google/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/google/search.py b/services/google/search.py index 861b4790..1fc0cd6b 100644 --- a/services/google/search.py +++ b/services/google/search.py @@ -76,8 +76,8 @@ def google_search( num_results: int = NUM_RESULTS_DEFAULT, lang: str = "en", ): - urls = search_urls(query=query, num_results=num_results, lang=lang) - contents = [] + urls: list[dict[str, str]] = search_urls(query=query, num_results=num_results, lang=lang) + contents: list[str] = [] for url in urls: contents.append(scrape_content_from_url(url["url"])) return contents