From 448261f0489e74471cc6dbb05b727c2daad03799 Mon Sep 17 00:00:00 2001 From: Hiroshi Nishio <4620828+hiroshinishio@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:18:44 +0900 Subject: [PATCH] Improve a rate limit exception in handle_exceptions() 2 --- cloudformation.yml | 2 +- scheduler.py | 9 +++++-- services/github/github_manager.py | 4 ++-- utils/handle_exceptions.py | 39 +++++++++++++++++++++++-------- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/cloudformation.yml b/cloudformation.yml index 5e6ca370..7f212b8e 100644 --- a/cloudformation.yml +++ b/cloudformation.yml @@ -20,7 +20,7 @@ Resources: Properties: Name: SchedulerEventRule Description: "Schedule Lambda function to run every weekday at 0 AM UTC" - ScheduleExpression: cron(36 5 ? * MON-FRI *) # min hour day month day-of-week year + ScheduleExpression: cron(28 6 ? * MON-FRI *) # min hour day month day-of-week year State: ENABLED Targets: - Arn: !Ref LambdaFunctionArn diff --git a/scheduler.py b/scheduler.py index c7b13cb4..4b6ec9b9 100644 --- a/scheduler.py +++ b/scheduler.py @@ -26,8 +26,13 @@ def schedule_handler(_event, _context) -> dict[str, int]: # Pause for 1+ second to avoid secondary rate limits. https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28#pause-between-mutative-requests time.sleep(1) - # Start processing this loop. - token: str = get_installation_access_token(installation_id=installation_id) + # Get the installation access token for each installation ID. + token = get_installation_access_token(installation_id=installation_id) + if token is None: + logging.info("Token is None for installation_id: %s, so skipping", installation_id) + continue + + # Get all owners and repositories for each installation ID. owners_repos: list[dict[str, str]] = get_installed_owners_and_repos( installation_id=installation_id, token=token ) diff --git a/services/github/github_manager.py b/services/github/github_manager.py index c918652f..f49e6543 100644 --- a/services/github/github_manager.py +++ b/services/github/github_manager.py @@ -328,8 +328,8 @@ def initialize_repo(repo_path: str, remote_url: str) -> None: run_command(command="git push -u origin main", cwd=repo_path) -@handle_exceptions(raise_on_error=True) -def get_installation_access_token(installation_id: int) -> str: +@handle_exceptions(default_return_value=None, raise_on_error=False) +def get_installation_access_token(installation_id: int) -> str | None: """https://docs.github.com/en/rest/apps/apps?apiVersion=2022-11-28#create-an-installation-access-token-for-an-app""" jwt_token: str = create_jwt() response: requests.Response = requests.post( diff --git a/utils/handle_exceptions.py b/utils/handle_exceptions.py index 85d784ac..90aef59e 100644 --- a/utils/handle_exceptions.py +++ b/utils/handle_exceptions.py @@ -23,20 +23,39 @@ def wrapper(*args: Tuple[Any, ...], **kwargs: Any): try: return func(*args, **kwargs) except requests.exceptions.HTTPError as err: - if ( - err.response.status_code == 403 - and "X-RateLimit-Reset" in err.response.headers - ): + if err.response.status_code in {403, 429}: limit = int(err.response.headers["X-RateLimit-Limit"]) remaining = int(err.response.headers["X-RateLimit-Remaining"]) used = int(err.response.headers["X-RateLimit-Used"]) - reset_timestamp = int(err.response.headers["X-RateLimit-Reset"]) - current_timestamp = int(time.time()) - wait_time = reset_timestamp - current_timestamp - err_msg = f"{func.__name__} encountered a GitHubRateLimitError: {err}. Retrying after {wait_time} seconds. Limit: {limit}, Remaining: {remaining}, Used: {used}" + + # Check if the primary rate limit has been exceeded + if remaining == 0: + reset_ts = int(err.response.headers.get("X-RateLimit-Reset", 0)) + current_ts = int(time.time()) + wait_time = reset_ts - current_ts + err_msg = f"{func.__name__} encountered a GitHubPrimaryRateLimitError: {err}. Retrying after {wait_time} seconds. Limit: {limit}, Remaining: {remaining}, Used: {used}" + logging.error(msg=err_msg) + time.sleep(wait_time + 5) # 5 seconds is a buffer + return wrapper(*args, **kwargs) + + # Check if the secondary rate limit has been exceeded + if "exceeded a secondary rate limit" in err.response.text.lower(): + retry_after = int(err.response.headers.get("Retry-After", 60)) + err_msg = f"{func.__name__} encountered a GitHubSecondaryRateLimitError: {err}. Retrying after {retry_after} seconds. Limit: {limit}, Remaining: {remaining}, Used: {used}" + logging.error(msg=err_msg) + time.sleep(retry_after) + return wrapper(*args, **kwargs) + + # Otherwise, log the error and return the default return value + err_msg = f"{func.__name__} encountered an HTTPError: {err}. Limit: {limit}, Remaining: {remaining}, Used: {used}" logging.error(msg=err_msg) - time.sleep(wait_time + 5) # 5 seconds is a buffer - return wrapper(*args, **kwargs) + if raise_on_error: + raise + else: + err_msg = f"{func.__name__} encountered an HTTPError: {err}" + logging.error(msg=err_msg) + if raise_on_error: + raise except (AttributeError, KeyError, TypeError, Exception) as err: error_msg = f"{func.__name__} encountered an {type(err).__name__}: {err}\nArgs: {args}\nKwargs: {kwargs}" logging.error(msg=error_msg)