From 52d1c415c448d5ee105499c9eb5f9f3dcedf1e9d Mon Sep 17 00:00:00 2001
From: Martin Pitt
Date: Thu, 28 Nov 2024 08:50:33 +0100
Subject: [PATCH] tools: Ignore gnome.org in urls-check

https://gnome.org keeps failing with "403 Forbidden" when running in a
GitHub action, presumably due to some IP range (or User-Agent) ban? It
works fine locally.

We know that https://www.gnome.org is fine, so generalize our existing
ignore mechanism for example.com and add it to that list.

Fixes #21340
---
 tools/urls-check | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/urls-check b/tools/urls-check
index effd7f4189a7..f17591b03491 100755
--- a/tools/urls-check
+++ b/tools/urls-check
@@ -36,6 +36,11 @@ TASK_NAME = "Validate all URLs"
 
 USER_AGENT = "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:90.0) Gecko/20100101 Firefox/90.0"
 
+IGNORE = [
+    "https://www.gnome.org",  # keeps causing 403 from GitHub; DNS or user-agent ban?
+    "*example.com*",  # some tests use demo urls
+]
+
 KNOWN_REDIRECTS = [
     # fnmatch-like
     "https://access.redhat.com/security/updates/classification/#",
@@ -113,7 +118,9 @@ def check_urls(verbose):
         if not url:
             continue
 
-        if urllib.parse.urlparse(url).hostname.endswith(".example.com"):  # Some tests use demo urls
+        if any(fnmatch.fnmatch(url, pattern) for pattern in IGNORE):
+            if verbose:
+                print(f"Ignoring: {url}")
             continue
 
         if verbose: