diff --git a/utils/extract_urls.py b/utils/extract_urls.py
index 8584e05f..9cc010be 100644
--- a/utils/extract_urls.py
+++ b/utils/extract_urls.py
@@ -4,13 +4,17 @@
@handle_exceptions(default_return_value=[], raise_on_error=False)
def extract_image_urls(text: str) -> list[dict[str, str]]:
- """Extract alt text and URLs from img tags in the given text.
+ """Extract alt text and URLs from img tags in the given text. Excludes SVG images (URL path ending in .svg, case-insensitive, ignoring any ?query or #fragment).
Example:
"""
pattern = r']*alt="([^"]*)"[^>]*src="([^"]*)"[^>]*>'
matches = findall(pattern, text)
- return [{"alt": alt, "url": url} for alt, url in matches]
+ return [
+ {"alt": alt, "url": url}
+ for alt, url in matches
+ if not url.lower().split("?", 1)[0].split("#", 1)[0].endswith(".svg")
+ ]
def extract_urls(text: str) -> tuple[list[str], list[str]]: