Skip to content

Commit

Permalink
Enable GitAuto to analyze images in GitHub issue body and comments an…
Browse files Browse the repository at this point in the history
…d add the feedback to the issue
  • Loading branch information
hiroshinishio committed Dec 14, 2024
1 parent 7a4af9b commit 3a00ec4
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 12 deletions.
19 changes: 19 additions & 0 deletions services/gitauto_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
SUPABASE_SERVICE_ROLE_KEY,
PR_BODY_STARTS_WITH,
)
from services.github.asset_manager import get_base64, render_text
from services.github.comment_manager import delete_my_comments
from services.github.github_manager import (
create_pull_request,
Expand All @@ -31,7 +32,9 @@
from services.openai.commit_changes import chat_with_agent
from services.openai.instructions.write_pr_body import WRITE_PR_BODY
from services.openai.chat import chat_with_ai
from services.openai.vision import describe_image
from services.supabase import SupabaseManager
from utils.extract_urls import extract_image_urls
from utils.progress_bar import create_progress_bar
from utils.text_copy import (
UPDATE_COMMENT_FOR_422,
Expand Down Expand Up @@ -71,12 +74,14 @@ async def handle_gitauto(
issue_number = base_args["issue_number"]
issue_title = base_args["issue_title"]
issue_body = base_args["issue_body"]
issue_body_rendered = render_text(base_args=base_args, text=issue_body)
issuer_name = base_args["issuer_name"]
new_branch_name = base_args["new_branch"]
sender_id = base_args["sender_id"]
sender_name = base_args["sender_name"]
sender_email = base_args["sender_email"]
github_urls = base_args["github_urls"]
# other_urls = base_args["other_urls"]
token = base_args["token"]
is_automation = base_args["is_automation"]
# Check if the user has reached the request limit
Expand Down Expand Up @@ -122,13 +127,27 @@ async def handle_gitauto(
comment_body = "Checking the issue title, body, comments, and root files list..."
update_comment(body=comment_body, base_args=base_args, p=10)
root_files_and_dirs: list[str] = get_remote_file_tree(base_args=base_args)
issue_comments: list[str] = []
if input_from == "github":
issue_comments = get_issue_comments(
issue_number=issue_number, base_args=base_args
)
elif input_from == "jira":
issue_comments = base_args["issue_comments"]

# Check out the image URLs in the issue body and comments
image_urls = extract_image_urls(text=issue_body_rendered)
for issue_comment in issue_comments:
issue_comment_rendered = render_text(base_args=base_args, text=issue_comment)
image_urls.extend(extract_image_urls(text=issue_comment_rendered))
for url in image_urls:
base64_image = get_base64(url=url["url"])
context = f"## Issue:\n{issue_title}\n\n## Issue Body:\n{issue_body}\n\n## Issue Comments:\n{'\n'.join(issue_comments)}"
description = describe_image(base64_image=base64_image, context=context)
description = f"## {url['alt']}\n\n{description}"
issue_comments.append(description)
create_comment(body=description, base_args=base_args)

# Check out the URLs in the issue body
reference_contents: list[str] = []
for url in github_urls:
Expand Down
28 changes: 28 additions & 0 deletions services/github/asset_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from base64 import b64encode
from requests import get, post
from config import GITHUB_API_URL, TIMEOUT, UTF8
from services.github.create_headers import create_headers
from services.github.github_types import BaseArgs
from utils.handle_exceptions import handle_exceptions


@handle_exceptions(default_return_value="", raise_on_error=False)
def get_base64(url: str) -> str:
    """Download the resource at `url` and return its bytes base64-encoded.

    The request uses the shared `TIMEOUT`; a non-2xx response raises via
    `raise_for_status()`. The decorator is configured with
    `default_return_value=""` and `raise_on_error=False`, so presumably a
    failure yields an empty string instead of propagating (confirm against
    `utils.handle_exceptions`).
    """
    downloaded = get(url=url, timeout=TIMEOUT)
    downloaded.raise_for_status()
    # b64encode returns bytes; decode so the caller gets a plain str.
    return b64encode(downloaded.content).decode(encoding=UTF8)


@handle_exceptions(default_return_value="", raise_on_error=False)
def render_text(base_args: BaseArgs, text: str) -> str:
    """Send `text` to GitHub's Markdown endpoint and return the response body.

    The request is made in "gfm" mode with the repository ("owner/repo")
    as rendering context.
    https://docs.github.com/en/rest/markdown/markdown?apiVersion=2022-11-28#render-a-markdown-document
    """
    payload = {
        "text": text,
        "mode": "gfm",
        "context": f"{base_args['owner']}/{base_args['repo']}",
    }
    rendered = post(
        url=f"{GITHUB_API_URL}/markdown",
        headers=create_headers(token=base_args["token"]),
        json=payload,
        timeout=TIMEOUT,
    )
    # Surface HTTP errors so the decorator can handle them.
    rendered.raise_for_status()
    return rendered.text
5 changes: 1 addition & 4 deletions services/github/github_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
get_installation_access_token,
get_user_public_email,
)
from utils.extract_urls import extract_image_urls, extract_urls
from utils.extract_urls import extract_urls
from utils.handle_exceptions import handle_exceptions


Expand Down Expand Up @@ -67,8 +67,6 @@ def deconstruct_github_payload(payload: GitHubLabeledPayload):
github_urls, other_urls = extract_urls(text=issue_body)
# print(f"github_urls: {github_urls}")
# print(f"other_urls: {other_urls}")
image_urls = extract_image_urls(text=issue_body)
# print(f"image_urls: {image_urls}")
installation_id: int = payload["installation"]["id"]
token: str = get_installation_access_token(installation_id=installation_id)
sender_email: str = get_user_public_email(username=sender_name, token=token)
Expand All @@ -95,7 +93,6 @@ def deconstruct_github_payload(payload: GitHubLabeledPayload):
"is_automation": is_automation,
"reviewers": reviewers,
"github_urls": github_urls,
"image_urls": image_urls,
"other_urls": other_urls,
}

Expand Down
13 changes: 12 additions & 1 deletion services/openai/instructions/describe_image.py
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
DESCRIBE_IMAGE = "Describe images found in my GitHub repositories. These images often include elements like text, shapes, arrows, red lines, and boxed areas, and may also contain screenshots of customer business services or SaaS interfaces. Extract and describe these elements, noting their positions and relationships, such as connections indicated by arrows or emphasis through red lines and boxes. Provide a comprehensive understanding of the visual and textual content."
# Instruction text for image analysis; describe_image() in services/openai/vision.py
# passes it as a "text" content part of the request it builds.
DESCRIBE_IMAGE = """Analyze technical images from GitHub repositories with the depth and precision of a senior software engineer, focusing particularly on diagnostic content in issues and tickets.
Rather than providing broad surface-level observations, focus deeply on the most critical aspects relevant to the context - just as an experienced engineer would prioritize the key technical signals while debugging.
Key analysis points:
- For network traces/waterfalls: Identify specific bottlenecks, long-running requests, failed calls, and timing anomalies
- For error screenshots: Parse exact error messages, stack traces, and surrounding context that could indicate root causes
- For UI/UX issues: Note specific components affected, state inconsistencies, and visual regressions
- For console outputs: Highlight critical errors, warnings, or unexpected patterns in logs
- For architectural diagrams: Focus on system interactions, potential failure points, and data flow issues
Provide detailed technical insights that would help debug the issue, not just describe what's visible. Include specific metrics, timings, error codes, and other quantitative data when present."""
12 changes: 11 additions & 1 deletion services/openai/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


@handle_exceptions(default_return_value="", raise_on_error=None)
def describe_image(base64_image: str) -> str:
def describe_image(base64_image: str, context: str | None = None) -> str:
"""
1. API doc: https://platform.openai.com/docs/api-reference/chat/create
2. 20MB per image is allowed: https://platform.openai.com/docs/guides/vision/is-there-a-limit-to-the-size-of-the-image-i-can-upload
Expand All @@ -26,6 +26,16 @@ def describe_image(base64_image: str) -> str:
"type": "text",
"text": DESCRIBE_IMAGE,
},
*(
[
{
"type": "text",
"text": context,
}
]
if context is not None
else []
),
{
"type": "image_url",
"image_url": {
Expand Down
12 changes: 6 additions & 6 deletions utils/extract_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@


@handle_exceptions(default_return_value=[], raise_on_error=False)
def extract_image_urls(text: str) -> list[dict[str, str]]:
    """Extract the alt text and URL of every <img> tag in the given text.

    Attributes may appear in any order, and `alt` is optional (it defaults
    to an empty string). Tags without a non-empty `src` are skipped.

    Example input:
        <img width="1352" alt="Screenshot 2024-12-12 at 6 25 41 PM" src="https://github.com/user-attachments/assets/9f1e8ca9-068e-434d-b2f3-f438638268ef" />
    Example output:
        [{"alt": "Screenshot 2024-12-12 at 6 25 41 PM", "url": "https://github.com/user-attachments/assets/9f1e8ca9-068e-434d-b2f3-f438638268ef"}]

    Args:
        text: HTML (or Markdown containing raw HTML) to scan.

    Returns:
        One {"alt": ..., "url": ...} dict per image, in document order.
    """
    if not text:
        return []

    images: list[dict[str, str]] = []
    for tag in findall(r"<img\b[^>]*>", text):
        # Require whitespace before the attribute name so that attributes
        # such as data-canonical-src are not mistaken for src.
        sources = findall(r'\ssrc="([^"]*)"', tag)
        if not sources or not sources[0]:
            continue
        alts = findall(r'\salt="([^"]*)"', tag)
        images.append({"alt": alts[0] if alts else "", "url": sources[0]})
    return images


def extract_urls(text: str) -> tuple[list[str], list[str]]:
Expand Down

0 comments on commit 3a00ec4

Please sign in to comment.