Fix get_remote_file_content encountering a UnicodeDecodeError: 'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte
gitautoai · Jul 5, 2024 · fb21910 · fb21910
1 parent 9a35b71
commit fb21910
Show file tree

Hide file tree

Showing 6 changed files with 86 additions and 34 deletions.
diff --git a/cloudformation.yml b/cloudformation.yml
@@ -20,7 +20,7 @@ Resources:
     Properties:
       Name: SchedulerEventRule
       Description: "Schedule Lambda function to run every weekday at 0 AM UTC"
-      ScheduleExpression: cron(14 6 ? * MON-FRI *)  # min hour day month day-of-week year
+      ScheduleExpression: cron(0 0 ? * MON-FRI *)  # min hour day month day-of-week year
       State: ENABLED
       Targets:
         - Arn: !Ref LambdaFunctionArn

diff --git a/services/github/github_manager.py b/services/github/github_manager.py
@@ -34,6 +34,7 @@
     GitHubLabeledPayload,
     IssueInfo,
 )
+from services.openai.vision import describe_image
 from services.supabase import SupabaseManager
 from utils.file_manager import apply_patch, extract_file_name, run_command
 from utils.handle_exceptions import handle_exceptions
@@ -389,7 +390,6 @@ def get_issue_comments(
     )
     response.raise_for_status()
     comments: list[Any] = response.json()
-    print(f"GITHUB_APP_IDS: {GITHUB_APP_IDS}")
     filtered_comments: list[Any] = [
         comment
         for comment in comments
@@ -515,7 +515,13 @@ def get_remote_file_content(
         url=url, headers=headers, timeout=TIMEOUT_IN_SECONDS
     )
     response.raise_for_status()
-    encoded_content: str = response.json()["content"]
+    encoded_content: str = response.json()["content"]  # Base64 encoded content
+
+    # If encoded_content is image, describe the image content in text by vision API
+    if file_path.endswith((".png", ".jpeg", ".jpg", ".webp", ".gif")):
+        return describe_image(base64_image=encoded_content)
+
+    # Otherwise, decode the content
     decoded_content: str = base64.b64decode(s=encoded_content).decode(encoding=UTF8)
     return decoded_content
 

diff --git a/services/openai/agent.py b/services/openai/agent.py
@@ -274,6 +274,7 @@ def wait_on_run(run: Run, thread: Thread, token: str, run_name: str) -> tuple[Ru
                     run=run, funcs=functions, token=token
                 )
 
+                # The combined tool outputs must be less than 512kb.
                 tool_outputs_json: list[ToolOutput] = [
                     {"tool_call_id": tool_call.id, "output": json.dumps(obj=result)}
                     for tool_call, result in tool_outputs

diff --git a/services/openai/chat.py b/services/openai/chat.py
@@ -6,41 +6,39 @@
 from config import OPENAI_MODEL_ID, OPENAI_TEMPERATURE
 from services.openai.init import create_openai_client
 from services.openai.instructions import SYSTEM_INSTRUCTION_FOR_WRITING_PR
+from utils.handle_exceptions import handle_exceptions
 
 
+@handle_exceptions(raise_on_error=True)
 def write_pr_body(input_message: str) -> str:
     """https://platform.openai.com/docs/api-reference/chat/create"""
-    try:
-        client: OpenAI = create_openai_client()
-        completion: ChatCompletion = client.chat.completions.create(
-            messages=[
-                {"role": "system", "content": SYSTEM_INSTRUCTION_FOR_WRITING_PR},
-                {"role": "user", "content": input_message},
-            ],
-            model=OPENAI_MODEL_ID,
-            n=1,
-            temperature=OPENAI_TEMPERATURE,
-        )
-        content: str | None = completion.choices[0].message.content
-        response: str = content if content else "No response from OpenAI"
+    client: OpenAI = create_openai_client()
+    completion: ChatCompletion = client.chat.completions.create(
+        messages=[
+            {"role": "system", "content": SYSTEM_INSTRUCTION_FOR_WRITING_PR},
+            {"role": "user", "content": input_message},
+        ],
+        model=OPENAI_MODEL_ID,
+        n=1,
+        temperature=OPENAI_TEMPERATURE,
+    )
+    content: str | None = completion.choices[0].message.content
+    response: str = content if content else "No response from OpenAI"
 
-        # Check for backticks
-        if response[:4] == "```\n":
-            response = response[4:]
-        if response[:3] == "```":
-            response = response[3:]
-        if response.endswith("```"):
-            response = response[:-3]
+    # Strip a Markdown code fence wrapped around the reply (with or without a newline after it)
+    if response[:4] == "```\n":
+        response = response[4:]
+    if response[:3] == "```":
+        response = response[3:]
+    if response.endswith("```"):
+        response = response[:-3]
 
-        # Check for triple quotes
-        if response[:4] == '"""\n':
-            response = response[4:]
-        if response[:3] == '"""\n':
-            response = response[3:]
-        if response.endswith('"""'):
-            response = response[:-3]
+    # Strip triple quotes wrapped around the reply (with or without a newline after them)
+    if response[:4] == '"""\n':
+        response = response[4:]
+    if response[:3] == '"""':
+        response = response[3:]
+    if response.endswith('"""'):
+        response = response[:-3]
 
-        print(f"OpenAI response: {response}")
-        return response
-    except Exception as e:
-        raise ValueError(f"Error: {e}") from e
+    return response
diff --git a/services/openai/instructions.py b/services/openai/instructions.py
@@ -107,3 +107,5 @@
 Think step by step.
 """
 '''
+
+USER_INSTRUCTION = "Describe images found in my GitHub repositories. These images often include elements like text, shapes, arrows, red lines, and boxed areas, and may also contain screenshots of customer business services or SaaS interfaces. Extract and describe these elements, noting their positions and relationships, such as connections indicated by arrows or emphasis through red lines and boxes. Provide a comprehensive understanding of the visual and textual content."
diff --git a/services/openai/vision.py b/services/openai/vision.py
@@ -0,0 +1,45 @@
+# Third-party imports
+from openai import OpenAI
+from openai.types.chat import ChatCompletion
+
+# Local imports
+from config import OPENAI_MODEL_ID, OPENAI_TEMPERATURE
+from services.openai.init import create_openai_client
+from services.openai.instructions import USER_INSTRUCTION
+from utils.handle_exceptions import handle_exceptions
+
+
+@handle_exceptions(default_return_value="", raise_on_error=None)
+def describe_image(base64_image: str) -> str:
+    """Describe a base64-encoded image in text using the OpenAI chat completions API.
+
+    1. API doc: https://platform.openai.com/docs/api-reference/chat/create
+    2. 20MB per image is allowed: https://platform.openai.com/docs/guides/vision/is-there-a-limit-to-the-size-of-the-image-i-can-upload
+    3. PNG (.png), JPEG (.jpeg and .jpg), WEBP (.webp), and non-animated GIF (.gif) are only supported: https://platform.openai.com/docs/guides/vision/what-type-of-files-can-i-upload
+
+    Returns the stripped model reply, or "No response from OpenAI" if it is empty or missing.
+    """
+    # Line breaks inside the base64 text (if any) are dropped so the data URL is one unbroken token.
+    payload: str = "".join(base64_image.split())
+    # Pick the media type from the base64 form of each format's magic bytes; JPEG stays the fallback.
+    prefixes: dict[str, str] = {"iVBORw0KGgo": "png", "R0lGOD": "gif", "UklGR": "webp"}
+    subtype: str = next((v for k, v in prefixes.items() if payload.startswith(k)), "jpeg")
+    image_url: str = f"data:image/{subtype};base64,{payload}"
+    client: OpenAI = create_openai_client()
+    completion: ChatCompletion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": USER_INSTRUCTION},
+                    {"type": "image_url", "image_url": {"url": image_url, "detail": "auto"}},
+                ],
+            },
+        ],
+        model=OPENAI_MODEL_ID,
+        n=1,
+        temperature=OPENAI_TEMPERATURE,
+    )
+    # content may be None; guard before strip() so the fallback text is used instead of an AttributeError.
+    content: str | None = completion.choices[0].message.content
+    return (content or "").strip() or "No response from OpenAI"
-Original file line number
+Diff line change
@@ Expand Up / @@ -107,3 +107,5 @@ @@
     Think step by step.
     """
     '''
+    USER_INSTRUCTION = "Describe images found in my GitHub repositories. These images often include elements like text, shapes, arrows, red lines, and boxed areas, and may also contain screenshots of customer business services or SaaS interfaces. Extract and describe these elements, noting their positions and relationships, such as connections indicated by arrows or emphasis through red lines and boxes. Provide a comprehensive understanding of the visual and textual content."