Skip to content

Commit

Permalink
Enable GitAuto to conduct Google Search to make sure its suggestion i…
Browse files Browse the repository at this point in the history
…s not deprecated and follows up-to-date practice
  • Loading branch information
hiroshinishio committed Dec 22, 2024
1 parent 88e6cb9 commit b9ec8e0
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 3 deletions.
1 change: 1 addition & 0 deletions constants/requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Browser-like User-Agent string (desktop Chrome 122 on Windows 10) sent as the
# "User-Agent" header on outbound HTTP requests such as page scraping.
# NOTE(review): the Chrome version is pinned here; presumably it should be
# refreshed occasionally so requests keep looking like a current browser.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
annotated-types==0.7.0
anyio==4.4.0
astroid==3.2.4
beautifulsoup4==4.12.3
black==24.8.0
certifi==2024.7.4
cffi==1.17.0
Expand All @@ -21,6 +22,7 @@ exceptiongroup==1.2.2
fastapi==0.112.0
fastapi-cli==0.0.5
filelock==3.15.4
googlesearch-python==1.2.5
gotrue==2.6.1
h11==0.14.0
h2==4.1.0
Expand Down Expand Up @@ -71,6 +73,7 @@ sentry-sdk==2.12.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
soupsieve==2.6
starlette==0.37.2
storage3==0.7.7
StrEnum==0.4.15
Expand Down
20 changes: 20 additions & 0 deletions services/gitauto_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,26 @@ async def handle_gitauto(
update_comment(body=comment_body, base_args=base_args, p=p)
p = min(p + 5, 85)

# Search Google
(
messages,
previous_calls,
tool_name,
tool_args,
token_input,
token_output,
_is_searched,
) = chat_with_agent(
messages=messages,
base_args=base_args,
mode="search",
previous_calls=previous_calls,
)
if tool_name is not None and tool_args is not None:
comment_body = f"Calling `{tool_name}()` with `{tool_args}`..."
update_comment(body=comment_body, base_args=base_args, p=p)
p = min(p + 5, 85)

# Commit changes based on the exploration information
(
messages,
Expand Down
80 changes: 80 additions & 0 deletions services/google/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from bs4 import BeautifulSoup
from googlesearch import search
from requests import get
from config import TIMEOUT
from constants.requests import USER_AGENT
from services.github.github_types import BaseArgs
from utils.handle_exceptions import handle_exceptions

# Default number of search results requested when the caller does not pass one.
NUM_RESULTS_DEFAULT = 1
# Tag names whose elements are removed from a scraped page before its text is
# extracted (see scrape_content_from_url).
# NOTE(review): "ads" and "advertisement" are not standard HTML element names,
# so they presumably never match anything when used as tag names - confirm intent.
# NOTE(review): "head" contains the document <title>, so anything that needs the
# title must read it before these elements are stripped.
UNNECESSARY_TAGS = [
"ads",
"advertisement",
"aside",
"footer",
"head",
"header",
"iframe",
"link",
"meta",
"nav",
"noscript",
"path",
"script",
"style",
"svg",
]


@handle_exceptions(default_return_value=[], raise_on_error=False)
def search_urls(query: str, num_results: int = NUM_RESULTS_DEFAULT, lang: str = "en"):
    """Search Google for `query` and describe each hit as a plain dict.

    Uses googlesearch-python: https://pypi.org/project/googlesearch-python/

    Args:
        query: Search term forwarded to Google.
        num_results: Number of results to request.
        lang: Language code forwarded to the search.

    Returns:
        A list of dicts with the keys "title", "description" and "url",
        one per search hit, in the order the search yields them.
    """
    # advanced=True is requested so each hit exposes .title/.description/.url.
    hits = search(
        term=query, num_results=num_results, lang=lang, safe=None, advanced=True
    )
    return [
        {"title": hit.title, "description": hit.description, "url": hit.url}
        for hit in hits
    ]


@handle_exceptions(default_return_value=None, raise_on_error=False)
def scrape_content_from_url(url: str):
    """Download a web page and return its title and visible text.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with the keys "title" (page title, "" when the page has none),
        "content" (newline-joined visible text of the main content area, or of
        the whole stripped document when no main area is found) and "url".
        Failures (network errors, non-2xx responses, ...) go through
        `handle_exceptions`, which is declared with default_return_value=None
        - presumably None is returned in that case; confirm against the decorator.
    """
    headers = {"User-Agent": USER_AGENT}
    response = get(url, headers=headers, timeout=TIMEOUT)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Read the title BEFORE stripping: UNNECESSARY_TAGS includes "head", and
    # <title> lives inside <head>, so reading it afterwards always yields "".
    # get_text() is used instead of .string because .string is None when the
    # <title> element has more than one child node.
    title_tag = soup.title
    title = title_tag.get_text(strip=True) if title_tag else ""

    # Remove unnecessary elements
    for element in soup(UNNECESSARY_TAGS):
        element.decompose()

    # NOTE(review): debugging output; the second print dumps the whole stripped page.
    print(f"Googled url: {url}\nTitle: {title}")
    print(f"Soup: {soup.prettify()}")

    # Find main content area if possible. A CSS selector is required here:
    # find() treats each list entry as a literal tag name, so an entry such as
    # 'div[role="main"]' can never match.
    main_content = soup.select_one('main, article, div[role="main"]') or soup
    content = "\n".join(main_content.stripped_strings).strip()
    print(f"Content: {content}")
    return {"title": title, "content": content, "url": url}


@handle_exceptions(default_return_value=[], raise_on_error=False)
def google_search(
    base_args: BaseArgs,
    query: str,
    num_results: int = NUM_RESULTS_DEFAULT,
    lang: str = "en",
):
    """Search Google and scrape the content of every result page.

    Args:
        base_args: Not used by this function; kept in the signature
            (presumably so it matches how the other tools are invoked -
            verify against the caller).
        query: Search term.
        num_results: Number of search results to request.
        lang: Language code forwarded to the search.

    Returns:
        A list with one {"title", "content", "url"} dict per result page
        that could be scraped. Pages whose scrape produced None are omitted.
    """
    results = search_urls(query=query, num_results=num_results, lang=lang)
    pages = (scrape_content_from_url(result["url"]) for result in results)
    # scrape_content_from_url is declared with default_return_value=None, so a
    # failed scrape would otherwise leave a None entry in the returned list.
    return [page for page in pages if page is not None]
11 changes: 8 additions & 3 deletions services/openai/commit_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
TOOLS_TO_COMMIT_CHANGES,
TOOLS_TO_EXPLORE_REPO,
TOOLS_TO_GET_FILE,
TOOLS_TO_SEARCH_GOOGLE,
TOOLS_TO_UPDATE_COMMENT,
tools_to_call,
)
Expand All @@ -27,6 +28,7 @@
SYSTEM_INSTRUCTION_TO_COMMIT_CHANGES,
)
from services.openai.instructions.explore_repo import SYSTEM_INSTRUCTION_TO_EXPLORE_REPO
from services.openai.instructions.search_google import SYSTEM_INSTRUCTION_TO_SEARCH_GOOGLE
from services.openai.instructions.update_comment import (
SYSTEM_INSTRUCTION_TO_UPDATE_COMMENT,
)
Expand All @@ -38,7 +40,7 @@
def chat_with_agent(
messages: Iterable[ChatCompletionMessageParam],
base_args: BaseArgs,
mode: Literal["comment", "commit", "explore", "get"],
mode: Literal["comment", "commit", "explore", "get", "search"],
previous_calls: List[dict] | None = None,
):
"""https://platform.openai.com/docs/api-reference/chat/create"""
Expand All @@ -58,6 +60,9 @@ def chat_with_agent(
elif mode == "get":
content = SYSTEM_INSTRUCTION_TO_EXPLORE_REPO
tools = TOOLS_TO_GET_FILE
elif mode == "search":
content = SYSTEM_INSTRUCTION_TO_SEARCH_GOOGLE
tools = TOOLS_TO_SEARCH_GOOGLE
system_message: ChatCompletionMessageParam = {"role": "system", "content": content}
all_messages = [system_message] + list(messages)

Expand Down Expand Up @@ -90,8 +95,8 @@ def chat_with_agent(
tool_call_id: str = tool_calls[0].id
tool_name: str = tool_calls[0].function.name
tool_args: dict = json.loads(tool_calls[0].function.arguments)
print(colorize(f"tool_name: {tool_name}", "green"))
print(colorize(f"tool_args: {tool_args}\n", "green"))
# print(colorize(f"tool_name: {tool_name}", "green"))
# print(colorize(f"tool_args: {tool_args}\n", "green"))

# Check if the same function with the same args has been called before
current_call = {"function": tool_name, "args": tool_args}
Expand Down
8 changes: 8 additions & 0 deletions services/openai/functions/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
search_remote_file_contents,
update_comment,
)
from services.google.search import google_search
from services.openai.functions.search_google import SEARCH_GOOGLE
from services.openai.functions.update_comment import UPDATE_GITHUB_COMMENT
from services.openai.instructions.diff import DIFF_DESCRIPTION

Expand Down Expand Up @@ -121,14 +123,20 @@
{"type": "function", "function": GET_REMOTE_FILE_CONTENT},
{"type": "function", "function": SEARCH_REMOTE_FILE_CONTENT},
]
# Tool list exposing only the Google search function definition.
TOOLS_TO_SEARCH_GOOGLE: Iterable[ChatCompletionToolParam] = [
{"type": "function", "function": SEARCH_GOOGLE},
]
# Tool list exposing only the commit-to-remote-branch function definition.
TOOLS_TO_COMMIT_CHANGES: Iterable[ChatCompletionToolParam] = [
{"type": "function", "function": COMMIT_CHANGES_TO_REMOTE_BRANCH},
]

# Dispatch table: tool name string -> Python callable that implements the tool.
# NOTE(review): keys presumably have to match the "name" field of the
# corresponding function definitions offered to the model - verify when adding one.
tools_to_call: dict[str, Any] = {
# GitHub
"commit_changes_to_remote_branch": commit_changes_to_remote_branch,
"get_remote_file_content": get_remote_file_content,
"search_remote_file_contents": search_remote_file_contents,
"update_github_comment": update_comment,
# Google
"search_google": google_search,
}
21 changes: 21 additions & 0 deletions services/openai/functions/search_google.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Third-party imports
from openai.types import shared_params

# OpenAI: We recommend including instructions regarding when to call a function in the system prompt, while using the function definition to provide instructions on how to call the function and how to generate the parameters.
# https://platform.openai.com/docs/guides/function-calling/should-i-include-function-call-instructions-in-the-tool-specification-or-in-the-system-prompt

# Schema fragment for the single "query" parameter: a free-form string.
QUERY: dict[str, str] = {
"type": "string",
"description": "The query to search for.",
}

# Function definition for the "search_google" tool: takes exactly one required
# string parameter, "query", and declares that no other properties are allowed.
SEARCH_GOOGLE: shared_params.FunctionDefinition = {
"name": "search_google",
"description": "Search Google for a query.",
"parameters": {
"type": "object",
"properties": {"query": QUERY},
"required": ["query"],
"additionalProperties": False,
},
}
15 changes: 15 additions & 0 deletions services/openai/instructions/search_google.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Prompt text telling the model when to search Google: to verify latest
# versions, maintenance status, and current best practices of external tools.
# This string is sent to the model verbatim, so edits here change model behavior.
SYSTEM_INSTRUCTION_TO_SEARCH_GOOGLE = """
When suggesting libraries, GitHub Actions, or any external tools/services, search Google to verify just in case:
1. The latest available versions
- Real example: While your knowledge shows codecov/codecov-action@v3, Google search reveals codecov/codecov-action@v5 is the latest version
- Your knowledge cutoff date means you might have outdated version information
2. Current status of the tool
- Check if it's still actively maintained
- Verify it hasn't been deprecated or replaced
3. Best practices and alternatives
- Search for current recommended approaches
- Look for any newer alternatives that might be more suitable
"""

0 comments on commit b9ec8e0

Please sign in to comment.