From b9ec8e0623b8e54f5ca9520f7828386ac907c3c7 Mon Sep 17 00:00:00 2001 From: Hiroshi Nishio Date: Sun, 22 Dec 2024 19:46:00 +0900 Subject: [PATCH] Enable GitAuto to conduct Google Search to make sure its suggestion is not deprecated and follows up-to-date practice --- constants/requests.py | 1 + requirements.txt | 3 + services/gitauto_handler.py | 20 +++++ services/google/search.py | 80 +++++++++++++++++++ services/openai/commit_changes.py | 11 ++- services/openai/functions/functions.py | 8 ++ services/openai/functions/search_google.py | 21 +++++ services/openai/instructions/search_google.py | 15 ++++ 8 files changed, 156 insertions(+), 3 deletions(-) create mode 100644 constants/requests.py create mode 100644 services/google/search.py create mode 100644 services/openai/functions/search_google.py create mode 100644 services/openai/instructions/search_google.py diff --git a/constants/requests.py b/constants/requests.py new file mode 100644 index 00000000..b3e67f08 --- /dev/null +++ b/constants/requests.py @@ -0,0 +1 @@ +USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" diff --git a/requirements.txt b/requirements.txt index 8c6a8bd2..93b93687 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ annotated-types==0.7.0 anyio==4.4.0 astroid==3.2.4 +beautifulsoup4==4.12.3 black==24.8.0 certifi==2024.7.4 cffi==1.17.0 @@ -21,6 +22,7 @@ exceptiongroup==1.2.2 fastapi==0.112.0 fastapi-cli==0.0.5 filelock==3.15.4 +googlesearch-python==1.2.5 gotrue==2.6.1 h11==0.14.0 h2==4.1.0 @@ -71,6 +73,7 @@ sentry-sdk==2.12.0 shellingham==1.5.4 six==1.16.0 sniffio==1.3.1 +soupsieve==2.6 starlette==0.37.2 storage3==0.7.7 StrEnum==0.4.15 diff --git a/services/gitauto_handler.py b/services/gitauto_handler.py index 7e3f24d3..c89ca851 100644 --- a/services/gitauto_handler.py +++ b/services/gitauto_handler.py @@ -245,6 +245,26 @@ async def handle_gitauto( update_comment(body=comment_body, base_args=base_args, p=p) p = min(p + 5, 85) + # Search Google + ( + messages, + previous_calls, + tool_name, + tool_args, + token_input, + token_output, + _is_searched, + ) = chat_with_agent( + messages=messages, + base_args=base_args, + mode="search", + previous_calls=previous_calls, + ) + if tool_name is not None and tool_args is not None: + comment_body = f"Calling `{tool_name}()` with `{tool_args}`..." + update_comment(body=comment_body, base_args=base_args, p=p) + p = min(p + 5, 85) + # Commit changes based on the exploration information ( messages, diff --git a/services/google/search.py b/services/google/search.py new file mode 100644 index 00000000..c3ed2738 --- /dev/null +++ b/services/google/search.py @@ -0,0 +1,80 @@ +from bs4 import BeautifulSoup +from googlesearch import search +from requests import get +from config import TIMEOUT +from constants.requests import USER_AGENT +from services.github.github_types import BaseArgs +from utils.handle_exceptions import handle_exceptions + +NUM_RESULTS_DEFAULT = 1 +UNNECESSARY_TAGS = [ + "ads", + "advertisement", + "aside", + "footer", + "head", + "header", + "iframe", + "link", + "meta", + "nav", + "noscript", + "path", + "script", + "style", + "svg", +] + + +@handle_exceptions(default_return_value=[], raise_on_error=False) +def search_urls(query: str, num_results: int = NUM_RESULTS_DEFAULT, lang: str = "en"): + """https://pypi.org/project/googlesearch-python/""" + search_results: list[dict[str, str]] = [] + results = search( + term=query, num_results=num_results, lang=lang, safe=None, advanced=True + ) + for result in results: + title = result.title + description = result.description + url = result.url + search_results.append({"title": title, "description": description, "url": url}) + + return search_results + + +@handle_exceptions(default_return_value=None, raise_on_error=False) +def scrape_content_from_url(url: str): + headers = {"User-Agent": USER_AGENT} + response = get(url, headers=headers, timeout=TIMEOUT) + response.raise_for_status() + + soup = BeautifulSoup(response.text, "html.parser") + + # Remove unnecessary elements + for element in soup(UNNECESSARY_TAGS): + element.decompose() + + # Get title and content + title = soup.title.string if soup.title else "" + print(f"Googled url: {url}\nTitle: {title}") + print(f"Soup: {soup.prettify()}") + + # Find main content area if possible + main_content = soup.find(["main", "article", 'div[role="main"]']) or soup + content = "\n".join(main_content.stripped_strings).strip() + print(f"Content: {content}") + return {"title": title.strip(), "content": content, "url": url} + + +@handle_exceptions(default_return_value=[], raise_on_error=False) +def google_search( + base_args: BaseArgs, + query: str, + num_results: int = NUM_RESULTS_DEFAULT, + lang: str = "en", +): + urls = search_urls(query=query, num_results=num_results, lang=lang) + contents = [] + for url in urls: + contents.append(scrape_content_from_url(url["url"])) + return contents diff --git a/services/openai/commit_changes.py b/services/openai/commit_changes.py index 38d39010..13ae5434 100644 --- a/services/openai/commit_changes.py +++ b/services/openai/commit_changes.py @@ -19,6 +19,7 @@ TOOLS_TO_COMMIT_CHANGES, TOOLS_TO_EXPLORE_REPO, TOOLS_TO_GET_FILE, + TOOLS_TO_SEARCH_GOOGLE, TOOLS_TO_UPDATE_COMMENT, tools_to_call, ) @@ -27,6 +28,7 @@ SYSTEM_INSTRUCTION_TO_COMMIT_CHANGES, ) from services.openai.instructions.explore_repo import SYSTEM_INSTRUCTION_TO_EXPLORE_REPO +from services.openai.instructions.search_google import SYSTEM_INSTRUCTION_TO_SEARCH_GOOGLE from services.openai.instructions.update_comment import ( SYSTEM_INSTRUCTION_TO_UPDATE_COMMENT, ) @@ -38,7 +40,7 @@ def chat_with_agent( messages: Iterable[ChatCompletionMessageParam], base_args: BaseArgs, - mode: Literal["comment", "commit", "explore", "get"], + mode: Literal["comment", "commit", "explore", "get", "search"], previous_calls: List[dict] | None = None, ): """https://platform.openai.com/docs/api-reference/chat/create""" @@ -58,6 +60,9 @@ def chat_with_agent( elif mode == "get": content = SYSTEM_INSTRUCTION_TO_EXPLORE_REPO tools = TOOLS_TO_GET_FILE + elif mode == "search": + content = SYSTEM_INSTRUCTION_TO_SEARCH_GOOGLE + tools = TOOLS_TO_SEARCH_GOOGLE system_message: ChatCompletionMessageParam = {"role": "system", "content": content} all_messages = [system_message] + list(messages) @@ -90,8 +95,8 @@ def chat_with_agent( tool_call_id: str = tool_calls[0].id tool_name: str = tool_calls[0].function.name tool_args: dict = json.loads(tool_calls[0].function.arguments) - print(colorize(f"tool_name: {tool_name}", "green")) - print(colorize(f"tool_args: {tool_args}\n", "green")) + # print(colorize(f"tool_name: {tool_name}", "green")) + # print(colorize(f"tool_args: {tool_args}\n", "green")) # Check if the same function with the same args has been called before current_call = {"function": tool_name, "args": tool_args} diff --git a/services/openai/functions/functions.py b/services/openai/functions/functions.py index 7c427678..4a9c327d 100644 --- a/services/openai/functions/functions.py +++ b/services/openai/functions/functions.py @@ -12,6 +12,8 @@ search_remote_file_contents, update_comment, ) +from services.google.search import google_search +from services.openai.functions.search_google import SEARCH_GOOGLE from services.openai.functions.update_comment import UPDATE_GITHUB_COMMENT from services.openai.instructions.diff import DIFF_DESCRIPTION @@ -121,14 +123,20 @@ {"type": "function", "function": GET_REMOTE_FILE_CONTENT}, {"type": "function", "function": SEARCH_REMOTE_FILE_CONTENT}, ] +TOOLS_TO_SEARCH_GOOGLE: Iterable[ChatCompletionToolParam] = [ + {"type": "function", "function": SEARCH_GOOGLE}, +] TOOLS_TO_COMMIT_CHANGES: Iterable[ChatCompletionToolParam] = [ {"type": "function", "function": COMMIT_CHANGES_TO_REMOTE_BRANCH}, ] # Define tools to call tools_to_call: dict[str, Any] = { + # GitHub "commit_changes_to_remote_branch": commit_changes_to_remote_branch, "get_remote_file_content": get_remote_file_content, "search_remote_file_contents": search_remote_file_contents, "update_github_comment": update_comment, + # Google + "search_google": google_search, } diff --git a/services/openai/functions/search_google.py b/services/openai/functions/search_google.py new file mode 100644 index 00000000..e712bdf8 --- /dev/null +++ b/services/openai/functions/search_google.py @@ -0,0 +1,21 @@ +# Third-party imports +from openai.types import shared_params + +# OpenAI: We recommend including instructions regarding when to call a function in the system prompt, while using the function definition to provide instructions on how to call the function and how to generate the parameters. +# https://platform.openai.com/docs/guides/function-calling/should-i-include-function-call-instructions-in-the-tool-specification-or-in-the-system-prompt + +QUERY: dict[str, str] = { + "type": "string", + "description": "The query to search for.", +} + +SEARCH_GOOGLE: shared_params.FunctionDefinition = { + "name": "search_google", + "description": "Search Google for a query.", + "parameters": { + "type": "object", + "properties": {"query": QUERY}, + "required": ["query"], + "additionalProperties": False, + }, +} diff --git a/services/openai/instructions/search_google.py b/services/openai/instructions/search_google.py new file mode 100644 index 00000000..46598a88 --- /dev/null +++ b/services/openai/instructions/search_google.py @@ -0,0 +1,15 @@ +SYSTEM_INSTRUCTION_TO_SEARCH_GOOGLE = """ +When suggesting libraries, GitHub Actions, or any external tools/services, search Google to verify just in case: + +1. The latest available versions + - Real example: While your knowledge shows codecov/codecov-action@v3, Google search reveals codecov/codecov-action@v5 is the latest version + - Your knowledge cutoff date means you might have outdated version information + +2. Current status of the tool + - Check if it's still actively maintained + - Verify it hasn't been deprecated or replaced + +3. Best practices and alternatives + - Search for current recommended approaches + - Look for any newer alternatives that might be more suitable +"""