-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enable GitAuto to conduct Google Search to make sure its suggestion is not deprecated and follows up-to-date practice
- Loading branch information
1 parent
88e6cb9
commit b9ec8e0
Showing
8 changed files
with
156 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Browser-like User-Agent header value sent with outgoing HTTP requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
from bs4 import BeautifulSoup | ||
from googlesearch import search | ||
from requests import get | ||
from config import TIMEOUT | ||
from constants.requests import USER_AGENT | ||
from services.github.github_types import BaseArgs | ||
from utils.handle_exceptions import handle_exceptions | ||
|
||
# Default number of search results to request per query.
NUM_RESULTS_DEFAULT = 1

# Tag names stripped from a scraped page before its text is extracted.
# NOTE(review): "ads" and "advertisement" are not standard HTML tag names, so
# they only match custom elements with exactly those names.
UNNECESSARY_TAGS = [
    "ads",
    "advertisement",
    "aside",
    "footer",
    "head",
    "header",
    "iframe",
    "link",
    "meta",
    "nav",
    "noscript",
    "path",
    "script",
    "style",
    "svg",
]
|
||
|
||
@handle_exceptions(default_return_value=[], raise_on_error=False)
def search_urls(query: str, num_results: int = NUM_RESULTS_DEFAULT, lang: str = "en"):
    """Search Google and return one dict per hit.

    Uses googlesearch-python: https://pypi.org/project/googlesearch-python/

    Args:
        query: Search term passed to Google.
        num_results: Number of results to request.
        lang: Language code passed to the search.

    Returns:
        A list of dicts with the keys "title", "description" and "url".
    """
    # advanced=True makes `search` yield result objects exposing
    # .title / .description / .url rather than bare URL strings.
    results = search(
        term=query, num_results=num_results, lang=lang, safe=None, advanced=True
    )
    return [
        {"title": hit.title, "description": hit.description, "url": hit.url}
        for hit in results
    ]
|
||
|
||
@handle_exceptions(default_return_value=None, raise_on_error=False)
def scrape_content_from_url(url: str):
    """Download `url` and extract its title and visible text.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict with the keys "title", "content" and "url".
        NOTE(review): on any exception (network failure, non-2xx status, ...)
        the `handle_exceptions` decorator presumably returns None instead.
    """
    headers = {"User-Agent": USER_AGENT}
    response = get(url, headers=headers, timeout=TIMEOUT)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Read the title BEFORE pruning: <title> normally lives inside <head>,
    # and "head" is one of the tags removed below. `.string` is None when the
    # <title> is empty or has nested markup, so fall back to "".
    title_tag = soup.title
    title = (title_tag.string or "") if title_tag else ""

    # Remove unnecessary elements
    for element in soup(UNNECESSARY_TAGS):
        element.decompose()

    print(f"Googled url: {url}\nTitle: {title}")
    print(f"Soup: {soup.prettify()}")

    # Find main content area if possible. `find()` only matches tag names, so
    # the attribute selector div[role="main"] needs the CSS API (select_one).
    main_content = soup.select_one('main, article, div[role="main"]') or soup
    content = "\n".join(main_content.stripped_strings).strip()
    print(f"Content: {content}")
    return {"title": title.strip(), "content": content, "url": url}
|
||
|
||
@handle_exceptions(default_return_value=[], raise_on_error=False)
def google_search(
    base_args: BaseArgs,
    query: str,
    num_results: int = NUM_RESULTS_DEFAULT,
    lang: str = "en",
):
    """Search Google for `query` and scrape each resulting page.

    Args:
        base_args: Not used in this function.
            NOTE(review): presumably kept for a shared tool-call signature - confirm.
        query: Search term.
        num_results: Number of search results to request.
        lang: Language code for the search.

    Returns:
        A list of the dicts returned by `scrape_content_from_url`, one per
        page that was scraped successfully.
    """
    urls = search_urls(query=query, num_results=num_results, lang=lang)
    contents = []
    for url in urls:
        scraped = scrape_content_from_url(url["url"])
        # A failed scrape yields None; skip it so callers only see real pages.
        if scraped is not None:
            contents.append(scraped)
    return contents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Third-party imports | ||
from openai.types import shared_params | ||
|
||
# OpenAI: We recommend including instructions regarding when to call a function in the system prompt, while using the function definition to provide instructions on how to call the function and how to generate the parameters. | ||
# https://platform.openai.com/docs/guides/function-calling/should-i-include-function-call-instructions-in-the-tool-specification-or-in-the-system-prompt | ||
|
||
# JSON-schema fragment describing the "query" argument of the search tool.
QUERY: dict[str, str] = {
    "type": "string",
    "description": "The query to search for.",
}
|
||
# Function (tool) definition for "search_google": takes a single required
# string argument, "query", and allows no other properties.
SEARCH_GOOGLE: shared_params.FunctionDefinition = {
    "name": "search_google",
    "description": "Search Google for a query.",
    "parameters": {
        "type": "object",
        "properties": {"query": QUERY},
        "required": ["query"],
        "additionalProperties": False,
    },
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# System-prompt text telling the model when to search Google.
SYSTEM_INSTRUCTION_TO_SEARCH_GOOGLE = """
When suggesting libraries, GitHub Actions, or any external tools/services, search Google to verify just in case:
1. The latest available versions
- Real example: While your knowledge shows codecov/codecov-action@v3, Google search reveals codecov/codecov-action@v5 is the latest version
- Your knowledge cutoff date means you might have outdated version information
2. Current status of the tool
- Check if it's still actively maintained
- Verify it hasn't been deprecated or replaced
3. Best practices and alternatives
- Search for current recommended approaches
- Look for any newer alternatives that might be more suitable
"""