From e199052994733ba68f1056649399095714822a00 Mon Sep 17 00:00:00 2001 From: Agastya Kumar Yadav Date: Fri, 17 May 2024 10:13:13 +0000 Subject: [PATCH 1/4] scraped atcoder --- src/scrape_up/atcoder/__init__.py | 3 +++ src/scrape_up/atcoder/atcoder.py | 40 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 src/scrape_up/atcoder/__init__.py create mode 100644 src/scrape_up/atcoder/atcoder.py diff --git a/src/scrape_up/atcoder/__init__.py b/src/scrape_up/atcoder/__init__.py new file mode 100644 index 00000000..961b109b --- /dev/null +++ b/src/scrape_up/atcoder/__init__.py @@ -0,0 +1,3 @@ +from .atcoder import Atcoder + +__all__ = ["Atcoder"] diff --git a/src/scrape_up/atcoder/atcoder.py b/src/scrape_up/atcoder/atcoder.py new file mode 100644 index 00000000..3d6e229d --- /dev/null +++ b/src/scrape_up/atcoder/atcoder.py @@ -0,0 +1,40 @@ +import json +import requests +from bs4 import BeautifulSoup + +class Atcoder: + ''' + ''' + def __init__(self,user): + self.user=user + + def get_profile(self): + try: + url = "https://atcoder.jp/users/chokudai" + headers = {"User-Agent": "scrapeup"} + response = requests.get(url, headers=headers) + soup = BeautifulSoup(response.text, "html.parser") + table=soup.find_all('table',class_="dl-table") + user_details={} + + row=table[0].find_all('tr') + for r in row : + # print(r) + user_details[r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' ').strip() + + row=table[1].find_all('tr') + for r in row : + # print(r) + user_details["Algorithm_"+r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' ').strip() + + url="https://atcoder.jp/users/chokudai?contestType=heuristic" + response = requests.get(url, headers=headers) + soup = BeautifulSoup(response.text, "html.parser") + table=soup.find_all('table',class_="dl-table"); + row=table[1].find_all('tr') + for r in row : + # print(r) + user_details["Heuristic_"+r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' 
').strip() + return json.dumps(user_data) + except: + return None \ No newline at end of file From 94b84c9ef12473c99f97b973a73684ec23a6183d Mon Sep 17 00:00:00 2001 From: Agastya Kumar Yadav Date: Fri, 17 May 2024 10:31:26 +0000 Subject: [PATCH 2/4] updated atcoder.py --- src/scrape_up/atcoder/atcoder.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/scrape_up/atcoder/atcoder.py b/src/scrape_up/atcoder/atcoder.py index 3d6e229d..0247db2d 100644 --- a/src/scrape_up/atcoder/atcoder.py +++ b/src/scrape_up/atcoder/atcoder.py @@ -4,13 +4,20 @@ class Atcoder: ''' + atc=Atcoder(user="chokudai") + stc.get_profile() + + | Methods | Details | + | ----------------- | ---------------------------------------------------------------------------------- | + | `.get_profile()` | Returns the user data in json format. | + ''' def __init__(self,user): self.user=user def get_profile(self): try: - url = "https://atcoder.jp/users/chokudai" + url = f"https://atcoder.jp/users/{self.user}" headers = {"User-Agent": "scrapeup"} response = requests.get(url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") @@ -19,22 +26,22 @@ def get_profile(self): row=table[0].find_all('tr') for r in row : - # print(r) + user_details[r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' ').strip() row=table[1].find_all('tr') for r in row : - # print(r) + user_details["Algorithm_"+r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' ').strip() - url="https://atcoder.jp/users/chokudai?contestType=heuristic" + url=f"https://atcoder.jp/users/{self.user}?contestType=heuristic" response = requests.get(url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") - table=soup.find_all('table',class_="dl-table"); + table=soup.find_all('table',class_="dl-table") row=table[1].find_all('tr') for r in row : - # print(r) + user_details["Heuristic_"+r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' 
').strip() - return json.dumps(user_data) + return json.dumps(user_details) except: return None \ No newline at end of file From e69bc448ff5f2eae5009d92f6f24a41d4116631f Mon Sep 17 00:00:00 2001 From: Agastya Kumar Yadav Date: Fri, 17 May 2024 10:33:37 +0000 Subject: [PATCH 3/4] added atcoder in dev-doumentation --- dev-documentation.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dev-documentation.md b/dev-documentation.md index b353171c..08dd5aee 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1708,3 +1708,16 @@ The `Jobs` class provides methods for configuring scraping parameters and fetchi | --------------- | ---------------------------------------------------------------------------------------- | | `.filter_job()` | Apply filters such as job title, country, city, and range of years of experience. | | `.fetch_jobs()` | Fetch job listings from the website based on the applied filters, across multiple pages. | + +## Atcode + +First create an object of class `Geeksforgeeks`. +```python +atcoder = Atcoder(user="username") +``` + +| Methods | Details | +| ------------------------------- | ---------------------------------------------- | +| `.get_profile()` | Returns the user data in json format. | + +--- From daa07a512bed1b95272c8d36d7b81b21098ec224 Mon Sep 17 00:00:00 2001 From: Nikhil Raj Date: Fri, 17 May 2024 20:25:06 +0530 Subject: [PATCH 4/4] Check --- dev-documentation.md | 51 ++++++------- src/scrape_up/atcoder/atcoder.py | 84 +++++++++++++++------- src/scrape_up/espncricinfo/espncricinfo.py | 2 +- src/scrape_up/pinterest/pinterest.py | 1 - src/test/espncricinfo_test.py | 1 - src/test/pinterest_test.py | 4 +- 6 files changed, 88 insertions(+), 55 deletions(-) diff --git a/dev-documentation.md b/dev-documentation.md index 08dd5aee..42f1c38d 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -46,8 +46,8 @@ user = github.Users(username="nikhil25803") | `.star_count()` | Returns the number of stars of a user. 
| | `.get_yearly_contributions()` | Returns the number of contributions made in 365 days frame. | | `.get_repositories()` | Returns the list of repositories of a user. | -| `.get_starred_repos()` | Returns the list of starred repositories of a user. | -| `.pul_requests()` | Returns the number of pull requests opened in a repository. | +| `.get_starred_repos()` | Returns the list of starred repositories of a user. | +| `.pul_requests()` | Returns the number of pull requests opened in a repository. | | `.get_followers()` | Returns the list of followers of a user. | | `.get_following_users()` | Returns the list of users followed by a user. | | `.get_achievements()` | Returns the list of achievements of a user. | @@ -712,8 +712,8 @@ Create an instance of `Video` class. video = Video(video_url="video_url") ``` -| Methods | Details | -| --------------- | ------------------------ | +| Methods | Details | +| --------------- | ------------------------- | | `.getDetails()` | Returns the video details | ## Scrape Channel Details @@ -1173,10 +1173,10 @@ user = Codechef(id="username") ``` -| Methods | Details | -| --------------- | ------------------------------------------------------------------------- | -| `get_profile()` | Returns name, username, profile_image_link, rating, details etc. | -| `get_contests()`| Returns future_contests , past_contests , skill_tests etc in json format. | +| Methods | Details | +| ---------------- | ------------------------------------------------------------------------- | +| `get_profile()` | Returns name, username, profile_image_link, rating, details etc. | +| `get_contests()` | Returns future_contests , past_contests , skill_tests etc in json format. | --- @@ -1656,19 +1656,18 @@ First create an object of class `Dictionary`. | `.get_word_of_the_day()` | Returns the word of the day. | | `.word_of_the_day_definition()` | Returns the definition of the word of the day. 
| --------- - +--- -#### AmbitionBx +#### AmbitionBx Create an directory with name ambitonbox -created a python which consist the code for scarping the website +created a python which consist the code for scarping the website ```python # Example usage from scrape_up import ambitionBox -num_pages_to_scrape = 2 +num_pages_to_scrape = 2 scraper = ambitionBox.Comapiens(num_pages_to_scrape) @@ -1676,8 +1675,8 @@ scraper.scrape_companies() ``` -| Methods | Details | -| --------------- | ----------------------------------------------------------------------------- | +| Methods | Details | +| --------------------- | ----------------------------------------- | | `.scrape_companies()` | Returns the company name with the rating. | --- @@ -1685,20 +1684,21 @@ scraper.scrape_companies() ## Geeksforgeeks First create an object of class `Geeksforgeeks`. + ```python geeksforgeeks = Geeksforgeeks(user="username") ``` -| Methods | Details | -| ------------------------------- | ---------------------------------------------- | -| `.get_profile()` | Returns the user data in json format. | +| Methods | Details | +| ---------------- | ------------------------------------- | +| `.get_profile()` | Returns the user data in json format. | --- ## Wuzzuf ```python -from scrap-up import wuzzuf +from scrape_up import wuzzuf jobs = wuzzuf.Jobs() ``` @@ -1709,15 +1709,18 @@ The `Jobs` class provides methods for configuring scraping parameters and fetchi | `.filter_job()` | Apply filters such as job title, country, city, and range of years of experience. | | `.fetch_jobs()` | Fetch job listings from the website based on the applied filters, across multiple pages. | -## Atcode +## Atcoder + +First create an object of class `Atcoder`. -First create an object of class `Geeksforgeeks`. 
```python +from scrape_up.atcoder import Atcoder atcoder = Atcoder(user="username") +atcoder.get_profile() ``` -| Methods | Details | -| ------------------------------- | ---------------------------------------------- | -| `.get_profile()` | Returns the user data in json format. | +| Methods | Details | +| ---------------- | ------------------------------------- | +| `.get_profile()` | Returns the user data in json format. | --- diff --git a/src/scrape_up/atcoder/atcoder.py b/src/scrape_up/atcoder/atcoder.py index 0247db2d..cec07e25 100644 --- a/src/scrape_up/atcoder/atcoder.py +++ b/src/scrape_up/atcoder/atcoder.py @@ -1,47 +1,79 @@ import json import requests -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup -class Atcoder: - ''' - atc=Atcoder(user="chokudai") - stc.get_profile() +class Atcoder: + """ + ``` + atc = Atcoder(user="chokudai") + atc.get_profile() + ``` | Methods | Details | | ----------------- | ---------------------------------------------------------------------------------- | | `.get_profile()` | Returns the user data in json format. | - - ''' - def __init__(self,user): - self.user=user + Response + ```json + { + "Country/Region": "Japan", + "Birth_Year": "1988", + "Twitter_ID": "@chokudai", + "TopCoder_ID": "chokudai", + "Codeforces_ID": "chokudai", + "Affiliation": "AtCoder Inc. 
CEO", + "Algorithm_Rank": "44th", + "Algorithm_Rating": "3028", + "Algorithm_Highest_Rating": "3092 ― 6 Dan (+108 to promote)", + "Algorithm_Rated_Matches_": "35", + "Algorithm_Last_Competed": "2023/12/17", + "Heuristic_Rank": "62nd", + "Heuristic_Rating": "2525 (Provisional)", + "Heuristic_Highest_Rating": "2525", + "Heuristic_Rated_Matches_": "8", + "Heuristic_Last_Competed": "2024/04/07" + } + ``` + """ + + + def __init__(self, user): + self.user = user + def get_profile(self): try: url = f"https://atcoder.jp/users/{self.user}" headers = {"User-Agent": "scrapeup"} response = requests.get(url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") - table=soup.find_all('table',class_="dl-table") - user_details={} + table = soup.find_all("table", class_="dl-table") + user_details = {} - row=table[0].find_all('tr') - for r in row : - - user_details[r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' ').strip() + row = table[0].find_all("tr") + for r in row: + user_details[r.find("th").text.replace(" ", "_")] = ( + r.find("td").text.replace("\n", " ").strip() + ) - row=table[1].find_all('tr') - for r in row : - - user_details["Algorithm_"+r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' ').strip() + row = table[1].find_all("tr") + for r in row: + user_details["Algorithm_" + r.find("th").text.replace(" ", "_")] = ( + r.find("td").text.replace("\n", " ").strip() + ) - url=f"https://atcoder.jp/users/{self.user}?contestType=heuristic" + url = f"https://atcoder.jp/users/{self.user}?contestType=heuristic" response = requests.get(url, headers=headers) soup = BeautifulSoup(response.text, "html.parser") - table=soup.find_all('table',class_="dl-table") - row=table[1].find_all('tr') - for r in row : - - user_details["Heuristic_"+r.find('th').text.replace(' ','_')]=r.find('td').text.replace('\n',' ').strip() + table = soup.find_all("table", class_="dl-table") + row = table[1].find_all("tr") + for r in row: + 
user_details["Heuristic_" + r.find("th").text.replace(" ", "_")] = ( + r.find("td").text.replace("\n", " ").strip() + ) return json.dumps(user_details) except: - return None \ No newline at end of file + return None + + +atc = Atcoder(user="chokudai") +print(atc.get_profile()) diff --git a/src/scrape_up/espncricinfo/espncricinfo.py b/src/scrape_up/espncricinfo/espncricinfo.py index 982883a7..688575dc 100644 --- a/src/scrape_up/espncricinfo/espncricinfo.py +++ b/src/scrape_up/espncricinfo/espncricinfo.py @@ -100,4 +100,4 @@ def get_livescores(self): live_scores.append(match_details) return live_scores except: - return live_scores \ No newline at end of file + return live_scores diff --git a/src/scrape_up/pinterest/pinterest.py b/src/scrape_up/pinterest/pinterest.py index b54a309e..14ae7c64 100644 --- a/src/scrape_up/pinterest/pinterest.py +++ b/src/scrape_up/pinterest/pinterest.py @@ -135,4 +135,3 @@ def get_pin_details(self, pin_url): } except Exception as e: return None - diff --git a/src/test/espncricinfo_test.py b/src/test/espncricinfo_test.py index 54ada955..701778dd 100644 --- a/src/test/espncricinfo_test.py +++ b/src/test/espncricinfo_test.py @@ -3,7 +3,6 @@ class ESPNTest(unittest.TestCase): - def test_connection(self): instance = Espncricinfo() self.assertTrue( diff --git a/src/test/pinterest_test.py b/src/test/pinterest_test.py index 476a3df9..68778858 100644 --- a/src/test/pinterest_test.py +++ b/src/test/pinterest_test.py @@ -17,7 +17,7 @@ def test_get_today(self): self.assertIn("image", topic) def test_get_photo(self): - url = "https://pin.it/1ZhgQA5AG" + url = "https://pin.it/1ZhgQA5AG" photo = self.pinterest.get_photo(url) if photo: self.assertIn("alt", photo) @@ -33,7 +33,7 @@ def test_search_pins(self): self.assertIn("image", pin) def test_get_pin_details(self): - pin_url = "https://pin.it/1ZhgQA5AG" + pin_url = "https://pin.it/1ZhgQA5AG" details = self.pinterest.get_pin_details(pin_url) if details: self.assertIn("title", details)