From 61baf7ccb639fc1049967391c4c04c60b1a2bf48 Mon Sep 17 00:00:00 2001 From: Agastya Kumar Yadav Date: Sun, 19 May 2024 14:56:52 +0000 Subject: [PATCH 1/4] Added ended contests from contest page of codeforces --- src/scrape_up/codeforces/contests.py | 29 +++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/scrape_up/codeforces/contests.py b/src/scrape_up/codeforces/contests.py index 8dd7c041..38e774c1 100644 --- a/src/scrape_up/codeforces/contests.py +++ b/src/scrape_up/codeforces/contests.py @@ -57,7 +57,8 @@ def get_contests(self): contest_list = [] try: - upcoming_contests = soup.find("div", {"class": "datatable"}).find_all("tr") + upcoming_list=[] + upcoming_contests = soup.find_all("div", {"class": "datatable"})[0].find_all("tr") for contest in upcoming_contests: columns = contest.find_all("td") if len(columns) == 6: @@ -70,14 +71,36 @@ def get_contests(self): ) name = name.replace("Enter »", "").strip() - contest_list.append( + upcoming_list.append( { "name": name, "start": start_time_str, "length": duration_str, } ) + ended_list=[] + ended_contests = soup.find_all("div", {"class": "datatable"})[1].find_all("tr") + for contest in ended_contests: + columns = contest.find_all("td") + if len(columns) == 6: + name = columns[0].text.strip() + start_time_str = columns[2].text.strip() + duration_str = columns[3].text.strip() + + name = " ".join( + line.strip() for line in name.splitlines() if line.strip() + ) + name = name.replace("Enter »", "").strip() + + ended_list.append( + { + "name": name, + "start": start_time_str, + "length": duration_str, + } + ) + contest_list={"upcoming_contest":upcoming_list,"ended_contest":ended_list} - return contest_list + return json.dumps(contest_list) except Exception: return None From 29a26f19b27e9726a5a30e719c99e247de403a4b Mon Sep 17 00:00:00 2001 From: Agastya Kumar Yadav Date: Sun, 19 May 2024 15:16:37 +0000 Subject: [PATCH 2/4] cleaned extracted data --- src/scrape_up/codeforces/contests.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/scrape_up/codeforces/contests.py b/src/scrape_up/codeforces/contests.py index 38e774c1..053a3eed 100644 --- a/src/scrape_up/codeforces/contests.py +++ b/src/scrape_up/codeforces/contests.py @@ -62,15 +62,12 @@ def get_contests(self): for contest in upcoming_contests: columns = contest.find_all("td") if len(columns) == 6: - name = columns[0].text.strip() + name = columns[0].text.strip().replace("Enter"," ").replace("Virtual participation"," ").replace("\u00bb"," ") start_time_str = columns[2].text.strip() duration_str = columns[3].text.strip() - name = " ".join( line.strip() for line in name.splitlines() if line.strip() ) - name = name.replace("Enter »", "").strip() - upcoming_list.append( { "name": name, @@ -83,15 +80,12 @@ def get_contests(self): for contest in ended_contests: columns = contest.find_all("td") if len(columns) == 6: - name = columns[0].text.strip() - start_time_str = columns[2].text.strip() + name = columns[0].text.strip().replace("Enter"," ").replace("Virtual participation"," ").replace("\u00bb"," ") + start_time_str = columns[2].find('span',class_="format-date").text.strip() duration_str = columns[3].text.strip() - name = " ".join( line.strip() for line in name.splitlines() if line.strip() ) - name = name.replace("Enter »", "").strip() - ended_list.append( { "name": name, From dff2cf195ba7f47605eb1a2059289036a8a99054 Mon Sep 17 00:00:00 2001 From: Agastya Kumar Yadav Date: Sun, 19 May 2024 15:20:20 +0000 Subject: [PATCH 3/4] updated documentation --- src/scrape_up/codeforces/contests.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/scrape_up/codeforces/contests.py b/src/scrape_up/codeforces/contests.py index 053a3eed..c80d71d9 100644 --- a/src/scrape_up/codeforces/contests.py +++ b/src/scrape_up/codeforces/contests.py @@ -13,7 +13,7 @@ class Contest: | Methods | Details | | ---------------------------- | ----------------------------------------------------------------------------------------- | - | `get_contests()` | Returns information on active contests like title, start, and duration | + | `get_contests()` | Returns information on active and past contests like title, start, and duration | | """ def __init__(self, *, config: RequestConfig = RequestConfig()): @@ -24,7 +24,7 @@ def __init__(self, *, config: RequestConfig = RequestConfig()): def get_contests(self): """ - Method to fetch the list of active contests on Codeforces using web scraping. + Method to fetch the list of active and past contests on Codeforces using web scraping. Example ------- @@ -35,8 +35,8 @@ def get_contests(self): Returns ------- - { - "data": [ + { + "upcoming_contest": [ { "name": "Codeforces Round #731 (Div. 3)", "start": "Aug/08/2021 17:35", @@ -44,7 +44,14 @@ def get_contests(self): }, ... ], - "message": "Found contest list" + "ended_contest": [ + { + "name": "Codeforces Round #730 (Div. 2)", + "start": "Aug/01/2021 17:35", + "length": "2 hrs" + }, + ... + ] } """ codeforces_url = "https://codeforces.com/contests" From 056b308e44f87f29f63d04f3dcf39d4c65f6b430 Mon Sep 17 00:00:00 2001 From: Nikhil Raj Date: Tue, 21 May 2024 17:28:54 +0530 Subject: [PATCH 4/4] Checks --- src/scrape_up/codeforces/contests.py | 39 +++++++++++++++++++++------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/src/scrape_up/codeforces/contests.py b/src/scrape_up/codeforces/contests.py index c80d71d9..214baeaa 100644 --- a/src/scrape_up/codeforces/contests.py +++ b/src/scrape_up/codeforces/contests.py @@ -64,12 +64,20 @@ def get_contests(self): contest_list = [] try: - upcoming_list=[] - upcoming_contests = soup.find_all("div", {"class": "datatable"})[0].find_all("tr") + upcoming_list = [] + upcoming_contests = soup.find_all("div", {"class": "datatable"})[ + 0 + ].find_all("tr") for contest in upcoming_contests: columns = contest.find_all("td") if len(columns) == 6: - name = columns[0].text.strip().replace("Enter"," ").replace("Virtual participation"," ").replace("\u00bb"," ") + name = ( + columns[0] + .text.strip() + .replace("Enter", " ") + .replace("Virtual participation", " ") + .replace("\u00bb", " ") + ) start_time_str = columns[2].text.strip() duration_str = columns[3].text.strip() name = " ".join( @@ -82,13 +90,23 @@ def get_contests(self): "length": duration_str, } ) - ended_list=[] - ended_contests = soup.find_all("div", {"class": "datatable"})[1].find_all("tr") + ended_list = [] + ended_contests = soup.find_all("div", {"class": "datatable"})[1].find_all( + "tr" + ) for contest in ended_contests: columns = contest.find_all("td") if len(columns) == 6: - name = columns[0].text.strip().replace("Enter"," ").replace("Virtual participation"," ").replace("\u00bb"," ") - start_time_str = columns[2].find('span',class_="format-date").text.strip() + name = ( + columns[0] + .text.strip() + .replace("Enter", " ") + .replace("Virtual participation", " ") + .replace("\u00bb", " ") + ) + start_time_str = ( + columns[2].find("span", class_="format-date").text.strip() + ) duration_str = columns[3].text.strip() name = " ".join( line.strip() for line in name.splitlines() if line.strip() @@ -100,8 +118,11 @@ def get_contests(self): "length": duration_str, } ) - contest_list={"upcoming_contest":upcoming_list,"ended_contest":ended_list} + contest_list = { + "upcoming_contest": upcoming_list, + "ended_contest": ended_list, + } return json.dumps(contest_list) except Exception: - return None + return None \ No newline at end of file