From 92ba0714fedd9a6ce7c376d13c1ed53cf3ce74f5 Mon Sep 17 00:00:00 2001 From: "filip.hein1@gmail.com" Date: Wed, 26 Feb 2020 14:24:46 +0100 Subject: [PATCH] Added Green Towers Bistro menu scraper, updated GreenTowers class to contain the new scraper, created IMenu - a class acting as every scraper's interface to maintain code readability and enforce a common namespace. --- logic/sites.py | 4 +-- scrapers/AstraMenu.py | 5 +++- scrapers/CockpeatMenu.py | 3 ++- scrapers/FacebookPage.py | 4 --- scrapers/GreenTowersBistroMenu.py | 44 +++++++++++++++++++++++++++++++ scrapers/IMenu.py | 7 +++++ scrapers/KameMenu.py | 3 ++- scrapers/ObiadeoMenu.py | 3 ++- scrapers/__init__.py | 1 + 9 files changed, 64 insertions(+), 10 deletions(-) create mode 100644 scrapers/GreenTowersBistroMenu.py create mode 100644 scrapers/IMenu.py diff --git a/logic/sites.py b/logic/sites.py index 5d6ba60..562d2ac 100644 --- a/logic/sites.py +++ b/logic/sites.py @@ -3,7 +3,7 @@ from emails import SubscriptionChecker, MailSender, MailingList, MailCreator from emails import State -from scrapers import AstraMenu, CockpeatMenu, ObiadeoMenu, KameMenu +from scrapers import AstraMenu, CockpeatMenu, ObiadeoMenu, KameMenu, GreenTowersBistroMenu from bot_credentials import WL_USERNAME, WL_PASSWORD, GT_USERNAME, GT_PASSWORD @@ -51,4 +51,4 @@ def __init__(self, email=WL_USERNAME, password=WL_PASSWORD, send_confirmation_ma class GreenTowers(Site): def __init__(self, email=GT_USERNAME, password=GT_PASSWORD, send_confirmation_mails=True): super().__init__('GT', email, password, send_confirmation_mails) - self.scrapers = [KameMenu] + self.scrapers = [GreenTowersBistroMenu, KameMenu] diff --git a/scrapers/AstraMenu.py b/scrapers/AstraMenu.py index 1d51100..101c6f5 100644 --- a/scrapers/AstraMenu.py +++ b/scrapers/AstraMenu.py @@ -5,8 +5,10 @@ import requests from bs4 import BeautifulSoup +from scrapers.IMenu import IMenu -class AstraMenu: + +class AstraMenu(IMenu): def __init__(self): self.menu_url = 
'http://www.astra-catering.pl/zestaw-dnia.html' self.day_of_the_week = datetime.today().weekday() @@ -54,4 +56,5 @@ def _correct_text(text): text = re.sub(r'-\n', '\n', text) text = re.sub(r'[ ]{2,}', ' ', text) text = re.sub(r'\n{2,}', '\n', text) + text = re.sub(r': \n', ': ', text) return text diff --git a/scrapers/CockpeatMenu.py b/scrapers/CockpeatMenu.py index 9fefdfc..dcdcc03 100644 --- a/scrapers/CockpeatMenu.py +++ b/scrapers/CockpeatMenu.py @@ -2,10 +2,11 @@ import re from scrapers.FacebookPage import FacebookPage +from scrapers.IMenu import IMenu from tools.levenshtein_distance import calc_levenshtein -class CockpeatMenu(FacebookPage): +class CockpeatMenu(IMenu, FacebookPage): def __init__(self): super().__init__(fanpage_url='https://www.facebook.com/pg/COCKPEAT/posts') diff --git a/scrapers/FacebookPage.py b/scrapers/FacebookPage.py index b8faa15..ee5e56e 100644 --- a/scrapers/FacebookPage.py +++ b/scrapers/FacebookPage.py @@ -17,10 +17,6 @@ def __init__(self, fanpage_url): self._create_soup_object() self.posts = self._get_posts_from_content() - @abstractmethod - def get_todays_menu(self) -> dict: - raise NotImplementedError - def _get_page(self): self.page = requests.get(self.fanpage_url) diff --git a/scrapers/GreenTowersBistroMenu.py b/scrapers/GreenTowersBistroMenu.py new file mode 100644 index 0000000..aa4e469 --- /dev/null +++ b/scrapers/GreenTowersBistroMenu.py @@ -0,0 +1,44 @@ +from datetime import datetime + +import requests +from bs4 import BeautifulSoup + +from scrapers.IMenu import IMenu + + +class GreenTowersBistroMenu(IMenu): + def __init__(self): + self.menu_url_template = 'http://www.bistrogreentowers.pl/menu-tygodniowe#c{}' + self.day_of_the_week = datetime.today().weekday() + self.weekly_menu = list() + self._get_page(self.day_of_the_week+1) + self._decode_content() + self._create_soup_object() + self._get_table_from_content() + + def get_todays_menu(self) -> dict: + if self.day_of_the_week < 5: + return {'green_towers_bistro_menu': 
self._get_featured_dishes()} + else: + return {'green_towers_bistro_menu': ''} + + def _get_page(self, day_of_the_week): + self.page = requests.get(self.menu_url_template.format(day_of_the_week)) + + def _decode_content(self): + self.content = self.page.content.decode('utf-8', errors='ignore').replace('ó', 'รณ').replace(' ', '') + + def _create_soup_object(self): + self.soup = BeautifulSoup(self.content, 'html.parser') + + def _get_table_from_content(self): + self.table = self.soup.find('table') + + def _get_featured_dishes(self): + tds = self.table.find_all("td") + soup = "Zupa: {}".format(tds[1].text) + featured_dishes = ["Danie dnia nr {}: {}".format(ind+1, y) for ind, y in + enumerate([x.text.replace("*", "") for x in tds if '*' in x.text])] + featured_dishes = [soup] + featured_dishes + featured_dishes = "\n".join(featured_dishes) + return featured_dishes diff --git a/scrapers/IMenu.py b/scrapers/IMenu.py new file mode 100644 index 0000000..5b2bb1b --- /dev/null +++ b/scrapers/IMenu.py @@ -0,0 +1,7 @@ +from abc import ABC, abstractmethod + + +class IMenu(ABC): + @abstractmethod + def get_todays_menu(self) -> dict: + raise NotImplementedError diff --git a/scrapers/KameMenu.py b/scrapers/KameMenu.py index fc0b742..aa47d54 100644 --- a/scrapers/KameMenu.py +++ b/scrapers/KameMenu.py @@ -2,10 +2,11 @@ import re import datetime from scrapers.FacebookPage import FacebookPage +from scrapers.IMenu import IMenu from tools.levenshtein_distance import calc_levenshtein -class KameMenu(FacebookPage): +class KameMenu(IMenu, FacebookPage): def __init__(self): super().__init__(fanpage_url='https://www.facebook.com/pg/kame.wro/posts') diff --git a/scrapers/ObiadeoMenu.py b/scrapers/ObiadeoMenu.py index 1355e60..5b23c7f 100644 --- a/scrapers/ObiadeoMenu.py +++ b/scrapers/ObiadeoMenu.py @@ -5,10 +5,11 @@ from bs4 import NavigableString from scrapers.FacebookPage import FacebookPage +from scrapers.IMenu import IMenu from tools.levenshtein_distance import calc_levenshtein -class 
ObiadeoMenu(FacebookPage): +class ObiadeoMenu(IMenu, FacebookPage): def __init__(self): super().__init__(fanpage_url='https://www.facebook.com/pg/obiadeo/posts') diff --git a/scrapers/__init__.py b/scrapers/__init__.py index f6c16b0..80d676b 100644 --- a/scrapers/__init__.py +++ b/scrapers/__init__.py @@ -2,3 +2,4 @@ from .CockpeatMenu import CockpeatMenu from .ObiadeoMenu import ObiadeoMenu from .KameMenu import KameMenu +from .GreenTowersBistroMenu import GreenTowersBistroMenu \ No newline at end of file