Skip to content

Commit

Permalink
Added Green Towers Bistro menu scraper, updated GreenTowers class to …
Browse files Browse the repository at this point in the history
…contain the new scraper, created IMenu - a class acting as every scraper's interface to maintain code readability and enforce a common namespace.
  • Loading branch information
f-hein committed Feb 26, 2020
1 parent 60cd8a6 commit 92ba071
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 10 deletions.
4 changes: 2 additions & 2 deletions logic/sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from emails import SubscriptionChecker, MailSender, MailingList, MailCreator
from emails import State
from scrapers import AstraMenu, CockpeatMenu, ObiadeoMenu, KameMenu
from scrapers import AstraMenu, CockpeatMenu, ObiadeoMenu, KameMenu, GreenTowersBistroMenu
from bot_credentials import WL_USERNAME, WL_PASSWORD, GT_USERNAME, GT_PASSWORD


Expand Down Expand Up @@ -51,4 +51,4 @@ def __init__(self, email=WL_USERNAME, password=WL_PASSWORD, send_confirmation_ma
class GreenTowers(Site):
def __init__(self, email=GT_USERNAME, password=GT_PASSWORD, send_confirmation_mails=True):
super().__init__('GT', email, password, send_confirmation_mails)
self.scrapers = [KameMenu]
self.scrapers = [GreenTowersBistroMenu, KameMenu]
5 changes: 4 additions & 1 deletion scrapers/AstraMenu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import requests
from bs4 import BeautifulSoup

from scrapers.IMenu import IMenu

class AstraMenu:

class AstraMenu(IMenu):
def __init__(self):
self.menu_url = 'http://www.astra-catering.pl/zestaw-dnia.html'
self.day_of_the_week = datetime.today().weekday()
Expand Down Expand Up @@ -54,4 +56,5 @@ def _correct_text(text):
text = re.sub(r'-\n', '\n', text)
text = re.sub(r'[ ]{2,}', ' ', text)
text = re.sub(r'\n{2,}', '\n', text)
text = re.sub(r': \n', ': ', text)
return text
3 changes: 2 additions & 1 deletion scrapers/CockpeatMenu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import re

from scrapers.FacebookPage import FacebookPage
from scrapers.IMenu import IMenu
from tools.levenshtein_distance import calc_levenshtein


class CockpeatMenu(FacebookPage):
class CockpeatMenu(IMenu, FacebookPage):
def __init__(self):
super().__init__(fanpage_url='https://www.facebook.com/pg/COCKPEAT/posts')

Expand Down
4 changes: 0 additions & 4 deletions scrapers/FacebookPage.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ def __init__(self, fanpage_url):
self._create_soup_object()
self.posts = self._get_posts_from_content()

@abstractmethod
def get_todays_menu(self) -> dict:
raise NotImplementedError

def _get_page(self):
self.page = requests.get(self.fanpage_url)

Expand Down
44 changes: 44 additions & 0 deletions scrapers/GreenTowersBistroMenu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from datetime import datetime

import requests
from bs4 import BeautifulSoup

from scrapers.IMenu import IMenu


class GreenTowersBistroMenu(IMenu):
    """Scraper for the weekly menu page of the Green Towers bistro.

    Constructing an instance downloads the menu page, decodes it, parses it
    with BeautifulSoup and stores the first ``<table>`` element found in
    ``self.table`` (``None`` when the page contains no table).
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # NOTE(review): '#c{}' is a URL fragment. Fragments are not sent to
        # the server, so the same document is presumably returned for every
        # day -- confirm that the first table on the page is today's menu.
        self.menu_url_template = 'http://www.bistrogreentowers.pl/menu-tygodniowe#c{}'
        # Monday is 0, Sunday is 6 (datetime.weekday() convention).
        self.day_of_the_week = datetime.today().weekday()
        self.weekly_menu = list()
        # The page anchors are numbered from 1, hence the +1.
        self._get_page(self.day_of_the_week + 1)
        self._decode_content()
        self._create_soup_object()
        self._get_table_from_content()

    def get_todays_menu(self) -> dict:
        """Return today's menu under the ``'green_towers_bistro_menu'`` key.

        The value is the formatted list of featured dishes on weekdays and
        an empty string on Saturday and Sunday.
        """
        if self.day_of_the_week < 5:
            return {'green_towers_bistro_menu': self._get_featured_dishes()}
        return {'green_towers_bistro_menu': ''}

    def _get_page(self, day_of_the_week):
        """Download the menu page for the given 1-based day number."""
        url = self.menu_url_template.format(day_of_the_week)
        self.page = requests.get(url, timeout=self.REQUEST_TIMEOUT)

    def _decode_content(self):
        """Decode the response body and normalise two HTML entities."""
        raw_text = self.page.content.decode('utf-8', errors='ignore')
        self.content = raw_text.replace('&oacute;', 'ó').replace('&nbsp;', '')

    def _create_soup_object(self):
        """Parse the decoded page into ``self.soup``."""
        self.soup = BeautifulSoup(self.content, 'html.parser')

    def _get_table_from_content(self):
        """Store the first ``<table>`` of the page (or ``None``) in ``self.table``."""
        self.table = self.soup.find('table')

    def _get_featured_dishes(self):
        """Build the text listing the soup and the starred dishes.

        The second table cell is reported as the soup; every cell whose text
        contains ``*`` is reported as a numbered dish of the day with the
        asterisks removed.

        Returns:
            The lines joined with ``\\n``, or an empty string when the page
            did not contain a usable menu table.
        """
        import logging  # local import: the module does not import logging

        cells = self.table.find_all("td") if self.table is not None else []
        if len(cells) < 2:
            # A changed page layout should yield an empty menu, not a crash
            # (AttributeError on a missing table / IndexError on cells[1]).
            logging.getLogger(__name__).warning(
                "Green Towers Bistro menu table is missing or has too few cells")
            return ''

        soup_line = "Zupa: {}".format(cells[1].text)
        starred_dishes = [cell.text.replace("*", "") for cell in cells if '*' in cell.text]
        lines = [soup_line]
        lines.extend(
            "Danie dnia nr {}: {}".format(number, dish)
            for number, dish in enumerate(starred_dishes, start=1)
        )
        return "\n".join(lines)
7 changes: 7 additions & 0 deletions scrapers/IMenu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from abc import ABC, abstractmethod


class IMenu(ABC):
    """Common interface implemented by every menu scraper."""

    @abstractmethod
    def get_todays_menu(self) -> dict:
        """Return today's menu as a dict; concrete scrapers must override this."""
        raise NotImplementedError
3 changes: 2 additions & 1 deletion scrapers/KameMenu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import re
import datetime
from scrapers.FacebookPage import FacebookPage
from scrapers.IMenu import IMenu
from tools.levenshtein_distance import calc_levenshtein


class KameMenu(FacebookPage):
class KameMenu(IMenu, FacebookPage):
def __init__(self):
super().__init__(fanpage_url='https://www.facebook.com/pg/kame.wro/posts')

Expand Down
3 changes: 2 additions & 1 deletion scrapers/ObiadeoMenu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from bs4 import NavigableString

from scrapers.FacebookPage import FacebookPage
from scrapers.IMenu import IMenu
from tools.levenshtein_distance import calc_levenshtein


class ObiadeoMenu(FacebookPage):
class ObiadeoMenu(IMenu, FacebookPage):
def __init__(self):
super().__init__(fanpage_url='https://www.facebook.com/pg/obiadeo/posts')

Expand Down
1 change: 1 addition & 0 deletions scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .CockpeatMenu import CockpeatMenu
from .ObiadeoMenu import ObiadeoMenu
from .KameMenu import KameMenu
from .GreenTowersBistroMenu import GreenTowersBistroMenu

0 comments on commit 92ba071

Please sign in to comment.