Skip to content

Commit

Permalink
fix id & alpha setup
Browse files Browse the repository at this point in the history
  • Loading branch information
Álvaro Bartolomé committed Feb 3, 2019
1 parent 5942cd4 commit 88f53e3
Show file tree
Hide file tree
Showing 9 changed files with 594 additions and 499 deletions.
284 changes: 144 additions & 140 deletions .idea/workspace.xml

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion investing_scrapper/Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, date_, close_, open_, max_, min_, volume_):
self.min = min_
self.volume = volume_

def to_dict(self):
def equity_to_dict(self):
return {
'Date': self.date,
'Close': self.close,
Expand All @@ -19,3 +19,12 @@ def to_dict(self):
'Min': self.min,
'Volume': self.volume,
}

def fund_to_dict(self):
    """Serialize this record as a fund row.

    Returns a plain ``dict`` keyed for DataFrame construction. Funds on
    the source site report no volume, so — unlike the equity variant —
    the 'Volume' field is omitted.
    """
    keys = ('Date', 'Close', 'Open', 'Max', 'Min')
    values = (self.date, self.close, self.open, self.max, self.min)
    return dict(zip(keys, values))
80 changes: 66 additions & 14 deletions investing_scrapper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_recent_data(equity):

result = result[::-1]

df = pd.DataFrame.from_records([value.to_dict() for value in result])
df = pd.DataFrame.from_records([value.equity_to_dict() for value in result])
df.set_index('Date', inplace=True)

return df
Expand All @@ -84,12 +84,12 @@ def get_historical_data(equity, start, end):
status = req.status_code
html = BeautifulSoup(req.text, 'html.parser')

selection = html.select('div.instrumentHeader > h2.float_lang_base_1')
selection = html.select('div.instrumentHeader > h2')
for element in selection:
header = element.text

params = {
"curr_id": "558",
"curr_id": row.id,
"smlID": "1159685",
"header": header,
"st_date": start,
Expand Down Expand Up @@ -117,7 +117,6 @@ def get_historical_data(equity, start, end):

for element in selection:
info = element.getText().strip().split()
print(info)

stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
stock_close = float(info[1].replace(',', '.'))
Expand All @@ -137,13 +136,13 @@ def get_historical_data(equity, start, end):

result = result[::-1]

df = pd.DataFrame.from_records([value.to_dict() for value in result])
df = pd.DataFrame.from_records([value.equity_to_dict() for value in result])
df.set_index('Date', inplace=True)

return df


"""def get_fund_recent_data(fund):
def get_fund_recent_data(fund):
resource_package = __name__
resource_path = '/'.join(('resources', 'funds.csv'))
if pkg_resources.resource_exists(resource_package, resource_path):
Expand Down Expand Up @@ -176,18 +175,71 @@ def get_historical_data(equity, start, end):
stock_open = float(info[2].replace(',', '.'))
stock_max = float(info[3].replace(',', '.'))
stock_min = float(info[4].replace(',', '.'))
stock_volume = 0

if info[5].__contains__('M'):
stock_volume = int(float(info[5].replace('M', '').replace(',', '.')) * 1000000)
elif info[5].__contains__('B'):
stock_volume = int(float(info[5].replace('B', '').replace(',', '.')) * 1000000000)
result.insert(len(result), Data(stock_date, stock_close, stock_open, stock_max, stock_min, None,))

result.insert(len(result), Data(stock_date, stock_close, stock_open, stock_max, stock_min, stock_volume,))
result = result[::-1]

df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
df.set_index('Date', inplace=True)

return df


def get_fund_historical_data(fund, start, end):
    """Retrieve daily historical data for a fund between two dates.

    Looks up *fund* (case-insensitive name match) in the cached
    ``resources/funds.csv`` listing — falling back to scraping the fund
    names when the cache is absent — then posts to the site's
    HistoricalDataAjax endpoint and parses the returned table.

    Parameters
    ----------
    fund : str
        Fund name as listed on the source site.
    start, end : str
        Date range bounds, in the ``dd/mm/yyyy`` form the endpoint expects.
        # NOTE(review): format assumed from the endpoint's ``st_date``/
        # ``end_date`` params — confirm against the site contract.

    Returns
    -------
    pandas.DataFrame
        OHLC rows indexed by 'Date' (no volume — funds report none),
        or ``None`` when no fund matches.
    """
    resource_package = __name__
    resource_path = '/'.join(('resources', 'funds.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        # Cache miss: scrape the fund listing (also records the numeric id).
        names = fs.get_fund_names()
        funds = pd.DataFrame(names)

    for row in funds.itertuples():
        if row.name.lower() != fund.lower():
            continue

        header = "Datos históricos " + row.symbol

        params = {
            "curr_id": row.id,  # per-fund numeric id scraped by funds.get_fund_names()
            "smlID": "15361696",
            "header": header,
            "st_date": start,
            "end_date": end,
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": ua.get_random(),
            # Endpoint only answers AJAX-style requests.
            "X-Requested-With": "XMLHttpRequest"
        }

        url = "https://es.investing.com/instruments/HistoricalDataAjax"

        req = requests.post(url, data=params, headers=head)

        html = BeautifulSoup(req.content, 'html.parser')

        selection = html.select('div#results_box > table#curr_table > tbody > tr')

        result = list()

        for element in selection:
            info = element.getText().strip().split()

            # Site uses dd.mm.yyyy dates and European decimal commas.
            stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
            stock_close = float(info[1].replace(',', '.'))
            stock_open = float(info[2].replace(',', '.'))
            stock_max = float(info[3].replace(',', '.'))
            stock_min = float(info[4].replace(',', '.'))

            # Funds carry no volume figure, hence the explicit None.
            result.append(Data(stock_date, stock_close, stock_open, stock_max, stock_min, None))

        # Rows arrive newest-first (sort_ord DESC); reverse to chronological.
        result = result[::-1]

        df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
        df.set_index('Date', inplace=True)

        return df
5 changes: 4 additions & 1 deletion investing_scrapper/equities.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,16 @@ def get_equity_names():
results = list()

for element in selection:
id_ = element.get("id")
id_ = id_.replace('pair_', '')
for nested in element.select("a"):
info = nested.get("href")
info = info.replace("/equities/", "")

data = {
"name": nested.text,
"tag": info
"tag": info,
"id": id_
}

results.append(data)
Expand Down
30 changes: 27 additions & 3 deletions investing_scrapper/funds.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def get_fund_names():
results = list()

for element in selection:
id_ = element.get('id')
id_ = id_.replace('pair_', '')

symbol = None
for symbol in element.select("td.symbol"):
symbol = symbol.get("title")
Expand All @@ -37,13 +40,15 @@ def get_fund_names():
data = {
"name": nested.text,
"symbol": symbol,
"tag": info
"tag": info,
"id": id_
}
else:
data = {
"name": nested.text,
"symbol": "undefined",
"tag": info
"tag": info,
"id": id_
}

results.append(data)
Expand All @@ -55,4 +60,23 @@ def get_fund_names():
df = pd.DataFrame(results)
df.to_csv(file, index=False)

return results
return results


# def get_id_value(fund):
# url = "https://es.investing.com/funds/" + fund + "-historical-data"
# headers = {
# 'User-Agent': ua.get_random()
# }
#
# req = requests.get(url, headers=headers)
#
# html = BeautifulSoup(req.text, 'html.parser')
#
# selection = html.select('div.js-inject-add-alert-widget > div')
#
# for element in selection:
# id_ = element['data-pair-id']
# return id_
#
# return 0
Loading

0 comments on commit 88f53e3

Please sign in to comment.