Skip to content

Commit

Permalink
fix id & alpha setup
Browse files Browse the repository at this point in the history
  • Loading branch information
Álvaro Bartolomé committed Feb 3, 2019
1 parent 5942cd4 commit 88f53e3
Show file tree
Hide file tree
Showing 9 changed files with 594 additions and 499 deletions.
284 changes: 144 additions & 140 deletions .idea/workspace.xml

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion investing_scrapper/Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, date_, close_, open_, max_, min_, volume_):
self.min = min_
self.volume = volume_

def to_dict(self):
def equity_to_dict(self):
return {
'Date': self.date,
'Close': self.close,
Expand All @@ -19,3 +19,12 @@ def to_dict(self):
'Min': self.min,
'Volume': self.volume,
}

def fund_to_dict(self):
    """Serialize this record as a fund row.

    Returns a plain ``dict`` keyed for DataFrame construction. Funds on
    the source site report no volume, so — unlike the equity variant —
    the 'Volume' field is omitted.
    """
    keys = ('Date', 'Close', 'Open', 'Max', 'Min')
    values = (self.date, self.close, self.open, self.max, self.min)
    return dict(zip(keys, values))
80 changes: 66 additions & 14 deletions investing_scrapper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_recent_data(equity):

result = result[::-1]

df = pd.DataFrame.from_records([value.to_dict() for value in result])
df = pd.DataFrame.from_records([value.equity_to_dict() for value in result])
df.set_index('Date', inplace=True)

return df
Expand All @@ -84,12 +84,12 @@ def get_historical_data(equity, start, end):
status = req.status_code
html = BeautifulSoup(req.text, 'html.parser')

selection = html.select('div.instrumentHeader > h2.float_lang_base_1')
selection = html.select('div.instrumentHeader > h2')
for element in selection:
header = element.text

params = {
"curr_id": "558",
"curr_id": row.id,
"smlID": "1159685",
"header": header,
"st_date": start,
Expand Down Expand Up @@ -117,7 +117,6 @@ def get_historical_data(equity, start, end):

for element in selection:
info = element.getText().strip().split()
print(info)

stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
stock_close = float(info[1].replace(',', '.'))
Expand All @@ -137,13 +136,13 @@ def get_historical_data(equity, start, end):

result = result[::-1]

df = pd.DataFrame.from_records([value.to_dict() for value in result])
df = pd.DataFrame.from_records([value.equity_to_dict() for value in result])
df.set_index('Date', inplace=True)

return df


"""def get_fund_recent_data(fund):
def get_fund_recent_data(fund):
resource_package = __name__
resource_path = '/'.join(('resources', 'funds.csv'))
if pkg_resources.resource_exists(resource_package, resource_path):
Expand Down Expand Up @@ -176,18 +175,71 @@ def get_historical_data(equity, start, end):
stock_open = float(info[2].replace(',', '.'))
stock_max = float(info[3].replace(',', '.'))
stock_min = float(info[4].replace(',', '.'))
stock_volume = 0

if info[5].__contains__('M'):
stock_volume = int(float(info[5].replace('M', '').replace(',', '.')) * 1000000)
elif info[5].__contains__('B'):
stock_volume = int(float(info[5].replace('B', '').replace(',', '.')) * 1000000000)
result.insert(len(result), Data(stock_date, stock_close, stock_open, stock_max, stock_min, None,))

result.insert(len(result), Data(stock_date, stock_close, stock_open, stock_max, stock_min, stock_volume,))
result = result[::-1]

df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
df.set_index('Date', inplace=True)

return df


def get_fund_historical_data(fund, start, end):
    """Retrieve daily historical data for a fund between two dates.

    Looks up *fund* (case-insensitive name match) in the cached
    ``resources/funds.csv`` listing — falling back to scraping the fund
    names when the cache is absent — then posts to the site's
    HistoricalDataAjax endpoint and parses the returned table.

    Parameters
    ----------
    fund : str
        Fund name as listed on the source site.
    start, end : str
        Date range bounds, in the ``dd/mm/yyyy`` form the endpoint expects.
        # NOTE(review): format assumed from the endpoint's ``st_date``/
        # ``end_date`` params — confirm against the site contract.

    Returns
    -------
    pandas.DataFrame
        OHLC rows indexed by 'Date' (no volume — funds report none),
        or ``None`` when no fund matches.
    """
    resource_package = __name__
    resource_path = '/'.join(('resources', 'funds.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        # Cache miss: scrape the fund listing (also records the numeric id).
        names = fs.get_fund_names()
        funds = pd.DataFrame(names)

    for row in funds.itertuples():
        if row.name.lower() != fund.lower():
            continue

        header = "Datos históricos " + row.symbol

        params = {
            "curr_id": row.id,  # per-fund numeric id scraped by funds.get_fund_names()
            "smlID": "15361696",
            "header": header,
            "st_date": start,
            "end_date": end,
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": ua.get_random(),
            # Endpoint only answers AJAX-style requests.
            "X-Requested-With": "XMLHttpRequest"
        }

        url = "https://es.investing.com/instruments/HistoricalDataAjax"

        req = requests.post(url, data=params, headers=head)

        html = BeautifulSoup(req.content, 'html.parser')

        selection = html.select('div#results_box > table#curr_table > tbody > tr')

        result = list()

        for element in selection:
            info = element.getText().strip().split()

            # Site uses dd.mm.yyyy dates and European decimal commas.
            stock_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
            stock_close = float(info[1].replace(',', '.'))
            stock_open = float(info[2].replace(',', '.'))
            stock_max = float(info[3].replace(',', '.'))
            stock_min = float(info[4].replace(',', '.'))

            # Funds carry no volume figure, hence the explicit None.
            result.append(Data(stock_date, stock_close, stock_open, stock_max, stock_min, None))

        # Rows arrive newest-first (sort_ord DESC); reverse to chronological.
        result = result[::-1]

        df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
        df.set_index('Date', inplace=True)

        return df
5 changes: 4 additions & 1 deletion investing_scrapper/equities.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,16 @@ def get_equity_names():
results = list()

for element in selection:
id_ = element.get("id")
id_ = id_.replace('pair_', '')
for nested in element.select("a"):
info = nested.get("href")
info = info.replace("/equities/", "")

data = {
"name": nested.text,
"tag": info
"tag": info,
"id": id_
}

results.append(data)
Expand Down
30 changes: 27 additions & 3 deletions investing_scrapper/funds.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def get_fund_names():
results = list()

for element in selection:
id_ = element.get('id')
id_ = id_.replace('pair_', '')

symbol = None
for symbol in element.select("td.symbol"):
symbol = symbol.get("title")
Expand All @@ -37,13 +40,15 @@ def get_fund_names():
data = {
"name": nested.text,
"symbol": symbol,
"tag": info
"tag": info,
"id": id_
}
else:
data = {
"name": nested.text,
"symbol": "undefined",
"tag": info
"tag": info,
"id": id_
}

results.append(data)
Expand All @@ -55,4 +60,23 @@ def get_fund_names():
df = pd.DataFrame(results)
df.to_csv(file, index=False)

return results
return results


# def get_id_value(fund):
# url = "https://es.investing.com/funds/" + fund + "-historical-data"
# headers = {
# 'User-Agent': ua.get_random()
# }
#
# req = requests.get(url, headers=headers)
#
# html = BeautifulSoup(req.text, 'html.parser')
#
# selection = html.select('div.js-inject-add-alert-widget > div')
#
# for element in selection:
# id_ = element['data-pair-id']
# return id_
#
# return 0
Loading

0 comments on commit 88f53e3

Please sign in to comment.