forked from wandgibaut/lets_code_g6
-
Notifications
You must be signed in to change notification settings - Fork 0
/
census_withHigh.py
32 lines (23 loc) · 1.09 KB
/
census_withHigh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# High_income class - pandas project - Santander Coders 2021 - G6 (by Samya)
# Subset of the census Database: households at or above the 90th percentile of total household income
import os
import pandas as pd
import census_database as census
class High_income(census.Database):
    """Top-decile subset of the census data, ranked by household income.

    Instantiating the class runs the whole pipeline:

    1. the parent ``census.Database`` initialiser is called;
    2. rows whose 'Total Household Income' is at or above the 90th
       percentile are selected and sorted from highest to lowest income;
    3. the result is stored in ``self.data_highincome`` and written to
       ``output_files/High_Income_CensusData.csv``.

    NOTE(review): relies on ``census.Database.__init__`` populating
    ``self.data`` with a DataFrame that has a 'Total Household Income'
    column -- that class lives in another module; confirm there.
    """

    # Column used both to compute the threshold and to order the subset.
    _INCOME_COLUMN = 'Total Household Income'

    def __init__(self):
        """Build the high-income subset, save it to CSV and report success."""
        super().__init__()
        # High-income rows only, highest income first.
        self.data_highincome = self._subset_highincome()
        self._save2csv()
        print('High Income Data READY!')

    def _check_highincome(self):
        """Return a boolean mask flagging the top 10% of household incomes.

        The threshold is the 0.9 quantile of the income column; rows equal
        to the threshold are included (``>=``).
        """
        income = self.data[self._INCOME_COLUMN]
        return income >= income.quantile(0.9)

    def _subset_highincome(self):
        """Return the high-income rows sorted by income, descending.

        Boolean indexing and ``sort_values`` each return a new DataFrame,
        so ``self.data`` is left untouched.
        """
        mask = self._check_highincome()
        return self.data[mask].sort_values(
            by=self._INCOME_COLUMN, ascending=False
        )

    def _save2csv(self):
        """Write ``self.data_highincome`` to the output CSV file."""
        fileout = os.path.join('output_files', 'High_Income_CensusData.csv')
        # to_csv does not create missing parent directories; make sure the
        # target folder exists so a fresh checkout does not fail here.
        os.makedirs(os.path.dirname(fileout), exist_ok=True)
        self.data_highincome.to_csv(fileout)