-
Notifications
You must be signed in to change notification settings - Fork 6
/
income.py
63 lines (46 loc) · 1.96 KB
/
income.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import re
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.tools as tls
class SalaryEstimates:
    '''
    Parse salary-estimate figures out of a scraped page and plot them.

    ``salary_parser`` stores its result on ``self.df5``; ``graphing_salary``
    reads that attribute, so the parser has to be called first.
    '''

    # Column names of the DataFrame produced by ``salary_parser``.
    _SALARY_COLUMN = 'Salary from jobs'
    _QUANTITY_COLUMN = 'Quantity'

    # Characters removed from the scraped text before it is split into tokens.
    _NOISE = re.compile('([+(),])')

    def __init__(self):
        # Filled in by ``salary_parser``; ``None`` means "nothing parsed yet".
        self.df5 = None

    @staticmethod
    def _row_labels(count):
        '''Return ``count`` row labels: 'a' .. 'z', then 'row26', 'row27', ...'''
        letters = 'abcdefghijklmnopqrstuvwxyz'
        return [letters[i] if i < len(letters) else 'row%d' % i
                for i in range(count)]

    def salary_parser(self, soup):
        '''
        Build a DataFrame of salary figures and job counts from the page.

        Only the first ``<ul class="rbList">`` found in ``soup`` is parsed.
        Its text has ``+``, ``(``, ``)`` and ``,`` removed and is then split
        on whitespace: tokens containing ``$`` become salaries (with the
        ``$`` dropped) and every other token becomes a quantity.  Values that
        are not numeric end up as NaN.

        Rows are labelled 'a', 'b', 'c', ... for however many entries the
        list holds; if the two columns differ in length the shorter one is
        padded with NaN.  The result is also stored on ``self.df5``.

        :param soup: beautiful soup object defined at the main module.
        :return: returns a pandas DataFrame with the columns
                 'Salary from jobs' and 'Quantity'; it is empty when the page
                 has no matching list or the list holds no text.
        '''
        columns = [self._SALARY_COLUMN, self._QUANTITY_COLUMN]

        first_list = next(iter(soup.find_all("ul", {"class": "rbList"})), None)
        tokens = []
        if first_list is not None:
            text = first_list.get_text(' ', strip=True)
            # split() with no argument collapses runs of whitespace, so the
            # gap left behind by a removed '+' cannot produce an empty token.
            tokens = self._NOISE.sub('', text).split()

        if not tokens:
            # Nothing to parse: hand back an empty, correctly shaped frame
            # instead of failing on undefined data.
            self.df5 = pd.DataFrame(
                dict((name, pd.Series(dtype='float64')) for name in columns),
                columns=columns)
            return self.df5

        salary = [tok.replace('$', '') for tok in tokens if '$' in tok]
        quantity = [tok for tok in tokens if '$' not in tok]

        # Each Series gets labels sized to its own length; DataFrame aligns
        # them on the label and fills the gaps with NaN.
        frame = pd.DataFrame(
            {self._SALARY_COLUMN: pd.Series(
                salary, index=self._row_labels(len(salary)), dtype=object),
             self._QUANTITY_COLUMN: pd.Series(
                quantity, index=self._row_labels(len(quantity)), dtype=object)},
            columns=columns)
        self.df5 = frame.apply(pd.to_numeric, errors='coerce')

        df5_median = self.df5[self._SALARY_COLUMN].median()
        df5_mean = self.df5[self._SALARY_COLUMN].mean()

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.df5)
        print("The median for this job is: %s" % df5_median)
        print("The mean for this job is: %s" % df5_mean)
        return self.df5

    def graphing_salary(self, username, api_key):
        '''
        Plot quantity (x) against salary (y) from ``self.df5`` with Plotly.

        :param username: str. This is the Plotly api username that you gave beforehand
        :param api_key: str. Plotly api_key
        :return: graphical output of the job selected (the value returned by
                 ``py.plot``).
        :raises AttributeError: if ``salary_parser`` has not produced any data yet.
        '''
        frame = getattr(self, 'df5', None)
        if frame is None:
            # Checked before touching the credentials so that a call made too
            # early has no side effects.
            raise AttributeError(
                "no salary data available: call salary_parser() before "
                "graphing_salary()")

        # authorizing the user Plotly credentials
        tls.set_credentials_file(username=username, api_key=api_key)

        # creating a Plotly scatter object
        data = [
            go.Scatter(
                x=frame[self._QUANTITY_COLUMN],
                y=frame[self._SALARY_COLUMN]
            )
        ]
        # NOTE(review): the plot name says "bar" although the trace is a
        # scatter; kept unchanged because it identifies the existing plot.
        final_graph = py.plot(data, filename='pandas/basic-bar')
        return final_graph