-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_functions.py
56 lines (45 loc) · 1.83 KB
/
data_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def load_data(path):
    '''
    (str) -> (pandas.DataFrame)
    Loads the database, turns the index into dates and cleans STATIONS_ID.

    IMPORTANT: This function assumes you have the database stored in a text
    file in the directory.

    path: Location of the CSV text file (anything pandas.read_csv accepts).
          The third column of the file (position 2) is used as the index.

    Returns the DataFrame with:
      * a pandas.DatetimeIndex built from the index labels, after the last
        two characters of each label's string form have been dropped;
      * a numeric STATIONS_ID column with every space removed. Entries that
        cannot be read as a number become NaN.
    '''
    # Local import so the function works even if the module never imported
    # pandas at the top of the file.
    import pandas as pd

    data = pd.read_csv(path, index_col=2)

    # Drop the last two characters of every label before parsing it.
    # NOTE(review): presumably this removes the ".0" left when the date
    # column is parsed as float (20120101.0 -> "20120101") - confirm against
    # the real data file.
    labels = data.index.values.astype(str).tolist()
    # Parse all labels in one call so the index is a real DatetimeIndex;
    # writing Timestamps back into a NumPy string array would only store
    # their text form.
    data.index = pd.to_datetime([label[:-2] for label in labels])

    # Cast to str first so the whitespace clean-up also works when pandas
    # already parsed the column as numbers (.str only exists on strings).
    station_ids = data["STATIONS_ID"].astype(str).str.replace(' ', '', regex=False)
    # pd.to_numeric replaces Series.convert_objects(convert_numeric=True),
    # which is no longer available in pandas; errors='coerce' keeps the old
    # "unparseable -> NaN" behaviour.
    data["STATIONS_ID"] = pd.to_numeric(station_ids, errors='coerce')
    return data
def get_data(data, station_id, category=3):
    """
    (pandas.DataFrame, int, int or list) -> (pandas.Series or pandas.DataFrame)
    Picks the rows of one station and returns the requested column(s).

    data: The database table. Its second column (position 1) is compared
          against station_id to find the station's rows.
    station_id: The code for the requested city/station.
    category: Column position, or list of positions, to return. An int gives
              a Series, a list gives a DataFrame. Defaults to 3, the air
              temperature.

    Column positions:
        0: Numerical Index
        1: STATIONS_ID
        2: QUALITAETS_NIVEAU
        3: Air Temperature / LUFTTEMPERATUR
        4: DAMPFDRUCK
        5: BEDECKUNGSGRAD
        6: LUFTDRUCK_STATIONSHOEHE
        7: REL_FEUCHTE
        8: WINDGESCHWINDIGKEIT
        9: Max Air Temperature
        10: Min Air Temperature
        11: LUFTTEMP_AM_ERDB_MINIMUM (?)
        12: Max Wind Speed / WINDSPITZE_MAXIMUM
        13: Precipitation Height / NIEDERSCHLAGSHOEHE (?)
        14: NIEDERSCHLAGSHOEHE_IND (?)
        15: Sunshine Duration
        16: Snow Height
    """
    # Boolean mask: True for every row whose second column equals station_id.
    station_column = data.iloc[:, 1]
    matches_station = station_column == station_id
    station_rows = data[matches_station]
    # Purely positional selection, so column labels do not matter here.
    return station_rows.iloc[:, category]