-
Notifications
You must be signed in to change notification settings - Fork 1
/
22140_read-in.py
123 lines (97 loc) · 4.47 KB
/
22140_read-in.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# %%
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn import linear_model
from dfmgmt import *
import seaborn as sns
sns.set(rc={'figure.figsize':(11, 4)})
import os
# %%
# Read the files into a df and clean the data
# Create list of filenames
key_list_us = [22139, 22140]
filename_list_us = []
for k in range(len(key_list_us)):
filename = str(key_list_us[k]) + '_daily-record.txt'
filename_list_us.append(filename)
# Create a list of dfs
df_list_us = [pd.read_fwf(filename, skiprows=2,
names=['station',
'date',
'precip',
'tmax',
'tmin'])
for filename in filename_list_us]
# Add climate station key for each df
for k in range(len(key_list_us)):
df_list_us[k]['key'] = key_list_us[k]
# Concatenate dfs
data_us = pd.concat(df_list_us)
data_us = data_us[['date','precip','tmax','tmin','key']]
# Swap non-records to None type
data_us = data_us.replace({-999.0: None, -9999.0: None}, regex=True)
# Set dates to correct format
data_us['date'] = pd.to_datetime(data_us['date'], format='%Y%m%d').dt.strftime("%Y-%m-%d")
data_us['date'] = pd.to_datetime(data_us['date'], format='%Y-%m-%d')
# Set df index to the datetime
data_us = data_us.set_index('date')
# Add month and year columns to df
data_us['year'] = data_us.index.year
data_us['month'] = data_us. index.month
# %%
# Create a dictionary of dfs utilizing a for loop and the split_key_df function
dfs_us = {key: split_key_df(data_us, key) for key in key_list_us}
# Create a function to generate separate dfs based on freq using a provided list of column names
cols_us = ['precip', 'tmax', 'tmin']
# Create a dictionary of dfs utilizing a for loop and the resample_mean
dfs_mm_us = {key: resample_mean(dfs_us[key], cols_us, 'M') for key in key_list_us} # mm = monthly mean
# Add month and year to each df in dict
for key in key_list_us:
dfs_mm_us[key]['month'] = dfs_mm_us[key].index.month
dfs_mm_us[key]['year'] = dfs_mm_us[key].index.year
# %%
# Automate 12-plot monthly mean plot for variables
# Set up data & variables
start, end = 1976, 2016 # set time frame to last forty years
month_str = ['Jan', 'Feb', 'Mar', 'Apr', 'May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
df_mm_us_last40 = dfs_mm_us[22140].loc[str(start):str(end)]
degree_sign = u'\N{DEGREE SIGN}'
for col in cols_us:
fig = plt.figure(figsize=(24,16))
fig.subplots_adjust(hspace=0.2, wspace=0.2)
fig.suptitle("Monthly Mean for "+col+"\nClimate Station 22140", fontsize=30)
for i in range(1,13):
ax = fig.add_subplot(3,4,i)
x = df_mm_us_last40[df_mm_us_last40.index.month == i].index.year
y = df_mm_us_last40[df_mm_us_last40.index.month == i][col]
ax.plot(x,y) # this plots the col values
# Col-alike subplot formatting
ax.set_title(month_str[i-1], fontsize=20, fontweight='bold')
# Make the linear regression
database= df_mm_us_last40.loc[df_mm_us_last40['month']==i][[col,'year']]
database=database.dropna()
x_data = database['year'].values.reshape(database.shape[0],1)
y_data = database[col].values.reshape(database.shape[0],1)
reg = linear_model.LinearRegression().fit(x_data, y_data)
coef = reg.coef_
inter= reg.intercept_
y_estimate = coef*x_data+inter # y=mx+b, possible option to upgrade
ax.plot(x_data,y_estimate) # this plots the linear regression
# Col-dependent subplot formatting
if col == cols_us[0]:
ax.set_ylabel('mm')
ax.text(.1, .8,
str(round((end-start)*coef[0,0],2))+'mm/40yr',
transform=ax.transAxes,
fontsize=24,
color='red')
else:
ax.set_ylabel(degree_sign+'C')
ax.text(.1, .8,
str(round((end-start)*coef[0,0],2))+degree_sign+'C/40yr',
transform=ax.transAxes,
fontsize=24,
color='red')
plt.savefig('22140_'+col+'-mm')
# %%