forked from david-edu-morales/TAAP_py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
taap_plot_seasonalSum.py
179 lines (139 loc) · 8.75 KB
/
taap_plot_seasonalSum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# %%
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import datetime as dt
from sklearn import linear_model
import csv
import seaborn as sns
# Configure seaborn, passing a default figure size of (11, 4) through its rc settings
sns.set(rc={'figure.figsize':(11, 4)})
# %%
# Script-wide settings used by the cells below
# Keys of the climate stations to process
keylist_mx = [
    26013,
    26057,
    26164,
]
# Variables to be resampled
varsSum_mx = ['precip']
# Seasons to be graphed
seasons = [
    'summer',
    'winter',
]
# Csv file that collects the linear-regression coefficients
csvFile = 'data/historicalTrends_precipSum.csv'
# Column names of that csv file
headerList = [
    'key',
    'season',
    'coef',
]
# %%
# *** MEXICAN CLIMATE STATIONS ***
# Load the cleaned data of every station into its own dataframe
# Step 1: map each station key to the path of its cleaned-data csv
filenameDict = {
    key: 'data/'+str(key)+'_clean-data.csv'
    for key in keylist_mx
}
# Step 2: map each station key to the dataframe read from that path,
# using the 'date' column (parsed as dates) as the index
dictCleanData = {
    key: pd.read_csv(filenameDict[key], index_col='date', parse_dates=True)
    for key in filenameDict
}
# %%
# Prepare the rainfall records used by the seasonal sums for all stations
# Keep only the rows of the QC'd data whose 'variable' column is 'precip'
dictPrecip = {
    key: dictCleanData[key].loc[dictCleanData[key]['variable'] == 'precip']
    for key in keylist_mx
}
# SEASONAL PRECIPITATION TOTALS // WINTER (Nov 1 - Mar 31) AND SUMMER (Jun 1 - Sep 30)
def _seasonal_precip_sums(df, season, start_md, end_md, end_year_offset=0):
    """Sum precipitation over one seasonal window per year for a single station.

    Parameters
    ----------
    df : pandas.DataFrame
        Precipitation records with a DatetimeIndex and a 'measurement' column.
    season : str
        Label written to the 'season' column of the result.
    start_md, end_md : tuple of int
        (month, day) of the first and last day of the window, both inclusive.
    end_year_offset : int, optional
        Number of years added to the window's end date; use 1 for a season
        that runs across the new year (e.g. Nov -> Mar).

    Returns
    -------
    pandas.DataFrame
        Indexed by 'year' (the window's starting year, as a datetime) with
        columns 'precipSum' and 'season'. The last available year is dropped,
        presumably because its season is incomplete -- TODO confirm.
    """
    years = df.index.year.unique().tolist()     # all years present in the data
    totals = []
    for year in years:
        window_start = dt.datetime(year, *start_md)
        window_end = dt.datetime(year + end_year_offset, *end_md)
        in_window = (df.index >= window_start) & (df.index <= window_end)
        # Select the column as a Series so .sum() returns a scalar directly;
        # this avoids positional [0] access on a label-indexed Series
        totals.append(df.loc[in_window, 'measurement'].sum())

    result = pd.DataFrame({'year': years, 'precipSum': totals})
    result['season'] = season
    result['year'] = pd.to_datetime(result['year'], format='%Y')
    result = result.set_index('year')
    return result.iloc[:-1].copy()              # drop the last year


# Winter rain-season: Nov 1 of each year through Mar 31 of the following year
dictWinPrecip = {
    key: _seasonal_precip_sums(dictPrecip[key], 'winter', (11, 1), (3, 31),
                               end_year_offset=1)
    for key in keylist_mx
}
# Summer rain-season: Jun 1 through Sep 30 of the same year
dictSummPrecip = {
    key: _seasonal_precip_sums(dictPrecip[key], 'summer', (6, 1), (9, 30))
    for key in keylist_mx
}
# Stack each station's winter and summer tables into a single dataframe
dictSeasonPrecip = {
    key: pd.concat(
        [dictWinPrecip[key], dictSummPrecip[key]],
        sort=True,
    )
    for key in keylist_mx
}
# %%
# Write one seasonal precipSum csv per station (input for the MCA step)
for key in keylist_mx:
    seasonalPath = 'data/'+str(key)+'_seasonalSum.csv'
    dictSeasonPrecip[key].to_csv(seasonalPath)
# %%
# Plot each station's seasonal rainfall totals (one panel per season) with a
# linear trend line, and record the observed trends in csvFile.
#
# The csv file is opened once and every row goes through the same DictWriter
# as the header. newline='' hands line-ending control to the csv module, and
# lineterminator='\n' keeps the rows newline-separated. (Previously rows were
# hand-built strings appended by re-opening the file on each iteration, which
# left a blank line after the header and no trailing newline.)
with open(csvFile, 'w', newline='') as file:    # set mode to write w/ truncation
    dw = csv.DictWriter(file, delimiter=',',
                        fieldnames=headerList,
                        lineterminator='\n')
    dw.writeheader()

    for key in keylist_mx:
        dfKey = dictSeasonPrecip[key]           # rename working database for ease of reading

        for var in varsSum_mx:
            fig = plt.figure(figsize=(21,7))
            fig.subplots_adjust(hspace=0.2, wspace=0.2)
            fig.suptitle("Total Rainfall by Season"+"\nClimate Station "+str(key), fontsize=22)

            for i, season in enumerate(seasons):
                ax = fig.add_subplot(1, 2, i+1)     # 1 row x 2 columns: one panel per season

                # Select the data to plot: the 40 years ending at the last
                # year available for this season
                df = dfKey[dfKey.season == season]
                end = df.index.year[-1]
                start = end - 39
                # Truncate once so x and y always have the same length
                window = df.loc[str(start):str(end)].tail(40)
                x = window.index.year
                y = window.precipSum

                ax.plot(x, y)                       # this plots the seasonal totals
                ax.set_title(season, fontsize=24, fontweight='bold')

                # Make the linear regression; the fit expects 2-D column arrays
                x_data = x.values.reshape(x.shape[0],1)
                y_data = y.values.reshape(y.shape[0],1)
                timespan = x_data[-1,0] - x_data[0,0] + 1   # number of years covered, inclusive
                reg = linear_model.LinearRegression().fit(x_data, y_data)
                coef = reg.coef_
                y_estimate = reg.predict(x_data)
                ax.plot(x_data, y_estimate)         # this plots the linear regression

                # Total change over the time span (slope per year * number of years)
                trend = timespan*coef[0,0]

                # Save the observed trend to the csv to be plotted on the
                # monte carlo distribution; values are paired with headerList
                # in order (key, season, coef)
                dw.writerow(dict(zip(headerList, (key, season, str(trend)))))

                # Subplot formatting
                ax.set_ylabel('mm', fontsize=18)
                ax.tick_params(axis='both', which='major', labelsize=18)
                ax.text(.1, .8,
                        str(round(trend,2))+'mm/'+str(timespan)+'yr',
                        transform=ax.transAxes,
                        fontsize=24,
                        color='red')

            plt.savefig('graphs/sumPlots/'+str(key)+'_precip-seasonal_sum')
# %%