Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
"""Configuration file for wind resource analysis.

Attributes:
start_year (int): Process the wind data starting from this year - in four-digit format.
final_year (int): Process the wind data up to this year - in four-digit format.
era5_data_dir (str): Directory path for reading era5 data files.
model_level_file_name_format (str): Target name of the wind data files. Python's format() is used to fill in year
and month at placeholders.
surface_file_name_format (str): Target name of geopotential and surface pressure data files. Python's format() is
used to fill in year and month at placeholders.

.FILL
.
.


.. _L137 model level definitions:
https://www.ecmwf.int/en/forecasts/documentation-and-support/137-model-levels

"""

# --------------------------- GENERAL
use_data_opts = ['DOWA', 'LIDAR', 'ERA5']
use_data = use_data_opts[0]


# See plots interactively - don't save plots directly as pdf to result_dir
plots_interactive = False
result_dir = "../clustering_results/" + use_data + "/"


start_year = 2010
final_year = 2017

# Single location processing
latitude = 0
longitude = 0
# TODO: get dowa indices by lat/lon - for now: DOWA loc indices used for both

# --------------------------- DOWA
# data contains years 2008 to 2017
DOWA_data_dir = "/gpfs/share/home/s6lathim/AWE/DOWA/"
# "/home/mark/WindData/DOWA/" # '/media/mark/LaCie/DOWA/'

location = {'i_lat': 110, 'i_lon': 55}


# --------------------------- ERA5
# General settings.
era5_data_dir = '/cephfs/user/s6lathim/ERA5Data-redownload/'
model_level_file_name_format = "{:d}_europe_{:d}_130_131_132_133_135.nc" # 'ml_{:d}_{:02d}.netcdf'
surface_file_name_format = "{:d}_europe_{:d}_152.nc" # 'sfc_{:d}_{:02d}.netcdf'
era5_grid_size = 1. # 0.25
# Processing settings
read_model_level_up_to = 112
height_range = [10., 20., 40., 60., 80., 100., 120., 140., 150., 160., 180.,
200., 220., 250., 300., 500., 600.]
133 changes: 133 additions & 0 deletions era5_ml_height_calc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-
"""ERA5 model level height calculation."""
import numpy as np

r_d = 287.06 # Gas constant for dry air [J/K/kg]
g = 9.80665 # Gravitational acceleration [m/s^2]


def get_ph_levs(level, sp):
"""Get the half-level pressures for the requested ERA5 model level and the one after that. The a and b coefficients
define the model levels and are provided in `L137 model level definitions`_.

Args:
level (int): Model level identifier.
sp (float): Surface pressure [Pa].

Returns:
tuple of float: Half-level pressures for the requested ERA5 model level and the one after that [Pa].

.. _L137 model level definitions:
https://www.ecmwf.int/en/forecasts/documentation-and-support/137-model-levels

"""

a_coef = [0, 2.000365, 3.102241, 4.666084, 6.827977, 9.746966, 13.605424, 18.608931, 24.985718, 32.98571,
42.879242, 54.955463, 69.520576, 86.895882, 107.415741, 131.425507, 159.279404, 191.338562, 227.968948,
269.539581, 316.420746, 368.982361, 427.592499, 492.616028, 564.413452, 643.339905, 729.744141, 823.967834,
926.34491, 1037.201172, 1156.853638, 1285.610352, 1423.770142, 1571.622925, 1729.448975, 1897.519287,
2076.095947, 2265.431641, 2465.770508, 2677.348145, 2900.391357, 3135.119385, 3381.743652, 3640.468262,
3911.490479, 4194.930664, 4490.817383, 4799.149414, 5119.89502, 5452.990723, 5798.344727, 6156.074219,
6526.946777, 6911.870605, 7311.869141, 7727.412109, 8159.354004, 8608.525391, 9076.400391, 9562.682617,
10065.978516, 10584.631836, 11116.662109, 11660.067383, 12211.547852, 12766.873047, 13324.668945, 13881.331055,
14432.139648, 14975.615234, 15508.256836, 16026.115234, 16527.322266, 17008.789063, 17467.613281, 17901.621094,
18308.433594, 18685.71875, 19031.289063, 19343.511719, 19620.042969, 19859.390625, 20059.931641, 20219.664063,
20337.863281, 20412.308594, 20442.078125, 20425.71875, 20361.816406, 20249.511719, 20087.085938, 19874.025391,
19608.572266, 19290.226563, 18917.460938, 18489.707031, 18006.925781, 17471.839844, 16888.6875, 16262.046875,
15596.695313, 14898.453125, 14173.324219, 13427.769531, 12668.257813, 11901.339844, 11133.304688, 10370.175781,
9617.515625, 8880.453125, 8163.375, 7470.34375, 6804.421875, 6168.53125, 5564.382813, 4993.796875, 4457.375,
3955.960938, 3489.234375, 3057.265625, 2659.140625, 2294.242188, 1961.5, 1659.476563, 1387.546875, 1143.25,
926.507813, 734.992188, 568.0625, 424.414063, 302.476563, 202.484375, 122.101563, 62.78125, 22.835938, 3.757813,
0, 0]

b_coef = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7e-06, 2.4e-05, 5.9e-05, 0.000112, 0.000199, 0.00034, 0.000562, 0.00089, 0.001353, 0.001992,
0.002857, 0.003971, 0.005378, 0.007133, 0.009261, 0.011806, 0.014816, 0.018318, 0.022355, 0.026964,
0.032176, 0.038026, 0.044548, 0.051773, 0.059728, 0.068448, 0.077958, 0.088286, 0.099462, 0.111505,
0.124448, 0.138313, 0.153125, 0.16891, 0.185689, 0.203491, 0.222333, 0.242244, 0.263242, 0.285354,
0.308598, 0.332939, 0.358254, 0.384363, 0.411125, 0.438391, 0.466003, 0.4938, 0.521619, 0.549301,
0.576692, 0.603648, 0.630036, 0.655736, 0.680643, 0.704669, 0.727739, 0.749797, 0.770798, 0.790717,
0.809536, 0.827256, 0.843881, 0.859432, 0.873929, 0.887408, 0.8999, 0.911448, 0.922096, 0.931881,
0.94086, 0.949064, 0.95655, 0.963352, 0.969513, 0.975078, 0.980072, 0.984542, 0.9885, 0.991984,
0.995003, 0.99763, 1]

ph_lev = a_coef[level - 1] + (b_coef[level - 1] * sp)
ph_levplusone = a_coef[level] + (b_coef[level] * sp)
return ph_lev, ph_levplusone


def compute_level_height(t, q, level, ph_lev, ph_levplusone, h_h):
"""Compute height at half- & full-level for the requested ERA5 model level, based on temperature, humidity, and
half-level pressures.

Args:
t (float): Temperature at the requested model level [K].
q (float): Humidity at the requested model level [kg/kg].
level (int): Model level identifier.
ph_lev (float): Half-level pressure for the requested model level [Pa].
ph_levplusone (float): Half-level pressures for the subsequent model level [Pa].
h_h (float): Half-level height of previous model level [m].

Returns:
tuple of float: Half- and full-level heights of requested model level.

"""
# Compute the moist temperature.
t = t * (1. + 0.609133 * q)

if level == 1:
dlog_p = np.log(ph_levplusone / 0.1)
alpha = np.log(2)
else:
dlog_p = np.log(ph_levplusone / ph_lev)
alpha = 1. - ((ph_lev / (ph_levplusone - ph_lev)) * dlog_p)

# Integrate from previous (lower) half-level h_h to the full-level.
h_f = h_h + (t * r_d * alpha)/g

# Integrate from previous (lower) half-level h_h to the current half-level.
h_h = h_h + (t * r_d * dlog_p)/g

return h_h, h_f


def compute_level_heights(levels, surface_pressure, levels_temperature, levels_humidity):
"""Compute the full-level heights and air densities for the given model levels.

Args:
levels (list): Identifiers of model levels to evaluate - should be consecutive and include the lower level.
surface_pressure (ndarray): Time trace of surface pressure [Pa].
levels_temperature (ndarray): Time traces of temperature at model levels [K].
levels_humidity (ndarray): Time traces of humidity at model levels [kg/kg].

Returns:
tuple of ndarray: Time traces of Full-level heights [m] and air densities [kg/m^3] at requested model levels.

Raises:
AssertError: If requested model levels are not consecutive or don't include the lower level.

"""
assert np.all(np.diff(levels) == 1) and levels[-1] == 137, "Provided levels should be consecutive."
n_levels = len(levels)
n_hours = levels_temperature.shape[0]
densities = np.zeros((n_hours, n_levels))
heights = np.zeros((n_hours, n_levels))
for i_hr in range(n_hours):
h_h = 0 # Half-level height at lower model level.
for i, level in enumerate(reversed(levels)): # Start from lower model level.
i_level = n_levels - 1 - i # Identifier of model level.

# Get the half-level pressures.
ph_lev, ph_levplusone = get_ph_levs(level, surface_pressure[i_hr])

# Determine half- & full-level height using previous half-level height.
h_h, h_f = compute_level_height(levels_temperature[i_hr, i_level], levels_humidity[i_hr, i_level],
level, ph_lev, ph_levplusone, h_h)
heights[i_hr, i_level] = h_f

# Determine full-level air density.
pf = (ph_lev+ph_levplusone)/2 # Full-level pressure.
densities[i_hr, i_level] = pf/(r_d*levels_temperature[i_hr, i_level])

return heights, densities
17 changes: 10 additions & 7 deletions preprocess_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
from copy import copy

from read_requested_data import get_wind_data

ref_vector_height = 100.


Expand Down Expand Up @@ -28,16 +30,16 @@ def express_profiles_wrt_ref_vector(data):
data['wind_direction'])

data['wind_speed_parallel'] = data['wind_speed_east']*np.cos(ref_dir).reshape((-1, 1)) + \
data['wind_speed_north']*np.sin(ref_dir).reshape((-1, 1))
data['wind_speed_north']*np.sin(ref_dir).reshape((-1, 1))
data['wind_speed_perpendicular'] = -data['wind_speed_east']*np.sin(ref_dir).reshape((-1, 1)) + \
data['wind_speed_north']*np.cos(ref_dir).reshape((-1, 1))
data['wind_speed_north']*np.cos(ref_dir).reshape((-1, 1))
return data


def reduce_wind_data(data, mask_keep):
n_samples_after_filter = np.sum(mask_keep)
print("{:.1f}% of data/{} samples remain after filtering.".format(n_samples_after_filter/data['n_samples'] * 100.,
n_samples_after_filter))
n_samples_after_filter))
for k, val in data.items():
if k in ['altitude', 'n_samples', 'n_locs', 'years']:
continue
Expand All @@ -57,13 +59,14 @@ def remove_lt_mean_wind_speed_value(data, min_mean_wind_speed):

def normalize_data(data):
norm_ref = np.percentile(data['wind_speed'], 90., axis=1).reshape((-1, 1))

print('shape_single', data['wind_speed_parallel'].shape)
print('shape_norm', norm_ref.shape)
training_data_prl = data['wind_speed_parallel']/norm_ref
training_data_prp = data['wind_speed_perpendicular']/norm_ref

data['training_data'] = np.concatenate((training_data_prl, training_data_prp), 1)
data['normalisation_value'] = norm_ref.reshape(-1)

print('shape_single', data['training_data'].shape)
return data


Expand All @@ -80,6 +83,6 @@ def preprocess_data(data, remove_low_wind_samples=True, return_copy=True):


if __name__ == '__main__':
from read_data.dowa import read_data
wind_data = read_data({'i_lat': 110, 'i_lon': 55})
# Read data
wind_data, loc_info = get_wind_data()
preprocess_data(wind_data)
42 changes: 28 additions & 14 deletions principal_component_analysis.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from sklearn.decomposition import PCA
from itertools import accumulate
import numpy as np

import matplotlib as mpl

from config import plots_interactive, result_dir
from read_requested_data import get_wind_data

if not plots_interactive:
mpl.use('Pdf')
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, LogNorm

Expand All @@ -9,7 +17,7 @@
x_lim_profiles = [-0.8, 1.25]


def plot_mean_and_pc_profiles(altitudes, var, get_profile):
def plot_mean_and_pc_profiles(altitudes, var, get_profile, plot_info=""):
plot_n_pcs = 2
n_cols = 3
n_rows = plot_n_pcs
Expand Down Expand Up @@ -37,7 +45,7 @@ def plot_mean_and_pc_profiles(altitudes, var, get_profile):
ax_mean.set_xlim(x_lim_profiles)
ax_mean.set_xlabel(x_label)
ax_mean.legend(bbox_to_anchor=(1., 1.16, 3, 0.2), loc="lower left", mode="expand",
borderaxespad=0, ncol=4)
borderaxespad=0, ncol=4)

# Add plot window for hodograph to existing plot windows and plot it.
y0 = ax[1, 0]._position.y0 + .05
Expand Down Expand Up @@ -80,7 +88,8 @@ def plot_mean_and_pc_profiles(altitudes, var, get_profile):

marker_counter += 1
ax[i_pc, i_col].plot(0.1, 0.1, 's', mfc="white", alpha=1, ms=12, mec='k', transform=ax[i_pc, i_col].transAxes)
ax[i_pc, i_col].plot(0.1, 0.1, marker='${}$'.format(marker_counter), alpha=1, ms=7, mec='k', transform=ax[i_pc, i_col].transAxes)
ax[i_pc, i_col].plot(0.1, 0.1, marker='${}$'.format(marker_counter), alpha=1, ms=7,
mec='k', transform=ax[i_pc, i_col].transAxes)
ax[i_pc, i_col].grid(True)

# Add labels on x-axes.
Expand All @@ -89,9 +98,10 @@ def plot_mean_and_pc_profiles(altitudes, var, get_profile):
ax[-1, i_col].set_xlabel("Coefficient of PC [-]")
else:
ax[-1, i_col].set_xlabel(x_label)
if not plots_interactive: plt.savefig(result_dir + 'pc_mean_and_pc_profiles' + plot_info + '.pdf')


def plot_frequency_projection(data_pc):
def plot_frequency_projection(data_pc, plot_info=""):
plt.figure(figsize=(5, 2.5))
plt.subplots_adjust(top=0.975, bottom=0.178, left=0.15, right=0.94)

Expand All @@ -102,7 +112,6 @@ def plot_frequency_projection(data_pc):
clrs_low_values[0, :] = 0.
clrs_high_values = cmap_baseline(np.linspace(.5, 1., int(256*(1-frac))))
cmap = ListedColormap(np.vstack((clrs_low_values, clrs_high_values)))

n_bins = 120
vmax = 1200
h, _, _, im = plt.hist2d(data_pc[:, 0], data_pc[:, 1], bins=n_bins, cmap=cmap, norm=LogNorm(vmin=1, vmax=vmax))
Expand All @@ -119,30 +128,34 @@ def plot_frequency_projection(data_pc):
plt.grid()
plt.xlabel('PC1')
plt.ylabel('PC2')
if not plots_interactive:
plt.savefig(result_dir + 'pc_frequency_projection' + plot_info + '.pdf')


def analyse_pc(wind_data):

def analyse_pc(wind_data, loc_info=""):
altitudes = wind_data['altitude']
normalized_data = wind_data['training_data']

# Perform principal component analyis.
n_features = normalized_data.shape[1]
pca = PCA(n_components=n_features)
pipeline = pca

# print("{} features reduced to {} components.".format(n_features, n_components))
data_pc = pipeline.fit_transform(normalized_data)
print("{:.1f}% of variance retained using first two principal components.".format(np.sum(pca.explained_variance_ratio_[:2])*100))
print("{:.1f}% of variance retained using first two principal components.".format(
np.sum(pca.explained_variance_ratio_[:2])*100))
cum_var_exp = list(accumulate(pca.explained_variance_ratio_*100))
print("Cumulative variance retained: " + ", ".join(["{:.2f}".format(var) for var in cum_var_exp]))
var = pca.explained_variance_

# Plot results.
plot_frequency_projection(data_pc)
plot_frequency_projection(data_pc, plot_info=loc_info)
markers_pc1, markers_pc2 = [-var[0]**.5, var[0]**.5, 0, 0], [0, 0, -var[1]**.5, var[1]**.5]
plt.plot(markers_pc1, markers_pc2, 's', mfc="white", alpha=1, ms=12, mec='k')
for i, (pc1, pc2) in enumerate(zip(markers_pc1, markers_pc2)):
plt.plot(pc1, pc2, marker='${}$'.format(i+1), alpha=1, ms=7, mec='k')
if not plots_interactive: plt.savefig(result_dir + 'pc_frequency_projection' + loc_info + '_markers.pdf')

def get_pc_profile(i_pc=-1, multiplier=1., plot_pc=False):
# Determine profile data by transforming data in PC to original coordinate system.
Expand All @@ -160,13 +173,14 @@ def get_pc_profile(i_pc=-1, multiplier=1., plot_pc=False):
prp = profile[len(altitudes):]
return prl, prp

plot_mean_and_pc_profiles(altitudes, var, get_pc_profile)
plot_mean_and_pc_profiles(altitudes, var, get_pc_profile, plot_info=loc_info)


if __name__ == '__main__':
from read_data.dowa import read_data
wind_data = read_data({'name': 'mmij'})
wind_data, data_info = get_wind_data()
from preprocess_data import preprocess_data
wind_data = preprocess_data(wind_data)
analyse_pc(wind_data)
plt.show()
# Run principal component analysis
analyse_pc(wind_data, loc_info=data_info)

if plots_interactive: plt.show()
Loading