markschelbergen · lthUniBonn · Feb 24, 2021 · Feb 24, 2021 · Feb 26, 2021 · Feb 26, 2021
diff --git a/config.py b/config.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+"""Configuration file for wind resource analysis.
+
+Attributes:
+    start_year (int): Process the wind data starting from this year - in four-digit format.
+    final_year (int): Process the wind data up to this year - in four-digit format.
+    era5_data_dir (str): Directory path for reading era5 data files.
+    model_level_file_name_format (str): Target name of the wind data files. Python's format() is used to fill in year
+        and month at placeholders.
+    surface_file_name_format (str): Target name of geopotential and surface pressure data files. Python's format() is
+        used to fill in year and month at placeholders.
+
+.FILL
+.
+.
+
+
+.. _L137 model level definitions:
+    https://www.ecmwf.int/en/forecasts/documentation-and-support/137-model-levels
+
+"""
+
+# --------------------------- GENERAL
+use_data_opts = ['DOWA', 'LIDAR', 'ERA5']
+use_data = use_data_opts[0]
+
+
+# See plots interactively - don't save plots directly as pdf to result_dir
+plots_interactive = False
+result_dir = "../clustering_results/" + use_data + "/"
+
+
+start_year = 2010
+final_year = 2017
+
+# Single location processing
+latitude = 0
+longitude = 0
+# TODO: get dowa indices by lat/lon - for now: DOWA loc indices used for both
+
+# --------------------------- DOWA
+# data contains years 2008 to 2017
+DOWA_data_dir = "/gpfs/share/home/s6lathim/AWE/DOWA/"
+# "/home/mark/WindData/DOWA/"  # '/media/mark/LaCie/DOWA/'
+
+location = {'i_lat': 110, 'i_lon': 55}
+
+
+# --------------------------- ERA5
+# General settings.
+era5_data_dir = '/cephfs/user/s6lathim/ERA5Data-redownload/'
+model_level_file_name_format = "{:d}_europe_{:d}_130_131_132_133_135.nc"  # 'ml_{:d}_{:02d}.netcdf'
+surface_file_name_format = "{:d}_europe_{:d}_152.nc"  # 'sfc_{:d}_{:02d}.netcdf'
+era5_grid_size = 1.  # 0.25
+# Processing settings
+read_model_level_up_to = 112
+height_range = [10.,  20.,  40.,  60.,  80., 100., 120., 140., 150., 160., 180.,
+                200., 220., 250., 300., 500., 600.]
diff --git a/era5_ml_height_calc.py b/era5_ml_height_calc.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+"""ERA5 model level height calculation."""
+import numpy as np
+
+r_d = 287.06  # Gas constant for dry air [J/K/kg]
+g = 9.80665  # Gravitational acceleration [m/s^2]
+
+
+def get_ph_levs(level, sp):
+    """Get the half-level pressures for the requested ERA5 model level and the one after that. The a and b coefficients
+    define the model levels and are provided in `L137 model level definitions`_.
+
+    Args:
+        level (int): Model level identifier.
+        sp (float): Surface pressure [Pa].
+
+    Returns:
+        tuple of float: Half-level pressures for the requested ERA5 model level and the one after that [Pa].
+
+    .. _L137 model level definitions:
+        https://www.ecmwf.int/en/forecasts/documentation-and-support/137-model-levels
+
+    """
+
+    a_coef = [0, 2.000365, 3.102241, 4.666084, 6.827977,	9.746966, 13.605424, 18.608931, 24.985718, 32.98571,
+        42.879242, 54.955463, 69.520576, 86.895882, 107.415741, 131.425507, 159.279404, 191.338562, 227.968948,
+        269.539581, 316.420746, 368.982361, 427.592499, 492.616028, 564.413452, 643.339905, 729.744141, 823.967834,
+        926.34491, 1037.201172, 1156.853638, 1285.610352, 1423.770142, 1571.622925, 1729.448975, 1897.519287,
+        2076.095947, 2265.431641, 2465.770508, 2677.348145, 2900.391357, 3135.119385, 3381.743652, 3640.468262,
+        3911.490479, 4194.930664, 4490.817383, 4799.149414, 5119.89502, 5452.990723, 5798.344727, 6156.074219,
+        6526.946777, 6911.870605, 7311.869141, 7727.412109, 8159.354004, 8608.525391, 9076.400391, 9562.682617,
+        10065.978516, 10584.631836, 11116.662109, 11660.067383, 12211.547852, 12766.873047, 13324.668945, 13881.331055,
+        14432.139648, 14975.615234, 15508.256836, 16026.115234, 16527.322266, 17008.789063, 17467.613281, 17901.621094,
+        18308.433594, 18685.71875, 19031.289063, 19343.511719, 19620.042969, 19859.390625, 20059.931641, 20219.664063,
+        20337.863281, 20412.308594, 20442.078125, 20425.71875, 20361.816406, 20249.511719, 20087.085938, 19874.025391,
+        19608.572266, 19290.226563, 18917.460938, 18489.707031, 18006.925781, 17471.839844, 16888.6875, 16262.046875,
+        15596.695313, 14898.453125, 14173.324219, 13427.769531, 12668.257813, 11901.339844, 11133.304688, 10370.175781,
+        9617.515625, 8880.453125, 8163.375, 7470.34375, 6804.421875, 6168.53125, 5564.382813, 4993.796875, 4457.375,
+        3955.960938, 3489.234375, 3057.265625, 2659.140625, 2294.242188, 1961.5, 1659.476563, 1387.546875, 1143.25,
+        926.507813, 734.992188, 568.0625, 424.414063, 302.476563, 202.484375, 122.101563, 62.78125, 22.835938, 3.757813,
+        0, 0]
+
+    b_coef = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        7e-06, 2.4e-05, 5.9e-05, 0.000112, 0.000199, 0.00034, 0.000562, 0.00089, 0.001353, 0.001992,
+        0.002857, 0.003971, 0.005378, 0.007133, 0.009261, 0.011806, 0.014816, 0.018318, 0.022355, 0.026964,
+        0.032176, 0.038026, 0.044548, 0.051773, 0.059728, 0.068448, 0.077958, 0.088286, 0.099462, 0.111505,
+        0.124448, 0.138313, 0.153125, 0.16891, 0.185689, 0.203491, 0.222333, 0.242244, 0.263242, 0.285354,
+        0.308598, 0.332939, 0.358254, 0.384363, 0.411125, 0.438391, 0.466003, 0.4938, 0.521619, 0.549301,
+        0.576692, 0.603648, 0.630036, 0.655736, 0.680643, 0.704669, 0.727739, 0.749797, 0.770798, 0.790717,
+        0.809536, 0.827256, 0.843881, 0.859432, 0.873929, 0.887408, 0.8999, 0.911448, 0.922096, 0.931881,
+        0.94086, 0.949064, 0.95655, 0.963352, 0.969513, 0.975078, 0.980072, 0.984542, 0.9885, 0.991984,
+        0.995003, 0.99763, 1]
+
+    ph_lev = a_coef[level - 1] + (b_coef[level - 1] * sp)
+    ph_levplusone = a_coef[level] + (b_coef[level] * sp)
+    return ph_lev, ph_levplusone
+
+
+def compute_level_height(t, q, level, ph_lev, ph_levplusone, h_h):
+    """Compute height at half- & full-level for the requested ERA5 model level, based on temperature, humidity, and
+    half-level pressures.
+
+    Args:
+        t (float): Temperature at the requested model level [K].
+        q (float): Humidity at the requested model level [kg/kg].
+        level (int): Model level identifier.
+        ph_lev (float): Half-level pressure for the requested model level [Pa].
+        ph_levplusone (float): Half-level pressures for the subsequent model level [Pa].
+        h_h (float): Half-level height of previous model level [m].
+
+    Returns:
+        tuple of float: Half- and full-level heights of requested model level.
+
+    """
+    # Compute the moist temperature.
+    t = t * (1. + 0.609133 * q)
+
+    if level == 1:
+        dlog_p = np.log(ph_levplusone / 0.1)
+        alpha = np.log(2)
+    else:
+        dlog_p = np.log(ph_levplusone / ph_lev)
+        alpha = 1. - ((ph_lev / (ph_levplusone - ph_lev)) * dlog_p)
+
+    # Integrate from previous (lower) half-level h_h to the full-level.
+    h_f = h_h + (t * r_d * alpha)/g
+
+    # Integrate from previous (lower) half-level h_h to the current half-level.
+    h_h = h_h + (t * r_d * dlog_p)/g
+
+    return h_h, h_f
+
+
+def compute_level_heights(levels, surface_pressure, levels_temperature, levels_humidity):
+    """Compute the full-level heights and air densities for the given model levels.
+
+    Args:
+        levels (list): Identifiers of model levels to evaluate - should be consecutive and include the lower level.
+        surface_pressure (ndarray): Time trace of surface pressure [Pa].
+        levels_temperature (ndarray): Time traces of temperature at model levels [K].
+        levels_humidity (ndarray): Time traces of humidity at model levels [kg/kg].
+
+    Returns:
+        tuple of ndarray: Time traces of Full-level heights [m] and air densities [kg/m^3] at requested model levels.
+
+    Raises:
+        AssertError: If requested model levels are not consecutive or don't include the lower level.
+
+    """
+    assert np.all(np.diff(levels) == 1) and levels[-1] == 137, "Provided levels should be consecutive."
+    n_levels = len(levels)
+    n_hours = levels_temperature.shape[0]
+    densities = np.zeros((n_hours, n_levels))
+    heights = np.zeros((n_hours, n_levels))
+    for i_hr in range(n_hours):
+        h_h = 0  # Half-level height at lower model level.
+        for i, level in enumerate(reversed(levels)):  # Start from lower model level.
+            i_level = n_levels - 1 - i  # Identifier of model level.
+
+            # Get the half-level pressures.
+            ph_lev, ph_levplusone = get_ph_levs(level, surface_pressure[i_hr])
+
+            # Determine half- & full-level height using previous half-level height.
+            h_h, h_f = compute_level_height(levels_temperature[i_hr, i_level], levels_humidity[i_hr, i_level],
+                                            level, ph_lev, ph_levplusone, h_h)
+            heights[i_hr, i_level] = h_f
+
+            # Determine full-level air density.
+            pf = (ph_lev+ph_levplusone)/2  # Full-level pressure.
+            densities[i_hr, i_level] = pf/(r_d*levels_temperature[i_hr, i_level])
+
+    return heights, densities
diff --git a/preprocess_data.py b/preprocess_data.py
@@ -1,6 +1,8 @@
 import numpy as np
 from copy import copy
 
+from read_requested_data import get_wind_data
+
 ref_vector_height = 100.
 
 
@@ -28,16 +30,16 @@ def express_profiles_wrt_ref_vector(data):
                                       data['wind_direction'])
 
     data['wind_speed_parallel'] = data['wind_speed_east']*np.cos(ref_dir).reshape((-1, 1)) + \
-                                  data['wind_speed_north']*np.sin(ref_dir).reshape((-1, 1))
+        data['wind_speed_north']*np.sin(ref_dir).reshape((-1, 1))
     data['wind_speed_perpendicular'] = -data['wind_speed_east']*np.sin(ref_dir).reshape((-1, 1)) + \
-                                       data['wind_speed_north']*np.cos(ref_dir).reshape((-1, 1))
+        data['wind_speed_north']*np.cos(ref_dir).reshape((-1, 1))
     return data
 
 
 def reduce_wind_data(data, mask_keep):
     n_samples_after_filter = np.sum(mask_keep)
     print("{:.1f}% of data/{} samples remain after filtering.".format(n_samples_after_filter/data['n_samples'] * 100.,
-                                                                       n_samples_after_filter))
+                                                                      n_samples_after_filter))
     for k, val in data.items():
         if k in ['altitude', 'n_samples', 'n_locs', 'years']:
             continue
@@ -57,13 +59,14 @@ def remove_lt_mean_wind_speed_value(data, min_mean_wind_speed):
 
 def normalize_data(data):
     norm_ref = np.percentile(data['wind_speed'], 90., axis=1).reshape((-1, 1))
-
+    print('shape_single', data['wind_speed_parallel'].shape)
+    print('shape_norm', norm_ref.shape)
     training_data_prl = data['wind_speed_parallel']/norm_ref
     training_data_prp = data['wind_speed_perpendicular']/norm_ref
 
     data['training_data'] = np.concatenate((training_data_prl, training_data_prp), 1)
     data['normalisation_value'] = norm_ref.reshape(-1)
-
+    print('shape_single', data['training_data'].shape)
     return data
 
 
@@ -80,6 +83,6 @@ def preprocess_data(data, remove_low_wind_samples=True, return_copy=True):
 
 
 if __name__ == '__main__':
-    from read_data.dowa import read_data
-    wind_data = read_data({'i_lat': 110, 'i_lon': 55})
+    # Read data
+    wind_data, loc_info = get_wind_data()
     preprocess_data(wind_data)
diff --git a/principal_component_analysis.py b/principal_component_analysis.py
@@ -1,6 +1,14 @@
 from sklearn.decomposition import PCA
 from itertools import accumulate
 import numpy as np
+
+import matplotlib as mpl
+
+from config import plots_interactive, result_dir
+from read_requested_data import get_wind_data
+
+if not plots_interactive:
+    mpl.use('Pdf')
 import matplotlib.pyplot as plt
 from matplotlib.colors import ListedColormap, LogNorm
 
@@ -9,7 +17,7 @@
 x_lim_profiles = [-0.8, 1.25]
 
 
-def plot_mean_and_pc_profiles(altitudes, var, get_profile):
+def plot_mean_and_pc_profiles(altitudes, var, get_profile, plot_info=""):
     plot_n_pcs = 2
     n_cols = 3
     n_rows = plot_n_pcs
@@ -37,7 +45,7 @@ def plot_mean_and_pc_profiles(altitudes, var, get_profile):
     ax_mean.set_xlim(x_lim_profiles)
     ax_mean.set_xlabel(x_label)
     ax_mean.legend(bbox_to_anchor=(1., 1.16, 3, 0.2), loc="lower left", mode="expand",
-                borderaxespad=0, ncol=4)
+                   borderaxespad=0, ncol=4)
 
     # Add plot window for hodograph to existing plot windows and plot it.
     y0 = ax[1, 0]._position.y0 + .05
@@ -80,7 +88,8 @@ def plot_mean_and_pc_profiles(altitudes, var, get_profile):
 
                 marker_counter += 1
                 ax[i_pc, i_col].plot(0.1, 0.1, 's', mfc="white", alpha=1, ms=12, mec='k', transform=ax[i_pc, i_col].transAxes)
-                ax[i_pc, i_col].plot(0.1, 0.1, marker='${}$'.format(marker_counter), alpha=1, ms=7, mec='k', transform=ax[i_pc, i_col].transAxes)
+                ax[i_pc, i_col].plot(0.1, 0.1, marker='${}$'.format(marker_counter), alpha=1, ms=7,
+                                     mec='k', transform=ax[i_pc, i_col].transAxes)
             ax[i_pc, i_col].grid(True)
 
     # Add labels on x-axes.
@@ -89,9 +98,10 @@ def plot_mean_and_pc_profiles(altitudes, var, get_profile):
             ax[-1, i_col].set_xlabel("Coefficient of PC [-]")
         else:
             ax[-1, i_col].set_xlabel(x_label)
+    if not plots_interactive: plt.savefig(result_dir + 'pc_mean_and_pc_profiles' + plot_info + '.pdf')
 
 
-def plot_frequency_projection(data_pc):
+def plot_frequency_projection(data_pc, plot_info=""):
     plt.figure(figsize=(5, 2.5))
     plt.subplots_adjust(top=0.975, bottom=0.178, left=0.15, right=0.94)
 
@@ -102,7 +112,6 @@ def plot_frequency_projection(data_pc):
     clrs_low_values[0, :] = 0.
     clrs_high_values = cmap_baseline(np.linspace(.5, 1., int(256*(1-frac))))
     cmap = ListedColormap(np.vstack((clrs_low_values, clrs_high_values)))
-
     n_bins = 120
     vmax = 1200
     h, _, _, im = plt.hist2d(data_pc[:, 0], data_pc[:, 1], bins=n_bins, cmap=cmap, norm=LogNorm(vmin=1, vmax=vmax))
@@ -119,30 +128,34 @@ def plot_frequency_projection(data_pc):
     plt.grid()
     plt.xlabel('PC1')
     plt.ylabel('PC2')
+    if not plots_interactive:
+        plt.savefig(result_dir + 'pc_frequency_projection' + plot_info + '.pdf')
 
 
-def analyse_pc(wind_data):
+
+def analyse_pc(wind_data, loc_info=""):
     altitudes = wind_data['altitude']
     normalized_data = wind_data['training_data']
-
     # Perform principal component analyis.
     n_features = normalized_data.shape[1]
     pca = PCA(n_components=n_features)
     pipeline = pca
 
     # print("{} features reduced to {} components.".format(n_features, n_components))
     data_pc = pipeline.fit_transform(normalized_data)
-    print("{:.1f}% of variance retained using first two principal components.".format(np.sum(pca.explained_variance_ratio_[:2])*100))
+    print("{:.1f}% of variance retained using first two principal components.".format(
+        np.sum(pca.explained_variance_ratio_[:2])*100))
     cum_var_exp = list(accumulate(pca.explained_variance_ratio_*100))
     print("Cumulative variance retained: " + ", ".join(["{:.2f}".format(var) for var in cum_var_exp]))
     var = pca.explained_variance_
 
     # Plot results.
-    plot_frequency_projection(data_pc)
+    plot_frequency_projection(data_pc, plot_info=loc_info)
     markers_pc1, markers_pc2 = [-var[0]**.5, var[0]**.5, 0, 0], [0, 0, -var[1]**.5, var[1]**.5]
     plt.plot(markers_pc1, markers_pc2, 's', mfc="white", alpha=1, ms=12, mec='k')
     for i, (pc1, pc2) in enumerate(zip(markers_pc1, markers_pc2)):
         plt.plot(pc1, pc2, marker='${}$'.format(i+1), alpha=1, ms=7, mec='k')
+    if not plots_interactive: plt.savefig(result_dir + 'pc_frequency_projection' + loc_info + '_markers.pdf')
 
     def get_pc_profile(i_pc=-1, multiplier=1., plot_pc=False):
         # Determine profile data by transforming data in PC to original coordinate system.
@@ -160,13 +173,14 @@ def get_pc_profile(i_pc=-1, multiplier=1., plot_pc=False):
         prp = profile[len(altitudes):]
         return prl, prp
 
-    plot_mean_and_pc_profiles(altitudes, var, get_pc_profile)
+    plot_mean_and_pc_profiles(altitudes, var, get_pc_profile, plot_info=loc_info)
 
 
 if __name__ == '__main__':
-    from read_data.dowa import read_data
-    wind_data = read_data({'name': 'mmij'})
+    wind_data, data_info = get_wind_data()
     from preprocess_data import preprocess_data
     wind_data = preprocess_data(wind_data)
-    analyse_pc(wind_data)
-    plt.show()
+    # Run principal component analysis
+    analyse_pc(wind_data, loc_info=data_info)
+
+    if plots_interactive: plt.show()