From a2b94edda3487f6d6d136666bbc2da23c7db1615 Mon Sep 17 00:00:00 2001 From: cvvsu Date: Wed, 24 Nov 2021 17:45:55 +0200 Subject: [PATCH] update code --- README.md | 32 +--- main.py | 10 +- model.py | 103 ++++++------ options.py | 2 +- requirements.txt | 22 +-- utils/__init__.py | 4 + utils/fitting.py | 392 +++++++++++++++++++++++++++++++++++++++------ utils/utils.py | 270 +++++-------------------------- utils/visualize.py | 269 +++++++++++++++++++++++++++++++ 9 files changed, 727 insertions(+), 377 deletions(-) create mode 100644 utils/visualize.py diff --git a/README.md b/README.md index 78c83fe..84e3ff4 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ If you have any question, please open an issue or directly send an email to us. - Upload the pre-trained model used in the manuscript. - ~~Use a notebook to illustrate how to use the code step-by-step.~~ - Train the Mask R-CNN model on all NPF events from the four datasets and upload the trained model. -- Provide a fancy GUI. ## Requirements Please install the packages in the `requirements.txt`. @@ -21,23 +20,8 @@ We find the versions of `PyTorch` (>=1.7) and `Python` (>=3.6) do not really aff ## Usage -1. Please change the parameters in the `options.py` file. Specific parameters: - - - `im_size` : image size for processing. It is also possible to use the size 128*128. - - `scores` : if you want analyze a long-term dataset, you can set a small value such as 0.2 or 0.3 to save time and effort. If you are dealing with one-year dataset, it is better to check all the detected masks (**0.0**). - -2. Run the `main.py`. - - - Drawing the NPF images does not take too much time. - - However, the mask detection process is quite slow if you do not have a powerful GPU. [Google Colab](https://colab.research.google.com/notebooks/intro.ipynb?utm_source=scs-index#) may provide a free GPU, which is a possible choice. After detecting the masks, you can download the masks to your local device for visualization. - - You need to select the masks yourself (more than one detected masks for some days). - - Once the masks are obtained, the GRs, start times, and end times can be determined automatically. - -**NOTE**: make sure that your local device is powerful enough; otherwise, please modify the function [save_SE_GR](https://github.com/cvvsu/maskNPF/blob/a959edf04f794d70e7ef8979494e8f36e317326e/model.py#L285) in the `model.py` to calculate GRs one-by-one (do not use the `multiprocessing` package). - -Or use the command line: ``` -$ python3 main.py --station hyytiala --im_size 256 --scores 0.0 --vmax 1e4 +$ python3 main.py --station hyytiala --im_size 256 --scores 0.0 --vmax 1e4 --model_name maskrcnn.pth ``` Check the `demo.ipynb` file for more information. @@ -57,17 +41,9 @@ We currently use a simple GUI (based on the `tk` package) to select the detected ## GRs, start times, and end times Our code can calculate GRs automatically for the size ranges: - -[3-10 nm](https://github.com/cvvsu/maskNPF/blob/a959edf04f794d70e7ef8979494e8f36e317326e/model.py#L249) - -[10-25 nm](https://github.com/cvvsu/maskNPF/blob/a959edf04f794d70e7ef8979494e8f36e317326e/model.py#L250) - -[3-25 nm](https://github.com/cvvsu/maskNPF/blob/a959edf04f794d70e7ef8979494e8f36e317326e/model.py#L253) - -[size ranges determined by the detected masks](https://github.com/cvvsu/maskNPF/blob/a7c188f64e8329e0ae50ec23936158e4c89e07b0/model.py#L254) - -You can change the size range to calculate other GRs, taking 3-7 nm as an example. The parameters are changed in the fuction [get_GRs](https://github.com/cvvsu/maskNPF/blob/a959edf04f794d70e7ef8979494e8f36e317326e/model.py#L239) in the `model.py`. Once other size ranges are added, please also change the [save_dict](https://github.com/cvvsu/maskNPF/blob/a959edf04f794d70e7ef8979494e8f36e317326e/model.py#L274) at the same time in the [get_SE_GR](https://github.com/cvvsu/maskNPF/blob/a959edf04f794d70e7ef8979494e8f36e317326e/model.py#L257) function. - +- 3 - 10 nm +- 10 - 25 nm +- 3 - 25 nm ## Citation diff --git a/main.py b/main.py index fdbe0f0..d840dba 100644 --- a/main.py +++ b/main.py @@ -15,20 +15,20 @@ ############################# # draw NPF images ############################# - NPF.draw_one_day_images() - NPF.draw_two_day_images() + #NPF.draw_one_day_images() + #NPF.draw_two_day_images() ############################# # Get masks for NPF images ############################# - NPF.detect_one_day_masks() - NPF.detect_two_day_masks() + # NPF.detect_one_day_masks() + # NPF.detect_two_day_masks() ############################# # Visualize and select masks ############################# - NPF.visualize_masks() + # NPF.visualize_masks() ##################################### # Save the start-end times and GRs diff --git a/model.py b/model.py index 5d8f914..dc9dfd7 100644 --- a/model.py +++ b/model.py @@ -3,6 +3,7 @@ import pandas as pd from multiprocessing import Pool from PIL import Image +from tqdm import tqdm from matplotlib.figure import Figure @@ -14,10 +15,10 @@ import torch from torchvision import transforms -from utils.utils import get_next_day, mkdirs, psd2im -from utils.networks import get_instance_segmentation_model -from utils.utils import reshape_mask -from utils.fitting import get_GR, get_SE +from utils import get_next_day, mkdirs, psd2im +from utils import get_instance_segmentation_model +from utils import reshape_mask +from utils import get_GR, get_SE class NPFDetection(object): @@ -29,7 +30,7 @@ def __init__(self, opt): self.cpu_count = os.cpu_count() // 2 + 1 self.dataroot = os.path.join(opt.dataroot, opt.station) self.station = opt.station - self.vmax = opt.vmax + self.vmax = None if opt.dynamic_vmax else opt.vmax self.tm_res = opt.time_res self.df = pd.read_csv(os.path.join(self.dataroot, self.station+'.csv'), parse_dates=[0], index_col=0) self.days = sorted(np.unique(self.df.index.date.astype(str)).tolist()) @@ -42,17 +43,26 @@ def draw_one_day_images(self): self.savefp = os.path.join(self.dataroot, 'images', 'one_day') mkdirs(self.savefp) self.dimg = 1 - - with Pool(self.cpu_count) as p: - p.map(self.draw_image, self.days) + + if self.cpu_count >= 8: + with Pool(self.cpu_count) as p: + p.map(self.draw_image, self.days) + else: + for day in tqdm(self.days): + self.draw_image(day) def draw_two_day_images(self): """Draw NPF images with two-day unit""" self.savefp = os.path.join(self.dataroot, 'images', 'two_day') mkdirs(self.savefp) self.dimg = 2 - with Pool(self.cpu_count) as p: - p.map(self.draw_image, self.days) + + if self.cpu_count >= 8: + with Pool(self.cpu_count) as p: + p.map(self.draw_image, self.days) + else: + for day in tqdm(self.days): + self.draw_image(day) def draw_image(self, day): """Draw an NPF image""" @@ -236,65 +246,50 @@ def save_mask(self, btn): mkdirs(savefp) np.save(os.path.join(savefp, f'{self.key}.npy'), self.masks_twoday[self.key][idx]) - def get_GRs(self, df, mask): - mask = reshape_mask(mask, df.shape) - pos = np.where(mask) - ymin = np.min(pos[1]) - ymax = np.max(pos[1]) - dps = [float(dp) for dp in df.columns] - dp_start = dps[ymin] - dp_end = dps[ymax] - s_tm, e_tm = get_SE(df, mask) - - gr_3_10, _, _ = get_GR(df, mask, 2.75, 10, savefp=None, tm_res=self.tm_res, vmax=self.vmax) - gr_10_25, _, _ = get_GR(df, mask, 10, 25.2, savefp=None, tm_res=self.tm_res, vmax=self.vmax) - savefp = os.path.join(self.dataroot, 'GR/3_25') - mkdirs(savefp) - gr_3_25, _, _ = get_GR(df, mask, 2.75, 25.2, savefp=savefp, tm_res=self.tm_res, vmax=self.vmax) - gr_mask, _, _ = get_GR(df, mask, dp_start, dp_end, savefp=None, tm_res=self.tm_res, vmax=self.vmax) - return s_tm, e_tm, gr_3_10, gr_10_25, gr_3_25, gr_mask - def get_SE_GR(self, day): df = self.df.loc[day] mask = np.load(os.path.join(self.dataroot, 'masks/one_day', day+'.npy'), allow_pickle=True) + mask = reshape_mask(mask, df.shape) try: - s_tm, e_tm, gr_3_10, gr_10_25, gr_3_25, gr_mask = self.get_GRs(df, mask) + st, et = get_SE(df, mask) + gr_dict = get_GR(df, mask, self.tm_res, savefp=self.savefp, vmax=self.vmax) except: - print(day) + # print(day) return try: mask_ = np.load(os.path.join(self.dataroot, 'masks/two_day', day+'.npy'), allow_pickle=True) df_ = self.df.loc[day:get_next_day(day)] mask_ = reshape_mask(mask_, df_.shape) - st, et = get_SE(df_, mask_) + st_two, et_two = get_SE(df_, mask_) except: - st, et = s_tm, e_tm - #return s_tm, e_tm, st, et, gr_3_10, gr_10_25, gr_3_25 - save_dict = {'date':[day], - 'GR (3-10)': [gr_3_10], - 'GR (10-25)': [gr_10_25], - 'GR (3-25)': [gr_3_25], - 'GR_mask': [gr_mask], - 'ST (one)': [s_tm], - 'ET (one)': [e_tm], - 'ST (two)': [st], - 'ET (two)': [et]} + st_two, et_two = st, et + + save_dict = {**{ + 'date': [day], + 'start_time_one': [st], + 'end_time_one': [et], + 'start_time_two': [st_two], + 'end_time_two': [et_two] + }, **gr_dict} pd.DataFrame(save_dict).to_csv(os.path.join(self.savefp, f'{day}.csv'), index=False) def save_SE_GR(self): - files = glob.glob(os.path.join(self.dataroot, 'masks/one_day')+'/*.npy') - daysh = [file.split(os.sep)[-1].split('.')[0] for file in files] - - grs = glob.glob(os.path.join(self.dataroot, 'GR')+'/*.csv') - days_1 = [gr.split(os.sep)[-1].split('.')[0] for gr in grs] - - days = [item for item in daysh if item not in days_1] - + r""" + obtain and save the start time, end time and the growth rates. + """ + files = sorted(glob.glob(os.path.join(self.dataroot, 'masks/one_day')+'/*.npy')) + days = [file.split(os.sep)[-1].split('.')[0] for file in files] + print(f'Calculating growth rates for {len(days)} days.') + self.savefp = os.path.join(self.dataroot, 'GR') mkdirs(self.savefp) - x_len = len(days) // 10 - for i in range(11): - days_ = days[i*x_len:(i+1)*x_len] + + if self.cpu_count >= 8: with Pool(self.cpu_count) as p: - p.map(self.get_SE_GR, days_) + p.map(self.get_SE_GR, days) + else: + for day in tqdm(days): + self.get_SE_GR(day) + + diff --git a/options.py b/options.py index 2a8e002..c8dbd50 100644 --- a/options.py +++ b/options.py @@ -24,10 +24,10 @@ def get_args(): parser.add_argument('--im_size', default=256, type=int, help='image size for training. Square sizes are used but rectangle are also possible') parser.add_argument('--scores', default=0.00, type=float, help='threshold for objectiveness scores') parser.add_argument('--vmax', default=1e4, type=float, help='value scales for drawing') + parser.add_argument('--dynamic_vmax', action='store_true', help='utilize the dynamic surface plot if specified') parser.add_argument('--time_res', default=10, type=float, help='the time resolution of measurements. Default is 10 minutes') parser.add_argument('--mask_thres', default=0.5, type=float, help='threshold to binarize an mask') parser.add_argument('--ftsize', default=16, type=int, help='font size for elements in plots') - #parser.add_argument('--twoday', action='store_true', help='draw two-day NPF images') args = parser.parse_args() return args, get_message(parser, args) diff --git a/requirements.txt b/requirements.txt index 5078cfb..e01dccf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -astropy==4.2.1 -matplotlib==3.4.2 -pandas==1.2.4 -numpy==1.21.1 -torch==1.7.1 -tqdm==4.61.0 -seaborn==0.11.1 -scikit_image==0.18.1 -torchvision==0.8.2 -Pillow==8.3.1 -scikit_learn==0.24.2 \ No newline at end of file +astropy>=4.2.1 +matplotlib>=3.4.2 +pandas>==1.2.4 +numpy>=1.21.1 +torch>=1.7.1 +tqdm>=4.61.0 +seaborn>=0.11.1 +scikit_image>=0.18.1 +torchvision>=0.8.2 +Pillow>=8.3.1 +scikit_learn>=0.24.2 \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py index e69de29..1210bc1 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -0,0 +1,4 @@ +from .networks import get_instance_segmentation_model +from .visualize import psd2im, draw_subplots +from .utils import mkdirs, get_next_day, reshape_mask, convert_matlab_time, time2num, num2time +from .fitting import get_SE, get_GR, get_GR_old diff --git a/utils/fitting.py b/utils/fitting.py index 4ad2263..511b2e0 100644 --- a/utils/fitting.py +++ b/utils/fitting.py @@ -1,35 +1,136 @@ -# -*- coding: utf-8 -*- -from astropy import modeling +r""" +This file contains the functions for automatic methods for determining the growth rates. + +1. mode fitting +2. maximum concentration +""" import os import numpy as np import pandas as pd -from sklearn.linear_model import LinearRegression, RANSACRegressor from matplotlib import pyplot as plt from matplotlib import dates - +from scipy.signal import find_peaks +from scipy.optimize import curve_fit from datetime import timedelta, datetime +from astropy import modeling +from sklearn.linear_model import LinearRegression, RANSACRegressor, TheilSenRegressor + +from .utils import num2time, time2num, mkdirs +from .visualize import psd2im + + +def get_SE(df, mask): + r"""Get the start and end times (one-day or two-day).""" + + pos = np.where(mask) + xmin, xmax = np.min(pos[0]), np.max(pos[0]) + + start_time = df.index[xmin] + end_time = df.index[xmax] + return start_time, end_time + +############################################ +# mode fitting +############################################ +def multi_gaussians(x, *params): + r""" + This function builds a combination of several Gaussian curves to fit data. + + Parameters: + params (list) -- a list of parameters for Gaussian curve fitting. [10, 0, 1, 3, 5, 2]. + y1 = 10 * exp(-((x-0)/1)^2) + y2 = 3 * exp(-((x-5)/2)^2) + y = y1 + y2 + References: + 1. https://stackoverflow.com/questions/26902283/fit-multiple-gaussians-to-the-data-in-python + """ -from .utils import psd2im + y = np.zeros_like(x) + # print(len(params)) + for i in range(0, len(params), 3): + # the amp is not the `1/sqrt(2*pi*sigma^2)`, but with a scale factor + amp = params[i] + mu = params[i+1] + std = params[i+2] # note that this is not the real std + y += amp * np.exp(-((x-mu)/std)**2) + return y -def time2num(tm): - """Convert time to number. - fit on numbers and then convert back the numbers to time +def fit_multi_gaussians(log_dps, arr): + r""" + fit multiple Gaussian distributions. + + The first choice is to fit multi log-norm curves, but since we use the log dps, + then it is equal to the log norm curves if the multi Gaussian curves are used. + + Parameters: + log_dps (array) -- log dps + arr (array) -- psd at a specific timepoint + + References: + 1. https://www.researchgate.net/publication/227944272_Evaluation_of_an_automatic_algorithm_for_fitting_the_particle_number_size_distribution + 2. https://doi.org/10.1016/j.atmosenv.2006.03.053 """ - time = tm.time() - minutes = time.hour * 60 + time.minute - return minutes + + # get rid of zeros and NaNs + idx_arr = arr > 0 + + log_dps = log_dps[idx_arr] + arr = arr[idx_arr] + try: + # guess the values for curve fitting + peak_idx, _ = find_peaks(arr) + idx = [ + peak_idx[arr[peak_idx].argmin()], + peak_idx[arr[peak_idx].argmax()], + np.where(arr == np.median(arr[peak_idx]))[0].item() + ] + except: + idx = [ + 0, + len(arr)-1, + int(len(arr)/2) + ] + + params = [] + for ix in idx: + params += [arr[ix], log_dps[ix], 1] + + try: + popt, pcov = curve_fit( + multi_gaussians, log_dps, arr, p0=params, maxfev=10000) + + dps = (np.exp(popt[1::3])*1e9).tolist() + # print(dps) + return [dp for dp in dps if (dp < 1000) and (dp > 2)] + except: + return None + + +def mask2minmaxdp(aligned_mask, dps): + r"""convert the mask to the minimum and maximum dps for a specific timestamp. + + Parameters: + aligned_mask (array) -- the detected mask (binary with 0s and 1s) + dps (list) -- a list containing dps (nm) + + Returns: + a list, and each element in this list is a list containing two elements (minimum dp and maximum dp) -def num2time(date, num): - """Convert number to time. + References: + https://stackoverflow.com/questions/38013778/is-there-any-numpy-group-by-function/43094244 """ - year, month, day = date.split('-') - hour = int(num / 60) - minute = int(num - hour*60) - return datetime(int(year), int(month), int(day), hour, minute) + arr = np.vstack(np.where(aligned_mask)).T + + return [[dps[item[0]], dps[item[-1]]] + for item in np.split(arr[:, 1], np.unique(arr[:, 0], return_index=True)[1][1:])] + +######################################### +# maximum concentration +######################################### def fit_gaussian(x, y): fitter = modeling.fitting.LevMarLSQFitter() @@ -41,10 +142,11 @@ def fit_gaussian(x, y): except Exception: return None + def get_max_time(arr1d, tm, time_resolution=10): # find nonzero values - arr = arr1d[arr1d>0] + arr = arr1d[arr1d > 0] num = len(arr) @@ -64,12 +166,207 @@ def get_max_time(arr1d, tm, time_resolution=10): max_time = np.nonzero(arr1d_)[0][0] + mean_val dt64 = tm[int(max_time)] decimal = np.around(time_resolution*(max_time - int(max_time))) - ts = (dt64 - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's') - time = datetime.utcfromtimestamp(ts) + timedelta(minutes = int(decimal)) + ts = (dt64 - np.datetime64('1970-01-01T00:00:00')) / \ + np.timedelta64(1, 's') + time = datetime.utcfromtimestamp( + ts) + timedelta(minutes=int(decimal)) return time -def get_GR(df, mask, dp_min, dp_max, savefp=None, tm_res=10, vmax=1e4): + +def get_maxcon(df, mask, tm_res): + + values = df.values * mask + tms = df.index + dps = [float(dp) for dp in list(df.columns)] + + # find the indexes of the start and end times + col_min = np.where((np.array(dps) >= 2.5) == True)[0][0] + col_max = np.where((np.array(dps) <= 25.9) == True)[0][-1] + + # obtain the time (index) when max concentration occurs for each dp + peak_con = np.array([[get_max_time(values[:, i], tms, tm_res), dps[i]] + for i in range(col_min, col_max+1)]) + + # drop the nan values + tm_dp = pd.DataFrame(peak_con).dropna(how='any').values + tm_dp = tm_dp[tm_dp[:, 1]<=25.5, :] + return tm_dp + + +def get_modefitting(df, mask): + + st, et = get_SE(df, mask) + + df_ = df[(df.index >= st) & (df.index <= et)] + df_ = df_[[col for col in df_.columns if float(col) < 1000]] + + dps = [float(dp) for dp in list(df_.columns)] + valid_dps = mask2minmaxdp(mask, dps) + log_dps = np.array([np.log(dp*1e-9) for dp in dps]) + vals = df_.values + tm_dp = [] + + for i in range(vals.shape[0]): + tm = df_.index[i] + fitted_dps = fit_multi_gaussians(log_dps, vals[i]) + if fitted_dps is not None: + + # check whether the dps are in the banana region + fitted_dps = [dp for dp in fitted_dps if ( + dp <= valid_dps[i][-1]) and (dp >= valid_dps[i][0])] + + # find the fitted dps related to higher concentrations + if len(fitted_dps) == 0: + continue + elif len(fitted_dps) == 1: + tm_dp += [[tm, fitted_dps[0]]] + else: + cons = [vals[i, np.argmin(np.abs(np.array(dps)-dp))] + for dp in fitted_dps] + dp = fitted_dps[np.argmax(np.array(cons))] + # for dp in fitted_dps: + tm_dp += [[tm, dp]] + tm_dp = pd.DataFrame(tm_dp).dropna(how='any').values + tm_dp = tm_dp[tm_dp[:, 1]<=25.5, :] + + return tm_dp + + +def get_size_gr(tm_dp, name='modefitting'): + r""" + Get the GR (slope * 60) and the intercept (for visualization) + + Parameters: + tm_dp (2d array) -- the 1st column is the time points and the 2nd column is the dps (nm) + name (str) -- used as the keys for the return dictionary + + Parameters: + a dictionary containing grs (3-7, 7-15, 15-25, 3-25) + """ + gr_dict = {} + tms = tm_dp[:, 0] + dt = str(tms[0].date()) + tms = np.array([time2num(tm) for tm in tms]) # convert the time points to numbers + dps = tm_dp[:, 1] + x_pred, y_pred = None, None + + for min_dp, max_dp in [[2.5, 10.5], [9.5, 25.5], [2.5, 25.5]]: + idx = (dps >= min_dp) & (dps <= max_dp) + if sum(idx) < 2: + gr_dict.update({f'{name}_{int(min_dp)+1}_{int(max_dp)}':None}) + # , f'inter_{name}_{int(min_dp)+1}_{int(max_dp)}':None}) + continue + if (min_dp == 2.5) and (max_dp == 25.5) and (name == 'modefitting'): + fit_dps = tm_dp[:, 1] + idx_list = [np.where((fit_dps >= dp_min) & (fit_dps < dp_min+1))[0].tolist()[:1] + for dp_min in np.arange(2.5, 25.5)] + idx = [] + for item in idx_list: + idx += item + + idx_arr = np.array(idx) + tm_dp_ = tm_dp[idx_arr] + tms_ = np.array([time2num(tm) for tm in tm_dp_[:, 0]]) + dps_ = tm_dp_[:, 1] + slope, intercept = fit_GR(tms_, dps_, name) + else: + tms_ = tms[idx] + dps_ = dps[idx] + slope, intercept = fit_GR(tms_, dps_, name) + + if slope is not None: + gr_dict.update({f'{name}_{int(min_dp)+1}_{int(max_dp)}': slope*60}) + # f'inter_{name}_{int(min_dp)+1}_{int(max_dp)}': intercept + if (min_dp == 2.5) and (max_dp == 25.5): + # plot the fitted curves + num_min, num_max = (min_dp - intercept) / \ + slope, (max_dp - intercept) / slope + num_min, num_max = int(np.maximum(tms[0], num_min)), int( + np.minimum(tms[-1], num_max)) + x_num = np.arange(num_min, num_max) + x_pred = np.array([num2time(dt, item) for item in x_num]) + y_pred = np.array([slope*item+intercept for item in x_num]) + else: + gr_dict.update({f'{name}_{int(min_dp)+1}_{int(max_dp)}': None}) + # f'inter_{name}_{int(min_dp)+1}_{int(max_dp)}': None}) + + return gr_dict, x_pred, y_pred + + +def fit_GR(tms, dps, name=None): + r""" + Fit the GR by the ransac, theil-sen, least-squre regression fitting. + + Parameters: + tms (array) -- time points (numbers) + dps (array) -- dps (unit: nm) + + Returns: + slope, intercept -- the gr (slope*60) and intercept (for visulization on the surface plots) + """ + # thei = TheilSenRegressor(random_state=42) + # thei.fit(tms.reshape(-1, 1), dps) + # slope = thei.coef_[0] + # intercept = thei.intercept_ + + try: + ransac = RANSACRegressor(random_state=42) + ransac.fit(tms.reshape(-1, 1), dps) + slope = ransac.estimator_.coef_[0] + intercept = ransac.estimator_.intercept_ + # method = 'RANSAC' + # thei = TheilSenRegressor(random_state=42) + # thei.fit(tms.reshape(-1, 1), dps) + # slope = thei.coef_[0] + # intercept = thei.intercept_ + except: + ols = LinearRegression() + ols.fit(tms.reshape(-1, 1), dps) + slope = ols.coef_[0] + intercept = ols.intercept_ + # method = 'LinearRegression' + if slope <= 0: + return None, None + return slope, intercept + + +def get_GR(df, mask, tm_res=10, savefp=None, vmax=1e4): + r""" + Get the growth rates (GRs) through the mode fitting and maximum concentration methods. + GRs: 3-7, 7-15, 15-25, 3-25 with different methods + + The automatic mode fitting method does not work well on some events. If you do want to use + it, please uncomment related lines and check the detection results. + + Parameters: + df (dataframe) -- one-day data + mask (array) -- the detected mask + tm_res (float) -- the time resolution of the dataset + + Returns: + gr_dict (dict) -- dictionary containing the determined GRs + x_pred (array) -- time information for the visualization of GR + y_pred (array) -- dps for the visualization of GR """ + + maxcon_tm_dp = get_maxcon(df, mask, tm_res) + # modefitting_tm_dp = get_modefitting(df, mask) + maxcon_dict, maxcon_x_pred, maxcon_y_pred = get_size_gr(maxcon_tm_dp, 'maxcon') + # modefitting_dict, modefitting_x_pred, modefitting_y_pred = get_size_gr(modefitting_tm_dp, 'modefitting') + # gr_dict = {**maxcon_dict, **modefitting_dict} + if (savefp is not None) and (maxcon_x_pred is not None): + mkdirs(os.path.join(savefp, 'maxcon')) + # mkdirs(os.path.join(savefp, 'modefitting')) + _ = psd2im(df, figsize=(12, 8), fit_data=[maxcon_tm_dp[:, 0], maxcon_tm_dp[:, 1]*1e-9], line_data=[ + maxcon_x_pred, maxcon_y_pred*1e-9], savefp=os.path.join(savefp, 'maxcon'), vmax=vmax, dpi=100, use_cbar=True) + # _ = psd2im(df, figsize=(12, 8), fit_data=[modefitting_tm_dp[:, 0], modefitting_tm_dp[:, 1]*1e-9], line_data=[ + # modefitting_x_pred, modefitting_y_pred*1e-9], savefp=os.path.join(savefp, 'modefitting'), vmax=vmax, dpi=100, use_cbar=True) + # return gr_dict + return maxcon_dict + + +def get_GR_old(df, mask, dp_min=2.5, dp_max=25.5, savefp=None, tm_res=10, vmax=1e4): + r""" Obtain the GRs. Parameters: @@ -86,34 +383,35 @@ def get_GR(df, mask, dp_min, dp_max, savefp=None, tm_res=10, vmax=1e4): x_pred (array) -- time information for the visualization of GR y_pred (array) -- dps for the visualization of GR """ - + # get the date dt = str(df.index[0].date()) - + # get the values of the banana-shape region values = df.values * mask - + # get the dps dps = [float(dp) for dp in list(df.columns)] - + # find the indexes of the start and end times col_min = np.where((np.array(dps) >= dp_min) == True)[0][0] col_max = np.where((np.array(dps) <= dp_max) == True)[0][-1] - + # obtain the time (index) when max concentration occurs for each dp - peak_con = np.array([[get_max_time(values[:, i], df.index.values, tm_res), dps[i]] for i in range(col_min, col_max+1)]) - + peak_con = np.array([[get_max_time(values[:, i], df.index.values, tm_res), dps[i]] + for i in range(col_min, col_max+1)]) + # drop the nan values tm_con_mat = pd.DataFrame(peak_con).dropna(how='any').values - + # split the time and dps x_tm = np.array([time2num(item) for item in tm_con_mat[:, 0]]) y_dp = tm_con_mat[:, 1] - + # if there are less than 2 valid time-dp points, return None if len(x_tm) < 2: - return None - + return None, None, None + # fit the time-dp relationship # the RANSAC fitting is applied and if failed, then ordinary linear fitting will be used try: @@ -128,25 +426,28 @@ def get_GR(df, mask, dp_min, dp_max, savefp=None, tm_res=10, vmax=1e4): slope = ols.coef_[0] intercept = ols.intercept_ method = 'LinearRegression' - + # if the fitting slope less than 0, return None (negative GR) if slope <= 0: - return None - + return None, None, None + # get the time and dp pairs for visualization - num_min, num_max = (dp_min - intercept) / slope, (dp_max - intercept) / slope - num_min, num_max = int(np.maximum(x_tm[0], num_min)), int(np.minimum(x_tm[-1], num_max)) + num_min, num_max = (dp_min - intercept) / \ + slope, (dp_max - intercept) / slope + num_min, num_max = int(np.maximum(x_tm[0], num_min)), int( + np.minimum(x_tm[-1], num_max)) x_num = np.arange(num_min, num_max) x_pred = np.array([num2time(dt, item) for item in x_num]) y_pred = np.array([slope*item+intercept for item in x_num]) - + # save the fitting plot (time-dp) if savefp is not None: fig, ax = plt.subplots(figsize=(12, 8)) ax.scatter(tm_con_mat[:, 0], y_dp) my_fmt = dates.DateFormatter('%H:%M') ax.xaxis.set_major_formatter(my_fmt) - ax.plot(x_pred, y_pred, 'r', label=f'Growth rate: {slope*60:.2f} ' + '$\mathrm{nm\;h}^{-1}$') + ax.plot(x_pred, y_pred, 'r', + label=f'Growth rate: {slope*60:.2f} ' + '$\mathrm{nm\;h}^{-1}$') ax.set_xlabel('Local time (h)', fontsize=12) ax.set_ylabel('Dp (nm)', fontsize=12) #ax.set_title(date, fontsize=ftsize+4) @@ -154,15 +455,6 @@ def get_GR(df, mask, dp_min, dp_max, savefp=None, tm_res=10, vmax=1e4): ax.text(0.8, 0.05, f'{method}', transform=ax.transAxes, fontsize=14) fig.savefig(os.path.join(savefp, f'{dt}_fit.png'), dpi=100) - psd2im(df, figsize=(12, 8), fit_data=[x_pred, y_pred*1e-9], savefp=savefp, vmax=vmax, dpi=100) + _ = psd2im(df, figsize=(12, 8), line_data=[ + x_pred, y_pred*1e-9], savefp=savefp, vmax=vmax, dpi=100) return slope*60, x_pred, y_pred - -def get_SE(df, mask): - """Get the start and end times (one-day or two-day).""" - - pos = np.where(mask) - xmin, xmax = np.min(pos[0]), np.max(pos[0]) - - start_time = df.index[xmin] - end_time = df.index[xmax] - return start_time, end_time diff --git a/utils/utils.py b/utils/utils.py index fba0b6f..8a509b4 100644 --- a/utils/utils.py +++ b/utils/utils.py @@ -7,257 +7,71 @@ from PIL import Image - -def get_next_day(current_day): - return str(pd.to_datetime(current_day) + timedelta(days=1))[:10] - def mkdirs(path): + r""" + create a new folder named `path` if not exists. + """ + path = f'{path}' if not os.path.exists(path): os.makedirs(path) -def psd2im(df, - ax=None, - fig=None, - mask=None, - savefp=None, - dpi=600, - n_xticks=5, - figsize=(1.6, 1.2), - show_figure=True, - use_title=False, - fit_data=None, - index=None, - vmax=1e4, - lcolor='white', - lwidth=3, - use_xaxis=True, - use_xlabel=False, - use_yaxis=True, - use_cbar=False, - ftsize=16 - ): - """Draw single or multiple surface plots. - Parameters: - df (dataframe) -- particle size distribution data for one day or multiple days - ax (ax) -- specify the ax to visualize the psd. If not specified, a new one will be created - fig (fig) -- the whole figure - mask (array) -- numpy array with the same shape as the input psds - savefp (str) -- path for storing the figures - dpi (int) -- default is 600 - n_xticks (int) -- how many ticklabels shown on the x-axis - figsize (tuple) -- used only if a new ax is created - show_figure (bool) -- clear all the figures if drawing many surface plots - use_title (bool) -- use the date as the title for the psd - fit_data (list) -- the fitted time points and related Dps - index (int) -- there many be more than one masks detected for one day's psd - vmax (int) -- color scale for visualization, default is 1e4. - lcolor (str) -- color for visualizing the GR - lwidth (int) -- linewidth - use_xaxis (bool) -- whether to draw the x-axis - use_yaxis (bool) -- whether to draw the y-axis - use_cbar (bool) -- whether to use the colorbar - ftsize (int) -- fontsize for plotting - +def get_next_day(current_day): + r""" + Get the next day given the current day. """ + return str(pd.to_datetime(current_day) + timedelta(days=1))[:10] - # get the psd data - dfc = df.copy(deep=True) # get a copy version - df_min = np.nanmin(dfc.replace(0, np.nan)) # find the minimul value - dfc.fillna(df_min, inplace=True) # use the minimul value to replace the na values - dfc[dfc == 0] = df_min # use the minimul value to replace 0 - dfc = dfc.replace(0, df_min) - - values = dfc.values.T if mask is None else (dfc.values*mask).T # values for visualization - dps = [float(dp)*1e-9 for dp in list(dfc.columns)] # Dps - tm = dfc.index.values # time points - - # check how many days of data to be shown - whole_dates = np.unique([item.date() for item in df.index]) - num_days = (whole_dates[-1] - whole_dates[0]).days + 1 - - # once the ax is none, create a new one - if ax is None: - fig, ax = plt.subplots(figsize=figsize) - - im = ax.pcolormesh(tm, # time points - dps, # particle sizes - values, # distribution - norm=colors.LogNorm(vmin=1e1, vmax=vmax), - cmap='jet', - shading='auto') - - # add the fitted line for determinating the GRs - if fit_data is not None: - ax.plot(fit_data[0], fit_data[1], c=lcolor, linewidth=lwidth) - - # use the log scale for y-axis - ax.set_yscale('log') - - # get title - title = str(whole_dates[0]) if num_days <= 1 else str(whole_dates[0])+'_'+str(whole_dates[-1]) - - # add index - if index is not None: - title = title + f' {index}' - - # add the title on the figure - if use_title: - ax.set_title(title, fontsize=ftsize+4) - - # add y-axis - if use_yaxis: - ax.set_ylabel('$\mathrm{D_p}$ (m)', fontsize=ftsize+2) - else: - ax.get_yaxis().set_visible(False) - - # add x-axis - if use_xaxis: - xtick = [datetime(sdate.year, sdate.month, sdate.day) + timedelta(i/(n_xticks-1)) for sdate in whole_dates - for i in range(n_xticks-1)] + [datetime(whole_dates[-1].year, whole_dates[-1].month, whole_dates[-1].day)+timedelta(1)] - xtick_labels = ['00:00', '06:00', '12:00', '18:00'] * num_days + ['00:00'] - ax.set_xticks(xtick) - ax.set_xticklabels(xtick_labels) - else: - ax.get_xaxis().set_visible(False) - - if use_xlabel: - ax.set_xlabel('Local time (h)', fontsize=ftsize+2) - - # add colorbar - if use_cbar: - cbar = fig.colorbar(im, ax=ax) # here fig is the default input for subplots - cbar.set_label('dN/dlog$\mathrm{D_p} (\mathrm{cm}^{-3})$', fontsize=ftsize+2) - - # to avoid the black edges - if (not use_xaxis) and (not use_yaxis): - ax.set_axis_off() - - # save the currect figure - if (savefp is not None) and (not use_xaxis) and (not use_yaxis): - fig.savefig(os.path.join(savefp, title +'.png'), bbox_inches='tight', pad_inches=0, dpi=dpi) - elif (savefp is not None) and (use_xaxis or use_yaxis): - fig.savefig(os.path.join(savefp, title +'.png'), bbox_inches='tight', pad_inches=0.1, dpi=dpi) - - if not show_figure: - plt.cla() - plt.clf() - plt.close('all') - return im - -def draw_subplots(dfs, - names, - nrows=3, - ncols=3, - savefp=None, - savename='test', - dpi=600, - GRs=None, - vmaxs=[1e4], - use_title=False, - indexes=None, - cbar='single', - lcolor='white', - lwidth=3, - texts=None, - ftsize=16 - ): - """Draw subplots with one colorbar. +def reshape_mask(mask, shape): + r"""reshape the mask to align the data + Parameters: - dfs (dataframe) -- pandas dataframe or list of dataframes - names (list) -- a list of days ['1999-01-01', '2000-01-01'] - nrows (int) -- number of rows - ncols (int) -- number of columns - savefp (str) -- path to save the figure - savename (str) -- name of the figure to save - dpi (int) -- dpi for saving the figures (both in `png` and `pdf` formats) - GRs (list) -- visualizing the GRs - vmaxs (int list) -- color scale for visualization - use_title (bool) -- add title on each subplot - indexes (str list) -- indexed for each subplot adding to the title (suffix) - cbar (str) -- ['single' | 'multirow' | 'all' | 'none'] - texts (str array) -- an array containing letters 'a', 'b', ... - ftsize (int) -- fontsize for plotting + mask (array) -- a detected mask + shape (tuple) -- the shape of the data + + Returns: + the aligned mask """ - assert len(names) == nrows * ncols + mask = Image.fromarray(mask).resize(shape, Image.ANTIALIAS) + mask = np.fliplr(np.array(mask).T) + return mask.astype(int) - if GRs is not None: - assert len(GRs) == len(names) - fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*6,nrows*4), constrained_layout=True) +def convert_matlab_time(matlab_time): + r""" + convert the Matlab time to Python time + """ + try: + return (datetime.fromordinal(int(matlab_time)) + timedelta(days=matlab_time % 1) - timedelta(days=366)).date() + except: + return np.nan + - use_cbar = True if cbar == 'all' else False +def time2num(tm): + r"""Convert time to number. + fit on numbers and then convert back the numbers to time + """ + time = tm.time() + minutes = time.hour * 60 + time.minute + return minutes - if len(vmaxs) == 1: - vmaxs = vmaxs * nrows - cnt = 0 - for i in range(nrows): - for j in range(ncols): +def num2time(date, num): + r"""Convert number to time. + """ + year, month, day = date.split('-') + hour = int(num / 60) + minute = int(num - hour*60) + return datetime(int(year), int(month), int(day), hour, minute) - vmax = vmaxs[i] - index = None if indexes==None else indexes[cnt] - use_xlabel = True if i==nrows-1 else False - GR = None if GRs is None else GRs[cnt] - if isinstance(dfs, list): - df = dfs[cnt] - elif isinstance(dfs, pd.DataFrame): - df = dfs[names[cnt]] - else: - raise ValueError('Unknown input type.') - im = psd2im(df, - ax=axes[i,j], - fig=fig, - use_cbar=use_cbar, - use_xlabel=use_xlabel, - vmax=vmax, - dpi=dpi, - use_title=use_title, - index=index, - lcolor=lcolor, - lwidth=lwidth, - fit_data=GR, - ftsize=ftsize - ) - cnt += 1 - if texts is not None: - axes[i, j].text(0.05, 0.92, f'({texts[i, j]})', transform=axes[i,j].transAxes, fontsize=ftsize+6, color='white') - if cbar == 'multirow': - #fig.tight_layout() - cb = fig.colorbar(im, ax=axes[i, :], pad=0.01) - cb.set_label('dN/dlog$\mathrm{D_p} (\mathrm{cm}^{-3})$', fontsize=ftsize) - if cbar == 'single': - fig.tight_layout() - im = plt.gca().get_children()[0] - plt.subplots_adjust(right=0.9) - cax = plt.axes([0.92, 0.1, 0.02, 0.85]) - cb = fig.colorbar(im, cax=cax) - cb.set_label('dN/dlog$\mathrm{D_p} (\mathrm{cm}^{-3})$', fontsize=ftsize) - if savefp is not None: - mkdirs(savefp) - fig.savefig(os.path.join(savefp, savename+'.png'), dpi=dpi, bbox_inches='tight', pad_inches=0.1) - fig.savefig(os.path.join(savefp, savename+'.pdf'), bbox_inches='tight', pad_inches=0.1) -def reshape_mask(mask, shape): - """reshape the mask to align the data - - Parameters: - mask (array) -- a detected mask - shape (tuple) -- the shape of the data - - Returns: - the aligned mask - """ - mask = Image.fromarray(mask).resize(shape, Image.ANTIALIAS) - mask = np.fliplr(np.array(mask).T) - return mask.astype(int) diff --git a/utils/visualize.py b/utils/visualize.py new file mode 100644 index 0000000..447a1eb --- /dev/null +++ b/utils/visualize.py @@ -0,0 +1,269 @@ +import os +import numpy as np +import pandas as pd +import matplotlib +matplotlib.use('Agg') +from matplotlib import pyplot as plt +from matplotlib import colors +from datetime import timedelta, datetime + +from .utils import mkdirs + + +def psd2im(df, + ax=None, + fig=None, + mask=None, + savefp=None, + dpi=600, + n_xticks=5, + figsize=(1.6, 1.2), + show_figure=True, + use_title=False, + fit_data=None, + line_data=None, + index=None, + vmax=None, + lcolor='white', + lwidth=3, + use_xaxis=True, + use_xlabel=False, + use_yaxis=True, + use_cbar=False, + ftsize=16 + ): + r"""Draw single or multiple surface plots. + + Parameters: + df (dataframe) -- particle size distribution data for one day or multiple days + ax (ax) -- specify the ax to visualize the psd. If not specified, a new one will be created + fig (fig) -- the whole figure + mask (array) -- numpy array with the same shape as the input psds + savefp (str) -- path for storing the figures + dpi (int) -- default is 600 + n_xticks (int) -- how many ticklabels shown on the x-axis + figsize (tuple) -- used only if a new ax is created + show_figure (bool) -- clear all the figures if drawing many surface plots + use_title (bool) -- use the date as the title for the psd + fit_data (list) -- the fitted time points and related Dps + line_data (list) -- the lines to show the determined GRs + index (int) -- there many be more than one masks detected for one day's psd + vmax (float|none) -- color scale for visualization, default is None. + lcolor (str) -- color for visualizing the GR + lwidth (int) -- linewidth + use_xaxis (bool) -- whether to draw the x-axis + use_yaxis (bool) -- whether to draw the y-axis + use_cbar (bool) -- whether to use the colorbar + ftsize (int) -- fontsize for plotting + + """ + + # get the psd data + dfc = df.copy(deep=True) # get a copy version + df_min = np.nanmin(dfc.replace(0, np.nan)) # find the minimul value + # use the minimul value to replace the na values + dfc.fillna(df_min, inplace=True) + dfc[dfc == 0] = df_min # use the minimul value to replace 0 + dfc = dfc.replace(0, df_min) + + # use the dynamic vmax + if vmax is None: + max_val = np.nanmax(dfc.values) + # check the number of digits + n_digits = len(str(int(max_val))) + vmax = np.power(10, n_digits) + + values = dfc.values.T if mask is None else ( + dfc.values*mask).T # values for visualization + dps = [float(dp)*1e-9 for dp in list(dfc.columns)] # Dps + tm = dfc.index.values # time points + + # check how many days of data to be shown + whole_dates = np.unique([item.date() for item in df.index]) + num_days = (whole_dates[-1] - whole_dates[0]).days + 1 + + # once the ax is none, create a new one + if ax is None: + fig, ax = plt.subplots(figsize=figsize) + + im = ax.pcolormesh(tm, # time points + dps, # particle sizes + values, # distribution + norm=colors.LogNorm(vmin=1e1, vmax=vmax), + cmap='jet', + shading='auto') + + # add the fitted line for determinating the GRs + if fit_data is not None: + ax.scatter(fit_data[0], fit_data[1], c='k', s=15, marker='o') + + if line_data is not None: + ax.plot(line_data[0], line_data[1], c=lcolor, linewidth=lwidth) + + # use the log scale for y-axis + ax.set_yscale('log') + + # get title + title = str(whole_dates[0]) if num_days <= 1 else str( + whole_dates[0])+'_'+str(whole_dates[-1]) + + # add index + if index is not None: + title = title + f' {index}' + + # add the title on the figure + if use_title: + ax.set_title(title, fontsize=ftsize+4) + + # add y-axis + if use_yaxis: + ax.set_ylabel('$\mathrm{D_p}$ (m)', fontsize=ftsize+2) + else: + ax.get_yaxis().set_visible(False) + + # add x-axis + if use_xaxis: + xtick = [datetime(sdate.year, sdate.month, sdate.day) + timedelta(i/(n_xticks-1)) for sdate in whole_dates + for i in range(n_xticks-1)] + [datetime(whole_dates[-1].year, whole_dates[-1].month, whole_dates[-1].day)+timedelta(1)] + xtick_labels = ['00:00', '06:00', '12:00', + '18:00'] * num_days + ['00:00'] + ax.set_xticks(xtick) + ax.set_xticklabels(xtick_labels) + else: + ax.get_xaxis().set_visible(False) + + if use_xlabel: + ax.set_xlabel('Local time (h)', fontsize=ftsize+2) + + # add colorbar + if use_cbar: + # here fig is the default input for subplots + cbar = fig.colorbar(im, ax=ax) + cbar.set_label( + 'dN/dlog$\mathrm{D_p} (\mathrm{cm}^{-3})$', fontsize=ftsize+2) + + # to avoid the black edges + if (not use_xaxis) and (not use_yaxis): + ax.set_axis_off() + + # save the currect figure + if (savefp is not None) and (not use_xaxis) and (not use_yaxis): + fig.savefig(os.path.join(savefp, title + '.png'), + bbox_inches='tight', pad_inches=0, dpi=dpi) + elif (savefp is not None) and (use_xaxis or use_yaxis): + fig.savefig(os.path.join(savefp, title + '.png'), + bbox_inches='tight', pad_inches=0.1, dpi=dpi) + + if not show_figure: + plt.cla() + plt.clf() + plt.close('all') + return im + + +def draw_subplots(dfs, + names, + nrows=3, + ncols=3, + savefp=None, + savename='test', + dpi=600, + GRs=None, + vmaxs=[1e4], + use_title=False, + indexes=None, + cbar='single', + lcolor='white', + lwidth=3, + texts=None, + ftsize=16 + ): + r"""Draw subplots with one colorbar. + + Parameters: + dfs (dataframe) -- pandas dataframe or list of dataframes + names (list) -- a list of days ['1999-01-01', '2000-01-01'] + nrows (int) -- number of rows + ncols (int) -- number of columns + savefp (str) -- path to save the figure + savename (str) -- name of the figure to save + dpi (int) -- dpi for saving the figures (both in `png` and `pdf` formats) + GRs (list) -- visualizing the GRs + vmaxs (int list) -- color scale for visualization + use_title (bool) -- add title on each subplot + indexes (str list) -- indexed for each subplot adding to the title (suffix) + cbar (str) -- ['single' | 'multirow' | 'all' | 'none'] + texts (str array) -- an array containing letters 'a', 'b', ... + ftsize (int) -- fontsize for plotting + """ + assert len(names) == nrows * ncols + + if GRs is not None: + assert len(GRs) == len(names) + + fig, axes = plt.subplots(nrows, ncols, figsize=( + ncols*6, nrows*4), constrained_layout=True) + + use_cbar = True if cbar == 'all' else False + + if len(vmaxs) == 1: + vmaxs = vmaxs * nrows + + cnt = 0 + for i in range(nrows): + for j in range(ncols): + + vmax = vmaxs[i] + + index = None if indexes == None else indexes[cnt] + use_xlabel = True if i == nrows-1 else False + GR = None if GRs is None else GRs[cnt] + + if isinstance(dfs, list): + df = dfs[cnt] + elif isinstance(dfs, pd.DataFrame): + df = dfs[names[cnt]] + else: + raise ValueError('Unknown input type.') + + im = psd2im(df, + ax=axes[i, j], + fig=fig, + use_cbar=use_cbar, + use_xlabel=use_xlabel, + vmax=vmax, + dpi=dpi, + use_title=use_title, + index=index, + lcolor=lcolor, + lwidth=lwidth, + line_data=GR, + ftsize=ftsize + ) + cnt += 1 + + if texts is not None: + axes[i, j].text( + 0.05, 0.92, f'({texts[i, j]})', transform=axes[i, j].transAxes, fontsize=ftsize+6, color='white') + + if cbar == 'multirow': + #fig.tight_layout() + cb = fig.colorbar(im, ax=axes[i, :], pad=0.01) + cb.set_label( + 'dN/dlog$\mathrm{D_p} (\mathrm{cm}^{-3})$', fontsize=ftsize) + + if cbar == 'single': + fig.tight_layout() + im = plt.gca().get_children()[0] + plt.subplots_adjust(right=0.9) + cax = plt.axes([0.92, 0.1, 0.02, 0.85]) + cb = fig.colorbar(im, cax=cax) + cb.set_label( + 'dN/dlog$\mathrm{D_p} (\mathrm{cm}^{-3})$', fontsize=ftsize) + + if savefp is not None: + mkdirs(savefp) + fig.savefig(os.path.join(savefp, savename+'.png'), + dpi=dpi, bbox_inches='tight', pad_inches=0.1) + fig.savefig(os.path.join(savefp, savename+'.pdf'), + bbox_inches='tight', pad_inches=0.1)