From 5ebaa5b60d12497e2eb781e98816fe618d26db9a Mon Sep 17 00:00:00 2001
From: Anastasia Gracheva
Date: Wed, 29 Jan 2020 00:20:51 +0300
Subject: [PATCH] refactor for convenience, make baseline_10x10 a class

---
 metrics/__init__.py                    |   8 +-
 models/baseline_10x10.py               | 131 +++++++++++++++++
 models/baseline_10x10/__init__.py      | 138 ------------------
 .../__init__.py => baseline_10x15.py}  |   0
 models/training.py                     |   7 +-
 plotting/__init__.py                   |   4 +-
 test_script_data_v1.py                 |  40 +++--
 7 files changed, 168 insertions(+), 160 deletions(-)
 create mode 100644 models/baseline_10x10.py
 delete mode 100644 models/baseline_10x10/__init__.py
 rename models/{baseline_10x15/__init__.py => baseline_10x15.py} (100%)
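Note: the point of this refactor is that the generator/discriminator, their
optimizers, and the step counter move from module-level globals into
per-instance state on BaselineModel10x10, so hyperparameters (lr, latent_dim,
gp_lambda, num_disc_updates) become constructor arguments. A minimal usage
sketch against the new API (the two-instance scenario is illustrative):

    from models.baseline_10x10 import BaselineModel10x10

    # Two independently configured models can now coexist in one process --
    # impossible before, when the networks were module-level singletons.
    model_a = BaselineModel10x10()                        # defaults: lr=1e-4, latent_dim=32
    model_b = BaselineModel10x10(lr=3e-4, latent_dim=64)
    fake = model_a.make_fake(16)                          # tensor of shape (16, 10, 10)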
diff --git a/metrics/__init__.py b/metrics/__init__.py
index c1a48a2..2e6afc4 100644
--- a/metrics/__init__.py
+++ b/metrics/__init__.py
@@ -6,6 +6,7 @@
 import matplotlib.pyplot as plt
 import PIL
 
+
 def _gaussian_fit(img):
     assert img.ndim == 2
     assert (img >= 0).all()
@@ -24,6 +25,7 @@ def _gaussian_fit(img):
     ).reshape(2, 2)
     return mu, cov
 
+
 def _get_val_metric_single(img):
     """Returns a vector of gaussian fit results to the image.
     The components are: [mu0, mu1, sigma0^2, sigma1^2, covariance, integral]
@@ -36,10 +38,13 @@
 
     return np.array((*mu, *cov.diagonal(), cov[0, 1], img.sum()))
 
+
 _METRIC_NAMES = ['Mean0', 'Mean1', 'Sigma0^2', 'Sigma1^2', 'Cov01', 'Sum']
 
+
 get_val_metric = np.vectorize(_get_val_metric_single, signature='(m,n)->(k)')
 
+
 def get_val_metric_v(imgs):
     """Returns a vector of gaussian fit results to the image.
     The components are: [mu0, mu1, sigma0^2, sigma1^2, covariance, integral]
@@ -84,6 +89,7 @@ def make_histograms(data_real, data_gen, title, figsize=(8, 8), n_bins=100, logy
     img = PIL.Image.open(buf)
     return np.array(img.getdata(), dtype=np.uint8).reshape(1, img.size[0], img.size[1], -1)
 
+
 def make_metric_plots(images_real, images_gen):
     plots = {}
     try:
@@ -96,5 +102,3 @@
         pass
 
     return plots
-
-
diff --git a/models/baseline_10x10.py b/models/baseline_10x10.py
new file mode 100644
index 0000000..402a134
--- /dev/null
+++ b/models/baseline_10x10.py
@@ -0,0 +1,131 @@
+import tensorflow as tf
+
+
+def get_generator(activation, kernel_init, latent_dim):
+    generator = tf.keras.Sequential([
+        tf.keras.layers.Dense(units=64, activation=activation, input_shape=(latent_dim,)),
+
+        tf.keras.layers.Reshape((4, 4, 4)),
+
+        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
+        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
+        tf.keras.layers.UpSampling2D(), # 8x8
+
+        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='same' , activation=activation, kernel_initializer=kernel_init),
+        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='valid', activation=activation, kernel_initializer=kernel_init), # 6x6
+        tf.keras.layers.UpSampling2D(), # 12x12
+
+        tf.keras.layers.Conv2D(filters=8, kernel_size=3, padding='valid', activation=activation, kernel_initializer=kernel_init), # 10x10
+        tf.keras.layers.Conv2D(filters=1, kernel_size=1, padding='valid', activation=tf.keras.activations.relu, kernel_initializer=kernel_init),
+
+        tf.keras.layers.Reshape((10, 10)),
+    ], name='generator')
+    return generator
+
+
+def get_discriminator(activation, kernel_init, dropout_rate):
+    discriminator = tf.keras.Sequential([
+        tf.keras.layers.Reshape((10, 10, 1), input_shape=(10, 10)),
+
+        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
+        tf.keras.layers.Dropout(dropout_rate),
+        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='valid', activation=activation, kernel_initializer=kernel_init), # 8x8
+        tf.keras.layers.Dropout(dropout_rate),
+
+        tf.keras.layers.MaxPool2D(), # 4x4
+
+        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
+        tf.keras.layers.Dropout(dropout_rate),
+        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
+        tf.keras.layers.Dropout(dropout_rate),
+
+        tf.keras.layers.MaxPool2D(), # 2x2
+
+        tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='valid', activation=activation, kernel_initializer=kernel_init), # 1x1
+        tf.keras.layers.Dropout(dropout_rate),
+
+        tf.keras.layers.Reshape((64,)),
+
+        tf.keras.layers.Dense(units=128, activation=activation),
+        tf.keras.layers.Dropout(dropout_rate),
+
+        tf.keras.layers.Dense(units=1, activation=None),
+    ], name='discriminator')
+    return discriminator
+
+
+def disc_loss(d_real, d_fake):
+    return tf.reduce_mean(d_fake - d_real)
+
+
+def gen_loss(d_real, d_fake):
+    return tf.reduce_mean(d_real - d_fake)
+
+
+class BaselineModel10x10:
+    def __init__(self, activation=tf.keras.activations.relu, kernel_init='glorot_uniform',
+                 dropout_rate=0.2, lr=1e-4, latent_dim=32, gp_lambda=10., num_disc_updates=3):
+        self.disc_opt = tf.keras.optimizers.RMSprop(lr)
+        self.gen_opt = tf.keras.optimizers.RMSprop(lr)
+        self.latent_dim = latent_dim
+        self.gp_lambda = gp_lambda
+        self.num_disc_updates = num_disc_updates
+
+        self.generator = get_generator(activation=activation, kernel_init=kernel_init, latent_dim=latent_dim)
+        self.discriminator = get_discriminator(activation=activation, kernel_init=kernel_init, dropout_rate=dropout_rate)
+
+        self.step_counter = tf.Variable(0, dtype='int32', trainable=False)
+
+    def make_fake(self, size):
+        return self.generator(
+            tf.random.normal(shape=(size, self.latent_dim), dtype='float32')
+        )
+
+    def gradient_penalty(self, real, fake):
+        alpha = tf.random.uniform(shape=[len(real), 1, 1])
+        interpolates = alpha * real + (1 - alpha) * fake
+        with tf.GradientTape() as t:
+            t.watch(interpolates)
+            d_int = self.discriminator(interpolates)
+        grads = tf.reshape(t.gradient(d_int, interpolates), [len(real), -1])
+        return tf.reduce_mean(tf.maximum(tf.norm(grads, axis=-1) - 1, 0)**2)
+
+    @tf.function
+    def calculate_losses(self, batch):
+        fake = self.make_fake(len(batch))
+        d_real = self.discriminator(batch)
+        d_fake = self.discriminator(fake)
+
+        d_loss = disc_loss(d_real, d_fake) + self.gp_lambda * self.gradient_penalty(batch, fake)
+        g_loss = gen_loss(d_real, d_fake)
+        return {'disc_loss': d_loss, 'gen_loss': g_loss}
+
+    def disc_step(self, batch):
+        batch = tf.convert_to_tensor(batch)
+
+        with tf.GradientTape() as t:
+            losses = self.calculate_losses(batch)
+
+        grads = t.gradient(losses['disc_loss'], self.discriminator.trainable_variables)
+        self.disc_opt.apply_gradients(zip(grads, self.discriminator.trainable_variables))
+        return losses
+
+    def gen_step(self, batch):
+        batch = tf.convert_to_tensor(batch)
+
+        with tf.GradientTape() as t:
+            losses = self.calculate_losses(batch)
+
+        grads = t.gradient(losses['gen_loss'], self.generator.trainable_variables)
+        self.gen_opt.apply_gradients(zip(grads, self.generator.trainable_variables))
+        return losses
+
+    @tf.function
+    def training_step(self, batch):
+        if self.step_counter == self.num_disc_updates:
+            result = self.gen_step(batch)
+            self.step_counter.assign(0)
+        else:
+            result = self.disc_step(batch)
+            self.step_counter.assign_add(1)
+        return result
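Note: disc_loss/gen_loss above are the usual Wasserstein critic estimates, and
gradient_penalty is the one-sided variant of the WGAN-GP penalty: only
interpolate gradients with norm above 1 are penalized. A toy check of that
expression (values chosen by hand):

    import tensorflow as tf

    norms = tf.constant([0.5, 1.0, 2.0])
    penalty = tf.reduce_mean(tf.maximum(norms - 1, 0) ** 2)
    print(float(penalty))  # 1/3 -- only the norm-2.0 entry contributes (2 - 1)**2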
diff --git a/models/baseline_10x10/__init__.py b/models/baseline_10x10/__init__.py
deleted file mode 100644
index 683dd96..0000000
--- a/models/baseline_10x10/__init__.py
+++ /dev/null
@@ -1,138 +0,0 @@
-import tensorflow as tf
-
-
-LATENT_DIM = 32
-NUM_DISC_UPDATES = 3
-GP_LAMBDA = 10.
-
-
-def get_generator(activation = tf.keras.activations.relu, kernel_init='glorot_uniform'): # he_normalZ
-    generator = tf.keras.Sequential([
-        tf.keras.layers.Dense(units=64, activation=activation, input_shape=(LATENT_DIM,)),
-
-        tf.keras.layers.Reshape((4, 4, 4)),
-
-        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
-        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
-        tf.keras.layers.UpSampling2D(), # 8x8
-
-        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='same' , activation=activation, kernel_initializer=kernel_init),
-        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='valid', activation=activation, kernel_initializer=kernel_init), # 6x6
-        tf.keras.layers.UpSampling2D(), # 12x12
-
-        tf.keras.layers.Conv2D(filters=8, kernel_size=3, padding='valid', activation=activation, kernel_initializer=kernel_init), # 10x10
-
-        tf.keras.layers.Conv2D(filters=1, kernel_size=1, padding='valid', activation=tf.keras.activations.relu, kernel_initializer=kernel_init),
-
-        tf.keras.layers.Reshape((10, 10)),
-    ], name='generator')
-    return generator
-
-
-def get_discriminator(activation=tf.keras.activations.relu, kernel_init='glorot_uniform', dropout_rate=0.2):
-    discriminator = tf.keras.Sequential([
-        tf.keras.layers.Reshape((10, 10, 1), input_shape=(10, 10)),
-
-        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
-        tf.keras.layers.Dropout(dropout_rate),
-        tf.keras.layers.Conv2D(filters=16, kernel_size=3, padding='valid', activation=activation, kernel_initializer=kernel_init), # 8x8
-        tf.keras.layers.Dropout(dropout_rate),
-
-        tf.keras.layers.MaxPool2D(), # 4x4
-
-        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
-        tf.keras.layers.Dropout(dropout_rate),
-        tf.keras.layers.Conv2D(filters=32, kernel_size=3, padding='same', activation=activation, kernel_initializer=kernel_init),
-        tf.keras.layers.Dropout(dropout_rate),
-
-        tf.keras.layers.MaxPool2D(), # 2x2
-
-        tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='valid', activation=activation, kernel_initializer=kernel_init), # 1x1
-        tf.keras.layers.Dropout(dropout_rate),
-
-        tf.keras.layers.Reshape((64,)),
-
-        tf.keras.layers.Dense(units=128, activation=activation),
-        tf.keras.layers.Dropout(dropout_rate),
-
-        tf.keras.layers.Dense(units=1, activation=None),
-    ], name='discriminator')
-    return discriminator
-
-
-def disc_loss(d_real, d_fake):
-    return tf.reduce_mean(d_fake - d_real)
-
-
-def gen_loss(d_real, d_fake):
-    return tf.reduce_mean(d_real - d_fake)
-
-
-disc_opt = tf.keras.optimizers.RMSprop(1e-4)
-gen_opt = tf.keras.optimizers.RMSprop(1e-4)
-
-generator = get_generator()
-discriminator = get_discriminator()
-
-
-def make_fake(size):
-    return generator(
-        tf.random.normal(shape=(size, LATENT_DIM), dtype='float32')
-    )
-
-
-def gradient_penalty(real, fake):
-    alpha = tf.random.uniform(shape=[len(real), 1, 1])
-    interpolates = alpha * real + (1 - alpha) * fake
-    with tf.GradientTape() as t:
-        t.watch(interpolates)
-        d_int = discriminator(interpolates)
-    grads = tf.reshape(t.gradient(d_int, interpolates), [len(real), -1])
-    return tf.reduce_mean(tf.maximum(tf.norm(grads, axis=-1) - 1, 0)**2)
-
-
-@tf.function
-def calculate_losses(batch):
-    fake = make_fake(len(batch))
-    d_real = discriminator(batch)
-    d_fake = discriminator(fake)
-
-    d_loss = disc_loss(d_real, d_fake) + GP_LAMBDA * gradient_penalty(batch, fake)
-    g_loss = gen_loss(d_real, d_fake)
-    return {'disc_loss' : d_loss, 'gen_loss' : g_loss}
-
-
-def disc_step(batch):
-    batch = tf.convert_to_tensor(batch)
-
-    with tf.GradientTape() as t:
-        losses = calculate_losses(batch)
-
-    grads = t.gradient(losses['disc_loss'], discriminator.trainable_variables)
-    disc_opt.apply_gradients(zip(grads, discriminator.trainable_variables))
-    return losses
-
-
-def gen_step(batch):
-    batch = tf.convert_to_tensor(batch)
-
-    with tf.GradientTape() as t:
-        losses = calculate_losses(batch)
-
-    grads = t.gradient(losses['gen_loss'], generator.trainable_variables)
-    gen_opt.apply_gradients(zip(grads, generator.trainable_variables))
-    return losses
-
-
-step_counter = tf.Variable(0, dtype='int32', trainable=False)
-
-
-@tf.function
-def training_step(batch):
-    if step_counter == NUM_DISC_UPDATES:
-        result = gen_step(batch)
-        step_counter.assign(0)
-    else:
-        result = disc_step(batch)
-        step_counter.assign_add(1)
-    return result
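Note: the update schedule encoded by step_counter is unchanged by the
refactor: num_disc_updates critic steps, then one generator step. A
plain-Python sketch of the resulting schedule with the default
num_disc_updates=3:

    counter, schedule = 0, []
    for _ in range(8):
        if counter == 3:              # num_disc_updates reached
            schedule.append('G')      # generator step, counter resets
            counter = 0
        else:
            schedule.append('D')      # discriminator (critic) step
            counter += 1
    print(''.join(schedule))          # DDDGDDDG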
diff --git a/models/baseline_10x15/__init__.py b/models/baseline_10x15.py
similarity index 100%
rename from models/baseline_10x15/__init__.py
rename to models/baseline_10x15.py
diff --git a/models/training.py b/models/training.py
index 5c153cb..f811ef0 100644
--- a/models/training.py
+++ b/models/training.py
@@ -3,11 +3,12 @@
 from tqdm import trange
 
 
-def train(data_train, data_val, train_step_fn, loss_eval_fn, num_epochs, batch_size, train_writer=None, val_writer=None, callbacks=[]):
+def train(data_train, data_val, train_step_fn, loss_eval_fn, num_epochs, batch_size,
+          train_writer=None, val_writer=None, callbacks=[]):
     for i_epoch in range(num_epochs):
         print("Working on epoch #{}".format(i_epoch), flush=True)
 
-        tf.keras.backend.set_learning_phase(1) # training
+        tf.keras.backend.set_learning_phase(1)  # training
 
         shuffle_ids = np.random.permutation(len(data_train))
         losses_train = {}
@@ -20,7 +21,7 @@ def train(data_train, data_val, train_step_fn, loss_eval_fn, num_epochs, batch_s
             losses_train[k] = losses_train.get(k, 0) + l.numpy() * len(batch)
         losses_train = {k : l / len(data_train) for k, l in losses_train.items()}
 
-        tf.keras.backend.set_learning_phase(0) # testing
+        tf.keras.backend.set_learning_phase(0)  # testing
         losses_val = {k : l.numpy() for k, l in loss_eval_fn(data_val).items()}
 
         for f in callbacks:
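Note on the signature kept by this hunk: callbacks=[] is a mutable default,
shared across all calls of train(). The function only iterates over it, so it
is harmless here, but if the list were ever mutated the usual defensive idiom
would be:

    def train(data_train, data_val, train_step_fn, loss_eval_fn, num_epochs, batch_size,
              train_writer=None, val_writer=None, callbacks=None):
        callbacks = [] if callbacks is None else callbacks
        ...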
diff --git a/plotting/__init__.py b/plotting/__init__.py
index 2105360..5154df1 100644
--- a/plotting/__init__.py
+++ b/plotting/__init__.py
@@ -9,6 +9,7 @@ def _bootstrap_error(data, function, num_bs=100):
     bs_data = np.random.choice(data, size=(num_bs, len(data)), replace=True)
     return np.array([function(bs_sample) for bs_sample in bs_data]).std()
 
+
 def _get_stats(arr):
     class Obj:
         pass
@@ -22,6 +23,7 @@ class Obj:
 
     return result
 
+
 def compare_two_dists(d_real, d_gen, label, tag=None, nbins=100):
     ax = plt.gca()
     bins = np.linspace(
@@ -39,7 +41,7 @@ def compare_two_dists(d_real, d_gen, label, tag=None, nbins=100):
         leg_entry = 'gen'
 
     plt.hist(d_real, bins=bins, density=True, label='real')
-    plt.hist(d_gen , bins=bins, density=True, label=leg_entry, histtype='step', linewidth=2.)
+    plt.hist(d_gen, bins=bins, density=True, label=leg_entry, histtype='step', linewidth=2.)
 
     string = '\n'.join([
         f"real: mean = {stats_real.mean :.4f} +/- {stats_real.mean_err :.4f}",
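Note: _bootstrap_error (context lines above) estimates the uncertainty of an
arbitrary statistic by resampling with replacement. A self-contained sketch of
the same idea (sample sizes picked arbitrarily):

    import numpy as np

    data = np.random.normal(size=1000)
    bs_data = np.random.choice(data, size=(100, len(data)), replace=True)
    err = np.array([sample.mean() for sample in bs_data]).std()
    print(err)  # close to the analytic sigma / sqrt(N) = 1 / sqrt(1000) ~ 0.032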
rate", baseline_10x10.disc_opt.lr, step) - tf.summary.scalar("generator learning rate", baseline_10x10.gen_opt.lr, step) + tf.summary.scalar("discriminator learning rate", model.disc_opt.lr, step) + tf.summary.scalar("generator learning rate", model.gen_opt.lr, step) - training.train(X_train, X_test, baseline_10x10.training_step, baseline_10x10.calculate_losses, 10000, args.batch_size, - train_writer=writer_train, val_writer=writer_val, - callbacks=[write_hist_summary, save_model, schedule_lr]) + train(X_train, X_test, model.training_step, model.calculate_losses, args.num_epochs, args.batch_size, + train_writer=writer_train, val_writer=writer_val, + callbacks=[write_hist_summary, save_model, schedule_lr]) if __name__ == '__main__':