diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..469fd3a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,28 @@
+# Base image
+FROM tensorflow/tensorflow:1.15.0-gpu
+
+# Resolve the NVIDIA CUDA repository GPG key error
+# See: https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/
+# See: https://askubuntu.com/questions/1444943/nvidia-gpg-error-the-following-signatures-couldnt-be-verified-because-the-publi
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+
+# Update the image to use the latest python3 and pip3
+RUN apt-get update
+RUN apt-get install -y python3-pip python3-dev vim
+WORKDIR /usr/local/bin
+RUN rm /usr/local/bin/python
+RUN ln -s /usr/bin/python3 python
+RUN pip3 install --upgrade pip
+RUN apt-get install -y git curl zip unzip
+
+# Create /app directory
+WORKDIR /app
+
+# Copy OmniAnomaly requirements into image
+COPY ./requirements.txt /app
+
+# Install OmniAnomaly requirements
+RUN pip3 install -r requirements.txt
+
+# Set initial folder to be OmniAnomaly
+WORKDIR /app/OmniAnomaly
diff --git a/README.md b/README.md
index 9502647..fe50c77 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,24 @@
 # OmniAnomaly
-
-
 ### Anomaly Detection for Multivariate Time Series through Modeling Temporal Dependence of Stochastic Variables
 
 OmniAnomaly is a stochastic recurrent neural network model which glues Gated Recurrent Unit (GRU) and Variational auto-encoder (VAE), its core idea is to learn the normal patterns of multivariate time series and uses the reconstruction probability to do anomaly judgment.
-
-
 
 ## Getting Started
 
 #### Clone the repo
 
 ```
-git clone https://github.com/smallcowbaby/OmniAnomaly && cd OmniAnomaly
+git clone https://github.com/yyexela/OmniAnomaly && cd OmniAnomaly
+```
+
+#### Create and run Docker container
+
+```shell
+# Build the Docker image
+docker build -t omnianomaly .
+# Start a container with GPU support
+docker run --gpus all -v ./:/app/OmniAnomaly -it --rm omnianomaly bash
 ```
 
 #### Get data
@@ -28,14 +33,6 @@
 wget https://s3-us-west-2.amazonaws.com/telemanom/data.zip && unzip data.zip && cd data && wget https://raw.githubusercontent.com/khundman/telemanom/master/labeled_anomalies.csv
 ```
 
-#### Install dependencies (with python 3.5, 3.6)
-
-(virtualenv is recommended)
-
-```shell
-pip install -r requirements.txt
-```
-
 #### Preprocess the data
 
 ```shell
@@ -56,8 +53,6 @@ If you want to change the default configuration, you can edit `ExpConfig` in `ma
 python main.py --dataset='MSL' --max_epoch=20
 ```
-
-
 ## Data
 
 ### Dataset Information
diff --git a/main.py b/main.py
index 1c6eb66..5f95b41 100644
--- a/main.py
+++ b/main.py
@@ -85,7 +85,7 @@ class ExpConfig(Config):
     test_score_filename = 'test_score.pkl'
 
 
-def main(): 
+def main():
     logging.basicConfig(
         level='INFO',
         format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'
     )
@@ -97,7 +97,7 @@ def main():
                           test_start=config.test_start)
 
     # construct the model under `variable_scope` named 'model'
-    with tf.variable_scope('model') as model_vs:
+    with tf.compat.v1.variable_scope('model') as model_vs:
        model = OmniAnomaly(config=config, name="model")
 
        # construct the trainer
@@ -116,7 +116,7 @@ def main():
        predictor = Predictor(model, batch_size=config.batch_size, n_z=config.test_n_z,
                              last_point_only=True)
 
-       with tf.Session().as_default():
+       with tf.compat.v1.Session().as_default():
 
            if config.restore_dir is not None:
                # Restore variables from `save_dir`.
diff --git a/omni_anomaly/model.py b/omni_anomaly/model.py
index ac57747..a27c11e 100644
--- a/omni_anomaly/model.py
+++ b/omni_anomaly/model.py
@@ -43,7 +43,7 @@ def __init__(self, config, name=None, scope=None):
                 ) if config.use_connected_z_p else Normal(mean=tf.zeros([config.z_dim]), std=tf.ones([config.z_dim])),
                 p_x_given_z=Normal,
                 q_z_given_x=partial(RecurrentDistribution,
-                                    mean_q_mlp=partial(tf.layers.dense, units=config.z_dim, name='z_mean', reuse=tf.AUTO_REUSE),
+                                    mean_q_mlp=partial(tf.layers.dense, units=config.z_dim, name='z_mean', reuse=tf.compat.v1.AUTO_REUSE),
                                     std_q_mlp=partial(softplus_std, units=config.z_dim, epsilon=config.std_epsilon, name='z_std'),
                                     z_dim=config.z_dim, window_length=config.window_length) if config.use_connected_z_q else Normal,
@@ -57,7 +57,7 @@ def __init__(self, config, name=None, scope=None):
                                   dense_dim=config.dense_dim, name='rnn_p_x'),
                     mean_layer=partial(
-                        tf.layers.dense, units=config.x_dim, name='x_mean', reuse=tf.AUTO_REUSE
+                        tf.layers.dense, units=config.x_dim, name='x_mean', reuse=tf.compat.v1.AUTO_REUSE
                     ),
                     std_layer=partial(
                         softplus_std, units=config.x_dim, epsilon=config.std_epsilon,
@@ -84,7 +84,7 @@ def __init__(self, config, name=None, scope=None):
                                   dense_dim=config.dense_dim, name="rnn_q_z"),
                     mean_layer=partial(
-                        tf.layers.dense, units=config.z_dim, name='z_mean', reuse=tf.AUTO_REUSE
+                        tf.layers.dense, units=config.z_dim, name='z_mean', reuse=tf.compat.v1.AUTO_REUSE
                     ),
                     std_layer=partial(
                         softplus_std, units=config.z_dim, epsilon=config.std_epsilon,
diff --git a/omni_anomaly/prediction.py b/omni_anomaly/prediction.py
index 073d8b1..8e57470 100644
--- a/omni_anomaly/prediction.py
+++ b/omni_anomaly/prediction.py
@@ -48,9 +48,9 @@ def __init__(self, model, n_z=1024, batch_size=32,
 
         with reopen_variable_scope(self.variable_scope):
             # input placeholders
-            self._input_x = tf.placeholder(
+            self._input_x = tf.compat.v1.placeholder(
                 dtype=tf.float32, shape=[None, model.window_length, model.x_dims], name='input_x')
-            self._input_y = tf.placeholder(
+            self._input_y = tf.compat.v1.placeholder(
                 dtype=tf.int32, shape=[None, model.window_length], name='input_y')
 
             # outputs of interest
diff --git a/omni_anomaly/recurrent_distribution.py b/omni_anomaly/recurrent_distribution.py
index 59acefb..1605ff8 100644
--- a/omni_anomaly/recurrent_distribution.py
+++ b/omni_anomaly/recurrent_distribution.py
@@ -40,7 +40,7 @@ def sample_step(self, a, t):
         input_q_n = tf.broadcast_to(input_q_n, [tf.shape(z_previous)[0], tf.shape(input_q_n)[0], input_q_n.shape[1]])
         input_q = tf.concat([input_q_n, z_previous], axis=-1)
 
-        mu_q = self.mean_q_mlp(input_q, reuse=tf.AUTO_REUSE)  # n_sample * batch_size * z_dim
+        mu_q = self.mean_q_mlp(input_q, reuse=tf.compat.v1.AUTO_REUSE)  # n_sample * batch_size * z_dim
 
         std_q = self.std_q_mlp(input_q)  # n_sample * batch_size * z_dim
@@ -59,13 +59,13 @@ def log_prob_step(self, _, t):
         input_q_n = tf.broadcast_to(input_q_n, [tf.shape(given_n)[0], tf.shape(input_q_n)[0], input_q_n.shape[1]])
         input_q = tf.concat([given_n, input_q_n], axis=-1)
 
-        mu_q = self.mean_q_mlp(input_q, reuse=tf.AUTO_REUSE)
+        mu_q = self.mean_q_mlp(input_q, reuse=tf.compat.v1.AUTO_REUSE)
 
         std_q = self.std_q_mlp(input_q)
-        logstd_q = tf.log(std_q)
+        logstd_q = tf.math.log(std_q)
         precision = tf.exp(-2 * logstd_q)
         if self._check_numerics:
-            precision = tf.check_numerics(precision, "precision")
+            precision = tf.debugging.check_numerics(precision, "precision")
         log_prob_n = - 0.9189385332046727 - logstd_q - 0.5 * precision * tf.square(tf.minimum(tf.abs(given_n - mu_q), 1e8))
         return log_prob_n
@@ -98,7 +98,7 @@ def sample(self, n_samples=1024, is_reparameterized=None, group_ndims=0, compute
         noise = self.normal.sample(n_samples=n_samples)
         noise = tf.transpose(noise, [1, 0, 2])  # window_length * n_samples * z_dim
-        noise = tf.truncated_normal(tf.shape(noise))
+        noise = tf.random.truncated_normal(tf.shape(noise))
 
         time_indices_shape = tf.convert_to_tensor([n_samples, tf.shape(self.input_q)[1], self.z_dim])
diff --git a/omni_anomaly/training.py b/omni_anomaly/training.py
index d9762fe..5c6bc2c 100644
--- a/omni_anomaly/training.py
+++ b/omni_anomaly/training.py
@@ -22,7 +22,7 @@ class Trainer(VarScopeObject):
 
     Args:
         model (OmniAnomaly): The :class:`OmniAnomaly` model instance.
-        model_vs (str or tf.VariableScope): If specified, will collect
+        model_vs (str or tf.compat.v1.VariableScope): If specified, will collect
             trainable variables only from this scope.  If :obj:`None`,
             will collect all trainable variables within current graph.
             (default :obj:`None`)
@@ -35,7 +35,7 @@ class Trainer(VarScopeObject):
             validation.  If :obj:`None`, follow `feed_dict` of training.
             (default :obj:`None`)
         use_regularization_loss (bool): Whether or not to add regularization
-            loss from `tf.GraphKeys.REGULARIZATION_LOSSES` to the training
+            loss from `tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES` to the training
             loss? (default :obj:`True`)
         max_epoch (int or None): Maximum epochs to run.  If :obj:`None`,
             will not stop at any particular epoch. (default 256)
@@ -73,7 +73,7 @@ def __init__(self, model, model_vs=None, n_z=None,
                  max_epoch=256, max_step=None, batch_size=256,
                  valid_batch_size=1024, valid_step_freq=100,
                  initial_lr=0.001, lr_anneal_epochs=10, lr_anneal_factor=0.75,
-                 optimizer=tf.train.AdamOptimizer, optimizer_params=None,
+                 optimizer=tf.compat.v1.train.AdamOptimizer, optimizer_params=None,
                  grad_clip_norm=50.0, check_numerics=True,
                  name=None, scope=None):
         super(Trainer, self).__init__(name=name, scope=scope)
@@ -104,15 +104,15 @@ def __init__(self, model, model_vs=None, n_z=None,
         # build the trainer
         with reopen_variable_scope(self.variable_scope):
             # the global step for this model
-            self._global_step = tf.get_variable(
+            self._global_step = tf.compat.v1.get_variable(
                 dtype=tf.int64, name='global_step', trainable=False,
                 initializer=tf.constant(0, dtype=tf.int64)
             )
 
             # input placeholders
-            self._input_x = tf.placeholder(
+            self._input_x = tf.compat.v1.placeholder(
                 dtype=tf.float32, shape=[None, model.window_length, model.x_dims], name='input_x')
-            self._learning_rate = tf.placeholder(
+            self._learning_rate = tf.compat.v1.placeholder(
                 dtype=tf.float32, shape=(), name='learning_rate')
 
             # compose the training loss
@@ -120,12 +120,12 @@ def __init__(self, model, model_vs=None, n_z=None,
                 loss = model.get_training_loss(
                     x=self._input_x, n_z=n_z)
                 if use_regularization_loss:
-                    loss += tf.losses.get_regularization_loss()
+                    loss += tf.compat.v1.losses.get_regularization_loss()
                 self._loss = loss
 
             # get the training variables
             train_params = get_variables_as_dict(
-                scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES)
+                scope=model_vs, collection=tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
             self._train_params = train_params
 
             # create the trainer
@@ -146,7 +146,7 @@ def __init__(self, model, model_vs=None, n_z=None,
                     if grad_clip_norm:
                         grad = tf.clip_by_norm(grad, grad_clip_norm)
                     if check_numerics:
-                        grad = tf.check_numerics(
+                        grad = tf.debugging.check_numerics(
                             grad,
                             'gradient for {} has numeric issue'.format(var.name)
                         )
@@ -154,21 +154,21 @@ def __init__(self, model, model_vs=None, n_z=None,
             # build the training op
             with tf.control_dependencies(
-                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
+                    tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)):
                 self._train_op = self._optimizer.apply_gradients(
                     grad_vars, global_step=self._global_step)
 
             # the training summary in case `summary_dir` is specified
             with tf.name_scope('summary'):
-                self._summary_op = tf.summary.merge([
-                    tf.summary.histogram(v.name.rsplit(':', 1)[0], v)
+                self._summary_op = tf.compat.v1.summary.merge([
+                    tf.compat.v1.summary.histogram(v.name.rsplit(':', 1)[0], v)
                     for v in six.itervalues(self._train_params)
                 ])
 
             # initializer for the variables
-            self._trainer_initializer = tf.variables_initializer(
+            self._trainer_initializer = tf.compat.v1.variables_initializer(
                 list(six.itervalues(get_variables_as_dict(scope=self.variable_scope,
-                                                          collection=tf.GraphKeys.GLOBAL_VARIABLES)))
+                                                          collection=tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)))
             )
 
     @property
diff --git a/omni_anomaly/vae.py b/omni_anomaly/vae.py
index 6f53e1e..2e306c3 100644
--- a/omni_anomaly/vae.py
+++ b/omni_anomaly/vae.py
@@ -174,7 +174,7 @@ def __call__(self, inputs, **kwargs):
         # Here `reopen_name_scope` is set to True, so that multiple
         # calls to the same Module instance will always generate operations
         # within the original name scope.
-        # However, in order for ``tf.variable_scope(default_name=...)``
+        # However, in order for ``tf.compat.v1.variable_scope(default_name=...)``
         # to work properly with variable reusing, we must generate a nested
         # unique name scope.
         with tf.name_scope('forward'):
@@ -269,9 +269,9 @@ def variational(self, x, z=None, n_z=None, posterior_flow=None):
         if z is not None:
             observed['z'] = z
         net = BayesianNet(observed=observed)
-        with tf.variable_scope('h_for_q_z'):
+        with tf.compat.v1.variable_scope('h_for_q_z'):
             z_params = self.h_for_q_z(x)
-        with tf.variable_scope('q_z_given_x'):
+        with tf.compat.v1.variable_scope('q_z_given_x'):
             q_z_given_x = self.q_z_given_x(**z_params)
         assert (isinstance(q_z_given_x, Distribution))
         with tf.name_scope('z'):
@@ -313,9 +313,9 @@ def model(self, z=None, x=None, n_z=None, n_x=None):
         z = net.add('z', self.p_z, n_samples=n_z, group_ndims=self.z_group_ndims,
                     is_reparameterized=self.is_reparameterized)
-        with tf.variable_scope('h_for_p_x'):
+        with tf.compat.v1.variable_scope('h_for_p_x'):
             x_params = self.h_for_p_x(z)
-        with tf.variable_scope('p_x_given_z'):
+        with tf.compat.v1.variable_scope('p_x_given_z'):
             p_x_given_z = self.p_x_given_z(**x_params)
         assert (isinstance(p_x_given_z, Distribution))
         with tf.name_scope('x'):
@@ -510,7 +510,7 @@ def __call__(self, inputs, **kwargs):
         # Here `reopen_name_scope` is set to True, so that multiple
         # calls to the same Module instance will always generate operations
         # within the original name scope.
-        # However, in order for ``tf.variable_scope(default_name=...)``
+        # However, in order for ``tf.compat.v1.variable_scope(default_name=...)``
         # to work properly with variable reusing, we must generate a nested
         # unique name scope.
         with tf.name_scope('forward'):
diff --git a/omni_anomaly/wrapper.py b/omni_anomaly/wrapper.py
index a5b6cde..5e0b702 100644
--- a/omni_anomaly/wrapper.py
+++ b/omni_anomaly/wrapper.py
@@ -77,7 +77,7 @@ def log_prob(self, given, group_ndims=0, name=None):
 
 
 def softplus_std(inputs, units, epsilon, name):
-    return tf.nn.softplus(tf.layers.dense(inputs, units, name=name, reuse=tf.AUTO_REUSE)) + epsilon
+    return tf.nn.softplus(tf.layers.dense(inputs, units, name=name, reuse=tf.compat.v1.AUTO_REUSE)) + epsilon
 
 
 def rnn(x,
@@ -89,7 +89,7 @@ def rnn(x,
         time_axis=1,
         name='rnn'):
     from tensorflow.contrib import rnn
-    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
+    with tf.compat.v1.variable_scope(name, reuse=tf.compat.v1.AUTO_REUSE):
         if len(x.shape) == 4:
             x = tf.reduce_mean(x, axis=0)
         elif len(x.shape) != 3:
@@ -122,7 +122,7 @@ def rnn(x,
 
 
 def wrap_params_net(inputs, h_for_dist, mean_layer, std_layer):
-    with tf.variable_scope('hidden', reuse=tf.AUTO_REUSE):
+    with tf.compat.v1.variable_scope('hidden', reuse=tf.compat.v1.AUTO_REUSE):
         h = h_for_dist(inputs)
     return {
         'mean': mean_layer(h),
@@ -131,7 +131,7 @@ def wrap_params_net(inputs, h_for_dist, mean_layer, std_layer):
 
 
 def wrap_params_net_srnn(inputs, h_for_dist):
-    with tf.variable_scope('hidden', reuse=tf.AUTO_REUSE):
+    with tf.compat.v1.variable_scope('hidden', reuse=tf.compat.v1.AUTO_REUSE):
         h = h_for_dist(inputs)
     return {
         'input_q': h
diff --git a/requirements.txt b/requirements.txt
index 98041bc..4edc0b9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,14 @@
 six == 1.11.0
 matplotlib == 3.0.2
-numpy == 1.15.4
+numpy == 1.16.0
 pandas == 0.23.4
 scipy == 1.2.0
 scikit_learn == 0.20.2
-tensorflow-gpu == 1.12.0
-tensorflow_probability == 0.5.0
+tensorflow-gpu == 1.15.0
+tensorflow_probability == 0.8.0
 tqdm == 4.28.1
 imageio == 2.4.1
 fs == 2.3.0
 click == 7.0
-git+https://github.com/thu-ml/zhusuan.git
-git+https://github.com/haowen-xu/tfsnippet.git@v0.2.0-alpha1
\ No newline at end of file
+git+https://github.com/yyexela/tfsnippet-tf-compat.git@v1
+git+https://github.com/yyexela/zhusuan-tf-compat.git@v1
\ No newline at end of file
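For reference, below is a minimal end-to-end sketch of the workflow this patch enables, assembled only from commands that appear in the Dockerfile and README hunks above (the fork URL, the `omnianomaly` image tag, the SMAP/MSL download, and the MSL training command). The data-preprocessing step is not shown in these hunks, so it is left to the upstream README instructions.

```shell
# On the host: clone the fork and build the image from the new Dockerfile
git clone https://github.com/yyexela/OmniAnomaly && cd OmniAnomaly
docker build -t omnianomaly .

# On the host: fetch the SMAP/MSL data (same commands as the "Get data" section)
wget https://s3-us-west-2.amazonaws.com/telemanom/data.zip && unzip data.zip && \
    cd data && wget https://raw.githubusercontent.com/khundman/telemanom/master/labeled_anomalies.csv && cd ..

# Start a GPU container with the repository mounted at /app/OmniAnomaly
docker run --gpus all -v ./:/app/OmniAnomaly -it --rm omnianomaly bash

# Inside the container: preprocess per the upstream README, then train on MSL
python main.py --dataset='MSL' --max_epoch=20
```

Because the repository is bind-mounted into the container, the dependencies baked into the image (tensorflow-gpu 1.15.0, tensorflow_probability 0.8.0, and the tf-compat forks of tfsnippet and zhusuan) run against the live working copy, which is what the `tf.compat.v1` changes in the Python sources above target.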