From a0f68af225b73080b02541a55670428e1e0d2304 Mon Sep 17 00:00:00 2001
From: Joan Puigcerver <jpuigcerver@google.com>
Date: Wed, 7 Feb 2024 04:10:47 -0800
Subject: [PATCH] Release pretraining config on ImageNet-21k, used in the
 original V-MoE paper.

PiperOrigin-RevId: 604935917
---
 vmoe/app.py                                   |   2 +-
 vmoe/checkpoints/__init__.py                  |   2 +-
 vmoe/checkpoints/base.py                      |   2 +-
 vmoe/checkpoints/base_test.py                 |   2 +-
 vmoe/checkpoints/partitioned.py               |   2 +-
 vmoe/checkpoints/partitioned_test.py          |   2 +-
 vmoe/checkpoints/serialization.py             |   2 +-
 vmoe/checkpoints/serialization_test.py        |   2 +-
 vmoe/checkpoints/types.py                     |   2 +-
 vmoe/checkpoints/types_test.py                |   2 +-
 vmoe/configs/common_fewshot.py                |   2 +-
 .../eee_s32_last2_ilsvrc2012_ft_cifar100.py   |   2 +-
 vmoe/configs/vmoe_paper/common.py             |   2 +-
 .../vmoe_paper/pretrain_imagenet21k.py        | 221 ++++++++++++++++++
 ...6_imagenet21k_randaug_strong_ft_cifar10.py |   2 +-
 ...magenet21k_randaug_strong_ft_ilsvrc2012.py |   2 +-
 ...moe_s32_last2_ilsvrc2012_randaug_light1.py |   2 +-
 ...ilsvrc2012_randaug_light1_ft_ilsvrc2012.py |   2 +-
 vmoe/constants.py                             |   2 +-
 vmoe/data/builder.py                          |   2 +-
 vmoe/data/builder_test.py                     |   2 +-
 vmoe/data/input_pipeline.py                   |   2 +-
 vmoe/data/input_pipeline_test.py              |   2 +-
 vmoe/data/pjit_utils.py                       |   2 +-
 vmoe/data/pjit_utils_test.py                  |   2 +-
 vmoe/data/pp_ops.py                           |   2 +-
 vmoe/data/pp_ops_test.py                      |   2 +-
 vmoe/evaluate/ensemble.py                     |   2 +-
 vmoe/evaluate/ensemble_test.py                |   2 +-
 vmoe/evaluate/evaluator.py                    |   2 +-
 vmoe/evaluate/evaluator_test.py               |   2 +-
 vmoe/evaluate/fewshot.py                      |   2 +-
 vmoe/evaluate/fewshot_test.py                 |   2 +-
 vmoe/initialization/__init__.py               |   2 +-
 vmoe/initialization/initialization.py         |   2 +-
 vmoe/initialization/initialization_test.py    |   2 +-
 vmoe/initialization/mapping.py                |   2 +-
 vmoe/initialization/mapping_test.py           |   2 +-
 vmoe/initialization/rules.py                  |   2 +-
 vmoe/initialization/rules_test.py             |   2 +-
 vmoe/moe.py                                   |   2 +-
 vmoe/moe_test.py                              |   2 +-
 vmoe/multihost_utils.py                       |   2 +-
 vmoe/nn/ensemble_routing.py                   |   2 +-
 vmoe/nn/ensemble_routing_test.py              |   2 +-
 vmoe/nn/external.py                           |   2 +-
 vmoe/nn/external_test.py                      |   2 +-
 vmoe/nn/models.py                             |   2 +-
 vmoe/nn/routing.py                            |   2 +-
 vmoe/nn/routing_test.py                       |   2 +-
 vmoe/nn/vit_moe.py                            |   2 +-
 vmoe/nn/vit_moe_ensemble.py                   |   2 +-
 vmoe/nn/vit_moe_ensemble_test.py              |   2 +-
 vmoe/nn/vit_moe_test.py                       |   2 +-
 vmoe/partitioning.py                          |   2 +-
 vmoe/partitioning_test.py                     |   2 +-
 vmoe/projects/adversarial_attacks/attacks.py  |   2 +-
 .../adversarial_attacks/attacks_test.py       |   2 +-
 .../configs/attack/ilsvrc2012.py              |   2 +-
 .../adversarial_attacks/configs/common.py     |   2 +-
 vmoe/projects/adversarial_attacks/lib.py      |   2 +-
 vmoe/projects/adversarial_attacks/lib_test.py |   2 +-
 vmoe/projects/adversarial_attacks/main.py     |   2 +-
 vmoe/projects/adversarial_attacks/restore.py  |   2 +-
 .../adversarial_attacks/restore_test.py       |   2 +-
 vmoe/projects/soft_moe/configs/common.py      |   2 +-
 .../soft_moe/configs/pretrain_jft4b.py        |   2 +-
 vmoe/projects/soft_moe/main.py                |   2 +-
 vmoe/projects/soft_moe/router.py              |   2 +-
 vmoe/projects/soft_moe/router_test.py         |   2 +-
 .../kl_projection_routing.py                  |   2 +-
 .../kl_projection_routing_test.py             |   2 +-
 .../ksparse_projection_routing.py             |   2 +-
 .../ksparse_projection_routing_test.py        |   2 +-
 .../sparsity_constrained_ot/ot_routing.py     |   2 +-
 .../sparse_projection_routing.py              |   2 +-
 .../sparse_projection_routing_test.py         |   2 +-
 vmoe/train/main.py                            |   2 +-
 vmoe/train/optimizer.py                       |   2 +-
 vmoe/train/optimizer_test.py                  |   2 +-
 vmoe/train/periodic_actions.py                |   2 +-
 vmoe/train/periodic_actions_test.py           |   2 +-
 vmoe/train/schedule.py                        |   2 +-
 vmoe/train/schedule_test.py                   |   2 +-
 vmoe/train/train_state.py                     |   2 +-
 vmoe/train/train_state_test.py                |   2 +-
 vmoe/train/trainer.py                         |   2 +-
 vmoe/train/trainer_test.py                    |   2 +-
 vmoe/train/tree_summarizer.py                 |   2 +-
 vmoe/train/tree_summarizer_test.py            |   2 +-
 vmoe/utils.py                                 |   2 +-
 vmoe/utils_test.py                            |   2 +-
 92 files changed, 312 insertions(+), 91 deletions(-)
 create mode 100644 vmoe/configs/vmoe_paper/pretrain_imagenet21k.py

diff --git a/vmoe/app.py b/vmoe/app.py
index ddef089..ed9c380 100644
--- a/vmoe/app.py
+++ b/vmoe/app.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/__init__.py b/vmoe/checkpoints/__init__.py
index aa591ed..1e3ff7f 100644
--- a/vmoe/checkpoints/__init__.py
+++ b/vmoe/checkpoints/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/base.py b/vmoe/checkpoints/base.py
index 6d59f5b..096f811 100644
--- a/vmoe/checkpoints/base.py
+++ b/vmoe/checkpoints/base.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/base_test.py b/vmoe/checkpoints/base_test.py
index fe42ac4..e88e72d 100644
--- a/vmoe/checkpoints/base_test.py
+++ b/vmoe/checkpoints/base_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/partitioned.py b/vmoe/checkpoints/partitioned.py
index ae3d0f1..b873a2b 100644
--- a/vmoe/checkpoints/partitioned.py
+++ b/vmoe/checkpoints/partitioned.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/partitioned_test.py b/vmoe/checkpoints/partitioned_test.py
index 3db5f1d..ab1636c 100644
--- a/vmoe/checkpoints/partitioned_test.py
+++ b/vmoe/checkpoints/partitioned_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/serialization.py b/vmoe/checkpoints/serialization.py
index c897ac2..19bbc37 100644
--- a/vmoe/checkpoints/serialization.py
+++ b/vmoe/checkpoints/serialization.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/serialization_test.py b/vmoe/checkpoints/serialization_test.py
index 9fcbc3c..0d731a6 100644
--- a/vmoe/checkpoints/serialization_test.py
+++ b/vmoe/checkpoints/serialization_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/types.py b/vmoe/checkpoints/types.py
index b66b91b..f0ad5c3 100644
--- a/vmoe/checkpoints/types.py
+++ b/vmoe/checkpoints/types.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/checkpoints/types_test.py b/vmoe/checkpoints/types_test.py
index f981dd1..2342937 100644
--- a/vmoe/checkpoints/types_test.py
+++ b/vmoe/checkpoints/types_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/configs/common_fewshot.py b/vmoe/configs/common_fewshot.py
index da67859..9bd1b87 100644
--- a/vmoe/configs/common_fewshot.py
+++ b/vmoe/configs/common_fewshot.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/configs/eee_paper/eee_s32_last2_ilsvrc2012_ft_cifar100.py b/vmoe/configs/eee_paper/eee_s32_last2_ilsvrc2012_ft_cifar100.py
index 6d7179b..7cad8f3 100644
--- a/vmoe/configs/eee_paper/eee_s32_last2_ilsvrc2012_ft_cifar100.py
+++ b/vmoe/configs/eee_paper/eee_s32_last2_ilsvrc2012_ft_cifar100.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/configs/vmoe_paper/common.py b/vmoe/configs/vmoe_paper/common.py
index f4fd172..8e1644c 100644
--- a/vmoe/configs/vmoe_paper/common.py
+++ b/vmoe/configs/vmoe_paper/common.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/configs/vmoe_paper/pretrain_imagenet21k.py b/vmoe/configs/vmoe_paper/pretrain_imagenet21k.py
new file mode 100644
index 0000000..e3230e1
--- /dev/null
+++ b/vmoe/configs/vmoe_paper/pretrain_imagenet21k.py
@@ -0,0 +1,221 @@
+# Copyright 2024 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=line-too-long
+r"""Train ViT model with MoE layers on ImageNet-21k.
+
+This is the config for pre-training the model that was later fine-tuned on
+ILSVRC 2012 and CIFAR 10. See the corresponding fine-tuning configs:
+  - vmoe_b16_imagenet21k_randaug_strong_ft_cifar10.py
+  - vmoe_b16_imagenet21k_randaug_strong_ft_ilsvrc2012.py
+
+"""
+# pylint: enable=line-too-long
+import re
+
+import ml_collections
+
+DESCRIPTIONS_REGEX = re.compile(  # Parses model description strings.
+    r'^ViT-(?P<variant>.)/(?P<patch>[0-9]+), '
+    r'E=(?P<num_experts>[0-9]+), '
+    r'K=(?P<k>[0-9]+), '
+    r'(?P<where>Every|Last) (?P<where_num>[0-9]+), '
+    r'(?P<epochs>[0-9]+) Epochs$')
+# Number of ImageNet21k classes.
+NUM_CLASSES = 21_843
+
+
+def get_config():
+  """Config to train V-MoE S/32, B/32, B/16, L/32, L/16 & H/14."""
+  config = ml_collections.ConfigDict()
+
+  config.dataset = ml_collections.ConfigDict()
+  pp_common = f'value_range(-1,1)|onehot({NUM_CLASSES})|keep("image", "labels")'
+  # Training data: the 'full' split minus its first 102400 examples.
+  config.dataset.train = ml_collections.ConfigDict()
+  config.dataset.train.name = 'imagenet21k'
+  config.dataset.train.split = 'full[102400:]'
+  config.dataset.train.process = (
+      f'decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,20)|{pp_common}'
+  )
+  config.dataset.train.shuffle_buffer = 250_000
+  config.dataset.train.batch_size = 4096
+  config.dataset.train.prefetch = 'autotune'
+  config.dataset.train.prefetch_device = 2
+  # Validation data: first 102400 examples of 'full' (not used to train).
+  config.dataset.val = ml_collections.ConfigDict()
+  config.dataset.val.name = 'imagenet21k'
+  config.dataset.val.split = 'full[:102400]'
+  config.dataset.val.process = (
+      f'decode|resize_small(256)|central_crop(224)|{pp_common}'
+  )
+  config.dataset.val.batch_size = 4096
+  config.dataset.val.cache = 'batched'
+  config.dataset.val.prefetch = 'autotune'
+  # Loss used to train the model.
+  config.loss = ml_collections.ConfigDict()
+  config.loss.name = 'sigmoid_xent'
+  # Model and optimizer parameters depend on the model type.
+  config.description = 'ViT-B/16, E=8, K=2, Every 2, 300 Epochs'
+  config.model = get_vmoe_params(config.description)
+  config.optimizer = get_optimizer_params(config.description)
+  config.train_epochs = get_num_epochs(config.description)
+  config.mixup = ml_collections.ConfigDict()
+  config.mixup.concentration = 0.5
+  config.mixup.mixup_size = 2
+
+  # These control how the model parameters are partitioned across the device
+  # mesh for running the models efficiently.
+  config.num_expert_partitions = config.model.encoder.moe.num_experts
+  config.params_axis_resources = [('Moe/Mlp/.*', ('expert',))]
+  config.extra_rng_keys = ('dropout', 'gating', 'mixup')
+  # Write checkpoints every 1000 steps.
+  config.save_checkpoint = ml_collections.ConfigDict()
+  config.save_checkpoint.every_steps = 1_000
+  config.save_checkpoint.keep_last = 1
+  config.save_checkpoint.wait_seconds = 300
+  # Report training progress every 100 steps (every_secs is left unset).
+  config.report_progress = ml_collections.ConfigDict()
+  config.report_progress.every_secs = None
+  config.report_progress.every_steps = 100
+  # Evaluate on the validation set every 1000 steps.
+  config.evaluate = ml_collections.ConfigDict()
+  config.evaluate.every_steps = 1_000
+  # Run device profiling on process_index = 0, for 5 steps, starting at step 10.
+  # Then repeat profiling every hour.
+  config.profile = ml_collections.ConfigDict()
+  config.profile.all_processes = False
+  config.profile.num_profile_steps = 5
+  config.profile.first_profile = 10
+  config.profile.every_secs = 3600.0
+
+  config.seed = 0
+
+  return config
+
+
+def get_vmoe_params(description: str,
+                    image_size: int = 224) -> ml_collections.ConfigDict:
+  """Returns the V-MoE model config for the architecture in `description`."""
+  match = re.match(DESCRIPTIONS_REGEX, description)
+  if not match:
+    raise ValueError(f"Description {description!r} doesn't match the regex.")
+
+  variant = match.group('variant')
+  variant_idx = ['S', 'B', 'L', 'H'].index(variant)
+  patch_size = int(match.group('patch'))
+  num_total_experts = int(match.group('num_experts'))
+  num_selected_experts = int(match.group('k'))
+  moe_where = match.group('where')
+  moe_where_num = int(match.group('where_num'))
+  # Group size must be a divisor of the number of tokens per device.
+  # We assume the smallest per-device batch size is 8 images (any other is a
+  # multiple of it); the "+ 1" below presumably counts the class token.
+  min_batch_size_per_device = 8
+  num_patches = (image_size // patch_size) * (image_size // patch_size) + 1
+  group_size = min_batch_size_per_device * num_patches
+
+  config = ml_collections.ConfigDict()
+  config.name = 'VisionTransformerMoe'
+  config.num_classes = NUM_CLASSES
+  config.patch_size = (patch_size, patch_size)
+  config.hidden_size = [512, 768, 1024, 1280][variant_idx]
+  config.classifier = 'token'
+  config.representation_size = None
+  config.head_bias_init = -10.0
+  config.encoder = ml_collections.ConfigDict()
+  config.encoder.num_layers = [8, 12, 24, 32][variant_idx]
+  config.encoder.mlp_dim = [2048, 3072, 4096, 5120][variant_idx]
+  config.encoder.num_heads = [8, 12, 16, 16][variant_idx]
+  config.encoder.dropout_rate = 0.0
+  config.encoder.attention_dropout_rate = 0.0
+  config.encoder.moe = ml_collections.ConfigDict()
+  config.encoder.moe.num_experts = num_total_experts
+  # MoE layer indices: every N-th layer, or the last N odd-indexed layers.
+  if moe_where == 'Every':
+    config.encoder.moe.layers = tuple(
+        range(moe_where_num - 1, config.encoder.num_layers, moe_where_num))
+  elif moe_where == 'Last':
+    config.encoder.moe.layers = tuple(
+        range(1, config.encoder.num_layers, 2))[-moe_where_num:]
+  else:
+    raise ValueError(
+        f'Unknown position for expert layers: {moe_where} {moe_where_num}')
+  config.encoder.moe.dropout_rate = 0.0
+  config.encoder.moe.split_rngs = False  # All experts share initialization.
+  config.encoder.moe.group_size = group_size
+  config.encoder.moe.router = ml_collections.ConfigDict()
+  config.encoder.moe.router.num_selected_experts = num_selected_experts
+  config.encoder.moe.router.noise_std = 1.0  # Actually, it's 1.0 / num_experts.
+  config.encoder.moe.router.importance_loss_weight = 0.005
+  config.encoder.moe.router.load_loss_weight = 0.005
+  config.encoder.moe.router.dispatcher = ml_collections.ConfigDict()
+  config.encoder.moe.router.dispatcher.name = 'einsum'
+  config.encoder.moe.router.dispatcher.bfloat16 = True
+  config.encoder.moe.router.dispatcher.capacity_factor = 1.05
+  # This is used to hint pjit about how data is distributed at the input/output
+  # of each MoE layer.
+  config.encoder.moe.router.dispatcher.partition_spec = (('expert', 'replica'),)
+  # By default we don't use batch priority for training the model.
+  config.encoder.moe.router.dispatcher.batch_priority = False
+
+  return config
+
+
+def get_optimizer_params(description: str) -> ml_collections.ConfigDict:
+  """Returns optimizer parameters for different canonical architectures."""
+  match = re.match(DESCRIPTIONS_REGEX, description)
+  if not match:
+    raise ValueError(f"Description {description!r} doesn't match the regex.")
+
+  variant = match.group('variant')
+  patch_size = int(match.group('patch'))
+
+  config = ml_collections.ConfigDict()
+  config.name = 'adam'
+  config.b1 = 0.9
+  config.b2 = 0.999
+  config.mu_dtype = 'float32'  # Optionally, use bfloat16 to save memory.
+  # Weight decay of 0.1 for '.*/kernel' params (a plain float applies to all).
+  config.weight_decay = [('.*/kernel', 0.1)]
+
+  # Parameters of the learning rate schedule.
+  config.learning_rate = ml_collections.ConfigDict()
+  config.learning_rate.schedule = 'warmup_linear_decay'
+  config.learning_rate.peak_value = {
+      ('S', 32): 1e-3,
+      ('B', 32): 8e-4,
+      ('B', 16): 8e-4,
+      ('L', 32): 6e-4,
+      ('L', 16): 4e-4,
+      ('H', 14): 3e-4,
+  }[(variant, patch_size)]  # KeyError for unlisted variant/patch pairs.
+  config.learning_rate.end_value = 1e-5
+  config.learning_rate.warmup_steps = 10_000
+  # NOTE(review): meant for VMoE-H/* only, but applied to every variant here.
+  config.gradient_clip = ml_collections.ConfigDict()
+  config.gradient_clip.global_norm = 1.0
+  return config
+
+
+def get_num_epochs(description) -> int:  # Value of the 'epochs' field.
+  match = re.match(DESCRIPTIONS_REGEX, description)
+  if not match:
+    raise ValueError(f"Description {description!r} doesn't match the regex.")
+  return int(match.group('epochs'))
+
+
+def get_hyper(hyper):
+  # Adjust this to train with multiple seeds or sweep other hyperparameters.
+  return hyper.product([])
diff --git a/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_cifar10.py b/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_cifar10.py
index a62ef0a..da428e0 100644
--- a/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_cifar10.py
+++ b/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_cifar10.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_ilsvrc2012.py b/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_ilsvrc2012.py
index e445d05..e9ce7f4 100644
--- a/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_ilsvrc2012.py
+++ b/vmoe/configs/vmoe_paper/vmoe_b16_imagenet21k_randaug_strong_ft_ilsvrc2012.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1.py b/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1.py
index c47c2a5..4b922d8 100644
--- a/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1.py
+++ b/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1_ft_ilsvrc2012.py b/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1_ft_ilsvrc2012.py
index d2c540e..56109e4 100644
--- a/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1_ft_ilsvrc2012.py
+++ b/vmoe/configs/vmoe_paper/vmoe_s32_last2_ilsvrc2012_randaug_light1_ft_ilsvrc2012.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/constants.py b/vmoe/constants.py
index b2952c8..88d6bf8 100644
--- a/vmoe/constants.py
+++ b/vmoe/constants.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/builder.py b/vmoe/data/builder.py
index ca377a2..5e72f82 100644
--- a/vmoe/data/builder.py
+++ b/vmoe/data/builder.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/builder_test.py b/vmoe/data/builder_test.py
index ce0f8de..9f10d49 100644
--- a/vmoe/data/builder_test.py
+++ b/vmoe/data/builder_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/input_pipeline.py b/vmoe/data/input_pipeline.py
index 03be887..8fb6b7b 100644
--- a/vmoe/data/input_pipeline.py
+++ b/vmoe/data/input_pipeline.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/input_pipeline_test.py b/vmoe/data/input_pipeline_test.py
index 963e203..3877a41 100644
--- a/vmoe/data/input_pipeline_test.py
+++ b/vmoe/data/input_pipeline_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/pjit_utils.py b/vmoe/data/pjit_utils.py
index 0567af8..6790aa2 100644
--- a/vmoe/data/pjit_utils.py
+++ b/vmoe/data/pjit_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/pjit_utils_test.py b/vmoe/data/pjit_utils_test.py
index f288df8..bb3aa2a 100644
--- a/vmoe/data/pjit_utils_test.py
+++ b/vmoe/data/pjit_utils_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/pp_ops.py b/vmoe/data/pp_ops.py
index 809607d..cb54efd 100644
--- a/vmoe/data/pp_ops.py
+++ b/vmoe/data/pp_ops.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/data/pp_ops_test.py b/vmoe/data/pp_ops_test.py
index b3e6a5a..c35d01f 100644
--- a/vmoe/data/pp_ops_test.py
+++ b/vmoe/data/pp_ops_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/evaluate/ensemble.py b/vmoe/evaluate/ensemble.py
index 3ea39d6..b2678b9 100644
--- a/vmoe/evaluate/ensemble.py
+++ b/vmoe/evaluate/ensemble.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/evaluate/ensemble_test.py b/vmoe/evaluate/ensemble_test.py
index 2bfcb62..d8b14a9 100644
--- a/vmoe/evaluate/ensemble_test.py
+++ b/vmoe/evaluate/ensemble_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/evaluate/evaluator.py b/vmoe/evaluate/evaluator.py
index b9c7082..61015f7 100644
--- a/vmoe/evaluate/evaluator.py
+++ b/vmoe/evaluate/evaluator.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/evaluate/evaluator_test.py b/vmoe/evaluate/evaluator_test.py
index c951e66..a6cc530 100644
--- a/vmoe/evaluate/evaluator_test.py
+++ b/vmoe/evaluate/evaluator_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/evaluate/fewshot.py b/vmoe/evaluate/fewshot.py
index 2829b1b..31e9511 100644
--- a/vmoe/evaluate/fewshot.py
+++ b/vmoe/evaluate/fewshot.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/evaluate/fewshot_test.py b/vmoe/evaluate/fewshot_test.py
index fcc5b6b..a55ee9f 100644
--- a/vmoe/evaluate/fewshot_test.py
+++ b/vmoe/evaluate/fewshot_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/initialization/__init__.py b/vmoe/initialization/__init__.py
index 28e64d8..7a1db00 100644
--- a/vmoe/initialization/__init__.py
+++ b/vmoe/initialization/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/initialization/initialization.py b/vmoe/initialization/initialization.py
index 748f2e6..591a512 100644
--- a/vmoe/initialization/initialization.py
+++ b/vmoe/initialization/initialization.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/initialization/initialization_test.py b/vmoe/initialization/initialization_test.py
index f93498e..1b028a7 100644
--- a/vmoe/initialization/initialization_test.py
+++ b/vmoe/initialization/initialization_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/initialization/mapping.py b/vmoe/initialization/mapping.py
index 876b35b..21239aa 100644
--- a/vmoe/initialization/mapping.py
+++ b/vmoe/initialization/mapping.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/initialization/mapping_test.py b/vmoe/initialization/mapping_test.py
index f46df15..0b353e6 100644
--- a/vmoe/initialization/mapping_test.py
+++ b/vmoe/initialization/mapping_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/initialization/rules.py b/vmoe/initialization/rules.py
index 7bbc36b..4d0ac7c 100644
--- a/vmoe/initialization/rules.py
+++ b/vmoe/initialization/rules.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/initialization/rules_test.py b/vmoe/initialization/rules_test.py
index 7b5678e..1adb19a 100644
--- a/vmoe/initialization/rules_test.py
+++ b/vmoe/initialization/rules_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/moe.py b/vmoe/moe.py
index 8f18e6a..d6e963a 100644
--- a/vmoe/moe.py
+++ b/vmoe/moe.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/moe_test.py b/vmoe/moe_test.py
index 7dc01c4..3ff6d1a 100644
--- a/vmoe/moe_test.py
+++ b/vmoe/moe_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/multihost_utils.py b/vmoe/multihost_utils.py
index 610fed6..a04dc7d 100644
--- a/vmoe/multihost_utils.py
+++ b/vmoe/multihost_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/ensemble_routing.py b/vmoe/nn/ensemble_routing.py
index a25205a..786cce9 100644
--- a/vmoe/nn/ensemble_routing.py
+++ b/vmoe/nn/ensemble_routing.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/ensemble_routing_test.py b/vmoe/nn/ensemble_routing_test.py
index 4f40dd0..a257374 100644
--- a/vmoe/nn/ensemble_routing_test.py
+++ b/vmoe/nn/ensemble_routing_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/external.py b/vmoe/nn/external.py
index d13a4b0..10edf12 100644
--- a/vmoe/nn/external.py
+++ b/vmoe/nn/external.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/external_test.py b/vmoe/nn/external_test.py
index 27e01f9..5c3bf65 100644
--- a/vmoe/nn/external_test.py
+++ b/vmoe/nn/external_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/models.py b/vmoe/nn/models.py
index 972bfcb..bba0807 100644
--- a/vmoe/nn/models.py
+++ b/vmoe/nn/models.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/routing.py b/vmoe/nn/routing.py
index 9c84d40..95df4b3 100644
--- a/vmoe/nn/routing.py
+++ b/vmoe/nn/routing.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/routing_test.py b/vmoe/nn/routing_test.py
index 53d729e..570c570 100644
--- a/vmoe/nn/routing_test.py
+++ b/vmoe/nn/routing_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/vit_moe.py b/vmoe/nn/vit_moe.py
index 1a9afed..d9f0277 100644
--- a/vmoe/nn/vit_moe.py
+++ b/vmoe/nn/vit_moe.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/vit_moe_ensemble.py b/vmoe/nn/vit_moe_ensemble.py
index 07b34c8..b1f314d 100644
--- a/vmoe/nn/vit_moe_ensemble.py
+++ b/vmoe/nn/vit_moe_ensemble.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/vit_moe_ensemble_test.py b/vmoe/nn/vit_moe_ensemble_test.py
index 506c65b..f3810c5 100644
--- a/vmoe/nn/vit_moe_ensemble_test.py
+++ b/vmoe/nn/vit_moe_ensemble_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/nn/vit_moe_test.py b/vmoe/nn/vit_moe_test.py
index a4b7c54..d5d0bce 100644
--- a/vmoe/nn/vit_moe_test.py
+++ b/vmoe/nn/vit_moe_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/partitioning.py b/vmoe/partitioning.py
index a9e82cf..4f9d3df 100644
--- a/vmoe/partitioning.py
+++ b/vmoe/partitioning.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/partitioning_test.py b/vmoe/partitioning_test.py
index b43bd2d..f531583 100644
--- a/vmoe/partitioning_test.py
+++ b/vmoe/partitioning_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/attacks.py b/vmoe/projects/adversarial_attacks/attacks.py
index a81bd8a..f3177d7 100644
--- a/vmoe/projects/adversarial_attacks/attacks.py
+++ b/vmoe/projects/adversarial_attacks/attacks.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/attacks_test.py b/vmoe/projects/adversarial_attacks/attacks_test.py
index 8afed14..ee5a939 100644
--- a/vmoe/projects/adversarial_attacks/attacks_test.py
+++ b/vmoe/projects/adversarial_attacks/attacks_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/configs/attack/ilsvrc2012.py b/vmoe/projects/adversarial_attacks/configs/attack/ilsvrc2012.py
index 807b605..69a722e 100644
--- a/vmoe/projects/adversarial_attacks/configs/attack/ilsvrc2012.py
+++ b/vmoe/projects/adversarial_attacks/configs/attack/ilsvrc2012.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/configs/common.py b/vmoe/projects/adversarial_attacks/configs/common.py
index a7558d6..24e2342 100644
--- a/vmoe/projects/adversarial_attacks/configs/common.py
+++ b/vmoe/projects/adversarial_attacks/configs/common.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/lib.py b/vmoe/projects/adversarial_attacks/lib.py
index 9b74e68..da5f3f3 100644
--- a/vmoe/projects/adversarial_attacks/lib.py
+++ b/vmoe/projects/adversarial_attacks/lib.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/lib_test.py b/vmoe/projects/adversarial_attacks/lib_test.py
index 96711ae..8f14bde 100644
--- a/vmoe/projects/adversarial_attacks/lib_test.py
+++ b/vmoe/projects/adversarial_attacks/lib_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/main.py b/vmoe/projects/adversarial_attacks/main.py
index e09cc95..8cfd50b 100644
--- a/vmoe/projects/adversarial_attacks/main.py
+++ b/vmoe/projects/adversarial_attacks/main.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/restore.py b/vmoe/projects/adversarial_attacks/restore.py
index 9039ce1..90ff11e 100644
--- a/vmoe/projects/adversarial_attacks/restore.py
+++ b/vmoe/projects/adversarial_attacks/restore.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/adversarial_attacks/restore_test.py b/vmoe/projects/adversarial_attacks/restore_test.py
index b6e4842..149de91 100644
--- a/vmoe/projects/adversarial_attacks/restore_test.py
+++ b/vmoe/projects/adversarial_attacks/restore_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/soft_moe/configs/common.py b/vmoe/projects/soft_moe/configs/common.py
index 649c031..9ba3560 100644
--- a/vmoe/projects/soft_moe/configs/common.py
+++ b/vmoe/projects/soft_moe/configs/common.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/soft_moe/configs/pretrain_jft4b.py b/vmoe/projects/soft_moe/configs/pretrain_jft4b.py
index 58fedeb..c1551b6 100644
--- a/vmoe/projects/soft_moe/configs/pretrain_jft4b.py
+++ b/vmoe/projects/soft_moe/configs/pretrain_jft4b.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/soft_moe/main.py b/vmoe/projects/soft_moe/main.py
index aba33e6..652f7f3 100644
--- a/vmoe/projects/soft_moe/main.py
+++ b/vmoe/projects/soft_moe/main.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/soft_moe/router.py b/vmoe/projects/soft_moe/router.py
index 1fbae84..bff0978 100644
--- a/vmoe/projects/soft_moe/router.py
+++ b/vmoe/projects/soft_moe/router.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/soft_moe/router_test.py b/vmoe/projects/soft_moe/router_test.py
index d6d8476..80b048d 100644
--- a/vmoe/projects/soft_moe/router_test.py
+++ b/vmoe/projects/soft_moe/router_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/sparsity_constrained_ot/kl_projection_routing.py b/vmoe/projects/sparsity_constrained_ot/kl_projection_routing.py
index bca1832..4c4a48a 100644
--- a/vmoe/projects/sparsity_constrained_ot/kl_projection_routing.py
+++ b/vmoe/projects/sparsity_constrained_ot/kl_projection_routing.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/sparsity_constrained_ot/kl_projection_routing_test.py b/vmoe/projects/sparsity_constrained_ot/kl_projection_routing_test.py
index 85b3480..661b6cc 100644
--- a/vmoe/projects/sparsity_constrained_ot/kl_projection_routing_test.py
+++ b/vmoe/projects/sparsity_constrained_ot/kl_projection_routing_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing.py b/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing.py
index 5864878..be03437 100644
--- a/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing.py
+++ b/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing_test.py b/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing_test.py
index 9d20af0..fca49d1 100644
--- a/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing_test.py
+++ b/vmoe/projects/sparsity_constrained_ot/ksparse_projection_routing_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/sparsity_constrained_ot/ot_routing.py b/vmoe/projects/sparsity_constrained_ot/ot_routing.py
index 53c1ec1..663b6f5 100644
--- a/vmoe/projects/sparsity_constrained_ot/ot_routing.py
+++ b/vmoe/projects/sparsity_constrained_ot/ot_routing.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing.py b/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing.py
index 62e639d..772dcfb 100644
--- a/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing.py
+++ b/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing_test.py b/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing_test.py
index 8835eba..4deeb82 100644
--- a/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing_test.py
+++ b/vmoe/projects/sparsity_constrained_ot/sparse_projection_routing_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/main.py b/vmoe/train/main.py
index 6af0010..9274d13 100644
--- a/vmoe/train/main.py
+++ b/vmoe/train/main.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/optimizer.py b/vmoe/train/optimizer.py
index c1aae50..c569696 100644
--- a/vmoe/train/optimizer.py
+++ b/vmoe/train/optimizer.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/optimizer_test.py b/vmoe/train/optimizer_test.py
index 7050768..d613481 100644
--- a/vmoe/train/optimizer_test.py
+++ b/vmoe/train/optimizer_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/periodic_actions.py b/vmoe/train/periodic_actions.py
index 4736a41..d2b4566 100644
--- a/vmoe/train/periodic_actions.py
+++ b/vmoe/train/periodic_actions.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/periodic_actions_test.py b/vmoe/train/periodic_actions_test.py
index 0ba0642..bbc2576 100644
--- a/vmoe/train/periodic_actions_test.py
+++ b/vmoe/train/periodic_actions_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/schedule.py b/vmoe/train/schedule.py
index c1531bc..148766c 100644
--- a/vmoe/train/schedule.py
+++ b/vmoe/train/schedule.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/schedule_test.py b/vmoe/train/schedule_test.py
index 48ab561..340480d 100644
--- a/vmoe/train/schedule_test.py
+++ b/vmoe/train/schedule_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/train_state.py b/vmoe/train/train_state.py
index 41d80bd..cd7627a 100644
--- a/vmoe/train/train_state.py
+++ b/vmoe/train/train_state.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/train_state_test.py b/vmoe/train/train_state_test.py
index 280b93d..f6de995 100644
--- a/vmoe/train/train_state_test.py
+++ b/vmoe/train/train_state_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/trainer.py b/vmoe/train/trainer.py
index e618c3b..b80ffa0 100644
--- a/vmoe/train/trainer.py
+++ b/vmoe/train/trainer.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/trainer_test.py b/vmoe/train/trainer_test.py
index c7c3c71..0fb575d 100644
--- a/vmoe/train/trainer_test.py
+++ b/vmoe/train/trainer_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/tree_summarizer.py b/vmoe/train/tree_summarizer.py
index c10128e..074e743 100644
--- a/vmoe/train/tree_summarizer.py
+++ b/vmoe/train/tree_summarizer.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/train/tree_summarizer_test.py b/vmoe/train/tree_summarizer_test.py
index ce1d7a2..ceb8ff6 100644
--- a/vmoe/train/tree_summarizer_test.py
+++ b/vmoe/train/tree_summarizer_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/utils.py b/vmoe/utils.py
index 8e8a845..ab89971 100644
--- a/vmoe/utils.py
+++ b/vmoe/utils.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/vmoe/utils_test.py b/vmoe/utils_test.py
index d59c4df..ed8d0d8 100644
--- a/vmoe/utils_test.py
+++ b/vmoe/utils_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC.
+# Copyright 2024 Google LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.