From 9243be75ddde7a2ba6a9add8e3b968538f8fef6b Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Tue, 1 Oct 2024 09:58:32 -0600 Subject: [PATCH 1/9] ci: run unit tests, fmt, image build on release branch (#361) Signed-off-by: Anh Uong --- .github/workflows/format.yml | 4 ++-- .github/workflows/image.yaml | 4 ++-- .github/workflows/test.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 574aac18a..6e26a490f 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -16,9 +16,9 @@ name: Format on: push: - branches: [ "main" ] + branches: [ "main", "release" ] pull_request: - branches: [ "main" ] + branches: [ "main", "release" ] jobs: lint: diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml index d4d836bec..6c1e043c6 100644 --- a/.github/workflows/image.yaml +++ b/.github/workflows/image.yaml @@ -1,9 +1,9 @@ name: Image on: push: - branches: [ "main" ] + branches: [ "main", "release" ] pull_request: - branches: [ "main" ] + branches: [ "main", "release" ] jobs: build: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f8e24265c..ead02bb31 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,9 +1,9 @@ name: Test on: push: - branches: [ "main" ] + branches: [ "main", "release" ] pull_request: - branches: [ "main" ] + branches: [ "main", "release" ] jobs: build: From 7b97e9e478385b21b597ebb9c72ae75426e5ec11 Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Wed, 2 Oct 2024 10:07:36 -0600 Subject: [PATCH 2/9] chore: update code owners (#363) Signed-off-by: Anh Uong --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index a28fcff97..bcd048d90 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -8,4 +8,4 @@ # https://help.github.com/en/articles/about-code-owners # -* @anhuong @Ssukriti @alex-jw-brooks +* @anhuong @Ssukriti @aluu317 @fabianlim @kmehant From 63d0c5d6437f09e57e871367286f57adf146b310 Mon Sep 17 00:00:00 2001 From: Hari Date: Fri, 4 Oct 2024 20:53:47 +0530 Subject: [PATCH 3/9] fix: crash when output directory doesn't exist (#364) Signed-off-by: Harikrishnan Balagopal --- tuning/sft_trainer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 767fea1b4..7b52e3af4 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -637,6 +637,9 @@ def main(): combined_tracker_configs.file_logger_config = file_logger_config combined_tracker_configs.aim_config = aim_config + if training_args.output_dir: + os.makedirs(training_args.output_dir, exist_ok=True) + logger.info("using the output directory at %s", training_args.output_dir) try: trainer, additional_train_info = train( model_args=model_args, From ee2cd66d8c5f6817feb78b9f9702c5e68743afd5 Mon Sep 17 00:00:00 2001 From: Will Date: Tue, 8 Oct 2024 17:06:44 -0400 Subject: [PATCH 4/9] refactor: move tokenizer_data_utils with the rest of utils, add further unit testing. (#348) * cleanup: Move tokenizer_data_utils to /utils to /data, change imports Signed-off-by: Will Johnson * tests: Add additional tests for test_embedding_resize to check resize with special tokens, resize multiple of. 
fmt Signed-off-by: Will Johnson * lint Signed-off-by: Will Johnson * fix: more thorough testing from output of function Signed-off-by: Will Johnson * test: move assertion Signed-off-by: Will Johnson --------- Signed-off-by: Will Johnson --- scripts/run_inference.py | 4 +- tests/utils/test_embedding_resize.py | 74 ++++++++++++++++++- tests/utils/test_tokenizer_data_utils.py | 2 +- tuning/data/__init__.py | 13 ---- tuning/sft_trainer.py | 4 +- .../{data => utils}/tokenizer_data_utils.py | 0 6 files changed, 75 insertions(+), 22 deletions(-) delete mode 100644 tuning/data/__init__.py rename tuning/{data => utils}/tokenizer_data_utils.py (100%) diff --git a/scripts/run_inference.py b/scripts/run_inference.py index 7e4465cac..de8462826 100644 --- a/scripts/run_inference.py +++ b/scripts/run_inference.py @@ -34,7 +34,7 @@ import torch # Local -from tuning.data import tokenizer_data_utils +from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize ### Utilities @@ -219,7 +219,7 @@ def load( # where the model's layers are modified, in our case the embedding layer # is modified, so we resize the backbone model's embedding layer with our own # utility before passing it along to load the PEFT model. - tokenizer_data_utils.tokenizer_and_embedding_resize( + tokenizer_and_embedding_resize( {}, tokenizer=tokenizer, model=base_model ) model = PeftModel.from_pretrained( diff --git a/tests/utils/test_embedding_resize.py b/tests/utils/test_embedding_resize.py index 9a72f397b..43c6adbd4 100644 --- a/tests/utils/test_embedding_resize.py +++ b/tests/utils/test_embedding_resize.py @@ -20,9 +20,10 @@ import torch # Local -from tuning.data import tokenizer_data_utils +from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize MODEL_NAME = "Maykeye/TinyLLama-v0" +INPUT_TEXT = "### Text: @NortonSupport Thanks much.\n\n### Label:" def _inference( @@ -41,16 +42,16 @@ def _inference( def test_output_unaltered_across_embedding_resizes(): - input_text = "### Text: @NortonSupport Thanks much.\n\n### Label:" + input_text = INPUT_TEXT tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model_not_resized = AutoModelForCausalLM.from_pretrained(MODEL_NAME) model_resized = AutoModelForCausalLM.from_pretrained(MODEL_NAME) - tokenizer_data_utils.tokenizer_and_embedding_resize( + tokenizer_and_embedding_resize( special_tokens_dict={}, tokenizer=tokenizer, model=model_resized, multiple_of=8 ) - tokenizer_data_utils.tokenizer_and_embedding_resize( + tokenizer_and_embedding_resize( special_tokens_dict={}, tokenizer=tokenizer, model=model_not_resized, @@ -74,3 +75,68 @@ def test_output_unaltered_across_embedding_resizes(): ) assert output_from_model_not_resized == output_from_model_resized + + +def test_resize_with_special_tokens(): + input_text = INPUT_TEXT + tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) + model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) + + input_tokenizer_len = len(tokenizer.get_vocab()) + + special_tokens = {"sep_token": "", "pad_token": ""} + resize_result = tokenizer_and_embedding_resize( + special_tokens_dict=special_tokens, + tokenizer=tokenizer, + model=model, + multiple_of=1, + ) + + assert "" in tokenizer.get_vocab() + assert "" in tokenizer.get_vocab() + + output_tokenizer_len = len(tokenizer.get_vocab()) + + assert output_tokenizer_len == input_tokenizer_len + 2 + assert resize_result["num_new_tokens"] == output_tokenizer_len - input_tokenizer_len + + output = _inference( + tokenizer=tokenizer, model=model, input_text=input_text, max_new_tokens=20 + ) 
+ assert output is not None + + +def test_no_resize_when_no_special_tokens(): + input_text = INPUT_TEXT + tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) + model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) + + input_tokenizer_len = len(tokenizer.get_vocab()) + + resize_result = tokenizer_and_embedding_resize( + special_tokens_dict={}, tokenizer=tokenizer, model=model, multiple_of=1 + ) + + output_tokenizer_len = len(tokenizer.get_vocab()) + + assert input_tokenizer_len == output_tokenizer_len + assert resize_result["num_new_tokens"] == 0 + + output = _inference( + tokenizer=tokenizer, model=model, input_text=input_text, max_new_tokens=20 + ) + + assert output is not None + + +def test_resize_with_multiple_of(): + tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) + model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) + + resize_result = tokenizer_and_embedding_resize( + special_tokens_dict={}, tokenizer=tokenizer, model=model, multiple_of=8 + ) + + assert model.get_input_embeddings().embedding_dim % 8 == 0 + assert resize_result["new_embedding_size"] % 8 == 0 + assert model.get_output_embeddings().out_features % 8 == 0 diff --git a/tests/utils/test_tokenizer_data_utils.py b/tests/utils/test_tokenizer_data_utils.py index 118805100..1afd34d4d 100644 --- a/tests/utils/test_tokenizer_data_utils.py +++ b/tests/utils/test_tokenizer_data_utils.py @@ -7,7 +7,7 @@ # Local # First party -from tuning.data.tokenizer_data_utils import tokenizer_and_embedding_resize +from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize def test_tokenizer_and_embedding_resize_return_values(): diff --git a/tuning/data/__init__.py b/tuning/data/__init__.py deleted file mode 100644 index 38a9531ef..000000000 --- a/tuning/data/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright The FMS HF Tuning Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 7b52e3af4..fa7d0875c 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -53,7 +53,6 @@ FileLoggingTrackerConfig, TrackerConfigFactory, ) -from tuning.data import tokenizer_data_utils from tuning.trackers.tracker_factory import FILE_LOGGING_TRACKER, get_tracker from tuning.trainercontroller import TrainerControllerCallback from tuning.utils.config_utils import get_hf_peft_config, get_json_config @@ -70,6 +69,7 @@ is_pretokenized_dataset, validate_data_args, ) +from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize def train( @@ -294,7 +294,7 @@ def train( # TODO: lower priority but understand if resizing impacts inference quality and why its needed. # It makes sense if we manipulate tokenizer that we also save it and provide it to inference. 
- added_tokens_dict = tokenizer_data_utils.tokenizer_and_embedding_resize( + added_tokens_dict = tokenizer_and_embedding_resize( special_tokens_dict=special_tokens_dict, tokenizer=tokenizer, model=model, diff --git a/tuning/data/tokenizer_data_utils.py b/tuning/utils/tokenizer_data_utils.py similarity index 100% rename from tuning/data/tokenizer_data_utils.py rename to tuning/utils/tokenizer_data_utils.py From b33634c8ce5c85b6d10daa187fd1a069db969b67 Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Tue, 8 Oct 2024 15:56:15 -0600 Subject: [PATCH 5/9] deps: update transformers and accelerate depes (#355) Signed-off-by: Anh Uong --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2b63b8f54..7991bc334 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,8 +27,8 @@ classifiers=[ ] dependencies = [ "numpy>=1.26.4,<2.0", -"accelerate>=0.20.3,<0.34", -"transformers>4.41,<4.45", +"accelerate>=0.20.3,<0.35,!=0.34", +"transformers>4.41,<4.50", "torch>=2.2.0,<3.0", "sentencepiece>=0.1.99,<0.3", "tokenizers>=0.13.3,<1.0", From d36020230b3e4c743f61848d3e37ef163fae2dfd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:45:21 -0600 Subject: [PATCH 6/9] build(deps): Update peft requirement from <0.13,>=0.8.0 to >=0.8.0,<0.14 (#354) Updates the requirements on [peft](https://github.com/huggingface/peft) to permit the latest version. - [Release notes](https://github.com/huggingface/peft/releases) - [Commits](https://github.com/huggingface/peft/compare/v0.8.0...v0.13.0) --- updated-dependencies: - dependency-name: peft dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Anh Uong --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7991bc334..541e89112 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "tokenizers>=0.13.3,<1.0", "tqdm>=4.66.2,<5.0", "trl>=0.9.3,<1.0", -"peft>=0.8.0,<0.13", +"peft>=0.8.0,<0.14", "protobuf>=5.28.0,<6.0.0", "datasets>=2.15.0,<3.0", "simpleeval>=0.9.13,<1.0", From d138858ddd73eb41428344f693c3d964a884792b Mon Sep 17 00:00:00 2001 From: Will Date: Tue, 15 Oct 2024 10:50:44 -0400 Subject: [PATCH 7/9] build(deps): Upgrade accelerate requirement to allow version 1.0.0 (#371) * deps: Upgrade accelerate to version 1.0.0 Signed-off-by: Will Johnson * tests: Replace out of date variable with ,fixing unit tests Signed-off-by: Will Johnson * fix: Add old parameter for backwards compatibility Signed-off-by: Will Johnson --------- Signed-off-by: Will Johnson --- fixtures/accelerate_fsdp_defaults.yaml | 3 ++- pyproject.toml | 2 +- tests/build/dummy_job_config.json | 2 +- tests/build/test_utils.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fixtures/accelerate_fsdp_defaults.yaml b/fixtures/accelerate_fsdp_defaults.yaml index f70d74faa..30916b2a5 100644 --- a/fixtures/accelerate_fsdp_defaults.yaml +++ b/fixtures/accelerate_fsdp_defaults.yaml @@ -14,9 +14,10 @@ fsdp_config: fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP # this controls the FSDP pipelining - fsdp_backward_prefetch_policy: BACKWARD_PRE # set to BACKWARD_PRE for the most time-efficient pipeline + fsdp_backward_prefetch: BACKWARD_PRE # set to BACKWARD_PRE for the most time-efficient pipeline # but requires the most memory. 
BACKWARD_POST is the less # memory intensive option + fsdp_backward_prefetch_policy: BACKWARD_PRE # for backwards compatibility # setting this to true will increase forward memory by prefetching the next FSDP all-gather, while performing # the current forward pass. diff --git a/pyproject.toml b/pyproject.toml index 541e89112..021beeb57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers=[ ] dependencies = [ "numpy>=1.26.4,<2.0", -"accelerate>=0.20.3,<0.35,!=0.34", +"accelerate>=0.20.3,!=0.34,<1.1", "transformers>4.41,<4.50", "torch>=2.2.0,<3.0", "sentencepiece>=0.1.99,<0.3", diff --git a/tests/build/dummy_job_config.json b/tests/build/dummy_job_config.json index 315a5b527..ed5abfa85 100644 --- a/tests/build/dummy_job_config.json +++ b/tests/build/dummy_job_config.json @@ -5,7 +5,7 @@ "dynamo_use_dynamic": true, "num_machines": 1, "main_process_port": 1234, - "fsdp_backward_prefetch_policy": "TRANSFORMER_BASED_WRAP", + "fsdp_backward_prefetch": "TRANSFORMER_BASED_WRAP", "fsdp_sharding_strategy": 1, "fsdp_state_dict_type": "FULL_STATE_DICT", "fsdp_cpu_ram_efficient_loading": true, diff --git a/tests/build/test_utils.py b/tests/build/test_utils.py index fde0ffb2c..4ad228879 100644 --- a/tests/build/test_utils.py +++ b/tests/build/test_utils.py @@ -44,7 +44,7 @@ def test_process_accelerate_launch_args(job_config): args = process_accelerate_launch_args(job_config) # json config values used assert args.use_fsdp is True - assert args.fsdp_backward_prefetch_policy == "TRANSFORMER_BASED_WRAP" + assert args.fsdp_backward_prefetch == "TRANSFORMER_BASED_WRAP" assert args.env == ["env1", "env2"] assert args.training_script == "tuning.sft_trainer" assert args.config_file == "fixtures/accelerate_fsdp_defaults.yaml" From 1570d04bb6d96989c8ec3fca06d18c324184655b Mon Sep 17 00:00:00 2001 From: Will Date: Tue, 15 Oct 2024 11:40:08 -0400 Subject: [PATCH 8/9] build: Set triton environment variables (#370) Signed-off-by: Will Johnson Co-authored-by: Anh Uong --- build/Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/build/Dockerfile b/build/Dockerfile index 507dd4f52..d8cc74877 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -169,6 +169,12 @@ RUN mkdir /app && \ chown -R $USER:0 /app /tmp && \ chmod -R g+rwX /app /tmp +# Set Triton environment variables for qLoRA +ENV TRITON_HOME="/tmp/triton_home" +ENV TRITON_DUMP_DIR="/tmp/triton_dump_dir" +ENV TRITON_CACHE_DIR="/tmp/triton_cache_dir" +ENV TRITON_OVERRIDE_DIR="/tmp/triton_override_dir" + # Need a better way to address these hacks RUN if [[ "${ENABLE_AIM}" == "true" ]] ; then \ touch /.aim_profile && \ From d58faa5c38f88a1258e89e94f375bf46bce47769 Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Fri, 18 Oct 2024 10:22:48 -0600 Subject: [PATCH 9/9] deps: torch<2.5 due to FA2 error with new version (#375) Signed-off-by: Anh Uong --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 021beeb57..909ea64d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "numpy>=1.26.4,<2.0", "accelerate>=0.20.3,!=0.34,<1.1", "transformers>4.41,<4.50", -"torch>=2.2.0,<3.0", +"torch>=2.2.0,<2.5", "sentencepiece>=0.1.99,<0.3", "tokenizers>=0.13.3,<1.0", "tqdm>=4.66.2,<5.0",
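
Usage note (not part of any patch above): after this series, the resize helper lives at tuning.utils.tokenizer_data_utils and is called with special_tokens_dict, tokenizer, model, and multiple_of keyword arguments, returning a mapping that includes num_new_tokens and new_embedding_size, as the new tests in patch 4 exercise. Below is a minimal sketch of that call, assuming the same TinyLLama test fixture; the <SEP>/<PAD> strings are placeholder token values for illustration, not values taken from the diffs.

    from transformers import AutoModelForCausalLM, AutoTokenizer

    from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize

    MODEL_NAME = "Maykeye/TinyLLama-v0"  # same tiny fixture the tests load

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

    # Register the (illustrative) special tokens and grow the model's embedding
    # matrices, rounding the new vocabulary size up to a multiple of 8, which is
    # the behavior test_resize_with_multiple_of asserts.
    resize_result = tokenizer_and_embedding_resize(
        special_tokens_dict={"sep_token": "<SEP>", "pad_token": "<PAD>"},
        tokenizer=tokenizer,
        model=model,
        multiple_of=8,
    )

    # The helper reports how many tokens were actually added and the padded size.
    print(resize_result["num_new_tokens"])
    print(resize_result["new_embedding_size"])

The tests in the same patch pass multiple_of=1 when they only want to check the special-token bookkeeping, and multiple_of=8 when they want both the input and output embedding sizes to come out as multiples of 8.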