diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
index 574aac18a..6e26a490f 100644
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@@ -16,9 +16,9 @@ name: Format
 
 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "release" ]
   pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "release" ]
 
 jobs:
   lint:
diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml
index d4d836bec..6c1e043c6 100644
--- a/.github/workflows/image.yaml
+++ b/.github/workflows/image.yaml
@@ -1,9 +1,9 @@
 name: Image
 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "release" ]
   pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "release" ]
 
 jobs:
   build:
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index f8e24265c..ead02bb31 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,9 +1,9 @@
 name: Test
 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "release" ]
   pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "release" ]
 
 jobs:
   build:
diff --git a/CODEOWNERS b/CODEOWNERS
index a28fcff97..bcd048d90 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -8,4 +8,4 @@
 # https://help.github.com/en/articles/about-code-owners
 #
 
-* @anhuong @Ssukriti @alex-jw-brooks
+* @anhuong @Ssukriti @aluu317 @fabianlim @kmehant
diff --git a/build/Dockerfile b/build/Dockerfile
index 507dd4f52..d8cc74877 100644
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -169,6 +169,12 @@ RUN mkdir /app && \
     chown -R $USER:0 /app /tmp && \
     chmod -R g+rwX /app /tmp
 
+# Set Triton environment variables for qLoRA
+ENV TRITON_HOME="/tmp/triton_home"
+ENV TRITON_DUMP_DIR="/tmp/triton_dump_dir"
+ENV TRITON_CACHE_DIR="/tmp/triton_cache_dir"
+ENV TRITON_OVERRIDE_DIR="/tmp/triton_override_dir"
+
 # Need a better way to address these hacks
 RUN if [[ "${ENABLE_AIM}" == "true" ]] ; then \
     touch /.aim_profile && \
diff --git a/fixtures/accelerate_fsdp_defaults.yaml b/fixtures/accelerate_fsdp_defaults.yaml
index f70d74faa..30916b2a5 100644
--- a/fixtures/accelerate_fsdp_defaults.yaml
+++ b/fixtures/accelerate_fsdp_defaults.yaml
@@ -14,9 +14,10 @@ fsdp_config:
   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
 
   # this controls the FSDP pipelining
-  fsdp_backward_prefetch_policy: BACKWARD_PRE # set to BACKWARD_PRE for the most time-efficient pipeline
+  fsdp_backward_prefetch: BACKWARD_PRE # set to BACKWARD_PRE for the most time-efficient pipeline
                                               # but requires the most memory. BACKWARD_POST is the less
                                               # memory intensive option
+  fsdp_backward_prefetch_policy: BACKWARD_PRE # for backwards compatibility
 
   # setting this to true will increase forward memory by prefetching the next FSDP all-gather, while performing
   # the current forward pass.
diff --git a/pyproject.toml b/pyproject.toml
index 2b63b8f54..909ea64d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,14 +27,14 @@ classifiers=[
 ]
 dependencies = [
 "numpy>=1.26.4,<2.0",
-"accelerate>=0.20.3,<0.34",
-"transformers>4.41,<4.45",
-"torch>=2.2.0,<3.0",
+"accelerate>=0.20.3,!=0.34,<1.1",
+"transformers>4.41,<4.50",
+"torch>=2.2.0,<2.5",
 "sentencepiece>=0.1.99,<0.3",
 "tokenizers>=0.13.3,<1.0",
 "tqdm>=4.66.2,<5.0",
 "trl>=0.9.3,<1.0",
-"peft>=0.8.0,<0.13",
+"peft>=0.8.0,<0.14",
 "protobuf>=5.28.0,<6.0.0",
 "datasets>=2.15.0,<3.0",
 "simpleeval>=0.9.13,<1.0",
diff --git a/scripts/run_inference.py b/scripts/run_inference.py
index 7e4465cac..de8462826 100644
--- a/scripts/run_inference.py
+++ b/scripts/run_inference.py
@@ -34,7 +34,7 @@
 import torch
 
 # Local
-from tuning.data import tokenizer_data_utils
+from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize
 
 
 ### Utilities
@@ -219,7 +219,7 @@ def load(
         # where the model's layers are modified, in our case the embedding layer
         # is modified, so we resize the backbone model's embedding layer with our own
         # utility before passing it along to load the PEFT model.
-        tokenizer_data_utils.tokenizer_and_embedding_resize(
+        tokenizer_and_embedding_resize(
            {}, tokenizer=tokenizer, model=base_model
         )
         model = PeftModel.from_pretrained(
diff --git a/tests/build/dummy_job_config.json b/tests/build/dummy_job_config.json
index 315a5b527..ed5abfa85 100644
--- a/tests/build/dummy_job_config.json
+++ b/tests/build/dummy_job_config.json
@@ -5,7 +5,7 @@
         "dynamo_use_dynamic": true,
         "num_machines": 1,
         "main_process_port": 1234,
-        "fsdp_backward_prefetch_policy": "TRANSFORMER_BASED_WRAP",
+        "fsdp_backward_prefetch": "TRANSFORMER_BASED_WRAP",
         "fsdp_sharding_strategy": 1,
         "fsdp_state_dict_type": "FULL_STATE_DICT",
         "fsdp_cpu_ram_efficient_loading": true,
diff --git a/tests/build/test_utils.py b/tests/build/test_utils.py
index fde0ffb2c..4ad228879 100644
--- a/tests/build/test_utils.py
+++ b/tests/build/test_utils.py
@@ -44,7 +44,7 @@ def test_process_accelerate_launch_args(job_config):
     args = process_accelerate_launch_args(job_config)
     # json config values used
     assert args.use_fsdp is True
-    assert args.fsdp_backward_prefetch_policy == "TRANSFORMER_BASED_WRAP"
+    assert args.fsdp_backward_prefetch == "TRANSFORMER_BASED_WRAP"
     assert args.env == ["env1", "env2"]
     assert args.training_script == "tuning.sft_trainer"
     assert args.config_file == "fixtures/accelerate_fsdp_defaults.yaml"
diff --git a/tests/utils/test_embedding_resize.py b/tests/utils/test_embedding_resize.py
index 9a72f397b..43c6adbd4 100644
--- a/tests/utils/test_embedding_resize.py
+++ b/tests/utils/test_embedding_resize.py
@@ -20,9 +20,10 @@
 import torch
 
 # Local
-from tuning.data import tokenizer_data_utils
+from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize
 
 MODEL_NAME = "Maykeye/TinyLLama-v0"
+INPUT_TEXT = "### Text: @NortonSupport Thanks much.\n\n### Label:"
 
 
 def _inference(
@@ -41,16 +42,16 @@
 
 
 def test_output_unaltered_across_embedding_resizes():
-    input_text = "### Text: @NortonSupport Thanks much.\n\n### Label:"
+    input_text = INPUT_TEXT
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model_not_resized = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
     model_resized = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 
-    tokenizer_data_utils.tokenizer_and_embedding_resize(
+    tokenizer_and_embedding_resize(
         special_tokens_dict={}, tokenizer=tokenizer, model=model_resized, multiple_of=8
     )
 
-    tokenizer_data_utils.tokenizer_and_embedding_resize(
+    tokenizer_and_embedding_resize(
         special_tokens_dict={},
         tokenizer=tokenizer,
         model=model_not_resized,
@@ -74,3 +75,68 @@ def test_output_unaltered_across_embedding_resizes():
     )
 
     assert output_from_model_not_resized == output_from_model_resized
+
+
+def test_resize_with_special_tokens():
+    input_text = INPUT_TEXT
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+    input_tokenizer_len = len(tokenizer.get_vocab())
+
+    special_tokens = {"sep_token": "", "pad_token": ""}
+    resize_result = tokenizer_and_embedding_resize(
+        special_tokens_dict=special_tokens,
+        tokenizer=tokenizer,
+        model=model,
+        multiple_of=1,
+    )
+
+    assert "" in tokenizer.get_vocab()
+    assert "" in tokenizer.get_vocab()
+
+    output_tokenizer_len = len(tokenizer.get_vocab())
+
+    assert output_tokenizer_len == input_tokenizer_len + 2
+    assert resize_result["num_new_tokens"] == output_tokenizer_len - input_tokenizer_len
+
+    output = _inference(
+        tokenizer=tokenizer, model=model, input_text=input_text, max_new_tokens=20
+    )
+    assert output is not None
+
+
+def test_no_resize_when_no_special_tokens():
+    input_text = INPUT_TEXT
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+    input_tokenizer_len = len(tokenizer.get_vocab())
+
+    resize_result = tokenizer_and_embedding_resize(
+        special_tokens_dict={}, tokenizer=tokenizer, model=model, multiple_of=1
+    )
+
+    output_tokenizer_len = len(tokenizer.get_vocab())
+
+    assert input_tokenizer_len == output_tokenizer_len
+    assert resize_result["num_new_tokens"] == 0
+
+    output = _inference(
+        tokenizer=tokenizer, model=model, input_text=input_text, max_new_tokens=20
+    )
+
+    assert output is not None
+
+
+def test_resize_with_multiple_of():
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+
+    resize_result = tokenizer_and_embedding_resize(
+        special_tokens_dict={}, tokenizer=tokenizer, model=model, multiple_of=8
+    )
+
+    assert model.get_input_embeddings().embedding_dim % 8 == 0
+    assert resize_result["new_embedding_size"] % 8 == 0
+    assert model.get_output_embeddings().out_features % 8 == 0
diff --git a/tests/utils/test_tokenizer_data_utils.py b/tests/utils/test_tokenizer_data_utils.py
index 118805100..1afd34d4d 100644
--- a/tests/utils/test_tokenizer_data_utils.py
+++ b/tests/utils/test_tokenizer_data_utils.py
@@ -7,7 +7,7 @@
 
 # Local
 # First party
-from tuning.data.tokenizer_data_utils import tokenizer_and_embedding_resize
+from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize
 
 
 def test_tokenizer_and_embedding_resize_return_values():
diff --git a/tuning/data/__init__.py b/tuning/data/__init__.py
deleted file mode 100644
index 38a9531ef..000000000
--- a/tuning/data/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright The FMS HF Tuning Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py
index 767fea1b4..fa7d0875c 100644
--- a/tuning/sft_trainer.py
+++ b/tuning/sft_trainer.py
@@ -53,7 +53,6 @@
     FileLoggingTrackerConfig,
     TrackerConfigFactory,
 )
-from tuning.data import tokenizer_data_utils
 from tuning.trackers.tracker_factory import FILE_LOGGING_TRACKER, get_tracker
 from tuning.trainercontroller import TrainerControllerCallback
 from tuning.utils.config_utils import get_hf_peft_config, get_json_config
@@ -70,6 +69,7 @@
     is_pretokenized_dataset,
     validate_data_args,
 )
+from tuning.utils.tokenizer_data_utils import tokenizer_and_embedding_resize
 
 
 def train(
@@ -294,7 +294,7 @@ def train(
 
     # TODO: lower priority but understand if resizing impacts inference quality and why its needed.
     # It makes sense if we manipulate tokenizer that we also save it and provide it to inference.
-    added_tokens_dict = tokenizer_data_utils.tokenizer_and_embedding_resize(
+    added_tokens_dict = tokenizer_and_embedding_resize(
         special_tokens_dict=special_tokens_dict,
         tokenizer=tokenizer,
         model=model,
@@ -637,6 +637,9 @@ def main():
     combined_tracker_configs.file_logger_config = file_logger_config
     combined_tracker_configs.aim_config = aim_config
 
+    if training_args.output_dir:
+        os.makedirs(training_args.output_dir, exist_ok=True)
+        logger.info("using the output directory at %s", training_args.output_dir)
     try:
         trainer, additional_train_info = train(
             model_args=model_args,
diff --git a/tuning/data/tokenizer_data_utils.py b/tuning/utils/tokenizer_data_utils.py
similarity index 100%
rename from tuning/data/tokenizer_data_utils.py
rename to tuning/utils/tokenizer_data_utils.py