diff --git a/examples/jamba/README.md b/examples/jamba/README.md
new file mode 100644
index 000000000..aa98c0245
--- /dev/null
+++ b/examples/jamba/README.md
@@ -0,0 +1,5 @@
+# Jamba
+
+QLoRA w/ DeepSpeed needs at least 2 GPUs with 35 GiB VRAM each.
+
+Single-GPU QLoRA: training will start, but the loss is off by an order of magnitude.
diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml
new file mode 100644
index 000000000..ef04fb53f
--- /dev/null
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -0,0 +1,62 @@
+base_model: ai21labs/Jamba-v0.1
+trust_remote_code: true
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+dataset_prepared_path:
+val_set_size: 0.0
+output_dir: ./out
+
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+eval_sample_packing: false
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+adapter: qlora
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_linear: true
+
+low_cpu_mem_usage: true
+gradient_accumulation_steps: 4
+micro_batch_size: 1
+num_epochs: 2
+optimizer: paged_adamw_8bit
+lr_scheduler: cosine
+learning_rate: 0.00001
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 10
+evals_per_epoch:
+saves_per_epoch: 1
+debug:
+deepspeed: deepspeed_configs/zero2.json
+weight_decay: 0.0
+special_tokens:
diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py
index cce0cbc76..c07c0ff75 100644
--- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py
+++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py
@@ -533,6 +533,7 @@ class Config:
         Dict[Union[int, Literal["cpu", "disk"]], Union[int, str]]
     ] = None
     gpu_memory_limit: Optional[Union[int, str]] = None
+    low_cpu_mem_usage: Optional[bool] = None
 
     chat_template: Optional[ChatTemplate] = None
     default_system_message: Optional[str] = None
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index c40a15c48..911a6c31b 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -402,7 +402,9 @@ def load_model(
         from accelerate import infer_auto_device_map
 
         with init_empty_weights():
-            model_canvas = AutoModelForCausalLM.from_config(model_config)
+            model_canvas = AutoModelForCausalLM.from_config(
+                model_config, trust_remote_code=cfg.trust_remote_code or False
+            )
         model_canvas.tie_weights()
         device_map = infer_auto_device_map(
             model_canvas,
@@ -502,6 +504,9 @@ def load_model(
             model_kwargs["attn_implementation"] = "eager"
             model_config._attn_implementation = "eager"  # pylint: disable=protected-access
 
+    if cfg.low_cpu_mem_usage:
+        model_kwargs["low_cpu_mem_usage"] = True
+
     qlora_fsdp = cfg.fsdp and cfg.adapter == "qlora"
 
     try:
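For context, here is a minimal sketch (not part of the patch) of what the two `models.py` changes above amount to, assuming the standard `transformers`/`accelerate` APIs. The model name, the omitted `infer_auto_device_map` arguments, and the surrounding flow are illustrative only, not axolotl's actual code path.

```python
# Sketch only: illustrates the intent of the models.py changes, not axolotl's real loader.
from accelerate import infer_auto_device_map, init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM

model_name = "ai21labs/Jamba-v0.1"  # illustrative
model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

# Change 1: trust_remote_code is now forwarded to from_config(); without it, building the
# empty "canvas" model used for device-map inference fails for remote-code models like Jamba.
with init_empty_weights():
    model_canvas = AutoModelForCausalLM.from_config(model_config, trust_remote_code=True)
model_canvas.tie_weights()
device_map = infer_auto_device_map(model_canvas)  # max_memory / no_split_module_classes omitted

# Change 2: the new low_cpu_mem_usage option is passed through to from_pretrained(), so
# weights are loaded shard by shard instead of materializing a full copy in CPU RAM first.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map=device_map,
)
```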
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index da9f071c0..dc995fda8 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -312,6 +312,8 @@ def setup_fsdp_envs(cfg):
         os.environ["FSDP_USE_ORIG_PARAMS"] = "true"
     if cfg.fsdp_config.fsdp_state_dict_type:
         os.environ["FSDP_STATE_DICT_TYPE"] = cfg.fsdp_config.fsdp_state_dict_type
+    if cfg.fsdp_config.fsdp_auto_wrap_policy:
+        os.environ["FSDP_AUTO_WRAP_POLICY"] = cfg.fsdp_config.fsdp_auto_wrap_policy
     if cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap:
         os.environ[
             "FSDP_TRANSFORMER_CLS_TO_WRAP"