From e04a9d5d9e912da0669a1845d44f9f6ff4646c4b Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Tue, 19 Nov 2024 15:27:33 +0800 Subject: [PATCH] add export readme and change default dtype --- README.md | 16 ++++++++++++++++ scripts/export_dcp.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c8b670f4..0a64ae97 100644 --- a/README.md +++ b/README.md @@ -156,3 +156,19 @@ For info about the arguments to the script, do: ```bash python3 scripts/subset_data.py --help ``` + +# Exporting checkpoints to a Hugging Face-compatible model +You can convert the checkpoints saved by the training script to a model that can be run with any Hugging Face-compatible inference engine (e.g. transformers, vLLM) using our export script. +The export script takes the training config as a positional argument and two keyword arguments: `ckpt.resume`, which is the path to the checkpoint, and `ckpt.path`, which is the path where you wish to save the converted model. +You may also set the `torch_dtype` argument to either `float32` or `bfloat16` to specify the precision of the exported model weights. The default `torch_dtype` is `float32`. + +Example export command: +```bash +python scripts/export_dcp.py @configs/10B/H100.toml --ckpt.path /path/to/save/converted_model --ckpt.resume /path/to/ckpt/step_84000 --torch_dtype bfloat16 +``` + +You can then upload the model to Hugging Face using `huggingface-cli`: +```bash +# Usage: huggingface-cli upload [repo_id] [local_path] [path_in_repo] +huggingface-cli upload mymodel /path/to/save/converted_model . --private +``` diff --git a/scripts/export_dcp.py b/scripts/export_dcp.py index 7d1bfc85..51538baa 100644 --- a/scripts/export_dcp.py +++ b/scripts/export_dcp.py @@ -24,7 +24,7 @@ class ExportConfig(Config): save_format: Literal["pt", "safetensors"] = "safetensors" - torch_dtype: Literal["float32", "bfloat16"] = "bfloat16" + torch_dtype: Literal["float32", "bfloat16"] = "float32" with_debug_automap: bool = False