replace 30b with 33b (#7)

OptimalScale · Mar 28, 2023 · de5793d
1 parent 297bb2e
commit de5793d
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -27,12 +27,12 @@ An extensible, convenient, and efficient toolbox for finetuning large machine le
 |  InstructGPT 175B   |   73.2   |     46.0    |  44.0   | 54.4 |
 |    ChatGPT |    63.9   |     **57.0**    |  44.7   | 55.2 |
 |      LLaMA 7B   |    5.2   |     27.1    |  24.3   | 18.9 |
-|      LLaMA 30B |    1.8   |     43.4    |  30.3   | 25.2 |
+|      LLaMA 33B |    1.8   |     43.4    |  30.3   | 25.2 |
 |   |      |             |            |    |  |
 |   Task-tuned LLaMA 7B (Full) |   **75.1**   |     44.5    |  49.9   | 56.5 |
-| Task-tuned LLaMA 30B (LoRA) |  74.0  |  51.3   | **50.2**|**58.5**|
+| Task-tuned LLaMA 33B (LoRA) |  74.0  |  51.3   | **50.2**|**58.5**|
 
-The LLaMA 30B (LoRA) performance is achieved with only **~16h** finetuning on the training split of PubMedQA and MedMCQA with a single 8 \* A100 server. 
+The LLaMA 33B (LoRA) performance is achieved with only **~16h** finetuning on the training split of PubMedQA and MedMCQA with a single 8 \* A100 server. 
 For more performance, including instruction tuning results, please refer to our [Documentation](https://optimalscale.github.io/LMFlow/).
 
 ## Supported Pipelines

diff --git a/scripts/convert_llama_weights_to_hf.py b/scripts/convert_llama_weights_to_hf.py
@@ -47,13 +47,13 @@
 INTERMEDIATE_SIZE_MAP = {
     "7B": 11008,
     "13B": 13824,
-    "30B": 17920,
+    "33B": 17920,
     "65B": 22016,
 }
 NUM_SHARDS = {
     "7B": 1,
     "13B": 2,
-    "30B": 4,
+    "33B": 4,
     "65B": 8,
 }
 
@@ -256,7 +256,7 @@ def main():
     )
     parser.add_argument(
         "--model_size",
-        choices=["7B", "13B", "30B", "65B", "tokenizer_only"],
+        choices=["7B", "13B", "33B", "65B", "tokenizer_only"],
     )
     parser.add_argument(
         "--output_dir",