Skip to content

Commit

Permalink
Merge pull request #53 from krai/llm_cleanup
Browse files (browse the repository at this point in the history)
Cleaned up MOE dataset recipes and made them consistent with OpenOrca's
  • Loading branch information
Akshat-Tripathi authored Aug 7, 2024
2 parents a0f7cb6 + 4710f4c commit e860476
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 27 deletions.
3 changes: 1 addition & 2 deletions data_axs.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@
"moe_dataset_preprocessor": "moe_dataset_preprocessor",
"model_pytorch_mixtral_recipe": "model_pytorch_mixtral_recipe",
"base_mixtral_loadgen_experiment": "base_mixtral_loadgen_experiment",
- "moe_reference_using_torch_loadgen": "moe_reference_using_torch_loadgen",
- "dataset_mixtral_preprocessed_recipe": "dataset_mixtral_preprocessed_recipe"
+ "moe_reference_using_torch_loadgen": "moe_reference_using_torch_loadgen"
},
"repo_name": "axs2mlperf",
"submodules": false
Expand Down
8 changes: 0 additions & 8 deletions dataset_mixtral_preprocessed_recipe/data_axs.json

This file was deleted.

4 changes: 2 additions & 2 deletions dataset_openorca_mlperf_recipe/data_axs.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
]] ],

"desired_python_version": "3.8",
- "model_family": "llama2",
+ "model_name": "llama2",
"variant": "7b",
- "checkpoint_path_query": [ "^^", "substitute", "downloaded,hf_tokeniser,model_family=#{model_family}#,variant=#{variant}#" ],
+ "checkpoint_path_query": [ "^^", "substitute", "downloaded,hf_tokeniser,model_name=#{model_name}#,variant=#{variant}#" ],
"checkpoint_path": [ "^^", "execute", [[
[ "get_kernel" ],
[ "byquery", [[ "^^", "get", "checkpoint_path_query" ]] ],
Expand Down
24 changes: 17 additions & 7 deletions moe_dataset_preprocessor/data_axs.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
{
"_parent_entries": [ [ "^", "byname", "python_script" ], [ "^", "byname", "entry_creator" ] ],
"_producer_rules": [
- [ [ "downloaded", "dataset_name=moe_dataset" ], [["get_kernel"],["byname","downloader"],["download"]], {
- "url": "https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl",
- "md5": "78823c13e0e73e518872105c4b09628b"
+ [ [ "downloaded", "dataset_name=moe_dataset", "dataset_type=full", "source?=via_wget" ], [["get_kernel"],["byname","downloader"],["download"]], {
+ "newborn_entry_name": "downloaded_moe_dataset_full",
+ "file_path": "dataset.pkl",
+ "url": "https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl"
} ],
+ [ [ "downloaded", "dataset_name=moe_dataset", "dataset_type=calibration", "source?=via_wget" ], [["get_kernel"],["byname","downloader"],["download"]], {
+ "newborn_entry_name": "downloaded_moe_dataset_calibration",
+ "file_path": "dataset.pkl",
+ "url": "https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_calibration_v4.pkl"
+ } ],
[ [ "preprocessed", "dataset_name=moe_dataset" ], [["get", "pipeline"]] ]
],

- "dataset_path": [ "^", "execute", [[
- [ "byquery", "downloaded,dataset_name=moe_dataset" ],
- [ "get_path", "mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl" ]
+ "dataset_type": "full",
+
+ "dataset_query": [ "^^", "substitute", "downloaded,dataset_name=moe_dataset,dataset_type=#{dataset_type}#" ],
+ "dataset_path": [ "^^", "execute", [[
+ [ "get_kernel" ],
+ [ "byquery", [ "^^", "get", "dataset_query" ] ],
+ [ "get_path", "dataset.pkl" ]
]]],

- "newborn_entry_name": "preprocessed_moe_dataset",
+ "newborn_entry_name": [ "^^", "substitute", "preprocessed_moe_dataset_#{dataset_type}#" ],
"newborn_entry_tags": [ "preprocessed" ],
"newborn_entry_param_names": [ "dataset_name" ],

Expand Down
8 changes: 4 additions & 4 deletions moe_reference_using_torch_loadgen/data_axs.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
[ "^^", "python_sync_pip_package", [[ "^^", "get", "loadgen_query" ]] ]
],

- "dataset_query": [ "^^", "substitute", [ "downloaded,preprocessed,dataset_name=#{dataset_name}#"] ],
+ "dataset_type": "full",
+
+ "dataset_query": [ "^^", "substitute", [ "downloaded,dataset_name=moe_dataset,dataset_type=#{dataset_type}#"] ],
"dataset_entry": [ "^", "byquery", [[ "^^", "get", "dataset_query" ]], {}, ["dataset_query"] ],
"dataset_path": [ "^^", "execute", [[
[ "get", "dataset_entry" ],
Expand All @@ -49,8 +51,6 @@

"loadgen_scenario": "Offline",

- "dataset_name": "mixtral",
-
"model_name": "mixtral-8x7b",

"model_query": [ "^^", "substitute", [ "downloaded,pytorch_model,model_name=#{model_name}#"] ],
Expand Down Expand Up @@ -81,7 +81,7 @@
"output_entry_parents": [ "AS^IS", "AS^IS", [ "^", "byname", "base_mixtral_loadgen_experiment" ] ],
"output_entry_param_names": [
"loadgen_compliance_test",
- "dataset_name",
+ "dataset_type",
"dataset_path",

"model_name",
Expand Down
8 changes: 4 additions & 4 deletions openorca_preprocessor/data_axs.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
[ [ "preprocessed", "dataset_name=openorca" ], [[ "run" ]] ]
],

- "openorca_dataset_type": "full",
- "openorca_dataset_file": [ "^^", "case", [[ "^^", "get", "openorca_dataset_type" ],
+ "dataset_type": "full",
+ "openorca_dataset_file": [ "^^", "case", [[ "^^", "get", "dataset_type" ],
"full", "open_orca_gpt4_tokenized_llama.sampled_24576.pkl",
"calibration", "open_orca_gpt4_tokenized_llama.calibration_1000.pkl"
]],
Expand Down Expand Up @@ -34,8 +34,8 @@
],

"newborn_entry_tags": [ "preprocessed", "dataset_name=openorca" ],
- "newborn_name_template": [ "preprocessed_openorca_dataset_#{openorca_dataset_type}#" ],
- "newborn_entry_param_names": [ "openorca_dataset_type" ],
+ "newborn_name_template": [ "preprocessed_openorca_dataset_#{dataset_type}#" ],
+ "newborn_entry_param_names": [ "dataset_type" ],
"return_this_entry": [ "^^", "substitute", "#{newborn_entry}#" ],

"rel_script_path": "main.py",
Expand Down

0 comments on commit e860476

Please sign in to comment.