Merge remote-tracking branch 'origin/mixtral-dev'

krai · Aug 6, 2024 · cccfc97 · cccfc97
2 parents 39c382b + c40c3b7
commit cccfc97
Show file tree

Hide file tree

Showing 11 changed files with 411 additions and 4 deletions.
diff --git a/base_llama2_loadgen_experiment/code_axs.py b/base_llama2_loadgen_experiment/code_axs.py
@@ -10,6 +10,25 @@ def get_accuracy_dict(accuracy_dict_full):
             accuracy_dict[k] = accuracy_dict_full[k]
     return accuracy_dict
 
+def parse_tokens(
+    tokenised_accuracy_log_path: str, output_log_path: str
+):
+    """Decode the hex token dumps of an accuracy log into lists of integers.
+
+    The file at `tokenised_accuracy_log_path` is loaded as JSON and iterated.
+    The "data" string of every entry is cut into 8-character hex chunks, and
+    each chunk is read as a little-endian unsigned integer.  The resulting
+    list of lists (one inner list per log entry) is written as indented JSON
+    to `output_log_path`.
+
+    Returns:
+        `output_log_path`, unchanged.
+    """
+    with open(tokenised_accuracy_log_path) as log_file:
+        entries = json.load(log_file)
+
+    decoded_entries = []
+    for entry in entries:
+        payload = entry["data"]
+        token_ids = []
+        # Walk the payload 8 hex digits (4 bytes) at a time.
+        for start in range(0, len(payload), 8):
+            raw = bytes.fromhex(payload[start : start + 8])
+            token_ids.append(int.from_bytes(raw, byteorder="little"))
+        decoded_entries.append(token_ids)
+
+    with open(output_log_path, "w") as out_file:
+        json.dump(decoded_entries, out_file, indent=2)
+    return output_log_path
+
 def detokenise(
     checkpoint_path: str, tokenised_accuracy_log_path: str, output_log_path: str
 ):

diff --git a/base_loadgen_program/data_axs.json b/base_loadgen_program/data_axs.json
@@ -18,14 +18,14 @@
     "loadgen_target_qps": null,
     "loadgen_compliance_test": null,
 
-    "loadgen_max_query_count": null,
     "loadgen_samples_per_query": null,
     "loadgen_offline_expected_qps": null,
     "loadgen_server_coalesce_queries": true,
+    "loadgen_ttft_latency": null,
+    "loadgen_tpot_latency": null,
 
     "param_to_conf_pair": {
         "loadgen_min_query_count":      ["min_query_count", 1],
-        "loadgen_max_query_count":      ["max_query_count", 1],
         "loadgen_multistreamness":      ["samples_per_query", 1],
         "loadgen_max_query_count":      ["max_query_count", 1],
         "loadgen_buffer_size":          ["performance_sample_count_override", 1],
@@ -36,7 +36,9 @@
         "loadgen_min_duration_s":       ["min_duration", 1000],
         "loadgen_offline_expected_qps": ["offline_expected_qps", 1],
         "loadgen_use_token_latencies":  ["use_token_latencies", 1],
-        "loadgen_server_coalesce_queries":  ["coalesce_queries", 1]
+        "loadgen_server_coalesce_queries":  ["coalesce_queries", 1],
+        "loadgen_ttft_latency":  ["ttft_latency", 1],
+        "loadgen_tpot_latency":  ["tpot_latency", 1]
     },
     "param_to_conf_keys": [ "^^", "execute", [[
         [ "get", "param_to_conf_pair" ],

diff --git a/base_mixtral_loadgen_experiment/data_axs.json b/base_mixtral_loadgen_experiment/data_axs.json
@@ -0,0 +1,69 @@
+{
+    "_parent_entries": [ [ "^", "byname", "base_loadgen_experiment" ] ],
+    "accuracy_log_path": ["^^", "get_path", "mlperf_log_accuracy.json"],
+    "output_accuracy_path": ["^^", "get_path", "accuracy_results.json"],
+
+    "mlperf_inference_git_entry": [ "^", "byquery", "git_repo,repo_name=mlperf_inference_git" ],
+
+    "abs_script_path": [ "^^", "execute", [[
+	[ "get", "mlperf_inference_git_entry" ],
+	[ "get_path_of", "mixtral_reference_accuracy_script"  ]
+    ]] ],
+
+    "dtype_value": "int32",
+    "n_workers": null,
+    "n_workers_cmd": [ "^^", "case", [ [ "^^", "get", "n_workers" ],
+        null, "" ],
+	{ "default_value": [ "^^", "substitute", "--n_workers #{n_workers}#" ] }
+    ],
+
+    "extract_accuracy_report": [ "^^", "execute", [[
+        [ "get_kernel" ],
+	[ "byname", "python_script" ],
+	[ "run", [], {
+	    "python_deps": [
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=numpy,package_version=1.24.1" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=transformers,package_version=4.41.2" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=evaluate,package_version=0.4.0" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=accelerate,package_version=0.21.0"],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=pybind11,package_version=2.10.4" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=torch,package_version=2.3.1" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=pandas,package_version=2.2.2" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=nltk,package_version=3.8.1" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=tqdm,package_version=4.66.4" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=scipy" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=pillow" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=absl-py" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=rouge_score" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=sentencepiece" ],
+		[ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=mxeval,installable=git+https://github.com/shubhamugare/mxeval.git" ]
+	    ],
+            "abs_script_path": ["^^", "get", "abs_script_path"],
+            "script_extra_params": [ "^^", "substitute", "--mlperf-accuracy-file \"#{accuracy_log_path}#\" --checkpoint-path \"#{model_path}#\" --dataset-file \"#{dataset_path}#\" --dtype \"#{dtype_value}#\" #{n_workers_cmd}#" ],
+	    "capture_output": true
+	} ],
+	0,
+	[ "func", [ "ufun.rematch", "Results\\s+(\\{.*\\})" ] ]
+    ]], {} ],
+
+    "accuracy_dict": [ "^^", "execute", [[
+        [ "get", "accuracy_report" ],
+	0,
+	[ "func", "eval" ]
+    ]], {} ],
+
+
+    "rouge1": [ "^^" , "dig","accuracy_dict.rouge1" ],
+    "rouge2": [ "^^" , "dig","accuracy_dict.rouge2" ],
+    "rougeL": [ "^^" , "dig","accuracy_dict.rougeL" ],
+    "rougeLsum": [ "^^" , "dig","accuracy_dict.rougeLsum" ],
+    "gsm8k": [ "^^" , "dig","accuracy_dict.gsm8k" ],
+    "mbxp": [ "^^" , "dig","accuracy_dict.mbxp" ],
+    "gen_len": [ "^^" , "dig","accuracy_dict.gen_len" ],
+    "gen_num": [ "^^" , "dig","accuracy_dict.gen_num" ],
+    "gen_tok_len": [ "^^" , "dig","accuracy_dict.gen_tok_len" ],
+    "tokens_per_sample": [ "^^" , "dig","accuracy_dict.tokens_per_sample" ],
+
+    "accuracy_range_dict": { "rouge1": [ 45.036189, null ], "rouge2": [ 23.050071, null ], "rougeL": [ 30.057885, null], "rougeLsum":[ null, null ], "gsm8k": [ 73.0422, null ], "mbxp": [ 59.5188, null ], "gen_len": [ null, null ], "gen_num": [ null, null ], "gen_tok_len": [ null, null ], "tokens_per_sample": [ 131.31, 160.49 ] }
+
+}
diff --git a/base_moe_loadgen_experiment/code_axs.py b/base_moe_loadgen_experiment/code_axs.py
@@ -0,0 +1,51 @@
+import json
+
+from transformers import AutoTokenizer
+
+
+def get_accuracy_dict(accuracy_dict_full):
+    """Return only the reported metrics from `accuracy_dict_full`.
+
+    Keeps the entries whose key is one of "rouge1", "rouge2", "rougeL" or
+    "tokens_per_sample", in the order they appear in the input; every other
+    key is dropped.
+    """
+    reported_keys = ("rouge1", "rouge2", "rougeL", "tokens_per_sample")
+    return {
+        name: accuracy_dict_full[name]
+        for name in accuracy_dict_full.keys()
+        if name in reported_keys
+    }
+
+def parse_tokens(
+    tokenised_accuracy_log_path: str, output_log_path: str
+):
+    """Convert the hex-encoded "data" fields of an accuracy log to integers.
+
+    Reads the JSON list stored at `tokenised_accuracy_log_path`.  For every
+    entry, its "data" string is split into consecutive 8-character hex
+    chunks and each chunk is interpreted as a little-endian unsigned
+    integer.  One list of integers per entry is dumped (indent=2) to
+    `output_log_path`.
+
+    Returns:
+        `output_log_path`, unchanged.
+    """
+    with open(tokenised_accuracy_log_path) as log_file:
+        entries = json.load(log_file)
+
+    decoded_entries = []
+    for entry in entries:
+        payload = entry["data"]
+        token_ids = []
+        # Each token occupies 8 hex digits, i.e. 4 bytes.
+        for start in range(0, len(payload), 8):
+            raw = bytes.fromhex(payload[start : start + 8])
+            token_ids.append(int.from_bytes(raw, byteorder="little"))
+        decoded_entries.append(token_ids)
+
+    with open(output_log_path, "w") as out_file:
+        json.dump(decoded_entries, out_file, indent=2)
+    return output_log_path
+
+def detokenise(
+    checkpoint_path: str, tokenised_accuracy_log_path: str, output_log_path: str
+):
+    """Decode the hex token dumps of an accuracy log with a tokeniser.
+
+    A tokeniser is obtained via `AutoTokenizer.from_pretrained(checkpoint_path)`.
+    The JSON list at `tokenised_accuracy_log_path` is then read; the "data"
+    string of every entry is split into 8-character hex chunks, each chunk is
+    read as a little-endian unsigned integer, and the resulting token list is
+    handed to `tokeniser.decode`.  The decoded results, one per entry, are
+    written as indented JSON to `output_log_path`.
+
+    Returns:
+        `output_log_path`, unchanged.
+    """
+    tokeniser = AutoTokenizer.from_pretrained(checkpoint_path)
+
+    with open(tokenised_accuracy_log_path) as log_file:
+        entries = json.load(log_file)
+
+    decoded_entries = []
+    for entry in entries:
+        payload = entry["data"]
+        token_ids = []
+        # Each token occupies 8 hex digits, i.e. 4 bytes.
+        for start in range(0, len(payload), 8):
+            raw = bytes.fromhex(payload[start : start + 8])
+            token_ids.append(int.from_bytes(raw, byteorder="little"))
+        decoded_entries.append(tokeniser.decode(token_ids))
+
+    with open(output_log_path, "w") as out_file:
+        json.dump(decoded_entries, out_file, indent=2)
+    return output_log_path
diff --git a/base_moe_loadgen_experiment/data_axs.json b/base_moe_loadgen_experiment/data_axs.json
@@ -0,0 +1,89 @@
+{
+    "_parent_entries": [ [ "^", "byname", "base_loadgen_experiment" ] , [ "^", "byname", "shell" ], [ "^", "byname", "python_in_shell" ] ],
+
+    "transformers_query": [ "python_package", "package_name=transformers", ["desired_python_version", ["^", "kernel_python_major_dot_minor"]] ],
+
+    "_BEFORE_CODE_LOADING": [ "^^", "execute", [[
+        [ "get_kernel" ],
+        [ "byquery", [[ "^^", "get", "transformers_query" ]] ],
+        [ "use" ]
+    ]] ],
+
+    "mlperf_inference_git_entry": [ "^", "byquery", "git_repo,repo_name=mlperf_inference_git" ],
+
+    "abs_script_path": [ "^^", "execute", [[
+        [ "get", "mlperf_inference_git_entry" ],
+        [ "get_path_of", "llama2_accuracy_script" ]
+    ]] ],
+
+    "accuracy_log_path": ["^^", "get_path", "mlperf_log_accuracy.json"],
+
+    "dataset_name": "openorca",
+    "dataset_query": [ "downloaded", [ "^^", "substitute", "dataset_name=#{dataset_name}#" ]],
+    "dataset_entry": [ "^", "byquery", [[ "^^", "get", "dataset_query" ]], {}, ["dataset_query"] ],
+
+    "dataset_path": [ "^^", "execute", [[
+        [ "get", "dataset_entry" ],
+        [ "get_path" ],
+        [ "__add__", "/open_orca_gpt4_tokenized_llama.sampled_24576.pkl" ]
+    ]] ],
+
+    "variant": "7b",
+    "hf_model_name": [ "^^", "substitute", "Llama-2-#{variant}#-chat-hf" ],
+    "checkpoint_path_query": [ "^^", "substitute", "downloaded,hf_model,model_name=#{hf_model_name}#" ],
+    "checkpoint_path": [ "^^", "execute", [[
+        [ "get_kernel" ],
+        [ "byquery", [[ "^^", "get", "checkpoint_path_query" ]] ],
+        [ "get_path" ]
+    ]] ],
+
+    "accuracy_log_dtype": "int32",
+
+    "extract_accuracy_report": [ "^^", "execute", [[
+        [ "get_kernel" ],
+        [ "byname", "python_script" ],
+        [ "run", [], {
+                "python_deps": [
+                    [ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=torch" ],
+                    [ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=transformers" ],
+                    [ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=nltk" ],
+                    [ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=rouge_score" ],
+                    [ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=sentencepiece" ],
+                    [ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=pillow" ],
+                    [ "AS^IS", "^^", "python_sync_pip_package", "python_package,package_name=evaluate" ]
+                ],
+                "abs_script_path": ["^^", "get", "abs_script_path"],
+                "script_extra_params": [ "^^", "substitute", "--mlperf-accuracy-file #{accuracy_log_path}# --dataset-file #{dataset_path}# --dtype #{accuracy_log_dtype}# --checkpoint-path #{checkpoint_path}#" ],
+                "desired_python_version": ["^", "kernel_python_major_dot_minor"],
+                "capture_output": true
+            } ],
+        0,
+        [ "func", [ "ufun.rematch", "(\\{.*\\})" ] ]
+    ]], {} ],
+
+    "accuracy_dict_full": [ "^^", "execute", [[
+        ["get", "accuracy_report" ],
+        0,
+        [ "func", "eval" ]
+     ]], {} ],
+    "accuracy_dict": [ "^^", "get_accuracy_dict" ],
+    "rouge1": [ "^^" , "dig","accuracy_dict.rouge1" ],
+    "rouge2": [ "^^" , "dig","accuracy_dict.rouge2" ],
+    "rougeL": [ "^^" , "dig","accuracy_dict.rougeL" ],
+    "tokens_per_sample": [ "^^" , "dig","accuracy_dict.tokens_per_sample" ],
+
+    "rougeLsum": [ "^^" , "dig","accuracy_dict.rougeLsum" ],
+    "gen_len": [ "^^" , "dig","accuracy_dict.gen_len" ],
+    "gen_num": [ "^^" , "dig","accuracy_dict.gen_num" ],
+
+    "accuracy_range_dict": { "rouge1": [ 43.986888, null ], "rouge2": [ 21.814848, null ], "rougeL": [ 28.330038, null ], "tokens_per_sample": [ 265.005, null ] },
+
+    "abs_path": [ "^^", "get_path" ],
+    "rel_log_path": "mlperf_log_accuracy.json",
+    "tokenised_accuracy_log_path": [ "^^", "substitute", "#{abs_path}#/#{rel_log_path}#" ],
+
+    "rel_output_log_path": "detokenised_mlperf_log.json",
+    "output_log_path": [ "^^", "substitute", "#{abs_path}#/#{rel_output_log_path}#" ],
+
+    "detokenised_log": [ "^^", "detokenise" ]
+}
diff --git a/data_axs.json b/data_axs.json
@@ -39,6 +39,7 @@
         "gptj_reference_loadgen": "gptj_reference_loadgen",
         "base_gptj_loadgen_experiment": "base_gptj_loadgen_experiment",
         "base_llama2_loadgen_experiment": "base_llama2_loadgen_experiment",
+        "base_moe_loadgen_experiment": "base_moe_loadgen_experiment",
         "power_measurement": "power_measurement",
         "model_pytorch_gptj_cnndm_recipe": "model_pytorch_gptj_cnndm_recipe",
         "base_power_measurement_experiment": "base_power_measurement_experiment",
@@ -54,7 +55,12 @@
         "base_sut": "base_sut",
         "rclone_mlc_inference_config": "rclone_mlc_inference_config",
         "preprocess_imagenet_using_opencv": "preprocess_imagenet_using_opencv",
-        "llama2_reference_loadgen": "llama2_reference_loadgen"
+        "llama2_reference_loadgen": "llama2_reference_loadgen",
+        "moe_dataset_preprocessor": "moe_dataset_preprocessor",
+        "model_pytorch_mixtral_recipe": "model_pytorch_mixtral_recipe",
+        "base_mixtral_loadgen_experiment": "base_mixtral_loadgen_experiment",
+        "moe_reference_using_torch_loadgen": "moe_reference_using_torch_loadgen",
+        "dataset_mixtral_preprocessed_recipe": "dataset_mixtral_preprocessed_recipe"
     },
     "repo_name": "axs2mlperf",
     "submodules": false

diff --git a/dataset_mixtral_preprocessed_recipe/data_axs.json b/dataset_mixtral_preprocessed_recipe/data_axs.json
@@ -0,0 +1,8 @@
+{
+    "_producer_rules": [
+        [ [ "downloaded", "preprocessed", "dataset_name=mixtral", "source?=via_wget" ], [["get_kernel"],["byname","downloader"],["download"]], {
+            "file_path": "2024.06.06_mixtral_15k_v4.pkl",
+            "url": "https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl"
+	} ]
+    ]
+}
diff --git a/model_pytorch_mixtral_recipe/data_axs.json b/model_pytorch_mixtral_recipe/data_axs.json
@@ -0,0 +1,9 @@
+{
+    "_producer_rules": [
+        [ [ "downloaded", "pytorch_model", "model_name=mixtral-8x7b", "source?=via_rclone" ], [["get_kernel"],["byname","downloader"],["download"]], {
+            "downloading_tool_query": "shell_tool,can_download_url_from_rclone",
+            "file_name": [ "mixtral-8x7b-instruct-v0.1" ],
+            "url": "mlc-inference:mlcommons-inference-wg-public/mixtral_8x7b/mixtral-8x7b-instruct-v0.1"
+        }, [] ]
+    ]
+}
diff --git a/moe_dataset_preprocessor/data_axs.json b/moe_dataset_preprocessor/data_axs.json
@@ -0,0 +1,28 @@
+{
+    "_parent_entries": [ [ "^", "byname", "python_script" ], [ "^", "byname", "entry_creator" ] ],
+    "_producer_rules": [
+        [ [ "downloaded", "dataset_name=moe_dataset" ], [["get_kernel"],["byname","downloader"],["download"]], {
+            "url": "https://inference.mlcommons-storage.org/mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl",
+            "md5": "78823c13e0e73e518872105c4b09628b"
+        } ],
+        [ [ "preprocessed", "dataset_name=moe_dataset" ], [["get", "pipeline"]] ]
+    ],
+
+    "dataset_path": [ "^", "execute", [[
+        [ "byquery", "downloaded,dataset_name=moe_dataset" ],
+        [ "get_path", "mixtral_8x7b%2F2024.06.06_mixtral_15k_v4.pkl" ]
+    ]]],
+
+    "newborn_entry_name": "preprocessed_moe_dataset",
+    "newborn_entry_tags": [ "preprocessed" ],
+    "newborn_entry_param_names": [ "dataset_name" ],
+
+    "rel_script_path": "preprocess.py",
+    "script_extra_params": [ "^^", "substitute", "--dataset-path #{dataset_path}# --output-path #{newborn_entry_path}#"],
+
+    "pipeline": [ "^^", "execute", [[
+        [ "run" ],
+        [],
+        [ "get", "stored_newborn_entry" ]
+    ]] ]
+}
diff --git a/moe_dataset_preprocessor/preprocess.py b/moe_dataset_preprocessor/preprocess.py
@@ -0,0 +1,28 @@
+import argparse
+import os
+
+import numpy as np
+import pandas as pd
+
+PAD_TOKEN = 2
+# Fixed row width of the padded input matrix.
+MAX_SEQ_LEN = 2048
+# Element type of both output files.  The original script declared this dtype
+# but never applied it, so the arrays were written in NumPy's platform default
+# integer width instead.
+# NOTE(review): confirm the consumer of the .bin files reads int32.
+OUTPUT_DTYPE = np.dtype("int32")
+
+
+def build_padded_arrays(
+    tok_inputs,
+    tok_input_lens,
+    max_seq_len=MAX_SEQ_LEN,
+    pad_token=PAD_TOKEN,
+    dtype=OUTPUT_DTYPE,
+):
+    """Left-pad token sequences into a fixed-width matrix.
+
+    Args:
+        tok_inputs: iterable of token sequences, one per sample.
+        tok_input_lens: sized iterable with the length of each sequence.
+        max_seq_len: number of columns in the padded matrix.
+        pad_token: value used to fill the unused leading positions.
+        dtype: NumPy dtype of both returned arrays.
+
+    Returns:
+        A tuple `(input_ids, input_lens)`: `input_ids` has shape
+        `(n, max_seq_len)` with each sequence right-aligned in its row, and
+        `input_lens` has shape `(n, 1)` holding the sequence lengths.
+
+    Raises:
+        ValueError: if a sequence is longer than `max_seq_len`.
+    """
+    num_samples = len(tok_input_lens)
+    input_ids = np.full((num_samples, max_seq_len), pad_token, dtype=dtype)
+    input_lens = np.zeros((num_samples, 1), dtype=dtype)
+
+    for i, (toks, tok_len) in enumerate(zip(tok_inputs, tok_input_lens)):
+        if tok_len > max_seq_len:
+            # Without this check the negative slice start below would yield
+            # a confusing broadcast error.
+            raise ValueError(
+                f"sample {i} has {tok_len} tokens, more than the "
+                f"maximum sequence length {max_seq_len}"
+            )
+        # Right-align the tokens; the leading positions keep the pad value.
+        input_ids[i, max_seq_len - tok_len:] = toks
+        input_lens[i] = tok_len
+
+    return input_ids, input_lens
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--dataset-path", type=str, help="The path to the dataset")
+    parser.add_argument("--output-path", type=str, help="The path to the output")
+
+    args = parser.parse_args()
+
+    # NOTE: read_pickle unpickles the file, which can execute arbitrary code;
+    # only run this on a dataset file from a trusted source.
+    df = pd.read_pickle(args.dataset_path)
+
+    input_ids, input_lens = build_padded_arrays(df["tok_input"], df["tok_input_len"])
+
+    input_ids.tofile(os.path.join(args.output_path, "input_ids_padded.bin"))
+    input_lens.tofile(os.path.join(args.output_path, "input_lengths.bin"))
+