From 27ef43ae504f11b40878473e4fabafc0e1f01066 Mon Sep 17 00:00:00 2001
From: mlcommons-bot <74634038+mlcommons-bot@users.noreply.github.com>
Date: Thu, 25 Jan 2024 18:13:38 -0600
Subject: [PATCH 1/5] =?UTF-8?q?=F0=9F=94=84=20synced=20local=20'tools/subm?=
 =?UTF-8?q?ission/power/sources=5Fchecksums.json'=20with=20remote=20'compl?=
 =?UTF-8?q?iance/sources=5Fchecksums.json'=20(#1582)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: mlcommons-bot
---
 tools/submission/power/sources_checksums.json | 38 +------------------
 1 file changed, 2 insertions(+), 36 deletions(-)

diff --git a/tools/submission/power/sources_checksums.json b/tools/submission/power/sources_checksums.json
index 0ae4cc0205..78a240f1ad 100644
--- a/tools/submission/power/sources_checksums.json
+++ b/tools/submission/power/sources_checksums.json
@@ -1,38 +1,4 @@
 [
-  {
-    "server.py": "c3f90f2f7eeb4db30727556d0c815ebc89b3d28b",
-    "__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-    "client.py": "33ca4f26368777ac06e01f9567b714a4b8063886",
-    "tests/unit/test_source_hashes.py": "00468a2907583c593e6574a1f6b404e4651c221a",
-    "tests/unit/__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-    "tests/unit/test_server.py": "948c1995d4008bc2aa6c4046a34ffa3858d6d671",
-    "lib/time_sync.py": "3210db56eb0ff0df57bf4293dc4d4b03fffd46f1",
-    "lib/source_hashes.py": "60a2e02193209e8d392803326208d5466342da18",
-    "lib/common.py": "611d8b29633d331eb19c9455ea3b5fa3284ed6df",
-    "lib/server.py": "8054263a14dedddcf8e1c01adc19596c21bad591",
-    "lib/__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-    "lib/summary.py": "aa92f0a3f975eecd44d3c0cd0236342ccc9f941d",
-    "lib/client.py": "c146491755e219a28d440b31f83998dbd5532483",
-    "lib/external/ntplib.py": "4da8f970656505a40483206ef2b5d3dd5e81711d",
-    "lib/external/__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709"
-  },
-  {
-    "__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-    "client.py": "33ca4f26368777ac06e01f9567b714a4b8063886",
-    "lib/__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-    "lib/client.py": "ac2aa093c8e8bbc9569b9e2a3471bc64e58a2258",
-    "lib/common.py": "611d8b29633d331eb19c9455ea3b5fa3284ed6df",
-    "lib/external/__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-    "lib/external/ntplib.py": "4da8f970656505a40483206ef2b5d3dd5e81711d",
-    "lib/server.py": "c7af63c31bb2fbedea4345f571f6e3507d268ada",
-    "lib/source_hashes.py": "60a2e02193209e8d392803326208d5466342da18",
-    "lib/summary.py": "aa92f0a3f975eecd44d3c0cd0236342ccc9f941d",
-    "lib/time_sync.py": "122eba67a9abc85635223e054def53be1367ade2",
-    "server.py": "c3f90f2f7eeb4db30727556d0c815ebc89b3d28b",
-    "tests/unit/__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
-    "tests/unit/test_server.py": "948c1995d4008bc2aa6c4046a34ffa3858d6d671",
-    "tests/unit/test_source_hashes.py": "00468a2907583c593e6574a1f6b404e4651c221a"
-  },
   {
     "__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
     "client.py": "33ca4f26368777ac06e01f9567b714a4b8063886",
@@ -41,7 +7,7 @@
     "lib/common.py": "611d8b29633d331eb19c9455ea3b5fa3284ed6df",
     "lib/external/__init__.py": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
     "lib/external/ntplib.py": "4da8f970656505a40483206ef2b5d3dd5e81711d",
-    "lib/server.py": "c7af63c31bb2fbedea4345f571f6e3507d268ada",
+    "lib/server.py": "99303c836c683aa9017ec565104e636161d02acb",
     "lib/source_hashes.py": "60a2e02193209e8d392803326208d5466342da18",
     "lib/summary.py": "aa92f0a3f975eecd44d3c0cd0236342ccc9f941d",
     "lib/time_sync.py": "80894ef2389e540781ff78de94db16aa4203a14e",
@@ -50,4 +16,4 @@
     "tests/unit/test_server.py": "948c1995d4008bc2aa6c4046a34ffa3858d6d671",
     "tests/unit/test_source_hashes.py": "00468a2907583c593e6574a1f6b404e4651c221a"
   }
-]
+]
\ No newline at end of file

From 9b8006ff2f0ade58cbc1935fbda967a19b2e363b Mon Sep 17 00:00:00 2001
From: Pablo Gonzalez
Date: Thu, 25 Jan 2024 19:26:57 -0500
Subject: [PATCH 2/5] Fix image list mismatch (#1579)

Co-authored-by: Miro
---
 text_to_image/coco.py                  | 4 ++--
 tools/submission/submission_checker.py | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/text_to_image/coco.py b/text_to_image/coco.py
index fa89ba2152..b2c9d6dfc2 100644
--- a/text_to_image/coco.py
+++ b/text_to_image/coco.py
@@ -174,9 +174,9 @@ def save_images(self, ids, ds):
         for id in ids:
             caption = ds.get_caption(id)
             generated = Image.fromarray(self.results[idx[id]])
-            image_path_tmp = f"images/{self.content_ids[id]}.png"
+            image_path_tmp = f"images/{self.content_ids[idx[id]]}.png"
             generated.save(image_path_tmp)
-            info.append((self.content_ids[id], caption))
+            info.append((self.content_ids[idx[id]], caption))
         with open("images/captions.txt", "w+") as f:
             for id, caption in info:
                 f.write(f"{id} {caption}\n")
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index bd716fbd00..c4c66a4aa9 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -1785,6 +1785,8 @@ def check_extra_files(path, target_files):
            if target_file not in files:
                check_pass = False
                missing_files.append(f"{os.path.join(path, dir, target_file)}.png")
+        if "captions" not in files:
+            missing_files.append(f"{os.path.join(path, dir, 'captions.txt')}")
     return check_pass, missing_files

From 180014ad5724de84fd16a42bef69e8f58faf9e4f Mon Sep 17 00:00:00 2001
From: Zhihan Jiang <68881590+nvzhihanj@users.noreply.github.com>
Date: Thu, 25 Jan 2024 19:08:54 -0800
Subject: [PATCH 3/5] #1558 update llama2 reference fp32 accuracy (#1583)

Co-authored-by: Miro
---
 language/llama2-70b/README.md          | 12 ++++++------
 tools/submission/submission_checker.py |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/language/llama2-70b/README.md b/language/llama2-70b/README.md
index a9a150b4cb..2614749a76 100644
--- a/language/llama2-70b/README.md
+++ b/language/llama2-70b/README.md
@@ -195,15 +195,15 @@ if [ -e ${ACCURACY_LOG_FILE} ]; then
 fi
 ```
 
-The ServerSUT was not tested for GPU runs. You can try setting `--device cuda:0`, but YMMV.
+The ServerSUT was not tested for GPU runs.
 
 ## Accuracy Target
 Running the GPU implementation in FP32 precision resulted in the following FP32 accuracy targets (normalized to a 0-100 scale from a 0.0-1.0 scale):
-- Rouge1: 43.88
-- Rouge2: 21.7108
-- RougeL: 28.2502
-- RougeLsum: 41.4821
+- Rouge1: 44.4312
+- Rouge2: 22.0352
+- RougeL: 28.6162
+- Tokens per sample: 294.45
 
-This was run an 8xH100 node. Total runtime was ~4.5 days.
+This was run on a DGX-H100 node. Total runtime was ~4.5 days.
diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index c4c66a4aa9..b6cd599df1 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -1124,8 +1124,8 @@
             "3d-unet-99.9": ("DICE", 0.86170 * 0.999),
             "gptj-99" : ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878*0.9),
             "gptj-99.9" : ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878*0.9),
-            "llama2-70b-99" : ("ROUGE1", 43.88 * 0.99, "ROUGE2", 21.7108 * 0.99, "ROUGEL", 28.2502 * 0.99, "TOKENS_PER_SAMPLE", 293.3*0.9),
-            "llama2-70b-99.9" : ("ROUGE1", 43.88 * 0.999, "ROUGE2", 21.7108 * 0.999, "ROUGEL", 28.2502 * 0.999, "TOKENS_PER_SAMPLE", 293.3*0.9),
+            "llama2-70b-99" : ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45*0.9),
+            "llama2-70b-99.9" : ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45*0.9),
             "stable-diffusion-xl": ("CLIP_SCORE", 31.68631873, "FID_SCORE", 23.01085758)
         },
         "accuracy-upper-limit": {

From 523316e5a49f458945e8c559de597df48abbf272 Mon Sep 17 00:00:00 2001
From: mlcommons-bot <74634038+mlcommons-bot@users.noreply.github.com>
Date: Fri, 26 Jan 2024 11:34:08 -0600
Subject: [PATCH 4/5] =?UTF-8?q?=F0=9F=94=84=20synced=20local=20'tools/subm?=
 =?UTF-8?q?ission/power/power=5Fchecker.py'=20with=20remote=20'compliance/?=
 =?UTF-8?q?check.py'=20(#1587)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: mlcommons-bot
---
 tools/submission/power/power_checker.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/submission/power/power_checker.py b/tools/submission/power/power_checker.py
index 5adcd197c6..93d9d6fb98 100755
--- a/tools/submission/power/power_checker.py
+++ b/tools/submission/power/power_checker.py
@@ -408,6 +408,8 @@ def get_avg_power(power_path: str, run_path: str) -> Tuple[float, float]:
     with open(spl_fname) as f:
         for line in f:
+            if not line.startswith("Time"):
+                continue
             timestamp = (
                 datetime.strptime(line.split(",")[1], datetime_format)
             ).replace(tzinfo=timezone.utc)

From 3ad853426528a3d692ea34a72b81a7c4fed0346e Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 26 Jan 2024 19:10:13 +0000
Subject: [PATCH 5/5] Update the main README.md for 4.0 (#1586)

---
 README.md | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c635c73b0e..f1cfa2721d 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,23 @@ Please see the [MLPerf Inference benchmark paper](https://arxiv.org/abs/1911.025
 ```
 
 ## MLPerf Inference v4.0 (submission deadline February 23, 2024)
-Code freeze coming soon...
+There is an extra one-week extension allowed only for the llama2-70b submissions. For submissions, please use the master branch and any commit since the [4.0 seed release](https://github.com/mlcommons/inference/commit/8e36925bd36a503e39fcbbc488e9e46126f079ed) although it is best to use the latest commit. v4.0 tag will be created from the master branch after the result publication.
+
+For power submissions please use [SPEC PTD 1.10](https://github.com/mlcommons/power/tree/main/inference_v1.0) (needs special access) and any commit of the power-dev repository after the [code-freeze](https://github.com/mlcommons/power-dev/commit/4e026f43481f46ad57d2464d28924018444b0428)
+
+| model | reference app | framework | dataset | category
+| ---- | ---- | ---- | ---- | ---- |
+| resnet50-v1.5 | [vision/classification_and_detection](https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection) | tensorflow, onnx, tvm, ncnn | imagenet2012 | edge,datacenter |
+| retinanet 800x800 | [vision/classification_and_detection](https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection) | pytorch, onnx | openimages resized to 800x800| edge,datacenter |
+| bert | [language/bert](https://github.com/mlcommons/inference/tree/master/language/bert) | tensorflow, pytorch, onnx | squad-1.1 | edge,datacenter |
+| dlrm-v2 | [recommendation/dlrm_v2](https://github.com/mlcommons/inference/tree/master/recommendation/dlrm_v2/pytorch) | pytorch | Multihot Criteo Terabyte | datacenter |
+| 3d-unet | [vision/medical_imaging/3d-unet-kits19](https://github.com/mlcommons/inference/tree/master/vision/medical_imaging/3d-unet-kits19) | pytorch, tensorflow, onnx | KiTS19 | edge,datacenter |
+| rnnt | [speech_recognition/rnnt](https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt) | pytorch | OpenSLR LibriSpeech Corpus | edge,datacenter |
+| gpt-j | [language/gpt-j](https://github.com/mlcommons/inference/tree/master/language/gpt-j)| pytorch | CNN-Daily Mail | edge,datacenter |
+| stable-diffusion-xl | [text_to_image] (https://github.com/mlcommons/inference/tree/master/text_to_image) | pytorch | COCO 2014| edge,datacenter |
+| llama2-70b | [language/llama2-70b](https://github.com/mlcommons/inference/tree/master/language/llama2-70b) | pytorch | OpenOrca | datacenter |
+
+* Framework here is given for the reference implementation. Submitters are free to use their own frameworks to run the benchmark.
 
 ## MLPerf Inference v3.1 (submission August 18, 2023)
 Please use [v3.1 tag](https://github.com/mlcommons/inference/releases/tag/v3.1) (```git checkout v3.1```) if you would like to reproduce the v3.1 results.
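
The reference apps listed in the table above all drive their models through MLPerf LoadGen, and the framework column only describes the reference implementations, so a submitter typically swaps in their own model-execution code behind the same LoadGen callbacks. As a rough sketch of what that involves, here is a minimal harness assuming the `mlperf_loadgen` Python bindings built from this repository; `run_model`, the sample counts, and the Offline/PerformanceOnly settings are placeholders, and exact binding signatures can differ between LoadGen versions:

```python
# Rough sketch only, not a reference implementation.
import mlperf_loadgen as lg

TOTAL_SAMPLES = 1024        # placeholder; use the benchmark's real sample count
PERFORMANCE_SAMPLES = 1024  # placeholder; samples LoadGen may keep resident


def run_model(sample_indices):
    # Placeholder: run inference with your own framework here.
    return [None for _ in sample_indices]


def issue_queries(query_samples):
    # LoadGen calls this with a batch of samples; a real harness would pass
    # each result's buffer address and size instead of (0, 0).
    run_model([qs.index for qs in query_samples])
    lg.QuerySamplesComplete(
        [lg.QuerySampleResponse(qs.id, 0, 0) for qs in query_samples]
    )


def flush_queries():
    pass


def load_samples(indices):
    pass  # e.g. preprocess and cache inputs for these dataset indices


def unload_samples(indices):
    pass


settings = lg.TestSettings()
settings.scenario = lg.TestScenario.Offline
settings.mode = lg.TestMode.PerformanceOnly

sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(TOTAL_SAMPLES, PERFORMANCE_SAMPLES, load_samples, unload_samples)
lg.StartTest(sut, qsl, settings)
lg.DestroyQSL(qsl)
lg.DestroySUT(sut)
```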