From e29e368e069711beb8c33c313e599f6c8258d86c Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sun, 2 Feb 2025 16:30:53 +0100 Subject: [PATCH 1/2] Finalizing aggregation of legacy CM automation scripts for MLOps and MLPerf --- CONTRIBUTING.md | 75 -- CONTRIBUTORS.md | 141 +++ README.md | 8 +- cmx4mlops/README.md | 48 +- cmx4mlops/cmx4mlops/CONTRIBUTING.md | 16 + cmx4mlops/cmx4mlops/CONTRIBUTORS.md | 74 ++ cmx4mlops/cmx4mlops/COPYRIGHT.txt | 3 + cmx4mlops/cmx4mlops/HISTORY.md | 127 +++ cmx4mlops/cmx4mlops/LICENSE.md | 177 ++++ cmx4mlops/cmx4mlops/LICENSE.third-party.md | 1 + cmx4mlops/cmx4mlops/VERSION | 2 +- cmx4mlops/cmx4mlops/cmr.yaml | 4 +- cmx4mlops/cmx4mlops/repo/README.md | 67 -- .../repo/automation/cache/module_misc.py | 2 +- .../repo/automation/cmx-demo/modulex.py | 20 +- .../repo/automation/script/module.py | 305 +++---- .../repo/automation/script/module_misc.py | 47 +- cmx4mlops/cmx4mlops/repo/script/README.md | 41 +- .../app-image-classification-onnx-py/_cm.yaml | 1 - .../COPYRIGHT.md | 9 + .../README-extra.md | 1 + .../_cm.yaml | 469 ++++++++++ .../customize.py | 253 ++++++ .../ref/LICENSE.md | 177 ++++ .../ref/README.md | 2 + .../ref/python/__init__.py | 0 .../ref/python/backend.py | 23 + .../ref/python/backend_pytorch_native.py | 95 ++ .../ref/python/cognata.py | 351 ++++++++ .../ref/python/cognata_labels.py | 49 ++ .../ref/python/dataset.py | 303 +++++++ .../ref/python/main.py | 659 ++++++++++++++ .../user.conf | 6 + .../script/app-mlperf-automotive/COPYRIGHT.md | 9 + .../script/app-mlperf-automotive/_cm.yaml | 287 ++++++ .../script/app-mlperf-automotive/customize.py | 103 +++ .../script/app-mlperf-inference-amd/_cm.yaml | 2 +- .../app-mlperf-inference-dummy/_cm.yaml | 2 +- .../app-mlperf-inference-intel/_cm.yaml | 2 +- .../_cm.yaml | 815 ++++++++++-------- .../customize.py | 49 +- .../app-mlperf-inference-nvidia/_cm.yaml | 2 +- .../app-mlperf-inference-nvidia/customize.py | 30 +- .../app-mlperf-inference-qualcomm/_cm.yaml | 2 +- .../app-mlperf-inference-redhat/_cm.yaml | 2 +- .../repo/script/app-mlperf-inference/_cm.yaml | 136 ++- .../script/app-mlperf-inference/customize.py | 34 +- .../repo/script/build-docker-image/_cm.yaml | 3 + .../script/build-docker-image/customize.py | 8 +- .../repo/script/build-dockerfile/_cm.yaml | 7 +- .../repo/script/build-dockerfile/customize.py | 46 +- .../script/draw-graph-from-json-data/_cm.yaml | 1 + .../_cm.yaml | 11 +- .../customize.py | 25 +- .../_cm.yaml | 2 +- .../customize.py | 4 +- .../cmx4mlops/repo/script/get-cudnn/_cm.yaml | 1 + .../script/get-dataset-coco2014/customize.py | 10 +- .../repo/script/get-dataset-coco2014/run.sh | 3 +- .../COPYRIGHT.md | 9 + .../README-extra.md | 62 ++ .../get-dataset-cognata-mlcommons/_cm.yaml | 161 ++++ .../checksums/cognata_poc.txt | 41 + .../customize.py | 449 ++++++++++ .../repo/script/get-dataset-igbh/COPYRIGHT.md | 9 + .../repo/script/get-dataset-igbh/_cm.yaml | 448 ++++++++++ .../repo/script/get-dataset-igbh/customize.py | 69 ++ .../repo/script/get-dataset-igbh/run.sh | 24 + .../script/get-dataset-imagenet-aux/_cm.yaml | 2 + .../get-dataset-imagenet-calibration/_cm.yaml | 2 + .../script/get-dataset-imagenet-val/_cm.yaml | 2 + .../COPYRIGHT.md | 9 + .../_cm.yaml | 56 ++ .../customize.py | 31 + .../_cm.yaml | 4 +- .../_cm.yaml | 2 + .../_cm.yaml | 2 + .../repo/script/get-dataset-openorca/_cm.yaml | 2 + .../script/get-dataset-squad-vocab/_cm.yaml | 2 + .../repo/script/get-dataset-squad/_cm.yaml | 2 + .../cmx4mlops/repo/script/get-docker/_cm.yaml | 3 +- .../repo/script/get-docker/customize.py | 
10 +- .../get-generic-python-lib/customize.py | 4 + .../get-generic-python-lib/detect-version.py | 1 + .../repo/script/get-generic-sys-util/_cm.yaml | 20 +- .../script/get-gh-actions-runner/_cm.yaml | 7 + .../script/get-huggingface-cli/COPYRIGHT.md | 9 + .../repo/script/get-huggingface-cli/_cm.yaml | 26 + .../script/get-huggingface-cli/customize.py | 30 + .../repo/script/get-huggingface-cli/run.bat | 14 + .../repo/script/get-huggingface-cli/run.sh | 7 + .../get-ml-model-3d-unet-kits19/_cm.yaml | 8 + .../COPYRIGHT.md | 9 + .../README-extra.md | 5 + .../get-ml-model-abtf-ssd-pytorch/_cm.yaml | 174 ++++ .../customize.py | 49 ++ .../get-ml-model-bert-large-squad/_cm.yaml | 4 + .../get-ml-model-dlrm-terabyte/_cm.yaml | 2 + .../repo/script/get-ml-model-gptj/_cm.yaml | 6 + .../script/get-ml-model-gptj/run-nvidia.sh | 2 +- .../get-ml-model-huggingface-zoo/_cm.yaml | 6 + .../repo/script/get-ml-model-llama2/_cm.yaml | 2 + .../script/get-ml-model-llama3/COPYRIGHT.md | 9 + .../repo/script/get-ml-model-llama3/_cm.yaml | 68 ++ .../script/get-ml-model-llama3/customize.py | 35 + .../repo/script/get-ml-model-mixtral/_cm.yaml | 4 + .../get-ml-model-retinanet-nvidia/_cm.yaml | 2 + .../script/get-ml-model-retinanet/_cm.yaml | 4 + .../repo/script/get-ml-model-rgat/_cm.yaml | 20 +- .../script/get-ml-model-rgat/customize.py | 22 +- .../get-ml-model-stable-diffusion/_cm.yaml | 2 + .../script/get-ml-model-tiny-resnet/_cm.yaml | 2 + .../COPYRIGHT.md | 9 + .../_cm.yaml | 39 + .../customize.py | 40 + .../run.bat | 1 + .../run.sh | 27 + .../get-mlperf-inference-loadgen/run.sh | 7 +- .../script/get-mlperf-inference-src/_cm.yaml | 34 +- .../get-mlperf-inference-src/customize.py | 7 +- .../default-config.yaml | 5 +- .../default-config.yaml | 2 +- .../get-mlperf-inference-utils/customize.py | 2 +- .../repo/script/get-tensorrt/customize.py | 2 +- .../repo/script/install-python-src/_cm.yaml | 3 + .../_cm.yaml | 1 + .../customize.py | 5 +- .../script/process-mlperf-accuracy/_cm.yaml | 8 + .../process-mlperf-accuracy/customize.py | 46 +- .../repo/script/pull-git-repo/customize.py | 3 - .../repo/script/pull-git-repo/run.bat | 26 + .../customize.py | 8 + .../run.bat | 6 +- .../run.sh | 5 +- .../COPYRIGHT.md | 3 + .../repo/script/run-docker-container/_cm.yaml | 5 + .../script/run-docker-container/customize.py | 69 +- .../run-mlperf-automotive-app/COPYRIGHT.md | 9 + .../script/run-mlperf-automotive-app/_cm.yaml | 248 ++++++ .../run-mlperf-automotive-app/customize.py | 403 +++++++++ .../script/run-mlperf-inference-app/_cm.yaml | 37 +- .../run-mlperf-inference-app/customize.py | 587 +------------ .../_cm.yaml | 12 + .../script/submit-mlperf-results/COPYRIGHT.md | 9 + .../script/submit-mlperf-results/_cm.yaml | 22 + .../script/submit-mlperf-results/customize.py | 194 +++++ .../test-cm-core/src/script/process_tests.py | 38 + .../test-cm-core/src/script/test_docker.py | 4 +- .../src/tutorials/test_tutorial_retinanet.py | 2 +- .../src/tutorials/test_tutorial_tvm_pip_ge.py | 2 +- .../src/tutorials/test_tutorial_tvm_pip_vm.py | 2 +- cmx4mlops/pyproject.toml | 80 ++ 152 files changed, 7605 insertions(+), 1513 deletions(-) create mode 100644 CONTRIBUTORS.md create mode 100644 cmx4mlops/cmx4mlops/CONTRIBUTING.md create mode 100644 cmx4mlops/cmx4mlops/CONTRIBUTORS.md create mode 100644 cmx4mlops/cmx4mlops/COPYRIGHT.txt create mode 100644 cmx4mlops/cmx4mlops/HISTORY.md create mode 100644 cmx4mlops/cmx4mlops/LICENSE.md create mode 100644 cmx4mlops/cmx4mlops/LICENSE.third-party.md create mode 100644 
cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/README-extra.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/README.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/user.conf create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/README-extra.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/run.sh create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.bat create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.sh create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/README-extra.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml create mode 
100644 cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.bat create mode 100644 cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.sh create mode 100644 cmx4mlops/cmx4mlops/repo/script/pull-git-repo/run.bat create mode 100644 cmx4mlops/cmx4mlops/repo/script/reproduce-mlperf-inference-dummy/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/COPYRIGHT.md create mode 100644 cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/_cm.yaml create mode 100644 cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/customize.py create mode 100644 cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/process_tests.py create mode 100644 cmx4mlops/pyproject.toml diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 277e33d512..8108f5b80e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,78 +13,3 @@ MLCommons project work is tracked with issue trackers and pull requests. Modify the project in your own fork and issue a pull request once you want other developers to take a look at what you have done and discuss the proposed changes. Ensure that cla-bot and other checks pass for your Pull requests. - -Collective Knowledge (CK), Collective Mind (CM) and Common Metadata eXchange (CMX) -were created by [Grigori Fursin](https://arxiv.org/abs/2406.16791), -sponsored by cKnowledge.org and cTuning.org, and donated to MLCommons -to benefit everyone. 
Since then, this open-source automation technology -(CM/CMX, CM4MLOps/MLPerf automations, CM4ABTF, CM4Research, etc) is being extended -as a community effort thanks to all our volunteers, collaborators -and contributors listed here in alphabetical order: - -* @Henryfzh -* @Leonard226 -* @Oseltamivir -* @Submandarine -* Resmi Arjun -* Omar Benjelloun (Google) -* Alice Cheng (Nvidia) -* Jiahao Chen (MIT) -* Ramesh N Chukka (Intel) -* Ray DeMoss (One Stop Systems) -* Ryan T DeRue (Purdue University) -* Himanshu Dutta (Indian Institute of Technology) -* Nicolas Essayan -* Justin Faust (One Stop Systems) -* Diane Feddema (Red Hat) -* Leonid Fursin (United Silicon Carbide) -* Anirban Ghosh (Nvidia) -* James Goel (Qualcomm) -* Michael Goin (Neural Magic) -* Jose Armando Hernandez (Paris Saclay University) -* Mehrdad Hessar (OctoML) -* Miro Hodak (AMD) -* Sachin Idgunji (Nvidia) -* Tom Jablin (Google) -* Nino Jacob -* David Kanter (MLCommons) -* Alex Karargyris -* Jason Knight (OctoML) -* Ilya Kozulin (Deelvin) -* @makaveli10 (Collabora) -* Steve Leak(NERSC) -* Amija Maji (Purdue University) -* Peter Mattson (Google, MLCommons) -* Kasper Mecklenburg (Arm) -* Pablo Gonzalez Mesa -* Thierry Moreau (OctoML) -* Sachin Mudaliyar -* Stanley Mwangi (Microsoft) -* Ashwin Nanjappa (Nvidia) -* Hai Ah Nam (NERSC) -* Nandeeka Nayak (UIUC) -* Datta Nimmaturi (Nutanix) -* Lakshman Patel -* Arun Tejusve Raghunath Rajan (Cruise) -* Vijay Janapa Reddi (Harvard University) -* Andrew Reusch (OctoML) -* Anandhu Sooraj (Kerala Technical University) -* Sergey Serebryakov (HPE) -* Warren Schultz (Principled Technologies) -* Amrutha Sheleenderan (Kerala Technical University) -* Micah J Sheller (Intel) -* Byoungjun Seo (TTA) -* Aditya Kumar Shaw (Indian Institute of Science) -* Ilya Slavutin (Deelvin) -* Jinho Suh (Nvidia) -* Arjun Suresh -* Badhri Narayanan Suresh (Intel) -* David Tafur (MLCommons) -* Chloe Tessier -* Gaurav Verma (Stony Brook University) -* Zixian Wang -* Nathan Wasson -* Scott Wasson (MLCommons) -* Haoyang Zhang (UIUC) -* Bojian Zheng (University of Toronto) -* Thomas Zhu (Oxford University) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 0000000000..c88508712d --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,141 @@ +Collective Knowledge (CK), Collective Mind (CM) and Common Metadata eXchange (CMX) +were created by [Grigori Fursin](https://arxiv.org/abs/2406.16791), +sponsored by cKnowledge.org and cTuning.org, and donated to MLCommons +to benefit everyone. 
Since then, this open-source automation technology
+(CM, CMX, MLPerf automations, etc) has been extended
+as a community effort thanks to all our volunteers, collaborators
+and contributors listed here in alphabetical order:
+
+# MLCommons Collective Mind (CM)
+
+* @Henryfzh
+* @Leonard226
+* @Oseltamivir
+* @Submandarine
+* Resmi Arjun
+* Omar Benjelloun (Google)
+* Alice Cheng (Nvidia)
+* Jiahao Chen (MIT)
+* Ramesh N Chukka (Intel)
+* Ray DeMoss (One Stop Systems)
+* Ryan T DeRue (Purdue University)
+* Himanshu Dutta (Indian Institute of Technology)
+* Nicolas Essayan
+* Justin Faust (One Stop Systems)
+* Diane Feddema (Red Hat)
+* Leonid Fursin (United Silicon Carbide)
+* Anirban Ghosh (Nvidia)
+* James Goel (Qualcomm)
+* Michael Goin (Neural Magic)
+* Jose Armando Hernandez (Paris Saclay University)
+* Mehrdad Hessar (OctoML)
+* Miro Hodak (AMD)
+* Sachin Idgunji (Nvidia)
+* Tom Jablin (Google)
+* Nino Jacob
+* David Kanter (MLCommons)
+* Alex Karargyris
+* Jason Knight (OctoML)
+* Ilya Kozulin (Deelvin)
+* @makaveli10 (Collabora)
+* Steve Leak (NERSC)
+* Amija Maji (Purdue University)
+* Peter Mattson (Google, MLCommons)
+* Kasper Mecklenburg (Arm)
+* Pablo Gonzalez Mesa
+* Thierry Moreau (OctoML)
+* Sachin Mudaliyar
+* Stanley Mwangi (Microsoft)
+* Ashwin Nanjappa (Nvidia)
+* Hai Ah Nam (NERSC)
+* Nandeeka Nayak (UIUC)
+* Datta Nimmaturi (Nutanix)
+* Lakshman Patel
+* Arun Tejusve Raghunath Rajan (Cruise)
+* Vijay Janapa Reddi (Harvard University)
+* Andrew Reusch (OctoML)
+* Anandhu Sooraj (Kerala Technical University)
+* Sergey Serebryakov (HPE)
+* Warren Schultz (Principled Technologies)
+* Amrutha Sheleenderan (Kerala Technical University)
+* Micah J Sheller (Intel)
+* Byoungjun Seo (TTA)
+* Aditya Kumar Shaw (Indian Institute of Science)
+* Ilya Slavutin (Deelvin)
+* Jinho Suh (Nvidia)
+* Arjun Suresh
+* Badhri Narayanan Suresh (Intel)
+* David Tafur (MLCommons)
+* Chloe Tessier
+* Gaurav Verma (Stony Brook University)
+* Zixian Wang
+* Nathan Wasson
+* Scott Wasson (MLCommons)
+* Haoyang Zhang (UIUC)
+* Bojian Zheng (University of Toronto)
+* Thomas Zhu (Oxford University)
+
+See more acknowledgments at the end of this [article](https://arxiv.org/abs/2406.16791),
+which describes the Collective Mind workflow automation framework.
+ +# Legacy Collective Knowledge framework (CK) + +* Sam Ainsworth (University of Cambridge, UK) +* Saheli Bhattacharjee (@sahelib25) +* Gianfranco Costamagna +* Chris Cummins (Facebook) +* Valentin Dalibard <valentin.dalibard@cl.cam.ac.uk> +* Alastair Donaldson <alastair.donaldson@imperial.ac.uk> +* Thibaut Dumontet +* Daniil Efremov (Xored) +* Todd Gamblin (LLNL) +* Chandan Reddy Gopal (ENS Paris) +* Leo Gordon (dividiti) +* Dave Greasley (University of Bristol) +* Herve Guillou +* Vincent Grevendonk (Arm) +* Christophe Guillon (STMicroelectronics) +* Sven van Haastregt (Arm) +* Michael Haidl +* Stephen Herbein (LLNL) +* Patrick Hesse (College of Saint Benedict and Saint John's University) +* Nikolay Istomin (Xored) +* Kenan Kalajdzic +* Yuriy Kashnikov +* Alexey Kravets (Arm) +* Michael Kruse <MichaelKruse@meinersbur.de> +* Andrei Lascu <andrei.lascu10@imperial.ac.uk> +* Anton Lokhmotov (Krai) +* Graham Markall <graham.markall@continuum.io> +* Michael Mcgeagh (Arm) +* Abdul Wahid Memon <engrwahidmemon@gmail.com> +* Sachin Mudaliyar +* Luigi Nardi +* Cedric Nugteren <web@cedricnugteren.nl> +* Lucas Nussbaum (Universite de Lorraine) +* Ivan Ospiov (Xored) +* Lakshman Patel @Patel230 +* Egor Pasko (Google) +* Ed Plowman (Arm) +* Lahiru Rasnayake (NTNU) +* Vijay Janapa Reddi (Harvard University) +* Alex Redshaw (Arm) +* Vincent Rehm +* Toomas Remmelg (University of Edinburgh) +* Jarrett Revels (MIT) +* Dmitry Savenko (Xored) +* Gavin Simpson (Arm) +* Aaron Smith (Microsoft) +* Michel Steuwer (University of Edinburgh) +* Flavio Vella (Free University of Bozen-Bolzano) +* Gaurav Verma (Stony Brook University) +* Emanuele Vitali +* Dave Wilkinson (University of Pittsburgh) +* Sergey Yakushkin (Synopsys) +* Eiko Yoneki <eiko.yoneki@cl.cam.ac.uk> +* Thomas Zhu (Oxford University) <thomas.zhu.sh@gmail.com> +* @filven +* @ValouBambou + +See more acknowledgments at the end of this [article](https://doi.org/10.1098/rsta.2020.0211), +which describes the original Collective Knowledge workflow automation framework. diff --git a/README.md b/README.md index aa88af22e7..bf3aebc7cf 100755 --- a/README.md +++ b/README.md @@ -53,8 +53,7 @@ The CM/CMX architecture diagram is available for viewing #### MLOps and MLPerf automations -[CM4MLOPS repository powered by CM](https://github.com/mlcommons/ck/tree/master/cm-mlops) - -a collection of portable, extensible and technology-agnostic automation recipes +We have developed a collection of portable, extensible and technology-agnostic automation recipes with a common CLI and Python API (CM scripts) to unify and automate all the manual steps required to compose, run, benchmark and optimize complex ML/AI applications on diverse platforms with any software and hardware. @@ -75,10 +74,9 @@ at MLCommons to run MLPerf inference benchmarks across diverse systems using CM. #### MLCommons ABTF automation -[CM4ABTF repository powered by CM](https://github.com/mlcommons/cm4abtf) - -a collection of portable automations and CM scripts to run the upcoming +We have developed a collection of portable automations and CM scripts to run the upcoming automotive MLPerf benchmark across different models, data sets, software -and hardware from different vendors. +and hardware from different vendors: see [CM4ABTF repository](https://github.com/mlcommons/cm4abtf). 
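+
+For example, these recipes can be invoked from Python via the
+[cmind](https://pypi.org/project/cmind) package (a minimal sketch;
+the `detect,os` tags below are just one example of an available CM script):
+
+```python
+import cmind
+
+# Run the CM "script" automation for the script tagged "detect,os"
+# and stream its output to the console ('out': 'con').
+r = cmind.access({'action': 'run',
+                  'automation': 'script',
+                  'tags': 'detect,os',
+                  'out': 'con'})
+
+# Every CM call returns a dict; a non-zero 'return' signals an error.
+if r['return'] > 0:
+    print(r['error'])
+```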
#### MLPerf results visualization

diff --git a/cmx4mlops/README.md b/cmx4mlops/README.md
index a0990367ef..e034c78c99 100644
--- a/cmx4mlops/README.md
+++ b/cmx4mlops/README.md
@@ -1 +1,47 @@
-TBD
+# Aggregated CM and CMX automations for MLOps and MLPerf
+
+[![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md)
+[![Powered by CM/CMX](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://pypi.org/project/cmind)
+
+This repository is powered by the [Collective Mind workflow automation framework](https://github.com/mlcommons/ck/tree/master/cm).
+
+Two key automations developed using CM are **Script** and **Cache**, which streamline machine learning (ML) workflows,
+including managing Docker runs. Both Script and Cache automations are part of the **cmx4mlops** repository.
+
+The [CM scripts](https://access.cknowledge.org/playground/?action=scripts),
+also housed in this repository, consist of hundreds of modular Python-wrapped scripts accompanied
+by `yaml` metadata, enabling the creation of robust and flexible ML workflows.
+
+## License
+
+[Apache 2.0](LICENSE.md)
+
+## Copyright
+
+© 2022-2025 MLCommons. All Rights Reserved.
+
+Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone and encourage collaborative development.
+
+## Maintainer(s)
+
+* MLCommons
+
+## Author
+
+[Grigori Fursin](https://cKnowledge.org/gfursin)
+
+We sincerely appreciate all [contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md)
+for their invaluable feedback and support!
+
+## Concepts
+
+Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) and the [white paper](https://arxiv.org/abs/2406.16791).
+
+## Parent project
+
+Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details.
+
+## Citing this project
+
+If you found the CM automations helpful, kindly reference this article:
+[ [ArXiv](https://arxiv.org/abs/2406.16791) ]
diff --git a/cmx4mlops/cmx4mlops/CONTRIBUTING.md b/cmx4mlops/cmx4mlops/CONTRIBUTING.md
new file mode 100644
index 0000000000..0975ca9efb
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/CONTRIBUTING.md
@@ -0,0 +1,16 @@
+### Contributing to the MLCommons
+
+The best way to contribute to MLCommons is to get involved with one of our many project communities.
+You can find more information about getting involved with MLCommons [here](https://mlcommons.org/en/get-involved/#getting-started).
+
+Generally we encourage people to become an MLCommons member if they wish to contribute to MLCommons projects,
+but outside pull requests are very welcome too.
+
+Regardless of whether you are a member, your organization needs to sign the MLCommons CLA.
+Please fill out this [CLA sign-up form](https://forms.gle/Ew1KkBVpyeJDuRw67) to get started.
+
+MLCommons project work is tracked with issue trackers and pull requests.
+Modify the project in your own fork and issue a pull request once you want other developers
+to take a look at what you have done and discuss the proposed changes.
+Ensure that cla-bot and other checks pass for your pull requests.
+
diff --git a/cmx4mlops/cmx4mlops/CONTRIBUTORS.md b/cmx4mlops/cmx4mlops/CONTRIBUTORS.md
new file mode 100644
index 0000000000..cd3b7effd3
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/CONTRIBUTORS.md
@@ -0,0 +1,74 @@
+Collective Knowledge (CK), Collective Mind (CM) and Common Metadata eXchange (CMX)
+were created by [Grigori Fursin](https://arxiv.org/abs/2406.16791),
+sponsored by cKnowledge.org and cTuning.org, and donated to MLCommons
+to benefit everyone. Since then, this open-source automation technology
+(CM, CMX, MLPerf automations, etc) has been extended
+as a community effort thanks to all our volunteers, collaborators
+and contributors listed here in alphabetical order:
+
+* @Henryfzh
+* @Leonard226
+* @Oseltamivir
+* @Submandarine
+* Resmi Arjun
+* Omar Benjelloun (Google)
+* Alice Cheng (Nvidia)
+* Jiahao Chen (MIT)
+* Ramesh N Chukka (Intel)
+* Ray DeMoss (One Stop Systems)
+* Ryan T DeRue (Purdue University)
+* Himanshu Dutta (Indian Institute of Technology)
+* Nicolas Essayan
+* Justin Faust (One Stop Systems)
+* Diane Feddema (Red Hat)
+* Leonid Fursin (United Silicon Carbide)
+* Anirban Ghosh (Nvidia)
+* James Goel (Qualcomm)
+* Michael Goin (Neural Magic)
+* Jose Armando Hernandez (Paris Saclay University)
+* Mehrdad Hessar (OctoML)
+* Miro Hodak (AMD)
+* Sachin Idgunji (Nvidia)
+* Tom Jablin (Google)
+* Nino Jacob
+* David Kanter (MLCommons)
+* Alex Karargyris
+* Jason Knight (OctoML)
+* Ilya Kozulin (Deelvin)
+* @makaveli10 (Collabora)
+* Steve Leak (NERSC)
+* Amija Maji (Purdue University)
+* Peter Mattson (Google, MLCommons)
+* Kasper Mecklenburg (Arm)
+* Pablo Gonzalez Mesa
+* Thierry Moreau (OctoML)
+* Sachin Mudaliyar
+* Stanley Mwangi (Microsoft)
+* Ashwin Nanjappa (Nvidia)
+* Hai Ah Nam (NERSC)
+* Nandeeka Nayak (UIUC)
+* Datta Nimmaturi (Nutanix)
+* Lakshman Patel
+* Arun Tejusve Raghunath Rajan (Cruise)
+* Vijay Janapa Reddi (Harvard University)
+* Andrew Reusch (OctoML)
+* Anandhu Sooraj (Kerala Technical University)
+* Sergey Serebryakov (HPE)
+* Warren Schultz (Principled Technologies)
+* Amrutha Sheleenderan (Kerala Technical University)
+* Micah J Sheller (Intel)
+* Byoungjun Seo (TTA)
+* Aditya Kumar Shaw (Indian Institute of Science)
+* Ilya Slavutin (Deelvin)
+* Jinho Suh (Nvidia)
+* Arjun Suresh
+* Badhri Narayanan Suresh (Intel)
+* David Tafur (MLCommons)
+* Chloe Tessier
+* Gaurav Verma (Stony Brook University)
+* Zixian Wang
+* Nathan Wasson
+* Scott Wasson (MLCommons)
+* Haoyang Zhang (UIUC)
+* Bojian Zheng (University of Toronto)
+* Thomas Zhu (Oxford University)
diff --git a/cmx4mlops/cmx4mlops/COPYRIGHT.txt b/cmx4mlops/cmx4mlops/COPYRIGHT.txt
new file mode 100644
index 0000000000..ea868e0781
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/COPYRIGHT.txt
@@ -0,0 +1,3 @@
+Copyright (c) 2021-2025 MLCommons
+
+Grigori Fursin, the cTuning foundation and OctoML donated this project to MLCommons to benefit everyone.
diff --git a/cmx4mlops/cmx4mlops/HISTORY.md b/cmx4mlops/cmx4mlops/HISTORY.md
new file mode 100644
index 0000000000..4921bc0b9b
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/HISTORY.md
@@ -0,0 +1,127 @@
+This document narrates the history of the creation and design of CM, CM4MLOps and MLPerf automations (also known as CK2)
+by [Grigori Fursin](https://cKnowledge.org/gfursin).
It also highlights the donation of this open-source technology to MLCommons,
+aimed at benefiting the broader community and fostering its ongoing development as a collaborative, community-driven initiative:
+
+* Jan 28, 2021: After delivering an invited ACM TechTalk'21 about the Collective Knowledge framework (CK1)
+  and reproducibility initiatives for conferences, as well as CK-MLOps and MLPerf automations,
+  Grigori received useful feedback and suggestions for improvements to workflow automations:
+  https://learning.acm.org/techtalks/reproducibility.
+
+  Following this, Grigori began prototyping CK2 (later CM) to streamline CK1, CK-MLOps and MLPerf benchmarking.
+  The goal was to dramatically simplify CK1 workflows by introducing just a few core and portable automations,
+  which eventually evolved into `CM script` and `CM cache`.
+
+  At that time, the cTuning foundation hosted CK1 and all the prototypes for the CM framework at https://github.com/ctuning/ck:
+  [ref1](https://github.com/mlcommons/ck/commit/9e57934f4999db23052531e92160772ab831463a),
+  [ref2](https://github.com/mlcommons/ck/tree/9e57934f4999db23052531e92160772ab831463a),
+  [ref3](https://github.com/mlcommons/ck/tree/9e57934f4999db23052531e92160772ab831463a/incubator).
+
+* Sep 23, 2021: donated CK1, CK-MLOps, MLPerf automations and early prototypes of CM from the cTuning repository to MLCommons:
+  [ref1](https://web.archive.org/web/20240803140223/https://octo.ai/blog/octoml-joins-the-community-effort-to-democratize-mlperf-inference-benchmarking),
+  [ref2](https://github.com/mlcommons/ck/tree/228f80b0bf44610c8244ff0c3f6bec5bbd25aa6c/incubator),
+  [ref3](https://github.com/mlcommons/ck/tree/695c3843fd8121bbdde6c453cd6ec9503986b0c6?tab=readme-ov-file#author-and-coordinator),
+  [ref4](https://github.com/mlcommons/ck/tree/master/ck),
+  [ref5](https://github.com/mlcommons/ck-mlops).
+
+  Prepared an MLCommons proposal for the creation of the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md),
+  aimed at fostering community-driven support for CK and CM developments to benefit everyone.
+
+* Jan 2022: hired Arjun Suresh at OctoML to support and maintain the CK1 framework and help prepare OctoML's MLPerf submissions using CK1.
+  Meanwhile, transitioned to focusing on CM and CM-MLOps development, building upon the prototypes created in 2021.
+
+* Mar 1, 2022: started developing cm-mlops: [ref](https://github.com/octoml/cm-mlops/commit/0ae94736a420dfa84f7417fc62d323303b8760c6).
+
+* Mar 24, 2022: after successfully stabilizing the initial prototype of CM, donated it to MLCommons to benefit the entire community:
+  [ref1](https://github.com/mlcommons/ck/tree/c7918ad544f26b6c499c2fc9c07431a9640fca5a/ck2),
+  [ref2](https://github.com/mlcommons/ck/tree/c7918ad544f26b6c499c2fc9c07431a9640fca5a/ck2#coordinators),
+  [ref3](https://github.com/mlcommons/ck/commit/3c146cb3c75a015363f7a96758adf6dcc43032d6),
+  [ref4](https://github.com/mlcommons/ck/commit/3c146cb3c75a015363f7a96758adf6dcc43032d6#diff-d97f0f6f5a32f16d6ed18b9600ffc650f7b25512685f7a2373436c492c6b52b3R48).
+
+* Apr 6, 2022: started transitioning previous MLOps and MLPerf automations from the mlcommons/ck-mlops format
+  to the new CM format using the cm-mlops repository (later renamed to cm4mlops):
+  [ref1](https://github.com/octoml/cm-mlops/commit/d1efdc30fb535ce144020d4e88f3ed768c933176),
+  [ref2](https://github.com/octoml/cm-mlops/blob/d1efdc30fb535ce144020d4e88f3ed768c933176/CONTRIBUTIONS).
+
+* Apr 22, 2022: began architecting "Intelligent Components" in the CM-MLOps repository,
+  which were later renamed to `CM Script`:
+  [ref1](https://github.com/octoml/cm-mlops/commit/b335c609c47d2c547afe174d9df232652d57f4f8),
+  [ref2](https://github.com/octoml/cm-mlops/tree/b335c609c47d2c547afe174d9df232652d57f4f8),
+  [ref3](https://github.com/octoml/cm-mlops/blob/b335c609c47d2c547afe174d9df232652d57f4f8/CONTRIBUTIONS).
+
+  At the same time, prototyped other core CM automations, including IC, Docker, and Experiment:
+  [ref1](https://github.com/octoml/cm-mlops/tree/b335c609c47d2c547afe174d9df232652d57f4f8/automation),
+  [ref2](https://github.com/mlcommons/ck/commits/master/?before=7f66e2438bfe21b4ce2d08326a5168bb9e3132f6+7001).
+
+* Apr 28, 2022: donated CM-MLOps to MLCommons, which was later renamed to CM4MLOps:
+  [ref](https://github.com/mlcommons/ck/commit/456e4861056c0e39c4d689c03da91f90a44be058).
+
+* May 9, 2022: developed the initial set of core IC automations for MLOps (aka CM scripts):
+  [ref1](https://github.com/octoml/cm-mlops/commit/4a4a027f4088ce7e7abcec29c39d98981bf09d4c),
+  [ref2](https://github.com/octoml/cm-mlops/tree/4a4a027f4088ce7e7abcec29c39d98981bf09d4c),
+  [ref3](https://github.com/octoml/cm-mlops/blob/7692240becd6397a96c3975388913ea082002e7a/CONTRIBUTIONS).
+
+* May 11, 2022: after successfully prototyping CM and CM-MLOps, deprecated the CK1 framework in favor of CM.
+  Transferred Arjun Suresh to the CM project as a maintainer and tester for CM and CM-MLOps:
+  [ref](https://github.com/octoml/cm-mlops/blob/17405833665bc1e93820f9ff76deb28a0f543bdb/CONTRIBUTIONS).
+
+  Created a [file](https://github.com/mlcommons/ck/blob/master/cm-mlops/CHANGES.md)
+  to document and track our public developments at MLCommons.
+
+* Jun 8, 2022: renamed the 'IC' automation to the more intuitive 'CM script' automation:
+  [ref1](https://github.com/mlcommons/ck/tree/5ca4e2c33e58a660ac20a545d8aa5143ab6e8e81/cm-devops/automation/script),
+  [ref2](https://github.com/mlcommons/ck/tree/5ca4e2c33e58a660ac20a545d8aa5143ab6e8e81),
+  [ref3](https://github.com/octoml/cm-mlops/commit/7910fb7ffc62a617d987d2f887d6f9981ff80187).
+
+* Jun 16, 2022: prototyped the `CM cache` automation to facilitate caching and reuse of the outputs from CM scripts:
+  [ref1](https://github.com/mlcommons/ck/commit/1f81aae8cebd5567ec4ca55f693beaf32b49fb48),
+  [ref2](https://github.com/mlcommons/ck/tree/1f81aae8cebd5567ec4ca55f693beaf32b49fb48),
+  [ref3](https://github.com/mlcommons/ck/tree/1f81aae8cebd5567ec4ca55f693beaf32b49fb48?tab=readme-ov-file#contacts).
+
+* Sep 6, 2022: delivered a CM demo to run MLPerf while deprecating CK1 automations for MLPerf:
+  [ref1](https://github.com/mlcommons/ck/commit/2c5d5c5c944ae5f252113c62af457c7a4c5e877a#diff-faac2c4ecfd0bfb928dafc938d3dad5651762fbb504a2544752a337294ee2573R224),
+  [ref2](https://github.com/mlcommons/ck/blob/2c5d5c5c944ae5f252113c62af457c7a4c5e877a/CONTRIBUTING.md#author-and-coordinator).
+
+  Welcomed Arjun Suresh as a contributor to CM automations for MLPerf: [ref](https://github.com/mlcommons/ck/blob/2c5d5c5c944ae5f252113c62af457c7a4c5e877a/CONTRIBUTING.md#contributors-in-alphabetical-order).
+
+* From September 2022: coordinated community development of CM and CM4MLOps
+  to [modularize and automate MLPerf benchmarks](https://docs.mlcommons.org/inference)
+  and support [reproducibility initiatives at ML and Systems conferences](https://cTuning.org/ae)
+  through the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md).
+
+  * Directed and financed the creation of CM automations to streamline the MLPerf power measurement processes.
+
+  * Proposed to use MLPerf benchmarks for the Student Cluster Competition, led the developments
+    and prepared a tutorial to run MLPerf inference at SCC'22 via CM: [ref](https://github.com/mlcommons/ck/blob/master/docs/tutorials/sc22-scc-mlperf.md).
+
+* April 2023: departed OctoML to focus on the development of the [CK playground](https://access.cKnowledge.org) and CM automations
+  to make MLPerf accessible to everyone. Hired Arjun Suresh to help with developments.
+
+  * Initiated and funded development of the [MLPerf explorer](https://github.com/ctuning/q2a-mlperf-visualizer)
+    to improve visualization of results.
+
+* August 2023: organized the first mass-scale MLPerf community submission of 12217 inference benchmark v3.1 results
+  out of a total of 13351 results (including 90% of all power results) across diverse models, software and hardware
+  from different vendors via [open challenges](https://access.cknowledge.org/playground/?action=challenges) funded by cTuning.org:
+  [LinkedIn article](https://www.linkedin.com/pulse/new-milestone-make-mlperf-benchmarks-accessible-everyone-fursin/)
+  with results visualized by the [MLPerf explorer](https://github.com/ctuning/q2a-mlperf-visualizer),
+  [CM4MLOps challenges at GitHub](https://github.com/mlcommons/cm4mlops/tree/main/challenge).
+
+* February 2024: proposed to use CM to automate the [MLPerf automotive benchmark (ABTF)](https://mlcommons.org/working-groups/benchmarks/automotive/).
+
+  * moved my prototypes of the CM automation for ABTF to the cm4abtf repo: [ref](https://github.com/mlcommons/cm4abtf/commit/f92b9f464de89a38a4bde149290dede2d94c8631)
+  * led further CM4ABTF developments funded by cTuning.org.
+
+* Starting in April 2024, began the gradual transfer of ongoing maintenance and enhancement
+  responsibilities for CM and CM4MLOps, including MLPerf automations, to MLCommons.
+  Welcomed Anandhu Sooraj as a maintainer and contributor to CM4MLOps with MLPerf automations.
+
+* Took a break from all development activities.
+
+* July 2024: started prototyping the next generation of CM (CMX and CMX4MLOps) with simpler interfaces
+  based on user feedback while maintaining backward compatibility.
+
+* 2025: continue developing CMX and CMX4MLOps to make it easier to run and customize MLPerf inference, training
+  and other benchmarks across diverse models, datasets, software and hardware.
+
+For more details, please refer to the [white paper](https://arxiv.org/abs/2406.16791)
+and the [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339).
diff --git a/cmx4mlops/cmx4mlops/LICENSE.md b/cmx4mlops/cmx4mlops/LICENSE.md
new file mode 100644
index 0000000000..66a27ec5ff
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/LICENSE.md
@@ -0,0 +1,177 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + diff --git a/cmx4mlops/cmx4mlops/LICENSE.third-party.md b/cmx4mlops/cmx4mlops/LICENSE.third-party.md new file mode 100644 index 0000000000..faa0084585 --- /dev/null +++ b/cmx4mlops/cmx4mlops/LICENSE.third-party.md @@ -0,0 +1 @@ +This CM repository may contain CM scripts with third-party files licensed under Apache2, BSD or MIT license. 
diff --git a/cmx4mlops/cmx4mlops/VERSION b/cmx4mlops/cmx4mlops/VERSION index 4e379d2bfe..3eefcb9dd5 100644 --- a/cmx4mlops/cmx4mlops/VERSION +++ b/cmx4mlops/cmx4mlops/VERSION @@ -1 +1 @@ -0.0.2 +1.0.0 diff --git a/cmx4mlops/cmx4mlops/cmr.yaml b/cmx4mlops/cmx4mlops/cmr.yaml index 0a67a3b123..a62e4cf25b 100644 --- a/cmx4mlops/cmx4mlops/cmr.yaml +++ b/cmx4mlops/cmx4mlops/cmr.yaml @@ -3,12 +3,12 @@ uid: 428611a6db02407f git: true -version: "0.0.2" +version: "0.5.1" author: "Grigori Fursin" install_python_requirements: false -min_cm_version: "3.5.3" +min_cm_version: "3.5.2" prefix: repo diff --git a/cmx4mlops/cmx4mlops/repo/README.md b/cmx4mlops/cmx4mlops/repo/README.md index 49bd226a87..e69de29bb2 100644 --- a/cmx4mlops/cmx4mlops/repo/README.md +++ b/cmx4mlops/cmx4mlops/repo/README.md @@ -1,67 +0,0 @@ -## Unified and cross-platform CM interface for DevOps, MLOps and MLPerf - -[![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) -[![Python Version](https://img.shields.io/badge/python-3+-blue.svg)](https://github.com/mlcommons/ck/tree/master/cm/cmind) -[![Powered by CM](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://pypi.org/project/cmind). -[![Downloads](https://static.pepy.tech/badge/cm4mlops)](https://pepy.tech/project/cm4mlops) - -[![CM script automation features test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-cm-script-features.yml) -[![MLPerf inference bert (deepsparse, tf, onnxruntime, pytorch)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml) -[![MLPerf inference MLCommons C++ ResNet50](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml) -[![MLPerf inference ABTF POC Test](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-abtf-poc.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-mlperf-inference-abtf-poc.yml) -[![Test Compilation of QAIC Compute SDK (build LLVM from src)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-compute-sdk-build.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-compute-sdk-build.yml) -[![Test QAIC Software kit Compilation](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml/badge.svg)](https://github.com/mlcommons/cm4mlops/actions/workflows/test-qaic-software-kit.yml) - - -# CM4MLOps repository - -**CM4MLOps** repository is powered by the [Collective Mind automation framework](https://github.com/mlcommons/ck/tree/master/cm), -a [Python package](https://pypi.org/project/cmind/) with a CLI and API designed for creating and managing automations. - -Two key automations developed using CM are **Script** and **Cache**, which streamline machine learning (ML) workflows, -including managing Docker runs. Both Script and Cache automations are part of the **cm4mlops** repository. - -The CM scripts, also housed in this repository, consist of hundreds of modular Python-wrapped scripts accompanied -by `yaml` metadata, enabling the creation of robust and flexible ML workflows. 
- -- **CM Scripts Documentation**: [https://docs.mlcommons.org/cm4mlops/](https://docs.mlcommons.org/cm4mlops/) -- **CM CLI Documentation**: [https://docs.mlcommons.org/ck/specs/cm-cli/](https://docs.mlcommons.org/ck/specs/cm-cli/) - -The `mlperf-branch` of the **cm4mlops** repository is dedicated to developments specific to MLPerf Inference. -Please submit any pull requests (PRs) to this branch. For more information about using CM for MLPerf Inference, -refer to the [MLPerf Inference Documentation](https://docs.mlcommons.org/inference/). - -## License - -[Apache 2.0](LICENSE.md) - -## Copyright - -© 2022-2025 MLCommons. All Rights Reserved. - -Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone and encourage collaborative development. - -## Maintainer(s) - -* MLCommons - -## CM author - -[Grigori Fursin](https://cKnowledge.org/gfursin) - -## CM concepts - -Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) and the [white paper](https://arxiv.org/abs/2406.16791). - -## CM script developers - -Arjun Suresh, Anandhu Sooraj, Grigori Fursin - -## Parent project - -Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details. - -## Citing this project - -If you found the CM automations helpful, kindly reference this article: -[ [ArXiv](https://arxiv.org/abs/2406.16791) ] diff --git a/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py b/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py index d83d9f763a..aea5a92d09 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py +++ b/cmx4mlops/cmx4mlops/repo/automation/cache/module_misc.py @@ -1,5 +1,5 @@ # Author: Grigori Fursin -# Contributors: https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md +# Contributors: Arjun Suresh, Anandhu Sooraj # # Copyright: https://github.com/mlcommons/ck/blob/master/cm-mlops/COPYRIGHT.md # License: https://github.com/mlcommons/ck/blob/master/cm-mlops/LICENSE.md diff --git a/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py b/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py index f63b0d44b9..ac78286e1f 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py +++ b/cmx4mlops/cmx4mlops/repo/automation/cmx-demo/modulex.py @@ -3,14 +3,9 @@ # Author(s): Grigori Fursin # Contributor(s): # -# Copyright: https://github.com/mlcommons/ck/blob/master/COPYRIGHT.md +# Copyright: https://github.com/mlcommons/ck/blob/master/COPYRIGHT.txt # License: https://github.com/mlcommons/ck/blob/master/LICENSE.md -# -# White paper: https://arxiv.org/abs/2406.16791 -# History: https://github.com/mlcommons/ck/blob/master/HISTORY.CM.md -# -# CK and CM project contributors: https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md -# +# Project contributors: https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md import os @@ -81,14 +76,3 @@ def test(self, i): return {'return':0} - - ############################################################ - def run(self, i): - - import json - print (json.dumps(i, indent=2)) - - v = i.get('test', 'default') - v2 = i.get('test2', 'default') - - return {'return':0, 'new_key':v, 'new_key2':v2} diff --git a/cmx4mlops/cmx4mlops/repo/automation/script/module.py b/cmx4mlops/cmx4mlops/repo/automation/script/module.py index b08875892d..5860e6e653 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/script/module.py +++ b/cmx4mlops/cmx4mlops/repo/automation/script/module.py @@ -45,6 +45,7 @@ def __init__(self, cmind, automation_file): 
self.run_state['fake_deps'] = False self.run_state['parent'] = None self.run_state['version_info'] = [] + self.run_state['cache'] = False self.file_with_cached_state = 'cm-cached-state.json' @@ -87,7 +88,8 @@ def __init__(self, cmind, automation_file): 'accept_license', 'skip_system_deps', 'git_ssh', - 'gh_token'] + 'gh_token', + 'hf_token'] ############################################################ @@ -520,28 +522,7 @@ def _run(self, i): if os.environ.get(key, '') != '' and env.get(key, '') == '': env[key] = os.environ[key] - # Check path/input/output in input and pass to env - for key in self.input_flags_converted_to_tmp_env: - value = i.get(key, '').strip() - if value != '': - env['CM_TMP_' + key.upper()] = value - - for key in self.input_flags_converted_to_env: - value = i.get( - key, - '').strip() if isinstance( - i.get( - key, - ''), - str) else i.get( - key, - '') - if value: - env[f"CM_{key.upper()}"] = value - - r = update_env_with_values(env) - if r['return'] > 0: - return r + r = self._update_env_from_input(env, i) ####################################################################### # Check if we want to skip cache (either by skip_cache or by fake_run) @@ -860,6 +841,7 @@ def _run(self, i): 'alias', '') run_state['script_repo_git'] = script_artifact.repo_meta.get( 'git', False) + run_state['cache'] = meta.get('cache', False) if not recursion: run_state['script_entry_repo_to_report_errors'] = meta.get( @@ -1159,7 +1141,7 @@ def _run(self, i): # Check if the output of a selected script should be cached cache = False if i.get( 'skip_cache', - False) else meta.get( + False) else run_state.get( 'cache', False) cache = cache or ( @@ -1324,7 +1306,7 @@ def _run(self, i): r = self._call_run_deps(prehook_deps, self.local_env_keys, local_env_keys_from_meta, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1385,7 +1367,7 @@ def _run(self, i): r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1396,7 +1378,7 @@ def _run(self, i): # Check chain of post dependencies on other CM scripts r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1618,6 +1600,82 @@ def _run(self, i): if r['return'] > 0: return r + # Prepare common input to prepare and run script + run_script_input = { + 'path': path, + 'bat_ext': bat_ext, + 'os_info': os_info, + 'const': const, + 'state': state, + 'const_state': const_state, + 'reuse_cached': 
reuse_cached, + 'recursion': recursion, + 'recursion_spaces': recursion_spaces, + 'remembered_selections': remembered_selections, + 'tmp_file_run_state': self.tmp_file_run_state, + 'tmp_file_run_env': self.tmp_file_run_env, + 'tmp_file_state': self.tmp_file_state, + 'tmp_file_run': self.tmp_file_run, + 'local_env_keys': self.local_env_keys, + 'local_env_keys_from_meta': local_env_keys_from_meta, + 'posthook_deps': posthook_deps, + 'add_deps_recursive': add_deps_recursive, + 'remembered_selections': remembered_selections, + 'found_script_tags': found_script_tags, + 'variation_tags_string': variation_tags_string, + 'found_cached': False, + 'debug_script_tags': debug_script_tags, + 'verbose': verbose, + 'meta': meta, + 'self': self + } + + # Check and run predeps in customize.py + if str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"] and os.path.isfile( + path_to_customize_py): # possible duplicate execution - needs fix + r = utils.load_python_module( + {'path': path, 'name': 'customize'}) + if r['return'] > 0: + return r + + customize_code = r['code'] + + customize_common_input = { + 'input': i, + 'automation': self, + 'artifact': script_artifact, + 'customize': script_artifact.meta.get('customize', {}), + 'os_info': os_info, + 'recursion_spaces': recursion_spaces, + 'script_tags': script_tags, + 'variation_tags': variation_tags + } + run_script_input['customize_code'] = customize_code + run_script_input['customize_common_input'] = customize_common_input + + if repro_prefix != '': + run_script_input['repro_prefix'] = repro_prefix + if ignore_script_error: + run_script_input['ignore_script_error'] = True + if 'predeps' in dir(customize_code) and not fake_run: + + logging.debug( + recursion_spaces + + ' - Running preprocess ...') + + run_script_input['run_state'] = run_state + + ii = copy.deepcopy(customize_common_input) + ii['env'] = env + ii['state'] = state + ii['meta'] = meta + # may need to detect versions in multiple paths + ii['run_script_input'] = run_script_input + + r = customize_code.predeps(ii) + if r['return'] > 0: + return r + # Check chain of dependencies on other CM scripts if len(deps) > 0: logging.debug(recursion_spaces + @@ -1639,6 +1697,8 @@ def _run(self, i): # Clean some output files clean_tmp_files(clean_files, recursion_spaces) + # Repeated code + ''' # Prepare common input to prepare and run script run_script_input = { 'path': path, @@ -1668,6 +1728,7 @@ def _run(self, i): 'meta': meta, 'self': self } + ''' if os.path.isfile( path_to_customize_py): # possible duplicate execution - needs fix r = utils.load_python_module( @@ -1732,9 +1793,16 @@ def _run(self, i): tmp_curdir = os.getcwd() if env.get('CM_OUTDIRNAME', '') != '': - if not os.path.exists(env['CM_OUTDIRNAME']): - os.makedirs(env['CM_OUTDIRNAME']) - os.chdir(env['CM_OUTDIRNAME']) + if os.path.isabs(env['CM_OUTDIRNAME']) or recursion: + c_outdirname = env['CM_OUTDIRNAME'] + else: + c_outdirname = os.path.join( + env['CM_TMP_CURRENT_PATH'], env['CM_OUTDIRNAME']) + env['CM_OUTDIRNAME'] = c_outdirname + + if not os.path.exists(c_outdirname): + os.makedirs(c_outdirname) + os.chdir(c_outdirname) # Check if pre-process and detect if 'preprocess' in dir(customize_code) and not fake_run: @@ -2128,29 +2196,6 @@ def _run(self, i): if print_readme or repro_prefix != '': readme = self._get_readme(cmd, run_state) - # Copy Docker sample - if repro_prefix != '' and repro_dir != '': - docker_template_path = os.path.join( - self.path, 'docker_repro_example') - if os.path.isdir(docker_template_path): - try: - - 
shutil.copytree( - docker_template_path, - repro_dir, - dirs_exist_ok=True) - except Exception as e: - pass - - docker_container = self._get_docker_container(cmd, run_state) - - try: - - with open(os.path.join(repro_dir, 'ubuntu-23.04.Dockerfile'), 'a+') as f: - f.write(docker_container) - except BaseException: - pass - if print_readme: with open('README-cm.md', 'w') as f: f.write(readme) @@ -2223,6 +2268,34 @@ def _run(self, i): return rr + ########################################################################## + + def _update_env_from_input(self, env, i): + # Check path/input/output in input and pass to env + for key in self.input_flags_converted_to_tmp_env: + value = i.get(key, '').strip() + if value != '': + env['CM_TMP_' + key.upper()] = value + + for key in self.input_flags_converted_to_env: + value = i.get( + key, + '').strip() if isinstance( + i.get( + key, + ''), + str) else i.get( + key, + '') + if value: + env[f"CM_{key.upper()}"] = value + + r = update_env_with_values(env) + if r['return'] > 0: + return r + + return {'return': 0} + ########################################################################## def _fix_cache_paths(self, env): cm_repos_path = os.environ.get( @@ -2312,7 +2385,6 @@ def _update_state_from_variations(self, i, meta, variation_tags, variations, env run_state['variation_groups'] = variation_groups # Add variation(s) if specified in the "tags" input prefixed by _ - # If there is only 1 default variation, then just use it or # substitute from CMD @@ -2828,7 +2900,6 @@ def search(self, i): # Print filtered paths if console if console: for script in r['list']: - # This should not be logging since the output can be consumed by other external tools and scripts # logging.info(script.path) print(script.path) @@ -3729,26 +3800,23 @@ def _get_readme(self, cmd_parts, run_state): content = '' content += """ -*This README was automatically generated by the [CM framework](https://github.com/mlcommons/ck).* +*This README was automatically generated.* ## Install CM ```bash -pip install cmind -U +pip install cm4mlops ``` Check [this readme](https://github.com/mlcommons/ck/blob/master/docs/installation.md) with more details about installing CM and dependencies across different platforms (Ubuntu, MacOS, Windows, RHEL, ...). 
-## Install CM automation repositories - -```bash -cm pull repo mlcommons@cm4mlops --checkout=dev """ current_cm_repo = run_state['script_repo_alias'] - if current_cm_repo not in ['mlcommons@ck', 'mlcommons@cm4mlops']: + if current_cm_repo not in [ + 'mlcommons@mlperf-automations', 'mlcommons@cm4mlops']: content += '\ncm pull repo ' + \ run_state['script_repo_alias'] + '\n' @@ -3788,57 +3856,6 @@ def _get_readme(self, cmd_parts, run_state): return content - ########################################################################## - def _get_docker_container(self, cmd_parts, run_state): - """ - Outputs a Markdown README file listing the CM run commands for the dependencies - """ - - deps = run_state['deps'] - - version_info = run_state.get('version_info', []) - version_info_dict = {} - - for v in version_info: - k = list(v.keys())[0] - version_info_dict[k] = v[k] - - content = '' - - content += """ - -# The following CM commands were automatically generated (prototype) - -cm pull repo mlcommons@cm4mlops --checkout=dev - -""" - current_cm_repo = run_state['script_repo_alias'] - if current_cm_repo not in ['mlcommons@ck', 'mlcommons@cm4mlops']: - content += '\ncm pull repo ' + \ - run_state['script_repo_alias'] + '\n\n' - - deps_ = '' - - for dep_tags in deps: - - xversion = '' - version = version_info_dict.get(dep_tags, {}).get('version', '') - if version != '': - xversion = ' --version={}\n'.format(version) - - content += "# cm run script --tags=" + \ - dep_tags + "{}\n\n".format(xversion) - - cmd = "cm run script " - - for cmd_part in cmd_parts: - x = '"' if ' ' in cmd_part and not cmd_part.startswith('-') else '' - cmd = cmd + " " + x + cmd_part + x - - content += cmd + '\n' - - return content - ########################################################################## def _print_versions(self, run_state): @@ -4130,7 +4147,6 @@ def find_file_in_paths(self, i): return rx else: # Version was detected - detected_version = rx.get('version', '') if detected_version != '': @@ -4693,41 +4709,6 @@ def doc(self, i): return utils.call_internal_module( self, __file__, 'module_misc', 'doc', i) - ############################################################ - def gui(self, i): - """ - Run GUI for CM script. 
- - Args: - (CM input dict): - - Returns: - (CM return dict): - - * return (int): return code == 0 if no error and >0 if error - * (error) (str): error string if return>0 - - """ - - artifact = i.get('artifact', '') - tags = '' - if artifact != '': - if ' ' in artifact: - tags = artifact.replace(' ', ',') - - if tags == '': - tags = i.get('tags', '') - - if 'tags' in i: - del (i['tags']) - - i['action'] = 'run' - i['artifact'] = 'gui' - i['parsed_artifact'] = [('gui', '605cac42514a4c69')] - i['script'] = tags.replace(',', ' ') - - return self.cmind.access(i) - ############################################################ def dockerfile(self, i): @@ -4795,7 +4776,7 @@ def docker(self, i): (docker_os_version) (str): force docker OS version (default: 22.04) (docker_image_tag_extra) (str): add extra tag (default:-latest) - (docker_cm_repo) (str): force CM automation repository when building Docker (default: cm4mlops) + (docker_cm_repo) (str): force CM automation repository when building Docker (default: mlperf-automations) (docker_cm_repos) (docker_cm_repo_flags) @@ -5541,7 +5522,6 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): if rc > 0 and not i.get('ignore_script_error', False): # Check if print files when error print_files = meta.get('print_files_if_script_error', []) - if len(print_files) > 0: for pr in print_files: if os.path.isfile(pr): @@ -5567,20 +5547,13 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"): script_repo_alias.replace('@', '/') + '/issues' if repo_to_report == '': - repo_to_report = 'https://github.com/mlcommons/cm4mlops/issues' + repo_to_report = 'https://github.com/mlcommons/mlperf-automations/issues' note = ''' ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Note that it is often a portability issue of a third-party tool or a native script -wrapped and unified by this CM script (automation recipe). Please re-run -this script with --repro flag and report this issue with the original -command line, cm-repro directory and full log here: - -{} - -The CM concept is to collaboratively fix such issues inside portable CM scripts -to make existing tools and native scripts more portable, interoperable -and deterministic. Thank you'''.format(repo_to_report) +Please file an issue at {} along with the full CM command being run and the relevant +or full console log. 
+'''.format(repo_to_report) rr = { 'return': 2, @@ -5800,7 +5773,10 @@ def convert_env_to_script(env, os_info, start_script=None): key = key[1:] # Append the existing environment variable to the new value - env_value = f"{env_separator.join(env_value)}{env_separator}{os_info['env_var'].replace('env_var', key)}" + env_value = f"""{ + env_separator.join(env_value)}{env_separator}{ + os_info['env_var'].replace( + 'env_var', key)}""" # Replace placeholders in the platform-specific environment command env_command = os_info['set_env'].replace( @@ -6086,6 +6062,9 @@ def update_state_from_meta(meta, env, state, const, const_state, deps, post_deps Internal: update env and state from meta """ + if meta.get('cache', '') != '': + run_state['cache'] = meta['cache'] + default_env = meta.get('default_env', {}) for key in default_env: env.setdefault(key, default_env[key]) diff --git a/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py b/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py index 22b4cf2fdf..00883ba053 100644 --- a/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py +++ b/cmx4mlops/cmx4mlops/repo/automation/script/module_misc.py @@ -1647,12 +1647,12 @@ def dockerfile(i): 'docker_cm_repo', docker_settings.get( 'cm_repo', - 'mlcommons@cm4mlops')) + 'mlcommons@mlperf-automations')) cm_repo_branch = i.get( 'docker_cm_repo_branch', docker_settings.get( 'cm_repo_branch', - 'mlperf-inference')) + 'main')) cm_repo_flags = i.get( 'docker_cm_repo_flags', @@ -1915,6 +1915,9 @@ def docker(i): noregenerate_docker_file = i.get('docker_noregenerate', False) norecreate_docker_image = i.get('docker_norecreate', True) + recreate_docker_image = i.get('docker_recreate', False) + if recreate_docker_image: # force recreate + norecreate_docker_image = False if i.get('docker_skip_build', False): noregenerate_docker_file = True @@ -1987,8 +1990,6 @@ def docker(i): env['CM_DOCKER_CACHE'] = docker_cache image_repo = i.get('docker_image_repo', '') - if image_repo == '': - image_repo = 'local' # Host system needs to have docker r = self_module.cmind.access({'action': 'run', @@ -2084,6 +2085,14 @@ def docker(i): continue ''' + r = script_automation._update_env_from_input(env, i) + if r['return'] > 0: + return r + + # mount outdirname path + if env.get('CM_OUTDIRNAME', '') != '': + mounts.append(f"""{env['CM_OUTDIRNAME']}:{env['CM_OUTDIRNAME']}""") + # Check if need to update/map/mount inputs and env r = process_inputs({'run_cmd_arc': i_run_cmd_arc, 'docker_settings': docker_settings, @@ -2174,7 +2183,7 @@ def docker(i): # env keys corresponding to container mounts are explicitly passed to # the container run cmd - container_env_string = '' + container_env = {} for index in range(len(mounts)): mount = mounts[index] # Since windows may have 2 :, we search from the right @@ -2216,7 +2225,6 @@ def docker(i): new_container_mount, new_container_mount_env = get_container_path( env[tmp_value]) container_env_key = new_container_mount_env - # container_env_string += " --env.{}={} ".format(tmp_value, new_container_mount_env) else: # we skip those mounts mounts[index] = None skip = True @@ -2228,8 +2236,7 @@ def docker(i): continue mounts[index] = new_host_mount + ":" + new_container_mount if host_env_key: - container_env_string += " --env.{}={} ".format( - host_env_key, container_env_key) + container_env[host_env_key] = container_env_key for v in docker_input_mapping: if docker_input_mapping[v] == host_env_key: @@ -2260,10 +2267,16 @@ def docker(i): for key in proxy_keys: if os.environ.get(key, '') != '': value = 
os.environ[key]
-                    container_env_string += " --env.{}={} ".format(key, value)
+                    container_env[key] = value
                     env['+ CM_DOCKER_BUILD_ARGS'].append(
                         "{}={}".format(key, value))
 
+    if container_env:
+        if not i_run_cmd.get('env'):
+            i_run_cmd['env'] = container_env
+        else:
+            i_run_cmd['env'] = {**i_run_cmd['env'], **container_env}
+
     docker_use_host_group_id = i.get(
         'docker_use_host_group_id',
         docker_settings.get('use_host_group_id'))
@@ -2308,7 +2321,7 @@ def docker(i):
         'docker_cm_repo',
         docker_settings.get(
             'cm_repo',
-            'mlcommons@cm4mlops'))
+            'mlcommons@mlperf-automations'))
 
     docker_path = i.get('docker_path', '').strip()
     if docker_path == '':
@@ -2405,8 +2418,7 @@ def docker(i):
                                     'docker_run_cmd_prefix': i.get('docker_run_cmd_prefix', '')})
     if r['return'] > 0:
         return r
-    run_cmd = r['run_cmd_string'] + ' ' + \
-        container_env_string + ' --docker_run_deps '
+    run_cmd = r['run_cmd_string'] + ' ' + ' --docker_run_deps '
 
     env['CM_RUN_STATE_DOCKER'] = True
@@ -2422,7 +2434,8 @@ def docker(i):
     print(final_run_cmd)
     print('')
 
-    docker_recreate_image = 'yes' if not norecreate_docker_image else 'no'
+    docker_recreate_image = 'yes' if str(norecreate_docker_image).lower() not in [
+        "yes", "true", "1"] else 'no'
 
     if i.get('docker_push_image', '') in ['True', True, 'yes']:
         env['CM_DOCKER_PUSH_IMAGE'] = 'yes'
@@ -2436,10 +2449,8 @@ def docker(i):
         'docker_os_version': docker_os_version,
         'cm_repo': cm_repo,
         'env': env,
-        'image_repo': image_repo,
         'interactive': interactive,
         'mounts': mounts,
-        'image_name': image_name,
         # 'image_tag': script_alias,
         'image_tag_extra': image_tag_extra,
         'detached': detached,
@@ -2456,6 +2467,12 @@ def docker(i):
         }
     }
 
+    if image_repo:
+        cm_docker_input['image_repo'] = image_repo
+
+    if image_name:
+        cm_docker_input['image_name'] = image_name
+
     if all_gpus:
         cm_docker_input['all_gpus'] = True
 
diff --git a/cmx4mlops/cmx4mlops/repo/script/README.md b/cmx4mlops/cmx4mlops/repo/script/README.md
index d2667369c0..a9e5e41450 100644
--- a/cmx4mlops/cmx4mlops/repo/script/README.md
+++ b/cmx4mlops/cmx4mlops/repo/script/README.md
@@ -1,40 +1,13 @@
-## About
+### About
 
-Portable CM automations for MLOps and MLPerf.
+This is the source code of portable and reusable automation recipes
+from MLCommons projects with a [human-friendly CM interface](https://github.com/mlcommons/ck).
+You can find a human-readable catalog of these automation recipes [here](../../docs/list_of_scripts.md).
 
-## License
+### License
 
 [Apache 2.0](../../LICENSE.md)
 
-## Copyright
+### Copyright
 
-© 2022-2025 MLCommons. All Rights Reserved.
-
-Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone.
-
-This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
-
-[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
-
-## Maintainer(s)
-
-* MLCommons
-
-## CM author
-
-[Grigori Fursin](https://cKnowledge.org/gfursin)
-
-## CM script developers
-
-Arjun Suresh, Anandhu Sooraj, Grigori Fursin
-
-## Parent project
-
-Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details.
- -## Citing this project - -If you found the CM automations helpful, kindly reference this article: -[ [ArXiv](https://arxiv.org/abs/2406.16791) ] +2022-2024 [MLCommons](https://mlcommons.org) diff --git a/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml index 740a8a18ab..e53b91ec2f 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-image-classification-onnx-py/_cm.yaml @@ -22,7 +22,6 @@ default_env: deps: - tags: detect,os -#- tags: get,sys-utils-cm - names: - python - python3 diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/COPYRIGHT.md new file mode 100644 index 0000000000..a059b0c49b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/README-extra.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/README-extra.md new file mode 100644 index 0000000000..582991f6d2 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/README-extra.md @@ -0,0 +1 @@ +# CM script diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/_cm.yaml new file mode 100644 index 0000000000..b22f119d6c --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/_cm.yaml @@ -0,0 +1,469 @@ +alias: app-mlperf-automotive-mlcommons-python +uid: 621240c5d30a437c + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: "Modular MLPerf inference benchmark pipeline for ABTF model" + + +# User-friendly tags to find this CM script +tags: +- demo +- run-mlperf-inference +- object-detection +- abtf-model + + +# Default environment +default_env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_LOADGEN_SCENARIO: Offline + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: 'on' + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + CM_TEST_QUERY_COUNT: '10' + CM_MLPERF_QUANTIZATION: off + CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + + +# Map script inputs to environment variables +input_mapping: + device: CM_MLPERF_DEVICE + count: CM_MLPERF_LOADGEN_QUERY_COUNT + docker: CM_RUN_DOCKER_CONTAINER + hw_name: CM_HW_NAME + imagenet_path: IMAGENET_PATH + max_batchsize: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + mode: CM_MLPERF_LOADGEN_MODE + num_threads: CM_NUM_THREADS + threads: CM_NUM_THREADS + dataset: CM_MLPERF_VISION_DATASET_OPTION + model: CM_MLPERF_CUSTOM_MODEL_PATH + output_dir: OUTPUT_BASE_DIR + 
power: CM_MLPERF_POWER + power_server: CM_MLPERF_POWER_SERVER_ADDRESS + ntp_server: CM_MLPERF_POWER_NTP_SERVER + max_amps: CM_MLPERF_POWER_MAX_AMPS + max_volts: CM_MLPERF_POWER_MAX_VOLTS + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + scenario: CM_MLPERF_LOADGEN_SCENARIO + test_query_count: CM_TEST_QUERY_COUNT + clean: CM_MLPERF_CLEAN_SUBMISSION_DIR + dataset_args: CM_MLPERF_EXTRA_DATASET_ARGS + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + output: CM_MLPERF_OUTPUT_DIR + +# Duplicate CM environment variables to the ones used in native apps +env_key_mappings: + CM_HOST_: HOST_ + CM_ML_: ML_ + CM_MLPERF_TVM: MLPERF_TVM + CM_MLPERF_DELETE: MLPERF_DELETE + +# Env keys which are exposed to higher level scripts +new_env_keys: + - CM_MLPERF_* + - CM_DATASET_* + - CM_HW_NAME + - CM_COGNATA_ACCURACY_DUMP_FILE + - CM_OUTPUT_PREDICTIONS_PATH + - CM_ML_MODEL_* + - CM_MAX_EXAMPLES + +new_state_keys: + - mlperf-inference-implementation + - CM_SUT_* + +# Dependencies on other CM scripts +deps: + + # Detect host OS features + - tags: detect,os + + # Detect host CPU features + - tags: detect,cpu + + # Install system dependencies on a given host + - tags: get,sys-utils-cm + + # Detect/install python + - tags: get,python + names: + - python + - python3 + + # Use cmind inside CM scripts + - tags: get,generic-python-lib,_package.cmind + + + # CUDA + - tags: get,cuda + enable_if_env: + USE_CUDA: + - yes + names: + - cuda + + + + ######################################################################## + # Install ML engines via CM + + ## Onnx CPU Runtime + - tags: get,generic-python-lib,_onnxruntime + names: + - ml-engine-onnxruntime + - onnxruntime + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + - tvm-onnx + CM_MLPERF_DEVICE: + - cpu + - rocm + + ## Onnx CUDA Runtime + - tags: get,generic-python-lib,_onnxruntime_gpu + names: + - ml-engine-onnxruntime-cuda + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + - tvm-onnx + CM_MLPERF_DEVICE: + - gpu + skip_if_env: + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + + ## resnet50 and 3d-unet need both onnxruntime and onnxruntime_gpu on cuda + - tags: get,generic-python-lib,_onnxruntime + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + CM_MLPERF_DEVICE: + - gpu + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 + - tags: get,generic-python-lib,_onnxruntime_gpu + env: + CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS: "" + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + CM_MLPERF_DEVICE: + - gpu + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 + + ## Pytorch (CPU) + - tags: get,generic-python-lib,_torch + names: + - ml-engine-pytorch + - pytorch + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + CM_MLPERF_DEVICE: + - cpu + - rocm + + ## Pytorch (CUDA) + - tags: get,generic-python-lib,_torch_cuda + names: + - ml-engine-pytorch + - pytorch + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + - ray + CM_MLPERF_DEVICE: + - gpu + + ## Torchvision (CPU) + - tags: get,generic-python-lib,_torchvision + names: + - ml-engine-torchvision + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + CM_MLPERF_DEVICE: + - cpu + + ## Torchvision (CUDA) + - tags: 
get,generic-python-lib,_torchvision_cuda + names: + - ml-engine-torchvision + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + - ray + CM_MLPERF_DEVICE: + - gpu + + ## tensorrt + - tags: get,generic-python-lib,_tensorrt + names: + - ml-engine-tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## torch_tensorrt + - tags: get,generic-python-lib,_torch_tensorrt + names: + - ml-engine-torch_tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## Ray + - tags: get,generic-python-lib,_ray + names: + - ray + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + + + ## Tensorflow + - tags: get,generic-python-lib,_tensorflow + names: + - ml-engine-tensorflow + - tensorflow + enable_if_env: + CM_MLPERF_BACKEND: + - tf + - tflite + + # Install MLPerf inference dependencies + + + + # Creates user conf for given SUT + - tags: generate,user-conf,mlperf,inference + names: + - user-conf-generator + + + # Install MLPerf loadgen + - tags: get,generic-python-lib,_package.mlcommons-loadgen + enable_if_env: + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: + - "off" + names: + - loadgen + - mlperf-inference-loadgen + + - tags: get,loadgen + enable_if_any_env: + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: + - "on" + names: + - loadgen + - mlperf-inference-loadgen + - mlperf-inference-loadgen-from-src + + +# +# # Download MLPerf inference source +# - tags: get,mlcommons,inference,src +# env: +# CM_GET_MLPERF_IMPLEMENTATION_ONLY: 'yes' +# names: +# - mlperf-implementation + + - tags: get,generic-python-lib,_package.psutil + + + + +prehook_deps: + - names: + - remote-run-cmds + tags: remote,run,cmds + enable_if_env: + CM_ASSH_RUN_COMMANDS: + - "on" + + + +posthook_deps: + - names: + - mlperf-runner + tags: benchmark-mlperf + skip_if_env: + CM_MLPERF_SKIP_RUN: + - "on" + + +post_deps: + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state + + +docker: + real_run: false + +# Variations to customize dependencies +variations: + # Implementation + python: + group: implementation + default: true + env: + CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_IMPLEMENTATION: reference + + + # ML engine + onnxruntime: + group: framework + env: + CM_MLPERF_BACKEND: onnxruntime + + onnxruntime,cpu: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + + onnxruntime,cuda: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + ONNXRUNTIME_PREFERRED_EXECUTION_PROVIDER: "CUDAExecutionProvider" + + + pytorch: + group: framework + default: true + add_deps_recursive: + imagenet-preprocessed: + tags: _NCHW + openimages-preprocessed: + tags: _NCHW + ml-model: + tags: raw,_pytorch + env: + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_BACKEND_VERSION: <<>> + + + + +# retinanet: +# group: models +# deps: +# - tags: get,generic-python-lib,_opencv-python +# - tags: get,generic-python-lib,_numpy +# - tags: get,generic-python-lib,_pycocotools +# +# env: +# CM_MODEL: retinanet +# CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: 'yes' +# CM_MLPERF_LOADGEN_MAX_BATCHSIZE: '1' + + + abtf-demo-model: + group: models + deps: + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.faster-coco-eval + version_max: "1.5.7" + version_max_usable: "1.5.7" + names: + - cocoeval + - tags: get,dataset,raw,mlcommons-cognata + names: + - raw-dataset-mlcommons-cognata + - tags: get,ml-model,abtf-ssd-pytorch,_abtf-mvp + names: + - ml-model-abtf + + env: + CM_MODEL: retinanet + + abtf-poc-model: + group: models + default: 
true + deps: + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" + - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.faster-coco-eval + version_max: "1.5.7" + version_max_usable: "1.5.7" + names: + - cocoeval + - tags: get,dataset,raw,mlcommons-cognata,_abtf-poc + skip_if_env: + CM_RUN_STATE_DOCKER: + - 'yes' + names: + - raw-dataset-mlcommons-cognata + - tags: get,ml-model,abtf-ssd-pytorch,_abtf-poc + names: + - ml-model-abtf + + env: + CM_MODEL: retinanet + + # Target devices + cpu: + group: device + default: true + env: + CM_MLPERF_DEVICE: cpu + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: no + USE_GPU: no + + cuda: + group: device + env: + CM_MLPERF_DEVICE: gpu + USE_CUDA: yes + USE_GPU: yes + + + + # Loadgen scenarios + offline: + env: + CM_MLPERF_LOADGEN_SCENARIO: Offline + multistream: + env: + CM_MLPERF_LOADGEN_SCENARIO: MultiStream + singlestream: + env: + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_MLPERF_LOADGEN_MAX_BATCHSIZE: 1 + server: + env: + CM_MLPERF_LOADGEN_SCENARIO: Server + + mvp_demo: + env: diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/customize.py new file mode 100644 index 0000000000..58ee04e1fc --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/customize.py @@ -0,0 +1,253 @@ +from cmind import utils +import os +import json +import shutil +import subprocess + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if env.get('CM_MLPERF_SKIP_RUN', '') == "yes": + return {'return': 0} + + if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": + return {'return': 0} + + if env.get('CM_MLPERF_POWER', '') == "yes": + power = "yes" + else: + power = "no" + + rerun = True if env.get("CM_RERUN", "") != '' else False + + if 'CM_MLPERF_LOADGEN_SCENARIO' not in env: + env['CM_MLPERF_LOADGEN_SCENARIO'] = "Offline" + + if 'CM_MLPERF_LOADGEN_MODE' not in env: + env['CM_MLPERF_LOADGEN_MODE'] = "accuracy" + + if 'CM_MODEL' not in env: + return { + 'return': 1, 'error': "Please select a variation specifying the model to run"} + + # if env['CM_MODEL'] == "resnet50": + # cmd = "cp " + os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt") + " " + os.path.join(env['CM_DATASET_PATH'], + # "val_map.txt") + # ret = os.system(cmd) + + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] = " " + \ + env.get('CM_MLPERF_LOADGEN_EXTRA_OPTIONS', '') + " " + + if 'CM_MLPERF_LOADGEN_QPS' not in env: + env['CM_MLPERF_LOADGEN_QPS_OPT'] = "" + else: + env['CM_MLPERF_LOADGEN_QPS_OPT'] = " --qps " + \ + env['CM_MLPERF_LOADGEN_QPS'] + + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += env['CM_MLPERF_LOADGEN_QPS_OPT'] + + if 'CM_NUM_THREADS' not in env: + if 'CM_MINIMIZE_THREADS' in env: + env['CM_NUM_THREADS'] = str(int(env['CM_HOST_CPU_TOTAL_CORES']) // + (int(env.get('CM_HOST_CPU_SOCKETS', '1')) * int(env.get('CM_HOST_CPU_TOTAL_CORES', '1')))) + else: + env['CM_NUM_THREADS'] = env.get('CM_HOST_CPU_TOTAL_CORES', '1') + + if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '' and not env.get( + 'CM_MLPERF_MODEL_SKIP_BATCHING', False): + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + \ + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE']) + + if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE', '') != '': + 
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --batch-size " + \ + str(env['CM_MLPERF_LOADGEN_BATCH_SIZE']) + + if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get( + 'CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid": + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \ + env['CM_MLPERF_LOADGEN_QUERY_COUNT'] + + print("Using MLCommons Inference source from '" + + env['CM_MLPERF_INFERENCE_SOURCE'] + "'") + + if 'CM_MLPERF_CONF' not in env: + env['CM_MLPERF_CONF'] = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + + x = "" if os_info['platform'] == 'windows' else "'" + if "llama2-70b" in env['CM_MODEL']: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf-conf " + \ + x + env['CM_MLPERF_CONF'] + x + else: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf " + \ + x + env['CM_MLPERF_CONF'] + x + + env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') + if not env['MODEL_DIR']: + env['MODEL_DIR'] = os.path.dirname( + env.get( + 'CM_MLPERF_CUSTOM_MODEL_PATH', + env.get('CM_ML_MODEL_FILE_WITH_PATH'))) + + RUN_CMD = "" + + scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + scenario_extra_options = '' + + NUM_THREADS = env['CM_NUM_THREADS'] + if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": + NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU + + if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: + scenario_extra_options += " --threads " + NUM_THREADS + + ml_model_name = env['CM_MODEL'] + if 'CM_MLPERF_USER_CONF' in env: + user_conf_path = env['CM_MLPERF_USER_CONF'] + x = "" if os_info['platform'] == 'windows' else "'" + scenario_extra_options += " --user_conf " + x + user_conf_path + x + + mode = env['CM_MLPERF_LOADGEN_MODE'] + mode_extra_options = "" + + # Grigori blocked for ABTF to preprocess data set on the fly for now + # we can later move it to a separate script to preprocess data set + +# if 'CM_DATASET_PREPROCESSED_PATH' in env and env['CM_MODEL'] in [ 'resnet50', 'retinanet' ]: +# #dataset_options = " --use_preprocessed_dataset --preprocessed_dir "+env['CM_DATASET_PREPROCESSED_PATH'] +# if env.get('CM_MLPERF_LAST_RELEASE') not in [ "v2.0", "v2.1" ]: +# dataset_options = " --use_preprocessed_dataset --cache_dir "+env['CM_DATASET_PREPROCESSED_PATH'] +# else: +# dataset_options = "" +# if env['CM_MODEL'] == "retinanet": +# dataset_options += " --dataset-list "+ env['CM_DATASET_ANNOTATIONS_FILE_PATH'] +# elif env['CM_MODEL'] == "resnet50": +# dataset_options += " --dataset-list "+ os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt") +# env['DATA_DIR'] = env.get('CM_DATASET_PREPROCESSED_PATH') +# else: +# if 'CM_DATASET_PREPROCESSED_PATH' in env: +# env['DATA_DIR'] = env.get('CM_DATASET_PREPROCESSED_PATH') +# else: +# env['DATA_DIR'] = env.get('CM_DATASET_PATH') +# dataset_options = '' + + # Grigori added for ABTF +# dataset_path = env.get('CM_DATASET_PATH') +# env['DATA_DIR'] = dataset_path + +# dataset_options = " --dataset-list " + env['CM_DATASET_ANNOTATIONS_FILE_PATH'] +# dataset_options += " --cache_dir " + os.path.join(script_path, 'preprocessed-dataset') + + dataset_options = '' + + if env.get('CM_MLPERF_EXTRA_DATASET_ARGS', '') != '': + dataset_options += " " + env['CM_MLPERF_EXTRA_DATASET_ARGS'] + + if mode == "accuracy": + mode_extra_options += " --accuracy" + env['CM_OUTPUT_PREDICTIONS_PATH'] = os.path.join( + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'], + env['CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS'], + 'Cognata_Camera_01_8M_png', + 'output') + + 
elif mode == "performance": + pass + + elif mode == "compliance": + + audit_full_path = env['CM_MLPERF_INFERENCE_AUDIT_PATH'] + mode_extra_options = " --audit '" + audit_full_path + "'" + + if env.get('CM_MLPERF_OUTPUT_DIR', '') == '': + env['CM_MLPERF_OUTPUT_DIR'] = os.getcwd() + + mlperf_implementation = env.get('CM_MLPERF_IMPLEMENTATION', 'reference') + + # Generate CMD + + # Grigori updated for ABTF demo +# cmd, run_dir = get_run_cmd(os_info, env, scenario_extra_options, mode_extra_options, dataset_options, mlperf_implementation) + cmd, run_dir = get_run_cmd_reference( + os_info, env, scenario_extra_options, mode_extra_options, dataset_options, script_path) + + if env.get('CM_NETWORK_LOADGEN', '') == "lon": + + run_cmd = i['state']['mlperf_inference_run_cmd'] + env['CM_SSH_RUN_COMMANDS'] = [] + env['CM_SSH_RUN_COMMANDS'].append( + run_cmd.replace( + "--network=lon", + "--network=sut") + " &") + + env['CM_MLPERF_RUN_CMD'] = cmd + env['CM_RUN_DIR'] = run_dir + env['CM_RUN_CMD'] = cmd + env['CK_PROGRAM_TMP_DIR'] = env.get('CM_ML_MODEL_PATH') # for tvm + + if env.get('CM_HOST_PLATFORM_FLAVOR', '') == "arm64": + env['CM_HOST_PLATFORM_FLAVOR'] = "aarch64" + + if not env.get('CM_COGNATA_ACCURACY_DUMP_FILE'): + env['CM_COGNATA_ACCURACY_DUMP_FILE'] = os.path.join( + env['OUTPUT_DIR'], "accuracy.txt") + + return {'return': 0} + + +def get_run_cmd_reference(os_info, env, scenario_extra_options, + mode_extra_options, dataset_options, script_path=None): + + q = '"' if os_info['platform'] == 'windows' else "'" + + ########################################################################## + # Grigori added for ABTF demo + + if env['CM_MODEL'] in ['retinanet']: + + run_dir = os.path.join(script_path, 'ref') + + env['RUN_DIR'] = run_dir + + env['OUTPUT_DIR'] = env['CM_MLPERF_OUTPUT_DIR'] + + cognata_dataset_path = env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] +# cognata_dataset_path = env['CM_DATASET_PATH'] # Using open images +# dataset for some tests + + path_to_model = env.get( + 'CM_MLPERF_CUSTOM_MODEL_PATH', + env.get( + 'CM_ML_MODEL_FILE_WITH_PATH', + env.get('CM_ML_MODEL_CODE_WITH_PATH'))) + env['MODEL_FILE'] = path_to_model + + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " " + os.path.join(run_dir, "python", "main.py") + " --profile " + env['CM_MODEL'] + "-" + env['CM_MLPERF_BACKEND'] + \ + " --model=" + q + path_to_model + q + \ + " --dataset=" + env["CM_MLPERF_VISION_DATASET_OPTION"] + \ + " --dataset-path=" + q + cognata_dataset_path + q + \ + " --cache_dir=" + q + os.path.join(script_path, 'tmp-preprocessed-dataset') + q + \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + \ + " --output " + q + env['OUTPUT_DIR'] + q + " " + \ + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + scenario_extra_options + mode_extra_options + dataset_options + + ########################################################################## + + return cmd, run_dir + + +def postprocess(i): + + env = i['env'] + + state = i['state'] + + inp = i['input'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md new file mode 100644 index 0000000000..f433b1a53f --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/README.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/README.md new file mode 100644 index 0000000000..b2dcb039f3 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/README.md @@ -0,0 +1,2 @@ +Base code was taken from https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection +and modified to prototype support for Cognata data set and ABTF model. 
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py
new file mode 100644
index 0000000000..955eddb888
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py
@@ -0,0 +1,23 @@
+"""
+abstract backend class
+"""
+
+
+# pylint: disable=unused-argument,missing-docstring
+
+class Backend():
+    def __init__(self):
+        self.inputs = []
+        self.outputs = []
+
+    def version(self):
+        raise NotImplementedError("Backend:version")
+
+    def name(self):
+        raise NotImplementedError("Backend:name")
+
+    def load(self, model_path, inputs=None, outputs=None):
+        raise NotImplementedError("Backend:load")
+
+    def predict(self, feed):
+        raise NotImplementedError("Backend:predict")
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py
new file mode 100644
index 0000000000..ec54019798
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py
@@ -0,0 +1,95 @@
+"""
+PyTorch native backend
+Extended by Grigori Fursin for the ABTF demo
+"""
+# pylint: disable=unused-argument,missing-docstring
+import torch  # currently supports pytorch1.0
+import torchvision
+import backend
+
+import os
+import sys
+import importlib
+
+
+class BackendPytorchNative(backend.Backend):
+    def __init__(self):
+        super(BackendPytorchNative, self).__init__()
+        self.sess = None
+        self.model = None
+        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+        # Grigori added for ABTF model
+        self.config = None
+        self.num_classes = None
+        self.image_size = None
+
+    def version(self):
+        return torch.__version__
+
+    def name(self):
+        return "pytorch-native"
+
+    def image_format(self):
+        return "NCHW"
+
+    def load(self, model_path, inputs=None, outputs=None):
+
+        # From ABTF code
+        sys.path.insert(0, os.environ['CM_ML_MODEL_CODE_WITH_PATH'])
+
+        from src.transform import SSDTransformer
+        from src.utils import generate_dboxes, Encoder, colors, coco_classes
+        from src.model import SSD, ResNet
+
+        abtf_model_config = os.environ.get('CM_ABTF_ML_MODEL_CONFIG', '')
+
+        num_classes_str = os.environ.get('CM_ABTF_NUM_CLASSES', '').strip()
+        self.num_classes = int(
+            num_classes_str) if num_classes_str != '' else 15
+
+        self.config = importlib.import_module('config.' +
+                                              abtf_model_config)
+        self.image_size = self.config.model['image_size']
+
+        self.model = SSD(
+            self.config.model,
+            backbone=ResNet(
+                self.config.model),
+            num_classes=self.num_classes)
+
+        checkpoint = torch.load(
+            model_path,
+            map_location=torch.device(
+                self.device))
+
+        self.model.load_state_dict(checkpoint["model_state_dict"])
+
+        if self.device.startswith('cuda'):
+            self.model.cuda()
+
+        self.model.eval()
+
+        self.model = self.model.to(self.device)
+
+        self.inputs = inputs
+        self.outputs = outputs
+
+        return self
+
+    def predict(self, feed):
+        # For ABTF
+
+        # Always first element for now (later may stack for batching)
+        img = feed['image'][0]
+
+        if torch.cuda.is_available():
+            img = img.cuda()
+
+        inp = img.unsqueeze(dim=0)
+
+        with torch.no_grad():
+            ploc, plabel = self.model(inp)
+
+        output = (ploc, plabel)
+
+        return output
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py
new file mode 100644
index 0000000000..005fa4e2d6
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py
@@ -0,0 +1,351 @@
+"""
+Original code was extended by Grigori Fursin to support the Cognata data set
+"""
+
+import json
+import logging
+import os
+import time
+
+import cv2
+from PIL import Image
+
+import numpy as np
+from pycocotools.cocoeval import COCOeval
+# import pycoco
+import dataset
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("cognata")
+
+
+class Cognata(dataset.Dataset):
+    def __init__(self, data_path, image_list, name, use_cache=0, image_size=None,
+                 image_format="NHWC", pre_process=None, count=None, cache_dir=None, preprocessed_dir=None, use_label_map=False, threads=os.cpu_count(),
+                 model_config=None, model_num_classes=None, model_image_size=None):  # For ABTF
+        super().__init__()
+
+        self.image_size = image_size
+        self.image_list = []
+        self.label_list = []
+        self.image_ids = []
+        self.image_sizes = []
+        self.count = count
+        self.use_cache = use_cache
+        self.data_path = data_path
+        self.pre_process = pre_process
+        self.use_label_map = use_label_map
+
+        self.model_config = model_config
+        self.model_num_classes = model_num_classes
+        self.model_image_size = model_image_size
+        self.ignore_classes = None
+        self.files = None
+        self.dboxes = None
+        self.transform = None
+        self.label_map = None
+        self.label_info = None
+        self.image_bin = []
+        self.encoder = None
+        self.targets = []
+
+        #######################################################################
+        # From ABTF source
+
+        import torch
+        from src.utils import generate_dboxes, Encoder
+        from src.transform import SSDTransformer
+        from src.dataset import prepare_cognata
+        import cognata_labels
+        import csv
+        import ast
+
+        self.dboxes = generate_dboxes(model_config.model, model="ssd")
+        self.transform = SSDTransformer(
+            self.dboxes, self.model_image_size, val=True)
+        self.encoder = Encoder(self.dboxes)
+
+        folders = model_config.dataset['folders']
+        cameras = model_config.dataset['cameras']
+        self.ignore_classes = [2, 25, 31]
+        if 'ignore_classes' in model_config.dataset:
+            self.ignore_classes = model_config.dataset['ignore_classes']
+
+        # Grigori added for tests
+        # Check if overridden by external environment for tests
+        x = os.environ.get(
+            'CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS',
+            '').strip()
+        if x != '':
+            folders = x.split(';') if ';' in x else [x]
+
+        x = os.environ.get(
'CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES', + '').strip() + if x != '': + cameras = x.split(';') if ';' in x else [x] + + print('') + print('Cognata folders: {}'.format(str(folders))) + print('Cognata cameras: {}'.format(str(cameras))) + print('') + + # From ABTF source + print('') + print('Scanning Cognata dataset ...') + start = time.time() + files, label_map, label_info = prepare_cognata( + data_path, folders, cameras, self.ignore_classes) + + self.files = files + + print(' Number of files found: {}'.format(len(files))) + print(' Time: {:.2f} sec.'.format(time.time() - start)) + + if os.environ.get( + 'CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS', '') == 'yes': + label_map = cognata_labels.label_map + label_info = cognata_labels.label_info + + self.label_map = label_map + self.label_info = label_info + + if self.model_num_classes is not None: + self.model_num_classes = len(label_map.keys()) + + print('') + print('Preloading and preprocessing Cognata dataset on the fly ...') + + start = time.time() + + idx = 0 + + for _ in self.files: + + image_name = self.files[idx]['img'] + + img = Image.open(image_name).convert('RGB') + + width, height = img.size + boxes = [] + boxes2 = [] + labels = [] + gt_boxes = [] + targets = [] + with open(self.files[idx]['ann']) as f: + reader = csv.reader(f) + rows = list(reader) + header = rows[0] + annotations = rows[1:] + bbox_index = header.index('bounding_box_2D') + class_index = header.index('object_class') + distance_index = header.index('center_distance') + for annotation in annotations: + bbox = annotation[bbox_index] + bbox = ast.literal_eval(bbox) + object_width = bbox[2] - bbox[0] + object_height = bbox[3] - bbox[1] + object_area = object_width * object_height + label = ast.literal_eval(annotation[class_index]) + distance = ast.literal_eval(annotation[distance_index]) + if object_area < 50 or int( + label) in self.ignore_classes or object_height < 8 or object_width < 8 or distance > 300: + continue + label = self.label_map[label] + boxes.append([bbox[0] / width, bbox[1] / height, + bbox[2] / width, bbox[3] / height]) + boxes2.append([bbox[0], bbox[1], bbox[2], bbox[3]]) + gt_boxes.append( + [bbox[0], bbox[1], bbox[2], bbox[3], label, 0, 0]) + labels.append(label) + + boxes = torch.tensor(boxes) + boxes2 = torch.tensor(boxes2) + labels = torch.tensor(labels) + gt_boxes = torch.tensor(gt_boxes) + + targets.append({'boxes': boxes2.to(device='cpu'), + 'labels': labels.to(device='cpu', + dtype=torch.int32)}) + + img, (height, width), boxes, labels = self.transform( + img, (height, width), boxes, labels, max_num=500) + + _, height, width = img.shape + + self.image_bin.append(img) + self.image_ids.append(idx) + self.image_list.append(image_name) + self.image_sizes.append((height, width)) + + self.label_list.append((labels, boxes)) + + self.targets.append(targets) + + # limit the dataset if requested + idx += 1 + if self.count is not None and idx >= self.count: + break + + print(' Time: {:.2f} sec.'.format(time.time() - start)) + print('') + + return + + def get_item(self, nr): + """Get image by number in the list.""" + + return self.image_bin[nr], self.label_list[nr] + + def get_item_loc(self, nr): + + return self.files[nr]['img'] + + # Grigori added here to be able to return a Torch tensor instead of NumPy + + def get_samples(self, id_list): + + data = [self.image_list_inmemory[idx] for idx in id_list] + labels = [self.label_list[idx] for idx in id_list] + + return data, labels + + +class PostProcessCognata: + """ + Post processing for Cognata (adapted from the TensorFlow ssd-mobilenet style post processing) + """ + + def __init__(self): + self.results = [] + self.good = 0 + self.total = 0 + self.content_ids = [] + self.use_inv_map = False + + def add_results(self, results): + self.results.extend(results) + + def __call__(self, results, ids, expected=None, result_dict=None): + + # Dummy + processed_results = [] + return processed_results + + def start(self): + self.results = [] + self.good = 0 + self.total = 0 + + def finalize(self, result_dict, ds=None, output_dir=None): + + # To be improved + + from torchmetrics.detection.mean_ap import MeanAveragePrecision + metric = MeanAveragePrecision( + iou_type="bbox", + class_metrics=True, + backend='faster_coco_eval') + + result_dict["good"] += self.good + result_dict["total"] += self.total + + preds = [] + targets = [] + # For now batch_size = 1 + for idx in range(0, len(self.results)): + preds.append(self.results[idx][0]) + id = self.results[idx][0]['id'] + targets.append(ds.targets[id][0]) + metric.update(preds, targets) + + metrics = metric.compute() + + print('=================================================') + import pprint + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(metrics) + print('=================================================') + + classes = metrics['classes'].tolist() + map_per_classes = metrics['map_per_class'].tolist() + + final_map = {} + for c in range(0, len(classes)): + final_map[ds.label_info[classes[c]]] = float(map_per_classes[c]) + + result_dict["mAP"] = float(metrics['map']) + result_dict["mAP_classes"] = final_map + + +class PostProcessCognataPt(PostProcessCognata): + """ + Post processing required by ssd-resnet34 / pytorch + """ + + def __init__(self, nms_threshold, max_output, + score_threshold, height, width): + super().__init__() + self.nms_threshold = nms_threshold + self.max_output = max_output + self.score_threshold = score_threshold + self.height = height + self.width = width + + def __call__(self, results, ids, expected=None, result_dict=None): + # results come as: + # detection_boxes,detection_classes,detection_scores + + import torch + + processed_results = [] + + # For now 1 result (batch 1) - need to add support for batch size > 1 + # later + ploc = results[0] + plabel = results[1] + + # Get predictions (from cognata_eval) +# ploc, plabel = model(img) + ploc, plabel = ploc.float(), plabel.float() + + preds = [] + + for i in range(ploc.shape[0]): + dts = [] + labels = [] + scores = [] + + ploc_i = ploc[i, :, :].unsqueeze(0) + plabel_i = plabel[i, :, :].unsqueeze(0) + + result = self.encoder.decode_batch( + ploc_i, plabel_i, self.nms_threshold, self.max_output)[0] + + loc, label, prob = [r.cpu().numpy() for r in result] + for loc_, label_, prob_ in zip(loc, label, prob): + if label_ in expected[i][0]: + self.good += 1 + self.total += 1 + dts.append([loc_[0] * + self.width, loc_[1] * + self.height, loc_[2] * + self.width, loc_[3] * + self.height,]) + labels.append(label_) + scores.append(prob_) + + dts = torch.tensor(dts, device='cpu') + labels = torch.tensor(labels, device='cpu', dtype=torch.int32) + scores = torch.tensor(scores, device='cpu') + preds.append({'boxes': dts, 'labels': labels, + 'scores': scores, 'id': ids[i]}) + + # Only batch size 1 supported for now + idx = 0 + + processed_results.append(preds) + + # self.total += 1 + + return processed_results diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py new file mode 
100644 index 0000000000..b77f09b0ee --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py @@ -0,0 +1,49 @@ +label_map = { + 0: 0, + 28: 1, + 29: 2, + 33: 3, + 8: 4, + 36: 5, + 13: 6, + 11: 7, + 35: 8, + 14: 9, + 9: 10, + 48: 11, + 10: 12, + 46: 13, + 44: 14} +label_info = {0: 'background', 1: 'Traffic_light', 2: 'Props', 3: 'TrafficSign', + 4: 'Car', 5: 'Van', 6: 'Rider', 7: 'Motorcycle', 8: 'Bicycle', + 9: 'Pedestrian', 10: 'Truck', 11: 'PersonalMobility', 12: 'Bus', 13: 'Trailer', 14: 'Animal'} + +colors = [None, (39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), + (14, 89, 122), + (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, + 132), (169, 158, 85), (188, 185, 26), (103, 1, 17), + (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, + 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60), + (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, + 159), (182, 173, 32), (34, 113, 133), (90, 135, 34), + (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, + 112), (89, 60, 55), (15, 54, 88), (112, 75, 181), + (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, + 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108), + (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, + 148), (182, 101, 89), (44, 65, 179), (1, 33, 26), + (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, + 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50), + (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, + 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33), + (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, + 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91), + (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, + 106), (113, 68, 54), (136, 116, 112), (119, 139, 130), + (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, + 180), (49, 119, 155), (153, 50, 183), (125, 38, 3), + (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, + 148), (28, 144, 118), (29, 9, 24), (175, 45, 108), + (81, 175, 64), (178, 19, 157), (74, 188, 190), (18, + 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95), + (2, 20, 184), (122, 37, 185)] diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py new file mode 100644 index 0000000000..9b8af84f55 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py @@ -0,0 +1,303 @@ +""" +dataset related classes and methods +""" + +# pylint: disable=unused-argument,missing-docstring + +import logging +import sys +import time + +import cv2 +import numpy as np + + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("dataset") + + +class Item(): + def __init__(self, label, img, idx): + self.label = label + self.img = img + self.idx = idx + self.start = time.time() + + +def usleep(sec): + if sys.platform == 'win32': + # on Windows time.sleep() doesn't work too well + import ctypes + kernel32 = ctypes.windll.kernel32 + timer = kernel32.CreateWaitableTimerA( + ctypes.c_void_p(), True, ctypes.c_void_p()) + delay = ctypes.c_longlong(int(-1 * (10 * 1000000 * sec))) + kernel32.SetWaitableTimer( + timer, + ctypes.byref(delay), + 0, + ctypes.c_void_p(), + ctypes.c_void_p(), + False) + kernel32.WaitForSingleObject(timer, 0xffffffff) + else: + time.sleep(sec) + + +class Dataset(): + def __init__(self): + self.arrival = None + self.image_list = [] + self.label_list = [] + 
self.image_list_inmemory = {} + self.last_loaded = -1 + + def preprocess(self, use_cache=True): + raise NotImplementedError("Dataset:preprocess") + + def get_item_count(self): + return len(self.image_list) + + def get_list(self): + raise NotImplementedError("Dataset:get_list") + + def load_query_samples(self, sample_list): + self.image_list_inmemory = {} + for sample in sample_list: + self.image_list_inmemory[sample], _ = self.get_item(sample) + self.last_loaded = time.time() + + def unload_query_samples(self, sample_list): + if sample_list: + for sample in sample_list: + if sample in self.image_list_inmemory: + del self.image_list_inmemory[sample] + else: + self.image_list_inmemory = {} + + def get_samples(self, id_list): + data = np.array([self.image_list_inmemory[id] for id in id_list]) + return data, self.label_list[id_list] + + def get_item_loc(self, id): + raise NotImplementedError("Dataset:get_item_loc") + + +# +# Post processing +# +class PostProcessCommon: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + n = len(results[0]) + for idx in range(0, n): + result = results[0][idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +class PostProcessArgMax: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + results = np.argmax(results[0], axis=1) + n = results.shape[0] + for idx in range(0, n): + result = results[idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +# +# pre-processing +# + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def resize_with_aspectratio( + img, out_height, out_width, scale=87.5, inter_pol=cv2.INTER_LINEAR): + height, width, _ = img.shape + new_height = int(100. * out_height / scale) + new_width = int(100. 
* out_width / scale) + if height > width: + w = new_width + h = int(new_height * height / width) + else: + h = new_height + w = int(new_width * width / height) + img = cv2.resize(img, (w, h), interpolation=inter_pol) + return img + + +def pre_process_vgg(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + cv2_interpol = cv2.INTER_AREA + img = resize_with_aspectratio( + img, + output_height, + output_width, + inter_pol=cv2_interpol) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + # normalize image + means = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img -= means + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_mobilenet(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + img = resize_with_aspectratio( + img, + output_height, + output_width, + inter_pol=cv2.INTER_LINEAR) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + img /= 255.0 + img -= 0.5 + img *= 2 + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_imagenet_pytorch(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 224) + img = F.to_tensor(img) + img = F.normalize( + img, mean=[ + 0.485, 0.456, 0.406], std=[ + 0.229, 0.224, 0.225], inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + + +def maybe_resize(img, dims): + img = np.array(img, dtype=np.float32) + if len(img.shape) < 3 or img.shape[2] != 3: + # some images might be grayscale + img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if dims is not None: + im_height, im_width, _ = dims + img = cv2.resize(img, (im_width, im_height), + interpolation=cv2.INTER_LINEAR) + return img + + +def pre_process_coco_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img = np.asarray(img, dtype=np.uint8) + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_pt_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img -= 127.5 + img /= 127.5 + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_resnet34(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) + std = np.array([0.229, 0.224, 0.225], dtype=np.float32) + + img = img / 255. - mean + img = img / std + + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img + + +def pre_process_coco_resnet34_tf(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img = img - mean + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img + + +def pre_process_openimages_retinanet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img /= 255. 
+ # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py new file mode 100644 index 0000000000..e4462da8c4 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py @@ -0,0 +1,659 @@ +""" +mlperf inference benchmarking tool +""" + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import array +import collections +import json +import logging +import os +import sys +import threading +import time +from queue import Queue +from PIL import Image +import mlperf_loadgen as lg +import numpy as np +import cv2 +import glob +import dataset +import cognata +import cognata_labels + +# import imagenet +# import coco +# import openimages + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("main") + +NANO_SEC = 1e9 +MILLI_SEC = 1000 + +# pylint: disable=missing-docstring + +# the datasets we support +SUPPORTED_DATASETS = { + "cognata-4mp-pt": + (cognata.Cognata, None, cognata.PostProcessCognataPt(0.5, 200, 0.05, 1440, 2560), + {"image_size": [1440, 2560, 3]}), + "cognata-8mp-pt": + (cognata.Cognata, None, cognata.PostProcessCognataPt(0.5, 200, 0.05, 2160, 3840), + {"image_size": [2160, 3840, 3]}) +} + +# pre-defined command line options to simplify things. They are used as defaults and can be +# overwritten from the command line + +SUPPORTED_PROFILES = { + "defaults": { + "dataset": "imagenet", + "backend": "tensorflow", + "cache": 0, + "max-batchsize": 32, + }, + + # retinanet + "retinanet-pytorch": { + "inputs": "image", + "outputs": "boxes,labels,scores", + "dataset": "openimages-800-retinanet", + "backend": "pytorch-native", + "model-name": "retinanet", + }, +} + +SCENARIO_MAP = { + "SingleStream": lg.TestScenario.SingleStream, + "MultiStream": lg.TestScenario.MultiStream, + "Server": lg.TestScenario.Server, + "Offline": lg.TestScenario.Offline, +} + +last_timeing = [] + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + choices=SUPPORTED_DATASETS.keys(), + help="dataset") + parser.add_argument( + "--dataset-path", + required=True, + help="path to the dataset") + parser.add_argument("--dataset-list", help="path to the dataset list") + parser.add_argument( + "--data-format", + choices=[ + "NCHW", + "NHWC"], + help="data format") + parser.add_argument( + "--profile", + choices=SUPPORTED_PROFILES.keys(), + help="standard profiles") + parser.add_argument("--scenario", default="SingleStream", + help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys()))) + parser.add_argument( + "--max-batchsize", + type=int, + help="max batch size in a single inference") + parser.add_argument("--model", required=True, help="model file") + parser.add_argument("--output", default="output", help="test results") + parser.add_argument("--inputs", help="model inputs") + parser.add_argument("--outputs", help="model outputs") + parser.add_argument("--backend", help="runtime to use") + parser.add_argument( + "--model-name", + help="name of the mlperf model, i.e.
resnet50") + parser.add_argument( + "--threads", + default=os.cpu_count(), + type=int, + help="threads") + parser.add_argument("--qps", type=int, help="target qps") + parser.add_argument("--cache", type=int, default=0, help="use cache") + parser.add_argument( + "--cache_dir", + type=str, + default=None, + help="dir path for caching") + parser.add_argument( + "--preprocessed_dir", + type=str, + default=None, + help="dir path for storing preprocessed images (overrides cache_dir)") + parser.add_argument( + "--use_preprocessed_dataset", + action="store_true", + help="use preprocessed dataset instead of the original") + parser.add_argument( + "--accuracy", + action="store_true", + help="enable accuracy pass") + parser.add_argument( + "--find-peak-performance", + action="store_true", + help="enable finding peak performance pass") + parser.add_argument( + "--debug", + action="store_true", + help="debug, turn traces on") + + # file to use mlperf rules compliant parameters + parser.add_argument( + "--mlperf_conf", + default="../../mlperf.conf", + help="mlperf rules config") + # file for user LoadGen settings such as target QPS + parser.add_argument( + "--user_conf", + default="user.conf", + help="user config for user LoadGen settings such as target QPS") + # file for LoadGen audit settings + parser.add_argument( + "--audit_conf", + default="audit.config", + help="config for LoadGen audit settings") + + # below will override mlperf rules compliant settings - don't use for + # official submission + parser.add_argument("--time", type=int, help="time to scan in seconds") + parser.add_argument("--count", type=int, help="dataset items to use") + parser.add_argument( + "--performance-sample-count", + type=int, + help="performance sample count") + parser.add_argument( + "--max-latency", + type=float, + help="mlperf max latency in pct tile") + parser.add_argument( + "--samples-per-query", + default=8, + type=int, + help="mlperf multi-stream samples per query") + args = parser.parse_args() + + # don't use defaults in argparser. 
Instead we default to a dict, override that with a profile + # and take this as default unless the command line gives one + defaults = SUPPORTED_PROFILES["defaults"] + + if args.profile: + profile = SUPPORTED_PROFILES[args.profile] + defaults.update(profile) + for k, v in defaults.items(): + kc = k.replace("-", "_") + if getattr(args, kc) is None: + setattr(args, kc, v) + if args.inputs: + args.inputs = args.inputs.split(",") + if args.outputs: + args.outputs = args.outputs.split(",") + + if args.scenario not in SCENARIO_MAP: + parser.error("valid scenarios:" + str(list(SCENARIO_MAP.keys()))) + return args + + +def get_backend(backend): + if backend == "null": + from backend_null import BackendNull + backend = BackendNull() + elif backend == "pytorch": + from backend_pytorch import BackendPytorch + backend = BackendPytorch() + elif backend == "pytorch-native": + from backend_pytorch_native import BackendPytorchNative + backend = BackendPytorchNative() + else: + raise ValueError("unknown backend: " + backend) + return backend + + +class Item: + """An item that we queue for processing by the thread pool.""" + + def __init__(self, query_id, content_id, img, label=None): + self.query_id = query_id + self.content_id = content_id + self.img = img + self.label = label + self.start = time.time() + + +class RunnerBase: + def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128): + self.take_accuracy = False + self.ds = ds + self.model = model + self.post_process = post_proc + self.threads = threads + self.take_accuracy = False + self.max_batchsize = max_batchsize + self.result_timing = [] + self.proc_results = [] + + def handle_tasks(self, tasks_queue): + pass + + def start_run(self, result_dict, take_accuracy): + self.result_dict = result_dict + self.result_timing = [] + self.take_accuracy = take_accuracy + self.post_process.start() + + def run_one_item(self, qitem): + # run the prediction + processed_results = [] + try: + results = self.model.predict({self.model.inputs[0]: qitem.img}) + + processed_results = self.post_process( + results, qitem.content_id, qitem.label, self.result_dict) + if self.take_accuracy: + self.post_process.add_results(processed_results) + + self.result_timing.append(time.time() - qitem.start) + + except Exception as ex: # pylint: disable=broad-except + src = [self.ds.get_item_loc(i) for i in qitem.content_id] + log.error("thread: failed on contentid=%s, %s", src, ex) + # since post_process will not run, fake empty responses + processed_results = [[]] * len(qitem.query_id) + finally: + response_array_refs = [] + response = [] + for idx, query_id in enumerate(qitem.query_id): + + # Temporary hack for Cognata to add only boxes - fix later + processed_results2 = [x['boxes'].numpy() + for x in processed_results[idx]] + self.proc_results.append([{'boxes': x['boxes'].tolist(), 'scores': x['scores'].tolist(), 'labels': x['labels'].tolist(), 'id': x['id']} + for x in processed_results[idx]]) + response_array = array.array("B", np.array( + processed_results2, np.float32).tobytes()) + response_array_refs.append(response_array) + bi = response_array.buffer_info() + response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1])) + lg.QuerySamplesComplete(response) + + def enqueue(self, query_samples): + idx = [q.index for q in query_samples] + query_id = [q.id for q in query_samples] + if len(query_samples) < self.max_batchsize: + data, label = self.ds.get_samples(idx) + self.run_one_item(Item(query_id, idx, data, label)) + else: + bs = self.max_batchsize + for i in range(0, len(idx), 
bs): + data, label = self.ds.get_samples(idx[i:i + bs]) + self.run_one_item( + Item(query_id[i:i + bs], idx[i:i + bs], data, label)) + + def finish(self): + pass + + +class QueueRunner(RunnerBase): + def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128): + super().__init__(model, ds, threads, post_proc, max_batchsize) + self.tasks = Queue(maxsize=threads * 4) + self.workers = [] + self.result_dict = {} + + for _ in range(self.threads): + worker = threading.Thread( + target=self.handle_tasks, args=( + self.tasks,)) + worker.daemon = True + self.workers.append(worker) + worker.start() + + def handle_tasks(self, tasks_queue): + """Worker thread.""" + while True: + qitem = tasks_queue.get() + if qitem is None: + # None in the queue indicates the parent wants us to exit + tasks_queue.task_done() + break + self.run_one_item(qitem) + tasks_queue.task_done() + + def enqueue(self, query_samples): + idx = [q.index for q in query_samples] + query_id = [q.id for q in query_samples] + + if len(query_samples) < self.max_batchsize: + data, label = self.ds.get_samples(idx) + self.tasks.put(Item(query_id, idx, data, label)) + else: + bs = self.max_batchsize + for i in range(0, len(idx), bs): + ie = i + bs + + data, label = self.ds.get_samples(idx[i:ie]) + self.tasks.put(Item(query_id[i:ie], idx[i:ie], data, label)) + + def finish(self): + # exit all threads + for _ in self.workers: + self.tasks.put(None) + for worker in self.workers: + worker.join() + + +def add_results(final_results, name, result_dict, + result_list, took, show_accuracy=False): + percentiles = [50., 80., 90., 95., 99., 99.9] + buckets = np.percentile(result_list, percentiles).tolist() + buckets_str = ",".join(["{}:{:.4f}".format(p, b) + for p, b in zip(percentiles, buckets)]) + + if result_dict["total"] == 0: + result_dict["total"] = len(result_list) + # this is what we record for each run + result = { + "took": took, + "mean": np.mean(result_list), + "percentiles": {str(k): v for k, v in zip(percentiles, buckets)}, + "qps": len(result_list) / took, + "count": len(result_list), + "good_items": result_dict["good"], + "total_items": result_dict["total"], + } + acc_str = "" + if show_accuracy: + result["accuracy"] = 100. * result_dict["good"] / result_dict["total"] + acc_str = ", acc={:.3f}%".format(result["accuracy"]) + if "mAP" in result_dict: + result["mAP"] = 100.
* result_dict["mAP"] + acc_str += ", mAP={:.3f}%".format(result["mAP"]) + if os.environ.get('CM_COGNATA_ACCURACY_DUMP_FILE', '') != '': + accuracy_file = os.environ['CM_COGNATA_ACCURACY_DUMP_FILE'] + with open(accuracy_file, "w") as f: + f.write("{:.3f}%".format(result["mAP"])) + + if "mAP_classes" in result_dict: + result['mAP_per_classes'] = result_dict["mAP_classes"] + acc_str += ", mAP_classes={}".format(result_dict["mAP_classes"]) + + # add the result to the result dict + final_results[name] = result + + # to stdout + print("{} qps={:.2f}, mean={:.4f}, time={:.3f}{}, queries={}, tiles={}".format( + name, result["qps"], result["mean"], took, acc_str, + len(result_list), buckets_str)) + + print('======================================================================') + +######################################################################### + + +def main(): + print('======================================================================') + + global last_timeing + args = get_args() + + log.info(args) + + # Find backend + backend = get_backend(args.backend) + + # Load model to backend (Grigori moved here before dataset + # since we get various info about pre-processing from the model) + + print('') + print('Loading model ...') + print('') + + model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs) + +# print (model.num_classes) +# print (model.image_size) + + # --count applies to accuracy mode only and can be used to limit the number of images + # for testing. + count_override = False + count = args.count + if count: + count_override = True + + # dataset to use + wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset] +# if args.use_preprocessed_dataset: +# pre_proc=None + + print('') + print('Loading dataset and preprocessing if needed ...') + print('* Dataset path: {}'.format(args.dataset_path)) + print('* Preprocessed cache path: {}'.format(args.cache_dir)) + print('') + + ds = wanted_dataset(data_path=args.dataset_path, + image_list=args.dataset_list, + name=args.dataset, + pre_process=pre_proc, + use_cache=args.cache, + count=count, + cache_dir=args.cache_dir, + preprocessed_dir=args.preprocessed_dir, + threads=args.threads, + model_config=model.config, # For ABTF + model_num_classes=model.num_classes, # For ABTF + model_image_size=model.image_size, # For ABTF + **kwargs) + + # For ABTF - maybe find cleaner way + post_proc.encoder = ds.encoder + + final_results = { + "runtime": model.name(), + "version": model.version(), + "time": int(time.time()), + "args": vars(args), + "cmdline": str(args), + } + + mlperf_conf = os.path.abspath(args.mlperf_conf) + if not os.path.exists(mlperf_conf): + log.error("{} not found".format(mlperf_conf)) + sys.exit(1) + + user_conf = os.path.abspath(args.user_conf) + if not os.path.exists(user_conf): + log.error("{} not found".format(user_conf)) + sys.exit(1) + + audit_config = os.path.abspath(args.audit_conf) + + if args.output: + output_dir = os.path.abspath(args.output) + os.makedirs(output_dir, exist_ok=True) + os.chdir(output_dir) + + # + # make one pass over the dataset to validate accuracy + # + count = ds.get_item_count() + + # warmup + if os.environ.get('CM_ABTF_ML_MODEL_SKIP_WARMUP', + '').strip().lower() != 'yes': + ds.load_query_samples([0]) + for _ in range(5): + img, _ = ds.get_samples([0]) + _ = backend.predict({backend.inputs[0]: img}) + ds.unload_query_samples(None) + + scenario = SCENARIO_MAP[args.scenario] + runner_map = { + lg.TestScenario.SingleStream: RunnerBase, + 
lg.TestScenario.MultiStream: QueueRunner, + lg.TestScenario.Server: QueueRunner, + lg.TestScenario.Offline: QueueRunner + } + + runner = runner_map[scenario]( + model, + ds, + args.threads, + post_proc=post_proc, + max_batchsize=args.max_batchsize) + + def issue_queries(query_samples): + runner.enqueue(query_samples) + + def flush_queries(): + pass + + log_output_settings = lg.LogOutputSettings() + log_output_settings.outdir = output_dir + log_output_settings.copy_summary_to_stdout = False + log_settings = lg.LogSettings() + log_settings.enable_trace = args.debug + log_settings.log_output = log_output_settings + + settings = lg.TestSettings() + settings.FromConfig(mlperf_conf, args.model_name, args.scenario) + settings.FromConfig(user_conf, args.model_name, args.scenario) + settings.scenario = scenario + settings.mode = lg.TestMode.PerformanceOnly + if args.accuracy: + settings.mode = lg.TestMode.AccuracyOnly + if args.find_peak_performance: + settings.mode = lg.TestMode.FindPeakPerformance + + if args.time: + # override the time we want to run + settings.min_duration_ms = args.time * MILLI_SEC + settings.max_duration_ms = args.time * MILLI_SEC + + if args.qps: + qps = float(args.qps) + settings.server_target_qps = qps + settings.offline_expected_qps = qps + + if count_override: + settings.min_query_count = count + settings.max_query_count = count + + if args.samples_per_query: + settings.multi_stream_samples_per_query = args.samples_per_query + + if args.max_latency: + settings.server_target_latency_ns = int(args.max_latency * NANO_SEC) + settings.multi_stream_expected_latency_ns = int( + args.max_latency * NANO_SEC) + + performance_sample_count = args.performance_sample_count if args.performance_sample_count else min( + count, 500) + sut = lg.ConstructSUT(issue_queries, flush_queries) + qsl = lg.ConstructQSL( + count, + performance_sample_count, + ds.load_query_samples, + ds.unload_query_samples) + + log.info("starting {}".format(scenario)) + result_dict = {"good": 0, "total": 0, "scenario": str(scenario)} + runner.start_run(result_dict, args.accuracy) + + lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config) + + if not last_timeing: + last_timeing = runner.result_timing + if args.accuracy: + post_proc.finalize(result_dict, ds, output_dir=args.output) + + add_results(final_results, "{}".format(scenario), + result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy) + + runner.finish() + lg.DestroyQSL(qsl) + lg.DestroySUT(sut) + # + # write final results + # + if args.output: + with open("results.json", "w") as f: + json.dump(final_results, f, sort_keys=True, indent=4) + if args.accuracy: + print('Saving model output examples ...') + files = glob.glob( + os.path.join( + args.dataset_path, + '10002_Urban_Clear_Morning', + 'Cognata_Camera_01_8M_png', + '*.png')) + files = sorted(files) + for pred_batch in runner.proc_results: + for pred in pred_batch: + f = files[pred['id']] + cls_threshold = 0.3 + img = Image.open(f).convert("RGB") + loc, label, prob = np.array( + pred['boxes']), np.array( + pred['labels']), np.array( + pred['scores']) + best = np.argwhere(prob > cls_threshold).squeeze(axis=1) + + loc = loc[best] + label = label[best] + prob = prob[best] + + # Update input image with boxes and predictions + output_img = cv2.imread(f) + if len(loc) > 0: + + loc = loc.astype(np.int32) + + for box, lb, pr in zip(loc, label, prob): + category = cognata_labels.label_info[lb] + color = cognata_labels.colors[lb] + + xmin, ymin, xmax, ymax = box + + cv2.rectangle( 
+ output_img, (xmin, ymin), (xmax, ymax), color, 2) + + text_size = cv2.getTextSize( + category + " : %.2f" % + pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] + + cv2.rectangle( + output_img, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1) + + cv2.putText( + output_img, category + " : %.2f" % pr, + (xmin, ymin + + text_size[1] + + 4), cv2.FONT_HERSHEY_PLAIN, 1, + (255, 255, 255), 1) + output = "{}_prediction.jpg".format(f[:-4]) + + d1 = os.path.join(os.path.dirname(output), 'output') + if not os.path.isdir(d1): + os.makedirs(d1) + + d2 = os.path.basename(output) + + output = os.path.join(d1, d2) + cv2.imwrite(output, output_img) + with open("preds.json", "w") as f: + json.dump(runner.proc_results, f, indent=4) + + +if __name__ == "__main__": + main() diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/user.conf b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/user.conf new file mode 100644 index 0000000000..edffe6912b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive-mlcommons-python/user.conf @@ -0,0 +1,6 @@ +# Please set these fields depending on the performance of your system to +# override default LoadGen settings. +*.SingleStream.target_latency = 10 +*.MultiStream.target_latency = 80 +*.Server.target_qps = 1.0 +*.Offline.target_qps = 1.0 diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/COPYRIGHT.md new file mode 100644 index 0000000000..a059b0c49b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
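For context, the user.conf added above only sets per-scenario performance targets; the reference harness merges it on top of the rules-level mlperf.conf through LoadGen's TestSettings, as main.py does earlier in this patch. A minimal sketch of that mechanism, assuming mlperf_loadgen is installed (the conf paths and the "retinanet" model name are illustrative, not part of this patch):

import mlperf_loadgen as lg

settings = lg.TestSettings()
# mlperf.conf carries the rules-compliant defaults; user.conf overrides them
settings.FromConfig("mlperf.conf", "retinanet", "SingleStream")
settings.FromConfig("user.conf", "retinanet", "SingleStream")
# wildcard entries such as *.SingleStream.target_latency apply to any model name
settings.scenario = lg.TestScenario.SingleStream
settings.mode = lg.TestMode.PerformanceOnly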
diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/_cm.yaml new file mode 100644 index 0000000000..cfb22101ff --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/_cm.yaml @@ -0,0 +1,287 @@ +alias: app-mlperf-automotive +uid: f7488ce376484fd2 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: "Modular MLPerf inference benchmark pipeline for ABTF model" + + +# User-friendly tags to find this CM script +tags: +- app +- app-mlperf-inference +- mlperf-inference +- abtf-inference + +predeps: no + +# Default environment +default_env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_LOADGEN_SCENARIO: Offline + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + CM_TEST_QUERY_COUNT: '10' + CM_MLPERF_QUANTIZATION: off + CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + + +# Map script inputs to environment variables +input_mapping: + device: CM_MLPERF_DEVICE + count: CM_MLPERF_LOADGEN_QUERY_COUNT + docker: CM_RUN_DOCKER_CONTAINER + hw_name: CM_HW_NAME + imagenet_path: IMAGENET_PATH + max_batchsize: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + mode: CM_MLPERF_LOADGEN_MODE + num_threads: CM_NUM_THREADS + threads: CM_NUM_THREADS + dataset: CM_MLPERF_VISION_DATASET_OPTION + model: CM_MLPERF_CUSTOM_MODEL_PATH + output_dir: OUTPUT_BASE_DIR + power: CM_MLPERF_POWER + power_server: CM_MLPERF_POWER_SERVER_ADDRESS + ntp_server: CM_MLPERF_POWER_NTP_SERVER + max_amps: CM_MLPERF_POWER_MAX_AMPS + max_volts: CM_MLPERF_POWER_MAX_VOLTS + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + scenario: CM_MLPERF_LOADGEN_SCENARIO + test_query_count: CM_TEST_QUERY_COUNT + clean: CM_MLPERF_CLEAN_SUBMISSION_DIR + dataset_args: CM_MLPERF_EXTRA_DATASET_ARGS + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + output: CM_MLPERF_OUTPUT_DIR + +# Env keys which are exposed to higher level scripts +new_env_keys: + - CM_MLPERF_* + - CM_OUTPUT_PREDICTIONS_PATH + +new_state_keys: + - cm-mlperf-inference-results* + +# Dependencies on other CM scripts +deps: + + # Detect host OS features + - tags: detect,os + + # Detect host CPU features + - tags: detect,cpu + + # Install system dependencies on a given host + - tags: get,sys-utils-cm + + # Detect/install python + - tags: get,python + names: + - python + - python3 + + # Use cmind inside CM scripts + - tags: get,generic-python-lib,_package.cmind + + - tags: get,mlperf,inference,utils + + +docker: + cm_repo: gateoverflow@cm4mlops + use_host_group_id: True + use_host_user_id: True + real_run: false + interactive: True + cm_repos: 'cm pull repo mlcommons@cm4abtf --checkout=poc' + deps: + - tags: get,abtf,scratch,space + mounts: + - "${{ CM_ABTF_SCRATCH_PATH_DATASETS }}:${{ CM_ABTF_SCRATCH_PATH_DATASETS }}" + + +# Variations to customize dependencies +variations: + + # Implementation + mlcommons-python: + group: implementation + default: true + env: + CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_IMPLEMENTATION: reference + prehook_deps: + - names: + - python-reference-abtf-inference + - abtf-inference-implementation + tags: run-mlperf-inference,demo,abtf-model + skip_if_env: + CM_SKIP_RUN: + - yes + + + # 
Execution modes + fast: + group: execution-mode + env: + CM_FAST_FACTOR: '5' + CM_OUTPUT_FOLDER_NAME: fast_results + CM_MLPERF_RUN_STYLE: fast + + test: + group: execution-mode + default: true + env: + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + + valid: + group: execution-mode + env: + CM_OUTPUT_FOLDER_NAME: valid_results + CM_MLPERF_RUN_STYLE: valid + + + # ML engine + onnxruntime: + group: framework + env: + CM_MLPERF_BACKEND: onnxruntime + add_deps_recursive: + abtf-inference-implementation: + tags: _onnxruntime + + + onnxruntime,cpu: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + + onnxruntime,cuda: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + ONNXRUNTIME_PREFERRED_EXECUTION_PROVIDER: "CUDAExecutionProvider" + + + pytorch: + group: framework + default: true + env: + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_BACKEND_VERSION: <<>> + add_deps_recursive: + abtf-inference-implementation: + tags: _pytorch + + + abtf-demo-model: + env: + CM_MODEL: retinanet + group: models + add_deps_recursive: + abtf-inference-implementation: + tags: _abtf-demo-model + + abtf-poc-model: + env: + CM_MODEL: retinanet + default: true + group: models + add_deps_recursive: + abtf-inference-implementation: + tags: _abtf-poc-model + docker: + deps: + - tags: get,dataset,raw,mlcommons-cognata,_abtf-poc + names: + - raw-dataset-mlcommons-cognata + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_IN_HOST: + - yes + + mounts: + - "${{ CM_DATASET_MLCOMMONS_COGNATA_PATH }}:${{ CM_DATASET_MLCOMMONS_COGNATA_PATH }}" + + + # Target devices + cpu: + group: device + default: true + env: + CM_MLPERF_DEVICE: cpu + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: no + USE_GPU: no + add_deps_recursive: + abtf-inference-implementation: + tags: _cpu + + cuda: + group: device + env: + CM_MLPERF_DEVICE: gpu + USE_CUDA: yes + USE_GPU: yes + add_deps_recursive: + abtf-inference-implementation: + tags: _cuda + docker: + all_gpus: 'yes' + base_image: nvcr.io/nvidia/pytorch:24.03-py3 + + + + # Loadgen scenarios + offline: + env: + CM_MLPERF_LOADGEN_SCENARIO: Offline + add_deps_recursive: + abtf-inference-implementation: + tags: _offline + multistream: + env: + CM_MLPERF_LOADGEN_SCENARIO: MultiStream + add_deps_recursive: + abtf-inference-implementation: + tags: _multistream + singlestream: + group: loadgen-scenario + default: true + env: + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + add_deps_recursive: + abtf-inference-implementation: + tags: _singlestream + server: + env: + CM_MLPERF_LOADGEN_SCENARIO: Server + add_deps_recursive: + abtf-inference-implementation: + tags: _server + + mvp-demo: + env: + CM_ABTF_MVP_DEMO: yes + CM_MLPERF_VISION_DATASET_OPTION: cognata-8mp-pt + CM_ABTF_ML_MODEL_CONFIG: baseline_8MP_ss_scales_all + CM_ABTF_NUM_CLASSES: 15 + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: 10002_Urban_Clear_Morning + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: Cognata_Camera_01_8M + CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS: 'yes' + CM_ABTF_ML_MODEL_SKIP_WARMUP: 'yes' + + poc-demo: + env: + CM_ABTF_POC_DEMO: yes + CM_MLPERF_VISION_DATASET_OPTION: cognata-8mp-pt + CM_ABTF_ML_MODEL_CONFIG: baseline_8MP_ss_scales_fm1_5x5_all + CM_ABTF_NUM_CLASSES: 15 + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: 10002_Urban_Clear_Morning + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: Cognata_Camera_01_8M + CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS: 'yes' + CM_ABTF_ML_MODEL_SKIP_WARMUP: 'yes' diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/customize.py 
new file mode 100644 index 0000000000..070f2b3c1c --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-automotive/customize.py @@ -0,0 +1,103 @@ +from cmind import utils +import os +import json +import shutil +import subprocess +import mlperf_utils +from log_parser import MLPerfLog + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if 'cmd' in i['input']: + state['mlperf_inference_run_cmd'] = "cm run script " + \ + " ".join(i['input']['cmd']) + + state['mlperf-inference-implementation'] = {} + + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation']['script_id'] = run_state['script_id'] + \ + ":" + ",".join(run_state['script_variation_tags']) + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + inp = i['input'] + os_info = i['os_info'] + + xsep = '^' if os_info['platform'] == 'windows' else '\\' + + env['CMD'] = '' + + # if env.get('CM_MLPERF_USER_CONF', '') == '': + # return {'return': 0} + + output_dir = env['CM_MLPERF_OUTPUT_DIR'] + mode = env['CM_MLPERF_LOADGEN_MODE'] + + model = env['CM_MODEL'] + model_full_name = env.get('CM_ML_MODEL_FULL_NAME', model) + + scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + + if not os.path.exists(output_dir) or not os.path.exists( + os.path.join(output_dir, "mlperf_log_summary.txt")): + # No output, fake_run? + return {'return': 0} + + mlperf_log = MLPerfLog(os.path.join(output_dir, "mlperf_log_detail.txt")) + if mode == "performance": + result = mlperf_log['result_mean_latency_ns'] / 1000000 + elif mode == "accuracy": + if not env.get( + 'CM_COGNATA_ACCURACY_DUMP_FILE'): # can happen while reusing old runs + env['CM_COGNATA_ACCURACY_DUMP_FILE'] = os.path.join( + output_dir, "accuracy.txt") + acc = "" + if os.path.exists(env['CM_COGNATA_ACCURACY_DUMP_FILE']): + with open(env['CM_COGNATA_ACCURACY_DUMP_FILE'], "r") as f: + acc = f.readline() + result = acc + else: + return {'return': 1, 'error': f"Unknown mode {mode}"} + + valid = {'performance': True, 'accuracy': True} # it's a POC + power_result = None # No power measurement in POC + + # result, valid, power_result = mlperf_utils.get_result_from_log(env['CM_MLPERF_LAST_RELEASE'], model, scenario, output_dir, mode) + + if not state.get('cm-mlperf-inference-results'): + state['cm-mlperf-inference-results'] = {} + if not state.get('cm-mlperf-inference-results-last'): + state['cm-mlperf-inference-results-last'] = {} + if not state['cm-mlperf-inference-results'].get( + state['CM_SUT_CONFIG_NAME']): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ].get(model): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model].get(scenario): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario] = {} + + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario][mode] = result + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario][mode + '_valid'] = valid.get(mode, False) + + state['cm-mlperf-inference-results-last'][mode] = result + state['cm-mlperf-inference-results-last'][mode + + '_valid'] = valid.get(mode, False) + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml 
b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml index 305578a17a..f073011f83 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-amd/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml index a1f311cc74..1343835b68 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-dummy/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml index 0975f0b0b6..9a7c042d78 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-intel/_cm.yaml @@ -48,7 +48,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 85fddc989e..250d2dc86e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -26,10 +26,10 @@ default_env: CM_MLPERF_LOADGEN_SCENARIO: Offline CM_OUTPUT_FOLDER_NAME: test_results CM_MLPERF_RUN_STYLE: test - CM_TEST_QUERY_COUNT: '10' + CM_TEST_QUERY_COUNT: "10" CM_MLPERF_QUANTIZATION: off CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference - CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: "" docker: real_run: False @@ -67,7 +67,6 @@ input_mapping: network: CM_NETWORK_LOADGEN sut_servers: CM_NETWORK_LOADGEN_SUT_SERVERS - # Duplicate CM environment variables to the ones used in native apps env_key_mappings: CM_HOST_: HOST_ @@ -89,7 +88,6 @@ new_state_keys: # Dependencies on other CM scripts deps: - # Detect host OS features - tags: detect,os @@ -102,8 +100,8 @@ deps: # Detect/install python - tags: get,python names: - - python - - python3 + - python + - python3 # Detect CUDA if required - tags: get,cuda,_cudnn @@ -111,21 +109,18 @@ deps: - cuda enable_if_env: CM_MLPERF_DEVICE: - - gpu + - gpu CM_MLPERF_BACKEND: - - onnxruntime - - tf - - tflite - - pytorch + - onnxruntime + - tf + - tflite + - pytorch # Detect TensorRT if required - tags: get,nvidia,tensorrt enable_if_env: 
CM_MLPERF_BACKEND: - - tensorrt - - - + - tensorrt ######################################################################## # Install ML engines via CM @@ -133,54 +128,54 @@ deps: ## Onnx CPU Runtime - tags: get,generic-python-lib,_onnxruntime names: - - ml-engine-onnxruntime - - onnxruntime + - ml-engine-onnxruntime + - onnxruntime enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime - - tvm-onnx + - onnxruntime + - tvm-onnx CM_MLPERF_DEVICE: - - cpu - - rocm + - cpu + - rocm ## Onnx CUDA Runtime - tags: get,generic-python-lib,_onnxruntime_gpu names: - - ml-engine-onnxruntime-cuda + - ml-engine-onnxruntime-cuda enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime - - tvm-onnx + - onnxruntime + - tvm-onnx CM_MLPERF_DEVICE: - - gpu + - gpu skip_if_env: CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 + - 3d-unet-99 + - 3d-unet-99.9 ## resnet50 and 3d-unet need both onnxruntime and onnxruntime_gpu on cuda - tags: get,generic-python-lib,_onnxruntime enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime + - onnxruntime CM_MLPERF_DEVICE: - - gpu + - gpu CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 - - resnet50 + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 - tags: get,generic-python-lib,_onnxruntime_gpu env: CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS: "" enable_if_env: CM_MLPERF_BACKEND: - - onnxruntime + - onnxruntime CM_MLPERF_DEVICE: - - gpu + - gpu CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 - - resnet50 + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 ## Pytorch (CPU) - tags: get,generic-python-lib,_torch @@ -194,74 +189,80 @@ deps: - dlrm-v2-99.9 enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch + - pytorch + - tvm-pytorch CM_MLPERF_DEVICE: - - cpu - - rocm + - cpu + - rocm ## Pytorch (CUDA) - tags: get,generic-python-lib,_torch_cuda names: - - ml-engine-pytorch - - pytorch + - ml-engine-pytorch + - pytorch enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch - - ray + - pytorch + - tvm-pytorch + - ray CM_MLPERF_DEVICE: - - gpu + - gpu ## Torchvision (CPU) - tags: get,generic-python-lib,_torchvision names: - - ml-engine-torchvision - - torchvision + - ml-engine-torchvision + - torchvision skip_if_env: CM_MODEL: - dlrm-v2-99 - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch + - pytorch + - tvm-pytorch CM_MLPERF_DEVICE: - - cpu + - cpu ## Torchvision (CUDA) - tags: get,generic-python-lib,_torchvision_cuda names: - - ml-engine-torchvision - - torchvision + - ml-engine-torchvision + - torchvision + skip_if_env: + CM_MODEL: + - dlrm-v2-99 + - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - - pytorch - - tvm-pytorch - - ray + - pytorch + - tvm-pytorch + - ray CM_MLPERF_DEVICE: - - gpu + - gpu ## tensorrt - tags: get,generic-python-lib,_tensorrt names: - - ml-engine-tensorrt + - ml-engine-tensorrt enable_if_env: CM_MLPERF_BACKEND: - - ray + - ray ## torch_tensorrt - tags: get,generic-python-lib,_torch_tensorrt names: - - ml-engine-torch_tensorrt + - ml-engine-torch_tensorrt enable_if_env: CM_MLPERF_BACKEND: - - ray + - ray ## Ray - tags: get,generic-python-lib,_ray names: - - ray + - ray enable_if_env: CM_MLPERF_BACKEND: - ray @@ -271,7 +272,7 @@ deps: # async_timeout to be installed, so we need to install it manually. 
- tags: get,generic-python-lib,_async_timeout names: - - async_timeout + - async_timeout enable_if_env: CM_MLPERF_BACKEND: - ray @@ -279,49 +280,48 @@ deps: ## Transformers - tags: get,generic-python-lib,_transformers names: - - ml-engine-transformers + - ml-engine-transformers enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 - - gptj-99 - - gptj-99.9 + - bert-99 + - bert-99.9 + - gptj-99 + - gptj-99.9 ## Tensorflow - tags: get,generic-python-lib,_tensorflow names: - - ml-engine-tensorflow - - tensorflow + - ml-engine-tensorflow + - tensorflow enable_if_env: CM_MLPERF_BACKEND: - - tf + - tf ## NCNN - tags: get,generic-python-lib,_package.ncnn names: - - ml-engine-ncnn + - ml-engine-ncnn enable_if_env: CM_MLPERF_BACKEND: - - ncnn - + - ncnn + - tags: get,tensorflow,lib,_tflite names: - - ml-engine-tflite + - ml-engine-tflite enable_if_env: CM_MLPERF_BACKEND: - - tflite - + - tflite ######################################################################## - # Install ML models + # Install ML models - tags: get,ml-model,neural-magic,zoo # sets CM_MLPERF_CUSTOM_MODEL_PATH names: - - custom-ml-model + - custom-ml-model enable_if_env: CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB: - - "on" + - "on" update_tags_from_env_with_prefix: "_model-stub.": - CM_MLPERF_NEURALMAGIC_MODEL_ZOO_STUB @@ -329,93 +329,91 @@ deps: ## ResNet50 - tags: get,ml-model,image-classification,resnet50 names: - - ml-model - - resnet50-model + - ml-model + - resnet50-model enable_if_env: CM_MODEL: - - resnet50 + - resnet50 skip_if_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" ## RetinaNet - tags: get,ml-model,object-detection,retinanet names: - - ml-model - - retinanet-model + - ml-model + - retinanet-model enable_if_env: CM_MODEL: - - retinanet + - retinanet ## GPT-J - tags: get,ml-model,large-language-model,gptj names: - - ml-model - - gptj-model - - gpt-j-model + - ml-model + - gptj-model + - gpt-j-model enable_if_env: CM_MODEL: - - gptj-99 - - gptj-99.9 + - gptj-99 + - gptj-99.9 skip_if_env: CM_NETWORK_LOADGEN: - - lon - - + - lon ## RetinaNet (PyTorch weights, FP32) - tags: get,ml-model,object-detection,resnext50,fp32,_pytorch-weights names: - - ml-model - - retinanet-model + - ml-model + - retinanet-model enable_if_env: CM_MLPERF_BACKEND: - - pytorch + - pytorch CM_MLPERF_IMPLEMENTATION: - - nvidia + - nvidia CM_MODEL: - - retinanet + - retinanet ## BERT - tags: get,ml-model,language-processing,bert-large names: - - ml-model - - bert-model + - ml-model + - bert-model enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 + - bert-99 + - bert-99.9 skip_if_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" ## SDXL - tags: get,ml-model,stable-diffusion,text-to-image,sdxl names: - - ml-model - - sdxl-model - - ml-model-float16 + - ml-model + - sdxl-model + - ml-model-float16 enable_if_env: CM_MODEL: - - stable-diffusion-xl + - stable-diffusion-xl skip_if_any_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" skip_if_env: CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" ## LLAMA2-70B - tags: get,ml-model,llama2 names: - - ml-model - - llama2-model + - ml-model + - llama2-model enable_if_env: CM_MODEL: - - llama2-70b-99 - - llama2-70b-99.9 + - llama2-70b-99 + - llama2-70b-99.9 skip_if_any_env: CM_MLPERF_CUSTOM_MODEL_PATH: - "on" @@ -423,73 +421,86 @@ deps: - "on" skip_if_env: CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" ## mixtral-8x7b - tags: get,ml-model,mixtral names: - - ml-model - - mixtral-model + - ml-model + - 
mixtral-model enable_if_env: CM_MODEL: - - mixtral-8x7b + - mixtral-8x7b skip_if_any_env: CM_MLPERF_CUSTOM_MODEL_PATH: - - "on" + - "on" skip_if_env: CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" ## 3d-unet - tags: get,ml-model,medical-imaging,3d-unet names: - - ml-model - - 3d-unet-model + - ml-model + - 3d-unet-model enable_if_env: CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 + - 3d-unet-99 + - 3d-unet-99.9 ## Rnnt - tags: get,ml-model,speech-recognition,rnnt names: - - ml-model - - rnnt-model + - ml-model + - rnnt-model enable_if_env: CM_MODEL: - - rnnt + - rnnt ## Dlrm - tags: get,ml-model,recommendation,dlrm names: - - ml-model - - dlrm-model + - ml-model + - dlrm-model enable_if_env: CM_MODEL: - - dlrm-99 - - dlrm-99.9 - - dlrm-v2-99 - - dlrm-v2-99.9 + - dlrm-99 + - dlrm-99.9 + - dlrm-v2-99 + - dlrm-v2-99.9 skip_if_env: CM_ML_MODEL_FILE_WITH_PATH: - - 'on' - + - "on" ## RGAT - tags: get,ml-model,rgat names: - - ml-model - - rgat-model + - rgat-model enable_if_env: CM_MODEL: - - rgat + - rgat skip_if_env: RGAT_CHECKPOINT_PATH: - - 'on' + - "on" + + ## LLAMA3_1-405B + - tags: get,ml-model,llama3 + names: + - llama3-405b-model + - llama3-402b-model + enable_if_env: + CM_MODEL: + - llama3_1-405b + - llama3-405b + skip_if_env: + CM_USE_MODEL_FROM_HOST: + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ######################################################################## # Install datasets @@ -497,129 +508,153 @@ deps: ## ImageNet (small for tests) - tags: get,dataset,image-classification,imagenet,preprocessed names: - - imagenet-preprocessed + - imagenet-preprocessed enable_if_env: CM_MODEL: - - resnet50 + - resnet50 skip_if_env: CM_MLPERF_VISION_DATASET_OPTION: - - on + - on - tags: get,dataset,image-classification,imagenet,preprocessed,_pytorch names: - - imagenet-preprocessed + - imagenet-preprocessed enable_if_env: CM_MODEL: - - resnet50 + - resnet50 CM_MLPERF_VISION_DATASET_OPTION: - - imagenet_pytorch + - imagenet_pytorch - tags: get,dataset-aux,image-classification,imagenet-aux enable_if_env: CM_MODEL: - - resnet50 + - resnet50 ## Open Images for RetinaNet - tags: get,dataset,object-detection,open-images,openimages,preprocessed,_validation names: - - openimages-preprocessed + - openimages-preprocessed enable_if_env: CM_MODEL: - - retinanet + - retinanet ## CNNDM for Large Language Model - tags: get,dataset,cnndm,_validation names: - - cnndm-original + - cnndm-original enable_if_env: CM_MODEL: - - gptj-99 - - gptj-99.9 + - gptj-99 + - gptj-99.9 ## Squad for BERT - tags: get,dataset,squad,original names: - - squad-original + - squad-original enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 + - bert-99 + - bert-99.9 - tags: get,dataset-aux,squad-vocab enable_if_env: CM_MODEL: - - bert-99 - - bert-99.9 + - bert-99 + - bert-99.9 ## COCO for SDXL - tags: get,dataset,coco2014,_validation names: - - coco2014-preprocessed - - coco2014-dataset + - coco2014-preprocessed + - coco2014-dataset enable_if_env: CM_MODEL: - - stable-diffusion-xl + - stable-diffusion-xl ## OpenOrca for LLAMA2-70b - tags: get,preprocessed,dataset,openorca,_validation,_mlcommons names: - - openorca-preprocessed + - openorca-preprocessed enable_if_env: CM_MODEL: - - llama2-70b-99 - - llama2-70b-99.9 + - llama2-70b-99 + - llama2-70b-99.9 ## OpenOrca,mbxp,gsm8k combined dataset for mixtral-8x7b - tags: get,dataset-mixtral,openorca-mbxp-gsm8k-combined names: - - openorca-mbxp-gsm8k-combined-preprocessed + - openorca-mbxp-gsm8k-combined-preprocessed enable_if_env: 
CM_MODEL: - - mixtral-8x7b + - mixtral-8x7b skip_if_env: CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ## Kits19 for 3d-unet - tags: get,dataset,kits19,preprocessed names: - - kits19-preprocessed + - kits19-preprocessed enable_if_env: CM_MODEL: - - 3d-unet-99 - - 3d-unet-99.9 + - 3d-unet-99 + - 3d-unet-99.9 skip_if_env: CM_MLPERF_DATASET_3DUNET_DOWNLOAD_TO_HOST: - - 'yes' + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ## Librispeech for rnnt - tags: get,dataset,librispeech,preprocessed names: - - librispeech-preprocessed + - librispeech-preprocessed enable_if_env: CM_MODEL: - - rnnt + - rnnt ## Criteo for dlrm - tags: get,dataset,criteo,preprocessed,_mlc names: - - criteo-preprocessed + - criteo-preprocessed enable_if_env: CM_MODEL: - - dlrm-v2-99 - - dlrm-v2-99.9 + - dlrm-v2-99 + - dlrm-v2-99.9 skip_if_env: CM_CRITEO_PREPROCESSED_PATH: - - on + - on ## igbh for rgat - tags: get,dataset,mlperf,inference,igbh names: - - igbh-dataset - - illinois-graph-benchmark-heterogeneous + - igbh-dataset + - illinois-graph-benchmark-heterogeneous enable_if_env: CM_MODEL: - - rgat + - rgat + skip_if_env: + CM_RUN_STATE_DOCKER: + - "yes" + CM_USE_DATASET_FROM_HOST: + - "yes" + + ## llama3_1 dataset + - tags: get,dataset,mlperf,inference,llama3,_validation + names: + - llama3_1-dataset + - llama3-dataset + enable_if_env: + CM_MODEL: + - llama3_1-405b + - llama3-402b + skip_if_env: + CM_USE_DATASET_FROM_HOST: + - "yes" + CM_RUN_STATE_DOCKER: + - "yes" ######################################################################## # Install MLPerf inference dependencies @@ -627,47 +662,46 @@ deps: # Creates user conf for given SUT - tags: generate,user-conf,mlperf,inference names: - - user-conf-generator + - user-conf-generator skip_if_env: CM_RUN_STATE_DOCKER: - - 'yes' + - "yes" # Install MLPerf loadgen - tags: get,loadgen names: - - loadgen - - mlperf-inference-loadgen + - loadgen + - mlperf-inference-loadgen # Download MLPerf inference source - tags: get,mlcommons,inference,src names: - - inference-src - + - inference-src # Download MLPerf inference source - tags: get,mlcommons,inference,src env: - CM_GET_MLPERF_IMPLEMENTATION_ONLY: 'yes' + CM_GET_MLPERF_IMPLEMENTATION_ONLY: "yes" names: - - mlperf-implementation + - mlperf-implementation - tags: get,generic-python-lib,_package.psutil prehook_deps: - names: - - remote-run-cmds + - remote-run-cmds tags: remote,run,cmds enable_if_env: CM_ASSH_RUN_COMMANDS: - - "on" + - "on" -posthook_deps: +posthook_deps: - names: - - mlperf-runner + - mlperf-runner tags: benchmark-mlperf skip_if_env: CM_MLPERF_SKIP_RUN: - - "on" + - "on" post_deps: - tags: save,mlperf,inference,state @@ -683,7 +717,7 @@ variations: imagenet-accuracy-script: tags: _float32 env: - CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_PYTHON: "yes" CM_MLPERF_IMPLEMENTATION: reference # ML engine @@ -807,14 +841,14 @@ variations: CM_MLPERF_BACKEND: deepsparse CM_MLPERF_BACKEND_VERSION: <<>> deps: - - tags: get,generic-python-lib,_deepsparse - skip_if_env: - CM_HOST_PLATFORM_FLAVOR: - - aarch64 - - tags: get,generic-python-lib,_package.deepsparse-nightly - enable_if_env: - CM_HOST_PLATFORM_FLAVOR: - - aarch64 + - tags: get,generic-python-lib,_deepsparse + skip_if_env: + CM_HOST_PLATFORM_FLAVOR: + - aarch64 + - tags: get,generic-python-lib,_package.deepsparse-nightly + enable_if_env: + CM_HOST_PLATFORM_FLAVOR: + - aarch64 add_deps_recursive: mlperf-implementation: version: deepsparse @@ -827,92 +861,91 @@ variations: CM_MLPERF_BACKEND: tvm-onnx 
CM_MLPERF_BACKEND_VERSION: <<>> deps: - - tags: get,generic-python-lib,_onnx - - tags: get,generic-python-lib,_numpy - version_max: "1.26.4" - version_max_usable: "1.26.4" - - tags: get,tvm - names: - - tvm - - tags: get,tvm-model,_onnx - names: - - tvm-model - update_tags_from_env_with_prefix: - _model.: - - CM_MODEL - - + - tags: get,generic-python-lib,_onnx + - tags: get,generic-python-lib,_numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" + - tags: get,tvm + names: + - tvm + - tags: get,tvm-model,_onnx + names: + - tvm-model + update_tags_from_env_with_prefix: + _model.: + - CM_MODEL + tvm-tflite: group: framework env: CM_MLPERF_BACKEND: tvm-tflite CM_MLPERF_BACKEND_VERSION: <<>> deps: - - tags: get,generic-python-lib,_tflite - - tags: get,tvm - names: - - tvm - - tags: get,tvm-model,_tflite - names: - - tvm-model - update_tags_from_env_with_prefix: - _model.: - - CM_MODEL + - tags: get,generic-python-lib,_tflite + - tags: get,tvm + names: + - tvm + - tags: get,tvm-model,_tflite + names: + - tvm-model + update_tags_from_env_with_prefix: + _model.: + - CM_MODEL tvm-pytorch: group: framework env: CM_MLPERF_BACKEND: tvm-pytorch CM_MLPERF_BACKEND_VERSION: <<>> - CM_PREPROCESS_PYTORCH: 'yes' + CM_PREPROCESS_PYTORCH: "yes" MLPERF_TVM_TORCH_QUANTIZED_ENGINE: qnnpack deps: - - tags: get,generic-python-lib,_torch - names: - - torch - - pytorch - - tags: get,tvm - names: - - tvm - - tags: get,tvm-model,_pytorch - names: - - tvm-model - update_tags_from_env_with_prefix: - _model.: - - CM_MODEL + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch + - tags: get,tvm + names: + - tvm + - tags: get,tvm-model,_pytorch + names: + - tvm-model + update_tags_from_env_with_prefix: + _model.: + - CM_MODEL # Reference MLPerf models gptj-99.9: group: models base: - - gptj_ + - gptj_ env: CM_MODEL: gptj-99.9 gptj-99: group: models base: - - gptj_ + - gptj_ env: CM_MODEL: gptj-99 gptj_: deps: - - tags: get,generic-python-lib,_package.datasets - - tags: get,generic-python-lib,_package.attrs - - tags: get,generic-python-lib,_package.accelerate + - tags: get,generic-python-lib,_package.datasets + - tags: get,generic-python-lib,_package.attrs + - tags: get,generic-python-lib,_package.accelerate bert-99.9: group: models base: - - bert + - bert env: CM_MODEL: bert-99.9 bert-99: group: models base: - - bert + - bert env: CM_MODEL: bert-99 @@ -920,29 +953,29 @@ variations: env: CM_MLPERF_MODEL_SKIP_BATCHING: true deps: - - tags: get,generic-python-lib,_package.pydantic - - tags: get,generic-python-lib,_tokenization - - tags: get,generic-python-lib,_six - - tags: get,generic-python-lib,_package.absl-py - - tags: get,generic-python-lib,_protobuf - names: - - protobuf - version_max: "3.19" - enable_if_env: - CM_MLPERF_BACKEND: - - tf - - tflite - - tags: get,generic-python-lib,_boto3 - enable_if_env: - CM_MLPERF_BACKEND: - - pytorch - - tags: get,generic-python-lib,_torch - names: - - ml-engine-pytorch - - pytorch - skip_if_env: - CM_MLPERF_DEVICE: - - gpu + - tags: get,generic-python-lib,_package.pydantic + - tags: get,generic-python-lib,_tokenization + - tags: get,generic-python-lib,_six + - tags: get,generic-python-lib,_package.absl-py + - tags: get,generic-python-lib,_protobuf + names: + - protobuf + version_max: "3.19" + enable_if_env: + CM_MLPERF_BACKEND: + - tf + - tflite + - tags: get,generic-python-lib,_boto3 + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tags: get,generic-python-lib,_torch + names: + - ml-engine-pytorch + - pytorch + skip_if_env: + CM_MLPERF_DEVICE: + - gpu 
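+      # (Editorial sketch, not upstream meta.) enable_if_env / skip_if_env gate
+      # each dependency on CM environment values, as used throughout this file;
+      # a hypothetical extra dep pulled in only for GPU runs would look like:
+      #
+      #   - tags: get,generic-python-lib,_package.nvidia-ml-py  # hypothetical package
+      #     enable_if_env:
+      #       CM_MLPERF_DEVICE:
+      #         - gpu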
add_deps_recursive: inference-src: tags: _deeplearningexamples @@ -1087,7 +1120,7 @@ variations: - tags: get,generic-python-lib,_mxeval names: - rouge-score - + mixtral-8x7b,cuda: default_env: CM_MLPERF_LOADGEN_BATCH_SIZE: 1 @@ -1095,14 +1128,14 @@ variations: 3d-unet-99.9: group: models base: - - 3d-unet + - 3d-unet env: CM_MODEL: 3d-unet-99.9 3d-unet-99: group: models base: - - 3d-unet + - 3d-unet env: CM_MODEL: 3d-unet-99 @@ -1111,23 +1144,23 @@ variations: CM_TMP_IGNORE_MLPERF_QUERY_COUNT: true CM_MLPERF_MODEL_SKIP_BATCHING: true deps: - - tags: get,generic-python-lib,_package.nibabel - - tags: get,generic-python-lib,_package.scipy - names: - - scipy - version: 1.10.1 + - tags: get,generic-python-lib,_package.nibabel + - tags: get,generic-python-lib,_package.scipy + names: + - scipy + version: 1.10.1 dlrm-v2-99.9: group: models base: - - dlrm-v2_ + - dlrm-v2_ env: CM_MODEL: dlrm-v2-99.9 dlrm-v2-99: group: models base: - - dlrm-v2_ + - dlrm-v2_ env: CM_MODEL: dlrm-v2-99 @@ -1138,34 +1171,33 @@ variations: dlrm-v2_,pytorch: deps: - - tags: get,dlrm,src - names: - - dlrm-src - # to force the version - - tags: get,generic-python-lib,_torch - names: - - torch - - pytorch - - ml-engine-pytorch - - tags: get,generic-python-lib,_mlperf_logging - - tags: get,generic-python-lib,_opencv-python - - tags: get,generic-python-lib,_tensorboard - - tags: get,generic-python-lib,_protobuf - - tags: get,generic-python-lib,_scikit-learn - - tags: get,generic-python-lib,_tqdm - - tags: get,generic-python-lib,_onnx - - tags: get,generic-python-lib,_numpy - names: - - numpy - - tags: get,generic-python-lib,_package.pyre-extensions - - tags: get,generic-python-lib,_package.torchsnapshot - - tags: get,generic-python-lib,_package.torchmetrics - - tags: get,generic-python-lib,_package.torchrec - - tags: get,generic-python-lib,_package.fbgemm-gpu - - tags: get,generic-python-lib,_package.fbgemm-gpu-cpu - - tags: get,generic-python-lib,_package.fvcore - - tags: set,user,limit,_large-nofile - + - tags: get,dlrm,src + names: + - dlrm-src + # to force the version + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch + - ml-engine-pytorch + - tags: get,generic-python-lib,_mlperf_logging + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_tensorboard + - tags: get,generic-python-lib,_protobuf + - tags: get,generic-python-lib,_scikit-learn + - tags: get,generic-python-lib,_tqdm + - tags: get,generic-python-lib,_onnx + - tags: get,generic-python-lib,_numpy + names: + - numpy + - tags: get,generic-python-lib,_package.pyre-extensions + - tags: get,generic-python-lib,_package.torchsnapshot + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.torchrec + - tags: get,generic-python-lib,_package.fbgemm-gpu + - tags: get,generic-python-lib,_package.fbgemm-gpu-cpu + - tags: get,generic-python-lib,_package.fvcore + - tags: set,user,limit,_large-nofile rnnt: group: models @@ -1174,77 +1206,128 @@ variations: CM_MLPERF_MODEL_SKIP_BATCHING: true CM_TMP_IGNORE_MLPERF_QUERY_COUNT: true deps: - - tags: get,generic-python-lib,_package.pydantic - version_max: "1.10.9" - - tags: get,generic-python-lib,_librosa - names: - - librosa - - tags: get,generic-python-lib,_inflect - - tags: get,generic-python-lib,_unidecode - - tags: get,generic-python-lib,_toml + - tags: get,generic-python-lib,_package.pydantic + version_max: "1.10.9" + - tags: get,generic-python-lib,_librosa + names: + - librosa + - tags: get,generic-python-lib,_inflect + - tags: 
get,generic-python-lib,_unidecode + - tags: get,generic-python-lib,_toml retinanet: group: models deps: - - tags: get,generic-python-lib,_opencv-python - - tags: get,generic-python-lib,_numpy - names: - - numpy - - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + names: + - numpy + - tags: get,generic-python-lib,_pycocotools env: CM_MODEL: retinanet - CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: 'yes' - CM_MLPERF_LOADGEN_MAX_BATCHSIZE: '1' + CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: "yes" + CM_MLPERF_LOADGEN_MAX_BATCHSIZE: "1" resnet50: group: models default: true env: CM_MODEL: resnet50 - CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: 'yes' + CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: "yes" deps: - - tags: get,generic-python-lib,_opencv-python - - tags: get,generic-python-lib,_numpy - names: - - numpy - - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + names: + - numpy + - tags: get,generic-python-lib,_pycocotools prehook_deps: - - tags: get,generic-python-lib,_protobuf - names: - - protobuf - version_max: "4.23.4" - version_max_usable: "4.23.4" - enable_if_env: - CM_MLPERF_BACKEND: - - tf - - tflite + - tags: get,generic-python-lib,_protobuf + names: + - protobuf + version_max: "4.23.4" + version_max_usable: "4.23.4" + enable_if_env: + CM_MLPERF_BACKEND: + - tf + - tflite rgat: group: models env: CM_MODEL: rgat + add_deps_recursive: + pytorch: + version_max: "2.4.0" + version_max_usable: "2.4.0" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata + - tags: get,generic-python-lib,_package.pybind11 + - tags: get,generic-python-lib,_package.PyYAML + - tags: get,generic-python-lib,_package.numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" + - tags: get,generic-python-lib,_package.pydantic + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - tags: get,generic-python-lib,_package.torch-geometric + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-scatter + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-sparse + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL + - tags: get,generic-python-lib,_package.dgl + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL + + rgat,cuda: + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" + + rgat,cpu: + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/repo.html" + + llama3_1-405b: + group: models + env: + CM_MODEL: llama3_1-405b + adr: + pytorch: + version_max: 2.5.1 + CM_MODEL: llama3-402b + deps: + - tags: get,generic-python-lib,_package.torchvision + - tags: get,generic-python-lib,_package.torchaudio + - tags: get,generic-python-lib,_package.torch-geometric + - tags: 
get,generic-python-lib,_package.transformers + - tags: get,generic-python-lib,_package.sentencepiece + - tags: get,generic-python-lib,_package.accelerate + - tags: get,generic-python-lib,_package.vllm + env: + CM_GENERIC_PYTHON_PIP_EXTRA: "--upgrade" - tags: get,generic-python-lib,_package.pybind11 - - tags: get,generic-python-lib,_package.PyYAML - - tags: get,generic-python-lib,_package.pydantic - - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - cpu - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - gpu + - tags: get,generic-python-lib,_package.pandas + version_max: 2.2.1 + + llama3_1-405b,cuda: + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + llama3_1-405b,cpu: + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" # Target devices cpu: @@ -1252,7 +1335,7 @@ variations: default: true env: CM_MLPERF_DEVICE: cpu - CUDA_VISIBLE_DEVICES: '' + CUDA_VISIBLE_DEVICES: "" USE_CUDA: no USE_GPU: no @@ -1299,8 +1382,7 @@ variations: default: true add_deps_recursive: ml-model: - tags: - _fp32 + tags: _fp32 env: CM_MLPERF_QUANTIZATION: off CM_MLPERF_MODEL_PRECISION: float32 @@ -1310,8 +1392,7 @@ variations: group: precision add_deps_recursive: ml-model-float16: - tags: - _fp16 + tags: _fp16 env: CM_MLPERF_QUANTIZATION: off CM_MLPERF_MODEL_PRECISION: float16 @@ -1321,8 +1402,7 @@ variations: group: precision add_deps_recursive: ml-model-float16: - tags: - _fp16 + tags: _fp16 env: CM_MLPERF_QUANTIZATION: off CM_MLPERF_MODEL_PRECISION: bfloat16 @@ -1334,8 +1414,7 @@ variations: CM_MLPERF_MODEL_PRECISION: int8 add_deps_recursive: ml-model: - tags: - _int8 + tags: _int8 quantized: alias: int8 @@ -1346,11 +1425,9 @@ variations: CM_MLPERF_LOADGEN_MAX_BATCHSIZE: "#" add_deps_recursive: ml-model: - tags: - _batch_size.# + tags: _batch_size.# tvm-model: - tags: - _batch_size.# + tags: _batch_size.# network-sut: group: network @@ -1382,6 +1459,6 @@ variations: loadgen: version: r2.1 env: - CM_RERUN: 'yes' - CM_SKIP_SYS_UTILS: 'yes' - CM_TEST_QUERY_COUNT: '100' + CM_RERUN: "yes" + CM_SKIP_SYS_UTILS: "yes" + CM_TEST_QUERY_COUNT: "100" diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py index 87e09151ba..fc1e8450b8 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-mlcommons-python/customize.py @@ -79,7 +79,7 @@ def preprocess(i): str(env['CM_MLPERF_LOADGEN_BATCH_SIZE']) if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get('CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and ( - env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid": + env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL'] or 'llama3' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid": env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \ 
env['CM_MLPERF_LOADGEN_QUERY_COUNT'] @@ -126,17 +126,19 @@ def preprocess(i): scenario_extra_options = '' NUM_THREADS = env['CM_NUM_THREADS'] - if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": + if int( + NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu" and env['CM_MODEL'] != "rgat": NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU - if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: + if env['CM_MODEL'] in ['resnet50', 'retinanet', + 'stable-diffusion-xl', 'rgat']: scenario_extra_options += " --threads " + NUM_THREADS ml_model_name = env['CM_MODEL'] if 'CM_MLPERF_USER_CONF' in env: user_conf_path = env['CM_MLPERF_USER_CONF'] x = "" if os_info['platform'] == 'windows' else "'" - if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"]: + if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"] or "llama3" in env["CM_MODEL"]: scenario_extra_options += " --user-conf " + x + user_conf_path + x else: scenario_extra_options += " --user_conf " + x + user_conf_path + x @@ -397,7 +399,9 @@ def get_run_cmd_reference( env['CM_VLLM_SERVER_MODEL_NAME'] = env.get( "CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" # env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" - cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + cmd += f""" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} \ + --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} \ + --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm """ else: cmd += f" --model-path {env['LLAMA2_CHECKPOINT_PATH']}" @@ -496,15 +500,40 @@ def get_run_cmd_reference( # have to add the condition for running in debug mode or real run mode cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ - " --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \ - " --device " + device.replace("cuda", "cuda:0") + \ + " --dataset-path " + env['CM_DATASET_IGBH_PATH'] + \ + " --device " + device.replace("cuda", "gpu") + \ env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + dtype_rgat + \ - " --model-path " + env['RGAT_CHECKPOINT_PATH'] + \ - " --mlperf_conf " + \ - os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + " --model-path " + env['RGAT_CHECKPOINT_PATH'] + + if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": + cmd += " --in-memory " + + elif "llama3" in env['CM_MODEL']: + env['RUN_DIR'] = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], + "language", + "llama3.1-405b") + + if int(env.get('CM_MLPERF_INFERENCE_TP_SIZE', '')) > 1: + env['VLLM_WORKER_MULTIPROC_METHOD'] = "spawn" + + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ + " --dataset-path " + env['CM_DATASET_LLAMA3_PATH'] + \ + " --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \ + ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ + " --model-path " + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + \ + " --tensor-parallel-size " + env['CM_MLPERF_INFERENCE_TP_SIZE'] + \ + " --vllm " + + if env.get('CM_MLPERF_INFERENCE_NUM_WORKERS', '') != '': + cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}" + + cmd = cmd.replace("--count", "--total-sample-count") + cmd = cmd.replace("--max-batchsize", "--batch-size") if env.get('CM_NETWORK_LOADGEN', 
'') in ["lon", "sut"]: cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN'] diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml index 0547783f60..5b96c7f656 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/_cm.yaml @@ -1823,7 +1823,7 @@ variations: default_variations: batch-size: batch_size.2048 env: - CM_MLPERF_PERFORMANCE_SAMPLE_COUNT: "2048" + CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT: "2048" a100,sxm,retinanet,offline,run_harness: default_variations: diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py index 0ede381f80..3653c1f9a2 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-nvidia/customize.py @@ -48,14 +48,14 @@ def preprocess(i): make_command = env['MLPERF_NVIDIA_RUN_COMMAND'] if make_command == "prebuild": - cmds.append(f"make prebuild NETWORK_NODE=SUT") + cmds.append(f"""make prebuild NETWORK_NODE=SUT""") if env['CM_MODEL'] == "resnet50": target_data_path = os.path.join( env['MLPERF_SCRATCH_PATH'], 'data', 'imagenet') if not os.path.exists(target_data_path): cmds.append( - f"ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}") + f"""ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}""") model_path = os.path.join( env['MLPERF_SCRATCH_PATH'], @@ -64,11 +64,11 @@ def preprocess(i): 'resnet50_v1.onnx') if not os.path.exists(os.path.dirname(model_path)): - cmds.append(f"mkdir -p {os.path.dirname(model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(model_path)}""") if not os.path.exists(model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}") + f"""ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}""") model_name = "resnet50" elif "bert" in env['CM_MODEL']: @@ -94,17 +94,17 @@ def preprocess(i): 'vocab.txt') if not os.path.exists(os.path.dirname(fp32_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp32_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp32_model_path)}""") if not os.path.exists(fp32_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}""") if not os.path.exists(int8_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}""") if not os.path.exists(vocab_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}""") model_name = "bert" model_path = fp32_model_path @@ -123,9 +123,9 @@ def preprocess(i): # cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'") env['CM_REQUIRE_COCO2014_DOWNLOAD'] = 'yes' cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv") + f"""cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv""") cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt") + f"""cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt 
{target_data_path}/latents.pt""") fp16_model_path = os.path.join( env['MLPERF_SCRATCH_PATH'], 'models', @@ -135,7 +135,7 @@ def preprocess(i): 'stable_diffusion_fp16') if not os.path.exists(os.path.dirname(fp16_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp16_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp16_model_path)}""") if not os.path.exists(fp16_model_path): if os.path.islink(fp16_model_path): @@ -698,11 +698,15 @@ def preprocess(i): '') # will be ignored during build engine if "stable-diffusion" in env["CM_MODEL"]: - extra_build_engine_options_string += f" --model_path {os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'SDXL/')}" + extra_build_engine_options_string += f""" --model_path { + os.path.join( + env['MLPERF_SCRATCH_PATH'], + 'models', + 'SDXL/')}""" run_config += " --no_audit_verify" - cmds.append(f"make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'") + cmds.append(f"""make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'""") run_cmd = " && ".join(cmds) env['CM_MLPERF_RUN_CMD'] = run_cmd diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml index 8de84ac084..5e3de43029 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-qualcomm/_cm.yaml @@ -49,7 +49,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF devices: CM_QAIC_DEVICES diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml index 75f460f370..2c7011bd58 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference-redhat/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml index 6e95a00827..7596b30efe 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/_cm.yaml @@ -66,6 +66,8 @@ input_mapping: tp_size: CM_NVIDIA_TP_SIZE use_dataset_from_host: CM_USE_DATASET_FROM_HOST +predeps: False + # Duplicate CM environment variables to the ones used in native apps env_key_mappings: CM_HOST_: HOST_ @@ -219,6 +221,8 @@ variations: tags: _int32 cnndm-accuracy-script: tags: _int32 + llama3_1-405b-accuracy-script: + tags: _int32 env: CM_MLPERF_PYTHON: 'yes' CM_MLPERF_IMPLEMENTATION: mlcommons_python @@ -270,6 +274,10 @@ variations: default_variations: backend: pytorch + reference,llama3_1-405b: + 
default_variations: + backend: pytorch + reference,mixtral-8x7b: default_variations: backend: pytorch @@ -336,12 +344,16 @@ variations: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' - skip_if_env: CM_HOST_PLATFORM_FLAVOR: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.1-cuda12.4-pytorch24.04-ubuntu22.04-aarch64-GraceHopper-release + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp310-cp310-linux_aarch64.whl' @@ -403,7 +415,7 @@ variations: nvidia-original: docker: interactive: True - extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public os: "ubuntu" os_version: "20.04" @@ -428,7 +440,7 @@ variations: CM_HOST_OS_VERSION: - 20.04 docker: - extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --runtime=nvidia --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' default_variations: backend: tensorrt @@ -765,6 +777,63 @@ variations: env: CM_MODEL: rgat + posthook_deps: + - enable_if_env: + CM_MLPERF_LOADGEN_MODE: + - accuracy + - all + CM_MLPERF_ACCURACY_RESULTS_DIR: + - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia + names: + - mlperf-accuracy-script + - 3d-unet-accuracy-script + tags: run,accuracy,mlperf,_igbh + docker: + deps: + - tags: get,dataset,igbh + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + names: + - igbh-original + - igbh-dataset + + llama3_1-405b: + group: + model + add_deps_recursive: + mlperf-inference-implementation: + tags: _llama3_1-405b + env: + CM_MODEL: + llama3_1-405b + posthook_deps: + - enable_if_env: + CM_MLPERF_LOADGEN_MODE: + - accuracy + - all + CM_MLPERF_ACCURACY_RESULTS_DIR: + - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia + names: + - mlperf-accuracy-script + - llama3_1-405b-accuracy-script + tags: run,accuracy,mlperf,_dataset_llama3 + docker: + deps: + - tags: get,ml-model,llama3 + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + names: + - llama3_1-405b + - llama3-405b + sdxl: group: @@ -1602,7 +1671,7 @@ variations: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl' + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' #uses public code for inference v4.1 @@ -1621,8 +1690,6 @@ variations: default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' - env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' r4.1_default: group: @@ -1643,6 +1710,27 @@ variations: CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' CM_MLPERF_INFERENCE_VERSION: '4.1' + r5.0-dev_default: + group: + reproducibility + add_deps_recursive: + nvidia-inference-common-code: + version: r4.1 + tags: _mlcommons + nvidia-inference-server: + version: r4.1 + tags: _mlcommons + intel-harness: + tags: _v4.1 + 
inference-src: + version: r5.0 + default_env: + CM_SKIP_SYS_UTILS: 'yes' + CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + + invalid_variation_combinations: - - retinanet @@ -1729,13 +1817,32 @@ input_description: debug: desc: "Debug MLPerf script" -gui: - title: "CM GUI for the MLPerf inference benchmark" +update_meta_if_env: + - enable_if_env: + CM_CONTAINER_TOOL: + - podman + # podman maps the host userid to the root user inside the container + docker: + use_host_group_id: False + use_host_user_id: False + pass_user_group: False #useful if docker is run by a different user from the one who built it and under the same group + default_env: + CM_DOCKER_USE_DEFAULT_USER: 'yes' + - skip_if_env: + CM_CONTAINER_TOOL: + - podman + docker: + use_host_group_id: True + use_host_user_id: True + pass_user_group: True #useful if docker is run by a different user from the one who built it and under the same group + - enable_if_env: + CM_HOST_OS_TYPE: + - linux + adr: + compiler: + tags: gcc docker: - use_host_group_id: True - use_host_user_id: True - pass_user_group: True #useful if docker is run by a different user fromt he one who built it and under the same group deps: - tags: get,mlperf,inference,results,dir,local names: @@ -1751,7 +1858,6 @@ docker: pre_run_cmds: #- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update - cm pull repo - - cm rm cache --tags=inference,src -f mounts: - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}" - "${{ CM_DATASET_OPENIMAGES_PATH }}:${{ CM_DATASET_OPENIMAGES_PATH }}" @@ -1766,13 +1872,15 @@ docker: - "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}" - "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}" - "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}" + - "${{ CM_DATASET_IGBH_PATH }}:${{ CM_DATASET_IGBH_PATH }}" + - "${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}:${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}" skip_run_cmd: 'no' shm_size: '32gb' interactive: True - extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev real_run: False os_version: '22.04' docker_input_mapping: diff --git a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py index 07fb7cb4ed..30bbf07328 100644 --- a/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/app-mlperf-inference/customize.py @@ -129,6 +129,8 @@ def postprocess(i): if mode == "accuracy" or mode == "compliance" and env[ 'CM_MLPERF_LOADGEN_COMPLIANCE_TEST'] == "TEST01": + out_baseline_accuracy_string = f"""> {os.path.join(output_dir, "accuracy", "baseline_accuracy.txt")} """ + out_compliance_accuracy_string = f"""> {os.path.join(output_dir, "accuracy", "compliance_accuracy.txt")} """ if model == "resnet50": accuracy_filename = "accuracy-imagenet.py" accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", @@ 
-158,6 +160,17 @@ def postprocess(i): datatype_option = " --output_dtype " + \ env['CM_SQUAD_ACCURACY_DTYPE'] + elif 'rgat' in model: + accuracy_filename = "accuracy_igbh.py" + accuracy_filepath = os.path.join( + env['CM_MLPERF_INFERENCE_RGAT_PATH'], "tools", accuracy_filename) + dataset_args = " --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + \ + env['CM_DATASET_IGBH_SIZE'] + "'" + accuracy_log_file_option_name = " --mlperf-accuracy-file " + datatype_option = "" + out_baseline_accuracy_string = f""" --output-file {os.path.join(output_dir, "accuracy", "baseline_accuracy.txt")} """ + out_compliance_accuracy_string = f""" --output-file {os.path.join(output_dir, "accuracy", "compliance_accuracy.txt")} """ + elif 'stable-diffusion-xl' in model: pass # No compliance check for now elif 'gpt' in model: @@ -367,7 +380,7 @@ def postprocess(i): host_info['system_name'] = env['CM_HOST_SYSTEM_NAME'] # Check CM automation repository - repo_name = 'mlcommons@cm4mlops' + repo_name = 'mlcommons@mlperf-automations' repo_hash = '' r = cm.access({'action': 'find', 'automation': 'repo', 'artifact': 'mlcommons@cm4mlops,9e97bb72b0474657'}) @@ -401,9 +414,7 @@ def postprocess(i): cmd = "" xcmd = "" - readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/cm4mlops).\n\n" - - readme_init += "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n" + readme_init = "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n" readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), platform.processor(), sys.version, cm.__version__) @@ -513,6 +524,7 @@ def postprocess(i): cmd = "cd " + TEST01_DIR + " && bash " + SCRIPT_PATH + " " + os.path.join(ACCURACY_DIR, "mlperf_log_accuracy.json") + " " + \ os.path.join(COMPLIANCE_DIR, "mlperf_log_accuracy.json") env['CMD'] = cmd + print(cmd) r = automation.run_native_script( {'run_script_input': run_script_input, 'env': env, 'script_name': 'verify_accuracy'}) if r['return'] > 0: @@ -527,9 +539,11 @@ def postprocess(i): print("\nDeterministic TEST01 failed... 
Trying with non-determinism.\n") # #Normal test failed, trying the check with non-determinism + baseline_accuracy_file = os.path.join( + TEST01_DIR, "mlperf_log_accuracy_baseline.json") CMD = "cd " + ACCURACY_DIR + " && " + env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + accuracy_filepath + accuracy_log_file_option_name + \ - os.path.join(TEST01_DIR, "mlperf_log_accuracy_baseline.json") + dataset_args + datatype_option + " > " + \ - os.path.join(OUTPUT_DIR, "baseline_accuracy.txt") + baseline_accuracy_file + ' ' + dataset_args + \ + datatype_option + out_baseline_accuracy_string env['CMD'] = CMD r = automation.run_native_script( @@ -537,9 +551,13 @@ def postprocess(i): if r['return'] > 0: return r + if os.stat(baseline_accuracy_file).st_size == 0: + return {'return': 1, + 'error': f"{baseline_accuracy_file} is empty"} + CMD = "cd " + ACCURACY_DIR + " && " + env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + accuracy_filepath + accuracy_log_file_option_name + \ - os.path.join(TEST01_DIR, "mlperf_log_accuracy.json") + dataset_args + datatype_option + " > " + \ - os.path.join(OUTPUT_DIR, "compliance_accuracy.txt") + os.path.join(TEST01_DIR, "mlperf_log_accuracy.json") + \ + dataset_args + datatype_option + out_compliance_accuracy_string env['CMD'] = CMD r = automation.run_native_script( diff --git a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml index a9dc8cb677..8fd7c25719 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/_cm.yaml @@ -38,6 +38,9 @@ input_mapping: new_env_keys: - CM_DOCKER_* +deps: + - tags: get,docker + prehook_deps: - enable_if_env: CM_BUILD_DOCKERFILE: diff --git a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py index 4746e98c3c..4f2f1ac9e7 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/build-docker-image/customize.py @@ -48,14 +48,15 @@ def preprocess(i): # env['CM_BUILD_DOCKERFILE'] = "no" # if env.get("CM_DOCKER_IMAGE_REPO", "") == '': - env['CM_DOCKER_IMAGE_REPO'] = "local" + env['CM_DOCKER_IMAGE_REPO'] = "localhost/local" docker_image_name = env.get('CM_DOCKER_IMAGE_NAME', '') if docker_image_name == '': docker_image_name = "cm-script-" + \ env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '').replace( ',', '-').replace('_', '-') - env['CM_DOCKER_IMAGE_NAME'] = docker_image_name + + env['CM_DOCKER_IMAGE_NAME'] = docker_image_name.lower() if env.get("CM_DOCKER_IMAGE_TAG", "") == '': env['CM_DOCKER_IMAGE_TAG'] = "latest" @@ -76,7 +77,8 @@ def preprocess(i): # Prepare CMD to build image XCMD = [ - 'docker build ' + env.get('CM_DOCKER_CACHE_ARG', ''), + f'{env["CM_CONTAINER_TOOL"]} build ' + + env.get('CM_DOCKER_CACHE_ARG', ''), ' ' + build_args, ' -f "' + dockerfile_path + '"', ' -t "' + image_name, diff --git a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml index 7535311ea2..9f91c07752 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/_cm.yaml @@ -19,7 +19,7 @@ default_env: ' CM_DOCKER_OS: ubuntu CM_DOCKER_NOT_PULL_UPDATE: False - CM_MLOPS_REPO_BRANCH: mlperf-inference + CM_MLOPS_REPO_BRANCH: dev input_mapping: build: CM_BUILD_DOCKER_IMAGE @@ -57,6 +57,11 @@ input_mapping: new_env_keys: - CM_DOCKERFILE_* +deps: + - tags: get,docker + names: + - docker + 
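+# (Editorial sketch.) The get,docker dependency added above is presumably what
+# makes CM_CONTAINER_TOOL available here; customize.py consults it when picking
+# the in-container user, as in this snippet quoted from the change below:
+#
+#   if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get(
+#           'CM_DOCKER_USE_DEFAULT_USER', '') == '':
+#       env['CM_DOCKER_USE_DEFAULT_USER'] = 'yes'
+#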
post_deps: - enable_if_env: CM_BUILD_DOCKER_IMAGE: diff --git a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py index 3fdd1613e2..e50af9a536 100644 --- a/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/build-dockerfile/customize.py @@ -142,7 +142,7 @@ def preprocess(i): print( f"Converted repo format from {env['CM_MLOPS_REPO']} to {cm_mlops_repo}") else: - cm_mlops_repo = "mlcommons@cm4mlops" + cm_mlops_repo = "mlcommons@mlperf-automations" cm_mlops_repo_branch_string = f" --branch={env['CM_MLOPS_REPO_BRANCH']}" @@ -183,7 +183,7 @@ def preprocess(i): shell = get_value(env, config, 'SHELL', 'CM_DOCKER_IMAGE_SHELL') if shell: - f.write('SHELL ' + shell + EOL) + # f.write('SHELL ' + shell + EOL) f.write(EOL) for arg in config['ARGS_DEFAULT']: @@ -261,7 +261,12 @@ def preprocess(i): docker_user = get_value(env, config, 'USER', 'CM_DOCKER_USER') docker_group = get_value(env, config, 'GROUP', 'CM_DOCKER_GROUP') - if docker_user: + if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get( + 'CM_DOCKER_USE_DEFAULT_USER', '') == '': + env['CM_DOCKER_USE_DEFAULT_USER'] = 'yes' + + if docker_user and str(env.get('CM_DOCKER_USE_DEFAULT_USER', '')).lower() not in [ + "yes", "1", "true"]: f.write('RUN groupadd -g $GID -o ' + docker_group + EOL) @@ -277,14 +282,20 @@ def preprocess(i): ' ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers' + EOL) f.write('USER ' + docker_user + ":" + docker_group + EOL) + f.write('ENV HOME=/home/cmuser' + EOL) + + else: + f.write('ENV HOME=/root' + EOL) dockerfile_env = env.get('CM_DOCKERFILE_ENV', {}) dockerfile_env_input_string = "" for docker_env_key in dockerfile_env: dockerfile_env_input_string = dockerfile_env_input_string + " --env." + \ docker_env_key + "=" + str(dockerfile_env[docker_env_key]) + workdir = get_value(env, config, 'WORKDIR', 'CM_DOCKER_WORKDIR') - if workdir: + if workdir and ("/home/cmuser" not in workdir or str(env.get('CM_DOCKER_USE_DEFAULT_USER', '')).lower() not in [ + "yes", "1", "true"]): f.write('WORKDIR ' + workdir + EOL) f.write(EOL + '# Install python packages' + EOL) @@ -292,9 +303,10 @@ def preprocess(i): docker_use_virtual_python = env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', "yes") if str(docker_use_virtual_python).lower() not in ["no", "0", "false"]: - f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL) - f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL) + f.write('RUN {} -m venv $HOME/venv/cm'.format(python) + " " + EOL) + f.write('ENV PATH="$HOME/venv/cm/bin:$PATH"' + EOL) # f.write('RUN . 
/opt/venv/cm/bin/activate' + EOL) + f.write( 'RUN {} -m pip install '.format(python) + " ".join( @@ -310,7 +322,7 @@ def preprocess(i): f.write(EOL + '# Download CM repo for scripts' + EOL) if use_copy_repo: - docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@cm4mlops" + docker_repo_dest = "$HOME/CM/repos/mlcommons@mlperf-automations" f.write( f'COPY --chown=cmuser:cm {relative_repo_path} {docker_repo_dest}' + EOL) @@ -390,6 +402,26 @@ def preprocess(i): if run_cmd_extra != '': x += ' ' + run_cmd_extra + if env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '') != '' and str(env.get( + 'CM_DOCKER_ADD_DEPENDENT_SCRIPTS_RUN_COMMANDS', '')).lower() in ["yes", "1", "true"]: + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': f"""{env['CM_DOCKER_RUN_SCRIPT_TAGS']}""", + 'print_deps': True, + 'quiet': True, + 'silent': True, + 'fake_run': True, + 'fake_deps': True + } + r = self_module.cmind.access(cm_input) + if r['return'] > 0: + return r + print_deps = r['new_state']['print_deps'] + fake_run_str = " --fake_run" if env.get('CM_DOCKER_FAKE_DEPS') else "" + cmds = ["RUN " + dep for dep in print_deps] + for cmd in cmds: + f.write(cmd + fake_run_str + EOL) + f.write(x + EOL) # fake_run to install the dependent scripts and caching them diff --git a/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml index 4cea12c422..eb1d1a1570 100644 --- a/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/draw-graph-from-json-data/_cm.yaml @@ -19,3 +19,4 @@ deps: - python3 - tags: get,generic-python-lib,_package.networkx - tags: get,generic-python-lib,_package.matplotlib + - tags: get,generic-python-lib,_package.typing_extensions diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml index 32003a1b33..013997df9e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/_cm.yaml @@ -7,6 +7,7 @@ default_env: CM_MLPERF_RUN_STYLE: valid CM_MLPERF_SUBMISSION_DIR_SHARED: 'yes' CM_RUN_MLPERF_ACCURACY: 'on' +predeps: False deps: - names: - python @@ -31,8 +32,8 @@ deps: - 'on' tags: get,mlperf,submission,dir docker: - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev deps: - names: get-mlperf-inference-results-dir skip_if_env: @@ -68,6 +69,7 @@ input_mapping: device: CM_MLPERF_DEVICE division: CM_MLPERF_SUBMISSION_DIVISION duplicate: CM_MLPERF_DUPLICATE_SCENARIO_RESULTS + extra_checker_args: CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARG hw_name: CM_HW_NAME hw_notes_extra: CM_MLPERF_SUT_HW_NOTES_EXTRA infer_scenario_results: CM_MLPERF_DUPLICATE_SCENARIO_RESULTS @@ -84,6 +86,7 @@ input_mapping: sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA tar: CM_TAR_SUBMISSION_DIR get_platform_details: CM_GET_PLATFORM_DETAILS + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION post_deps: - enable_if_env: CM_RUN_MLPERF_ACCURACY: @@ -99,9 +102,9 @@ post_deps: - 'yes' - true tags: preprocess,mlperf,submission -- enable_if_env: +- skip_if_env: CM_RUN_SUBMISSION_CHECKER: - - 'yes' + - 'no' names: - mlperf-inference-submission-checker - submission-checker diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py 
b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py index e6e3507285..02161eb202 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-submission/customize.py @@ -171,11 +171,15 @@ def generate_submission(env, state, inp, submission_division): print('* MLPerf inference submitter: {}'.format(submitter)) if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA', '') != '': - sw_notes = f"{system_meta_tmp['sw_notes']} {env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" + sw_notes = f"""{ + system_meta_tmp['sw_notes']} { + env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}""" system_meta_tmp['sw_notes'] = sw_notes if env.get('CM_MLPERF_SUT_HW_NOTES_EXTRA', '') != '': - hw_notes = f"{system_meta_tmp['hw_notes']} {env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}" + hw_notes = f"""{ + system_meta_tmp['hw_notes']} { + env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}""" system_meta_tmp['hw_notes'] = hw_notes path_submission = os.path.join(path_submission_division, submitter) @@ -197,7 +201,7 @@ def generate_submission(env, state, inp, submission_division): result_path, 'system_meta.json') # checks for json file containing system meta sut_info = { - "hardware_name": None, + "system_name": None, "implementation": None, "device": None, "framework": None, @@ -283,7 +287,7 @@ def generate_submission(env, state, inp, submission_division): {model: returned_model_name}) if check_dict_filled(sut_info.keys(), sut_info): - system = sut_info["hardware_name"] + system = env.get('CM_HW_NAME', sut_info["system_name"]) implementation = sut_info["implementation"] device = sut_info["device"] framework = sut_info["framework"].replace(" ", "_") @@ -308,6 +312,10 @@ def generate_submission(env, state, inp, submission_division): system_path = os.path.join(path_submission, "systems") submission_system_path = system_path + if not os.path.isdir(submission_path): + os.makedirs(submission_path) + if not os.path.isdir(measurement_path): + os.makedirs(measurement_path) if not os.path.isdir(submission_system_path): os.makedirs(submission_system_path) system_file = os.path.join(submission_system_path, sub_res + ".json") @@ -585,8 +593,11 @@ def generate_submission(env, state, inp, submission_division): os.makedirs(target) for log_file in os.listdir( compliance_accuracy_run_path): - if log_file.startswith( - "mlperf_log_accuracy.json") or log_file.endswith("accuracy.txt"): + log_file_name = os.path.basename( + log_file) + # print(os.path.join(compliance_accuracy_run_path, log_file)) + if log_file_name in [ + "mlperf_log_accuracy.json", "accuracy.txt", "baseline_accuracy.txt", "compliance_accuracy.txt"]: shutil.copy( os.path.join( compliance_accuracy_run_path, log_file), os.path.join( @@ -735,6 +746,8 @@ def postprocess(i): # submission_generation function if env.get('CM_MLPERF_SUBMISSION_DIVISION', '') == '': r = generate_submission(env, state, inp, submission_division="") + if r['return'] > 0: + return r else: for submission_division in submission_divisions: r = generate_submission(env, state, inp, submission_division) diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml index 99fc359953..c19bdcba32 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/_cm.yaml @@ -49,7 +49,7 @@ input_mapping: server_target_qps: 
CM_MLPERF_LOADGEN_SERVER_TARGET_QPS singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT # Env keys which are exposed to higher level scripts new_env_keys: diff --git a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py index 6fc9a3bb4c..fc31f0c1de 100644 --- a/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/generate-mlperf-inference-user-conf/customize.py @@ -228,8 +228,8 @@ def preprocess(i): user_conf += ml_model_name + "." + scenario + \ "." + metric + " = " + str(metric_value) + "\n" - if env.get('CM_MLPERF_PERFORMANCE_SAMPLE_COUNT', '') != '': - performance_sample_count = env['CM_MLPERF_PERFORMANCE_SAMPLE_COUNT'] + if env.get('CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT', '') != '': + performance_sample_count = env['CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT'] user_conf += ml_model_name + ".*.performance_sample_count_override = " + \ performance_sample_count + "\n" diff --git a/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml index b01506f6dc..fa5ccd2c77 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-cudnn/_cm.yaml @@ -19,6 +19,7 @@ default_env: deps: - tags: detect,os +- tags: detect,sudo - names: - cuda skip_if_env: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py index 0349003fd5..43511c0426 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/customize.py @@ -35,17 +35,17 @@ def postprocess(i): env = i['env'] if env.get('CM_GENERATE_SAMPLE_ID', '') == "yes": env['CM_COCO2014_SAMPLE_ID_PATH'] = os.path.join( - os.getcwd(), 'install', 'sample_ids.txt') + os.getcwd(), 'sample_ids.txt') print(env['CM_COCO2014_SAMPLE_ID_PATH']) if env.get('CM_DATASET_CALIBRATION', '') == "no": - env['CM_DATASET_PATH_ROOT'] = os.path.join(os.getcwd(), 'install') + env['CM_DATASET_PATH_ROOT'] = os.getcwd() # env['CM_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'validation', 'data') env['CM_DATASET_CAPTIONS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'captions') + os.getcwd(), 'captions') env['CM_DATASET_LATENTS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'latents') + os.getcwd(), 'latents') else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join( - os.getcwd(), 'install', 'calibration', 'data') + os.getcwd(), 'calibration', 'data') return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh index 61b9ffe52a..3685b161c4 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-coco2014/run.sh @@ -5,8 +5,7 @@ python3() { export -f python3 CUR=${PWD} -mkdir -p install -INSTALL_DIR=${CUR}/install +INSTALL_DIR=${CUR} cd ${CM_RUN_DIR} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/COPYRIGHT.md new file mode 100644 index 
0000000000..a059b0c49b
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/README-extra.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/README-extra.md
new file mode 100644
index 0000000000..0bb16ad460
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/README-extra.md
@@ -0,0 +1,70 @@
+Examples:
+
+### Check flags
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --help
+```
+
+### Import an already downloaded dataset
+
+Note that this automation will attempt to install the aria2 tool via sudo apt on Ubuntu.
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=${HOME}/datasets/cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=${HOME}/datasets/cognata -j --private_url="{ADD PRIVATE URL FOR COGNATA}" # for full automation
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=%userprofile%\datasets\cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=D:\Work2\cognata -j
+```
+
+### Download dataset to CM cache
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata
+```
+
+### Find dataset in CM cache
+
+```bash
+cm show cache --tags=dataset,mlcommons-cognata
+
+cm rm cache --tags=dataset,mlcommons-cognata
+```
+
+### Download dataset to a local directory
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=${HOME}/datasets/cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=%userprofile%\datasets\cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=D:\Work2\cognata-downloaded -j
+
+```
+
+### Download subsets of this dataset
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning --group_names=Cognata_Camera_01_8M
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning --group_names=Cognata_Camera_01_8M --file_names=Cognata_Camera_01_8M_ann.zip;Cognata_Camera_01_8M_ann_laneline.zip;Cognata_Camera_01_8M.zip
+```
+
+Compact way to download the ABTF demo dataset to the CM cache:
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo
+```
+
+or to a specific path:
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo --path=./cognata
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo --path=.\cognata
+```
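+
+Note (illustrative): these flags are mapped to CM environment keys via the
+`input_mapping` section of this script's `_cm.yaml` (for example, `--import`
+sets `CM_DATASET_MLCOMMONS_COGNATA_IMPORT_PATH`), so a roughly equivalent
+invocation is:
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --env.CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS=10002_Urban_Clear_Morning
+```
diff --git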
a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/_cm.yaml new file mode 100644 index 0000000000..1b8155d7b4 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/_cm.yaml @@ -0,0 +1,161 @@ +# Written by Grigori Fursin + +alias: get-dataset-cognata-mlcommons +uid: 464ce21f2dce464e + +automation_alias: script +automation_uid: 5b4e0237da074764 + +developers: "Grigori Fursin, Radoyeh Shojaei" + +tags: +- get +- raw +- dataset +- cognata +- mlcommons-cognata +- ml-task--object-detection +- ml-task--image-segmentation + +min_cm_version: '2.2.0' + +private: true + +cache: false + +category: AI/ML datasets +category_sort: 8500 + + +input_mapping: + update: CM_DATASET_MLCOMMONS_COGNATA_UPDATE + import: CM_DATASET_MLCOMMONS_COGNATA_IMPORT_PATH + private_url: CM_DATASET_MLCOMMONS_COGNATA_PRIVATE_URL + serial_numbers: CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS + group_names: CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES + file_names: CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES + +env: + CM_DATASET: MLCOMMONS_COGNATA + CM_DATASET_MLCOMMONS_COGNATA_KEY1: "Dataset 1.0" + + + + +deps: +# Prepare dummy CM cache entry to manage dataset +- names: + - custom-cache-entry-mlcommons-cognata-dataset + tags: create,custom,cache,entry + extra_cache_tags: dataset,cognata,mlcommons-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'off' + env_key: DATASET_MLCOMMONS_COGNATA + # this script will prepare env CM_CUSTOM_CACHE_ENTRY_{env_key}_PATH + + +prehook_deps: +- names: + - gdrive-downloader-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'no' + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: + - gdrive + tags: download,file,_gdown,_url.https://drive.google.com/drive/folders/1FS-qLbzB5htgMnfry6z4gx8J_ZH_7MsJ?usp=drive_link + env: + CM_DOWNLOAD_EXTRA_OPTIONS: " --folder" + CM_DOWNLOAD_FILENAME: 10002_Urban_Clear_Morning + CM_DOWNLOAD_FINAL_ENV_NAME: CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH + force_cache: true + extra_cache_tags: abtf,cognata,poc,dataset + +- names: + - rclone-downloader-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'no' + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: + - rclone + tags: download-and-extract,file,_extract,_rclone,_url.https://automotive.mlcommons-storage.org/Cognata_Dataset_PoC_Demo%2F10002_Urban_Clear_Morning.zip + env: + CM_RCLONE_COPY_USING: copyurl + CM_RCLONE_CONFIG_CMD: '' + CM_DOWNLOAD_CHECKSUM: '76389b05b0ee1e08d354d3c1b696b8c0' + CM_EXTRACT_EXTRACTED_CHECKSUM_FILE: "<<>>" + CM_DOWNLOAD_PATH: <<>> + CM_EXTRACT_PATH: <<>> + CM_EXTRACT_EXTRACTED_FILENAME: 10002_Urban_Clear_Morning + CM_DAE_FINAL_ENV_NAME: CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH + force_cache: true + extra_cache_tags: abtf,cognata,poc,dataset + +- names: + - python + - python3 + tags: get,python3 + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + +# Python package to read/write Excel files +- tags: get,generic-python-lib,_package.openpyxl + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + +# Tool to download large files +- tags: get,aria2 + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + 
CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + + +variations: + abtf-demo: + group: dataset-type + env: + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: "10002_Urban_Clear_Morning" + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: "Cognata_Camera_01_8M" + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: "Cognata_Camera_01_8M_ann.zip;Cognata_Camera_01_8M_ann_laneline.zip;Cognata_Camera_01_8M.zip" + + abtf-poc: + group: dataset-type + default: true + env: + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: "10002_Urban_Clear_Morning" + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: "Cognata_Camera_01_8M" + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: "" + + rclone: + group: download-tool + default: true + env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: rclone + + gdrive: + group: download-tool + env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: gdrive + +new_env_keys: +- CM_DATASET_MLCOMMONS_COGNATA* + +print_env_at_the_end: + CM_DATASET_MLCOMMONS_COGNATA_PATH: Path to Cognata dataset diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt new file mode 100644 index 0000000000..b119faf770 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt @@ -0,0 +1,41 @@ +9791a229f8fcd33de5db5ad5a4d8ca93 ./Cognata_Camera_01_8M_ann/0000001666.csv +eb03eead214fad6e9a8964b9b0fdb2a6 ./Cognata_Camera_01_8M_ann/0000003333.csv +72fa733da768e725cd562fd4472ada05 ./Cognata_Camera_01_8M_ann/0000005000.csv +5b30693ca5a339d68244ab5eaf1bb13a ./Cognata_Camera_01_8M_ann/0000006666.csv +644060ed86d5ac1ae58e25dc2762f294 ./Cognata_Camera_01_8M_ann/0000008333.csv +20cf4818e7b68f50101bf19614c36fee ./Cognata_Camera_01_8M_ann/0000010000.csv +056637bd394a898899445bd9d9d638c4 ./Cognata_Camera_01_8M_ann/0000011666.csv +5c092bdb26838c22e6c970b85838a8c6 ./Cognata_Camera_01_8M_ann/0000013333.csv +58c0b653fc17e74d590e6a8448f37f20 ./Cognata_Camera_01_8M_ann/0000015000.csv +866628a72aeda956ee2c994a06efd67e ./Cognata_Camera_01_8M_ann/0000016666.csv +20181f8d8fb36616974355016292807c ./Cognata_Camera_01_8M_ann/0000018333.csv +0433e6949bea924a6329c17ca9125971 ./Cognata_Camera_01_8M_ann/0000020000.csv +eb910004272cab64fc41a0dfdf521ca8 ./Cognata_Camera_01_8M_ann/0000021666.csv +9c26c4a7360f3aad89bfea862d1bac93 ./Cognata_Camera_01_8M_ann/0000023333.csv +2f4a7ea573a26a59cb740a4072a8fe71 ./Cognata_Camera_01_8M_ann/0000025000.csv +398b5768046964d141d555ad313f9f47 ./Cognata_Camera_01_8M_ann/0000026666.csv +8c0c7fcef25efb87c041d785ee0d87a6 ./Cognata_Camera_01_8M_ann/0000028333.csv +43bf64024584aa1ca42738517a347599 ./Cognata_Camera_01_8M_ann/0000030000.csv +545dadaafca21841fcfd78404e7da7ba ./Cognata_Camera_01_8M_ann/0000031666.csv +77357f1b417fcd548be949a8b8d9131a ./Cognata_Camera_01_8M_ann/0000033333.csv +6b7de80e2b6114645c1a039761a0422b ./Cognata_Camera_01_8M_png/0000001666.png +776e04bb64d2d782012b6923bec62ae6 ./Cognata_Camera_01_8M_png/0000003333.png +aad25fa016258b71490299c53d588f32 ./Cognata_Camera_01_8M_png/0000005000.png +b7acf5249e09817f8d82469737c016bd ./Cognata_Camera_01_8M_png/0000006666.png +6bf8031926f4b3d6b82b30e7055855e5 ./Cognata_Camera_01_8M_png/0000008333.png +c3248feec77175811e8a85e6c88d5424 ./Cognata_Camera_01_8M_png/0000010000.png +ae1f50d358940d335ae89193bd78aca8 ./Cognata_Camera_01_8M_png/0000011666.png +1b98c42fd12819d14e980b72518ddb88 ./Cognata_Camera_01_8M_png/0000013333.png +b3d9b5a2fc5abffbebb5b63b2e1cce0a ./Cognata_Camera_01_8M_png/0000015000.png 
+3af5f660ed930d853a048a10a715104a ./Cognata_Camera_01_8M_png/0000016666.png
+a52ae7aba6c56d6ef0d4d29f4a8267cb ./Cognata_Camera_01_8M_png/0000018333.png
+9388222ca6e65beae42cf4a2f4b1d020 ./Cognata_Camera_01_8M_png/0000020000.png
+a5d4d3312f5592d64b57b69a0a0edcea ./Cognata_Camera_01_8M_png/0000021666.png
+35af823a8177abef9b72846a93063695 ./Cognata_Camera_01_8M_png/0000023333.png
+75eb93f99c36135a16df23612c5802d2 ./Cognata_Camera_01_8M_png/0000025000.png
+d9a86566fbcaead7f4a43659723014e7 ./Cognata_Camera_01_8M_png/0000026666.png
+0ae5d8933fdb9a86e01e9192d2210340 ./Cognata_Camera_01_8M_png/0000028333.png
+8b3775e07f902bb3644f7b1eb9de5a69 ./Cognata_Camera_01_8M_png/0000030000.png
+5d2584af6566683784e78f7c71968fa7 ./Cognata_Camera_01_8M_png/0000031666.png
+ed9f9570448a8fd3af8540169f0df6df ./Cognata_Camera_01_8M_png/0000033333.png
+d17ff83e2bbbf012a54ecac2491144a7 ./demo_files.txt
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/customize.py
new file mode 100644
index 0000000000..d4791c60ab
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-cognata-mlcommons/customize.py
@@ -0,0 +1,449 @@
+from cmind import utils
+import os
+import json
+
+
+def preprocess(i):
+
+    env = i['env']
+
+    cm_cache_dataset_path = env.get(
+        'CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH', '').strip()
+    cfg = utils.safe_load_json(cm_cache_dataset_path, 'cfg.json')['meta']
+    if cfg.get('imported', False):
+        env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'yes'
+
+    if env.get('CM_ABTF_SCRATCH_PATH_DATASETS', '') != '':
+        env['CM_ABTF_SCRATCH_PATH_DATASET_COGNATA'] = os.path.join(
+            env['CM_ABTF_SCRATCH_PATH_DATASETS'], "cognata")
+        env['CM_ABTF_SCRATCH_PATH_DATASET_COGNATA_TMP'] = os.path.join(
+            env['CM_ABTF_SCRATCH_PATH_DATASETS'], "cognata_tmp")
+
+    env['CM_DATASET_COGNATA_POC_TEXT_MD5_FILE_PATH'] = os.path.join(
+        i['run_script_input']['path'], 'checksums', 'cognata_poc.txt')
+
+    # Check if the user requests a path outside the CM cache
+    #
+    # --path (env CM_TMP_PATH) shows where to store the Cognata dataset instead of the CM cache
+    # --import tells CM to import an existing Cognata dataset from a given path and skip further download/processing
+    #
+    import_path = env.get(
+        'CM_DATASET_MLCOMMONS_COGNATA_IMPORT_PATH',
+        '').strip()
+    if import_path != '':
+        if not os.path.isdir(import_path):
+            return {'return': 1, 'error': 'directory to import this dataset doesn\'t exist: {}'.format(
+                import_path)}
+
+        env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'yes'
+        env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = import_path
+
+    else:
+        path = env.get('CM_TMP_PATH', '')
+        if path != '':
+            env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'no'
+
+            if not os.path.isdir(path):
+                os.makedirs(path)
+
+            env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = path
+
+    return {'return': 0}
+
+
+def postprocess(i):
+
+    env = i['env']
+
+    automation = i['automation']
+    cm = automation.cmind
+
+    cur_dir = os.getcwd()
+
+    quiet = (env.get('CM_QUIET', False) == 'yes')
+
+    cm_cache_dataset_path = env.get(
+        'CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH', '').strip()
+
+    if not os.path.isdir(cm_cache_dataset_path):
+        return {
+            'return': 1, 'error': 'Dataset corrupted - CM cache path not found: {}'.format(cm_cache_dataset_path)}
+
+    if env.get('CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES', '') == '':
+        env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = os.path.dirname(
+            env['CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH'])
+        env['CM_GET_DEPENDENT_CACHED_PATH'] =
env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] + return {'return': 0} + + cm_cache_dataset_cfg_file = os.path.join(cm_cache_dataset_path, 'cfg.json') + env['CM_DATASET_MLCOMMONS_COGNATA_CFG_FILE'] = cm_cache_dataset_cfg_file + + cfg = utils.safe_load_json('', cm_cache_dataset_cfg_file)['meta'] + + dataset_path = cfg.get('real_path', '') + dataset_path_requested = env.get('CM_DATASET_MLCOMMONS_COGNATA_PATH', '') + if dataset_path == '': + if dataset_path_requested != '': + dataset_path = dataset_path_requested + else: + dataset_path = os.path.join(cm_cache_dataset_path, 'cognata') + else: + if dataset_path_requested != '': + dataset_path = dataset_path_requested + + cfg['real_path'] = dataset_path + + print('') + print('Used dataset path: {}'.format(dataset_path)) + + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = dataset_path + + # If imported, don't process further + if env.get('CM_DATASET_MLCOMMONS_COGNATA_IMPORTED', '') == 'yes': + cfg['imported'] = True + else: + cfg['imported'] = False + + utils.save_json(cm_cache_dataset_cfg_file, cfg) + + if cfg.get('imported', False): + return {'return': 0} + + # If processed once, don't process unless forced + if cfg.get('processed', False): + if not utils.check_if_true_yes_on( + env, 'CM_DATASET_MLCOMMONS_COGNATA_UPDATE'): + print('') + print('Already processed: use --update to update this dataset') + + return {'return': 0} + + # First level dir + dataset_path1 = dataset_path + + if not os.path.isdir(dataset_path1): + os.makedirs(dataset_path1) + + # Check if has license and download URL + dataset_path_secret = os.path.join(dataset_path1, 'secret.json') + + first_url = '' + dataset_meta = {} + + if os.path.isfile(dataset_path_secret): + r = utils.load_json(dataset_path_secret) + if r['return'] > 0: + return r + + dataset_meta = r['meta'] + + first_url = dataset_meta.get('first_url', '').strip() + + if first_url == '': + x = env.get('CM_DATASET_MLCOMMONS_COGNATA_PRIVATE_URL', '').strip() + if x != '': + first_url = x + else: + print('') + first_url = input( + 'Please register at https://mlcommons.org/datasets/cognata and enter private URL: ') + + first_url = first_url.strip() + + if first_url == '': + return {'return': 1, + 'error': 'Private MLCommons Cognata URL was not provided'} + + dataset_meta['first_url'] = first_url + + with open(dataset_path_secret, 'w') as f: + f.write(json.dumps(dataset_meta, indent=2) + '\n') + + ########################################################################## + # Check if first.xlsx exists + file_first_xlsx = 'first.xlsx' + first_xlsx = os.path.join(dataset_path1, file_first_xlsx) + + if not os.path.isfile(first_xlsx): + # Attempting to download file + first_url_export, dummy = google_url_for_export(first_url) + + if first_url_export == '': + return { + 'return': 1, 'error': 'can\'t parse URL for export: {}'.format(first_url)} + + r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'download,file,_wget', + 'verify': 'no', + 'url': first_url_export, + 'output_file': file_first_xlsx, + 'store': dataset_path1}) + if r['return'] > 0: + return r + + if not os.path.isfile(first_xlsx): + return {'return': 1, + 'error': 'File {} was not downloaded'.format(first_xlsx)} + + ########################################################################## + # Parse XLSX and check serial number + serial_numbers = [] + for s in env.get( + 'CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS', '').strip().split(','): + s = s.strip() + if s != '' and s not in serial_numbers: + serial_numbers.append(s) + + dataset_key = 
env['CM_DATASET_MLCOMMONS_COGNATA_KEY1']
+    url_key = 'Link to Excel File (Download Links)'
+    serial_key = 'Serial Number'
+
+    r = process_xlsx(
+        first_xlsx,
+        dataset_key,
+        url_key,
+        serial_key,
+        serial_numbers)
+    if r['return'] > 0:
+        return r
+
+    headers = r['headers']
+    data = r['data']
+    all_data = r['all_data']
+
+    if len(all_data) != 0:
+        file_first_json = 'first.json'
+        first_json = os.path.join(dataset_path1, file_first_json)
+
+        if not os.path.isfile(first_json):
+            with open(first_json, 'w') as f:
+                f.write(json.dumps(all_data, indent=2) + '\n')
+
+    if len(data) == 0:
+        return {'return': 1, 'error': 'no sets found'}
+
+    ##########################################################################
+    print('')
+    print('Available or selected serial numbers (use --serial_numbers=a,b,c to download specific subsets):')
+    print('')
+    for d in data:
+        s = d[serial_key]
+        print(s)
+
+    for d in data:
+        url = d[url_key]
+        url_export, dummy = google_url_for_export(url)
+
+        serial_file = d[serial_key] + '.xlsx'
+
+        dataset_path2 = os.path.join(dataset_path1, serial_file)
+        dataset_path3 = os.path.join(dataset_path1, d[serial_key])
+
+        if not os.path.isdir(dataset_path3):
+            os.makedirs(dataset_path3)
+
+        if not os.path.isfile(dataset_path2):
+
+            print('')
+            print('Downloading {} ...'.format(url_export))
+
+            r = cm.access({'action': 'run',
+                           'automation': 'script',
+                           'tags': 'download,file,_wget',
+                           'verify': 'no',
+                           'url': url_export,
+                           'output_file': serial_file,
+                           'store': dataset_path1})
+            if r['return'] > 0:
+                return r
+
+    ##########################################################################
+    print('')
+    print('Processing subsets ...')
+
+    group_names = []
+    for s in env.get('CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES',
+                     '').strip().split(','):
+        s = s.strip()
+        if s != '' and s not in group_names:
+            group_names.append(s)
+
+    # Check if specific file names were forced
+    x = env.get('CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES', '').strip()
+    file_names = []
+    if x != '':
+        file_names = x.split(';') if ';' in x else [x]
+
+    # Use distinct names for the per-subset results below to avoid
+    # clobbering the first-level 'data' and keys while iterating over them
+    for d in data:
+        serial_file = d[serial_key] + '.xlsx'
+
+        dataset_path2 = os.path.join(dataset_path1, serial_file)
+        dataset_path3 = os.path.join(dataset_path1, d[serial_key])
+
+        print('')
+        print('Processing {} ...'.format(serial_file))
+
+        subset_dataset_key = 'File_Data'
+        subset_url_key = 'File_Link'
+        subset_group_key = 'Group_Name'
+
+        r = process_xlsx(
+            dataset_path2,
+            subset_dataset_key,
+            subset_url_key,
+            subset_group_key,
+            group_names)
+        if r['return'] > 0:
+            return r
+
+        subset_data = r['data']
+        subset_all_data = r['all_data']
+
+        if len(subset_all_data) != 0:
+            file_all_json = 'all.json'
+            all_json = os.path.join(dataset_path3, file_all_json)
+
+            if not os.path.isfile(all_json):
+                with open(all_json, 'w') as f:
+                    f.write(json.dumps(subset_all_data, indent=2) + '\n')
+
+        if len(subset_data) == 0:
+            return {'return': 1, 'error': 'no sub-sets found'}
+
+        for dd in subset_data:
+            file_name = dd['File_Name']
+
+            if len(file_names) > 0 and file_name not in file_names:
+                continue
+
+            file_name_with_path = os.path.join(dataset_path3, file_name)
+            file_name_with_path_done = os.path.join(
+                dataset_path3, file_name) + '.done'
+
+            url = dd[subset_url_key]
+
+            print('')
+            print('Downloading {} ...'.format(file_name))
+
+            if os.path.isfile(file_name_with_path_done):
+                print('')
+                print('  Already processed - skipping ...')
+                continue
+
+            if os.name == 'nt':
+                aria2_tool = env['CM_ARIA2_BIN_WITH_PATH']
+            else:
+                aria2_tool = 'aria2c'
+
+            cmd = aria2_tool + \
+                ' --async-dns=false -x15 -s15 "{}" --dir "{}" -o "{}"'.format(
+                    url,
dataset_path3, file_name)
+
+            print('')
+            print(cmd)
+            print('')
+
+            os.system(cmd)
+
+            # Unarchive
+            print('')
+            print('Extracting file {} ...'.format(file_name_with_path))
+            print('')
+
+            if file_name.endswith('.zip'):
+
+                import zipfile
+                extractor = zipfile.ZipFile(file_name_with_path, "r")
+
+            elif file_name.endswith('.tar'):
+
+                import tarfile
+                extractor = tarfile.open(file_name_with_path, "r")
+
+            else:
+                extractor = None
+
+            if extractor is not None:
+
+                try:
+                    extractor.extractall(dataset_path3)
+                    extractor.close()
+
+                except Exception as e:
+                    return {'return': 1,
+                            'error': 'extracting failed: {}'.format(e)}
+
+            # Mark as downloaded
+            with open(file_name_with_path_done, 'w') as f:
+                f.write('DONE\n')
+
+            # Remove the archive
+            os.remove(file_name_with_path)
+
+    print('')
+
+    # Mark that this dataset was processed once correctly
+    cfg['processed'] = True
+    utils.save_json(cm_cache_dataset_cfg_file, cfg)
+
+    env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_DATASET_MLCOMMONS_COGNATA_PATH']
+
+    return {'return': 0}
+
+
+# Prepare Google URL for export
+def google_url_for_export(url):
+    url2 = ''
+
+    j = url.rfind('/')
+
+    if j > 0:
+        url = url[:j + 1]
+        url2 = url + 'export'
+
+    return (url2, url)
+
+# Parse a Cognata XLSX sheet and filter rows by serial numbers / group names
+
+
+def process_xlsx(filename, dataset_key, url_key, serial_key, serial_numbers):
+    import openpyxl
+
+    ex = openpyxl.load_workbook(filename)
+
+    sets = ex[dataset_key]
+
+    headers = {}
+
+    data = []
+    all_data = []
+
+    for row in sets.iter_rows(values_only=True):
+        lrow = list(row)
+
+        if len(headers) == 0:
+            for j in range(0, len(lrow)):
+                headers[j] = str(lrow[j]).strip()
+        else:
+            xrow = {}
+
+            for j in range(0, len(lrow)):
+                xrow[headers[j]] = lrow[j]
+
+            url = str(xrow.get(url_key, ''))
+            if 'https' in url:
+                all_data.append(xrow)
+
+            if len(serial_numbers) > 0:
+                serial_number = xrow.get(serial_key, '')
+
+                if serial_number not in serial_numbers:
+                    continue
+
+            if url != '':
+                data.append(xrow)
+
+    return {'return': 0, 'headers': headers,
+            'data': data, 'all_data': all_data}
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/COPYRIGHT.md
new file mode 100644
index 0000000000..a059b0c49b
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
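The `checksums/cognata_poc.txt` manifest added above follows the standard `md5sum` format (checksum, then a path relative to the extracted `10002_Urban_Clear_Morning` folder), and the rclone download path points `CM_EXTRACT_EXTRACTED_CHECKSUM_FILE` at it to validate the extracted PoC subset. A minimal sketch of that verification step in plain Python; the manifest path and dataset root used here are illustrative:

```python
import hashlib
import os


def verify_manifest(manifest_path, dataset_root):
    """Check extracted files against an md5sum-style manifest (md5, path)."""
    mismatched = []
    with open(manifest_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            expected_md5, rel_path = line.split(maxsplit=1)
            file_path = os.path.join(dataset_root, rel_path)
            md5 = hashlib.md5()
            with open(file_path, 'rb') as data:
                for chunk in iter(lambda: data.read(1 << 20), b''):
                    md5.update(chunk)
            if md5.hexdigest() != expected_md5:
                mismatched.append(rel_path)
    return mismatched


# Illustrative paths - adjust to the actual CM cache entry
failures = verify_manifest('checksums/cognata_poc.txt',
                           '10002_Urban_Clear_Morning')
print('OK' if not failures else 'Mismatched: {}'.format(failures))
```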
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/_cm.yaml new file mode 100644 index 0000000000..8e5c7b4cda --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/_cm.yaml @@ -0,0 +1,448 @@ +alias: get-dataset-igbh +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +tags: +- get +- dataset +- mlperf +- rgat +- igbh +- inference +uid: 824e61316c074253 +new_env_keys: + - CM_DATASET_IGBH_PATH + - CM_DATASET_IGBH_SIZE +input_mapping: + out_path: CM_DATASET_IGBH_OUT_PATH +env: + SKIP_USER_PROMPT: yes +deps: + - tags: mlperf,inference,source + names: + - inference-src + - tags: get,python + names: + - get-python + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/gateoverflow/IGB-Datasets.git + - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm + + +prehook_deps: + #paper + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_feat + force_env_keys: + - CM_OUTDIRNAME + force_cache: true + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_19.npy + CM_DOWNLOAD_CHECKSUM: be6fda45566e679bdb05ebea98ad16d4 + CM_DOWNLOAD_FILENAME: node_label_19.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_label_19 + force_env_keys: + - CM_OUTDIRNAME + force_cache: true + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-node-label19 + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_2K.npy + CM_DOWNLOAD_CHECKSUM: 6eccab9a14f92f42be5b367c39002031 + CM_DOWNLOAD_FILENAME: node_label_2K.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,node_label_2K + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-node-label2k + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/paper_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: f70dd642a4f7e41d926c91c8c054fc4c + CM_DOWNLOAD_FILENAME: paper_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + #paper_cites_paper + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__cites__paper/edge_index.npy + CM_DOWNLOAD_CHECKSUM: f4897f53636c04a9c66f6063ec635c16 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ + extra_cache_tags: 
dataset,igbh,paper_cites_paper,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # author + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/author_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 58c15aab7dae03bbd57e6a4ac5e61bd9 + CM_DOWNLOAD_FILENAME: author_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + extra_cache_tags: dataset,igbh,author,author_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - author-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 2ec2512b554088381c04ec013e893c8d + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + extra_cache_tags: dataset,igbh,author,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - author-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # conference + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/conference_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 0bf7c555d8c697b31b6af6c4cb6b6612 + CM_DOWNLOAD_FILENAME: conference_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - conference-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 898ff529b8cf972261fedd50df6377f8 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + extra_cache_tags: dataset,igbh,conference,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - conference-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # institute + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/institute_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 03fb45eafb7bd35875ef4c7cd2a299a9 + CM_DOWNLOAD_FILENAME: institute_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - institute-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 12eaeced22d17b4e97d4b4742331c819 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: 
<<>>/full/processed/institute/ + extra_cache_tags: dataset,igbh,institute,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - institute-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # journal + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/journal_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: b630c20852b76d17a5c9c37b39176f69 + CM_DOWNLOAD_FILENAME: journal_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - journal-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 49d51b554b3004f10bee19d1c7f9b416 + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + extra_cache_tags: dataset,igbh,journal,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - journal-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # fos + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/fos_id_index_mapping.npy + CM_DOWNLOAD_CHECKSUM: 0f0cfde619361cde35d3be9f201d081a + CM_DOWNLOAD_FILENAME: fos_id_index_mapping.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - fos-id-index-mapping + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/node_feat.npy + CM_DOWNLOAD_CHECKSUM: 3ef3df19e2475c387fec10bac82773df + CM_DOWNLOAD_FILENAME: node_feat.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + extra_cache_tags: dataset,igbh,fos,node_feat + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - fos-node-feat + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # author__affiliated_to__institute + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author__affiliated_to__institute/edge_index.npy + CM_DOWNLOAD_CHECKSUM: e35dba208f81e0987207f78787c75711 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ + extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - author-to-institute-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__published__journal + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__published__journal/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 
38505e83bde8e5cf94ae0a85afa60e13 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ + extra_cache_tags: dataset,igbh,paper_published_journal,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-published-journal-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__topic__fos + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__topic__fos/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 427fb350a248ee6eaa8c21cde942fda4 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ + extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-topic-fos-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__venue__conference + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__venue__conference/edge_index.npy + CM_DOWNLOAD_CHECKSUM: 541b8d43cd93579305cfb71961e10a7d + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ + extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-venue-conference-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL + # paper__written_by__author + - env: + CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__written_by__author/edge_index.npy + CM_DOWNLOAD_CHECKSUM: df39fe44bbcec93a640400e6d81ffcb5 + CM_DOWNLOAD_FILENAME: edge_index.npy + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ + extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index + force_cache: true + force_env_keys: + - CM_OUTDIRNAME + enable_if_env: + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' + names: + - dae + - paper-written-by-author-edge-index + tags: download-and-extract,_wget + update_tags_from_env_with_prefix: + _url.: + - CM_PACKAGE_URL +variations: + debug: + default: true + group: dataset-type + env: + CM_DATASET_IGBH_TYPE: debug + CM_DATASET_IGBH_SIZE: tiny + full: + group: dataset-type + env: + CM_DATASET_IGBH_TYPE: full + CM_DATASET_IGBH_SIZE: full + glt: + env: + CM_IGBH_GRAPH_COMPRESS: yes + csc: + group: compressed-layout + default: true + env: + CM_IGBH_GRAPH_COMPRESS_LAYOUT: csc + csr: + group: compressed-layout + env: + CM_IGBH_GRAPH_COMPRESS_LAYOUT: csr diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/customize.py new file mode 100644 index 0000000000..de85bd9001 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/customize.py @@ -0,0 +1,69 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + if os_info['platform'] == "windows": + return {'return': 1, 'error': 'Script not supported in windows yet!'} + + print("Using MLCommons Inference source from '" + + env['CM_MLPERF_INFERENCE_SOURCE'] + "'") + + # run cmd + run_cmd = "" + graph_folder = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], 
'graph', 'R-GAT')
+    # The helper scripts invoked below (tools/download_igbh_test.py,
+    # tools/split_seeds.py, tools/compress_graph.py) live in this graph/R-GAT
+    # folder of the MLPerf inference checkout, so run_cmd first changes into it.
+
+    if env.get('CM_DATASET_IGBH_PATH',
+               '') != '':  # skip download, just register in cache
+        env['CM_DATASET_IGBH_OUT_PATH'] = env['CM_DATASET_IGBH_PATH']
+        return {'return': 0}
+
+    download_loc = env.get('CM_DATASET_IGBH_OUT_PATH', os.getcwd())
+
+    env['CM_DATASET_IGBH_DOWNLOAD_LOCATION'] = download_loc
+
+    run_cmd += f"cd {graph_folder} "
+    x_sep = " && "
+
+    # download the dataset
+    if env['CM_DATASET_IGBH_TYPE'] == "debug":
+        run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \
+            f" tools/download_igbh_test.py --target-path {download_loc} "
+
+    else:
+        env['CM_DATASET_IGBH_FULL_DOWNLOAD'] = 'yes'
+
+    # split seeds
+    run_cmd += x_sep + \
+        f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} "
+
+    # compress the graph (for the GLT implementation)
+    if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes":
+        run_cmd += x_sep + \
+            f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']} "
+
+    env['CM_RUN_CMD'] = run_cmd
+
+    return {'return': 0}
+
+
+def postprocess(i):
+
+    env = i['env']
+
+    env['CM_DATASET_IGBH_PATH'] = env.get(
+        'CM_DATASET_IGBH_OUT_PATH', os.getcwd())
+
+    print(
+        f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}")
+
+    return {'return': 0}
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/run.sh
new file mode 100644
index 0000000000..2386521602
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-igbh/run.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}
+
+#To export any variable
+#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out
+
+#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency
+
+
+
+function exit_if_error() {
+  test $? -eq 0 || exit $?
+} + +function run() { + echo "Running: " + echo "$1" + echo "" + eval "$1" + exit_if_error +} + +run "$CM_RUN_CMD" diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml index 242b53abcf..c5944aedff 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-aux/_cm.yaml @@ -14,6 +14,8 @@ prehook_deps: extra_cache_tags: imagenet-aux,dataset-aux force_cache: true tags: download-and-extract,_extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml index 741d7e2055..7e499146a2 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: imagenet-calibration,imagenet,calibration names: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml index 0b9923927b..0a23afac15 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-imagenet-val/_cm.yaml @@ -35,6 +35,8 @@ prehook_deps: env: CM_EXTRACT_TO_FOLDER: imagenet-2012-val tags: download-and-extract,file,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env: - CM_DAE_EXTRA_TAGS update_tags_from_env_with_prefix: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/COPYRIGHT.md new file mode 100644 index 0000000000..a059b0c49b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
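The get-dataset-igbh script above gates a long list of download-and-extract dependencies on `CM_DATASET_IGBH_FULL_DOWNLOAD`, pairing each S3 URL with an MD5 checksum, while customize.py stitches the remaining steps into one shell command executed by run.sh. A rough plain-Python equivalent of that gated download-and-verify loop is sketched below; only two of the manifest entries are shown, and the destination directory is an assumption:

```python
import hashlib
import os
import urllib.request

# Two of the (url, md5) pairs from the manifest above; the real script
# declares one download-and-extract dependency per file.
IGBH_FILES = {
    'full/processed/paper/node_feat.npy': (
        'https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy',
        '71058b9ac8011bafa1c5467504452d13'),
    'full/processed/paper/node_label_19.npy': (
        'https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_19.npy',
        'be6fda45566e679bdb05ebea98ad16d4'),
}


def download_full_igbh(out_dir):
    """Fetch each file and verify its MD5, mirroring the gated _wget deps."""
    for rel_path, (url, expected_md5) in IGBH_FILES.items():
        dest = os.path.join(out_dir, rel_path)
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        urllib.request.urlretrieve(url, dest)

        md5 = hashlib.md5()
        with open(dest, 'rb') as f:
            for chunk in iter(lambda: f.read(1 << 20), b''):
                md5.update(chunk)
        if md5.hexdigest() != expected_md5:
            raise RuntimeError(f'Checksum mismatch for {rel_path}')


download_full_igbh(os.path.expanduser('~/igbh'))  # destination is illustrative
```

In the actual automation each entry is a separate cached CM artifact, which is why the YAML repeats the `force_cache`/`extra_cache_tags` boilerplate per file.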
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/_cm.yaml new file mode 100644 index 0000000000..d8af83b88c --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/_cm.yaml @@ -0,0 +1,56 @@ +alias: get-dataset-mlperf-inference-llama3 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +tags: +- get +- dataset +- mlperf +- llama3 +- inference +uid: c3bc69599cbc4db7 +new_env_keys: + - CM_DATASET_LLAMA3_PATH +input_mapping: + outdirname: CM_OUTDIRNAME +prehook_deps: + - env: + CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_LLAMA3_PATH + CM_EXTRACT_TO_FOLDER: llama-3-dataset + extra_cache_tags: dataset,llama3 + force_cache: true + enable_if_env: + CM_TMP_REQUIRE_DOWNLOAD: + - 'yes' + names: + - dae + tags: download-and-extract + update_tags_from_env_with_prefix: + _url.: + - CM_DOWNLOAD_URL +variations: + validation: + default: true + group: dataset-type + env: + CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl + CM_DATASET_TYPE: validation + CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl + calibration: + group: dataset-type + env: + CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl + CM_DATASET_TYPE: calibration + CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl + rclone: + add_deps_recursive: + dae: + tags: _rclone + default: true + env: + CM_DOWNLOAD_FILENAME: checkpoint + CM_DOWNLOAD_URL: <<>> + CM_RCLONE_CONFIG_NAME: mlc-inference + group: download-tool +print_env_at_the_end: + CM_DATASET_LLAMA3_PATH: Path to the dataset diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/customize.py new file mode 100644 index 0000000000..745dc52fee --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-llama3/customize.py @@ -0,0 +1,31 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + if os_info['platform'] == "windows": + return {'return': 1, 'error': 'Script not supported in windows yet!'} + + if env.get('CM_DATASET_LLAMA3_PATH', '') == '': + env['CM_TMP_REQUIRE_DOWNLOAD'] = "yes" + + if env.get('CM_OUTDIRNAME', '') != '': + env['CM_DOWNLOAD_PATH'] = env['CM_OUTDIRNAME'] + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + if env.get('CM_TMP_REQUIRE_DOWNLOAD', '') == "yes": + env['CM_DATASET_LLAMA3_PATH'] = os.path.join( + env['CM_DATASET_LLAMA3_PATH'], env['CM_DATASET_FILE_NAME']) + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml index f8684eef59..566f7bb05a 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-mlperf-inference-mixtral/_cm.yaml @@ -8,9 +8,11 @@ new_env_keys: prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_PREPROCESSED_PATH - extra_cache_tags: mixtral,get-mixtral-dataset + extra_cache_tags: mixtral,get-mixtral-dataset force_cache: true tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: 
_url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml index a96e7f58e8..16158cef6c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-annotations/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: retinanet,get,dataset-openimages-annotations force_cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml index b8bd73e12c..6edd3716c6 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openimages-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: openimages-calibration,openimages,calibration names: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml index 861c495759..c860b0213e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-openorca/_cm.yaml @@ -15,6 +15,8 @@ deps: names: - openorca-src tags: get,git,repo,_lfs,_repo.https://huggingface.co/datasets/Open-Orca/OpenOrca + force_env_keys: + - CM_OUTDIRNAME env: CM_DATASET: OPENORCA new_env_keys: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml index a6ec2e902d..aa1bad21ca 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad-vocab/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad-vocab force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml index a9dd6ed948..d47fc9ce3e 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-dataset-squad/_cm.yaml @@ -16,6 +16,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml index e2f33e875b..881039852c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-docker/_cm.yaml @@ -9,7 +9,8 @@ docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [ - "CM_DOCKER_VERSION" + "CM_DOCKER_VERSION", + "CM_CONTAINER_TOOL" ] new_state_keys: [] post_deps: [] diff --git a/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py index d84a8eaed2..0be862b5c6 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-docker/customize.py @@ -58,8 
+58,13 @@ def detect_version(i): version = r['version'] + tool = "docker" + + if "podman" in r['string'].lower(): + tool = "podman" + print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return': 0, 'version': version} + return {'return': 0, 'version': version, "tool": tool} def postprocess(i): @@ -71,6 +76,7 @@ def postprocess(i): return r version = r['version'] + tool = r['tool'] found_file_path = env['CM_DOCKER_BIN_WITH_PATH'] found_path = os.path.dirname(found_file_path) @@ -81,4 +87,6 @@ def postprocess(i): env['CM_DOCKER_VERSION'] = version + env['CM_CONTAINER_TOOL'] = tool + return {'return': 0, 'version': version} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py index edbb2d552b..2f61bac020 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/customize.py @@ -200,4 +200,8 @@ def postprocess(i): if pip_version and len(pip_version) > 1 and int(pip_version[0]) >= 23: env['CM_PYTHON_PIP_COMMON_EXTRA'] = " --break-system-packages" + if version.count('.') > 1: + env[f"{env_version_key}_MAJOR_MINOR"] = ".".join( + version.split(".")[:2]) + return {'return': 0, 'version': version} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py index 001c39b372..fc879f04e1 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-generic-python-lib/detect-version.py @@ -2,6 +2,7 @@ import sys package_name = os.environ.get('CM_GENERIC_PYTHON_PACKAGE_NAME', '') +package_name = package_name.split("[")[0] filename = 'tmp-ver.out' diff --git a/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml index 1d45c2c284..b75e24bbca 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-generic-sys-util/_cm.yaml @@ -212,21 +212,33 @@ variations: brew: '' dnf: boost-devel yum: boost-devel - libbz2-dev: + bzip2: env: - CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_NAME: bzip2 CM_SYS_UTIL_VERSION_CMD_OVERRIDE: bzcat --version 2>&1 | grep bzip > tmp-ver.out CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) CM_TMP_VERSION_DETECT_GROUP_NUMBER: 1 new_env_keys: + - CM_BZIP2_VERSION + state: + bzip2: + apt: bzip2 + brew: bzip2 + dnf: bzip2 + yum: bzip2 + libbz2-dev: + env: + CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_VERSION_CMD: dpkg -s libbz2-dev | grep 'Version' + CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) + CM_TMP_VERSION_DETECT_GROUP_NUMBER: 0 + new_env_keys: - CM_LIBBZ2_DEV_VERSION state: libbz2_dev: apt: libbz2-dev - brew: bzip2 dnf: libbzip2-devel yum: libbzip2-devel - zlib-devel: libbz2-devel libev-dev: env: CM_SYS_UTIL_NAME: libev_dev diff --git a/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml index 3008f63650..287ee254a1 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-gh-actions-runner/_cm.yaml @@ -6,6 +6,7 @@ can_force_cache: true tags: - get - gh +- github - actions-runner - runner-code - runner @@ -29,21 +30,27 @@ deps: variations: config: + group: command + default: true env: CM_GH_ACTIONS_RUNNER_COMMAND: config remove: + group: command env: 
CM_GH_ACTIONS_RUNNER_COMMAND: remove
   install:
+    group: command
     deps:
     - tags: get,gh,actions-runner,_config
       force_cache: yes
     env:
       CM_GH_ACTIONS_RUNNER_COMMAND: install
   uninstall:
+    group: command
     env:
       CM_GH_ACTIONS_RUNNER_COMMAND: uninstall
   start:
+    group: command
     deps:
     - tags: get,gh,actions-runner,_install
       force_cache: yes
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/COPYRIGHT.md
new file mode 100644
index 0000000000..a059b0c49b
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/_cm.yaml
new file mode 100644
index 0000000000..6643eb222f
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/_cm.yaml
@@ -0,0 +1,25 @@
+alias: get-huggingface-cli
+automation_alias: script
+automation_uid: 5b4e0237da074764
+cache: false
+can_force_cache: true
+category: DevOps automation
+clean_files: []
+deps:
+- tags: detect,os
+- tags: get,generic-python-lib,_package.huggingface_hub[cli]
+tags:
+- get
+- huggingface
+- hf-cli
+- huggingface-cli
+- cli
+input_mapping:
+  token: CM_HF_TOKEN
+uid: e9488a272f1d4160
+variations:
+  with-login:
+    cache: true
+    force_cache: true
+    env:
+      CM_HF_DO_LOGIN: yes
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/customize.py
new file mode 100644
index 0000000000..d9e63e42c9
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/customize.py
@@ -0,0 +1,30 @@
+from cmind import utils
+import os
+
+
+def preprocess(i):
+    env = i['env']
+    if env.get('CM_HF_TOKEN', '') != '':
+        env['CM_HF_LOGIN_CMD'] = f"""git config --global credential.helper store && huggingface-cli login --token {env['CM_HF_TOKEN']} --add-to-git-credential
+"""
+    elif str(env.get('CM_HF_DO_LOGIN')).lower() in ["yes", "1", "true"]:
+        env['CM_HF_LOGIN_CMD'] = f"""git config --global credential.helper store && huggingface-cli login
+"""
+    return {'return': 0}
+
+
+def postprocess(i):
+    env = i['env']
+
+    r = i['automation'].parse_version({'match_text': r'huggingface_hub\s*version:\s*([\d.]+)',
+                                       'group_number': 1,
+                                       'env_key': 'CM_HUGGINGFACE_CLI_VERSION',
+                                       'which_env': i['env']})
+    if r['return'] > 0:
+        return r
+
+    version = r['version']
+
+    print(i['recursion_spaces'] + '    Detected version: {}'.format(version))
+
+    return {'return': 0, 'version': version}
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.bat b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.bat
new file mode 100644
index 0000000000..464afe5c7f
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.bat
@@ -0,0 +1,14 @@
+@echo off
+REM Check if the environment variable
CM_HF_LOGIN_CMD is defined and not empty
+IF DEFINED CM_HF_LOGIN_CMD (
+    echo %CM_HF_LOGIN_CMD%
+    call %CM_HF_LOGIN_CMD%
+    IF ERRORLEVEL 1 (
+        echo Hugging Face CLI login failed
+        exit /b 1
+    )
+)
+
+REM Run the Hugging Face CLI version command and save output
+huggingface-cli version > tmp-ver.out
+
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.sh
new file mode 100644
index 0000000000..43d20f3676
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-huggingface-cli/run.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+if [[ -n ${CM_HF_LOGIN_CMD} ]]; then
+  echo "${CM_HF_LOGIN_CMD}"
+  eval ${CM_HF_LOGIN_CMD}
+  test $? -eq 0 || exit $?
+fi
+huggingface-cli version > tmp-ver.out
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml
index 7dc7f5b069..658f306a77 100644
--- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml
+++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-3d-unet-kits19/_cm.yaml
@@ -43,6 +43,8 @@ variations:
         extra_cache_tags: 3d-unet,medical-imaging
         force-cache: true
         tags: download,file,download-file,_wget
+        force_env_keys:
+        - CM_OUTDIRNAME
       env:
         CM_ML_MODEL_ACCURACY: '0.86170'
         CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128_dynbatch.onnx
@@ -60,6 +62,8 @@ variations:
         extra_cache_tags: 3d-unet,medical-imaging
         force-cache: true
         tags: download,file,download-file,_wget
+        force_env_keys:
+        - CM_OUTDIRNAME
       env:
         CM_ML_MODEL_ACCURACY: '0.86170'
         CM_ML_MODEL_FILE: 3dunet_kits19_pytorch.ptc
@@ -73,6 +77,8 @@ variations:
         extra_cache_tags: 3d-unet,medical-imaging
         force-cache: true
         tags: download-and-extract,_wget,_extract
+        force_env_keys:
+        - CM_OUTDIRNAME
       env:
         CM_ML_MODEL_ACCURACY: '0.86170'
         CM_ML_MODEL_FILE: 3dunet_kits19_pytorch_checkpoint.pth
@@ -92,6 +98,8 @@ variations:
         extra_cache_tags: 3d-unet,medical-imaging
         force-cache: true
         tags: download-and-extract,_wget,_extract
+        force_env_keys:
+        - CM_OUTDIRNAME
       env:
         CM_ML_MODEL_ACCURACY: '0.86170'
         CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128.tf
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/COPYRIGHT.md
new file mode 100644
index 0000000000..a059b0c49b
--- /dev/null
+++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/COPYRIGHT.md
@@ -0,0 +1,9 @@
+# Copyright Notice
+
+© 2024-2025 MLCommons. All Rights Reserved.
+
+This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at:
+
+[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License.
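The get-ml-model-abtf-ssd-pytorch script that follows attaches a `download,file` dependency to each `model-weights` variation and passes `CM_ML_MODEL_CHECKSUM` through as `CM_DOWNLOAD_CHECKSUM`, so the fetched `.pth` file is validated after download. A bare-bones sketch of that MD5 check, using the checksum and filename from the `e01` variation below; the local path is an assumption for the sake of the example:

```python
import hashlib


def md5_of(path, chunk_size=1 << 20):
    """Chunked MD5 of a local file (the weight files can be large)."""
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(chunk_size), b''):
            h.update(block)
    return h.hexdigest()


# Checksum and filename come from the e01 variation below
expected = '31d177228308bbe43917c912b01c2d67'
actual = md5_of('SSD_e1.pth')
assert actual == expected, f'SSD_e1.pth is corrupted (md5 {actual})'
```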
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/README-extra.md b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/README-extra.md new file mode 100644 index 0000000000..e08259617c --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/README-extra.md @@ -0,0 +1,5 @@ +# Example to import local model + +```bash +cm run script --tags=get,ml-model,abtf-ssd-pytorch,_local.test_8mp.pth +``` diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml new file mode 100644 index 0000000000..b346288d28 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml @@ -0,0 +1,174 @@ +# Written by Grigori Fursin + +alias: get-ml-model-abtf-ssd-pytorch +uid: b3750a4b222a485d + +automation_alias: script +automation_uid: 5b4e0237da074764 + +cache: true + +category: AI/ML models + +private: true + + +tags: +- get +- ml-model +- abtf-ssd-pytorch +- cmc + + +input_mapping: + model_code_git_url: CM_ABTF_MODEL_CODE_GIT_URL + model_code_git_branch: CM_ABTF_MODEL_CODE_GIT_BRANCH + + +default_env: + CM_ABTF_MODEL_CODE_GIT_URL: https://github.com/mlcommons/abtf-ssd-pytorch + CM_ABTF_MODEL_CODE_GIT_BRANCH: cognata + + +deps: + +- tags: detect,os + +- tags: get,git,repo + names: + - abtf-ssd-pytorch-git-repo + - abtf-ml-model-code-git-repo + skip_if_env: + CM_SKIP_MODEL_CODE_DOWNLOAD: + - 'yes' + env: + CM_GIT_AUTH: 'yes' + CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_ABTF_SSD_PYTORCH + extra_cache_tags: abtf,ssd,pytorch,ml-model,cmc + update_tags_from_env_with_prefix: + _repo.: + - CM_ABTF_MODEL_CODE_GIT_URL + _branch.: + - CM_ABTF_MODEL_CODE_GIT_BRANCH + + +- tags: download,file + env: + CM_DOWNLOAD_CHECKSUM: <<<CM_ML_MODEL_CHECKSUM>>> + CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_FILE_WITH_PATH + CM_DOWNLOAD_FILENAME: <<<CM_ML_MODEL_FILENAME>>> + CM_VERIFY_SSL: 'no' + force_cache: true + names: + - abtf-ml-model-weights + - abtf-ml-model-weights-download + skip_if_env: + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: + - 'yes' + update_tags_from_env_with_prefix: + _url.: + - CM_ML_MODEL_URL + + + +new_env_keys: +- CM_ML_MODEL_* + +print_env_at_the_end: + CM_ML_MODEL_FILE_WITH_PATH: Path to the ML model weights + CM_ML_MODEL_CODE_WITH_PATH: Path to the ML model code + + +variations: + e01: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 31d177228308bbe43917c912b01c2d67 + CM_ML_MODEL_DATASET: coco + CM_ML_MODEL_FILENAME: SSD_e1.pth + CM_ML_MODEL_IMAGE_HEIGHT: '300' + CM_ML_MODEL_IMAGE_WIDTH: '300' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/7nqt5z8gplgeaveo933eo/SSD_e1.pth?rlkey=7lyb4qs2hzg491bfprwcuvx54&dl=0 + group: model-weights + + e65: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: f769eb0321ac7fc1c16f982db6131d2f + CM_ML_MODEL_DATASET: coco + CM_ML_MODEL_FILENAME: SSD_e65.pth + CM_ML_MODEL_IMAGE_HEIGHT: '300' + CM_ML_MODEL_IMAGE_WIDTH: '300' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/wkegl2qxvm8cefbqq00o3/SSD_e65.pth?rlkey=ez26jafjdcly665npl6pdqxl8&dl=0 + group: model-weights + + abtf-mvp: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 1ab66f523715f9564603626e94e59c8c + CM_ML_MODEL_DATASET: cognata + CM_ML_MODEL_FILENAME: baseline_8MP_ss_scales_all_ep60.pth + CM_ML_MODEL_IMAGE_SIZE: '8M' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/9un2i2169rgebui4xklnm/baseline_8MP_ss_scales_all_ep60.pth?rlkey=sez3dnjep4waa09s5uy4r3wmk&st=z859czgk&dl=0 + group: model-weights + + abtf-poc: + default_variations: + download-tool: rclone + env: +
CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 26845c3b9573ce115ef29dca4ae5be14 + CM_ML_MODEL_DATASET: cognata + CM_ML_MODEL_FILENAME: baseline_8MP_ss_scales_fm1_5x5_all_ep60.pth + CM_ML_MODEL_IMAGE_SIZE: '8M' + group: model-weights + + abtf-poc,gdrive: + env: + CM_ML_MODEL_URL: https://drive.google.com/file/d/1kfJR_bs54KONprVd51kZu0PYmmh1wZZa/view + + abtf-poc,rclone: + env: + CM_RCLONE_COPY_USING: copyurl + CM_ML_MODEL_URL: https://automotive.mlcommons-storage.org/SSD_ResNet50%2Fbaseline_8MP_ss_scales_fm1_5x5_all_ep60.pth + CM_RCLONE_CONFIG_CMD: '' + + + local.#: + env: + CM_ML_MODEL_FILENAME: '#' + CM_ML_MODEL_LOCAL: 'yes' + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: 'yes' + group: model-weights + + skip_weights: + default: true + env: + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: 'yes' + group: model-weights + + skip_code: + env: + CM_SKIP_MODEL_CODE_DOWNLOAD: 'yes' + + rclone: + group: download-tool + env: + CM_RCLONE_COPY_USING: copyurl + adr: + abtf-ml-model-weights-download: + tags: _rclone + + wget: + group: download-tool + default: true + adr: + abtf-ml-model-weights-download: + tags: _wget + gdown: + group: download-tool + env: + CM_DOWNLOAD_EXTRA_OPTIONS: " --fuzzy" + adr: + abtf-ml-model-weights-download: + tags: _gdown diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/customize.py new file mode 100644 index 0000000000..10a3a7f837 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-abtf-ssd-pytorch/customize.py @@ -0,0 +1,49 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + if env.get('CM_ML_MODEL_LOCAL', '') == 'yes': + ml_model = env.get('CM_ML_MODEL_FILENAME', '') + if ml_model == '': + return {'return': 1, 'error': '_local.{model name.pth} is not specified'} + + if not os.path.isabs(ml_model): + ml_model = os.path.join( + env.get( + 'CM_TMP_CURRENT_PATH', + ''), + ml_model) + + if not os.path.isfile(ml_model): + return {'return': 1, + 'error': 'ML model {} is not found'.format(ml_model)} + + env['CM_ML_MODEL_FILE_WITH_PATH'] = ml_model + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + if env.get('CM_ML_MODEL_FILE_WITH_PATH', '') == '': + env['CM_ML_MODEL_FILE_WITH_PATH'] = 'model-weights-skipped' + + env['CM_ML_MODEL_FILE'] = os.path.basename( + env['CM_ML_MODEL_FILE_WITH_PATH']) + + if env.get('CM_ABTF_SSD_PYTORCH', '') == '': + env['CM_ABTF_SSD_PYTORCH'] = 'model-code-skipped' + + env['CM_ML_MODEL_CODE_WITH_PATH'] = env['CM_ABTF_SSD_PYTORCH'] + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_FILE_WITH_PATH'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml index e5b4d11bb3..e818192799 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-bert-large-squad/_cm.yaml @@ -13,6 +13,8 @@ new_env_keys: - CM_ML_MODEL* post_deps: - tags: get,dataset-aux,squad-vocab + force_env_keys: + - CM_OUTDIRNAME prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_FILE_WITH_PATH @@ -25,6 +27,8 @@ prehook_deps: CM_ML_MODEL_BERT_PACKED: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml 
b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml index 6227a9a177..4d5c93f1bd 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-dlrm-terabyte/_cm.yaml @@ -25,6 +25,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml index 25e8deca4c..25b2ef9812 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/_cm.yaml @@ -29,6 +29,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL @@ -159,6 +161,10 @@ variations: - python3 tags: get,python3 - tags: get,generic-python-lib,_package.safetensors + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch rclone: add_deps_recursive: dae: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh index 27e5a675ce..b16ee45dac 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-gptj/run-nvidia.sh @@ -17,5 +17,5 @@ export DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt" make -C docker run LOCAL_USER=1 test $? -eq 0 || exit $? -${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 +PYTHONPATH='' ${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 test $? -eq 0 || exit $? 
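Several download dependencies in this patch pair `force_env_keys: [CM_OUTDIRNAME]` with `update_tags_from_env_with_prefix`, which rewrites a dependency's tags from environment values when the dependency is resolved. A simplified sketch of that expansion follows; it is a model of the behavior, not CM's actual implementation, and the URL is hypothetical:

```python
# Simplified model of `update_tags_from_env_with_prefix`: each configured
# prefix is concatenated with the value of every listed env key, and the
# result is appended to the dependency's tag list as an extra variation.
def expand_dep_tags(dep_tags, mapping, env):
    tags = dep_tags.split(',')
    for prefix, env_keys in mapping.items():
        for key in env_keys:
            value = env.get(key, '')
            if value:
                tags.append(prefix + value)
    return ','.join(tags)

env = {'CM_PACKAGE_URL': 'https://example.com/model.zip'}  # hypothetical value
print(expand_dep_tags('download-and-extract,_wget,_extract',
                      {'_url.': ['CM_PACKAGE_URL']}, env))
# -> download-and-extract,_wget,_extract,_url.https://example.com/model.zip
```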
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml index d10c3f4486..b8235a57d1 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-huggingface-zoo/_cm.yaml @@ -35,9 +35,15 @@ uid: 53cf8252a443446a variations: clone-repo: deps: + - tags: get,hf-cli,_with-login + enable_if_env: + CM_HF_TOKEN: + - on - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_ML_MODEL_PATH tags: get,git,repo,_lfs + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _repo.https://huggingface.co/: - CM_MODEL_ZOO_STUB diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml index 2ff45866bc..fe082718ee 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama2/_cm.yaml @@ -26,6 +26,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: LLAMA2_CHECKPOINT_PATH: LLAMA2 checkpoint path tags: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/COPYRIGHT.md new file mode 100644 index 0000000000..a059b0c49b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
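The `clone-repo` change above gates the new `get,hf-cli,_with-login` dependency on `enable_if_env: CM_HF_TOKEN: [on]`. A rough sketch of one way to read such a guard, assuming `on` means "set to any truthy value" (this is an interpretation for illustration, not CM's exact matching code):

```python
# Assumed semantics of enable_if_env, for illustration only: a dependency runs
# when every listed env key holds an allowed value, where 'on' is treated as
# "any non-empty, non-false value".
def dep_enabled(enable_if_env, env):
    for key, allowed in enable_if_env.items():
        value = str(env.get(key, ''))
        if 'on' in allowed:
            if value.lower() in ('', 'no', 'false', '0'):
                return False
        elif value not in allowed:
            return False
    return True

print(dep_enabled({'CM_HF_TOKEN': ['on']}, {'CM_HF_TOKEN': 'hf_example'}))  # True
print(dep_enabled({'CM_HF_TOKEN': ['on']}, {}))                             # False
```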
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/_cm.yaml new file mode 100644 index 0000000000..3765538237 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/_cm.yaml @@ -0,0 +1,68 @@ +alias: get-ml-model-llama3 +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +category: AI/ML models +input_mapping: + outdirname: CM_OUTDIRNAME +new_env_keys: +- CM_ML_MODEL_* +- LLAMA3_CHECKPOINT_PATH +prehook_deps: +- enable_if_env: + CM_TMP_REQUIRE_DOWNLOAD: + - 'yes' + env: {} + extra_cache_tags: llama3,llama-3 + force_env_keys: + - CM_GIT_CHECKOUT_FOLDER + names: + - hf-zoo + tags: get,ml-model,huggingface,zoo,_clone-repo +print_env_at_the_end: + LLAMA3_CHECKPOINT_PATH: LLAMA3 checkpoint path +tags: +- get +- raw +- ml-model +- language-processing +- llama3 +- llama3-405b +uid: 2f8cef2acc334e80 +variations: + fp16: + default: true + env: + CM_ML_MODEL_INPUT_DATA_TYPES: fp16 + CM_ML_MODEL_PRECISION: fp16 + CM_ML_MODEL_WEIGHT_DATA_TYPES: fp16 + group: precision + meta-llama/Llama-3.1-405B-Instruct: + adr: + hf-zoo: + tags: _model-stub.meta-llama/Llama-3.1-405B-Instruct + default: true + env: + CM_ML_MODEL_NAME: Llama-3-405b-instruct + CM_MODEL_ZOO_ENV_KEY: LLAMA3 + group: huggingface-stub + meta-llama/Llama-3.1-8B-Instruct: + adr: + hf-zoo: + tags: _model-stub.meta-llama/Llama-3.1-8B-Instruct + env: + CM_ML_MODEL_NAME: Llama-3-8b-instruct + CM_MODEL_ZOO_ENV_KEY: LLAMA3 + group: huggingface-stub + vllm: + default: true + env: + CM_ML_MODEL_FRAMEWORK: vllm + group: framework + stub.#: + adr: + hf-zoo: + tags: _model-stub.# + env: + CM_MODEL_ZOO_ENV_KEY: LLAMA3 + group: huggingface-stub diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/customize.py new file mode 100644 index 0000000000..9ec7edecd4 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-llama3/customize.py @@ -0,0 +1,35 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + + # skip download and register in cache if the llama3 checkpoint path is + # already defined by the user + if env.get('CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH', '') != '': + env['LLAMA3_CHECKPOINT_PATH'] = env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + return {'return': 0} + + path = env.get('CM_OUTDIRNAME', '').strip() + + if path != "": + os.makedirs(path, exist_ok=True) + env['CM_GIT_CHECKOUT_FOLDER'] = os.path.join( + path, env['CM_ML_MODEL_NAME']) + + env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH'] + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_PATH'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml index 2542d4dc7d..358d563187 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-mixtral/_cm.yaml @@ -6,6 +6,8 @@ category: AI/ML models env: CM_ML_MODEL_DATASET: '' CM_ML_MODEL_WEIGHT_TRANSFORMATIONS: 'no' +docker: + real_run: False input_mapping: checkpoint: MIXTRAL_CHECKPOINT_PATH new_env_keys: @@ -22,6 +24,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: MIXTRAL_CHECKPOINT_PATH: MIXTRAL 
checkpoint path tags: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml index 4e114e43dc..7f68803223 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet-nvidia/_cm.yaml @@ -14,6 +14,8 @@ deps: - tags: get,mlperf,training,src,_nvidia-retinanet - tags: get,mlperf,inference,src - tags: get,ml-model,retinanet,_pytorch,_fp32,_weights + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_TORCH_DEVICE: cpu tags: get,generic-python-lib,_torch diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml index 90e937000d..8da05da0eb 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-retinanet/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: CM_TMP_ML_MODEL_RETINANET_NO_NMS: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL @@ -80,6 +82,8 @@ variations: extra_cache_tags: retinanet,training,patch,file force_cache: true tags: download,file,_url.https://raw.githubusercontent.com/arjunsuresh/ck-qaic/main/package/model-onnx-mlperf-retinanet-no-nms/remove-nms-and-extract-priors.patch + force_env_keys: + - CM_OUTDIRNAME - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_TRAINING_REPO_PATCHED_PATH CM_GIT_PATCH_FILEPATHS: <<>> diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml index 0bc4b1eab1..27a7e39e2b 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/_cm.yaml @@ -3,15 +3,15 @@ automation_alias: script automation_uid: 5b4e0237da074764 cache: true category: AI/ML models +docker: + fake_run_deps: True env: CM_ML_MODEL: RGAT - CM_ML_MODEL_DATASET: ICBH input_mapping: checkpoint: RGAT_CHECKPOINT_PATH - download_path: CM_DOWNLOAD_PATH - to: CM_DOWNLOAD_PATH new_env_keys: - CM_ML_MODEL_* +- CM_ML_MODEL_RGAT_CHECKPOINT_PATH - RGAT_CHECKPOINT_PATH prehook_deps: - enable_if_env: @@ -20,12 +20,14 @@ prehook_deps: CM_TMP_REQUIRE_DOWNLOAD: - 'yes' env: - CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_PATH - extra_cache_tags: rgat,gnn,model + CM_DOWNLOAD_FINAL_ENV_NAME: RGAT_DIR_PATH + extra_cache_tags: rgat,gnn,model,ml-model force_cache: true names: - - dae - tags: download-and-extract + - download-file + tags: download,file + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL @@ -54,7 +56,7 @@ variations: group: download-source rclone: adr: - dae: + download-file: tags: _rclone env: CM_DOWNLOAD_TOOL: rclone @@ -62,4 +64,6 @@ variations: group: download-tool rclone,fp32: env: + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: https://github.com/mlcommons/inference/tree/master/graph/R-GAT#download-model-using-rclone CM_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt + CM_DOWNLOAD_FILENAME: RGAT diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py index dbecb0d8a1..99e6731ecf 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-rgat/customize.py @@ -18,10 +18,17 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + 
download_dir = env.get('CM_OUTDIRNAME', '') + path = env.get('RGAT_CHECKPOINT_PATH', '').strip() if path == '' or not os.path.exists(path): - env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' + if download_dir != '' and os.path.exists( + os.path.join(download_dir, "RGAT", "RGAT.pt")): + env['RGAT_CHECKPOINT_PATH'] = os.path.join( + download_dir, "RGAT", "RGAT.pt") + else: + env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' return {'return': 0} @@ -32,10 +39,15 @@ def postprocess(i): if env.get('RGAT_CHECKPOINT_PATH', '') == '': env['RGAT_CHECKPOINT_PATH'] = os.path.join( - env['CM_ML_MODEL_PATH'], "RGAT.pt") - elif env.get('CM_ML_MODEL_PATH', '') == '': - env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['RGAT_DIR_PATH'], "RGAT.pt") + + if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env['RGAT_CHECKPOINT_PATH'] + + if env.get('CM_ML_MODEL_PATH', '') == '': + env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] - env['CM_GET_DEPENDENT_CACHED_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['RGAT_CHECKPOINT_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml index b2326daff9..ae9ee2757f 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-stable-diffusion/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo,_model-stub.stabilityai/stable-diffusion-xl-base-1.0 + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_DOWNLOAD_TOOL: - rclone diff --git a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml index 4f8406e297..791ecccee9 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-ml-model-tiny-resnet/_cm.yaml @@ -22,6 +22,8 @@ prehook_deps: env: CM_EXTRACT_EXTRACTED_FILENAME: <<>> tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/COPYRIGHT.md new file mode 100644 index 0000000000..a059b0c49b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
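The `get-ml-model-rgat` hunks above spread the checkpoint lookup across `preprocess` and `postprocess`; it is easier to follow restated as a single resolution order. The function below is an illustrative sketch whose names follow the script:

```python
import os

# Restates the checkpoint lookup from get-ml-model-rgat/customize.py:
# 1) honor a user-supplied RGAT_CHECKPOINT_PATH if it exists on disk,
# 2) otherwise reuse <CM_OUTDIRNAME>/RGAT/RGAT.pt from a previous download,
# 3) otherwise flag that the download dependency must run.
def resolve_rgat_checkpoint(env):
    path = env.get('RGAT_CHECKPOINT_PATH', '').strip()
    if path and os.path.exists(path):
        return path
    download_dir = env.get('CM_OUTDIRNAME', '')
    candidate = os.path.join(download_dir, 'RGAT', 'RGAT.pt')
    if download_dir and os.path.exists(candidate):
        return candidate
    env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes'
    return None
```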
diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/_cm.yaml new file mode 100644 index 0000000000..cefe6da4ca --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/_cm.yaml @@ -0,0 +1,39 @@ +alias: get-mlperf-automotive-scratch-space +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +category: MLPerf benchmark support +deps: [] +docker: + run: false +input_description: {} +input_mapping: + scratch_path: CM_ABTF_SCRATCH_PATH +new_env_keys: +- CM_ABTF_SCRATCH_PATH +- CM_ABTF_SCRATCH_PATH_MODELS +- CM_ABTF_SCRATCH_PATH_DATASETS +- CM_ABTF_SCRATCH_VERSION +new_state_keys: [] +post_deps: [] +posthook_deps: [] +prehook_deps: [] +tags: +- get +- abtf +- inference +- scratch +- space +uid: c384b7604e5c47d5 +variations: + version.#: + env: + CM_ABTF_SCRATCH_VERSION: '#' + group: version + version.4_0: + default: true + env: + CM_ABTF_SCRATCH_VERSION: '4_0' + group: version +versions: {} + diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/customize.py new file mode 100644 index 0000000000..057acd2c4e --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/customize.py @@ -0,0 +1,40 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + if env.get('CM_ABTF_SCRATCH_PATH', '') == '': + env['CM_ABTF_SCRATCH_PATH'] = os.getcwd() + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + env['CM_ABTF_SCRATCH_PATH_MODELS'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH'], "models") + env['CM_ABTF_SCRATCH_PATH_DATASETS'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH'], "datasets") + + if not os.path.exists(env['CM_ABTF_SCRATCH_PATH_MODELS']): + os.makedirs(env['CM_ABTF_SCRATCH_PATH_MODELS']) + + if not os.path.exists(env['CM_ABTF_SCRATCH_PATH_DATASETS']): + os.makedirs(env['CM_ABTF_SCRATCH_PATH_DATASETS']) + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ABTF_SCRATCH_PATH'] + + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.bat b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.bat new file mode 100644 index 0000000000..648302ca71 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.bat @@ -0,0 +1 @@ +rem native script diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.sh new file mode 100644 index 0000000000..3a584c10cf --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-automotive-scratch-space/run.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH} + +#To export any variable +#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out + +#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency + + + +function exit_if_error() { + test $? -eq 0 || exit $? +} + +function run() { + echo "Running: " + echo "$1" + echo "" + if [[ ${CM_FAKE_RUN} != 'yes' ]]; then + eval "$1" + exit_if_error + fi +} + +#Add your run commands here... 
+# run "$CM_RUN_CMD" diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh index ac61ad329b..47885f1508 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-loadgen/run.sh @@ -25,14 +25,14 @@ cmake \ -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ "${CM_MLPERF_INFERENCE_SOURCE}/loadgen" \ -DPYTHON_EXECUTABLE:FILEPATH="${CM_PYTHON_BIN_WITH_PATH}" -B . -if [ ${?} -ne 0 ]; then exit $?; fi +test $? -eq 0 || exit $? echo "******************************************************" CM_MAKE_CORES=${CM_MAKE_CORES:-${CM_HOST_CPU_TOTAL_CORES}} CM_MAKE_CORES=${CM_MAKE_CORES:-2} cmake --build . --target install -j "${CM_MAKE_CORES}" -if [ ${?} -ne 0 ]; then exit $?; fi +test $? -eq 0 || exit $? # Clean build directory (too large) cd "${CUR_DIR}" @@ -43,8 +43,7 @@ fi cd "${CM_MLPERF_INFERENCE_SOURCE}/loadgen" ${CM_PYTHON_BIN_WITH_PATH} -m pip install . --target="${MLPERF_INFERENCE_PYTHON_SITE_BASE}" - -if [ ${?} -ne 0 ]; then exit $?; fi +test $? -eq 0 || exit $? # Clean the built wheel #find . -name 'mlcommons_loadgen*.whl' | xargs rm diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml index e19e653787..a9f7410a53 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/_cm.yaml @@ -24,6 +24,7 @@ new_env_keys: - CM_MLPERF_INFERENCE_DLRM_V2_PATH - CM_MLPERF_INFERENCE_GPTJ_PATH - CM_MLPERF_INFERENCE_RNNT_PATH +- CM_MLPERF_INFERENCE_RGAT_PATH - CM_MLPERF_INFERENCE_SOURCE - CM_MLPERF_INFERENCE_SOURCE_VERSION - CM_MLPERF_INFERENCE_VERSION @@ -49,8 +50,6 @@ prehook_deps: _submodules.: - CM_GIT_SUBMODULES print_env_at_the_end_disabled: - CM_MLPERF_INFERENCE_CONF_PATH: Path to the MLPerf inference benchmark configuration - file CM_MLPERF_INFERENCE_SOURCE: Path to MLPerf inference benchmark sources tags: - get @@ -134,38 +133,55 @@ variations: versions: custom: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 deepsparse: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: deepsparse CM_TMP_GIT_URL: https://github.com/neuralmagic/inference main: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: main master: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: master r2.1: env: CM_MLPERF_LAST_RELEASE: v2.1 CM_TMP_GIT_CHECKOUT: v2.1 r3.0: - adr: + ad: inference-git-repo: tags: _tag.v3.0 env: CM_MLPERF_LAST_RELEASE: v3.0 CM_TMP_GIT_CHECKOUT: '' r3.1: - adr: + ad: inference-git-repo: tags: _tag.v3.1 env: CM_MLPERF_LAST_RELEASE: v3.1 - CM_TMP_GIT_CHECKOUT: '' + CM_GIT_CHECKOUT_TAG: 'v3.1' + r4.0: + ad: + inference-git-repo: + tags: _tag.v4.0 + env: + CM_MLPERF_LAST_RELEASE: v4.0 + CM_GIT_CHECKOUT_TAG: 'v4.0' + r4.1: + ad: + inference-git-repo: + tags: _tag.v4.1 + env: + CM_MLPERF_LAST_RELEASE: v4.1 + CM_GIT_CHECKOUT_TAG: 'v4.1' + r5.0: + env: + CM_MLPERF_LAST_RELEASE: v5.0 tvm: env: CM_MLPERF_LAST_RELEASE: v3.1 diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py index 0f6f10e2b1..4076ebe008 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py +++ 
b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-src/customize.py @@ -52,7 +52,8 @@ def preprocess(i): # if not try to assign the values specified in version parameters, # if version parameters does not have the value to a parameter, set the # default one - if env.get('CM_GIT_CHECKOUT', '') == '': + if env.get('CM_GIT_CHECKOUT', '') == '' and env.get( + 'CM_GIT_CHECKOUT_TAG', '') == '': if env.get('CM_TMP_GIT_CHECKOUT', '') != '': env["CM_GIT_CHECKOUT"] = env["CM_TMP_GIT_CHECKOUT"] else: @@ -65,7 +66,7 @@ def preprocess(i): env["CM_GIT_URL"] = "https://github.com/mlcommons/inference" if env.get("CM_MLPERF_LAST_RELEASE", '') == '': - env["CM_MLPERF_LAST_RELEASE"] = "v4.1" + env["CM_MLPERF_LAST_RELEASE"] = "v5.0" if 'CM_GIT_DEPTH' not in env: env['CM_GIT_DEPTH'] = '' @@ -120,6 +121,8 @@ def postprocess(i): inference_root, 'recommendation', 'dlrm') env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'] = os.path.join( inference_root, 'recommendation', 'dlrm_v2') + env['CM_MLPERF_INFERENCE_RGAT_PATH'] = os.path.join( + inference_root, 'graph', 'R-GAT') env['CM_MLPERF_INFERENCE_3DUNET_PATH'] = os.path.join( inference_root, 'vision', 'medical_imaging', '3d-unet-kits19') diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index a9ad05a50d..d764ab24d9 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -28,9 +28,8 @@ retinanet: target_qps: 850.0 Server: target_qps: 630.0 -sdxl: +stable-diffusion-xl: Offline: target_qps: 0.7 Server: - target_qps: 0.3 - + target_qps: 0.3 diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index 4820e8b525..294b2eda74 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -35,7 +35,7 @@ target_qps: 8 Server: target_qps: 7 - sdxl: + stable-diffusion-xl: Offline: target_qps: 1.3 Server: diff --git a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py index ec9fe4ddb1..efbd039c80 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-mlperf-inference-utils/customize.py @@ -26,7 +26,7 @@ def preprocess(i): quiet = (env.get('CM_QUIET', False) == 'yes') - 
utils_path = i['run_script_input']['path'] + utils_path = env['CM_TMP_CURRENT_SCRIPT_PATH'] env['+PYTHONPATH'] = [utils_path] diff --git a/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py b/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py index 7f0bbe977b..f3eb69a543 100644 --- a/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/get-tensorrt/customize.py @@ -24,7 +24,7 @@ def preprocess(i): # Not enforcing dev requirement for now if env.get('CM_TENSORRT_TAR_FILE_PATH', '') == '' and env.get( - 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64': + 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR_', '') != 'aarch64': if os_info['platform'] == 'windows': extra_pre = '' diff --git a/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml index c0a618346a..5aeed2a6b9 100644 --- a/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/install-python-src/_cm.yaml @@ -16,6 +16,9 @@ deps: - tags: detect,cpu - tags: get,generic-sys-util,_libffi-dev - tags: get,generic-sys-util,_libbz2-dev + enable_if_env: + CM_HOST_OS_FLAVOR: + - ubuntu - tags: get,generic-sys-util,_libssl-dev - enable_if_env: CM_HOST_OS_FLAVOR: diff --git a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml index 40ff0c6692..eb5f959b4f 100644 --- a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/_cm.yaml @@ -22,6 +22,7 @@ deps: input_mapping: input: CM_MLPERF_INFERENCE_SUBMISSION_DIR submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION submitter: CM_MLPERF_SUBMITTER tags: - run diff --git a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py index 28ceaf7f85..c8c43e2956 100644 --- a/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/preprocess-mlperf-inference-submission/customize.py @@ -40,8 +40,11 @@ def preprocess(i): print(f"Cleaning {submission_processed}") shutil.rmtree(submission_processed) + version = env.get('CM_MLPERF_SUBMISSION_CHECKER_VERSION', '') + x_version = ' --version ' + version + ' ' if version != '' else '' + CMD = env['CM_PYTHON_BIN'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "tools", "submission", - "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + x_version env['CM_RUN_CMD'] = CMD return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml index f6d9acd5e1..3b80194d42 100644 --- a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/_cm.yaml @@ -261,3 +261,11 @@ variations: env: CM_DATASET: terabyte group: dataset + igbh: + env: + CM_DATASET: igbh + group: dataset + dataset_llama3: + env: + 
CM_DATASET: dataset_llama3 + group: dataset diff --git a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py index 1e4363da6d..ba41d02c90 100644 --- a/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/process-mlperf-accuracy/customize.py @@ -129,18 +129,30 @@ def preprocess(i): extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': - extra_options += f" --statistics-path '{env['CM_SDXL_STATISTICS_FILE_PATH']}' " + extra_options += ( + f""" --statistics-path '{ + env['CM_SDXL_STATISTICS_FILE_PATH']}'""" + ) if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': - extra_options += f" --compliance-images-path '{env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " + extra_options += ( + f""" --compliance-images-path '{ + env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' """ + ) else: - extra_options += f""" --compliance-images-path '{os.path.join(result_dir, "images")}' """ + extra_options += f""" --compliance-images-path '{ + os.path.join( + result_dir, "images")}' """ if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '': - extra_options += f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + extra_options += ( + f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + ) if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': - extra_options += f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + extra_options += ( + f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + ) # env['DATASET_ANNOTATIONS_FILE_PATH'] = env['CM_DATASET_ANNOTATIONS_FILE_PATH'] CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", @@ -173,15 +185,35 @@ def preprocess(i): elif dataset == "terabyte": extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': - extra_options += f" --aggregation-trace-file '{env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + extra_options += ( + f""" --aggregation-trace-file '{ + env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' """ + ) if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': - extra_options += f" --day-23-file '{env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + extra_options += ( + f""" --day-23-file '{ + env['CM_DLRM_V2_DAY23_FILE_PATH']}' """ + ) CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + "'" + extra_options + \ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") + " > '" + out_file + "'" + elif dataset == "igbh": + if env.get('CM_DATASET_IGBH_SIZE', '') == '': + if env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE', + '') == "full": + env['CM_DATASET_IGBH_SIZE'] = "full" + else: + env['CM_DATASET_IGBH_SIZE'] = "tiny" + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( + result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' --output-file '" + out_file + "'" + + elif dataset == "dataset_llama3": + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama3.1-405b", "evaluate-accuracy.py") + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + "' --mlperf-accuracy-file '" + 
os.path.join( + result_dir, "mlperf_log_accuracy.json") + "' --dtype '" + env['CM_ACCURACY_DTYPE'] + "' --dataset-file '" + env['CM_DATASET_LLAMA3_PATH'] + "' > '" + out_file + "'" + else: return {'return': 1, 'error': 'Unsupported dataset'} diff --git a/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py index 7f7633ec28..1e91c785b5 100644 --- a/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/customize.py @@ -18,9 +18,6 @@ def preprocess(i): os_info = i['os_info'] - if os_info['platform'] == 'windows': - return {'return': 1, 'error': 'Windows is not supported in this script yet'} - env = i['env'] meta = i['meta'] diff --git a/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/run.bat b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/run.bat new file mode 100644 index 0000000000..8642fce0e2 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/pull-git-repo/run.bat @@ -0,0 +1,26 @@ +@echo off +setlocal enabledelayedexpansion + +REM Save the current directory +set "CUR_DIR=%CD%" +set "SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%" + +REM Change to the specified path +set "path=%CM_GIT_CHECKOUT_PATH%" +echo cd %path% + +cd /d "%path%" +if errorlevel 1 ( + echo Failed to change directory to %path% + exit /b %errorlevel% +) + +REM Execute the Git pull command +echo %CM_GIT_PULL_CMD% +call %CM_GIT_PULL_CMD% +REM Don't fail if there are local changes +REM if errorlevel 1 exit /b %errorlevel% + +REM Return to the original directory +cd /d "%CUR_DIR%" +endlocal diff --git a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py index 0ea2d2ce84..3d52964a06 100644 --- a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/customize.py @@ -12,6 +12,7 @@ from cmind import utils import cmind as cm import os +from giturlparse import parse def preprocess(i): @@ -43,6 +44,13 @@ def preprocess(i): env['CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE'] = env.get( 'CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE', 'Added new results') + p = parse(repo) + if env.get('CM_GITHUB_PAT', '') != '': + token = env['CM_GITHUB_PAT'] + env['CM_GIT_PUSH_CMD'] = f"""git push https://x-access-token:{env['CM_GITHUB_PAT']}@{p.host}/{p.owner}/{p.repo}""" + else: + env['CM_GIT_PUSH_CMD'] = "git push" + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat index 2052eb5644..085727d190 100644 --- a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat +++ b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.bat @@ -25,7 +25,11 @@ REM Check if the previous command was successful if %errorlevel% neq 0 exit /b %errorlevel% git commit -a -m "%CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE%" -git push + +if defined CM_MLPERF_INFERENCE_SUBMISSION_DIR call %CM_SET_REMOTE_URL_CMD% + +echo "%CM_GIT_PUSH_CMD%" +%CM_GIT_PUSH_CMD% REM Check if the previous command was successful if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh index 1eb4f663e4..8b6ac5648e 100644 --- 
a/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh +++ b/cmx4mlops/cmx4mlops/repo/script/push-mlperf-inference-results-to-github/run.sh @@ -16,5 +16,8 @@ fi test $? -eq 0 || exit $? git commit -a -m "${CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE}" -git push + +echo ${CM_GIT_PUSH_CMD} +${CM_GIT_PUSH_CMD} + test $? -eq 0 || exit $? diff --git a/cmx4mlops/cmx4mlops/repo/script/reproduce-mlperf-inference-dummy/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/reproduce-mlperf-inference-dummy/COPYRIGHT.md new file mode 100644 index 0000000000..696f829223 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/reproduce-mlperf-inference-dummy/COPYRIGHT.md @@ -0,0 +1,3 @@ +© 2022-2025 MLCommons. All Rights Reserved. + +Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects to MLCommons to benefit everyone. diff --git a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml index 5135070b99..3bc5ac184c 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/_cm.yaml @@ -17,6 +17,7 @@ default_env: CM_DOCKER_DETACHED_MODE: 'yes' CM_DOCKER_REUSE_EXISTING_CONTAINER: 'no' CM_DOCKER_PRIVILEGED_MODE: 'no' + CM_PODMAN_MAP_USER_ID: 'no' input_mapping: all_gpus: CM_DOCKER_ADD_ALL_GPUS @@ -57,6 +58,9 @@ input_mapping: new_env_keys: - 'CM_DOCKER_CONTAINER_ID' +deps: + - tags: get,docker + prehook_deps: - names: - build-docker-image @@ -68,3 +72,4 @@ prehook_deps: CM_DOCKER_CONTAINER_ID: - on tags: build,docker,image +- tags: get,docker diff --git a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py index 6a0ce7ce50..73d603235f 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/run-docker-container/customize.py @@ -14,6 +14,7 @@ import os import subprocess from os.path import exists +import json def preprocess(i): @@ -62,7 +63,7 @@ def preprocess(i): print('') print('Checking existing Docker container:') print('') - CMD = f"""docker ps --filter "ancestor={DOCKER_CONTAINER}" """ + CMD = f"""{env['CM_CONTAINER_TOOL']} ps --format=json --filter "ancestor={DOCKER_CONTAINER}" """ if os_info['platform'] == 'windows': CMD += " 2> nul" else: @@ -71,17 +72,31 @@ def preprocess(i): print('') try: - docker_container = subprocess.check_output( - CMD, shell=True).decode("utf-8") + out = subprocess.check_output( + CMD, shell=True, text=True).strip() except Exception as e: return { - 'return': 1, 'error': 'Docker is either not installed or not started:\n{}'.format(e)} + 'return': 1, + 'error': 'Unexpected error occurred with docker run:\n{}'.format(e) + } + + if len(out) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', + '')).lower() in ["1", "true", "yes"]: # container exists + # print(out) + out_split = out.splitlines() + if len(out_split) > 0: + try: + out_json = json.loads(out_split[0]) + # print("JSON successfully loaded:", out_json) + except json.JSONDecodeError as e: + print(f"Error: First line of 'out' is not valid JSON: {e}") + return { + 'return': 1, 'error': f"Error: First line of 'out' is not valid JSON: {e}"} + else: + out_json = [] - output_split = docker_container.split("\n") - if len(output_split) > 1 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', - '')).lower() in ["1", "true", "yes"]: # container exists - out = output_split[1].split(" ") - 
existing_container_id = out[0] + if isinstance(out_json, list) and len(out_json) > 0: + existing_container_id = out_json[0]['Id'] print(f"Reusing existing container {existing_container_id}") env['CM_DOCKER_CONTAINER_ID'] = existing_container_id @@ -89,7 +104,7 @@ def preprocess(i): if env.get('CM_DOCKER_CONTAINER_ID', '') != '': del (env['CM_DOCKER_CONTAINER_ID']) # not valid ID - CMD = "docker images -q " + DOCKER_CONTAINER + CMD = f"""{env['CM_CONTAINER_TOOL']} images -q """ + DOCKER_CONTAINER if os_info['platform'] == 'windows': CMD += " 2> nul" @@ -178,6 +193,10 @@ def postprocess(i): if env.get('CM_DOCKER_EXTRA_RUN_ARGS', '') != '': run_opts += env['CM_DOCKER_EXTRA_RUN_ARGS'] + if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get( + 'CM_PODMAN_MAP_USER_ID', '').lower() not in ["no", "0", "false"]: + run_opts += " --userns=keep-id" + if env.get('CM_DOCKER_PORT_MAPS', []): for ports in env['CM_DOCKER_PORT_MAPS']: port_map_cmds.append(ports) @@ -203,11 +222,6 @@ def postprocess(i): return {'return': 1, 'error': 'Can\'t find separator : in a mount string: {}'.format( mount_cmd)} -# mount_parts = mount_cmd.split(":") -# if len(mount_parts) != 2: -# return {'return': 1, 'error': 'Invalid mount {} -# specified'.format(mount_parts)} - host_mount = mount_parts[0] if not os.path.exists(host_mount): @@ -247,14 +261,14 @@ def postprocess(i): existing_container_id = env.get('CM_DOCKER_CONTAINER_ID', '') if existing_container_id: - CMD = f"ID={existing_container_id} && docker exec $ID bash -c '" + run_cmd + "'" + CMD = f"""ID={existing_container_id} && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '""" + run_cmd + "'" else: - CONTAINER = f"docker run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash" - CMD = f"ID=`{CONTAINER}` && docker exec $ID bash -c '{run_cmd}'" + CONTAINER = f"""{env['CM_CONTAINER_TOOL']} run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash""" + CMD = f"""ID=`{CONTAINER}` && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '{run_cmd}'""" if False and str(env.get('CM_KEEP_DETACHED_CONTAINER', '')).lower() not in [ 'yes', "1", 'true']: - CMD += " && docker kill $ID >/dev/null" + CMD += f""" && {env['CM_CONTAINER_TOOL']} kill $ID >/dev/null""" CMD += ' && echo "ID=$ID"' @@ -263,7 +277,10 @@ def postprocess(i): print('') print(CMD) print('') - print("Running " + run_cmd + " inside docker container") + print( + "Running " + + run_cmd + + f""" inside {env['CM_CONTAINER_TOOL']} container""") record_script({'cmd': CMD, 'env': env}) @@ -287,7 +304,8 @@ def postprocess(i): docker_out = result.stdout # if docker_out != 0: - # return {'return': docker_out, 'error': 'docker run failed'} + # return {'return': docker_out, 'error': f""{env['CM_CONTAINER_TOOL']} + # run failed""} lines = docker_out.split("\n") @@ -311,7 +329,7 @@ def postprocess(i): x1 = '-it' x2 = " && bash ) || bash" - CONTAINER = "docker run " + x1 + " --entrypoint " + x + x + " " + run_opts + \ + CONTAINER = f"{env['CM_CONTAINER_TOOL']} run " + x1 + " --entrypoint " + x + x + " " + run_opts + \ " " + docker_image_repo + "/" + docker_image_name + ":" + docker_image_tag CMD = CONTAINER + " bash -c " + x + run_cmd_prefix + run_cmd + x2 + x @@ -325,7 +343,10 @@ def postprocess(i): print('') docker_out = os.system(CMD) if docker_out != 0: - return {'return': docker_out, 'error': 'docker run failed'} + if docker_out % 256 == 0: + docker_out = 1 + return {'return': docker_out, + 'error': f"""{env['CM_CONTAINER_TOOL']} run failed"""} return {'return': 0} @@ 
-360,7 +381,7 @@ def record_script(i): def update_docker_info(env): # Updating Docker info - docker_image_repo = env.get('CM_DOCKER_IMAGE_REPO', 'local') + docker_image_repo = env.get('CM_DOCKER_IMAGE_REPO', 'localhost/local') env['CM_DOCKER_IMAGE_REPO'] = docker_image_repo docker_image_base = env.get('CM_DOCKER_IMAGE_BASE') diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/COPYRIGHT.md new file mode 100644 index 0000000000..a059b0c49b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/_cm.yaml new file mode 100644 index 0000000000..942f499e89 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/_cm.yaml @@ -0,0 +1,248 @@ +alias: run-mlperf-automotive-app +uid: 2a7315d2dff74898 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: Modular MLPerf inference benchmark pipeline + +developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)" + + +clean_output_files: +- open.tar.gz +- summary.csv +- summary.json + +tags: +- run +- run-abtf +- run-abtf-inference +- mlcommons +- inference +- reference + +tags_help: "run-abtf,inference" + +default_env: + CM_MLPERF_IMPLEMENTATION: reference + CM_MLPERF_MODEL: retinanet + CM_MLPERF_RUN_STYLE: test + +input_mapping: + backend: CM_MLPERF_BACKEND + clean: CM_MLPERF_CLEAN_ALL + compliance: CM_MLPERF_LOADGEN_COMPLIANCE + dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT + dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER + debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM + device: CM_MLPERF_DEVICE + division: CM_MLPERF_SUBMISSION_DIVISION + docker: CM_MLPERF_USE_DOCKER + dump_version_info: CM_DUMP_VERSION_INFO + save_console_log: CM_SAVE_CONSOLE_LOG + execution_mode: CM_MLPERF_RUN_STYLE + find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE + gh_token: CM_GH_TOKEN + gpu_name: CM_NVIDIA_GPU_NAME + hw_name: CM_HW_NAME + hw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + imagenet_path: IMAGENET_PATH + implementation: CM_MLPERF_IMPLEMENTATION + lang: CM_MLPERF_IMPLEMENTATION + mode: CM_MLPERF_LOADGEN_MODE + model: CM_MLPERF_MODEL + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + output_dir: OUTPUT_BASE_DIR + output_summary: MLPERF_INFERENCE_SUBMISSION_SUMMARY + output_tar: MLPERF_INFERENCE_SUBMISSION_TAR_FILE + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT + power: CM_SYSTEM_POWER + precision: CM_MLPERF_MODEL_PRECISION + preprocess_submission: CM_RUN_MLPERF_SUBMISSION_PREPROCESSOR + push_to_github: CM_MLPERF_RESULT_PUSH_TO_GITHUB + 
readme: CM_MLPERF_README + regenerate_accuracy_file: CM_MLPERF_REGENERATE_ACCURACY_FILE + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + results_dir: OUTPUT_BASE_DIR + results_git_url: CM_MLPERF_RESULTS_GIT_REPO_URL + run_checker: CM_RUN_SUBMISSION_CHECKER + run_style: CM_MLPERF_RUN_STYLE + scenario: CM_MLPERF_LOADGEN_SCENARIO + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + skip_submission_generation: CM_MLPERF_SKIP_SUBMISSION_GENERATION + skip_truncation: CM_SKIP_TRUNCATE_ACCURACY + submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR + submitter: CM_MLPERF_SUBMITTER + sut_servers: CM_NETWORK_LOADGEN_SUT_SERVERS + sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + system_type: CM_MLPERF_SUBMISSION_SYSTEM_TYPE + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + test_query_count: CM_TEST_QUERY_COUNT + threads: CM_NUM_THREADS + batch_size: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + sut: CM_MLPERF_INFERENCE_SUT_VARIATION + +new_state_keys: +- app_mlperf_inference_* +- cm-mlperf-inference-results* + +deps: +- tags: detect,os + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- tags: detect,cpu + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- names: + - python + - python3 + tags: get,python3 + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- names: + - inference-src + tags: get,mlcommons,inference,src + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- tags: get,sut,description + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] + +- tags: get,mlperf,inference,results,dir + names: + - get-mlperf-inference-results-dir + enable_if_env: + CM_MLPERF_USE_DOCKER: [ off ] + skip_if_env: + OUTPUT_BASE_DIR: [ on ] +- tags: install,pip-package,for-cmind-python,_package.tabulate +- tags: get,mlperf,inference,utils + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] + +variations: + accuracy-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + all-modes: + env: + CM_MLPERF_LOADGEN_ALL_MODES: 'yes' + group: mode + + all-scenarios: + env: + CM_MLPERF_LOADGEN_ALL_SCENARIOS: 'yes' + + compliance: + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + + dashboard: + default_gui: false + env: + CM_MLPERF_DASHBOARD: 'on' + + find-performance: + env: + CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' + CM_MLPERF_LOADGEN_ALL_MODES: 'no' + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_RESULT_PUSH_TO_GITHUB: false + group: submission-generation + + full: + add_deps_recursive: + coco2014-original: + tags: _full + coco2014-preprocessed: + tags: _full + env: + CM_MLPERF_SUBMISSION_GENERATION_STYLE: full + CM_MLPERF_SKIP_SUBMISSION_GENERATION: 'yes' + group: submission-generation-style + + performance-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + mvp-demo: + default_env: + CM_MLPERF_DEVICE: cpu + + env: + CM_MLPERF_INFERENCE_VERSION: mvp-demo + CM_MLPERF_MODEL: abtf-demo-model + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_IMPLEMENTATION: mlcommons-python + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: mvp-demo + adr: + compiler: + tags: gcc + group: benchmark-version + + poc-demo: + default_env: + CM_MLPERF_DEVICE: cpu + CM_TEST_QUERY_COUNT: "20" + 
+ env: + CM_MLPERF_INFERENCE_VERSION: poc-demo + CM_MLPERF_MODEL: abtf-poc-model + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_IMPLEMENTATION: mlcommons-python + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: poc-demo + adr: + compiler: + tags: gcc + group: benchmark-version + + performance-and-accuracy: + default: true + base: + - all-modes + default_variations: + submission-generation-style: full + group: submission-generation + + submission: + base: + - all-modes + default_gui: true + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'yes' + CM_TAR_SUBMISSION_DIR: 'yes' + group: submission-generation + post_deps: + - names: + - submission-generator + enable_if_env: + CM_MLPERF_SKIP_SUBMISSION_GENERATION: + - 'no' + - 'false' + - 'False' + - '0' + tags: generate,mlperf,inference,submission diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/customize.py b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/customize.py new file mode 100644 index 0000000000..14cb9c2374 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-automotive-app/customize.py @@ -0,0 +1,403 @@ +from cmind import utils +import os +import json +import shutil +import subprocess +import cmind as cm +import copy +from tabulate import tabulate + +summary_ext = ['.csv', '.json', '.xlsx'] + +########################################################################## + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + + inp = i['input'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": + return {'return': 0} + + dump_version_info = env.get('CM_DUMP_VERSION_INFO', True) + system_meta = state.get('CM_SUT_META', {}) + if system_meta: + env['CM_SUT_META_EXISTS'] = "yes" + + env['CM_MODEL'] = env['CM_MLPERF_MODEL'] + + # Clean MLPerf inference output tar file if non-standard + x = env.get('MLPERF_INFERENCE_SUBMISSION_TAR_FILE', '') + if x != '' and os.path.isfile(x): + os.remove(x) + + # Clean MLPerf inference submission summary files + x = env.get('MLPERF_INFERENCE_SUBMISSION_SUMMARY', '') + if x != '': + for y in summary_ext: + z = x + y + if os.path.isfile(z): + os.remove(z) + + if env.get('CM_MLPERF_SUBMISSION_SYSTEM_TYPE', '') != '': + system_type = env['CM_MLPERF_SUBMISSION_SYSTEM_TYPE'] + system_meta['system_type'] = system_type + + if env.get('CM_MLPERF_SUBMISSION_DIVISION', '') != '': + division = env['CM_MLPERF_SUBMISSION_DIVISION'] + system_meta['division'] = division + + if system_meta.get('division', '') != "closed": + # no compliance runs needed for open division + env["CM_MLPERF_LOADGEN_COMPLIANCE"] = "no" + + clean = False + + if 'CM_MLPERF_CLEAN_ALL' in env: + clean = True + if 'CM_MLPERF_CLEAN_SUBMISSION_DIR' not in env: + env['CM_MLPERF_CLEAN_SUBMISSION_DIR'] = "yes" + if 'CM_RERUN' not in env: + env['CM_RERUN'] = "yes" + + if str(env.get('CM_SYSTEM_POWER', 'no')).lower( + ) != "no" or env.get('CM_MLPERF_POWER', '') == "yes": + power_variation = ",_power" + env['CM_MLPERF_POWER'] = "yes" + else: + power_variation = "" + + if env.get('CM_RUN_STYLE', + '') == "valid" and 'CM_RUN_MLPERF_ACCURACY' not in env: + env['CM_RUN_MLPERF_ACCURACY'] = "on" + + if env.get('CM_MLPERF_INFERENCE_SOURCE', '') != '': + print( + "Using MLCommons Inference source from " + + env['CM_MLPERF_INFERENCE_SOURCE']) + + if 
'CM_MLPERF_LOADGEN_EXTRA_OPTIONS' not in env: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] = "" + + if 'CM_MLPERF_LOADGEN_MODES' not in env: + if 'CM_MLPERF_LOADGEN_MODE' not in env: + env['CM_MLPERF_LOADGEN_MODE'] = "performance" + + if 'CM_MLPERF_LOADGEN_SCENARIOS' not in env: + if 'CM_MLPERF_LOADGEN_SCENARIO' not in env: + env['CM_MLPERF_LOADGEN_SCENARIO'] = "Offline" + + if env.get('CM_MLPERF_LOADGEN_ALL_SCENARIOS', '') == "yes": + env['CM_MLPERF_LOADGEN_SCENARIOS'] = get_valid_scenarios( + env['CM_MODEL'], + system_meta['system_type'], + env['CM_MLPERF_LAST_RELEASE'], + env['CM_MLPERF_INFERENCE_SOURCE']) + else: + system_meta = {} + env['CM_MLPERF_LOADGEN_SCENARIOS'] = [ + env['CM_MLPERF_LOADGEN_SCENARIO']] + + if env.get('CM_MLPERF_LOADGEN_ALL_MODES', '') == "yes": + env['CM_MLPERF_LOADGEN_MODES'] = ["performance", "accuracy"] + else: + env['CM_MLPERF_LOADGEN_MODES'] = [env['CM_MLPERF_LOADGEN_MODE']] + + if env.get('OUTPUT_BASE_DIR', '') == '': + env['OUTPUT_BASE_DIR'] = env.get( + 'CM_MLPERF_INFERENCE_RESULTS_DIR', os.getcwd()) + + test_list = [] + + variation_implementation = "_" + \ + env.get("CM_MLPERF_IMPLEMENTATION", "reference") + variation_model = ",_" + env["CM_MLPERF_MODEL"] + variation_backend = ",_" + \ + env["CM_MLPERF_BACKEND"] if env.get( + "CM_MLPERF_BACKEND", "") != "" else "" + variation_device = ",_" + \ + env["CM_MLPERF_DEVICE"] if env.get( + "CM_MLPERF_DEVICE", "") != "" else "" + variation_run_style = ",_" + env.get("CM_MLPERF_RUN_STYLE", "test") + variation_reproducibility = ",_" + env["CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS"] if env.get( + "CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS", "") != "" else "" + + if env.get("CM_MLPERF_MODEL_PRECISION", '') != '': + variation_quantization_string = ",_" + env["CM_MLPERF_MODEL_PRECISION"] + else: + variation_quantization_string = "" + + tags = "app,abtf-inference," + variation_implementation + variation_model + variation_backend + variation_device + \ + variation_run_style + variation_reproducibility + \ + variation_quantization_string + power_variation + verbose = inp.get('v', False) + print_env = inp.get('print_env', False) + print_deps = inp.get('print_deps', False) + add_deps_recursive = inp.get('add_deps_recursive', {}) + add_deps = inp.get('add_deps', {}) + ad = inp.get('ad', {}) + adr = inp.get('adr', {}) + adr_from_meta = i['run_script_input'].get('add_deps_recursive') + + for key in adr_from_meta: + add_deps_recursive[key] = adr_from_meta[key] + + if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += "_batch_size." 
+ \ + env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'] + + if env.get('CM_MLPERF_INFERENCE_SUT_VARIATION', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += "_" + \ + env['CM_MLPERF_INFERENCE_SUT_VARIATION'] + + if env.get('CM_NETWORK_LOADGEN', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + network_variation_tag = f"_network-{env['CM_NETWORK_LOADGEN']}" + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += network_variation_tag + + if env.get('CM_OUTPUT_FOLDER_NAME', '') == '': + env['CM_OUTPUT_FOLDER_NAME'] = env['CM_MLPERF_RUN_STYLE'] + "_results" + + output_dir = os.path.join( + env['OUTPUT_BASE_DIR'], + env['CM_OUTPUT_FOLDER_NAME']) + if clean: + path_to_clean = output_dir + + print('=========================================================') + print('Cleaning results in {}'.format(path_to_clean)) + if os.path.exists(path_to_clean): + shutil.rmtree(path_to_clean) + + print('=========================================================') + + if str(env.get('CM_MLPERF_USE_DOCKER', '') + ).lower() in ["1", "true", "yes"]: + action = "docker" + del (env['OUTPUT_BASE_DIR']) + state = {} + docker_extra_input = {} + + if env.get('CM_HW_NAME'): + del (env['CM_HW_NAME']) + + for k in inp: + if k.startswith("docker_"): + docker_extra_input[k] = inp[k] + inp = {} + else: + action = "run" + + # local_keys = [ 'CM_MLPERF_SKIP_RUN', 'CM_MLPERF_LOADGEN_QUERY_COUNT', + # 'CM_MLPERF_LOADGEN_TARGET_QPS', 'CM_MLPERF_LOADGEN_TARGET_LATENCY' ] + + for scenario in env['CM_MLPERF_LOADGEN_SCENARIOS']: + scenario_tags = tags + ",_" + scenario.lower() + env['CM_MLPERF_LOADGEN_SCENARIO'] = scenario + + if scenario == "Offline": + if env.get('CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS'): + env['CM_MLPERF_LOADGEN_TARGET_QPS'] = env['CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS'] + elif scenario == "Server": + if env.get('CM_MLPERF_LOADGEN_SERVER_TARGET_QPS'): + env['CM_MLPERF_LOADGEN_TARGET_QPS'] = env['CM_MLPERF_LOADGEN_SERVER_TARGET_QPS'] + elif scenario == "SingleStream": + if env.get('CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY'): + env['CM_MLPERF_LOADGEN_TARGET_LATENCY'] = env['CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY'] + elif scenario == "MultiStream": + if env.get('CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY'): + env['CM_MLPERF_LOADGEN_TARGET_LATENCY'] = env['CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY'] + + for mode in env['CM_MLPERF_LOADGEN_MODES']: + env_copy = copy.deepcopy(env) + env_copy['CM_MLPERF_LOADGEN_MODE'] = mode + for key in env_copy: + if isinstance(env_copy[key], str) and env_copy[key].startswith( + "CM_TMP_"): + del env_copy[key] + + print(f"\nRunning loadgen scenario: {scenario} and mode: {mode}") + ii = {'action': action, 'automation': 'script', 'tags': scenario_tags, 'quiet': 'true', + 'env': env_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 
'add_deps_recursive': + copy.deepcopy(add_deps_recursive), 'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} + + if action == "docker": + for k in docker_extra_input: + ii[k] = docker_extra_input[k] + r = cm.access(ii) + if r['return'] > 0: + return r + if action == "docker": + # We run commands interactively inside the docker container + return {'return': 0} + + if env_copy.get('CM_OUTPUT_PREDICTIONS_PATH'): + print( + f"\nOutput predictions can be seen by opening the images inside {env_copy['CM_OUTPUT_PREDICTIONS_PATH']}\n") + + if state.get('docker', {}): + del (state['docker']) + + if env.get("CM_MLPERF_LOADGEN_COMPLIANCE", "") == "yes": + for test in test_list: + env_copy = copy.deepcopy(env) + for key in env_copy: + if isinstance(env_copy[key], str) and env_copy[key].startswith( + "CM_TMP_"): + del env_copy[key] + env_copy['CM_MLPERF_LOADGEN_COMPLIANCE_TEST'] = test + env_copy['CM_MLPERF_LOADGEN_MODE'] = "compliance" + ii = {'action': action, 'automation': 'script', 'tags': scenario_tags, 'quiet': 'true', + 'env': env_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': + copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} + if action == "docker": + for k in docker_extra_input: + ii[k] = docker_extra_input[k] + r = cm.access(ii) + if r['return'] > 0: + return r + if state.get('docker', {}): + del (state['docker']) + + if state.get("cm-mlperf-inference-results"): + # print(state["cm-mlperf-inference-results"]) + for sut in state["cm-mlperf-inference-results"]: # only one sut will be there + # Better to do this in a stand alone CM script with proper deps but + # currently we manage this by modifying the sys path of the python + # executing CM + import mlperf_utils # noqa + + print(sut) + result_table, headers = mlperf_utils.get_result_table( + state["cm-mlperf-inference-results"][sut]) + print(tabulate(result_table, headers=headers, tablefmt="pretty")) + + print( + f"\nThe MLPerf inference results are stored at {output_dir}\n") + + return {'return': 0} + + +def get_valid_scenarios(model, category, mlperf_version, mlperf_path): + + import sys + + submission_checker_dir = os.path.join(mlperf_path, "tools", "submission") + + sys.path.append(submission_checker_dir) + if not os.path.exists(os.path.join( + submission_checker_dir, "submission_checker.py")): + shutil.copy(os.path.join(submission_checker_dir, "submission-checker.py"), os.path.join(submission_checker_dir, + "submission_checker.py")) + + import submission_checker as checker + + if "dlrm-99" in model: + model = model.replace("dlrm-99", "dlrm-v2-99") + if "sdxl" in model: + model = "stable-diffusion-xl" + + config = checker.MODEL_CONFIG + + internal_model_name = config[mlperf_version]["model_mapping"].get( + model, model) + + valid_scenarios = config[mlperf_version]["required-scenarios-" + + category][internal_model_name] + + print( + "Valid Scenarios for " + + model + + " in " + + category + + " category are :" + + str(valid_scenarios)) + + return valid_scenarios + +########################################################################## + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'reference': + x1 = env.get('CM_MLPERF_INFERENCE_SOURCE', '') + x2 = env.get('CM_MLPERF_INFERENCE_CONF_PATH', '') + + if x1 != 
'' and x2 != '': + print('') + print( + 'Path to the MLPerf inference benchmark reference sources: {}'.format(x1)) + print( + 'Path to the MLPerf inference reference configuration file: {}'.format(x2)) + print('') + + return {'return': 0} + + +########################################################################## + + +def load_md(path, path2, name): + + fn = os.path.join(path, path2, name + '.md') + + s = '' + + if os.path.isfile(fn): + r = utils.load_txt(fn) + if r['return'] > 0: + return r + + s = r['string'] + + return {'return': 0, 'string': s} + +########################################################################## + + +def get_url(url, path, path2, name, text): + + name_md = name + '.md' + fn = os.path.join(path, path2, name_md) + + urlx = '' + url_online = '' + if os.path.isfile(fn): + if not url.endswith('/'): + url += '/' + urlx = url + path2 + '/' + name_md + + url_online = '[{}]({})'.format(text, urlx) + + return {'return': 0, 'url_online': url_online} + +########################################################################## diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml index cf390bc3ab..29effb5c24 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/_cm.yaml @@ -8,9 +8,6 @@ category: Modular MLPerf inference benchmark pipeline developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)" -gui: - title: CM GUI to run MLPerf inference benchmarks and prepare submissions - clean_output_files: - open.tar.gz - summary.csv @@ -29,6 +26,7 @@ tags: - reference tags_help: "run-mlperf,inference" +predeps: False default_env: CM_MLPERF_IMPLEMENTATION: reference @@ -36,6 +34,8 @@ default_env: CM_MLPERF_RUN_STYLE: test CM_MLPERF_SKIP_SUBMISSION_GENERATION: no CM_DOCKER_PRIVILEGED_MODE: yes + CM_MLPERF_SUBMISSION_DIVISION: open + CM_MLPERF_INFERENCE_TP_SIZE: 1 input_mapping: api_server: CM_MLPERF_INFERENCE_API_SERVER @@ -110,6 +110,7 @@ input_mapping: sut: CM_MLPERF_INFERENCE_SUT_VARIATION nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH tp_size: CM_NVIDIA_TP_SIZE + vllm_tp_size: CM_MLPERF_INFERENCE_TP_SIZE vllm_model_name: CM_VLLM_SERVER_MODEL_NAME num_workers: CM_MLPERF_INFERENCE_NUM_WORKERS max_test_duration: CM_MLPERF_MAX_DURATION_TEST @@ -139,6 +140,12 @@ deps: - names: - inference-src tags: get,mlcommons,inference,src +- tags: pull,git,repo + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: + - 'yes' - tags: get,sut,description skip_if_env: CM_MLPERF_USE_DOCKER: [ on ] @@ -201,11 +208,6 @@ variations: env: CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' - dashboard: - default_gui: false - env: - CM_MLPERF_DASHBOARD: 'on' - find-performance: env: CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' @@ -334,7 +336,6 @@ variations: tags: _version.r4_0-dev r4.1-dev: - default: true env: CM_MLPERF_INFERENCE_VERSION: '4.1-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1-dev_default @@ -351,6 +352,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '4.1' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v4.1 adr: get-mlperf-inference-results-dir: tags: _version.r4_1 @@ -359,6 +361,21 @@ variations: mlperf-inference-nvidia-scratch-space: tags: _version.r4_1 group: benchmark-version + + r5.0-dev: + default: true + env: + CM_MLPERF_INFERENCE_VERSION: '5.0-dev' + 
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v5.0 + group: benchmark-version + adr: + get-mlperf-inference-results-dir: + tags: _version.r5.0-dev + get-mlperf-inference-submission-dir: + tags: _version.r5.0-dev + mlperf-inference-nvidia-scratch-space: + tags: _version.r5.0-dev short: add_deps_recursive: @@ -382,7 +399,6 @@ variations: submission: base: - all-modes - default_gui: true default_variations: submission-generation-style: full env: @@ -449,6 +465,7 @@ input_description: - mobilenet - efficientnet - rgat + - llama3_1-405b default: resnet50 desc: MLPerf model sort: 200 diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py index e620df68b0..d710d1f7cd 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-app/customize.py @@ -244,7 +244,8 @@ def preprocess(i): inp = {} if str(docker_dt).lower() in ["yes", "true", "1"]: # turning it off for the first run and after that we turn it on - env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' + if env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '') == '': + env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' env['CM_DOCKER_DETACHED_MODE'] = 'yes' if env.get('CM_DOCKER_IMAGE_NAME', '') != '': @@ -444,587 +445,3 @@ def get_url(url, path, path2, name, text): url_online = '[{}]({})'.format(text, urlx) return {'return': 0, 'url_online': url_online} - -########################################################################## - - -def gui(i): - - params = i['params'] - st = i['st'] - - script_meta = i['meta'] - - misc = i['misc_module'] - - script_path = i['script_path'] - script_url = i.get('script_url', '') - script_tags = i.get('script_tags', '') - - compute_meta = i.get('compute_meta', {}) - compute_tags = compute_meta.get('tags', []) - bench_meta = i.get('bench_meta', {}) - - compute_uid = compute_meta.get('uid', '') - bench_uid = bench_meta.get('uid', '') - - st_inputs_custom = {} - - bench_input = bench_meta.get('bench_input', {}) - - end_html = '' - - extra = {} - add_to_st_inputs = {} - - inp = script_meta['input_description'] - - # Here we can update params - v = compute_meta.get('mlperf_inference_device') - if v is not None and v != '': - inp['device']['force'] = v - - if v in ['tpu', 'gaudi']: - st.markdown('----') - st.markdown( - '**WARNING: unified CM workflow support for this hardware is pending - please [feel free to help](https://discord.gg/JjWNWXKxwT)!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - elif 'orin' in compute_tags: - st.markdown('----') - st.markdown( - '**WARNING: we need to encode CM knowledge from [this Orin setp](https://github.com/mlcommons/ck/blob/master/docs/mlperf/setup/setup-nvidia-jetson-orin.md) to this GUI!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - st.markdown('---') - st.markdown('**How would you like to run the MLPerf inference benchmark?**') - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_device', - 'desc': inp['device']}) - device = r.get('value2') - inp['device']['force'] = device - - if device == 'cpu': - inp['implementation']['choices'] = ['mlcommons-python', - 'mlcommons-cpp', 'intel', 'ctuning-cpp-tflite'] - if 'intel' in compute_tags: - inp['implementation']['default'] = 'intel' - else: - inp['implementation']['default'] = 'mlcommons-python' - 
inp['backend']['choices'] = [ - 'onnxruntime', 'deepsparse', 'pytorch', 'tf', 'tvm-onnx'] - inp['backend']['default'] = 'onnxruntime' - elif device == 'rocm': - inp['implementation']['force'] = 'mlcommons-python' - inp['precision']['force'] = '' - inp['backend']['force'] = 'onnxruntime' - st.markdown( - '*WARNING: CM-MLPerf inference workflow was not tested thoroughly for AMD GPU - please feel free to test and improve!*') - elif device == 'qaic': - inp['implementation']['force'] = 'qualcomm' - inp['precision']['force'] = '' - inp['backend']['force'] = 'glow' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_division', - 'desc': inp['division']}) - division = r.get('value2') - inp['division']['force'] = division - - y = 'compliance' - if division == 'closed': - inp[y]['default'] = 'yes' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_compliance', - 'desc': inp[y]}) - compliance = r.get('value2') - inp[y]['force'] = compliance - - if compliance == 'yes': - st.markdown( - '*:red[See [online table with required compliance tests](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#5132-inference)].*') - - else: - inp[y]['force'] = 'no' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_category', - 'desc': inp['category']}) - category = r.get('value2') - inp['category']['force'] = category - - ########################################################################## - # Implementation - v = bench_input.get('mlperf_inference_implementation') - if v is not None and v != '': - inp['implementation']['force'] = v - else: - if device == 'cuda': - inp['implementation']['choices'] = [ - 'nvidia', 'mlcommons-python', 'mlcommons-cpp'] - inp['implementation']['default'] = 'nvidia' - inp['backend']['choices'] = ['tensorrt', 'onnxruntime', 'pytorch'] - inp['backend']['default'] = 'tensorrt' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_implementation', - 'desc': inp['implementation']}) - implementation = r.get('value2') - inp['implementation']['force'] = implementation - - implementation_setup = '' - r = load_md(script_path, 'setup', 'i-' + implementation) - if r['return'] == 0: - implementation_setup = r['string'] - - url_faq_implementation = '' - r = get_url(script_url, script_path, 'faq', implementation, 'FAQ online') - if r['return'] == 0: - url_faq_implementation = r['url_online'] - - can_have_docker_flag = False - - if implementation == 'mlcommons-cpp': - # inp['backend']['choices'] = ['onnxruntime'] - inp['precision']['force'] = 'float32' - inp['backend']['force'] = 'onnxruntime' - inp['model']['choices'] = ['resnet50', 'retinanet'] - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-cpp)]*') - elif implementation == 'mlcommons-python': - inp['precision']['force'] = 'float32' - if device == 'cuda': - inp['backend']['choices'] = ['onnxruntime', 'pytorch', 'tf'] - inp['backend']['default'] = 'onnxruntime' - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-python)]*') - elif implementation == 'ctuning-cpp-tflite': - inp['precision']['force'] = 'float32' - inp['model']['force'] = 'resnet50' - st.markdown( - 
'*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-ctuning-cpp-tflite)]*') - elif implementation == 'nvidia': - inp['backend']['force'] = 'tensorrt' - extra['skip_script_docker_func'] = True - can_have_docker_flag = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-nvidia)]*') - elif implementation == 'intel': - inp['model']['choices'] = ['bert-99', 'gptj-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['int8', 'int4'] - inp['precision']['default'] = 'int8' - inp['category']['force'] = 'datacenter' - inp['backend']['force'] = 'pytorch' - inp['sut']['default'] = 'sapphire-rapids.112c' - can_have_docker_flag = True - extra['skip_script_docker_func'] = True -# st.markdown('*:red[Note: Intel implementation require extra CM command to build and run Docker container - you will run CM commands to run MLPerf benchmarks there!]*') - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-intel)]*') - elif implementation == 'qualcomm': - inp['model']['choices'] = ['resnet50', 'retinanet', 'bert-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['default'] = 'float16' - extra['skip_script_docker_func'] = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-qualcomm)]*') - - ########################################################################## - # Backend - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_backend', - 'desc': inp['backend']}) - backend = r.get('value2') - inp['backend']['force'] = backend - - backend_setup = '' - r = load_md(script_path, 'setup', 'b-' + backend) - if r['return'] == 0: - backend_setup = r['string'] - - if backend == 'deepsparse': - inp['model']['choices'] = [ - 'resnet50', 'retinanet', 'bert-99', 'bert-99.9'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['float32', 'int8'] - inp['precision']['default'] = 'int8' - if 'force' in inp['precision']: - del (inp['precision']['force']) - - ########################################################################## - # Model - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_model', - 'desc': inp['model']}) - model = r.get('value2') - inp['model']['force'] = model - - github_doc_model = '' - - if model == 'retinanet': - x = '50' - if implementation == 'mlcommons-python': - x = '200' - st.markdown( - ':red[This model requires ~{}GB of free disk space for preprocessed dataset in a full/submission run!]\n'.format(x)) - - elif model.startswith('bert-'): - github_doc_model = 'bert' - - elif model.startswith('3d-unet-'): - github_doc_model = '3d-unet' - - elif model == 'rnnt': - github_doc_model = 'rnnt' - - elif model.startswith('dlrm-v2-'): - github_doc_model = 'dlrm_v2' - - elif model.startswith('gptj-'): - github_doc_model = 'gpt-j' - - elif model == 'sdxl': - github_doc_model = 'stable-diffusion-xl' - - elif model.startswith('llama2-'): - github_doc_model = 'llama2-70b' - - elif model.startswith('mixtral-'): - github_doc_model = 'mixtral-8x7b' - - if github_doc_model == '': - github_doc_model = model - - model_cm_url = 
'https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference/{}'.format( - github_doc_model) - extra_notes_online = '[Extra notes online]({})\n'.format(model_cm_url) - - st.markdown( - '*[CM-MLPerf GitHub docs for this model]({})*'.format(model_cm_url)) - - ########################################################################## - # Precision - if implementation == 'intel': - if model == 'bert-99': - inp['precision']['force'] = 'int8' - elif model == 'gptj-99': - inp['precision']['force'] = 'int4' - elif implementation == 'qualcomm': - if model == 'resnet50': - inp['precision']['print'] = 'int8' - elif model == 'retinanet': - inp['precision']['print'] = 'int8' - elif model == 'bert-99': - inp['precision']['print'] = 'int8/float16' - - if inp['precision'].get('force', '') == '': - x = inp['precision'].get('print', '') - if x != '': - st.markdown('**{}**: {}'.format(inp['precision']['desc'], x)) - else: - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_precision', - 'desc': inp['precision']}) - precision = r.get('value2') - inp['precision']['force'] = precision - - ########################################################################## - # Benchmark version - - script_meta_variations = script_meta['variations'] - - choices = [''] + [ - k for k in script_meta_variations if script_meta_variations[k].get( - 'group', '') == 'benchmark-version'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Force specific benchmark version?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_version', - 'desc': desc}) - benchmark_version = r.get('value2') - - if benchmark_version != '': - params['~~benchmark-version'] = [benchmark_version] - - ########################################################################## - # Run via Docker container - if can_have_docker_flag: - - default_choice = 'yes - run in container' - - choices = [default_choice, 'no - run natively'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Should CM script prepare and run Docker container in interactive mode to run MLPerf? 
You can then copy/paste CM commands generated by this GUI to benchmark different models.'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_docker', - 'desc': desc}) - benchmark_docker = r.get('value2') - - if benchmark_docker == 'yes - run in container': - add_to_st_inputs['@docker'] = True - add_to_st_inputs['@docker_cache'] = 'no' - - ########################################################################## - # Prepare submission - st.markdown('---') - - submission = st.toggle( - 'Would you like to prepare official submission?', - value=False) - if submission: - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_hw_name', - 'desc': inp['hw_name']}) - inp['hw_name']['force'] = r.get('value2') - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_submitter', - 'desc': inp['submitter']}) - submitter = r.get('value2') - inp['submitter']['force'] = submitter - - params['~~submission-generation'] = ['submission'] - params['~all-scenarios'] = ['true'] - inp['scenario']['force'] = '' - inp['clean']['default'] = False - inp['repro']['force'] = True - - x = '*:red[Use the following command to find local directory with the submission tree and results:]*\n```bash\ncm find cache --tags=submission,dir\n```\n' - - x += '*:red[You will also find results in `mlperf-inference-submission.tar.gz` file that you can submit to MLPerf!]*\n\n' - - x += '*:red[Note that if some results are INVALID due to too short run, you can rerun the same CM command and it should increase the length of the benchmark until you get valid result!]*\n' - - st.markdown(x) - - st.markdown('---') - - else: - inp['submitter']['force'] = '' - inp['clean']['default'] = True - params['~submission'] = ['false'] - - choices = [ - 'Performance', - 'Accuracy', - 'Find Performance from a short run', - 'Performance and Accuracy'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'What to measure?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_measure', - 'desc': desc}) - measure = r.get('value2') - - x = '' - if measure == 'Performance': - x = 'performance-only' - elif measure == 'Accuracy': - x = 'accuracy-only' - elif measure == 'Find Performance from a short run': - x = 'find-performance' - elif measure == 'Performance and Accuracy': - x = 'submission' - - params['~~submission-generation'] = [x] - - ####################################################################### - # Prepare scenario - - xall = 'All applicable' - choices = ['Offline', 'Server', 'SingleStream', 'MultiStream', xall] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Which scenario(s)?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_scenario', - 'desc': desc}) - scenario = r.get('value2') - - if scenario == xall: - params['~all-scenarios'] = ['true'] - inp['scenario']['force'] = '' - else: - inp['scenario']['force'] = scenario - - ########################################################################## - # Short or full run - - x = ['Full run', 'Short run'] - if submission: - choices = [x[0], x[1]] - else: - choices = [x[1], x[0]] - - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Short (test) or full (valid) run?'} - r = misc.make_selector({'st': st, - 'st_inputs': 
st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_how', - 'desc': desc}) - how = r.get('value2') - - if how == x[0]: - params['~~submission-generation-style'] = ['full'] - inp['execution_mode']['force'] = 'valid' - else: - params['~~submission-generation-style'] = ['short'] - inp['execution_mode']['force'] = 'test' - - ########################################################################## - # Power - -# desc = {'boolean':True, 'default':False, 'desc':'Measure power?'} -# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_power', 'desc':desc}) -# power = r.get('value2', False) - - power = st.toggle('Measure power consumption?', value=False) - - if power: - inp['power']['force'] = 'yes' - - y = 'adr.mlperf-power-client.power_server' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_server', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - y = 'adr.mlperf-power-client.port' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_port', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - st.markdown( - '*:red[See [online notes](https://github.com/mlcommons/ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md)] to setup power meter and server.*') - - else: - inp['power']['force'] = 'no' - inp['adr.mlperf-power-client.power_server']['force'] = '' - inp['adr.mlperf-power-client.port']['force'] = '' - - ########################################################################## - # Dashboard - -# desc = {'boolean':True, 'default':False, 'desc':'Output results to W&B dashboard?'} -# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_dashboard', 'desc':desc}) -# dashboard = r.get('value2', False) - - dashboard = st.toggle('Output results to W&B dashboard?', value=False) - - if dashboard: - params['~dashboard'] = ['true'] - - y = 'dashboard_wb_project' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_wb_project', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - y = 'dashboard_wb_user' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_wb_user', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - else: - params['~dashboard'] = ['false'] - inp['dashboard_wb_project']['force'] = '' - inp['dashboard_wb_user']['force'] = '' - - # Hide customization by default - params['hide_script_customization'] = True - - x = implementation_setup - if backend_setup != '': - if x != '': - x += '\n\n' - x += backend_setup - - extra['extra_notes_online'] = extra_notes_online - extra['extra_faq_online'] = url_faq_implementation - extra['extra_setup'] = x - - ########################################################################## - value_reproduce = inp.get('repro', {}).get('force', False) - reproduce = st.toggle( - 'Record extra info for reproducibility?', - value=value_reproduce) - - explore = st.toggle( - 'Explore/tune benchmark (batch size, threads, etc)?', - value=False) - - if reproduce or explore: - add_to_st_inputs.update({ - "@repro_extra.run-mlperf-inference-app.bench_uid": bench_uid, - "@repro_extra.run-mlperf-inference-app.compute_uid": compute_uid, - '@results_dir': '{{CM_EXPERIMENT_PATH3}}', - '@submission_dir': '{{CM_EXPERIMENT_PATH3}}' - }) - - 
inp['repro']['force'] = True - extra['use_experiment'] = True - - if explore: - add_to_st_inputs['@batch_size'] = '{{CM_EXPLORE_BATCH_SIZE{[1,2,4,8]}}}' - - ########################################################################## - debug = st.toggle( - 'Debug and run MLPerf benchmark natively from command line after CM auto-generates CMD?', - value=False) - if debug: - inp['debug']['force'] = True - - extra['add_to_st_inputs'] = add_to_st_inputs - - return {'return': 0, 'end_html': end_html, 'extra': extra} diff --git a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml index 84e712a401..0bb2079b06 100644 --- a/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml +++ b/cmx4mlops/cmx4mlops/repo/script/run-mlperf-inference-submission-checker/_cm.yaml @@ -36,6 +36,7 @@ deps: tags: preprocess,mlperf,inference,submission input_mapping: extra_args: CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS + extra_checker_args: CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS extra_model_benchmark_map: CM_MLPERF_EXTRA_MODEL_MAPPING input: CM_MLPERF_INFERENCE_SUBMISSION_DIR power: CM_MLPERF_POWER @@ -50,6 +51,7 @@ input_mapping: src_version: CM_MLPERF_SUBMISSION_CHECKER_VERSION submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR submitter: CM_MLPERF_SUBMITTER + submitter_id: CM_MLPERF_SUBMITTER_ID tar: CM_TAR_SUBMISSION_DIR post_deps: - enable_if_env: @@ -66,6 +68,12 @@ post_deps: CM_TAR_SUBMISSION_DIR: - 'yes' tags: run,tar +- enable_if_env: + CM_SUBMITTER_ID: + - 'yes' + tags: submit,mlperf,results,_inference + env: + CM_MLPERF_SUBMISSION_FILE: <<>> tags: - run - mlc @@ -103,3 +111,7 @@ versions: adr: submission-checker-src: version: r4.1 + r5.0: + adr: + submission-checker-src: + version: master diff --git a/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/COPYRIGHT.md b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/COPYRIGHT.md new file mode 100644 index 0000000000..a059b0c49b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/COPYRIGHT.md @@ -0,0 +1,9 @@ +# Copyright Notice + +© 2024-2025 MLCommons. All Rights Reserved. + +This file is licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License can be obtained at: + +[Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + +Unless required by applicable law or agreed to in writing, software distributed under the License is provided on an "AS IS" basis, without warranties or conditions of any kind, either express or implied. Please refer to the License for the specific language governing permissions and limitations under the License. 
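The `submit-mlperf-results` script introduced below plugs into the submission checker through the `submit,mlperf,results,_inference` post-dep added above. A minimal sketch of driving that flow from Python, following the `cm.access` pattern used by the test scripts later in this patch — the tag list, path, and submitter ID are illustrative assumptions, not values from this patch:

```python
import cmind as cm

# Sketch only: run the MLPerf inference submission checker with the new
# `submitter_id` input so the tarred submission can be pushed to the
# submission UI via the `submit,mlperf,results,_inference` post-dep.
# The tag list, directory, and submitter ID below are placeholders.
r = cm.access({'action': 'run',
               'automation': 'script',
               'tags': 'run,mlperf,inference,submission,checker',
               'submission_dir': '/path/to/mlperf_submission',
               'tar': 'yes',
               'submitter_id': '12345',
               'quiet': 'yes'})
if r['return'] > 0:
    print(r.get('error', 'submission checker failed'))
```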
diff --git a/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/_cm.yaml b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/_cm.yaml new file mode 100644 index 0000000000..b7a10ce834 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/_cm.yaml @@ -0,0 +1,22 @@ +alias: submit-mlperf-results +automation_alias: script +automation_uid: 5b4e0237da074764 +category: MLPerf benchmark support +default_env: + CM_MLPERF_SUBMISSION_URL: https://submissions-ui.mlcommons.org + +input_mapping: + input: CM_MLPERF_SUBMISSION_FILE + submitter_id: CM_MLPERF_SUBMITTER_ID +tags: +- submit +- mlperf +- results +- mlperf-results +- publish-results +- submission +uid: cc01f0a82bef4216 +variations: + inference: + env: + CM_MLPERF_BENCHMARK: "Inference" diff --git a/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/customize.py b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/customize.py new file mode 100644 index 0000000000..d39b233f3b --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/submit-mlperf-results/customize.py @@ -0,0 +1,194 @@ +import requests +from cmind import utils +import cmind as cm +import os +import json + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + meta = i['meta'] + automation = i['automation'] + + server = env['CM_MLPERF_SUBMISSION_URL'] + benchmark = env['CM_MLPERF_BENCHMARK'] + submitter_id = env['CM_MLPERF_SUBMITTER_ID'] + file_path = env['CM_MLPERF_SUBMISSION_FILE'] + + r = get_signed_url(server, benchmark, submitter_id, file_path) + if r['return'] > 0: + return r + + signed_url = r['signed_url'] + submission_id = r['submission_id'] + + # print(signed_url) + # print(submission_id) + r = upload_file_to_signed_url(file_path, signed_url) + if r['return'] > 0: + return r + + r = trigger_submission_checker( + server, submitter_id, benchmark, submission_id) + if r['return'] > 0: + return r + + return {'return': 0} + + +def get_signed_url(server, benchmark, submitter_id, file_path): + # Define the URL + url = f"{server}/index/url" + + # Define the headers + headers = { + "Content-Type": "application/json" + } + + # Define the payload + payload = { + "submitter_id": submitter_id, + "benchmark": benchmark, + "filename": file_path + } + + try: + # Make the POST request + response = requests.post(url, json=payload, headers=headers) + + # Check the response status + if response.status_code == 200: + # print("Request successful!") + # print("Response:", response.json()) + pass + else: + # print(f"Request failed with status code {response.status_code}") + # print("Response:", response.text) + pass + + except requests.exceptions.RequestException as e: + return {"return": 1, + "error": f"An error occurred in connecting to the server: {e}"} + + response_json = response.json() + # print(response_json) + # response = json.loads(response_json) + try: + signed_url = response_json['signed_url'] + submission_id = response_json['submission_id'] + except Exception as e: + return { + "return": 1, "error": f"An error occurred while processing the response: {e}"} + + return {'return': 0, 'signed_url': signed_url, + 'submission_id': submission_id} + + +def upload_file_to_signed_url(file_path, signed_url): + """ + Uploads a file to a signed URL using HTTP PUT. + + Parameters: + file_path (str): The path to the file you want to upload. + signed_url (str): The pre-signed URL for uploading the file. + + Returns: + dict: A dictionary with 'status_code' and 'response' keys. 
+ """ + headers = { + 'Content-Type': 'application/octet-stream', + 'Access-Control-Allow-Headers': '*' + } + + try: + # Open the file in binary mode + with open(file_path, 'rb') as file: + response = requests.put( + signed_url, + data=file, + headers=headers + ) + + if response.status_code in [200, 201, 204]: + print("File uploaded successfully!") + return { + 'return': 0 + } + else: + print( + f"Failed to upload file. Status code: {response.status_code}") + print("Response:", response.text) + + return { + 'return': response.status_code, + 'error': response.text + } + + except FileNotFoundError: + print("Error: File not found.") + return { + 'return': 400, + 'error': f'''File {file_path} not found''' + } + + except requests.exceptions.RequestException as e: + print(f"Request failed: {e}") + return { + 'return': 500, + 'error': str(e) + } + + +def trigger_submission_checker( + server_url, submitter_id, benchmark, submission_id): + """ + Sends a POST request with URL-encoded form data. + + Parameters: + server_url (str): The server endpoint URL (e.g., https://example.com/index). + submitter_id (str): The ID of the submitter. + benchmark (str): The benchmark identifier. + submission_id (str): The submission ID. + + Returns: + dict: A dictionary containing status code and response content. + """ + url = f"{server_url}/index" + headers = { + "Content-Type": "application/x-www-form-urlencoded" + } + payload = { + "submitter_id": submitter_id, + "benchmark": benchmark, + "submission_id": submission_id + } + + try: + # Make the POST request with URL-encoded data + response = requests.post(url, data=payload, headers=headers) + + if response.ok: + print("Submission Check Request successful!") + pass + else: + print( + f"Submission Check Request failed with status code: {response.status_code}") + print("Response:", response.text) + + return { + "return": 0, + "response": response.text + } + + except requests.exceptions.RequestException as e: + print("An error occurred:", e) + return { + "return": 500, + "error": str(e) + } + + +def postprocess(i): + return {'return': 0} diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/process_tests.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/process_tests.py new file mode 100644 index 0000000000..8012d097b6 --- /dev/null +++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/process_tests.py @@ -0,0 +1,38 @@ +import sys +import os +import cmind as cm +import check as checks +import json +import yaml + +files = sys.argv[1:] + +for file in files: + print(file) + if not os.path.isfile(file) or not "script" in file: + continue + if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): + continue + script_path = os.path.dirname(file) + f = open(file) + if file.endswith(".json"): + data = json.load(f) + elif file.endswith(".yaml"): + data = yaml.safe_load(f) + if data.get('uid', '') == '': + continue # not a CM script meta + uid = data['uid'] + + ii = { + 'action': 'test', 'automation': 'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con' + } + if os.environ.get('DOCKER_CM_REPO', '') != '': + ii['docker_cm_repo'] = os.environ['DOCKER_CM_REPO'] + if os.environ.get('DOCKER_CM_REPO_BRANCH', '') != '': + ii['docker_cm_repo_branch'] = os.environ['DOCKER_CM_REPO_BRANCH'] + if os.environ.get('TEST_INPUT_INDEX', '') != '': + ii['test_input_index'] = os.environ['TEST_INPUT_INDEX'] + print(ii) + r = cm.access(ii) + + checks.check_return(r) diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py 
b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py index ad867a2a12..1b63631c63 100644 --- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py +++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/script/test_docker.py @@ -10,7 +10,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@cm4mlops', + 'docker_cm_repo': 'mlcommons@mlperf-automations', 'image_name': 'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', @@ -27,7 +27,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@cm4mlops', + 'docker_cm_repo': 'mlcommons@mlperf-automations', 'image_name': 'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py index 0b96f17f5a..bc8d22f783 100644 --- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py +++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py @@ -30,7 +30,7 @@ 'name': 'mlperf'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community', 'implementation': 'cpp', 'hw_name': 'default', 'model': 'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'}) diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py index 692ddeb830..4e17d572d4 100644 --- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py +++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py @@ -18,7 +18,7 @@ 'device': 'cpu', 'scenario': 'Offline', 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': { 'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', diff --git a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py index 5758ad08f2..28bc0132bf 100644 --- a/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py +++ b/cmx4mlops/cmx4mlops/repo/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py @@ -20,7 +20,7 @@ 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 
'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) diff --git a/cmx4mlops/pyproject.toml b/cmx4mlops/pyproject.toml new file mode 100644 index 0000000000..678db87627 --- /dev/null +++ b/cmx4mlops/pyproject.toml @@ -0,0 +1,80 @@ +# Author and developer: Grigori Fursin + +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools"] + +[project] +name = "cmx4mlops" + +authors = [ + {name = "Grigori Fursin and contributors", email = "grigori.fursin@ctuning.org"} +] + +maintainers = [ + {name = "Grigori Fursin", email = "grigori.fursin@ctuning.org"} +] + +description = "CMX4MLOps repository" + +requires-python = ">=3.7" + +dependencies = [ + "cmind>=3.5.2", + "flextask" +] + +keywords = [ + "cmx4mlops", + "cmx4mlperf", + "cm4mlops", + "cm4mlperf", + "mlperf", + "virtual mlops", + "vmlops", + "cmind", + "workflow", + "automation", + "mlops", + "devops", + "aiops", + "portability", + "reusability" +] + +license = {text = "Apache 2.0"} + +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +dynamic = ["readme", "version"] + +[tool.setuptools.dynamic] +readme = {file = "cmx4mlops/README.md", content-type = "text/markdown"} +version = {file = "cmx4mlops/VERSION"} + +[tool.setuptools] +zip-safe = false +include-package-data = true + +[tool.setuptools.package-data] +cmx4mlops = ["**"] + +[tool.setuptools.packages.find] +where = ["."] +include = ["cmx4mlops"] + + +[project.urls] +Homepage = "https://github.com/mlcommons/ck" +Documentation = "TBD" +Repository = "https://github.com/mlcommons/ck/tree/master/cmx4mlops" +Issues = "https://github.com/mlcommons/ck/issues" +Changelog = "https://github.com/mlcommons/ck/blob/master/cmx4mlops/CHANGES.md" From 9d75b281696b0f8a2ee04647f1fd982afc44f8da Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sun, 2 Feb 2025 18:31:01 +0100 Subject: [PATCH 2/2] clean up --- README.md | 4 ++-- cmx4mlops/README.md | 24 ++++++++++++++++++++++-- cmx4mlops/cmx4mlops/cmr.yaml | 2 +- cmx4mlperf/README.md | 1 + cmx4mlperf/cmx4mlperf/VERSION | 1 + cmx4mlperf/cmx4mlperf/__init__.py | 0 cmx4mlperf/cmx4mlperf/cmr.yaml | 14 ++++++++++++++ 7 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 cmx4mlperf/README.md create mode 100644 cmx4mlperf/cmx4mlperf/VERSION create mode 100644 cmx4mlperf/cmx4mlperf/__init__.py create mode 100644 cmx4mlperf/cmx4mlperf/cmr.yaml diff --git a/README.md b/README.md index bf3aebc7cf..068f42aab4 100755 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ see [online catalog at CK playground](https://access.cknowledge.org/playground/? CM scripts extend the concept of `cmake` with simple Python automations, native scripts and JSON/YAML meta descriptions. 
They require Python 3.7+ with minimal dependencies and are
-[continuously extended by the community and MLCommons members](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md)
+[continuously extended by the community and MLCommons members](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md)
 to run natively on Ubuntu, MacOS, Windows, RHEL, Debian, Amazon Linux
 and any other operating system, in a cloud or inside
 automatically generated containers
 while keeping backward compatibility.
@@ -176,7 +176,7 @@ and to advance its development as a collaborative, community-driven effort.
 We thank [MLCommons](https://mlcommons.org), [FlexAI](https://flex.ai) and [cTuning](https://cTuning.org)
 for supporting this project,
-as well as our dedicated [volunteers and collaborators](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md)
+as well as our dedicated [volunteers and collaborators](https://github.com/mlcommons/ck/blob/master/CONTRIBUTORS.md)
 for their feedback and contributions!
 
 If you found the CM automations helpful, kindly reference this article:
diff --git a/cmx4mlops/README.md b/cmx4mlops/README.md
index e034c78c99..674e1da1f6 100644
--- a/cmx4mlops/README.md
+++ b/cmx4mlops/README.md
@@ -22,9 +22,10 @@ by `yaml` metadata, enabling the creation of robust and flexible ML workflows.
 Grigori Fursin, the cTuning foundation and OctoML donated the CK and CM projects
 to MLCommons to benefit everyone and encourage collaborative development.
 
-## Maintainer(s)
+## Maintainers
 
-* MLCommons
+* CM, CM4MLOps and MLPerf automations: MLCommons
+* CMX (the next generation of CM): Grigori Fursin
 
 ## Author
 
@@ -37,6 +38,25 @@ for their invaluable feedback and support!
 Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) and the [white paper](https://arxiv.org/abs/2406.16791).
 
+## Test image classification and MLPerf R-GAT inference benchmark via the CMX PyPI package
+
+```bash
+pip install cmind
+pip install cmx4mlops
+cmx run script "python app image-classification onnx" --quiet
+cmx run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes --hw_name=ubuntu-latest_x86 --model=rgat --implementation=python --backend=pytorch --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet --v --target_qps=1
+```
+
+## Test image classification and MLPerf R-GAT inference benchmark via the CMX GitHub repo
+
+```bash
+pip uninstall cmx4mlops
+pip install cmind
+cmx pull repo mlcommons@ck --dir=cmx4mlops/cmx4mlops
+cmx run script "python app image-classification onnx" --quiet
+cmx run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes --hw_name=ubuntu-latest_x86 --model=rgat --implementation=python --backend=pytorch --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet --v --target_qps=1
+```
+
 ## Parent project
 
 Visit the [parent Collective Knowledge project](https://github.com/mlcommons/ck) for further details.
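For reference, the shell examples above map directly onto the `cmind` Python API used throughout this repository's test scripts; a minimal sketch for the image-classification demo, with the tag list taken from the `CM_DOCKER_RUN_SCRIPT_TAGS` value used in `test_docker.py` earlier in this patch:

```python
import cmind as cm

# Programmatic equivalent of:
#   cmx run script "python app image-classification onnx" --quiet
r = cm.access({'action': 'run',
               'automation': 'script',
               'tags': 'app,image-classification,onnx,python',
               'quiet': 'yes'})
if r['return'] > 0:
    print(r.get('error', 'script failed'))
```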
diff --git a/cmx4mlops/cmx4mlops/cmr.yaml b/cmx4mlops/cmx4mlops/cmr.yaml index a62e4cf25b..4e2dbf480a 100644 --- a/cmx4mlops/cmx4mlops/cmr.yaml +++ b/cmx4mlops/cmx4mlops/cmr.yaml @@ -5,7 +5,7 @@ git: true version: "0.5.1" -author: "Grigori Fursin" +author: "Grigori Fursin and contributors" install_python_requirements: false diff --git a/cmx4mlperf/README.md b/cmx4mlperf/README.md new file mode 100644 index 0000000000..a0990367ef --- /dev/null +++ b/cmx4mlperf/README.md @@ -0,0 +1 @@ +TBD diff --git a/cmx4mlperf/cmx4mlperf/VERSION b/cmx4mlperf/cmx4mlperf/VERSION new file mode 100644 index 0000000000..4b9fcbec10 --- /dev/null +++ b/cmx4mlperf/cmx4mlperf/VERSION @@ -0,0 +1 @@ +0.5.1 diff --git a/cmx4mlperf/cmx4mlperf/__init__.py b/cmx4mlperf/cmx4mlperf/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cmx4mlperf/cmx4mlperf/cmr.yaml b/cmx4mlperf/cmx4mlperf/cmr.yaml new file mode 100644 index 0000000000..258c731ecc --- /dev/null +++ b/cmx4mlperf/cmx4mlperf/cmr.yaml @@ -0,0 +1,14 @@ +alias: cmx4mlperf +uid: 5c24e5e0ce5a457a + +git: true + +version: "0.5.1" + +author: "Grigori Fursin and contributors" + +install_python_requirements: false + +min_cm_version: "3.5.3" + +prefix: repo
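Assuming the new cmx4mlperf repository follows the same layout as cmx4mlops, it should be registrable with CMX in the same way; a hedged sketch via the `cmind` API, where the directory argument mirrors the cmx4mlops example and remains an assumption until the package is published:

```python
import cmind as cm

# Sketch: register the cmx4mlperf repository with CMX, mirroring
#   cmx pull repo mlcommons@ck --dir=cmx4mlops/cmx4mlops
# from the README above. The --dir path below is an assumption based
# on the cmx4mlops layout.
r = cm.access({'action': 'pull',
               'automation': 'repo',
               'artifact': 'mlcommons@ck',
               'dir': 'cmx4mlperf/cmx4mlperf'})
if r['return'] > 0:
    print(r.get('error', 'repo pull failed'))
```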