From 18a2d70cac29014efb24a7b9747514c6a211e8e0 Mon Sep 17 00:00:00 2001
From: David Jurado
Date: Fri, 3 Oct 2025 10:52:12 -0500
Subject: [PATCH 1/3] Add mlcube implementation

---
Review note: file contents in this series were amended during review
(secret no longer echoed, --exclude patterns split, paths quoted, comments
added). Hunk line counts are updated; the blob ids on the "index" lines are
from the pre-review revision and only matter for "git am -3".

 text_to_image/.dockerignore            |  1 +
 text_to_image/mlcube/.gitignore        |  1 +
 text_to_image/mlcube/Dockerfile        | 23 ++++++++
 text_to_image/mlcube/mlcube.yaml       | 30 ++++++++++
 text_to_image/mlcube/readme.md         |  2 +
 text_to_image/scripts/download_demo.sh | 76 ++++++++++++++++++++++++++
 text_to_image/scripts/run_demo.sh      | 63 ++++++++++++++++++++++
 7 files changed, 196 insertions(+)
 create mode 100644 text_to_image/.dockerignore
 create mode 100644 text_to_image/mlcube/.gitignore
 create mode 100644 text_to_image/mlcube/Dockerfile
 create mode 100644 text_to_image/mlcube/mlcube.yaml
 create mode 100644 text_to_image/mlcube/readme.md
 create mode 100755 text_to_image/scripts/download_demo.sh
 create mode 100755 text_to_image/scripts/run_demo.sh

diff --git a/text_to_image/.dockerignore b/text_to_image/.dockerignore
new file mode 100644
index 000000000..0148c8483
--- /dev/null
+++ b/text_to_image/.dockerignore
@@ -0,0 +1 @@
+mlcube/workspace/
diff --git a/text_to_image/mlcube/.gitignore b/text_to_image/mlcube/.gitignore
new file mode 100644
index 000000000..f1981605f
--- /dev/null
+++ b/text_to_image/mlcube/.gitignore
@@ -0,0 +1 @@
+workspace
\ No newline at end of file
diff --git a/text_to_image/mlcube/Dockerfile b/text_to_image/mlcube/Dockerfile
new file mode 100644
index 000000000..49e1d6337
--- /dev/null
+++ b/text_to_image/mlcube/Dockerfile
@@ -0,0 +1,23 @@
+# Image used by the MLCube tasks of the Flux text-to-image benchmark.
+# The build context is the parent directory (build_context: ".." in mlcube.yaml).
+FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-devel
+
+# OS tools; curl and unzip are called by scripts/download_demo.sh.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git vim curl unzip \
+    && rm -rf /var/lib/apt/lists/* \
+    && apt-get clean
+
+WORKDIR /workspace/flux
+
+COPY . .
+
+# huggingface-hub provides the huggingface-cli command used by download_demo.sh.
+RUN pip install --no-cache-dir \
+    huggingface-hub \
+    -r torchtitan/requirements.txt \
+    -r torchtitan/torchtitan/experiments/flux/requirements-flux.txt \
+    -r torchtitan/requirements-mlperf.txt
+
+RUN pip install --no-cache-dir -e torchtitan/
+
diff --git a/text_to_image/mlcube/mlcube.yaml b/text_to_image/mlcube/mlcube.yaml
new file mode 100644
index 000000000..33e82ea86
--- /dev/null
+++ b/text_to_image/mlcube/mlcube.yaml
@@ -0,0 +1,30 @@
+name: Flux
+description: Flux text to image
+authors:
+  - { name: "MLCommons Best Practices Working Group" }
+
+platform:
+  accelerator_count: 1
+
+docker:
+  image: mlcommons/flux_benchmark:0.0.1
+  build_context: ".."
+  build_file: "mlcube/Dockerfile"
+  # "-e HUGGING_FACE_HUB_TOKEN" (no value) forwards the host variable into the container.
+  gpu_args: "--gpus=all -e HUGGING_FACE_HUB_TOKEN"
+
+tasks:
+  download_demo:
+    entrypoint: ./scripts/download_demo.sh -a
+    parameters:
+      outputs:
+        data_path: demo_data/
+        model_path: models/
+  demo:
+    entrypoint: ./scripts/run_demo.sh -a
+    parameters:
+      inputs:
+        data_path: demo_data/data/
+        model_path: models/
+      outputs:
+        log_dir: demo_logs/
\ No newline at end of file
diff --git a/text_to_image/mlcube/readme.md b/text_to_image/mlcube/readme.md
new file mode 100644
index 000000000..f66a4ac67
--- /dev/null
+++ b/text_to_image/mlcube/readme.md
@@ -0,0 +1,2 @@
+export HUGGING_FACE_HUB_TOKEN="YOUR_TOKEN"
+git submodule update --init --recursive
\ No newline at end of file
diff --git a/text_to_image/scripts/download_demo.sh b/text_to_image/scripts/download_demo.sh
new file mode 100755
index 000000000..694243962
--- /dev/null
+++ b/text_to_image/scripts/download_demo.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Download the demo dataset and the pretrained models for the MLCube
+# "download_demo" task.
+#
+# Arguments (given as --name=value; unrecognized arguments are ignored):
+#   --data_path=DIR    directory the dataset archive is downloaded and unzipped into
+#   --model_path=DIR   directory the models are downloaded into
+set -e
+
+DATA_PATH="./dataset"
+MODEL_PATH="./models"
+
+# Capture MLCube parameter
+while [ $# -gt 0 ]; do
+    case "$1" in
+    --data_path=*)
+        DATA_PATH="${1#*=}"
+        ;;
+    --model_path=*)
+        MODEL_PATH="${1#*=}"
+        ;;
+    *) ;;
+    esac
+    shift
+done
+
+if ! command -v huggingface-cli &> /dev/null; then
+    echo "Error: huggingface-cli is not installed. Please add 'huggingface-hub' to your pip requirements." >&2
+    exit 1
+fi
+
+
+echo "--- Preparing Directories ---"
+mkdir -p "$DATA_PATH" "$MODEL_PATH"
+# Make MODEL_PATH absolute before changing directory, so a relative value
+# (such as the default) is not reinterpreted relative to DATA_PATH below.
+MODEL_PATH="$(cd "$MODEL_PATH" && pwd)"
+cd "$DATA_PATH"
+echo "Working directory: $(pwd)"
+
+
+echo "--- Downloading and unzipping dataset ---"
+# -f: fail on an HTTP error instead of saving the error page as the archive.
+curl -fLO https://storage.googleapis.com/mlperf_training_demo/flux/flux_minified_data.zip
+unzip -o -q flux_minified_data.zip
+rm flux_minified_data.zip
+echo "Dataset downloaded successfully."
+
+
+echo "--- Downloading models to ${MODEL_PATH} directory ---"
+# Report only whether the token is present; never print the secret itself.
+if [ -n "${HUGGING_FACE_HUB_TOKEN:-}" ]; then
+    echo "HUGGING_FACE_HUB_TOKEN is set"
+else
+    echo "Warning: HUGGING_FACE_HUB_TOKEN is not set; gated model downloads may fail." >&2
+fi
+
+echo "Downloading FLUX.1-schnell autoencoder..."
+huggingface-cli download black-forest-labs/FLUX.1-schnell ae.safetensors \
+    --local-dir "${MODEL_PATH}/autoencoder" \
+    --local-dir-use-symlinks False
+
+echo "Downloading T5-v1_1-xxl text encoder..."
+huggingface-cli download google/t5-v1_1-xxl \
+    --local-dir "${MODEL_PATH}/t5" \
+    --exclude "tf_model.h5" \
+    --local-dir-use-symlinks False
+
+echo "Downloading CLIP-vit-large-patch14 image encoder..."
+# --exclude takes separate patterns; a comma-joined string is one single pattern.
+huggingface-cli download openai/clip-vit-large-patch14 \
+    --local-dir "${MODEL_PATH}/clip" \
+    --exclude "*.safetensors" "*.msgpack" "tf_model.h5" \
+    --local-dir-use-symlinks False
+
+echo "--- All downloads completed successfully! ---"
\ No newline at end of file
diff --git a/text_to_image/scripts/run_demo.sh b/text_to_image/scripts/run_demo.sh
new file mode 100755
index 000000000..00a60a1e8
--- /dev/null
+++ b/text_to_image/scripts/run_demo.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# Run a short demo training for the MLCube "demo" task.
+#
+# Arguments (given as --name=value; unrecognized arguments are ignored):
+#   --data_path=DIR    dataset directory; exported as DATAROOT and linked at /dataset
+#   --model_path=DIR   models directory; exported as MODELROOT and linked at /models
+#   --log_dir=DIR      exported as LOGDIR
+set -e
+
+DATA_PATH=""
+MODEL_PATH=""
+LOG_DIR=""
+
+# Capture MLCube parameter
+while [ $# -gt 0 ]; do
+    case "$1" in
+    --data_path=*)
+        DATA_PATH="${1#*=}"
+        ;;
+    --model_path=*)
+        MODEL_PATH="${1#*=}"
+        ;;
+    --log_dir=*)
+        LOG_DIR="${1#*=}"
+        ;;
+    *) ;;
+    esac
+    shift
+done
+
+if [[ -z "$DATA_PATH" || -z "$MODEL_PATH" || -z "$LOG_DIR" ]]; then
+    echo "Error: --data_path, --model_path and --log_dir must all be provided by MLCube." >&2
+    exit 1
+fi
+
+echo "Data Path: $DATA_PATH"
+echo "Model Path: $MODEL_PATH"
+echo "Log Directory: $LOG_DIR"
+echo "--------------------------"
+
+export DATAROOT="$DATA_PATH"
+export MODELROOT="$MODEL_PATH"
+export LOGDIR="$LOG_DIR"
+export NGPU=1
+export CONFIG_FILE="torchtitan/torchtitan/experiments/flux/train_configs/flux_schnell_mlperf_preprocessed.toml"
+
+echo "Running training with the following environment:"
+echo "DATAROOT=$DATAROOT"
+echo "MODELROOT=$MODELROOT"
+echo "LOGDIR=$LOGDIR"
+echo "NGPU=$NGPU"
+echo "CONFIG_FILE=$CONFIG_FILE"
+echo "--------------------------"
+
+# -f/-n: replace a link left by an earlier run instead of aborting under set -e.
+ln -sfn "$DATAROOT" /dataset
+ln -sfn "$MODELROOT" /models
+
+bash torchtitan/torchtitan/experiments/flux/run_train.sh \
+    --training.steps=10 \
+    --training.batch_size=1 \
+    --training.seq_len=2 \
+    --eval.eval_freq=5
From 947d4e75719ebe72d300aadd60007558feed58e0 Mon Sep 17 00:00:00 2001
From: David Jurado
Date: Fri, 3 Oct 2025 11:04:32 -0500
Subject: [PATCH 2/3] Update readme

---
 text_to_image/mlcube/readme.md | 72 +++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/text_to_image/mlcube/readme.md b/text_to_image/mlcube/readme.md
index f66a4ac67..6ce0444cf 100644
--- a/text_to_image/mlcube/readme.md
+++ b/text_to_image/mlcube/readme.md
@@ -1,2 +1,72 @@
+# MLCube for Flux.1-schnell
+
+MLCube™ GitHub [repository](https://github.com/mlcommons/mlcube). MLCube™ [wiki](https://mlcommons.github.io/mlcube/).
+
+## Project setup
+
+An important requirement is that you must have Docker installed.
+
+```bash
+# Create Python environment and install MLCube Docker runner
+virtualenv -p python3 ./env && source ./env/bin/activate && pip install pip==24.0 && pip install mlcube-docker
+# Fetch the implementation from GitHub
+git clone https://github.com/mlcommons/training && cd ./training
+git fetch origin pull/839/head:feature/mlcube_flux && git checkout feature/mlcube_flux
+cd ./text_to_image/mlcube
+```
+
+Inside the mlcube directory run the following command to check implemented tasks.
+
+```shell
+mlcube describe
+```
+
+### Extra requirements
+
+You need to download the `torchtitan` git submodule:
+
+```shell
+git submodule update --init --recursive
+```
+
+You also need to accept the license for the [FLUX schnell model](https://huggingface.co/black-forest-labs/FLUX.1-schnell) on Hugging Face.
+
+Finally, to be able to download all the models you will need to get a token from [Hugging Face](https://huggingface.co/settings/tokens).
+
+**Note**: Make sure that when creating the token you select:
+
+* Read access to contents of all public gated repos you can access
+
+After that you can set a new environment variable, like this:
+
+```shell
 export HUGGING_FACE_HUB_TOKEN="YOUR_TOKEN"
-git submodule update --init --recursive
\ No newline at end of file
+```
+
+### MLCube tasks
+
+* Demo tasks:
+
+Download demo dataset and models.
+
+```shell
+mlcube run --task=download_demo -Pdocker.build_strategy=always
+```
+
+Train demo.
+
+```shell
+mlcube run --task=demo -Pdocker.build_strategy=always
+```
+
+### Execute the complete pipeline
+
+You can execute the complete pipeline with one single command.
+
+* Demo pipeline:
+
+```shell
+mlcube run --task=download_demo,download_models,demo -Pdocker.build_strategy=always
+```
+
+**Note**: To rebuild the image use the flag: `-Pdocker.build_strategy=always` during the `mlcube run` command.
From 26c89423a9706a6b7439ce100f081e45dc88e098 Mon Sep 17 00:00:00 2001
From: David Jurado
Date: Fri, 3 Oct 2025 17:17:37 -0500
Subject: [PATCH 3/3] Update readme

---
 text_to_image/mlcube/readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_to_image/mlcube/readme.md b/text_to_image/mlcube/readme.md
index 6ce0444cf..746b35d37 100644
--- a/text_to_image/mlcube/readme.md
+++ b/text_to_image/mlcube/readme.md
@@ -66,7 +66,7 @@ You can execute the complete pipeline with one single command.
 * Demo pipeline:
 
 ```shell
-mlcube run --task=download_demo,download_models,demo -Pdocker.build_strategy=always
+mlcube run --task=download_demo,demo -Pdocker.build_strategy=always
 ```
 
 **Note**: To rebuild the image use the flag: `-Pdocker.build_strategy=always` during the `mlcube run` command.