From 18a2d70cac29014efb24a7b9747514c6a211e8e0 Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 3 Oct 2025 10:52:12 -0500
Subject: [PATCH 1/3] Add mlcube implementation

---
 text_to_image/.dockerignore            |  1 +
 text_to_image/mlcube/.gitignore        |  1 +
 text_to_image/mlcube/Dockerfile        | 19 ++++++++
 text_to_image/mlcube/mlcube.yaml       | 29 ++++++++++++
 text_to_image/mlcube/readme.md         |  2 +
 text_to_image/scripts/download_demo.sh | 61 ++++++++++++++++++++++++++
 text_to_image/scripts/run_demo.sh      | 56 +++++++++++++++++++++++
 7 files changed, 169 insertions(+)
 create mode 100644 text_to_image/.dockerignore
 create mode 100644 text_to_image/mlcube/.gitignore
 create mode 100644 text_to_image/mlcube/Dockerfile
 create mode 100644 text_to_image/mlcube/mlcube.yaml
 create mode 100644 text_to_image/mlcube/readme.md
 create mode 100755 text_to_image/scripts/download_demo.sh
 create mode 100755 text_to_image/scripts/run_demo.sh

diff --git a/text_to_image/.dockerignore b/text_to_image/.dockerignore
new file mode 100644
index 000000000..0148c8483
--- /dev/null
+++ b/text_to_image/.dockerignore
@@ -0,0 +1 @@
+mlcube/workspace/
diff --git a/text_to_image/mlcube/.gitignore b/text_to_image/mlcube/.gitignore
new file mode 100644
index 000000000..f1981605f
--- /dev/null
+++ b/text_to_image/mlcube/.gitignore
@@ -0,0 +1 @@
+workspace
\ No newline at end of file
diff --git a/text_to_image/mlcube/Dockerfile b/text_to_image/mlcube/Dockerfile
new file mode 100644
index 000000000..49e1d6337
--- /dev/null
+++ b/text_to_image/mlcube/Dockerfile
@@ -0,0 +1,19 @@
+FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-devel
+
+# curl and unzip are required by scripts/download_demo.sh.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl git unzip vim \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace/flux
+COPY . .
+
+# huggingface-hub provides the huggingface-cli used by scripts/download_demo.sh.
+RUN pip install --no-cache-dir \
+    huggingface-hub \
+    -r torchtitan/requirements.txt \
+    -r torchtitan/torchtitan/experiments/flux/requirements-flux.txt \
+    -r torchtitan/requirements-mlperf.txt
+
+RUN pip install --no-cache-dir -e torchtitan/
diff --git a/text_to_image/mlcube/mlcube.yaml b/text_to_image/mlcube/mlcube.yaml
new file mode 100644
index 000000000..33e82ea86
--- /dev/null
+++ b/text_to_image/mlcube/mlcube.yaml
@@ -0,0 +1,29 @@
+name: Flux
+description: Flux text to image
+authors:
+  - { name: "MLCommons Best Practices Working Group" }
+
+platform:
+  accelerator_count: 1
+
+docker:
+  image: mlcommons/flux_benchmark:0.0.1
+  build_context: ".."  # parent of mlcube/, so the build can COPY scripts/ and torchtitan/
+  build_file: "mlcube/Dockerfile"
+  gpu_args: "--gpus=all -e HUGGING_FACE_HUB_TOKEN"  # -e without a value forwards the host variable
+
+tasks:
+  download_demo:  # fetches the demo dataset and the model weights
+    entrypoint: ./scripts/download_demo.sh -a
+    parameters:
+      outputs:
+        data_path: demo_data/
+        model_path: models/
+  demo:  # short training run on the downloaded demo inputs
+    entrypoint: ./scripts/run_demo.sh -a
+    parameters:
+      inputs:
+        data_path: demo_data/data/  # presumably the data/ folder unpacked from the demo zip -- verify
+        model_path: models/
+      outputs:
+        log_dir: demo_logs/
\ No newline at end of file
diff --git a/text_to_image/mlcube/readme.md b/text_to_image/mlcube/readme.md
new file mode 100644
index 000000000..f66a4ac67
--- /dev/null
+++ b/text_to_image/mlcube/readme.md
@@ -0,0 +1,2 @@
+export HUGGING_FACE_HUB_TOKEN="YOUR_TOKEN"
+git submodule update --init --recursive
\ No newline at end of file
diff --git a/text_to_image/scripts/download_demo.sh b/text_to_image/scripts/download_demo.sh
new file mode 100755
index 000000000..694243962
--- /dev/null
+++ b/text_to_image/scripts/download_demo.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Download the demo dataset and the model weights used by the Flux demo.
+# Usage: download_demo.sh [--data_path=DIR] [--model_path=DIR]
+set -e
+
+DATA_PATH="./dataset"
+MODEL_PATH="./models"
+
+# Capture MLCube parameters; unrecognized arguments are ignored.
+while [ $# -gt 0 ]; do
+  case "$1" in
+  --data_path=*) DATA_PATH="${1#*=}" ;;
+  --model_path=*) MODEL_PATH="${1#*=}" ;;
+  *) ;;
+  esac
+  shift
+done
+
+if ! command -v huggingface-cli &> /dev/null; then
+    echo "Error: huggingface-cli is not installed. Please add 'huggingface-hub' to your pip requirements." >&2
+    exit 1
+fi
+
+echo "--- Preparing Directories ---"
+mkdir -p "$DATA_PATH" "$MODEL_PATH"
+# Resolve both paths before cd so a relative MODEL_PATH is not created inside DATA_PATH.
+DATA_PATH="$(cd "$DATA_PATH" && pwd)"
+MODEL_PATH="$(cd "$MODEL_PATH" && pwd)"
+cd "$DATA_PATH"
+echo "Working directory: $(pwd)"
+
+echo "--- Downloading and unzipping dataset ---"
+# -f aborts on an HTTP error instead of saving the error page as the archive.
+curl -fL -O https://storage.googleapis.com/mlperf_training_demo/flux/flux_minified_data.zip
+unzip -o -q flux_minified_data.zip
+rm flux_minified_data.zip
+echo "Dataset downloaded successfully."
+
+echo "--- Downloading models to ${MODEL_PATH} directory ---"
+# Report only a missing token; never print the secret itself into the logs.
+[ -n "${HUGGING_FACE_HUB_TOKEN:-}" ] || echo "Warning: HUGGING_FACE_HUB_TOKEN is not set; gated downloads may fail." >&2
+
+echo "Downloading FLUX.1-schnell autoencoder..."
+huggingface-cli download black-forest-labs/FLUX.1-schnell ae.safetensors \
+    --local-dir "${MODEL_PATH}/autoencoder" \
+    --local-dir-use-symlinks False
+
+echo "Downloading T5-v1_1-xxl text encoder..."
+huggingface-cli download google/t5-v1_1-xxl \
+    --local-dir "${MODEL_PATH}/t5" \
+    --exclude "tf_model.h5" \
+    --local-dir-use-symlinks False
+
+echo "Downloading CLIP-vit-large-patch14 image encoder..."
+# --exclude takes separate glob arguments; a comma-joined string is a single pattern.
+huggingface-cli download openai/clip-vit-large-patch14 \
+    --local-dir "${MODEL_PATH}/clip" \
+    --exclude "*.safetensors" "*.msgpack" "tf_model.h5" \
+    --local-dir-use-symlinks False
+
+echo "--- All downloads completed successfully! ---"
\ No newline at end of file
diff --git a/text_to_image/scripts/run_demo.sh b/text_to_image/scripts/run_demo.sh
new file mode 100755
index 000000000..00a60a1e8
--- /dev/null
+++ b/text_to_image/scripts/run_demo.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Run a short Flux training demo through torchtitan's flux run_train.sh.
+# Usage: run_demo.sh --data_path=DIR --model_path=DIR --log_dir=DIR
+set -e
+
+DATA_PATH=""
+MODEL_PATH=""
+LOG_DIR=""
+
+# Capture MLCube parameters; unrecognized arguments are ignored.
+while [ $# -gt 0 ]; do
+  case "$1" in
+  --data_path=*) DATA_PATH="${1#*=}" ;;
+  --model_path=*) MODEL_PATH="${1#*=}" ;;
+  --log_dir=*) LOG_DIR="${1#*=}" ;;
+  *) ;;
+  esac
+  shift
+done
+
+if [[ -z "$DATA_PATH" || -z "$MODEL_PATH" || -z "$LOG_DIR" ]]; then
+  echo "Error: --data_path, --model_path and --log_dir must all be provided by MLCube." >&2
+  exit 1
+fi
+
+echo "Data Path: $DATA_PATH"
+echo "Model Path: $MODEL_PATH"
+echo "Log Directory: $LOG_DIR"
+echo "--------------------------"
+
+export DATAROOT="$DATA_PATH"
+export MODELROOT="$MODEL_PATH"
+export LOGDIR="$LOG_DIR"
+export NGPU=1
+export CONFIG_FILE="torchtitan/torchtitan/experiments/flux/train_configs/flux_schnell_mlperf_preprocessed.toml"
+
+echo "Running training with the following environment:"
+echo "DATAROOT=$DATAROOT"
+echo "MODELROOT=$MODELROOT"
+echo "LOGDIR=$LOGDIR"
+echo "NGPU=$NGPU"
+echo "CONFIG_FILE=$CONFIG_FILE"
+echo "--------------------------"
+
+# Expose the inputs at /dataset and /models (presumably the locations the
+# training config reads -- verify). -f/-n replace a stale link so a rerun does
+# not abort under `set -e`; quoting keeps paths containing spaces intact.
+ln -sfn "$DATAROOT" /dataset
+ln -sfn "$MODELROOT" /models
+
+# Command-line overrides for a short demo run (10 steps, eval_freq 5).
+bash torchtitan/torchtitan/experiments/flux/run_train.sh \
+  --training.steps=10 \
+  --training.batch_size=1 \
+  --training.seq_len=2 \
+  --eval.eval_freq=5

From 947d4e75719ebe72d300aadd60007558feed58e0 Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 3 Oct 2025 11:04:32 -0500
Subject: [PATCH 2/3] Update readme

---
 text_to_image/mlcube/readme.md | 72 +++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/text_to_image/mlcube/readme.md b/text_to_image/mlcube/readme.md
index f66a4ac67..6ce0444cf 100644
--- a/text_to_image/mlcube/readme.md
+++ b/text_to_image/mlcube/readme.md
@@ -1,2 +1,72 @@
+# MLCube for Flux.1-schnell
+
+MLCube™ GitHub [repository](https://github.com/mlcommons/mlcube). MLCube™ [wiki](https://mlcommons.github.io/mlcube/).
+
+## Project setup
+
+An important requirement is that you must have Docker installed.
+
+```bash
+# Create Python environment and install MLCube Docker runner 
+virtualenv -p python3 ./env && source ./env/bin/activate && pip install pip==24.0 && pip install mlcube-docker
+# Fetch the implementation from GitHub
+git clone https://github.com/mlcommons/training && cd ./training
+git fetch origin pull/839/head:feature/mlcube_flux && git checkout feature/mlcube_flux
+cd ./text_to_image/mlcube
+```
+
+Inside the mlcube directory run the following command to check implemented tasks.
+
+```shell
+mlcube describe
+```
+
+###  Extra requirements
+
+You need to download the `torchtitan` git submodule:
+
+```shell
+git submodule update --init --recursive
+```
+
+You also need to accept the license for the [FLUX schnell model](https://huggingface.co/black-forest-labs/FLUX.1-schnell) on Hugging Face.
+
+Finally, to be able to download all the models you will need to get a token from [Hugging Face](https://huggingface.co/settings/tokens).
+
+**Note**: Make sure that when creating the token you select:
+
+* Read access to contents of all public gated repos you can access
+
+After that you can set a new environment variable, like this:
+
+```shell
 export HUGGING_FACE_HUB_TOKEN="YOUR_TOKEN"
-git submodule update --init --recursive
\ No newline at end of file
+```
+
+### MLCube tasks
+
+* Demo tasks:
+
+Download demo dataset and models.
+
+```shell
+mlcube run --task=download_demo -Pdocker.build_strategy=always
+```
+
+Train demo.
+
+```shell
+mlcube run --task=demo -Pdocker.build_strategy=always
+```
+
+### Execute the complete pipeline
+
+You can execute the complete pipeline with one single command.
+
+* Demo pipeline:
+
+```shell
+mlcube run --task=download_demo,download_models,demo -Pdocker.build_strategy=always
+```
+
+**Note**: To rebuild the image use the flag: `-Pdocker.build_strategy=always` during the `mlcube run` command.

From 26c89423a9706a6b7439ce100f081e45dc88e098 Mon Sep 17 00:00:00 2001
From: David Jurado <david.jurado@factored.ai>
Date: Fri, 3 Oct 2025 17:17:37 -0500
Subject: [PATCH 3/3] Update readme

---
 text_to_image/mlcube/readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_to_image/mlcube/readme.md b/text_to_image/mlcube/readme.md
index 6ce0444cf..746b35d37 100644
--- a/text_to_image/mlcube/readme.md
+++ b/text_to_image/mlcube/readme.md
@@ -66,7 +66,7 @@ You can execute the complete pipeline with one single command.
 * Demo pipeline:
 
 ```shell
-mlcube run --task=download_demo,download_models,demo -Pdocker.build_strategy=always
+mlcube run --task=download_demo,demo -Pdocker.build_strategy=always
 ```
 
 **Note**: To rebuild the image use the flag: `-Pdocker.build_strategy=always` during the `mlcube run` command.