Skip to content

Commit

Permalink
allow parallel running and datacenter specific executions
Browse files Browse the repository at this point in the history
  • Loading branch information
isidentical committed Jan 26, 2024
1 parent ff7ad91 commit b638c6c
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 37 deletions.
2 changes: 1 addition & 1 deletion artifacts/latest.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.6092358700116165, 1.590405477967579, 1.6014833319932222, 1.6045241150422953, 1.6173307650024071, 1.588649354991503, 1.6177432839758694, 1.599749773973599, 1.5973809910356067, 1.611054973967839]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.553542829991784, 1.5469060649629682, 1.5682512729545124, 1.5926210439647548, 1.5490420999703929, 1.5811396269709803, 1.601516699010972, 1.573098658991512, 1.5553199910209514, 1.5495691270334646]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7270463349996135, 1.7257355590118095, 1.72448783100117, 1.7195176769746467, 1.724910780962091, 1.733797338034492, 1.7283085110248066, 1.7298935379949398, 1.7296830140403472, 1.7474360400228761]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.3513619299628772, 1.3534803700167686, 1.3520958359586075, 1.3536381669691764, 1.354587804991752, 1.3556970959762111, 1.3540321679902263, 1.3534756689914502, 1.3540198029950261, 1.3547918749973178]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 (End-to-end)", "timings": [1.0565082289977, 1.0567370590288192, 1.0561835389817134, 1.0582475429982878, 1.0596825950196944, 1.0571462360094301, 1.0588196199969389, 1.0595528869889677, 1.05731826700503, 1.0564522079657763]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.926582371990662, 5.934434743016027, 5.9270851469482295, 5.935426631011069, 5.924764123046771, 5.940283620962873, 5.923806683975272, 5.939957841997966, 5.936083366977982, 5.9296819130540825]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny 
VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.721943153010216, 5.728673742036335, 5.741363879002165, 5.76699190097861, 5.737180910015013, 5.739464172977023, 5.734640704002231, 5.739124519051984, 5.736957271001302, 5.743521926051471]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.710114244022407, 5.713956555002369, 5.712215353967622, 5.711912807018962, 5.717077467998024, 5.717427038005553, 5.716518344997894, 5.723207506001927, 5.732120550004765, 5.730621722002979]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.215489073016215, 5.213854452013038, 5.219272127957083, 5.21321740699932, 5.216327044996433, 5.215428333031014, 5.218647814006545, 5.215836882998701, 5.220495002984535, 5.2171645070193335]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.125404424034059, 5.128606810001656, 5.129265585972462, 5.131429913977627, 5.139510503038764, 5.135477636009455, 5.141903425042983, 5.1426030559814535, 5.1420622110017575, 5.1471164600225165]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9522697469219565, 0.9466271298006177, 0.9612564342096448, 0.9496314357966185, 0.9472450213506818, 0.9524376271292567, 0.947170939296484, 0.9501788932830095, 
0.9480817448347807, 0.949673724360764]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.282238028012216, 4.288415948860347, 4.3019642466679215, 4.316444550640881, 4.300708514638245, 4.304514071904123, 4.2988332360982895, 4.30375206284225, 4.303566202521324, 4.297783775255084]}, {"name": "[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.153573560994118, 8.144518585992046, 8.136577832978219, 8.14440743502928, 8.146547965996433, 8.137827199010644, 8.150413497991394, 8.143599029979669, 8.154678368009627, 8.15259703004267]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.87210746697383, 5.879055427969433, 5.893418683030177, 5.887948323041201, 5.883382624015212, 5.88199090200942, 5.886507772025652, 5.893981233006343, 5.895993906015065, 5.897189933981281]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", "category": "SDXL (End-to-end)", "timings": [5.314938348019496, 5.328400561993476, 5.314847628993448, 5.321663878043182, 5.31307160895085, 5.323098871042021, 5.315845976991113, 5.323869657004252, 5.333241019980051, 5.340266301005613]}, {"name": "Diffusers (torch 2.1, SDPA) + OpenAI's [consistency decoder](https://github.com/openai/consistencydecoder)\\*\\*", "category": "SD1.5 (End-to-end)", "timings": [2.2305995619972236, 2.236397976987064, 2.233143593999557, 2.2379272099933587, 2.2369742310256697, 2.226276573026553, 2.2258994709700346, 2.227976629976183, 2.2204389010439627, 2.225959989009425]}, {"name": "Comfy (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.751719175023027, 5.749125912960153, 5.8242923590005375, 5.748247125011403, 5.757380981987808, 5.821511441026814, 5.782289217982907, 5.796118968981318, 5.799208401993383, 5.7610603049979545]}, {"name": "Stable Fast (torch 2.1)", "category": "SD1.5 (End-to-end)", "timings": 
[0.9004453329835087, 0.9010565869975835, 0.9024333320558071, 0.902811500011012, 0.9005696969106793, 0.9003846610430628, 0.9000067131128162, 0.9003504698630422, 0.9020813019014895, 0.9022957070264965]}, {"name": "Stable Fast (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [4.1391438820865005, 4.1379809838254005, 4.143944907933474, 4.143460404826328, 4.147412231890485, 4.152759549906477, 4.151341335149482, 4.151535141048953, 4.167611639015377, 4.1654934079851955]}]}
{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.125404424034059, 5.128606810001656, 5.129265585972462, 5.131429913977627, 5.139510503038764, 5.135477636009455, 5.141903425042983, 5.1426030559814535, 5.1420622110017575, 5.1471164600225165]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", "category": "SDXL (End-to-end)", "timings": [5.314938348019496, 5.328400561993476, 5.314847628993448, 5.321663878043182, 5.31307160895085, 5.323098871042021, 5.315845976991113, 5.323869657004252, 5.333241019980051, 5.340266301005613]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 (End-to-end)", "timings": [1.0565082289977, 1.0567370590288192, 1.0561835389817134, 1.0582475429982878, 1.0596825950196944, 1.0571462360094301, 1.0588196199969389, 1.0595528869889677, 1.05731826700503, 1.0564522079657763]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.87210746697383, 5.879055427969433, 5.893418683030177, 5.887948323041201, 5.883382624015212, 5.88199090200942, 5.886507772025652, 5.893981233006343, 5.895993906015065, 5.897189933981281]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.3513619299628772, 1.3534803700167686, 1.3520958359586075, 1.3536381669691764, 1.354587804991752, 1.3556970959762111, 1.3540321679902263, 1.3534756689914502, 1.3540198029950261, 1.3547918749973178]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.282238028012216, 4.288415948860347, 4.3019642466679215, 4.316444550640881, 4.300708514638245, 4.304514071904123, 4.2988332360982895, 4.30375206284225, 4.303566202521324, 4.297783775255084]}, {"name": 
"[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.153573560994118, 8.144518585992046, 8.136577832978219, 8.14440743502928, 8.146547965996433, 8.137827199010644, 8.150413497991394, 8.143599029979669, 8.154678368009627, 8.15259703004267]}, {"name": "Stable Fast (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [4.1391438820865005, 4.1379809838254005, 4.143944907933474, 4.143460404826328, 4.147412231890485, 4.152759549906477, 4.151341335149482, 4.151535141048953, 4.167611639015377, 4.1654934079851955]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7270463349996135, 1.7257355590118095, 1.72448783100117, 1.7195176769746467, 1.724910780962091, 1.733797338034492, 1.7283085110248066, 1.7298935379949398, 1.7296830140403472, 1.7474360400228761]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.6092358700116165, 1.590405477967579, 1.6014833319932222, 1.6045241150422953, 1.6173307650024071, 1.588649354991503, 1.6177432839758694, 1.599749773973599, 1.5973809910356067, 1.611054973967839]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "Stable Fast (torch 2.1)", "category": "SD1.5 (End-to-end)", "timings": [0.9004453329835087, 0.9010565869975835, 0.9024333320558071, 0.902811500011012, 0.9005696969106793, 0.9003846610430628, 0.9000067131128162, 0.9003504698630422, 0.9020813019014895, 0.9022957070264965]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.710114244022407, 5.713956555002369, 5.712215353967622, 5.711912807018962, 5.717077467998024, 5.717427038005553, 5.716518344997894, 5.723207506001927, 5.732120550004765, 
5.730621722002979]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.553542829991784, 1.5469060649629682, 1.5682512729545124, 1.5926210439647548, 1.5490420999703929, 1.5811396269709803, 1.601516699010972, 1.573098658991512, 1.5553199910209514, 1.5495691270334646]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9522697469219565, 0.9466271298006177, 0.9612564342096448, 0.9496314357966185, 0.9472450213506818, 0.9524376271292567, 0.947170939296484, 0.9501788932830095, 0.9480817448347807, 0.949673724360764]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.215489073016215, 5.213854452013038, 5.219272127957083, 5.21321740699932, 5.216327044996433, 5.215428333031014, 5.218647814006545, 5.215836882998701, 5.220495002984535, 5.2171645070193335]}, {"name": "Comfy (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.751719175023027, 5.749125912960153, 5.8242923590005375, 5.748247125011403, 5.757380981987808, 5.821511441026814, 5.782289217982907, 5.796118968981318, 5.799208401993383, 5.7610603049979545]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.721943153010216, 5.728673742036335, 5.741363879002165, 5.76699190097861, 5.737180910015013, 5.739464172977023, 5.734640704002231, 5.739124519051984, 5.736957271001302, 5.743521926051471]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.926582371990662, 5.934434743016027, 5.9270851469482295, 5.935426631011069, 5.924764123046771, 
5.940283620962873, 5.923806683975272, 5.939957841997966, 5.936083366977982, 5.9296819130540825]}, {"name": "Diffusers (torch 2.1, SDPA) + OpenAI's [consistency decoder](https://github.com/openai/consistencydecoder)\\*\\*", "category": "SD1.5 (End-to-end)", "timings": [2.2305995619972236, 2.236397976987064, 2.233143593999557, 2.2379272099933587, 2.2369742310256697, 2.226276573026553, 2.2258994709700346, 2.227976629976183, 2.2204389010439627, 2.225959989009425]}]}
130 changes: 94 additions & 36 deletions benchmarks/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import argparse
import json
import traceback
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from dataclasses import asdict
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -55,6 +57,44 @@ def load_previous_timings(
}


def run_benchmark(
    benchmark_key: tuple[str, str],
    benchmark: dict,
    settings: BenchmarkSettings,
    parameters: InputParameters,
    options: argparse.Namespace,
) -> dict:
    """Configure and execute a single benchmark, returning its timing record.

    Args:
        benchmark_key: ``(category, name)`` pair; only used in the progress
            message printed before the run.
        benchmark: Benchmark definition. Must contain ``"function"`` (an
            object whose ``.on(...)`` returns a configured callable),
            ``"name"`` and ``"category"``. An optional ``"kwargs"`` mapping
            is forwarded to the benchmark call as extra keyword arguments.
        settings: Forwarded to the benchmark as ``benchmark_settings``.
        parameters: Forwarded to the benchmark as ``parameters``.
        options: Parsed command-line options. ``machine_type``,
            ``target_node`` and ``datacenters`` are read from it.

    Returns:
        A dict with the benchmark's ``"name"`` and ``"category"`` and the
        ``"timings"`` taken from the ``timings`` attribute of the result.

    Any exception raised while configuring or running the benchmark is
    propagated to the caller unchanged.
    """
    print(f"Running benchmark: {benchmark_key}")

    # Collect every scheduler option into ONE mapping and apply it with a
    # single `.on()` call. Chaining two `.on(_scheduler_options={...})` calls
    # risks the second mapping replacing the first (dropping `target_node`
    # whenever `--datacenters` is also given) if `.on()` does not deep-merge
    # nested dicts -- TODO confirm against the `.on()` implementation.
    scheduler_options = {}
    if options.target_node:
        scheduler_options["target_node"] = options.target_node
    if options.datacenters:
        scheduler_options["datacenters"] = options.datacenters

    function = benchmark["function"].on(
        machine_type=options.machine_type,
        _scheduler="nomad",
    )
    # Leave `_scheduler_options` untouched when nothing was requested.
    if scheduler_options:
        function = function.on(_scheduler_options=scheduler_options)

    benchmark_results = function(
        benchmark_settings=settings,
        parameters=parameters,
        **benchmark.get("kwargs", {}),
    )
    return {
        "name": benchmark["name"],
        "category": benchmark["category"],  # "SD1.5", "SDXL"
        "timings": benchmark_results.timings,
    }


def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("results_dir", type=Path)
Expand Down Expand Up @@ -84,11 +124,18 @@ def main() -> None:
"stablefast",
],
)
parser.add_argument(
"--machine-type",
type=str,
default="GPU",
choices=["GPU", "GPU-A6000"],
)

# To keep results consistent, only compare numbers that were measured on the
# same node, so that the driver, CUDA version, power supply, CPU compute, etc.
# are all identical.
parser.add_argument("--target-node", type=str, default=None)
parser.add_argument("--datacenters", type=str, nargs="*")

options = parser.parse_args()
session_file = options.results_dir / f"{options.session_id}.json"
Expand All @@ -101,45 +148,56 @@ def main() -> None:

timings = []
previous_timings = load_previous_timings(session_file, settings, parameters)
for benchmark in track(ALL_BENCHMARKS, description="Running benchmarks..."):
benchmark_key = (benchmark["category"], benchmark["name"])
should_skip = benchmark.get("skip_if", False)
should_force_run = options.force_run or (
options.force_run_only
and options.force_run_only in benchmark["name"].lower()
)
if benchmark_key in previous_timings and (not should_force_run or should_skip):
print(f"Skipping {benchmark_key} (already run)")
timings.append(
{
"name": benchmark["name"],
"category": benchmark["category"], # "SD1.5", "SDXL"
"timings": previous_timings[benchmark_key],
}

with ThreadPoolExecutor(max_workers=8) as executor:
benchmark_futures = []

for benchmark in ALL_BENCHMARKS:
benchmark_key = (benchmark["category"], benchmark["name"])
should_skip = benchmark.get("skip_if", False)
should_force_run = options.force_run or (
options.force_run_only
and options.force_run_only in benchmark["name"].lower()
)
continue

print(f"Running benchmark: {benchmark_key}")
function = benchmark["function"].on(_scheduler="nomad")
if options.target_node:
function = function.on(
_scheduler_options={
"target_node": options.target_node,
}
if benchmark_key in previous_timings and (
not should_force_run or should_skip
):
print(f"Skipping {benchmark_key} (already run)")
future = Future() # type: ignore
future.set_result(
{
"name": benchmark["name"],
"category": benchmark["category"],
"timings": previous_timings[benchmark_key],
}
)
benchmark_futures.append(future)
continue

benchmark_futures.append(
executor.submit(
run_benchmark,
benchmark_key,
benchmark,
settings,
parameters,
options,
)
)

benchmark_results = function(
benchmark_settings=settings,
parameters=parameters,
**benchmark.get("kwargs", {}),
)
timings.append(
{
"name": benchmark["name"],
"category": benchmark["category"], # "SD1.5", "SDXL"
"timings": benchmark_results.timings,
}
)
for future in track(
as_completed(benchmark_futures),
total=len(benchmark_futures),
description="Running benchmarks",
):
try:
result = future.result()
except Exception as exc:
print("Benchmark failed!!")
traceback.print_exc()
continue
else:
timings.append(result)

results = {
"settings": asdict(settings),
Expand Down

0 comments on commit b638c6c

Please sign in to comment.