From 389c44e21313c4c68af135d0635c344933a45c4a Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Thu, 21 Dec 2023 03:08:42 +0300 Subject: [PATCH] add stablefast --- artifacts/latest.json | 2 +- benchmarks/__main__.py | 12 +++++- benchmarks/benchmark_stablefast.py | 69 ++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 benchmarks/benchmark_stablefast.py diff --git a/artifacts/latest.json b/artifacts/latest.json index 99f51c4..bf70eb6 100644 --- a/artifacts/latest.json +++ b/artifacts/latest.json @@ -1 +1 @@ -{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.6092358700116165, 1.590405477967579, 1.6014833319932222, 1.6045241150422953, 1.6173307650024071, 1.588649354991503, 1.6177432839758694, 1.599749773973599, 1.5973809910356067, 1.611054973967839]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.553542829991784, 1.5469060649629682, 1.5682512729545124, 1.5926210439647548, 1.5490420999703929, 1.5811396269709803, 1.601516699010972, 1.573098658991512, 1.5553199910209514, 1.5495691270334646]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7270463349996135, 1.7257355590118095, 1.72448783100117, 1.7195176769746467, 1.724910780962091, 1.733797338034492, 1.7283085110248066, 1.7298935379949398, 1.7296830140403472, 1.7474360400228761]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.3513619299628772, 1.3534803700167686, 1.3520958359586075, 1.3536381669691764, 1.354587804991752, 1.3556970959762111, 1.3540321679902263, 1.3534756689914502, 1.3540198029950261, 1.3547918749973178]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 
(End-to-end)", "timings": [1.0565082289977, 1.0567370590288192, 1.0561835389817134, 1.0582475429982878, 1.0596825950196944, 1.0571462360094301, 1.0588196199969389, 1.0595528869889677, 1.05731826700503, 1.0564522079657763]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.926582371990662, 5.934434743016027, 5.9270851469482295, 5.935426631011069, 5.924764123046771, 5.940283620962873, 5.923806683975272, 5.939957841997966, 5.936083366977982, 5.9296819130540825]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.721943153010216, 5.728673742036335, 5.741363879002165, 5.76699190097861, 5.737180910015013, 5.739464172977023, 5.734640704002231, 5.739124519051984, 5.736957271001302, 5.743521926051471]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.710114244022407, 5.713956555002369, 5.712215353967622, 5.711912807018962, 5.717077467998024, 5.717427038005553, 5.716518344997894, 5.723207506001927, 5.732120550004765, 5.730621722002979]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.215489073016215, 5.213854452013038, 5.219272127957083, 5.21321740699932, 5.216327044996433, 5.215428333031014, 5.218647814006545, 5.215836882998701, 5.220495002984535, 5.2171645070193335]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.125404424034059, 5.128606810001656, 5.129265585972462, 5.131429913977627, 5.139510503038764, 5.135477636009455, 5.141903425042983, 5.1426030559814535, 5.1420622110017575, 5.1471164600225165]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 
0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9522697469219565, 0.9466271298006177, 0.9612564342096448, 0.9496314357966185, 0.9472450213506818, 0.9524376271292567, 0.947170939296484, 0.9501788932830095, 0.9480817448347807, 0.949673724360764]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.282238028012216, 4.288415948860347, 4.3019642466679215, 4.316444550640881, 4.300708514638245, 4.304514071904123, 4.2988332360982895, 4.30375206284225, 4.303566202521324, 4.297783775255084]}, {"name": "[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.153573560994118, 8.144518585992046, 8.136577832978219, 8.14440743502928, 8.146547965996433, 8.137827199010644, 8.150413497991394, 8.143599029979669, 8.154678368009627, 8.15259703004267]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.87210746697383, 5.879055427969433, 5.893418683030177, 5.887948323041201, 5.883382624015212, 5.88199090200942, 5.886507772025652, 5.893981233006343, 5.895993906015065, 5.897189933981281]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", "category": "SDXL (End-to-end)", "timings": [5.314938348019496, 5.328400561993476, 5.314847628993448, 5.321663878043182, 5.31307160895085, 5.323098871042021, 5.315845976991113, 5.323869657004252, 5.333241019980051, 5.340266301005613]}, {"name": "Diffusers (torch 2.1, SDPA) + OpenAI's [consistency decoder](https://github.com/openai/consistencydecoder)\\*\\*", "category": "SD1.5 (End-to-end)", "timings": [2.2305995619972236, 
2.236397976987064, 2.233143593999557, 2.2379272099933587, 2.2369742310256697, 2.226276573026553, 2.2258994709700346, 2.227976629976183, 2.2204389010439627, 2.225959989009425]}, {"name": "Comfy (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.751719175023027, 5.749125912960153, 5.8242923590005375, 5.748247125011403, 5.757380981987808, 5.821511441026814, 5.782289217982907, 5.796118968981318, 5.799208401993383, 5.7610603049979545]}]} +{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.6092358700116165, 1.590405477967579, 1.6014833319932222, 1.6045241150422953, 1.6173307650024071, 1.588649354991503, 1.6177432839758694, 1.599749773973599, 1.5973809910356067, 1.611054973967839]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.553542829991784, 1.5469060649629682, 1.5682512729545124, 1.5926210439647548, 1.5490420999703929, 1.5811396269709803, 1.601516699010972, 1.573098658991512, 1.5553199910209514, 1.5495691270334646]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7270463349996135, 1.7257355590118095, 1.72448783100117, 1.7195176769746467, 1.724910780962091, 1.733797338034492, 1.7283085110248066, 1.7298935379949398, 1.7296830140403472, 1.7474360400228761]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.3513619299628772, 1.3534803700167686, 1.3520958359586075, 1.3536381669691764, 1.354587804991752, 1.3556970959762111, 1.3540321679902263, 1.3534756689914502, 1.3540198029950261, 1.3547918749973178]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 (End-to-end)", "timings": [1.0565082289977, 1.0567370590288192, 1.0561835389817134, 1.0582475429982878, 
1.0596825950196944, 1.0571462360094301, 1.0588196199969389, 1.0595528869889677, 1.05731826700503, 1.0564522079657763]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.926582371990662, 5.934434743016027, 5.9270851469482295, 5.935426631011069, 5.924764123046771, 5.940283620962873, 5.923806683975272, 5.939957841997966, 5.936083366977982, 5.9296819130540825]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.721943153010216, 5.728673742036335, 5.741363879002165, 5.76699190097861, 5.737180910015013, 5.739464172977023, 5.734640704002231, 5.739124519051984, 5.736957271001302, 5.743521926051471]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.710114244022407, 5.713956555002369, 5.712215353967622, 5.711912807018962, 5.717077467998024, 5.717427038005553, 5.716518344997894, 5.723207506001927, 5.732120550004765, 5.730621722002979]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.215489073016215, 5.213854452013038, 5.219272127957083, 5.21321740699932, 5.216327044996433, 5.215428333031014, 5.218647814006545, 5.215836882998701, 5.220495002984535, 5.2171645070193335]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.125404424034059, 5.128606810001656, 5.129265585972462, 5.131429913977627, 5.139510503038764, 5.135477636009455, 5.141903425042983, 5.1426030559814535, 5.1420622110017575, 5.1471164600225165]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", 
"timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9522697469219565, 0.9466271298006177, 0.9612564342096448, 0.9496314357966185, 0.9472450213506818, 0.9524376271292567, 0.947170939296484, 0.9501788932830095, 0.9480817448347807, 0.949673724360764]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.282238028012216, 4.288415948860347, 4.3019642466679215, 4.316444550640881, 4.300708514638245, 4.304514071904123, 4.2988332360982895, 4.30375206284225, 4.303566202521324, 4.297783775255084]}, {"name": "[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.153573560994118, 8.144518585992046, 8.136577832978219, 8.14440743502928, 8.146547965996433, 8.137827199010644, 8.150413497991394, 8.143599029979669, 8.154678368009627, 8.15259703004267]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.87210746697383, 5.879055427969433, 5.893418683030177, 5.887948323041201, 5.883382624015212, 5.88199090200942, 5.886507772025652, 5.893981233006343, 5.895993906015065, 5.897189933981281]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", "category": "SDXL (End-to-end)", "timings": [5.314938348019496, 5.328400561993476, 5.314847628993448, 5.321663878043182, 5.31307160895085, 5.323098871042021, 5.315845976991113, 5.323869657004252, 5.333241019980051, 5.340266301005613]}, {"name": "Diffusers (torch 2.1, SDPA) + OpenAI's [consistency decoder](https://github.com/openai/consistencydecoder)\\*\\*", "category": "SD1.5 (End-to-end)", "timings": [2.2305995619972236, 2.236397976987064, 2.233143593999557, 2.2379272099933587, 2.2369742310256697, 2.226276573026553, 2.2258994709700346, 
2.227976629976183, 2.2204389010439627, 2.225959989009425]}, {"name": "Comfy (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.751719175023027, 5.749125912960153, 5.8242923590005375, 5.748247125011403, 5.757380981987808, 5.821511441026814, 5.782289217982907, 5.796118968981318, 5.799208401993383, 5.7610603049979545]}, {"name": "Stable Fast (torch 2.1)", "category": "SD1.5 (End-to-end)", "timings": [0.9004453329835087, 0.9010565869975835, 0.9024333320558071, 0.902811500011012, 0.9005696969106793, 0.9003846610430628, 0.9000067131128162, 0.9003504698630422, 0.9020813019014895, 0.9022957070264965]}, {"name": "Stable Fast (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [4.1391438820865005, 4.1379809838254005, 4.143944907933474, 4.143460404826328, 4.147412231890485, 4.152759549906477, 4.151341335149482, 4.151535141048953, 4.167611639015377, 4.1654934079851955]}]} diff --git a/benchmarks/__main__.py b/benchmarks/__main__.py index 38d78b5..4393433 100644 --- a/benchmarks/__main__.py +++ b/benchmarks/__main__.py @@ -12,6 +12,7 @@ benchmark_experimental, benchmark_minsdxl, benchmark_oneflow, + benchmark_stablefast, benchmark_tensorrt, ) from benchmarks.settings import BenchmarkSettings, InputParameters @@ -23,6 +24,7 @@ *benchmark_minsdxl.LOCAL_BENCHMARKS, *benchmark_experimental.LOCAL_BENCHMARKS, *benchmark_comfy.LOCAL_BENCHMARKS, + *benchmark_stablefast.LOCAL_BENCHMARKS, ] @@ -72,7 +74,15 @@ def main() -> None: "--force-run-only", type=str.lower, help="Force running only the specified benchmarks, even if they have already been run.", - choices=["diffusers", "tensorrt", "minsdxl", "oneflow", "consistency", "comfy"], + choices=[ + "diffusers", + "tensorrt", + "minsdxl", + "oneflow", + "consistency", + "comfy", + "stablefast", + ], ) # For ensuring consistency among results, make sure to compare the numbers diff --git a/benchmarks/benchmark_stablefast.py b/benchmarks/benchmark_stablefast.py new file mode 100644 index 0000000..da011d4 --- /dev/null 
+++ b/benchmarks/benchmark_stablefast.py
@@ -0,0 +1,86 @@
+"""Stable Fast benchmarks for the SD1.5 and SDXL pipelines."""
+
+from __future__ import annotations
+
+from functools import partial
+
+import fal
+
+from benchmarks.settings import BenchmarkResults, BenchmarkSettings, InputParameters
+
+# Requirement specifiers handed to fal.function for this benchmark.
+_REQUIREMENTS = [
+    "accelerate==0.24.1",
+    "diffusers==0.24.0",
+    "torch==2.1.1",
+    "transformers>=4.35",
+    "xformers>=0.0.22",
+    "triton>=2.1.0",
+    "https://github.com/chengzeyi/stable-fast/releases/download/v1.0.0/stable_fast-1.0.0+torch211cu121-cp311-cp311-manylinux2014_x86_64.whl",
+    "--extra-index-url",
+    "https://download.pytorch.org/whl/cu121",
+]
+
+# Model identifier benchmarked under each result category.
+_MODELS_BY_CATEGORY = {
+    "SD1.5 (End-to-end)": "runwayml/stable-diffusion-v1-5",
+    "SDXL (End-to-end)": "stabilityai/stable-diffusion-xl-base-1.0",
+}
+
+
+@fal.function(requirements=_REQUIREMENTS, machine_type="GPU")
+def stablefast_any(
+    benchmark_settings: BenchmarkSettings,
+    parameters: InputParameters,
+    model_name: str,
+) -> BenchmarkResults:
+    """Benchmark ``model_name`` after compiling its pipeline with stable-fast.
+
+    The pipeline is loaded in float16, moved to CUDA and compiled with the
+    xformers, triton and CUDA graph options switched on.
+
+    Args:
+        benchmark_settings: Object whose ``apply`` method receives the inference
+            callable.
+        parameters: Supplies the ``prompt`` and the ``steps`` count.
+        model_name: Identifier passed to ``DiffusionPipeline.from_pretrained``.
+
+    Returns:
+        The value produced by ``benchmark_settings.apply``.
+    """
+    import torch
+    from diffusers import DiffusionPipeline
+    from sfast.compilers.diffusion_pipeline_compiler import (
+        CompilationConfig,
+        compile as compile_pipeline,
+    )
+
+    fp16_pipeline = DiffusionPipeline.from_pretrained(
+        model_name, torch_dtype=torch.float16, use_safetensors=True
+    )
+    fp16_pipeline.to("cuda")
+
+    # Start from the library defaults and enable each optional backend.
+    compiler_config = CompilationConfig.Default()
+    for flag in ("enable_xformers", "enable_triton", "enable_cuda_graph"):
+        setattr(compiler_config, flag, True)
+    compiled_pipeline = compile_pipeline(fp16_pipeline, compiler_config)
+
+    run_inference = partial(
+        compiled_pipeline,
+        parameters.prompt,
+        num_inference_steps=parameters.steps,
+    )
+    return benchmark_settings.apply(run_inference)
+
+
+# One entry per model; both run through the same benchmark function.
+LOCAL_BENCHMARKS = [
+    {
+        "name": "Stable Fast (torch 2.1)",
+        "category": category,
+        "function": stablefast_any,
+        "kwargs": {"model_name": model_name},
+    }
+    for category, model_name in _MODELS_BY_CATEGORY.items()
+]