Skip to content

Commit

Permalink
add stablefast
Browse files Browse the repository at this point in the history
  • Loading branch information
isidentical committed Dec 21, 2023
1 parent ad21b9d commit 389c44e
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 2 deletions.
2 changes: 1 addition & 1 deletion artifacts/latest.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.6092358700116165, 1.590405477967579, 1.6014833319932222, 1.6045241150422953, 1.6173307650024071, 1.588649354991503, 1.6177432839758694, 1.599749773973599, 1.5973809910356067, 1.611054973967839]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.553542829991784, 1.5469060649629682, 1.5682512729545124, 1.5926210439647548, 1.5490420999703929, 1.5811396269709803, 1.601516699010972, 1.573098658991512, 1.5553199910209514, 1.5495691270334646]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7270463349996135, 1.7257355590118095, 1.72448783100117, 1.7195176769746467, 1.724910780962091, 1.733797338034492, 1.7283085110248066, 1.7298935379949398, 1.7296830140403472, 1.7474360400228761]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.3513619299628772, 1.3534803700167686, 1.3520958359586075, 1.3536381669691764, 1.354587804991752, 1.3556970959762111, 1.3540321679902263, 1.3534756689914502, 1.3540198029950261, 1.3547918749973178]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 (End-to-end)", "timings": [1.0565082289977, 1.0567370590288192, 1.0561835389817134, 1.0582475429982878, 1.0596825950196944, 1.0571462360094301, 1.0588196199969389, 1.0595528869889677, 1.05731826700503, 1.0564522079657763]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.926582371990662, 5.934434743016027, 5.9270851469482295, 5.935426631011069, 5.924764123046771, 5.940283620962873, 5.923806683975272, 5.939957841997966, 5.936083366977982, 5.9296819130540825]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny 
VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.721943153010216, 5.728673742036335, 5.741363879002165, 5.76699190097861, 5.737180910015013, 5.739464172977023, 5.734640704002231, 5.739124519051984, 5.736957271001302, 5.743521926051471]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.710114244022407, 5.713956555002369, 5.712215353967622, 5.711912807018962, 5.717077467998024, 5.717427038005553, 5.716518344997894, 5.723207506001927, 5.732120550004765, 5.730621722002979]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.215489073016215, 5.213854452013038, 5.219272127957083, 5.21321740699932, 5.216327044996433, 5.215428333031014, 5.218647814006545, 5.215836882998701, 5.220495002984535, 5.2171645070193335]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.125404424034059, 5.128606810001656, 5.129265585972462, 5.131429913977627, 5.139510503038764, 5.135477636009455, 5.141903425042983, 5.1426030559814535, 5.1420622110017575, 5.1471164600225165]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9522697469219565, 0.9466271298006177, 0.9612564342096448, 0.9496314357966185, 0.9472450213506818, 0.9524376271292567, 0.947170939296484, 0.9501788932830095, 
0.9480817448347807, 0.949673724360764]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.282238028012216, 4.288415948860347, 4.3019642466679215, 4.316444550640881, 4.300708514638245, 4.304514071904123, 4.2988332360982895, 4.30375206284225, 4.303566202521324, 4.297783775255084]}, {"name": "[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.153573560994118, 8.144518585992046, 8.136577832978219, 8.14440743502928, 8.146547965996433, 8.137827199010644, 8.150413497991394, 8.143599029979669, 8.154678368009627, 8.15259703004267]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.87210746697383, 5.879055427969433, 5.893418683030177, 5.887948323041201, 5.883382624015212, 5.88199090200942, 5.886507772025652, 5.893981233006343, 5.895993906015065, 5.897189933981281]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", "category": "SDXL (End-to-end)", "timings": [5.314938348019496, 5.328400561993476, 5.314847628993448, 5.321663878043182, 5.31307160895085, 5.323098871042021, 5.315845976991113, 5.323869657004252, 5.333241019980051, 5.340266301005613]}, {"name": "Diffusers (torch 2.1, SDPA) + OpenAI's [consistency decoder](https://github.com/openai/consistencydecoder)\\*\\*", "category": "SD1.5 (End-to-end)", "timings": [2.2305995619972236, 2.236397976987064, 2.233143593999557, 2.2379272099933587, 2.2369742310256697, 2.226276573026553, 2.2258994709700346, 2.227976629976183, 2.2204389010439627, 2.225959989009425]}, {"name": "Comfy (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.751719175023027, 5.749125912960153, 5.8242923590005375, 5.748247125011403, 5.757380981987808, 5.821511441026814, 5.782289217982907, 5.796118968981318, 5.799208401993383, 5.7610603049979545]}]}
{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.6092358700116165, 1.590405477967579, 1.6014833319932222, 1.6045241150422953, 1.6173307650024071, 1.588649354991503, 1.6177432839758694, 1.599749773973599, 1.5973809910356067, 1.611054973967839]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.553542829991784, 1.5469060649629682, 1.5682512729545124, 1.5926210439647548, 1.5490420999703929, 1.5811396269709803, 1.601516699010972, 1.573098658991512, 1.5553199910209514, 1.5495691270334646]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7270463349996135, 1.7257355590118095, 1.72448783100117, 1.7195176769746467, 1.724910780962091, 1.733797338034492, 1.7283085110248066, 1.7298935379949398, 1.7296830140403472, 1.7474360400228761]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.3513619299628772, 1.3534803700167686, 1.3520958359586075, 1.3536381669691764, 1.354587804991752, 1.3556970959762111, 1.3540321679902263, 1.3534756689914502, 1.3540198029950261, 1.3547918749973178]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 (End-to-end)", "timings": [1.0565082289977, 1.0567370590288192, 1.0561835389817134, 1.0582475429982878, 1.0596825950196944, 1.0571462360094301, 1.0588196199969389, 1.0595528869889677, 1.05731826700503, 1.0564522079657763]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.926582371990662, 5.934434743016027, 5.9270851469482295, 5.935426631011069, 5.924764123046771, 5.940283620962873, 5.923806683975272, 5.939957841997966, 5.936083366977982, 5.9296819130540825]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny 
VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.721943153010216, 5.728673742036335, 5.741363879002165, 5.76699190097861, 5.737180910015013, 5.739464172977023, 5.734640704002231, 5.739124519051984, 5.736957271001302, 5.743521926051471]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.710114244022407, 5.713956555002369, 5.712215353967622, 5.711912807018962, 5.717077467998024, 5.717427038005553, 5.716518344997894, 5.723207506001927, 5.732120550004765, 5.730621722002979]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.215489073016215, 5.213854452013038, 5.219272127957083, 5.21321740699932, 5.216327044996433, 5.215428333031014, 5.218647814006545, 5.215836882998701, 5.220495002984535, 5.2171645070193335]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.125404424034059, 5.128606810001656, 5.129265585972462, 5.131429913977627, 5.139510503038764, 5.135477636009455, 5.141903425042983, 5.1426030559814535, 5.1420622110017575, 5.1471164600225165]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9522697469219565, 0.9466271298006177, 0.9612564342096448, 0.9496314357966185, 0.9472450213506818, 0.9524376271292567, 0.947170939296484, 0.9501788932830095, 
0.9480817448347807, 0.949673724360764]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.282238028012216, 4.288415948860347, 4.3019642466679215, 4.316444550640881, 4.300708514638245, 4.304514071904123, 4.2988332360982895, 4.30375206284225, 4.303566202521324, 4.297783775255084]}, {"name": "[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.153573560994118, 8.144518585992046, 8.136577832978219, 8.14440743502928, 8.146547965996433, 8.137827199010644, 8.150413497991394, 8.143599029979669, 8.154678368009627, 8.15259703004267]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.87210746697383, 5.879055427969433, 5.893418683030177, 5.887948323041201, 5.883382624015212, 5.88199090200942, 5.886507772025652, 5.893981233006343, 5.895993906015065, 5.897189933981281]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", "category": "SDXL (End-to-end)", "timings": [5.314938348019496, 5.328400561993476, 5.314847628993448, 5.321663878043182, 5.31307160895085, 5.323098871042021, 5.315845976991113, 5.323869657004252, 5.333241019980051, 5.340266301005613]}, {"name": "Diffusers (torch 2.1, SDPA) + OpenAI's [consistency decoder](https://github.com/openai/consistencydecoder)\\*\\*", "category": "SD1.5 (End-to-end)", "timings": [2.2305995619972236, 2.236397976987064, 2.233143593999557, 2.2379272099933587, 2.2369742310256697, 2.226276573026553, 2.2258994709700346, 2.227976629976183, 2.2204389010439627, 2.225959989009425]}, {"name": "Comfy (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.751719175023027, 5.749125912960153, 5.8242923590005375, 5.748247125011403, 5.757380981987808, 5.821511441026814, 5.782289217982907, 5.796118968981318, 5.799208401993383, 5.7610603049979545]}, {"name": "Stable Fast (torch 2.1)", "category": "SD1.5 (End-to-end)", "timings": 
[0.9004453329835087, 0.9010565869975835, 0.9024333320558071, 0.902811500011012, 0.9005696969106793, 0.9003846610430628, 0.9000067131128162, 0.9003504698630422, 0.9020813019014895, 0.9022957070264965]}, {"name": "Stable Fast (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [4.1391438820865005, 4.1379809838254005, 4.143944907933474, 4.143460404826328, 4.147412231890485, 4.152759549906477, 4.151341335149482, 4.151535141048953, 4.167611639015377, 4.1654934079851955]}]}
12 changes: 11 additions & 1 deletion benchmarks/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
benchmark_experimental,
benchmark_minsdxl,
benchmark_oneflow,
benchmark_stablefast,
benchmark_tensorrt,
)
from benchmarks.settings import BenchmarkSettings, InputParameters
Expand All @@ -23,6 +24,7 @@
*benchmark_minsdxl.LOCAL_BENCHMARKS,
*benchmark_experimental.LOCAL_BENCHMARKS,
*benchmark_comfy.LOCAL_BENCHMARKS,
*benchmark_stablefast.LOCAL_BENCHMARKS,
]


Expand Down Expand Up @@ -72,7 +74,15 @@ def main() -> None:
"--force-run-only",
type=str.lower,
help="Force running only the specified benchmarks, even if they have already been run.",
choices=["diffusers", "tensorrt", "minsdxl", "oneflow", "consistency", "comfy"],
choices=[
"diffusers",
"tensorrt",
"minsdxl",
"oneflow",
"consistency",
"comfy",
"stablefast",
],
)

# For ensuring consistency among results, make sure to compare the numbers
Expand Down
69 changes: 69 additions & 0 deletions benchmarks/benchmark_stablefast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

from functools import partial

import fal

from benchmarks.settings import BenchmarkResults, BenchmarkSettings, InputParameters


@fal.function(
    requirements=[
        "accelerate==0.24.1",
        "diffusers==0.24.0",
        "torch==2.1.1",
        "transformers>=4.35",
        "xformers>=0.0.22",
        "triton>=2.1.0",
        "https://github.com/chengzeyi/stable-fast/releases/download/v1.0.0/stable_fast-1.0.0+torch211cu121-cp311-cp311-manylinux2014_x86_64.whl",
        "--extra-index-url",
        "https://download.pytorch.org/whl/cu121",
    ],
    machine_type="GPU",
)
def stablefast_any(
    benchmark_settings: BenchmarkSettings,
    parameters: InputParameters,
    model_name: str,
) -> BenchmarkResults:
    """Benchmark a diffusers pipeline after compiling it with stable-fast.

    The checkpoint named by ``model_name`` is loaded in float16 from
    safetensors weights, moved to CUDA, and compiled with the xformers,
    Triton and CUDA graph options switched on. A callable bound to
    ``parameters.prompt`` and ``parameters.steps`` is then handed to
    ``benchmark_settings.apply``, whose result is returned unchanged.
    """
    # Heavy third-party imports stay inside the function body, next to the
    # code that uses them, rather than at module import time.
    import torch
    from diffusers import DiffusionPipeline
    from sfast.compilers.diffusion_pipeline_compiler import CompilationConfig
    from sfast.compilers.diffusion_pipeline_compiler import compile as sfast_compile

    base_pipeline = DiffusionPipeline.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        use_safetensors=True,
    )
    base_pipeline.to("cuda")

    # Start from stable-fast's default config and turn on each optional backend.
    compilation_config = CompilationConfig.Default()
    compilation_config.enable_xformers = True
    compilation_config.enable_triton = True
    compilation_config.enable_cuda_graph = True
    compiled_pipeline = sfast_compile(base_pipeline, compilation_config)

    # Bind the prompt and step count now so the harness only needs to call
    # a zero-argument function.
    run_once = partial(
        compiled_pipeline,
        parameters.prompt,
        num_inference_steps=parameters.steps,
    )
    return benchmark_settings.apply(run_once)


# One registry entry per model family; every entry runs ``stablefast_any``
# and differs only in its category label and the checkpoint it loads.
LOCAL_BENCHMARKS = [
    {
        "name": "Stable Fast (torch 2.1)",
        "category": category,
        "function": stablefast_any,
        "kwargs": {
            "model_name": checkpoint,
        },
    }
    for category, checkpoint in (
        ("SD1.5 (End-to-end)", "runwayml/stable-diffusion-v1-5"),
        ("SDXL (End-to-end)", "stabilityai/stable-diffusion-xl-base-1.0"),
    )
]

0 comments on commit 389c44e

Please sign in to comment.