Skip to content

Commit

Permalink
benchmarks: add torch.compile'd SD/SDXL
Browse files Browse the repository at this point in the history
  • Loading branch information
isidentical committed Nov 4, 2023
1 parent aa3a059 commit a24d23a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ Running on an A100 80G SXM hosted at [fal.ai](https://fal.ai).
|------------------|----------|------------|---------|---------|--------------|
| Diffusers (fp16, SDPA) | 1.591s | 1.590s | 1.581s | 1.601s | 31.44 it/s |
| Diffusers (fp16, xformers) | 1.758s | 1.759s | 1.746s | 1.772s | 28.43 it/s |
| Diffusers (fp16, SDPA, compiled) | 1.352s | 1.351s | 1.348s | 1.356s | 37.01 it/s |

### SDXL Benchmarks
| | mean (s) | median (s) | min (s) | max (s) | speed (it/s) |
|------------------|----------|------------|---------|---------|--------------|
| Diffusers (fp16, SDPA) | 5.933s | 5.933s | 5.924s | 5.943s | 8.43 it/s |
| Diffusers (fp16, xformers) | 5.724s | 5.724s | 5.714s | 5.731s | 8.74 it/s |
| Diffusers (fp16, SDPA, compiled) | 5.246s | 5.247s | 5.233s | 5.259s | 9.53 it/s |

<!-- END TABLE -->

Expand Down
38 changes: 38 additions & 0 deletions benchmarks/diffusers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from functools import partial

import fal
Expand All @@ -20,7 +21,14 @@ def diffusers_any(
parameters: InputParameters,
model_name: str,
enable_xformers: bool = False,
compile: bool = False,
) -> BenchmarkResults:
    # Some of this functionality might not be available in torch 2.1,
    # but we set it just in case, so it takes effect if we later upgrade
    # to a newer version of torch.
os.environ["TORCHINDUCTOR_CACHE_DIR"] = "/data/torch-cache"
os.environ["TORCHINDUCTOR_FX_GRAPH_CACHE"] = "1"

import torch
from diffusers import DiffusionPipeline

Expand All @@ -30,9 +38,21 @@ def diffusers_any(
use_safetensors=True,
)
pipeline.to("cuda")

# Use XFormers memory efficient attention instead of Torch SDPA
# which might also utilize memory efficient attention (alongside
# flash attention).
if enable_xformers:
pipeline.enable_xformers_memory_efficient_attention()

    # The mode here is reduce-overhead, which is a balanced compromise between
    # compilation time and runtime. The other modes might be possible choices
    # for future benchmarks.
if compile:
pipeline.unet = torch.compile(
pipeline.unet, fullgraph=True, mode="reduce-overhead"
)

return benchmark_settings.apply(
partial(
pipeline,
Expand Down Expand Up @@ -60,6 +80,15 @@ def diffusers_any(
"enable_xformers": True,
},
},
{
"name": "Diffusers (fp16, SDPA, compiled)",
"category": "SD1.5",
"function": diffusers_any,
"kwargs": {
"model_name": "runwayml/stable-diffusion-v1-5",
"compile": True,
},
},
{
"name": "Diffusers (fp16, SDPA)",
"category": "SDXL",
Expand All @@ -77,4 +106,13 @@ def diffusers_any(
"enable_xformers": True,
},
},
{
"name": "Diffusers (fp16, SDPA, compiled)",
"category": "SDXL",
"function": diffusers_any,
"kwargs": {
"model_name": "stabilityai/stable-diffusion-xl-base-1.0",
"compile": True,
},
},
]

0 comments on commit a24d23a

Please sign in to comment.