Skip to content

Commit

Permalink
benchmarks: add NCHW channels last + compiled combination
Browse files Browse the repository at this point in the history
  • Loading branch information
isidentical committed Nov 4, 2023
1 parent a24d23a commit 64babfd
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ Running on an A100 80G SXM hosted at [fal.ai](https://fal.ai).
| Diffusers (fp16, SDPA) | 1.591s | 1.590s | 1.581s | 1.601s | 31.44 it/s |
| Diffusers (fp16, xformers) | 1.758s | 1.759s | 1.746s | 1.772s | 28.43 it/s |
| Diffusers (fp16, SDPA, compiled) | 1.352s | 1.351s | 1.348s | 1.356s | 37.01 it/s |
| Diffusers (fp16, SDPA, compiled, NCHW channels last) | 1.066s | 1.065s | 1.062s | 1.076s | 46.95 it/s |

### SDXL Benchmarks
| | mean (s) | median (s) | min (s) | max (s) | speed (it/s) |
|------------------|----------|------------|---------|---------|--------------|
| Diffusers (fp16, SDPA) | 5.933s | 5.933s | 5.924s | 5.943s | 8.43 it/s |
| Diffusers (fp16, xformers) | 5.724s | 5.724s | 5.714s | 5.731s | 8.74 it/s |
| Diffusers (fp16, SDPA, compiled) | 5.246s | 5.247s | 5.233s | 5.259s | 9.53 it/s |
| Diffusers (fp16, SDPA, compiled, NCHW channels last) | 5.132s | 5.132s | 5.121s | 5.142s | 9.74 it/s |

<!-- END TABLE -->

Expand Down
32 changes: 28 additions & 4 deletions benchmarks/diffusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def diffusers_any(
parameters: InputParameters,
model_name: str,
enable_xformers: bool = False,
compile: bool = False,
use_compile: bool = False,
use_nchw_channels: bool = False,
) -> BenchmarkResults:
# Some of this functionality might not be available in torch 2.1,
# but setting just in case if in the future we upgrade to a newer
Expand All @@ -45,10 +46,13 @@ def diffusers_any(
if enable_xformers:
pipeline.enable_xformers_memory_efficient_attention()

if use_nchw_channels:
pipeline.unet = pipeline.unet.to(memory_format=torch.channels_last)

# The mode here is reduce-overhead, which is a balanced compromise between
# compilation time and runtime. The other modes might be a possible choice
# for future benchmarks.
if compile:
if use_compile:
pipeline.unet = torch.compile(
pipeline.unet, fullgraph=True, mode="reduce-overhead"
)
Expand Down Expand Up @@ -86,7 +90,17 @@ def diffusers_any(
"function": diffusers_any,
"kwargs": {
"model_name": "runwayml/stable-diffusion-v1-5",
"compile": True,
"use_compile": True,
},
},
{
"name": "Diffusers (fp16, SDPA, compiled, NCHW channels last)",
"category": "SD1.5",
"function": diffusers_any,
"kwargs": {
"model_name": "runwayml/stable-diffusion-v1-5",
"use_compile": True,
"use_nchw_channels": True,
},
},
{
Expand All @@ -112,7 +126,17 @@ def diffusers_any(
"function": diffusers_any,
"kwargs": {
"model_name": "stabilityai/stable-diffusion-xl-base-1.0",
"compile": True,
"use_compile": True,
},
},
{
"name": "Diffusers (fp16, SDPA, compiled, NCHW channels last)",
"category": "SDXL",
"function": diffusers_any,
"kwargs": {
"model_name": "stabilityai/stable-diffusion-xl-base-1.0",
"use_compile": True,
"use_nchw_channels": True,
},
},
]

0 comments on commit 64babfd

Please sign in to comment.