From ba7208d81c60ba5a8025e17ede73502861c3a530 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Mon, 6 Nov 2023 01:42:53 +0300 Subject: [PATCH] benchmarks: add minSDXL+ w/flash-attn2 --- README.md | 1 + artifacts/latest.json | 2 +- benchmarks/benchmark_minsdxl.py | 10 +++++++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b05bd8..ea36788 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Running on an A100 80G SXM hosted at [fal.ai](https://fal.ai). | [minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA) | 5.881s | 5.881s | 5.872s | 5.891s | 8.50 it/s | | Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\* | 5.748s | 5.746s | 5.734s | 5.776s | 8.70 it/s | | Diffusers (torch 2.1, xformers) | 5.724s | 5.724s | 5.714s | 5.731s | 8.74 it/s | +| [minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2) | 5.306s | 5.304s | 5.288s | 5.333s | 9.43 it/s | | Diffusers (torch 2.1, SDPA, compiled) | 5.246s | 5.247s | 5.233s | 5.259s | 9.53 it/s | | Diffusers (torch 2.1, SDPA, compiled, NCHW channels last) | 5.132s | 5.132s | 5.121s | 5.142s | 9.74 it/s | | OneFlow | 4.605s | 4.607s | 4.581s | 4.625s | 10.85 it/s | diff --git a/artifacts/latest.json b/artifacts/latest.json index 5bb9398..9985932 100644 --- a/artifacts/latest.json +++ b/artifacts/latest.json @@ -1 +1 @@ -{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.5917212970089167, 1.5975631090113893, 1.5821007050108165, 1.5864128279790748, 1.5813008210097905, 1.588955162995262, 1.583035584015306, 1.5979954930080567, 1.6009252599906176, 1.5956080609757919]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.562345665995963, 1.5535877529764548, 1.5727124799741432, 1.5913520029862411, 1.584301869967021, 1.5461461240192875, 1.557934220007155, 1.5496932759997435, 1.553419985983055, 1.5442492840229534]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7560910189931747, 1.7572659730212763, 1.7597715989977587, 1.7469689899880905, 1.763645778002683, 1.748716948000947, 1.7602629070170224, 1.7721076029993128, 1.7460152900021058, 1.7701677379955072]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.356168844999047, 1.354804383998271, 1.3516721340129152, 1.3500280909938738, 1.3562533959920984, 1.3556265980005264, 1.3505920349853113, 1.3477569509996101, 1.3498703970108181, 1.3481854719866533]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 (End-to-end)", "timings": [1.0672315989795607, 1.0727007249952294, 1.0632865040097386, 1.0763663580000866, 1.06514667099691, 1.065665372996591, 1.0638107580016367, 1.0616009290097281, 1.0649084030010272, 1.063036303006811]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.940763157996116, 5.926704184006667, 5.932992869988084, 5.940833892993396, 5.923987179005053, 5.938259807007853, 5.923574882996036, 5.930732762994012, 5.942996845988091, 5.932096109987469]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.733747632999439, 5.7373922020196915, 5.739806508005131, 5.7549302889965475, 5.754676963028032, 5.745761024008971, 5.751321510004345, 5.776052331959363, 5.744757027016021, 5.746225669980049]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.728389803000027, 5.7223248230002355, 5.713896728004329, 5.7198221340077, 5.716055455995956, 5.730836973001715, 5.725524671986932, 5.730034602980595, 5.726219657983165, 5.722418188001029]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.233289741008775, 5.24713467201218, 5.235783365002135, 5.239803472999483, 5.251854731992353, 5.242411447019549, 5.250333832023898, 5.259196978004184, 5.247713554999791, 5.255097048007883]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.12098954099929, 5.122773736016825, 5.130043459008448, 5.131235945009394, 5.132947302015964, 5.1301643929909915, 5.13384268200025, 5.141838512994582, 5.139718485996127, 5.141162898013135]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9568120219919365, 0.9468847009993624, 0.9545126229932066, 0.9472718389879446, 0.9552929110068362, 0.9412291230109986, 0.9544001989997923, 0.9557115529896691, 0.9509655799774919, 0.9482630330021493]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.586631373997079, 4.61347366499831, 4.600411992985755, 4.6092570440087, 4.611457958992105, 4.604861573025119, 4.602566407003906, 4.624956093000947, 4.615925558988238, 4.580915530998027]}, {"name": "[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.118194965005387, 8.118167286971584, 8.115796227997635, 8.130944961973, 8.135477469011676, 8.122692861012183, 8.14406747900648, 8.144401906989515, 8.139942242007237, 8.14477308903588]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.888596308999695, 5.891138902050443, 5.881036774022505, 5.887884529947769, 5.881468039995525, 5.873392584035173, 5.8823586810030974, 5.872284794982988, 5.874876776011661, 5.874297180038411]}]} +{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (torch 2.1, SDPA)", "category": "SD1.5 (End-to-end)", "timings": [1.5917212970089167, 1.5975631090113893, 1.5821007050108165, 1.5864128279790748, 1.5813008210097905, 1.588955162995262, 1.583035584015306, 1.5979954930080567, 1.6009252599906176, 1.5956080609757919]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SD1.5 (End-to-end)", "timings": [1.562345665995963, 1.5535877529764548, 1.5727124799741432, 1.5913520029862411, 1.584301869967021, 1.5461461240192875, 1.557934220007155, 1.5496932759997435, 1.553419985983055, 1.5442492840229534]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SD1.5 (End-to-end)", "timings": [1.7560910189931747, 1.7572659730212763, 1.7597715989977587, 1.7469689899880905, 1.763645778002683, 1.748716948000947, 1.7602629070170224, 1.7721076029993128, 1.7460152900021058, 1.7701677379955072]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SD1.5 (End-to-end)", "timings": [1.356168844999047, 1.354804383998271, 1.3516721340129152, 1.3500280909938738, 1.3562533959920984, 1.3556265980005264, 1.3505920349853113, 1.3477569509996101, 1.3498703970108181, 1.3481854719866533]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SD1.5 (End-to-end)", "timings": [1.0672315989795607, 1.0727007249952294, 1.0632865040097386, 1.0763663580000866, 1.06514667099691, 1.065665372996591, 1.0638107580016367, 1.0616009290097281, 1.0649084030010272, 1.063036303006811]}, {"name": "Diffusers (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.940763157996116, 5.926704184006667, 5.932992869988084, 5.940833892993396, 5.923987179005053, 5.938259807007853, 5.923574882996036, 5.930732762994012, 5.942996845988091, 5.932096109987469]}, {"name": "Diffusers (torch 2.1, SDPA, [tiny VAE](https://github.com/madebyollin/taesd))\\*", "category": "SDXL (End-to-end)", "timings": [5.733747632999439, 5.7373922020196915, 5.739806508005131, 5.7549302889965475, 5.754676963028032, 5.745761024008971, 5.751321510004345, 5.776052331959363, 5.744757027016021, 5.746225669980049]}, {"name": "Diffusers (torch 2.1, xformers)", "category": "SDXL (End-to-end)", "timings": [5.728389803000027, 5.7223248230002355, 5.713896728004329, 5.7198221340077, 5.716055455995956, 5.730836973001715, 5.725524671986932, 5.730034602980595, 5.726219657983165, 5.722418188001029]}, {"name": "Diffusers (torch 2.1, SDPA, compiled)", "category": "SDXL (End-to-end)", "timings": [5.233289741008775, 5.24713467201218, 5.235783365002135, 5.239803472999483, 5.251854731992353, 5.242411447019549, 5.250333832023898, 5.259196978004184, 5.247713554999791, 5.255097048007883]}, {"name": "Diffusers (torch 2.1, SDPA, compiled, NCHW channels last)", "category": "SDXL (End-to-end)", "timings": [5.12098954099929, 5.122773736016825, 5.130043459008448, 5.131235945009394, 5.132947302015964, 5.1301643929909915, 5.13384268200025, 5.141838512994582, 5.139718485996127, 5.141162898013135]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5 (End-to-end)", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL (End-to-end)", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}, {"name": "OneFlow", "category": "SD1.5 (End-to-end)", "timings": [0.9568120219919365, 0.9468847009993624, 0.9545126229932066, 0.9472718389879446, 0.9552929110068362, 0.9412291230109986, 0.9544001989997923, 0.9557115529896691, 0.9509655799774919, 0.9482630330021493]}, {"name": "OneFlow", "category": "SDXL (End-to-end)", "timings": [4.586631373997079, 4.61347366499831, 4.600411992985755, 4.6092570440087, 4.611457958992105, 4.604861573025119, 4.602566407003906, 4.624956093000947, 4.615925558988238, 4.580915530998027]}, {"name": "[minSDXL](https://github.com/cloneofsimo/minSDXL) (torch 2.1)", "category": "SDXL (End-to-end)", "timings": [8.118194965005387, 8.118167286971584, 8.115796227997635, 8.130944961973, 8.135477469011676, 8.122692861012183, 8.14406747900648, 8.144401906989515, 8.139942242007237, 8.14477308903588]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, SDPA)", "category": "SDXL (End-to-end)", "timings": [5.888596308999695, 5.891138902050443, 5.881036774022505, 5.887884529947769, 5.881468039995525, 5.873392584035173, 5.8823586810030974, 5.872284794982988, 5.874876776011661, 5.874297180038411]}, {"name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", "category": "SDXL (End-to-end)", "timings": [5.2938573459978215, 5.333262387022842, 5.287751997995656, 5.295834582997486, 5.314443435985595, 5.31391279597301, 5.2958174420055, 5.300047887023538, 5.307358076039236, 5.318470707978122]}]} diff --git a/benchmarks/benchmark_minsdxl.py b/benchmarks/benchmark_minsdxl.py index e31f951..632c36c 100644 --- a/benchmarks/benchmark_minsdxl.py +++ b/benchmarks/benchmark_minsdxl.py @@ -16,7 +16,7 @@ "torch==2.1.0", "transformers==4.35.0", "xformers==0.0.22.post7", - "https://github.com/tridao/flash-attention-wheels/releases/download/v2.0.6.post8/flash_attn_wheels_test-2.0.6.post8+cu121torch2.1cxx11abiTRUE-cp311-cp311-linux_x86_64.whl", + "https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.3/flash_attn-2.3.3+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl", ], machine_type="GPU", ) @@ -71,4 +71,12 @@ class UnetRewriteModel(sdxl_rewrite["UNet2DConditionModel"], ModelMixin): # typ "model_url": "https://raw.githubusercontent.com/isidentical/minSDXL/4e378780c75399823aa29404b9e1288d96c22943/sdxl_rewrite.py", }, }, + { + "name": "[minSDXL+](https://github.com/isidentical/minSDXL) (torch 2.1, flash-attention v2)", + "category": "SDXL (End-to-end)", + "function": diffusers_any, + "kwargs": { + "model_url": "https://raw.githubusercontent.com/isidentical/minSDXL/0fd7fe9c6f6544f7d16eb7a41cd7606cddb9527c/sdxl_rewrite.py", + }, + }, ]