diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index db0cb39..d5ec330 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -2,6 +2,12 @@ name: Run Benchmarks on: workflow_dispatch: + inputs: + force-run: + description: "Force run" + required: false + default: false + type: boolean jobs: run: @@ -28,12 +34,15 @@ jobs: FAL_KEY_SECRET: ${{ secrets.FAL_KEY_SECRET }} FAL_TARGET_NODE: ${{ secrets.FAL_TARGET_NODE }} run: | - python -m benchmarks /tmp \ + python -m benchmarks artifacts \ --session-id=latest \ - --target-node=$FAL_TARGET_NODE + --target-node=$FAL_TARGET_NODE \ + --iterations=10 \ + --warmup-iterations=3 \ + ${{ fromJSON('["", "--force-run"]')[github.event.inputs.force-run == 'true'] }} - name: Regenerate tables - run: python benchmarks/update_table.py /tmp/latest.json + run: python benchmarks/update_table.py artifacts/latest.json - name: Commit and push changes uses: stefanzweifel/git-auto-commit-action@v4 diff --git a/artifacts/latest.json b/artifacts/latest.json new file mode 100644 index 0000000..f396b15 --- /dev/null +++ b/artifacts/latest.json @@ -0,0 +1 @@ +{"settings": {"warmup_iterations": 3, "benchmark_iterations": 10}, "parameters": {"prompt": "A photo of a cat", "steps": 50}, "timings": [{"name": "Diffusers (fp16, SDPA)", "category": "SD1.5", "timings": [1.5917212970089167, 1.5975631090113893, 1.5821007050108165, 1.5864128279790748, 1.5813008210097905, 1.588955162995262, 1.583035584015306, 1.5979954930080567, 1.6009252599906176, 1.5956080609757919]}, {"name": "Diffusers (fp16, xformers)", "category": "SD1.5", "timings": [1.7560910189931747, 1.7572659730212763, 1.7597715989977587, 1.7469689899880905, 1.763645778002683, 1.748716948000947, 1.7602629070170224, 1.7721076029993128, 1.7460152900021058, 1.7701677379955072]}, {"name": "Diffusers (fp16, SDPA, compiled)", "category": "SD1.5", "timings": [1.356168844999047, 1.354804383998271, 1.3516721340129152, 1.3500280909938738, 1.3562533959920984, 1.3556265980005264, 1.3505920349853113, 1.3477569509996101, 1.3498703970108181, 1.3481854719866533]}, {"name": "Diffusers (fp16, SDPA, compiled, NCHW channels last)", "category": "SD1.5", "timings": [1.0672315989795607, 1.0727007249952294, 1.0632865040097386, 1.0763663580000866, 1.06514667099691, 1.065665372996591, 1.0638107580016367, 1.0616009290097281, 1.0649084030010272, 1.063036303006811]}, {"name": "Diffusers (fp16, SDPA)", "category": "SDXL", "timings": [5.940763157996116, 5.926704184006667, 5.932992869988084, 5.940833892993396, 5.923987179005053, 5.938259807007853, 5.923574882996036, 5.930732762994012, 5.942996845988091, 5.932096109987469]}, {"name": "Diffusers (fp16, xformers)", "category": "SDXL", "timings": [5.728389803000027, 5.7223248230002355, 5.713896728004329, 5.7198221340077, 5.716055455995956, 5.730836973001715, 5.725524671986932, 5.730034602980595, 5.726219657983165, 5.722418188001029]}, {"name": "Diffusers (fp16, SDPA, compiled)", "category": "SDXL", "timings": [5.233289741008775, 5.24713467201218, 5.235783365002135, 5.239803472999483, 5.251854731992353, 5.242411447019549, 5.250333832023898, 5.259196978004184, 5.247713554999791, 5.255097048007883]}, {"name": "Diffusers (fp16, SDPA, compiled, NCHW channels last)", "category": "SDXL", "timings": [5.12098954099929, 5.122773736016825, 5.130043459008448, 5.131235945009394, 5.132947302015964, 5.1301643929909915, 5.13384268200025, 5.141838512994582, 5.139718485996127, 5.141162898013135]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SD1.5", "timings": [0.819957683008397, 0.8171751589979976, 0.8198997500003316, 0.8168765410082415, 0.8175504659884609, 0.817866342025809, 0.8211427440110128, 0.8207452670030762, 0.8174457829736639, 0.8177875310066156]}, {"name": "TensorRT 9.0 (cuda graphs, static shapes)", "category": "SDXL", "timings": [4.099050192977302, 4.091173734981567, 4.09869981801603, 4.100261182000395, 4.1056046999874525, 4.1030455399886705, 4.104289636015892, 4.105645445990376, 4.1050181849859655, 4.106528664997313]}]} diff --git a/benchmarks/__main__.py b/benchmarks/__main__.py index 774e855..9efebc8 100644 --- a/benchmarks/__main__.py +++ b/benchmarks/__main__.py @@ -52,6 +52,11 @@ def main() -> None: type=str, default=datetime.now().strftime("%Y%m%d-%H%M%S"), ) + parser.add_argument( + "--force-run", + action="store_true", + help="Force running all benchmarks, even if they have already been run.", + ) # For ensuring consistency among results, make sure to compare the numbers # within the same node. So the driver, cuda version, power supply, CPU compute @@ -71,7 +76,7 @@ def main() -> None: previous_timings = load_previous_timings(session_file, settings, parameters) for benchmark in track(ALL_BENCHMARKS, description="Running benchmarks..."): benchmark_key = (benchmark["category"], benchmark["name"]) - if benchmark_key in previous_timings: + if benchmark_key in previous_timings and not options.force_run: print(f"Skipping {benchmark_key} (already run)") timings.append( {