Skip to content

Commit

Permalink
Benchmarking for PRs
Browse files Browse the repository at this point in the history
  • Loading branch information
jprider63 committed Aug 29, 2024
1 parent e33539d commit c2cf86f
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 0 deletions.
154 changes: 154 additions & 0 deletions .github/workflows/ci-pr-bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
name: CI Benchmarks

on:
pull_request:
branches:
- master

env:
CERBERUS_IMAGE_ID: ghcr.io/rems-project/cerberus/cn:release

# cancel in-progress job when a new push is performed
concurrency:
group: ci-pr-bench-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
benchmark:
name: Performance benchmarks
strategy:
matrix:
# version: [4.12.0, 4.14.1]
version: [4.14.1]


runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v3
with:
fetch-depth: 100 # this is to make sure we obtain the target base commit

- name: System dependencies (ubuntu)
run: |
sudo apt install build-essential libgmp-dev z3 opam cmake
- name: Restore cached opam
id: cache-opam-restore
uses: actions/cache/restore@v4
with:
path: ~/.opam
key: ${{ matrix.version }}

- name: Setup opam
if: steps.cache-opam-restore.outputs.cache-hit != 'true'
run: |
opam init --yes --no-setup --shell=sh --compiler=${{ matrix.version }}
opam install --deps-only --yes ./cerberus-lib.opam
opam switch create with_coq ${{ matrix.version }}
eval $(opam env --switch=with_coq)
opam repo add --yes --this-switch coq-released https://coq.inria.fr/opam/released
opam pin --yes -n coq-struct-tact https://github.com/uwplse/StructTact.git
opam repo add --yes --this-switch iris-dev https://gitlab.mpi-sws.org/iris/opam.git
opam pin --yes -n coq-sail-stdpp https://github.com/rems-project/coq-sail.git#f319aad
opam pin --yes -n coq-cheri-capabilities https://github.com/rems-project/coq-cheri-capabilities.git
opam install --deps-only --yes ./cerberus-lib.opam ./cerberus-cheri.opam
- name: Save cached opam
if: steps.cache-opam-restore.outputs.cache-hit != 'true'
id: cache-opam-save
uses: actions/cache/save@v4
with:
path: ~/.opam
key: ${{ steps.cache-opam-restore.outputs.cache-primary-key }}

- name: Install python dependencies
run: pip install tabulate

- name: Install Cerberus
run: |
opam switch ${{ matrix.version }}
eval $(opam env --switch=${{ matrix.version }})
opam pin --yes --no-action add cerberus-lib .
opam pin --yes --no-action add cerberus .
opam install --yes cerberus
- name: Download cvc5 release
uses: robinraju/release-downloader@v1
with:
repository: cvc5/cvc5
tag: cvc5-1.1.2
fileName: cvc5-Linux-static.zip

- name: Unzip and install cvc5
run: |
unzip cvc5-Linux-static.zip
chmod +x cvc5-Linux-static/bin/cvc5
sudo cp cvc5-Linux-static/bin/cvc5 /usr/local/bin/
- name: Install CN
run: |
opam switch ${{ matrix.version }}
eval $(opam env --switch=${{ matrix.version }})
opam pin --yes --no-action add cn .
opam install --yes cn ocamlformat.0.26.2
- name: Set environment variables
run: |
echo "BASE_DATA=$(mktemp)" >> $GITHUB_ENV
echo "PR_DATA=$(mktemp)" >> $GITHUB_ENV
echo "COMMENT=$(mktemp)" >> $GITHUB_ENV
echo "BASE_SHA=$(echo ${{ github.event.pull_request.base.sha }} | cut -c1-8)" >> $GITHUB_ENV
echo "HEAD_SHA=$(echo ${{ github.event.pull_request.head.sha }} | cut -c1-8)" >> $GITHUB_ENV
- name: Run benchmark on PR
run: |
opam switch ${{ matrix.version }}
eval $(opam env --switch=${{ matrix.version }})
cd tests; USE_OPAM='' ./run-ci-benchmarks.sh
mv benchmark-data.json ${{ env.PR_DATA }}
cd ..
- name: Switch to target branch
run: |
git checkout ${{ github.event.pull_request.base.sha }}
- name: Install Cerberus
run: |
opam switch ${{ matrix.version }}
eval $(opam env --switch=${{ matrix.version }})
opam pin --yes --no-action add cerberus-lib .
opam pin --yes --no-action add cerberus .
opam install --yes cerberus
- name: Install CN
run: |
opam switch ${{ matrix.version }}
eval $(opam env --switch=${{ matrix.version }})
opam pin --yes --no-action add cn .
opam install --yes cn ocamlformat.0.26.2
- name: Run benchmark on baseline
run: |
opam switch ${{ matrix.version }}
eval $(opam env --switch=${{ matrix.version }})
cd tests; USE_OPAM='' ./run-ci-benchmarks.sh; mv benchmark-data.json ${{ env.BASE_DATA }}
cd ..
- name: Compare results
run: |
echo 'Benchmark comparison for [`${{ env.HEAD_SHA }}`](${{ github.event.repository.html_url }}/commit/${{ github.event.pull_request.head.sha }}) (PR) vs [`${{ env.BASE_SHA }}`](${{ github.event.repository.html_url }}/commit/${{ github.event.pull_request.base.sha }}) (baseline).' >> ${{ env.COMMENT }}
git checkout ${{ github.event.pull_request.head.sha }}
tests/compare-benchmarks.py ${{ env.BASE_DATA }} ${{ env.PR_DATA }} >> ${{ env.COMMENT }}
- name: 'Comment PR'
uses: actions/[email protected]
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
github.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: require('fs').readFileSync('${{ env.COMMENT }}').toString()
});
73 changes: 73 additions & 0 deletions tests/compare-benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python3

import json
import sys
import tabulate

# Warn if the new code exceeds this percentage threshold of slowdown.
THRESHOLD = 100 # 2x slowdown

def to_diff(new_value, baseline_value):
diff = new_value - baseline_value

percent = (new_value / baseline_value - 1) * 100
output = "{diff:+.2f} ({percent:+.2f}%)".format(diff=diff, percent=percent)

return (output, percent)

def main():
if len(sys.argv) != 3:
print("usage: compare-benchmarks.py <baseline.json> <new.json>", file=sys.stderr)
exit(-1)


with open(sys.argv[2], 'r') as f:
new_data = json.load(f)

new_numbers = {}
for benchmark in new_data:
new_numbers[benchmark['name']] = benchmark['value']

with open(sys.argv[1], 'r') as f:
baseline = json.load(f)

missing = set()
degradation = set()

output = []
for benchmark in baseline:
name = benchmark['name']
baseline_value = benchmark['value']

new_value_m = new_numbers.get(name)
if new_value_m:
(diff, percentage) = to_diff(new_value_m, baseline_value)

if percentage > THRESHOLD:
disp_name = "**" + name + "***"
degradation.add(name)
else:
disp_name = name

output.append([disp_name, baseline_value, new_value_m, diff])

del new_numbers[name]
else:
missing.add(name)

print("")
if len(degradation) != 0:
print("**Warning**: Performance degradations: " + ', '.join(degradation))
if len(missing) != 0:
print("**Warning**: Removed benchmarks: " + ', '.join(missing))
added = new_numbers.keys()
if len(added) != 0:
print("Added benchmarks: " + ', '.join(added))

print("\n# Benchmark comparison\n")

# Benchmark name | Old time (s) | New time (s) | Difference (s)
print(tabulate.tabulate(output, headers=['Benchmark name', 'Baseline time (s)', 'New time (s)', 'Difference (s)'], tablefmt='pipe'))

if __name__ == "__main__":
main()

0 comments on commit c2cf86f

Please sign in to comment.