Skip to content

Commit

Permalink
fix(ci): Balance splits across benchmarking CI jobs according to the …
Browse files Browse the repository at this point in the history
…number of CPU cores (#5099)

* Fix how balancing over CPU cores works, so now tasks are split by variants instead of only directories
* Fix bug with taskset trying to assign to non-existing core
* Fix bug with off by 1 error in benchmarks count
* Fail job with exit code 1 if sirun could not start it
* Fail CI job when GROUP_SIZE is larger than number of CPU cores
  • Loading branch information
ddyurchenko authored Jan 13, 2025
1 parent 587957e commit 435109b
Showing 1 changed file with 31 additions and 26 deletions.
57 changes: 31 additions & 26 deletions benchmark/sirun/runall.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

set -e

# Temporary until merged to master
wget -O sirun.tar.gz https://github.com/DataDog/sirun/releases/download/v0.1.10/sirun-v0.1.10-x86_64-unknown-linux-musl.tar.gz \
&& tar -xzf sirun.tar.gz \
Expand Down Expand Up @@ -34,46 +36,49 @@ echo "using Node.js ${VERSION}"
# Defaults: the first core id comes from CPU_START_ID (24 when unset or empty);
# SPLITS and GROUP each default to 1 when unset or empty.
CPU_AFFINITY="${CPU_START_ID:-24}" # reset for each node.js version
SPLITS=${SPLITS:-1}
GROUP=${GROUP:-1}
# NOTE(review): BENCH_COUNT is zeroed twice and, in the loop below, incremented
# both once per directory and once per variant. This text comes from a diff
# view without +/- markers, so these look like the removed and the added
# versions of the same logic shown together — confirm against the real file.
BENCH_COUNT=0

BENCH_COUNT=0
# Visit every entry of the current directory; only directories are counted.
for D in *; do
if [ -d "${D}" ]; then
BENCH_COUNT=$(($BENCH_COUNT+1))
cd "${D}"
# get-variants.js is run from inside the benchmark directory; its output is
# word-split below, one word per variant.
variants="$(node ../get-variants.js)"
for V in $variants; do BENCH_COUNT=$(($BENCH_COUNT+1)); done
cd ..
fi
done

# over count so that it can be divided by bash as an integer
# NOTE(review): adding BENCH_COUNT % SPLITS does not always produce a multiple
# of SPLITS (e.g. 7 with SPLITS=3 gives 8, and 8/3 truncates to 2) — verify
# whether this is the superseded computation.
BENCH_COUNT=$(($BENCH_COUNT+$BENCH_COUNT%$SPLITS))
GROUP_SIZE=$(($BENCH_COUNT/$SPLITS))

#######################################
# Start every variant of the benchmark in the current directory as a
# background job, advancing the core id by one per variant.
# Must be called from inside a benchmark directory: the helper scripts and
# the results file are addressed relative to the parent directory.
# Globals:   CPU_AFFINITY (read, incremented once per variant)
#            SIRUN_VARIANT (exported, set to the variant being started)
# Arguments: $1 - benchmark name, used only in log messages
# Outputs:   progress lines on stdout; stdout of run-one-variant.js is
#            appended to ../results.ndjson
# Returns:   the exit status of get-variants.js if listing variants fails
#######################################
run_all_variants () {
  # Declare and assign separately so a get-variants.js failure is not masked
  # by the always-successful `local` builtin.
  local variants
  variants="$(node ../get-variants.js)" || return

  node ../squash-affinity.js

  # $variants is left unquoted on purpose: it is split into one word per variant.
  local V
  for V in $variants; do
    echo "running ${1}/${V} in background, pinned to core ${CPU_AFFINITY}..."

    export SIRUN_VARIANT="$V"

    # Background subshell; the "finished" line is printed only when the run succeeds.
    (time node ../run-one-variant.js >> ../results.ndjson && echo "${1}/${V} finished.") &
    # Plain assignment instead of ((...)), which returns status 1 when the
    # result is 0 and would abort a script running under `set -e`.
    CPU_AFFINITY=$((CPU_AFFINITY + 1))
  done
}
# Benchmarks per group: ceiling of BENCH_COUNT / SPLITS.
GROUP_SIZE=$(($(($BENCH_COUNT+$SPLITS-1))/$SPLITS)) # round up

# Running index plus the window [BENCH_START, BENCH_END) selected by GROUP.
# With these formulas GROUP=1 yields a window starting at index 0.
BENCH_INDEX=0
BENCH_END=$(($GROUP_SIZE*$GROUP))
BENCH_START=$(($BENCH_END-$GROUP_SIZE))

# Abort with exit code 1 when one group holds more benchmarks than the 24
# cores named in the message below.
if [[ ${GROUP_SIZE} -gt 24 ]]; then
echo "Group size ${GROUP_SIZE} is larger than available number of CPU cores on Benchmarking Platform machines (24 cores)"
exit 1
fi

# Walk every directory and start, as background jobs, the benchmarks whose
# running index BENCH_INDEX falls inside [BENCH_START, BENCH_END).
# NOTE(review): this text comes from a diff view without +/- markers. It
# appears to show the removed directory-level dispatch (the run_all_variants
# call and the per-directory BENCH_INDEX increment) together with the added
# per-variant dispatch — confirm against the real file before running it.
for D in *; do
if [ -d "${D}" ]; then
# Directory-level window check: run all variants of this directory at once.
if [[ ${BENCH_INDEX} -ge ${BENCH_START} && ${BENCH_INDEX} -lt ${BENCH_END} ]]; then
cd "${D}"
run_all_variants $D
cd ..
fi
cd "${D}"
# The helper scripts live in the parent directory and are run from inside
# the benchmark directory.
variants="$(node ../get-variants.js)"

node ../squash-affinity.js

# Relies on word splitting of $variants: one loop iteration per variant.
for V in $variants; do
# Variant-level window check: only variants inside the window are started,
# but every variant advances BENCH_INDEX.
if [[ ${BENCH_INDEX} -ge ${BENCH_START} && ${BENCH_INDEX} -lt ${BENCH_END} ]]; then
echo "running $((BENCH_INDEX+1)) out of ${BENCH_COUNT}, ${D}/${V} in background, pinned to core ${CPU_AFFINITY}..."

export SIRUN_VARIANT=$V

# Background subshell: stdout of run-one-variant.js is appended to
# ../results.ndjson; the "finished" line is printed only if it succeeds.
(time node ../run-one-variant.js >> ../results.ndjson && echo "${D}/${V} finished.") &
# Advance the core id reported for the next started variant.
((CPU_AFFINITY=CPU_AFFINITY+1))
fi

BENCH_INDEX=$(($BENCH_INDEX+1))
done

BENCH_INDEX=$(($BENCH_INDEX+1))
cd ..
fi
done
# NOTE(review): nothing in this span waits for the background jobs; presumably
# that happens in the collapsed part of the file below — confirm.

Expand Down

0 comments on commit 435109b

Please sign in to comment.