Skip to content

Commit 754bfcb

Browse files
committed
chore: remove failed runs from results
Adds a new step to the `process` job that cleans up the benchmark results of every fixture + variation combination across all package managers. This ensures that failed runs do not skew the result numbers, and it makes the system more resilient: a full benchmark can still complete even if a few runs for a specific package manager + fixture + variation fail to complete. Fixes: #11
1 parent 40dbb5e commit 754bfcb

File tree

1 file changed

+111
-1
lines changed

1 file changed

+111
-1
lines changed

.github/workflows/benchmark.yaml

Lines changed: 111 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ on:
55
inputs:
66
fixtures:
77
description: 'The fixture to run the benchmarks on'
8-
default: '["next", "astro", "svelte", "vue"]'
8+
default: '["next", "astro", "vue", "svelte"]'
99
variations:
1010
description: 'The benchmark variations to run'
1111
default: '["cache", "cache+lockfile", "cache+node_modules", "cache+lockfile+node_modules", "clean", "lockfile", "lockfile+node_modules", "node_modules"]'
@@ -80,6 +80,116 @@ jobs:
8080
with:
8181
path: results
8282
pattern: results-*
83+
- name: Clean benchmarks result
84+
uses: actions/github-script@v7
85+
with:
86+
script: |
87+
const fs = require('fs');
88+
const path = require('path');
89+
90+
// Fixture names that may appear in a results directory name. Per the original
// note these mirror the workflow's strategy matrix; "run" is an extra entry.
const fixtures = ["next", "astro", "vue", "svelte", "run"];

// Variation names that may appear in a results directory name, again with the
// extra "run" entry appended after the regular variations.
const variations = [
  "cache",
  "cache+lockfile",
  "cache+node_modules",
  "cache+lockfile+node_modules",
  "clean",
  "lockfile",
  "lockfile+node_modules",
  "node_modules",
  "run",
];
109+
110+
// Helper functions for statistical calculations
111+
/**
 * Arithmetic mean of a list of numeric samples.
 *
 * @param {number[]} times - Samples to average.
 * @returns {number} Sum of the samples divided by their count; NaN when the
 *   list is empty (0 / 0).
 */
function calculateMean(times) {
  let total = 0;
  times.forEach((sample) => {
    total += sample;
  });
  return total / times.length;
}
114+
115+
/**
 * Sample standard deviation of a list of timings around a precomputed mean.
 *
 * The timings are a sample of benchmark runs, so the squared deviations are
 * divided by N - 1 (Bessel's correction) rather than by N.
 * NOTE(review): the result files look like hyperfine exports, which report
 * the sample deviation as well — confirm, so that recomputed and untouched
 * entries stay comparable.
 *
 * @param {number[]} times - Timings to measure the spread of.
 * @param {number} mean - Mean of `times`, as returned by the mean helper.
 * @returns {number} The sample standard deviation, or 0 when fewer than two
 *   timings are given (a spread cannot be estimated from a single value).
 */
function calculateStddev(times, mean) {
  // With fewer than two samples N - 1 would be zero or negative; report no spread.
  if (times.length < 2) {
    return 0;
  }

  const squaredDeviations = times.reduce(
    (sum, time) => sum + Math.pow(time - mean, 2),
    0,
  );
  return Math.sqrt(squaredDeviations / (times.length - 1));
}
119+
120+
/**
 * Median of a list of numeric samples.
 *
 * Works on a sorted copy, so the caller's array is left untouched.
 *
 * @param {number[]} times - Samples to take the median of.
 * @returns {number} The middle value for an odd count, or the average of the
 *   two middle values for an even count.
 */
function calculateMedian(times) {
  const ordered = Array.from(times).sort((left, right) => left - right);
  const count = ordered.length;
  const upperMiddle = Math.floor(count / 2);

  if (count % 2 !== 0) {
    return ordered[upperMiddle];
  }
  return (ordered[upperMiddle - 1] + ordered[upperMiddle]) / 2;
}
127+
128+
// Clean benchmark results: for every fixture/variation pair, load its
// benchmarks.json, drop the runs whose exit code is not 0, recompute the
// summary statistics from the remaining timings and write the file back.
for (const fixture of fixtures) {
  for (const variation of variations) {
    // "run" is only handled when it names both the fixture and the variation;
    // a pair where exactly one side is "run" is skipped.
    const isRunFixture = fixture === "run";
    const isRunVariation = variation === "run";
    if (isRunFixture !== isRunVariation) {
      continue;
    }

    const benchmarkPath = path.join('results', `results-${fixture}-${variation}`, 'benchmarks.json');

    try {
      console.log(`Cleaning benchmark file: ${benchmarkPath}`);
      const benchmarkData = JSON.parse(fs.readFileSync(benchmarkPath, 'utf8'));

      // Nothing to clean (and nothing to write back) without result entries.
      if (!benchmarkData.results || !(benchmarkData.results.length > 0)) {
        console.warn(`No results found in ${benchmarkPath}`);
        continue;
      }

      for (let i = 0; i < benchmarkData.results.length; i++) {
        const result = benchmarkData.results[i];
        const { times, exit_codes } = result;

        // Timings and exit codes are matched by index, so both must be
        // present and of equal length.
        const hasMatchingArrays = times && exit_codes && times.length === exit_codes.length;
        if (!hasMatchingArrays) {
          console.warn(`Invalid times/exit_codes arrays for ${fixture}-${variation} (result ${i})`);
          continue;
        }

        // Keep only the timings whose matching exit code is 0.
        const cleanTimes = times.filter((_, index) => exit_codes[index] === 0);
        const cleanExitCodes = exit_codes.filter((code) => code === 0);

        // When no run succeeded the entry is left exactly as it was.
        if (!(cleanTimes.length > 0)) {
          console.warn(`All runs failed for ${fixture}-${variation} (result ${i})`);
          continue;
        }

        // Recompute the statistics first, then update the entry in one go.
        const mean = calculateMean(cleanTimes);
        Object.assign(result, {
          times: cleanTimes,
          exit_codes: cleanExitCodes,
          mean,
          stddev: calculateStddev(cleanTimes, mean),
          median: calculateMedian(cleanTimes),
          min: Math.min(...cleanTimes),
          max: Math.max(...cleanTimes),
        });

        console.log(`Cleaned ${fixture}-${variation} (result ${i}): ${times.length - cleanTimes.length} failed runs removed, ${cleanTimes.length} valid runs remaining`);
      }

      // Save the cleaned data back to the file
      fs.writeFileSync(benchmarkPath, JSON.stringify(benchmarkData, null, 2));
    } catch (error) {
      // Best effort: a file that is missing or unreadable is reported and the
      // loop moves on to the next combination.
      console.error(`Failed to clean ${benchmarkPath}: ${error.message}`);
    }
  }
}

console.log('Benchmark cleaning completed');
83193
- name: Process Results
84194
run: |
85195
bash ./scripts/process-results.sh

0 commit comments

Comments
 (0)