New benchmarking system

WPMedia · Sep 27, 2017 · 0c1a718 · 0c1a718
1 parent e08f4fa
commit 0c1a718
Show file tree

Hide file tree

Showing 38 changed files with 1,150 additions and 1,625 deletions.
diff --git a/.eslintrc b/.eslintrc
@@ -26,6 +26,10 @@
     "no-eq-null": "off",
     "no-lonely-if": "off",
     "no-new": "off",
+    "no-restricted-properties": [2, {
+        "object": "Object",
+        "property": "assign"
+    }],
     "no-unused-vars": ["error", {"argsIgnorePattern": "^_$"}],
     "no-var": "error",
     "no-warning-comments": "error",

diff --git a/bench/.eslintrc b/bench/.eslintrc
@@ -4,8 +4,13 @@
       "jsx": true
     }
   },
+  "plugins": [
+    "react"
+  ],
   "rules": {
-    "flowtype/require-valid-file-annotation": [0]
+    "flowtype/require-valid-file-annotation": [0],
+    "react/jsx-uses-vars": [2],
+    "no-restricted-properties": "off"
   },
   "globals": {
     "React": false,

diff --git a/bench/README.md b/bench/README.md
@@ -7,95 +7,49 @@ Benchmarks help us catch performance regressions and improve performance.
 Start the benchmark server
 
 ```bash
-MAPBOX_ACCESS_TOKEN={YOUR MAPBOX ACCESS TOKEN} npm run start-bench
+MAPBOX_ACCESS_TOKEN={YOUR MAPBOX ACCESS TOKEN} yarn start
 ```
 
 To run all benchmarks, open [the benchmark page, `http://localhost:9966/bench`](http://localhost:9966/bench).
 
-To run a specific benchmark, add its name to the url hash, for example [`http://localhost:9966/bench/#buffer`](http://localhost:9966/bench/#buffer).
+To run a specific benchmark, add its name to the url hash, for example [`http://localhost:9966/bench/#Layout`](http://localhost:9966/bench/#Layout).
 
 ## Writing a Benchmark
 
 Good benchmarks
 
  - are precise (i.e. running it multiple times returns roughly the same result)
  - operate in a way that mimics real-world usage
- - use a large quantity of diverse real-world data
+ - use a significant quantity of real-world data
  - are conceptually simple
 
-Benchmarks are implemented as a function that returns an instance of `Evented`.
-
-```js
-createBenchmark(options: {
-    accessToken: string;
-    createMap: (options: {
-        width: number;
-        height: number;
-        ... // supports all options for the Map constructor
-    }):Map
-}): Evented
-```
-
-The instance of `Evented` may fire any number of `log` and `error` events. The
-instance of `Evented` must fire exactly one `end` event.
-
-### `log`
-
-Fire the `log` event to report benchmark progress to the user.
+Benchmarks are implemented by extending the `Benchmark` class and implementing at least the `bench` method.
+If the benchmark needs to do setup or teardown work that should not be included in the measured time, you
+can implement the `setup` or `teardown` methods. All three of these methods may return a `Promise` if they
+need to do asynchronous work (or they can act synchronously and return `undefined`).
 
-```js
-{
-    message: string;
-    color: string = 'blue'; // name of a Mapbox base color https://mapbox.com/base/styling/color
-}
-```
-
-If your benchmark has a notion of running multiple "samples", you might emit
-one `log` event per sample.
-
-```js
-benchmark.fire('log', {
-    message: 'Finished sample ' + i + ' in ' + formatNumber(time) + ' ms'
-});
-```
+See the JSDoc comments on the `Benchmark` class for more details, and the existing benchmarks for examples.
 
-These events have no machine-semantic meaning.
+## Interpreting benchmark results
 
-### `end`
+The benchmark harness runs each benchmark's `bench` method a lot of times -- until it thinks it has enough
+samples to do an analysis -- and records how long each call takes. From these samples, it creates summary
+statistics and plots that help in determining whether a given change increased or decreased performance.
 
-Fire the `end` event to indicate the benchmark has finished running and report
-its results.
+* **Mean**, **Minimum**, and **Deviation** are the standard summary statistics.
+* **R² Slope / Correlation** are measures derived from comparing increasingly large groups of samples (1 sample,
+2 samples, 3 samples, ...) to the sum of those samples' execution time. Ideally, the number of samples is
+perfectly linearly correlated to the sum of execution times. If it is, then the slope of the line is equivalent
+to the average execution time. But usually, the correlation is not perfect due to natural variance and outliers.
+The R² correlation indicates how good the linear approximation is. Values greater than 0.99 are good. Less
+than 0.99 is iffy (⚠️), and less than 0.90 means something is confounding the results, and they should be
+regarded as unreliable (🛑).
+* The top plot shows the distribution of samples, both by plotting each individual sample (on the right),
+and by plotting a kernel density estimate. On the right, you can also see (from left to right) the mean,
+minimum and maximum sample, and sample values at the first quartile, second quartile (median), and third quartile.
+* The bottom plot shows the R² analysis and resulting linear approximation.
 
-These events have both human-readable results (`message`) and machine-readable results (`score`). Smaller `score`s are "better."  Optionally, an array of raw sample data (`samples`) may also be included.
+## Posting benchmark results to PRs
 
-```js
-{
-    message: string;
-    score: number;
-    ?samples: Array
-}
-```
-
-```js
-benchmark.fire('end', {
-    message: 'Average time is ' + formatNumber(averageTime)) + 'ms',
-    score: averageTime,
-    samples: [
-        ['sample', 'value'],
-        [1, sample1Time],
-        [2, sample2Time],
-        [3, sample3Time],
-        ...
-    ]
-});
-```
-
-### `error`
-
-Fire the `error` event to indicate the benchmark has encountered an error.
-
-```js
-{
-    error: Error;
-}
-```
+We recommend installing a browser extension that can take full-page snapshots, e.g.
+[FireShot](https://chrome.google.com/webstore/detail/take-webpage-screenshots/mcbpblocgmgfnpjjppndjkmgjaogfceg).
diff --git a/bench/benchmarks.js b/bench/benchmarks.js
@@ -1,24 +1,30 @@
+// @flow
+
 'use strict';
 
+require('../src').accessToken = require('./lib/access_token');
+
+window.mapboxglVersions = window.mapboxglVersions || [];
 window.mapboxglBenchmarks = window.mapboxglBenchmarks || {};
 
 const version = process.env.BENCHMARK_VERSION;
-function registerBenchmark(name, benchmark) {
-    window.mapboxglBenchmarks[name] = window.mapboxglBenchmarks[name] || {};
-    window.mapboxglBenchmarks[name][version] = benchmark;
+window.mapboxglVersions.push(version);
+
+function register(Benchmark) {
+    window.mapboxglBenchmarks[Benchmark.name] = window.mapboxglBenchmarks[Benchmark.name] || {};
+    window.mapboxglBenchmarks[Benchmark.name][version] = new Benchmark();
 }
 
-registerBenchmark('map-load', require('./benchmarks/map_load'));
-registerBenchmark('style-load', require('./benchmarks/style_load'));
-registerBenchmark('buffer', require('./benchmarks/buffer'));
-registerBenchmark('tile_layout_dds', require('./benchmarks/tile_layout_dds'));
-registerBenchmark('fps', require('./benchmarks/fps'));
-registerBenchmark('frame-duration', require('./benchmarks/frame_duration'));
-registerBenchmark('query-point', require('./benchmarks/query_point'));
-registerBenchmark('query-box', require('./benchmarks/query_box'));
-registerBenchmark('geojson-setdata-small', require('./benchmarks/geojson_setdata_small'));
-registerBenchmark('geojson-setdata-large', require('./benchmarks/geojson_setdata_large'));
-registerBenchmark('filter', require('./benchmarks/filter'));
+register(require('./benchmarks/layout'));
+register(require('./benchmarks/layout_dds'));
+register(require('./benchmarks/paint'));
+register(require('./benchmarks/map_load'));
+register(require('./benchmarks/style_validate'));
+register(require('./benchmarks/style_layer_create'));
+register(require('./benchmarks/query_point'));
+register(require('./benchmarks/query_box'));
+register(require('./benchmarks/filter_create'));
+register(require('./benchmarks/filter_evaluate'));
 
 // Ensure the global worker pool is never drained. Browsers have resource limits
 // on the max number of workers that can be created per page.

diff --git a/bench/benchmarks/buffer.js b/bench/benchmarks/buffer.js