metrics: check stddev when collecting results

vaind · vaind · commit 10dc4966ee12 · 2023-01-04T18:51:27.000+01:00
diff --git a/packages/replay/metrics/configs/ci/collect.ts b/packages/replay/metrics/configs/ci/collect.ts
@@ -1,16 +1,51 @@
 import { Metrics, MetricsCollector } from '../../src/collector.js';
+import { MetricsStats, NumberProvider } from '../../src/results/metrics-stats.js';
 import { JankTestScenario } from '../../src/scenarios.js';
 import { latestResultFile } from './env.js';
 
+/**
+ * Checks that the standard deviation of one metric across a result set does not exceed `max`,
+ * logging the outcome either way.
+ *
+ * @param stats - Statistics over the collected result set.
+ * @param name - Metric name, used only in the log output.
+ * @param provider - Extracts the metric's numeric value from a single `Metrics` item.
+ * @param max - Largest acceptable standard deviation (inclusive).
+ * @returns `true` if the results may be kept, `false` if they must be discarded.
+ */
+function checkStdDev(stats: MetricsStats, name: string, provider: NumberProvider, max: number): boolean {
+  const value = stats.stddev(provider);
+  // NaN compares as false against any threshold, so it has to be rejected explicitly;
+  // otherwise a result set with a missing or invalid metric value would be accepted.
+  if (value == undefined || Number.isNaN(value)) {
+    console.warn(`✗ | Discarding results because StandardDeviation(${name}) is ${value}`);
+    return false;
+  }
+  if (value > max) {
+    console.warn(`✗ | Discarding results because StandardDeviation(${name}) is larger than ${max}. Actual value: ${value}`);
+    return false;
+  }
+  console.log(`✓ | StandardDeviation(${name}) is ${value} (<= ${max})`);
+  return true;
+}
+
 const collector = new MetricsCollector({ headless: true });
 const result = await collector.execute({
-  name: 'dummy',
+  name: 'jank',
   a: new JankTestScenario(false),
   b: new JankTestScenario(true),
-  runs: 1,
-  tries: 1,
-  async test(_aResults: Metrics[], _bResults: Metrics[]) {
-    return true;
+  runs: 10,
+  tries: 10,
+  // Accept a try only if every metric is stable enough. `&&` short-circuits, so evaluation
+  // (and logging) stops at the first metric that fails its check.
+  async shouldAccept(results: Metrics[]): Promise<boolean> {
+    const stats = new MetricsStats(results);
+    return true
+      && checkStdDev(stats, 'lcp', MetricsStats.lcp, 10)
+      && checkStdDev(stats, 'cls', MetricsStats.cls, 10)
+      && checkStdDev(stats, 'cpu', MetricsStats.cpu, 10)
+      && checkStdDev(stats, 'memory-mean', MetricsStats.memoryMean, 10000)
+      && checkStdDev(stats, 'memory-max', MetricsStats.memoryMax, 10000);
   },
 });
 
diff --git a/packages/replay/metrics/configs/dev/collect.ts b/packages/replay/metrics/configs/dev/collect.ts
@@ -9,7 +9,7 @@ const result = await collector.execute({
   b: new JankTestScenario(true),
   runs: 1,
   tries: 1,
-  async test(_aResults: Metrics[], _bResults: Metrics[]) {
+  async shouldAccept(_results: Metrics[]): Promise<boolean> {
     return true;
   },
 });
diff --git a/packages/replay/metrics/src/collector.ts b/packages/replay/metrics/src/collector.ts
@@ -6,6 +6,7 @@ import { JsHeapUsage, JsHeapUsageSampler, JsHeapUsageSerialized } from './perf/m
 import { PerfMetricsSampler } from './perf/sampler.js';
 import { Result } from './results/result.js';
 import { Scenario, TestCase } from './scenarios.js';
+import { consoleGroup } from './util/console.js';
 import { WebVitals, WebVitalsCollector } from './vitals/index.js';
 
 const cpuThrottling = 4;
@@ -55,37 +56,52 @@ export class MetricsCollector {
 
+  /**
+   * Collects metrics for scenario A and then scenario B of `testCase` and wraps both result sets,
+   * together with `cpuThrottling` and `networkConditions`, in a `Result`.
+   */
   public async execute(testCase: TestCase): Promise<Result> {
     console.log(`Executing test case ${testCase.name}`);
-    console.group();
-    for (let i = 1; i <= testCase.tries; i++) {
-      const aResults = await this._collect('A', testCase.a, testCase.runs);
-      const bResults = await this._collect('B', testCase.b, testCase.runs);
-      if (await testCase.test(aResults, bResults)) {
-        console.groupEnd();
-        console.log(`Test case ${testCase.name} passed on try ${i}/${testCase.tries}`);
-        return new Result(testCase.name, cpuThrottling, networkConditions, aResults, bResults);
-      } else if (i != testCase.tries) {
-        console.log(`Test case ${testCase.name} failed on try ${i}/${testCase.tries}`);
-      } else {
-        console.groupEnd();
-        console.error(`Test case ${testCase.name} failed`);
-      }
-    }
-    throw `Test case execution ${testCase.name} failed after ${testCase.tries} tries.`;
+    return consoleGroup(async () => {
+      const aResults = await this._collect(testCase, 'A', testCase.a);
+      const bResults = await this._collect(testCase, 'B', testCase.b);
+      return new Result(testCase.name, cpuThrottling, networkConditions, aResults, bResults);
+    });
   }
 
-  private async _collect(name: string, scenario: Scenario, runs: number): Promise<Metrics[]> {
-    const label = `Scenario ${name} data collection (total ${runs} runs)`;
-    console.time(label);
-    const results: Metrics[] = [];
-    for (let run = 0; run < runs; run++) {
-      const innerLabel = `Scenario ${name} data collection, run ${run}/${runs}`;
-      console.time(innerLabel);
-      results.push(await this._run(scenario));
-      console.timeEnd(innerLabel);
+  /**
+   * Collects `testCase.runs` metrics samples for one scenario, repeating the whole batch up to
+   * `testCase.tries` times until `testCase.shouldAccept()` approves a batch.
+   *
+   * @param testCase - Supplies `name`, `runs`, `tries` and the `shouldAccept` predicate.
+   * @param name - Scenario label ('A' or 'B'), used in log and timer output.
+   * @param scenario - The scenario to run.
+   * @returns The first accepted batch, containing exactly `testCase.runs` items.
+   * @throws Error if no batch was accepted, including the misconfigured case `tries` < 1.
+   */
+  private async _collect(testCase: TestCase, name: string, scenario: Scenario): Promise<Metrics[]> {
+    const label = `Scenario ${name} data collection (total ${testCase.runs} runs)`;
+    for (let try_ = 1; try_ <= testCase.tries; try_++) {
+      console.time(label);
+      const results: Metrics[] = [];
+      for (let run = 1; run <= testCase.runs; run++) {
+        const innerLabel = `Scenario ${name} data collection, run ${run}/${testCase.runs}`;
+        console.time(innerLabel);
+        results.push(await this._run(scenario));
+        console.timeEnd(innerLabel);
+      }
+      console.timeEnd(label);
+      assert.strictEqual(results.length, testCase.runs);
+      if (await testCase.shouldAccept(results)) {
+        console.log(`Test case ${testCase.name}, scenario ${name} passed on try ${try_}/${testCase.tries}`);
+        return results;
+      }
+      if (try_ !== testCase.tries) {
+        console.log(`Test case ${testCase.name}, scenario ${name} failed on try ${try_}/${testCase.tries}, retrying`);
+      }
     }
-    console.timeEnd(label);
-    assert.strictEqual(results.length, runs);
-    return results;
+    // Reached when every try was rejected, and also when `tries` < 1 so that the loop never ran.
+    // Throwing an Error (rather than a string, or returning an empty array) keeps a stack trace
+    // and prevents an empty result set from reaching `Result`.
+    throw new Error(`Test case ${testCase.name}, scenario ${name} failed after ${testCase.tries} tries.`);
   }
 
   private async _run(scenario: Scenario): Promise<Metrics> {
diff --git a/packages/replay/metrics/src/results/analyzer.ts b/packages/replay/metrics/src/results/analyzer.ts
@@ -48,11 +48,11 @@ export class ResultsAnalyzer {
       items.push({ metric: metric, value: new AnalyzerItemNumberValue(unit, valueA, valueB) })
     }
 
-    pushIfDefined(AnalyzerItemMetric.lcp, AnalyzerItemUnit.ms, aStats.lcp, bStats.lcp);
-    pushIfDefined(AnalyzerItemMetric.cls, AnalyzerItemUnit.ms, aStats.cls, bStats.cls);
-    pushIfDefined(AnalyzerItemMetric.cpu, AnalyzerItemUnit.ratio, aStats.cpu, bStats.cpu);
-    pushIfDefined(AnalyzerItemMetric.memoryAvg, AnalyzerItemUnit.bytes, aStats.memoryMean, bStats.memoryMean);
-    pushIfDefined(AnalyzerItemMetric.memoryMax, AnalyzerItemUnit.bytes, aStats.memoryMax, bStats.memoryMax);
+    pushIfDefined(AnalyzerItemMetric.lcp, AnalyzerItemUnit.ms, aStats.mean(MetricsStats.lcp), bStats.mean(MetricsStats.lcp));
+    pushIfDefined(AnalyzerItemMetric.cls, AnalyzerItemUnit.ms, aStats.mean(MetricsStats.cls), bStats.mean(MetricsStats.cls));
+    pushIfDefined(AnalyzerItemMetric.cpu, AnalyzerItemUnit.ratio, aStats.mean(MetricsStats.cpu), bStats.mean(MetricsStats.cpu));
+    pushIfDefined(AnalyzerItemMetric.memoryAvg, AnalyzerItemUnit.bytes, aStats.mean(MetricsStats.memoryMean), bStats.mean(MetricsStats.memoryMean));
+    pushIfDefined(AnalyzerItemMetric.memoryMax, AnalyzerItemUnit.bytes, aStats.max(MetricsStats.memoryMax), bStats.max(MetricsStats.memoryMax));
 
     return items.filter((item) => item.value != undefined);
   }
diff --git a/packages/replay/metrics/src/results/metrics-stats.ts b/packages/replay/metrics/src/results/metrics-stats.ts
@@ -7,29 +7,30 @@ export type NumberProvider = (metrics: Metrics) => number;
+/** Aggregates a set of collected `Metrics` items into summary statistics, one metric at a time. */
 export class MetricsStats {
   constructor(private _items: Metrics[]) { }
 
+  // Providers that pick a single number out of one `Metrics` item; pass them to the methods below.
+  static readonly lcp: NumberProvider = metrics => metrics.vitals.lcp;
+  static readonly cls: NumberProvider = metrics => metrics.vitals.cls;
+  static readonly cpu: NumberProvider = metrics => metrics.cpu.average;
+  // The memory providers first reduce the item's own snapshots to a single number.
+  static readonly memoryMean: NumberProvider = metrics => ss.mean(Array.from(metrics.memory.snapshots.values()));
+  static readonly memoryMax: NumberProvider = metrics => ss.max(Array.from(metrics.memory.snapshots.values()));
+
+  /** Mean of the provided values across all items, or `undefined` if there are no items. */
   public mean(dataProvider: NumberProvider): number | undefined {
     const numbers = this._items.map(dataProvider);
     return numbers.length > 0 ? ss.mean(numbers) : undefined;
   }
 
-  public get lcp(): number | undefined {
-    return this.mean((metrics) => metrics.vitals.lcp);
-  }
-
-  public get cls(): number | undefined {
-    return this.mean((metrics) => metrics.vitals.cls);
-  }
-
-  public get cpu(): number | undefined {
-    return this.mean((metrics) => metrics.cpu.average);
-  }
-
-  public get memoryMean(): number | undefined {
-    return this.mean((metrics) => ss.mean(Array.from(metrics.memory.snapshots.values())));
+  /** Largest of the provided values across all items, or `undefined` if there are no items. */
+  public max(dataProvider: NumberProvider): number | undefined {
+    const numbers = this._items.map(dataProvider);
+    return numbers.length > 0 ? ss.max(numbers) : undefined;
   }
 
-  public get memoryMax(): number | undefined {
-    const numbers = this._items.map((metrics) => ss.max(Array.from(metrics.memory.snapshots.values())));
-    return numbers.length > 0 ? ss.max(numbers) : undefined;
+  /** `ss.standardDeviation` of the provided values across all items, or `undefined` if there are no items. */
+  public stddev(dataProvider: NumberProvider): number | undefined {
+    const numbers = this._items.map(dataProvider);
+    return numbers.length > 0 ? ss.standardDeviation(numbers) : undefined;
   }
 }
diff --git a/packages/replay/metrics/src/scenarios.ts b/packages/replay/metrics/src/scenarios.ts
@@ -18,9 +18,10 @@ export interface TestCase {
   runs: number;
   tries: number;
 
-  // Test function that will be executed and given scenarios A and B result sets.
-  // Each has exactly `runs` number of items.
-  test(aResults: Metrics[], bResults: Metrics[]): Promise<boolean>;
+  // Test function that is executed with a single scenario's result set, containing exactly `runs` items.
+  // Should return true if this "try" should be accepted and collected.
+  // If false is returned, `MetricsCollector` retries until `tries` attempts have been made.
+  shouldAccept(results: Metrics[]): Promise<boolean>;
 }
 
 // A simple scenario that just loads the given URL.
@@ -37,9 +38,9 @@ export class JankTestScenario implements Scenario {
   public constructor(private _withSentry: boolean) { }
 
   public async run(_: playwright.Browser, page: playwright.Page): Promise<void> {
-    let url = path.resolve(`./test-apps/jank/${  this._withSentry ? 'with-sentry' : 'index'  }.html`);
+    let url = path.resolve(`./test-apps/jank/${this._withSentry ? 'with-sentry' : 'index'}.html`);
     assert(fs.existsSync(url));
-    url = `file:///${  url.replace('\\', '/')}`;
+    url = `file:///${url.replace('\\', '/')}`;
     console.log('Navigating to ', url);
     await page.goto(url, { waitUntil: 'load', timeout: 60000 });
     await new Promise(resolve => setTimeout(resolve, 5000));