test(ci): Adjust detectFlakyTests to account for multiple tests in a file (#11653)

Lms24 · web-flow · commit 308e743b62c2 · 2024-04-17T17:51:58.000+02:00
Add a test detection heuristic to the detector to roughly
count the number of tests in a file. This should reduce the number of
times each test is running in case files with multiple tests have
changed. Furthermore, this PR also refactors the run count determination
logic a bit to make it easier to understand.
diff --git a/dev-packages/browser-integration-tests/scripts/detectFlakyTests.ts b/dev-packages/browser-integration-tests/scripts/detectFlakyTests.ts
@@ -1,7 +1,30 @@
 import * as childProcess from 'child_process';
+import * as fs from 'fs';
 import * as path from 'path';
 import * as glob from 'glob';
 
+/**
+ * The number of browsers we run the tests in.
+ */
+const NUM_BROWSERS = 3;
+
+/**
+ * Assume that each test runs for 3s.
+ */
+const ASSUMED_TEST_DURATION_SECONDS = 3;
+
+/**
+ * We keep the runtime of the detector if possible under 30min.
+ */
+const MAX_TARGET_TEST_RUNTIME_SECONDS = 30 * 60;
+
+/**
+ * Running one test 50x is what we consider enough to detect flakiness.
+ * Running one test 5x is the bare minimum
+ */
+const MAX_PER_TEST_RUN_COUNT = 50;
+const MIN_PER_TEST_RUN_COUNT = 5;
+
 async function run(): Promise<void> {
   let testPaths: string[] = [];
 
@@ -20,23 +43,8 @@ ${changedPaths.join('\n')}
     }
   }
 
-  let runCount: number;
-  if (process.env.TEST_RUN_COUNT === 'AUTO') {
-    // No test paths detected: run everything 5x
-    runCount = 5;
-
-    if (testPaths.length > 0) {
-      // Run everything up to 100x, assuming that total runtime is less than 60min.
-      // We assume an average runtime of 3s per test, times 4 (for different browsers) = 12s per detected testPaths
-      // We want to keep overall runtime under 30min
-      const testCount = testPaths.length * 4;
-      const expectedRuntimePerTestPath = testCount * 3;
-      const expectedRuntime = Math.floor((30 * 60) / expectedRuntimePerTestPath);
-      runCount = Math.min(50, Math.max(expectedRuntime, 5));
-    }
-  } else {
-    runCount = parseInt(process.env.TEST_RUN_COUNT || '10');
-  }
+  const repeatEachCount = getPerTestRunCount(testPaths);
+  console.log(`Running tests ${repeatEachCount} times each.`);
 
   const cwd = path.join(__dirname, '../');
 
@@ -45,7 +53,7 @@ ${changedPaths.join('\n')}
       const cp = childProcess.spawn(
         `npx playwright test ${
           testPaths.length ? testPaths.join(' ') : './suites'
-        } --reporter='line' --repeat-each ${runCount}`,
+        } --reporter='line' --repeat-each ${repeatEachCount}`,
         { shell: true, cwd },
       );
 
@@ -88,6 +96,33 @@ ${changedPaths.join('\n')}
   console.log(`☑️ All tests passed.`);
 }
 
+/**
+ * Returns how many time one test should run based on the chosen mode and a bunch of heuristics
+ */
+function getPerTestRunCount(testPaths: string[]) {
+  if (process.env.TEST_RUN_COUNT === 'AUTO' && testPaths.length > 0) {
+    // Run everything up to 100x, assuming that total runtime is less than 60min.
+    // We assume an average runtime of 3s per test, times 4 (for different browsers) = 12s per detected testPaths
+    // We want to keep overall runtime under 30min
+    const estimatedNumberOfTests = testPaths.map(getApproximateNumberOfTests).reduce((a, b) => a + b);
+    console.log(`Estimated number of tests: ${estimatedNumberOfTests}`);
+
+    // tests are usually run against all browsers we test with, so let's assume this
+    const testRunCount = estimatedNumberOfTests * NUM_BROWSERS;
+    console.log(`Estimated test runs for one round: ${testRunCount}`);
+
+    const estimatedTestRuntime = testRunCount * ASSUMED_TEST_DURATION_SECONDS;
+    console.log(`Estimated test runtime: ${estimatedTestRuntime}s`);
+
+    const expectedPerTestRunCount = Math.floor(MAX_TARGET_TEST_RUNTIME_SECONDS / estimatedTestRuntime);
+    console.log(`Expected per-test run count: ${expectedPerTestRunCount}`);
+
+    return Math.min(MAX_PER_TEST_RUN_COUNT, Math.max(expectedPerTestRunCount, MIN_PER_TEST_RUN_COUNT));
+  }
+
+  return parseInt(process.env.TEST_RUN_COUNT || '5');
+}
+
 function getTestPaths(): string[] {
   const paths = glob.sync('suites/**/test.{ts,js}', {
     cwd: path.join(__dirname, '../'),
@@ -111,4 +146,22 @@ function logError(error: unknown) {
   }
 }
 
+/**
+ * Definitely not bulletproof way of getting the number of tests in a file :D
+ * We simply match on `it(`, `test(`, etc and count the matches.
+ *
+ * Note: This test completely disregards parameterized tests (`it.each`, etc) or
+ * skipped/disabled tests and other edge cases. It's just a rough estimate.
+ */
+function getApproximateNumberOfTests(testPath: string): number {
+  try {
+    const content = fs.readFileSync(path.join(process.cwd(), testPath, 'test.ts'), 'utf-8');
+    const matches = content.match(/it\(|test\(|sentryTest\(/g);
+    return Math.max(matches ? matches.length : 1, 1);
+  } catch (e) {
+    console.error(`Could not read file ${testPath}`);
+    return 1;
+  }
+}
+
 run();