diff --git a/dev-packages/browser-integration-tests/scripts/detectFlakyTests.ts b/dev-packages/browser-integration-tests/scripts/detectFlakyTests.ts index 299ebb002692..8fc5d9ff2e92 100644 --- a/dev-packages/browser-integration-tests/scripts/detectFlakyTests.ts +++ b/dev-packages/browser-integration-tests/scripts/detectFlakyTests.ts @@ -1,7 +1,30 @@ import * as childProcess from 'child_process'; +import * as fs from 'fs'; import * as path from 'path'; import * as glob from 'glob'; +/** + * The number of browsers we run the tests in. + */ +const NUM_BROWSERS = 3; + +/** + * Assume that each test runs for 3s. + */ +const ASSUMED_TEST_DURATION_SECONDS = 3; + +/** + * We keep the runtime of the detector if possible under 30min. + */ +const MAX_TARGET_TEST_RUNTIME_SECONDS = 30 * 60; + +/** + * Running one test 50x is what we consider enough to detect flakiness. + * Running one test 5x is the bare minimum + */ +const MAX_PER_TEST_RUN_COUNT = 50; +const MIN_PER_TEST_RUN_COUNT = 5; + async function run(): Promise { let testPaths: string[] = []; @@ -20,23 +43,8 @@ ${changedPaths.join('\n')} } } - let runCount: number; - if (process.env.TEST_RUN_COUNT === 'AUTO') { - // No test paths detected: run everything 5x - runCount = 5; - - if (testPaths.length > 0) { - // Run everything up to 100x, assuming that total runtime is less than 60min. - // We assume an average runtime of 3s per test, times 4 (for different browsers) = 12s per detected testPaths - // We want to keep overall runtime under 30min - const testCount = testPaths.length * 4; - const expectedRuntimePerTestPath = testCount * 3; - const expectedRuntime = Math.floor((30 * 60) / expectedRuntimePerTestPath); - runCount = Math.min(50, Math.max(expectedRuntime, 5)); - } - } else { - runCount = parseInt(process.env.TEST_RUN_COUNT || '10'); - } + const repeatEachCount = getPerTestRunCount(testPaths); + console.log(`Running tests ${repeatEachCount} times each.`); const cwd = path.join(__dirname, '../'); @@ -45,7 +53,7 @@ ${changedPaths.join('\n')} const cp = childProcess.spawn( `npx playwright test ${ testPaths.length ? testPaths.join(' ') : './suites' - } --reporter='line' --repeat-each ${runCount}`, + } --reporter='line' --repeat-each ${repeatEachCount}`, { shell: true, cwd }, ); @@ -88,6 +96,33 @@ ${changedPaths.join('\n')} console.log(`☑️ All tests passed.`); } +/** + * Returns how many time one test should run based on the chosen mode and a bunch of heuristics + */ +function getPerTestRunCount(testPaths: string[]) { + if (process.env.TEST_RUN_COUNT === 'AUTO' && testPaths.length > 0) { + // Run everything up to 100x, assuming that total runtime is less than 60min. + // We assume an average runtime of 3s per test, times 4 (for different browsers) = 12s per detected testPaths + // We want to keep overall runtime under 30min + const estimatedNumberOfTests = testPaths.map(getApproximateNumberOfTests).reduce((a, b) => a + b); + console.log(`Estimated number of tests: ${estimatedNumberOfTests}`); + + // tests are usually run against all browsers we test with, so let's assume this + const testRunCount = estimatedNumberOfTests * NUM_BROWSERS; + console.log(`Estimated test runs for one round: ${testRunCount}`); + + const estimatedTestRuntime = testRunCount * ASSUMED_TEST_DURATION_SECONDS; + console.log(`Estimated test runtime: ${estimatedTestRuntime}s`); + + const expectedPerTestRunCount = Math.floor(MAX_TARGET_TEST_RUNTIME_SECONDS / estimatedTestRuntime); + console.log(`Expected per-test run count: ${expectedPerTestRunCount}`); + + return Math.min(MAX_PER_TEST_RUN_COUNT, Math.max(expectedPerTestRunCount, MIN_PER_TEST_RUN_COUNT)); + } + + return parseInt(process.env.TEST_RUN_COUNT || '5'); +} + function getTestPaths(): string[] { const paths = glob.sync('suites/**/test.{ts,js}', { cwd: path.join(__dirname, '../'), @@ -111,4 +146,22 @@ function logError(error: unknown) { } } +/** + * Definitely not bulletproof way of getting the number of tests in a file :D + * We simply match on `it(`, `test(`, etc and count the matches. + * + * Note: This test completely disregards parameterized tests (`it.each`, etc) or + * skipped/disabled tests and other edge cases. It's just a rough estimate. + */ +function getApproximateNumberOfTests(testPath: string): number { + try { + const content = fs.readFileSync(path.join(process.cwd(), testPath, 'test.ts'), 'utf-8'); + const matches = content.match(/it\(|test\(|sentryTest\(/g); + return Math.max(matches ? matches.length : 1, 1); + } catch (e) { + console.error(`Could not read file ${testPath}`); + return 1; + } +} + run();