diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25397006ee23c..51dd0f81ed147 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,6 +69,8 @@ jobs: env: CI_JOB_NAME: ${{ matrix.name }} CI_JOB_DOC_URL: ${{ matrix.doc_url }} + GITHUB_WORKFLOW_RUN_ID: ${{ github.run_id }} + GITHUB_REPOSITORY: ${{ github.repository }} CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse # commit of PR sha or commit sha. `GITHUB_SHA` is not accurate for PRs. HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }} diff --git a/src/bootstrap/src/utils/metrics.rs b/src/bootstrap/src/utils/metrics.rs index 885fff9c32c5a..862c444962415 100644 --- a/src/bootstrap/src/utils/metrics.rs +++ b/src/bootstrap/src/utils/metrics.rs @@ -9,9 +9,10 @@ use std::fs::File; use std::io::BufWriter; use std::time::{Duration, Instant, SystemTime}; +use build_helper::ci::CiEnv; use build_helper::metrics::{ - JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats, Test, - TestOutcome, TestSuite, TestSuiteMetadata, + CiMetadata, JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats, + Test, TestOutcome, TestSuite, TestSuiteMetadata, }; use sysinfo::{CpuRefreshKind, RefreshKind, System}; @@ -217,7 +218,12 @@ impl BuildMetrics { children: steps.into_iter().map(|step| self.prepare_json_step(step)).collect(), }); - let json = JsonRoot { format_version: CURRENT_FORMAT_VERSION, system_stats, invocations }; + let json = JsonRoot { + format_version: CURRENT_FORMAT_VERSION, + system_stats, + invocations, + ci_metadata: get_ci_metadata(CiEnv::current()), + }; t!(std::fs::create_dir_all(dest.parent().unwrap())); let mut file = BufWriter::new(t!(File::create(&dest))); @@ -245,6 +251,16 @@ impl BuildMetrics { } } +fn get_ci_metadata(ci_env: CiEnv) -> Option { + if ci_env != CiEnv::GitHubActions { + return None; + } + let workflow_run_id = + std::env::var("GITHUB_WORKFLOW_RUN_ID").ok().and_then(|id| id.parse::().ok())?; + let repository = std::env::var("GITHUB_REPOSITORY").ok()?; + Some(CiMetadata { workflow_run_id, repository }) +} + struct MetricsState { finished_steps: Vec, running_steps: Vec, diff --git a/src/build_helper/src/metrics.rs b/src/build_helper/src/metrics.rs index fdff9cd18cea7..8b82e62a32770 100644 --- a/src/build_helper/src/metrics.rs +++ b/src/build_helper/src/metrics.rs @@ -9,6 +9,19 @@ pub struct JsonRoot { pub format_version: usize, pub system_stats: JsonInvocationSystemStats, pub invocations: Vec, + #[serde(default)] + pub ci_metadata: Option, +} + +/// Represents metadata about bootstrap's execution in CI. +#[derive(Serialize, Deserialize)] +pub struct CiMetadata { + /// GitHub run ID of the workflow where bootstrap was executed. + /// Note that the run ID will be shared amongst all jobs executed in that workflow. + pub workflow_run_id: u64, + /// Full name of a GitHub repository where bootstrap was executed in CI. + /// e.g. `rust-lang-ci/rust`. + pub repository: String, } #[derive(Serialize, Deserialize)] diff --git a/src/ci/citool/src/analysis.rs b/src/ci/citool/src/analysis.rs index 2b001f28b0eb4..7fbfad467c641 100644 --- a/src/ci/citool/src/analysis.rs +++ b/src/ci/citool/src/analysis.rs @@ -1,5 +1,6 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Debug; +use std::time::Duration; use build_helper::metrics::{ BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, escape_step_name, @@ -184,11 +185,70 @@ fn render_table(suites: BTreeMap) -> String { } /// Outputs a report of test differences between the `parent` and `current` commits. -pub fn output_test_diffs(job_metrics: HashMap) { +pub fn output_test_diffs(job_metrics: &HashMap) { let aggregated_test_diffs = aggregate_test_diffs(&job_metrics); report_test_diffs(aggregated_test_diffs); } +/// Prints the ten largest differences in bootstrap durations. +pub fn output_largest_duration_changes(job_metrics: &HashMap) { + struct Entry<'a> { + job: &'a JobName, + before: Duration, + after: Duration, + change: f64, + } + + let mut changes: Vec = vec![]; + for (job, metrics) in job_metrics { + if let Some(parent) = &metrics.parent { + let duration_before = parent + .invocations + .iter() + .map(|i| BuildStep::from_invocation(i).duration) + .sum::(); + let duration_after = metrics + .current + .invocations + .iter() + .map(|i| BuildStep::from_invocation(i).duration) + .sum::(); + let pct_change = duration_after.as_secs_f64() / duration_before.as_secs_f64(); + let pct_change = pct_change * 100.0; + // Normalize around 100, to get + for regression and - for improvements + let pct_change = pct_change - 100.0; + changes.push(Entry { + job, + before: duration_before, + after: duration_after, + change: pct_change, + }); + } + } + changes.sort_by(|e1, e2| e1.change.partial_cmp(&e2.change).unwrap().reverse()); + + println!("# Job duration changes"); + for (index, entry) in changes.into_iter().take(10).enumerate() { + println!( + "{}. `{}`: {:.1}s -> {:.1}s ({:.1}%)", + index + 1, + entry.job, + entry.before.as_secs_f64(), + entry.after.as_secs_f64(), + entry.change + ); + } + + println!(); + output_details("How to interpret the job duration changes?", || { + println!( + r#"Job durations can vary a lot, based on the actual runner instance +that executed the job, system noise, invalidated caches, etc. The table above is provided +mostly for t-infra members, for simpler debugging of potential CI slow-downs."# + ); + }); +} + #[derive(Default)] struct TestSuiteRecord { passed: u64, diff --git a/src/ci/citool/src/main.rs b/src/ci/citool/src/main.rs index 5f5c50dc43a0e..6db5eab458cca 100644 --- a/src/ci/citool/src/main.rs +++ b/src/ci/citool/src/main.rs @@ -15,7 +15,7 @@ use clap::Parser; use jobs::JobDatabase; use serde_yaml::Value; -use crate::analysis::output_test_diffs; +use crate::analysis::{output_largest_duration_changes, output_test_diffs}; use crate::cpu_usage::load_cpu_usage; use crate::datadog::upload_datadog_metric; use crate::jobs::RunType; @@ -160,7 +160,7 @@ fn postprocess_metrics( job_name, JobMetrics { parent: Some(parent_metrics), current: metrics }, )]); - output_test_diffs(job_metrics); + output_test_diffs(&job_metrics); return Ok(()); } Err(error) => { @@ -180,7 +180,8 @@ fn post_merge_report(db: JobDatabase, current: String, parent: String) -> anyhow let metrics = download_auto_job_metrics(&db, &parent, ¤t)?; println!("\nComparing {parent} (parent) -> {current} (this PR)\n"); - output_test_diffs(metrics); + output_test_diffs(&metrics); + output_largest_duration_changes(&metrics); Ok(()) } diff --git a/src/ci/citool/src/metrics.rs b/src/ci/citool/src/metrics.rs index 086aa5009f341..a816fb3c4f165 100644 --- a/src/ci/citool/src/metrics.rs +++ b/src/ci/citool/src/metrics.rs @@ -1,5 +1,5 @@ use std::collections::HashMap; -use std::path::Path; +use std::path::{Path, PathBuf}; use anyhow::Context; use build_helper::metrics::{JsonNode, JsonRoot, TestSuite}; @@ -74,6 +74,17 @@ Maybe it was newly added?"#, } pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result { + // Best effort cache to speed-up local re-executions of citool + let cache_path = PathBuf::from(".citool-cache").join(sha).join(format!("{job_name}.json")); + if cache_path.is_file() { + if let Ok(metrics) = std::fs::read_to_string(&cache_path) + .map_err(|err| err.into()) + .and_then(|data| anyhow::Ok::(serde_json::from_str::(&data)?)) + { + return Ok(metrics); + } + } + let url = get_metrics_url(job_name, sha); let mut response = ureq::get(&url).call()?; if !response.status().is_success() { @@ -87,6 +98,13 @@ pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result {summary} - " ); func(); diff --git a/src/ci/docker/run.sh b/src/ci/docker/run.sh index 2805bb1118d82..00d791eeb6b38 100755 --- a/src/ci/docker/run.sh +++ b/src/ci/docker/run.sh @@ -355,6 +355,8 @@ docker \ --env GITHUB_ACTIONS \ --env GITHUB_REF \ --env GITHUB_STEP_SUMMARY="/checkout/obj/${SUMMARY_FILE}" \ + --env GITHUB_WORKFLOW_RUN_ID \ + --env GITHUB_REPOSITORY \ --env RUST_BACKTRACE \ --env TOOLSTATE_REPO_ACCESS_TOKEN \ --env TOOLSTATE_REPO \