diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index 16addc2dc1e75..f4133dfbc95b9 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -56,6 +56,8 @@ impl<'tcx> MonoItem<'tcx> { } } + // Note: if you change how item size estimates work, you might need to + // change NON_INCR_MIN_CGU_SIZE as well. pub fn size_estimate(&self, tcx: TyCtxt<'tcx>) -> usize { match *self { MonoItem::Fn(instance) => { @@ -248,8 +250,14 @@ pub struct CodegenUnit<'tcx> { /// Auxiliary info about a `MonoItem`. #[derive(Copy, Clone, PartialEq, Debug, HashStable)] pub struct MonoItemData { + /// A cached copy of the result of `MonoItem::instantiation_mode`, where + /// `GloballyShared` maps to `false` and `LocalCopy` maps to `true`. + pub inlined: bool, + pub linkage: Linkage, pub visibility: Visibility, + + /// A cached copy of the result of `MonoItem::size_estimate`. pub size_estimate: usize, } diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index 54096abb2e0c8..71aef53192f48 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -248,7 +248,8 @@ where } let size_estimate = mono_item.size_estimate(cx.tcx); - cgu.items_mut().insert(mono_item, MonoItemData { linkage, visibility, size_estimate }); + cgu.items_mut() + .insert(mono_item, MonoItemData { inlined: false, linkage, visibility, size_estimate }); // Get all inlined items that are reachable from `mono_item` without // going via another root item. This includes drop-glue, functions from @@ -263,6 +264,7 @@ where for inlined_item in reachable_inlined_items { // This is a CGU-private copy. cgu.items_mut().entry(inlined_item).or_insert_with(|| MonoItemData { + inlined: true, linkage: Linkage::Internal, visibility: Visibility::Default, size_estimate: inlined_item.size_estimate(cx.tcx), @@ -316,6 +318,60 @@ fn merge_codegen_units<'tcx>( let mut cgu_contents: FxHashMap> = codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect(); + // If N is the maximum number of CGUs, and the CGUs are sorted from largest + // to smallest, we repeatedly find which CGU in codegen_units[N..] has the + // greatest overlap of inlined items with codegen_units[N-1], merge that + // CGU into codegen_units[N-1], then re-sort by size and repeat. + // + // We use inlined item overlap to guide this merging because it minimizes + // duplication of inlined items, which makes LLVM be faster and generate + // better and smaller machine code. + // + // Why merge into codegen_units[N-1]? We want CGUs to have similar sizes, + // which means we don't want codegen_units[0..N] (the already big ones) + // getting any bigger, if we can avoid it. When we have more than N CGUs + // then at least one of the biggest N will have to grow. codegen_units[N-1] + // is the smallest of those, and so has the most room to grow. + let max_codegen_units = cx.tcx.sess.codegen_units().as_usize(); + while codegen_units.len() > max_codegen_units { + // Sort small CGUs to the back. + codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate())); + + let cgu_dst = &codegen_units[max_codegen_units - 1]; + + // Find the CGU that overlaps the most with `cgu_dst`. In the case of a + // tie, favour the earlier (bigger) CGU. + let mut max_overlap = 0; + let mut max_overlap_i = max_codegen_units; + for (i, cgu_src) in codegen_units.iter().enumerate().skip(max_codegen_units) { + if cgu_src.size_estimate() <= max_overlap { + // None of the remaining overlaps can exceed `max_overlap`, so + // stop looking. + break; + } + + let overlap = compute_inlined_overlap(cgu_dst, cgu_src); + if overlap > max_overlap { + max_overlap = overlap; + max_overlap_i = i; + } + } + + let mut cgu_src = codegen_units.swap_remove(max_overlap_i); + let cgu_dst = &mut codegen_units[max_codegen_units - 1]; + + // Move the items from `cgu_src` to `cgu_dst`. Some of them may be + // duplicate inlined items, in which case the destination CGU is + // unaffected. Recalculate size estimates afterwards. + cgu_dst.items_mut().extend(cgu_src.items_mut().drain()); + cgu_dst.compute_size_estimate(); + + // Record that `cgu_dst` now contains all the stuff that was in + // `cgu_src` before. + let mut consumed_cgu_names = cgu_contents.remove(&cgu_src.name()).unwrap(); + cgu_contents.get_mut(&cgu_dst.name()).unwrap().append(&mut consumed_cgu_names); + } + // Having multiple CGUs can drastically speed up compilation. But for // non-incremental builds, tiny CGUs slow down compilation *and* result in // worse generated code. So we don't allow CGUs smaller than this (unless @@ -323,24 +379,22 @@ fn merge_codegen_units<'tcx>( // common in larger programs, so this isn't all that large. const NON_INCR_MIN_CGU_SIZE: usize = 1800; - // Repeatedly merge the two smallest codegen units as long as: - // - we have more CGUs than the upper limit, or - // - (Non-incremental builds only) the user didn't specify a CGU count, and - // there are multiple CGUs, and some are below the minimum size. + // Repeatedly merge the two smallest codegen units as long as: it's a + // non-incremental build, and the user didn't specify a CGU count, and + // there are multiple CGUs, and some are below the minimum size. // // The "didn't specify a CGU count" condition is because when an explicit // count is requested we observe it as closely as possible. For example, // the `compiler_builtins` crate sets `codegen-units = 10000` and it's // critical they aren't merged. Also, some tests use explicit small values // and likewise won't work if small CGUs are merged. - while codegen_units.len() > cx.tcx.sess.codegen_units().as_usize() - || (cx.tcx.sess.opts.incremental.is_none() - && matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_)) - && codegen_units.len() > 1 - && codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE)) + while cx.tcx.sess.opts.incremental.is_none() + && matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_)) + && codegen_units.len() > 1 + && codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE) { // Sort small cgus to the back. - codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate())); + codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate())); let mut smallest = codegen_units.pop().unwrap(); let second_smallest = codegen_units.last_mut().unwrap(); @@ -351,16 +405,7 @@ fn merge_codegen_units<'tcx>( second_smallest.items_mut().extend(smallest.items_mut().drain()); second_smallest.compute_size_estimate(); - // Record that `second_smallest` now contains all the stuff that was - // in `smallest` before. - let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap(); - cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names); - - debug!( - "CodegenUnit {} merged into CodegenUnit {}", - smallest.name(), - second_smallest.name() - ); + // Don't update `cgu_contents`, that's only for incremental builds. } let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx); @@ -439,6 +484,25 @@ fn merge_codegen_units<'tcx>( } } +/// Compute the combined size of all inlined items that appear in both `cgu1` +/// and `cgu2`. +fn compute_inlined_overlap<'tcx>(cgu1: &CodegenUnit<'tcx>, cgu2: &CodegenUnit<'tcx>) -> usize { + // Either order works. We pick the one that involves iterating over fewer + // items. + let (src_cgu, dst_cgu) = + if cgu1.items().len() <= cgu2.items().len() { (cgu1, cgu2) } else { (cgu2, cgu1) }; + + let mut overlap = 0; + for (item, data) in src_cgu.items().iter() { + if data.inlined { + if dst_cgu.items().contains_key(item) { + overlap += data.size_estimate; + } + } + } + overlap +} + fn internalize_symbols<'tcx>( cx: &PartitioningCx<'_, 'tcx>, codegen_units: &mut [CodegenUnit<'tcx>], @@ -870,19 +934,16 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit< all_cgu_sizes.push(cgu.size_estimate()); for (item, data) in cgu.items() { - match item.instantiation_mode(tcx) { - InstantiationMode::GloballyShared { .. } => { - root_items += 1; - root_size += data.size_estimate; - } - InstantiationMode::LocalCopy => { - if inlined_items.insert(item) { - unique_inlined_items += 1; - unique_inlined_size += data.size_estimate; - } - placed_inlined_items += 1; - placed_inlined_size += data.size_estimate; + if !data.inlined { + root_items += 1; + root_size += data.size_estimate; + } else { + if inlined_items.insert(item) { + unique_inlined_items += 1; + unique_inlined_size += data.size_estimate; } + placed_inlined_items += 1; + placed_inlined_size += data.size_estimate; } } } @@ -937,10 +998,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit< let symbol_name = item.symbol_name(tcx).name; let symbol_hash_start = symbol_name.rfind('h'); let symbol_hash = symbol_hash_start.map_or("", |i| &symbol_name[i..]); - let kind = match item.instantiation_mode(tcx) { - InstantiationMode::GloballyShared { .. } => "root", - InstantiationMode::LocalCopy => "inlined", - }; + let kind = if !data.inlined { "root" } else { "inlined" }; let size = data.size_estimate; let _ = with_no_trimmed_paths!(writeln!( s,