Skip to content

Inline overlap based CGU merging #113777

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions compiler/rustc_middle/src/mir/mono.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ impl<'tcx> MonoItem<'tcx> {
}
}

// Note: if you change how item size estimates work, you might need to
// change NON_INCR_MIN_CGU_SIZE as well.
pub fn size_estimate(&self, tcx: TyCtxt<'tcx>) -> usize {
match *self {
MonoItem::Fn(instance) => {
Expand Down Expand Up @@ -248,8 +250,14 @@ pub struct CodegenUnit<'tcx> {
/// Auxiliary info about a `MonoItem`.
///
/// One of these is stored alongside each `MonoItem` placed in a
/// `CodegenUnit`'s item map.
#[derive(Copy, Clone, PartialEq, Debug, HashStable)]
pub struct MonoItemData {
/// A cached copy of the result of `MonoItem::instantiation_mode`, where
/// `GloballyShared` maps to `false` and `LocalCopy` maps to `true`.
pub inlined: bool,

/// The linkage recorded for this item. CGU-private copies of inlined
/// items are given `Linkage::Internal` by the partitioner.
pub linkage: Linkage,
/// The visibility recorded for this item. CGU-private copies of inlined
/// items are given `Visibility::Default` by the partitioner.
pub visibility: Visibility,

/// A cached copy of the result of `MonoItem::size_estimate`.
pub size_estimate: usize,
}

Expand Down
132 changes: 95 additions & 37 deletions compiler/rustc_monomorphize/src/partitioning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ where
}
let size_estimate = mono_item.size_estimate(cx.tcx);

cgu.items_mut().insert(mono_item, MonoItemData { linkage, visibility, size_estimate });
cgu.items_mut()
.insert(mono_item, MonoItemData { inlined: false, linkage, visibility, size_estimate });

// Get all inlined items that are reachable from `mono_item` without
// going via another root item. This includes drop-glue, functions from
Expand All @@ -263,6 +264,7 @@ where
for inlined_item in reachable_inlined_items {
// This is a CGU-private copy.
cgu.items_mut().entry(inlined_item).or_insert_with(|| MonoItemData {
inlined: true,
linkage: Linkage::Internal,
visibility: Visibility::Default,
size_estimate: inlined_item.size_estimate(cx.tcx),
Expand Down Expand Up @@ -316,31 +318,83 @@ fn merge_codegen_units<'tcx>(
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();

// If N is the maximum number of CGUs, and the CGUs are sorted from largest
// to smallest, we repeatedly find which CGU in codegen_units[N..] has the
// greatest overlap of inlined items with codegen_units[N-1], merge that
// CGU into codegen_units[N-1], then re-sort by size and repeat.
//
// We use inlined item overlap to guide this merging because it minimizes
// duplication of inlined items, which makes LLVM faster and lets it
// generate better and smaller machine code.
//
// Why merge into codegen_units[N-1]? We want CGUs to have similar sizes,
// which means we don't want codegen_units[0..N] (the already big ones)
// getting any bigger, if we can avoid it. When we have more than N CGUs
// then at least one of the biggest N will have to grow. codegen_units[N-1]
// is the smallest of those, and so has the most room to grow.
let max_codegen_units = cx.tcx.sess.codegen_units().as_usize();
while codegen_units.len() > max_codegen_units {
// Sort small CGUs to the back.
codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));

let cgu_dst = &codegen_units[max_codegen_units - 1];

// Find the CGU that overlaps the most with `cgu_dst`. In the case of a
// tie, favour the earlier (bigger) CGU.
let mut max_overlap = 0;
let mut max_overlap_i = max_codegen_units;
for (i, cgu_src) in codegen_units.iter().enumerate().skip(max_codegen_units) {
if cgu_src.size_estimate() <= max_overlap {
// None of the remaining overlaps can exceed `max_overlap`, so
// stop looking.
break;
}

let overlap = compute_inlined_overlap(cgu_dst, cgu_src);
if overlap > max_overlap {
max_overlap = overlap;
max_overlap_i = i;
}
}

let mut cgu_src = codegen_units.swap_remove(max_overlap_i);
let cgu_dst = &mut codegen_units[max_codegen_units - 1];

// Move the items from `cgu_src` to `cgu_dst`. Some of them may be
// duplicate inlined items, in which case the destination CGU is
// unaffected. Recalculate size estimates afterwards.
cgu_dst.items_mut().extend(cgu_src.items_mut().drain());
cgu_dst.compute_size_estimate();

// Record that `cgu_dst` now contains all the stuff that was in
// `cgu_src` before.
let mut consumed_cgu_names = cgu_contents.remove(&cgu_src.name()).unwrap();
cgu_contents.get_mut(&cgu_dst.name()).unwrap().append(&mut consumed_cgu_names);
}

// Having multiple CGUs can drastically speed up compilation. But for
// non-incremental builds, tiny CGUs slow down compilation *and* result in
// worse generated code. So we don't allow CGUs smaller than this (unless
// there is just one CGU, of course). Note that CGU sizes of 100,000+ are
// common in larger programs, so this isn't all that large.
const NON_INCR_MIN_CGU_SIZE: usize = 1800;

// Repeatedly merge the two smallest codegen units as long as:
// - we have more CGUs than the upper limit, or
// - (Non-incremental builds only) the user didn't specify a CGU count, and
// there are multiple CGUs, and some are below the minimum size.
// Repeatedly merge the two smallest codegen units as long as: it's a
// non-incremental build, and the user didn't specify a CGU count, and
// there are multiple CGUs, and some are below the minimum size.
//
// The "didn't specify a CGU count" condition is because when an explicit
// count is requested we observe it as closely as possible. For example,
// the `compiler_builtins` crate sets `codegen-units = 10000` and it's
// critical they aren't merged. Also, some tests use explicit small values
// and likewise won't work if small CGUs are merged.
while codegen_units.len() > cx.tcx.sess.codegen_units().as_usize()
|| (cx.tcx.sess.opts.incremental.is_none()
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
&& codegen_units.len() > 1
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE))
while cx.tcx.sess.opts.incremental.is_none()
&& matches!(cx.tcx.sess.codegen_units(), CodegenUnits::Default(_))
&& codegen_units.len() > 1
&& codegen_units.iter().any(|cgu| cgu.size_estimate() < NON_INCR_MIN_CGU_SIZE)
{
// Sort small cgus to the back.
codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
codegen_units.sort_by_cached_key(|cgu| cmp::Reverse(cgu.size_estimate()));

let mut smallest = codegen_units.pop().unwrap();
let second_smallest = codegen_units.last_mut().unwrap();
Expand All @@ -351,16 +405,7 @@ fn merge_codegen_units<'tcx>(
second_smallest.items_mut().extend(smallest.items_mut().drain());
second_smallest.compute_size_estimate();

// Record that `second_smallest` now contains all the stuff that was
// in `smallest` before.
let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap();
cgu_contents.get_mut(&second_smallest.name()).unwrap().append(&mut consumed_cgu_names);

debug!(
"CodegenUnit {} merged into CodegenUnit {}",
smallest.name(),
second_smallest.name()
);
// Don't update `cgu_contents`, that's only for incremental builds.
}

let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
Expand Down Expand Up @@ -439,6 +484,25 @@ fn merge_codegen_units<'tcx>(
}
}

/// Returns the total size estimate of the inlined items that are present in
/// both `cgu1` and `cgu2`.
fn compute_inlined_overlap<'tcx>(cgu1: &CodegenUnit<'tcx>, cgu2: &CodegenUnit<'tcx>) -> usize {
    // Either order works, so walk whichever CGU holds fewer items and probe
    // the other one for membership. On a tie, `cgu1` is the one walked.
    let (walked, probed) =
        if cgu2.items().len() < cgu1.items().len() { (cgu2, cgu1) } else { (cgu1, cgu2) };

    walked
        .items()
        .iter()
        // Only inlined items that also occur in the other CGU count.
        .filter(|(item, data)| data.inlined && probed.items().contains_key(*item))
        .map(|(_, data)| data.size_estimate)
        .sum()
}

fn internalize_symbols<'tcx>(
cx: &PartitioningCx<'_, 'tcx>,
codegen_units: &mut [CodegenUnit<'tcx>],
Expand Down Expand Up @@ -870,19 +934,16 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
all_cgu_sizes.push(cgu.size_estimate());

for (item, data) in cgu.items() {
match item.instantiation_mode(tcx) {
InstantiationMode::GloballyShared { .. } => {
root_items += 1;
root_size += data.size_estimate;
}
InstantiationMode::LocalCopy => {
if inlined_items.insert(item) {
unique_inlined_items += 1;
unique_inlined_size += data.size_estimate;
}
placed_inlined_items += 1;
placed_inlined_size += data.size_estimate;
if !data.inlined {
root_items += 1;
root_size += data.size_estimate;
} else {
if inlined_items.insert(item) {
unique_inlined_items += 1;
unique_inlined_size += data.size_estimate;
}
placed_inlined_items += 1;
placed_inlined_size += data.size_estimate;
}
}
}
Expand Down Expand Up @@ -937,10 +998,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
let symbol_name = item.symbol_name(tcx).name;
let symbol_hash_start = symbol_name.rfind('h');
let symbol_hash = symbol_hash_start.map_or("<no hash>", |i| &symbol_name[i..]);
let kind = match item.instantiation_mode(tcx) {
InstantiationMode::GloballyShared { .. } => "root",
InstantiationMode::LocalCopy => "inlined",
};
let kind = if !data.inlined { "root" } else { "inlined" };
let size = data.size_estimate;
let _ = with_no_trimmed_paths!(writeln!(
s,
Expand Down