Grow RawVec to fill the allocator bins tighter.

dpc · dpc · commit dbc2ef8ada50 · 2022-09-04T16:17:10.000-07:00
AFAIK, most if not all memory allocators use some combination of evenly sized bins. Typically these bins are power-of-two sized, sometimes supplemented with some 0b1100xxx... bins as well. Most of the time every allocation is also prefixed with a pointer to some per-allocation metadata, adding a fixed overhead to every requested allocation. This can be observed with: ```rust fn main() { let s = 24; let v1: Vec<u8> = Vec::with_capacity(s); let v2: Vec<u8> = Vec::with_capacity(s); let v3: Vec<u8> = Vec::with_capacity(s); println!("{:?}", v1.as_ptr()); println!("{:?}", v2.as_ptr()); println!("{:?}", v3.as_ptr()); } ``` https://play.rust-lang.org/?version=stable&mode=release&edition=2021&gist=e8cea915323ad742c9d78863e3884587 For `let s = 24` the pointers are 32 bytes apart, but increasing `s` to 25 makes them 64 bytes apart, as the allocation falls into the next larger bin. This made me think that the default way of growing collections in Rust (doubling their capacity) is degenerate in most common cases. Most types have a size that is a power of two, so doubling the capacity makes almost every allocation waste almost 50% of the space actually allocated for it. By growing the capacity so that it fills the bin well, we can possibly avoid some needless allocations and lower the memory consumption.
diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs
@@ -119,6 +119,7 @@
 #![feature(fn_traits)]
 #![feature(hasher_prefixfree_extras)]
 #![feature(inplace_iteration)]
+#![feature(int_roundings)]
 #![feature(iter_advance_by)]
 #![feature(iter_next_chunk)]
 #![feature(layout_for_ptr)]
diff --git a/library/alloc/src/raw_vec.rs b/library/alloc/src/raw_vec.rs
@@ -1,7 +1,6 @@
 #![unstable(feature = "raw_vec_internals", reason = "unstable const warnings", issue = "none")]
 
 use core::alloc::LayoutError;
-use core::cmp;
 use core::intrinsics;
 use core::mem::{self, ManuallyDrop, MaybeUninit};
 use core::ops::Drop;
@@ -386,13 +385,32 @@ impl<T, A: Allocator> RawVec<T, A> {
             return Err(CapacityOverflow.into());
         }
 
+        // Size of allocator's per-allocation overhead we expect
+        // FIXME: maybe two pointers to be on the safe side? It could potentially
+        // be platform-dependent.
+        let alloc_overhead_size = mem::size_of::<usize>();
+
         // Nothing we can really do about these checks, sadly.
         let required_cap = len.checked_add(additional).ok_or(CapacityOverflow)?;
 
-        // This guarantees exponential growth. The doubling cannot overflow
-        // because `cap <= isize::MAX` and the type of `cap` is `usize`.
-        let cap = cmp::max(self.cap * 2, required_cap);
-        let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap);
+        let alloc_size = required_cap.checked_mul(mem::size_of::<T>()).ok_or(CapacityOverflow)?;
+        // Add the overhead
+        let alloc_size = alloc_size.checked_add(alloc_overhead_size).ok_or(CapacityOverflow)?;
+
+        // Since memory allocators tend to use power of two sized bins, find the
+        // bin size we will fall into.
+        debug_assert!(alloc_size > 1);
+        let bin_size = usize::MAX >> (alloc_size - 1).leading_zeros(); // + 1 skipped to prevent overflow
+
+        // Leave some room for allocators that add fixed overhead (usually
+        // one pointer-size)
+        let aligned_alloc_size = bin_size.saturating_sub(alloc_overhead_size - 1) /* the +1 skipped from the previous line turned into -1 here */ ;
+
+        // Align the capacity to fit the bin
+        let cap = aligned_alloc_size / mem::size_of::<T>();
+        // Since we've added the overhead to `alloc_size`, we should never
+        // end up with a smaller cap after aligning
+        debug_assert!(required_cap <= cap);
 
         let new_layout = Layout::array::<T>(cap);