From d86608205069aed5c78bcc38dd26bcf4213e23a0 Mon Sep 17 00:00:00 2001 From: Emerentius Date: Mon, 30 Apr 2018 13:09:10 +0200 Subject: [PATCH 1/4] optimize joining and concatenation for slices for both Vec and String - eliminates the boolean first flag in fn join() for String only - eliminates repeated bounds checks in join(), concat() - adds fast paths for small string separators up to a len of 4 bytes --- src/liballoc/slice.rs | 22 +++---- src/liballoc/str.rs | 138 +++++++++++++++++++++++++++++++----------- 2 files changed, 113 insertions(+), 47 deletions(-) diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs index 161493f389226..82578c3206f7c 100644 --- a/src/liballoc/slice.rs +++ b/src/liballoc/slice.rs @@ -566,18 +566,18 @@ impl> SliceConcatExt for [V] { } fn join(&self, sep: &T) -> Vec { - let size = self.iter().fold(0, |acc, v| acc + v.borrow().len()); - let mut result = Vec::with_capacity(size + self.len()); - let mut first = true; - for v in self { - if first { - first = false - } else { - result.push(sep.clone()) + let mut iter = self.iter(); + iter.next().map_or(vec![], |first| { + let size = self.iter().fold(0, |acc, v| acc + v.borrow().len()); + let mut result = Vec::with_capacity(size + self.len()); + result.extend_from_slice(first.borrow()); + + for v in iter { + result.push(sep.clone()); + result.extend_from_slice(v.borrow()) } - result.extend_from_slice(v.borrow()) - } - result + result + }) } fn connect(&self, sep: &T) -> Vec { diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 823e56b64e398..45daabf86abdb 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -86,52 +86,118 @@ impl> SliceConcatExt for [S] { type Output = String; fn concat(&self) -> String { - if self.is_empty() { - return String::new(); - } - - // `len` calculation may overflow but push_str will check boundaries - let len = self.iter().map(|s| s.borrow().len()).sum(); - let mut result = String::with_capacity(len); - - for s in self { - result.push_str(s.borrow()) - } - - result + self.join("") } fn join(&self, sep: &str) -> String { - if self.is_empty() { - return String::new(); + unsafe { + String::from_utf8_unchecked( join_generic_copy(self, sep.as_bytes()) ) } + } - // concat is faster - if sep.is_empty() { - return self.concat(); - } + fn connect(&self, sep: &str) -> String { + self.join(sep) + } +} - // this is wrong without the guarantee that `self` is non-empty - // `len` calculation may overflow but push_str but will check boundaries - let len = sep.len() * (self.len() - 1) + - self.iter().map(|s| s.borrow().len()).sum::(); - let mut result = String::with_capacity(len); - let mut first = true; +macro_rules! spezialize_for_lengths { + ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => { + let mut target = $target; + let iter = $iter; + let sep_len = $separator.len(); + let sep_bytes = $separator; + match $separator.len() { + $( + // loops with hardcoded sizes run much faster + // specialize the cases with small separator lengths + $num => { + for s in iter { + target.get_unchecked_mut(..$num) + .copy_from_slice(sep_bytes); + + let s_bytes = s.borrow().as_ref(); + let offset = s_bytes.len(); + target = {target}.get_unchecked_mut($num..); + target.get_unchecked_mut(..offset) + .copy_from_slice(s_bytes); + target = {target}.get_unchecked_mut(offset..); + } + }, + )* + 0 => { + // concat, same principle without the separator + for s in iter { + let s_bytes = s.borrow().as_ref(); + let offset = s_bytes.len(); + target.get_unchecked_mut(..offset) + .copy_from_slice(s_bytes); + target = {target}.get_unchecked_mut(offset..); + } + }, + _ => { + // arbitrary non-zero size fallback + for s in iter { + target.get_unchecked_mut(..sep_len) + .copy_from_slice(sep_bytes); + + let s_bytes = s.borrow().as_ref(); + let offset = s_bytes.len(); + target = {target}.get_unchecked_mut(sep_len..); + target.get_unchecked_mut(..offset) + .copy_from_slice(s_bytes); + target = {target}.get_unchecked_mut(offset..); + } + } + } + }; +} - for s in self { - if first { - first = false; - } else { - result.push_str(sep); +// Optimized join implementation that works for both Vec (T: Copy) and String's inner vec +// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262) +// For this reason SliceConcatExt is not specialized for T: Copy and SliceConcatExt is the +// only user of this function. It is left in place for the time when that is fixed. +// +// the bounds for String-join are S: Borrow and for Vec-join Borrow<[T]> +// [T] and str both impl AsRef<[T]> for some T +// => s.borrow().as_ref() and we always have slices +fn join_generic_copy(slice: &[S], sep: &[T]) -> Vec +where + T: Copy, + B: AsRef<[T]> + ?Sized, + S: Borrow, +{ + let sep_len = sep.len(); + let mut iter = slice.iter(); + iter.next().map_or(vec![], |first| { + // this is wrong without the guarantee that `slice` is non-empty + // if the `len` calculation overflows, we'll panic + // we would have run out of memory anyway and the rest of the function requires + // the entire String pre-allocated for safety + // + // this is the exact len of the resulting String + let len = sep_len.checked_mul(slice.len() - 1).and_then(|n| { + slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add) + }).expect("attempt to join into collection with len > usize::MAX"); + + // crucial for safety + let mut result = Vec::with_capacity(len); + + unsafe { + result.extend_from_slice(first.borrow().as_ref()); + + { + let pos = result.len(); + let target = result.get_unchecked_mut(pos..len); + + // copy separator and strs over without bounds checks + // generate loops with hardcoded offsets for small separators + // massive improvements possible (~ x2) + spezialize_for_lengths!(sep, target, iter; 1, 2, 3, 4); } - result.push_str(s.borrow()); + result.set_len(len); } result - } - - fn connect(&self, sep: &str) -> String { - self.join(sep) - } + }) } #[stable(feature = "rust1", since = "1.0.0")] From b2fd7da0cf0b835a540d333b4b72426b4735c586 Mon Sep 17 00:00:00 2001 From: Emerentius Date: Mon, 7 May 2018 17:37:13 +0200 Subject: [PATCH 2/4] add more join tests old tests cover the new fast path of str joining already this adds tests for joining into Strings with long separators (>4 byte) and for joining into Vec, T: Clone + !Copy. Vec will be specialised when specialisation type inference bugs are fixed. --- src/liballoc/tests/slice.rs | 9 +++++++++ src/liballoc/tests/str.rs | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs index 6fd0b33f02a60..3b7eec38609d0 100644 --- a/src/liballoc/tests/slice.rs +++ b/src/liballoc/tests/slice.rs @@ -609,6 +609,15 @@ fn test_join() { assert_eq!(v.join(&0), [1, 0, 2, 0, 3]); } +#[test] +fn test_join_nocopy() { + let v: [String; 0] = []; + assert_eq!(v.join(","), ""); + assert_eq!(["a".to_string(), "ab".into()].join(","), "a,ab"); + assert_eq!(["a".to_string(), "ab".into(), "abc".into()].join(","), "a,ab,abc"); + assert_eq!(["a".to_string(), "ab".into(), "".into()].join(","), "a,ab,"); +} + #[test] fn test_insert() { let mut a = vec![1, 2, 4]; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index d11bf5dc3e9a6..03d295d16e6a0 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -162,6 +162,19 @@ fn test_join_for_different_lengths() { test_join!("-a-bc", ["", "a", "bc"], "-"); } +// join has fast paths for small separators up to 4 bytes +// this tests the slow paths. +#[test] +fn test_join_for_different_lengths_with_long_separator() { + assert_eq!("~~~~~".len(), 15); + + let empty: &[&str] = &[]; + test_join!("", empty, "~~~~~"); + test_join!("a", ["a"], "~~~~~"); + test_join!("a~~~~~b", ["a", "b"], "~~~~~"); + test_join!("~~~~~a~~~~~bc", ["", "a", "bc"], "~~~~~"); +} + #[test] fn test_unsafe_slice() { assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)}); From d0d0885c3fa85fd05946d69599a3ddd886c9671f Mon Sep 17 00:00:00 2001 From: Emerentius Date: Wed, 23 May 2018 07:20:37 +0200 Subject: [PATCH 3/4] compacts join code --- src/liballoc/str.rs | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 45daabf86abdb..4db6d5d715cb1 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -104,7 +104,6 @@ macro_rules! spezialize_for_lengths { ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => { let mut target = $target; let iter = $iter; - let sep_len = $separator.len(); let sep_bytes = $separator; match $separator.len() { $( @@ -112,46 +111,31 @@ macro_rules! spezialize_for_lengths { // specialize the cases with small separator lengths $num => { for s in iter { - target.get_unchecked_mut(..$num) - .copy_from_slice(sep_bytes); - - let s_bytes = s.borrow().as_ref(); - let offset = s_bytes.len(); - target = {target}.get_unchecked_mut($num..); - target.get_unchecked_mut(..offset) - .copy_from_slice(s_bytes); - target = {target}.get_unchecked_mut(offset..); + copy_slice_and_advance!(target, sep_bytes); + copy_slice_and_advance!(target, s.borrow().as_ref()); } }, )* - 0 => { - // concat, same principle without the separator - for s in iter { - let s_bytes = s.borrow().as_ref(); - let offset = s_bytes.len(); - target.get_unchecked_mut(..offset) - .copy_from_slice(s_bytes); - target = {target}.get_unchecked_mut(offset..); - } - }, _ => { // arbitrary non-zero size fallback for s in iter { - target.get_unchecked_mut(..sep_len) - .copy_from_slice(sep_bytes); - - let s_bytes = s.borrow().as_ref(); - let offset = s_bytes.len(); - target = {target}.get_unchecked_mut(sep_len..); - target.get_unchecked_mut(..offset) - .copy_from_slice(s_bytes); - target = {target}.get_unchecked_mut(offset..); + copy_slice_and_advance!(target, sep_bytes); + copy_slice_and_advance!(target, s.borrow().as_ref()); } } } }; } +macro_rules! copy_slice_and_advance { + ($target:expr, $bytes:expr) => { + let len = $bytes.len(); + $target.get_unchecked_mut(..len) + .copy_from_slice($bytes); + $target = {$target}.get_unchecked_mut(len..); + } +} + // Optimized join implementation that works for both Vec (T: Copy) and String's inner vec // Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262) // For this reason SliceConcatExt is not specialized for T: Copy and SliceConcatExt is the @@ -192,7 +176,7 @@ where // copy separator and strs over without bounds checks // generate loops with hardcoded offsets for small separators // massive improvements possible (~ x2) - spezialize_for_lengths!(sep, target, iter; 1, 2, 3, 4); + spezialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4); } result.set_len(len); } From 12bd28874697d600d347518c8636053b92e81801 Mon Sep 17 00:00:00 2001 From: Emerentius Date: Fri, 25 May 2018 23:53:22 +0200 Subject: [PATCH 4/4] incorporate changes from code review further reduce unsafe fn calls reduce right drift assert! sufficient capacity --- src/liballoc/slice.rs | 24 +++++++++-------- src/liballoc/str.rs | 60 ++++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 38 deletions(-) diff --git a/src/liballoc/slice.rs b/src/liballoc/slice.rs index 82578c3206f7c..90bc2f9769c88 100644 --- a/src/liballoc/slice.rs +++ b/src/liballoc/slice.rs @@ -567,17 +567,19 @@ impl> SliceConcatExt for [V] { fn join(&self, sep: &T) -> Vec { let mut iter = self.iter(); - iter.next().map_or(vec![], |first| { - let size = self.iter().fold(0, |acc, v| acc + v.borrow().len()); - let mut result = Vec::with_capacity(size + self.len()); - result.extend_from_slice(first.borrow()); - - for v in iter { - result.push(sep.clone()); - result.extend_from_slice(v.borrow()) - } - result - }) + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + let size = self.iter().fold(0, |acc, v| acc + v.borrow().len()); + let mut result = Vec::with_capacity(size + self.len()); + result.extend_from_slice(first.borrow()); + + for v in iter { + result.push(sep.clone()); + result.extend_from_slice(v.borrow()) + } + result } fn connect(&self, sep: &T) -> Vec { diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 4db6d5d715cb1..32ca8d1fa5eba 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -130,9 +130,9 @@ macro_rules! spezialize_for_lengths { macro_rules! copy_slice_and_advance { ($target:expr, $bytes:expr) => { let len = $bytes.len(); - $target.get_unchecked_mut(..len) - .copy_from_slice($bytes); - $target = {$target}.get_unchecked_mut(len..); + let (head, tail) = {$target}.split_at_mut(len); + head.copy_from_slice($bytes); + $target = tail; } } @@ -152,36 +152,42 @@ where { let sep_len = sep.len(); let mut iter = slice.iter(); - iter.next().map_or(vec![], |first| { - // this is wrong without the guarantee that `slice` is non-empty - // if the `len` calculation overflows, we'll panic - // we would have run out of memory anyway and the rest of the function requires - // the entire String pre-allocated for safety - // - // this is the exact len of the resulting String - let len = sep_len.checked_mul(slice.len() - 1).and_then(|n| { - slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add) + + // the first slice is the only one without a separator preceding it + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + + // compute the exact total length of the joined Vec + // if the `len` calculation overflows, we'll panic + // we would have run out of memory anyway and the rest of the function requires + // the entire Vec pre-allocated for safety + let len = sep_len.checked_mul(iter.len()).and_then(|n| { + slice.iter() + .map(|s| s.borrow().as_ref().len()) + .try_fold(n, usize::checked_add) }).expect("attempt to join into collection with len > usize::MAX"); - // crucial for safety - let mut result = Vec::with_capacity(len); + // crucial for safety + let mut result = Vec::with_capacity(len); + assert!(result.capacity() >= len); - unsafe { - result.extend_from_slice(first.borrow().as_ref()); + result.extend_from_slice(first.borrow().as_ref()); - { - let pos = result.len(); - let target = result.get_unchecked_mut(pos..len); + unsafe { + { + let pos = result.len(); + let target = result.get_unchecked_mut(pos..len); - // copy separator and strs over without bounds checks - // generate loops with hardcoded offsets for small separators - // massive improvements possible (~ x2) - spezialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4); - } - result.set_len(len); + // copy separator and slices over without bounds checks + // generate loops with hardcoded offsets for small separators + // massive improvements possible (~ x2) + spezialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4); } - result - }) + result.set_len(len); + } + result } #[stable(feature = "rust1", since = "1.0.0")]