diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index f8b4a92df2c5d..bf9c2ae49a0c5 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -782,6 +782,130 @@ impl Vec { } } + /// Removes consecutive elements in the vector that resolve to the same key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// #![feature(dedup_by)] + /// + /// let mut vec = vec![10, 20, 21, 30, 20]; + /// + /// vec.dedup_by_key(|i| *i / 10); + /// + /// assert_eq!(vec, [10, 20, 30, 20]); + /// ``` + #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] + #[inline] + pub fn dedup_by_key(&mut self, mut key: F) where F: FnMut(&mut T) -> K, K: PartialEq { + self.dedup_by(|a, b| key(a) == key(b)) + } + + /// Removes consecutive elements in the vector that resolve to the same key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// #![feature(dedup_by)] + /// use std::ascii::AsciiExt; + /// + /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + /// + /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + /// + /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); + /// ``` + #[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")] + pub fn dedup_by(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool { + unsafe { + // Although we have a mutable reference to `self`, we cannot make + // *arbitrary* changes. The `PartialEq` comparisons could panic, so we + // must ensure that the vector is in a valid state at all time. + // + // The way that we handle this is by using swaps; we iterate + // over all the elements, swapping as we go so that at the end + // the elements we wish to keep are in the front, and those we + // wish to reject are at the back. We can then truncate the + // vector. This operation is still O(n). + // + // Example: We start in this state, where `r` represents "next + // read" and `w` represents "next_write`. + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 1 | 2 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Comparing self[r] against self[w-1], this is not a duplicate, so + // we swap self[r] and self[w] (no effect as r==w) and then increment both + // r and w, leaving us with: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 1 | 2 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Comparing self[r] against self[w-1], this value is a duplicate, + // so we increment `r` but leave everything else unchanged: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 1 | 2 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Comparing self[r] against self[w-1], this is not a duplicate, + // so swap self[r] and self[w] and advance r and w: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 2 | 1 | 3 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Not a duplicate, repeat: + // + // r + // +---+---+---+---+---+---+ + // | 0 | 1 | 2 | 3 | 1 | 3 | + // +---+---+---+---+---+---+ + // w + // + // Duplicate, advance r. End of vec. Truncate to w. + + let ln = self.len(); + if ln <= 1 { + return; + } + + // Avoid bounds checks by using raw pointers. + let p = self.as_mut_ptr(); + let mut r: usize = 1; + let mut w: usize = 1; + + while r < ln { + let p_r = p.offset(r as isize); + let p_wm1 = p.offset((w - 1) as isize); + if !same_bucket(&mut *p_r, &mut *p_wm1) { + if r != w { + let p_w = p_wm1.offset(1); + mem::swap(&mut *p_r, &mut *p_w); + } + w += 1; + } + r += 1; + } + + self.truncate(w); + } + } + /// Appends an element to the back of a collection. /// /// # Panics @@ -1156,90 +1280,9 @@ impl Vec { /// assert_eq!(vec, [1, 2, 3, 2]); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn dedup(&mut self) { - unsafe { - // Although we have a mutable reference to `self`, we cannot make - // *arbitrary* changes. The `PartialEq` comparisons could panic, so we - // must ensure that the vector is in a valid state at all time. - // - // The way that we handle this is by using swaps; we iterate - // over all the elements, swapping as we go so that at the end - // the elements we wish to keep are in the front, and those we - // wish to reject are at the back. We can then truncate the - // vector. This operation is still O(n). - // - // Example: We start in this state, where `r` represents "next - // read" and `w` represents "next_write`. - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 1 | 2 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Comparing self[r] against self[w-1], this is not a duplicate, so - // we swap self[r] and self[w] (no effect as r==w) and then increment both - // r and w, leaving us with: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 1 | 2 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Comparing self[r] against self[w-1], this value is a duplicate, - // so we increment `r` but leave everything else unchanged: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 1 | 2 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Comparing self[r] against self[w-1], this is not a duplicate, - // so swap self[r] and self[w] and advance r and w: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 2 | 1 | 3 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Not a duplicate, repeat: - // - // r - // +---+---+---+---+---+---+ - // | 0 | 1 | 2 | 3 | 1 | 3 | - // +---+---+---+---+---+---+ - // w - // - // Duplicate, advance r. End of vec. Truncate to w. - - let ln = self.len(); - if ln <= 1 { - return; - } - - // Avoid bounds checks by using raw pointers. - let p = self.as_mut_ptr(); - let mut r: usize = 1; - let mut w: usize = 1; - - while r < ln { - let p_r = p.offset(r as isize); - let p_wm1 = p.offset((w - 1) as isize); - if *p_r != *p_wm1 { - if r != w { - let p_w = p_wm1.offset(1); - mem::swap(&mut *p_r, &mut *p_w); - } - w += 1; - } - r += 1; - } - - self.truncate(w); - } + self.dedup_by(|a, b| a == b) } } diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs index 878581a4f296e..d6bad46c44dfb 100644 --- a/src/libcollectionstest/lib.rs +++ b/src/libcollectionstest/lib.rs @@ -16,6 +16,7 @@ #![feature(collections)] #![feature(collections_bound)] #![feature(const_fn)] +#![feature(dedup_by)] #![feature(enumset)] #![feature(pattern)] #![feature(rand)] diff --git a/src/libcollectionstest/slice.rs b/src/libcollectionstest/slice.rs index 5b341ab62d097..6d11ca1fbcfc3 100644 --- a/src/libcollectionstest/slice.rs +++ b/src/libcollectionstest/slice.rs @@ -315,47 +315,6 @@ fn test_clear() { // If the unsafe block didn't drop things properly, we blow up here. } -#[test] -fn test_dedup() { - fn case(a: Vec, b: Vec) { - let mut v = a; - v.dedup(); - assert_eq!(v, b); - } - case(vec![], vec![]); - case(vec![1], vec![1]); - case(vec![1, 1], vec![1]); - case(vec![1, 2, 3], vec![1, 2, 3]); - case(vec![1, 1, 2, 3], vec![1, 2, 3]); - case(vec![1, 2, 2, 3], vec![1, 2, 3]); - case(vec![1, 2, 3, 3], vec![1, 2, 3]); - case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]); -} - -#[test] -fn test_dedup_unique() { - let mut v0: Vec> = vec![box 1, box 1, box 2, box 3]; - v0.dedup(); - let mut v1: Vec> = vec![box 1, box 2, box 2, box 3]; - v1.dedup(); - let mut v2: Vec> = vec![box 1, box 2, box 3, box 3]; - v2.dedup(); - // If the boxed pointers were leaked or otherwise misused, valgrind - // and/or rt should raise errors. -} - -#[test] -fn test_dedup_shared() { - let mut v0: Vec> = vec![box 1, box 1, box 2, box 3]; - v0.dedup(); - let mut v1: Vec> = vec![box 1, box 2, box 2, box 3]; - v1.dedup(); - let mut v2: Vec> = vec![box 1, box 2, box 3, box 3]; - v2.dedup(); - // If the pointers were leaked or otherwise misused, valgrind and/or - // rt should raise errors. -} - #[test] fn test_retain() { let mut v = vec![1, 2, 3, 4, 5]; diff --git a/src/libcollectionstest/vec.rs b/src/libcollectionstest/vec.rs index ee2b898d5c2c2..8417be289eb9e 100644 --- a/src/libcollectionstest/vec.rs +++ b/src/libcollectionstest/vec.rs @@ -8,6 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use std::ascii::AsciiExt; use std::borrow::Cow; use std::iter::{FromIterator, repeat}; use std::mem::size_of; @@ -213,6 +214,60 @@ fn test_retain() { assert_eq!(vec, [2, 4]); } +#[test] +fn test_dedup() { + fn case(a: Vec, b: Vec) { + let mut v = a; + v.dedup(); + assert_eq!(v, b); + } + case(vec![], vec![]); + case(vec![1], vec![1]); + case(vec![1, 1], vec![1]); + case(vec![1, 2, 3], vec![1, 2, 3]); + case(vec![1, 1, 2, 3], vec![1, 2, 3]); + case(vec![1, 2, 2, 3], vec![1, 2, 3]); + case(vec![1, 2, 3, 3], vec![1, 2, 3]); + case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]); +} + +#[test] +fn test_dedup_by_key() { + fn case(a: Vec, b: Vec) { + let mut v = a; + v.dedup_by_key(|i| *i / 10); + assert_eq!(v, b); + } + case(vec![], vec![]); + case(vec![10], vec![10]); + case(vec![10, 11], vec![10]); + case(vec![10, 20, 30], vec![10, 20, 30]); + case(vec![10, 11, 20, 30], vec![10, 20, 30]); + case(vec![10, 20, 21, 30], vec![10, 20, 30]); + case(vec![10, 20, 30, 31], vec![10, 20, 30]); + case(vec![10, 11, 20, 21, 22, 30, 31], vec![10, 20, 30]); +} + +#[test] +fn test_dedup_by() { + let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + + assert_eq!(vec, ["foo", "bar", "baz", "bar"]); +} + +#[test] +fn test_dedup_unique() { + let mut v0: Vec> = vec![box 1, box 1, box 2, box 3]; + v0.dedup(); + let mut v1: Vec> = vec![box 1, box 2, box 2, box 3]; + v1.dedup(); + let mut v2: Vec> = vec![box 1, box 2, box 3, box 3]; + v2.dedup(); + // If the boxed pointers were leaked or otherwise misused, valgrind + // and/or rt should raise errors. +} + #[test] fn zero_sized_values() { let mut v = Vec::new();