Skip to content

Commit a4c0468

Browse files
committed
collections::bitv: implement BitvSet directly as a Bitv
1 parent f728ad0 commit a4c0468

File tree

1 file changed

+143
-102
lines changed

1 file changed

+143
-102
lines changed

src/libcollections/bitv.rs

Lines changed: 143 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ use core::prelude::*;
1515
use core::cmp;
1616
use core::default::Default;
1717
use core::fmt;
18-
use core::iter::{Enumerate, Repeat, Map, Zip};
18+
use core::iter::{Map, Zip};
19+
use core::option;
1920
use core::ops;
2021
use core::slice;
2122
use core::uint;
@@ -268,6 +269,33 @@ fn die() -> ! {
268269
fail!("Tried to do operation on bit vectors with different sizes");
269270
}
270271

272+
/// Source of raw storage words behind a `Words` iterator; one variant per
/// way `Bitv::words` can be satisfied.
enum WordsVariant<'a> {
273+
// Yields nothing. `Bitv::words` selects this for a small vector when
// `start > 0`.
NoneIter,
274+
// Yields the single word of a small vector (built from `Some(bits)`).
OneIter(option::Item<uint>),
275+
// Walks the storage slice of a big vector from the requested start word.
VecIter(slice::Items<'a, uint>)
276+
}
277+
278+
/// Iterator over the storage words of a `Bitv`, yielding `(offset, word)`
/// pairs where `offset` is a word index (not a bit index).
struct Words<'a> {
279+
// Where the words come from.
rep: WordsVariant<'a>,
280+
// Index of the next word to hand out; initialised from the `start`
// argument of `Bitv::words`.
offset: uint
281+
}
282+
283+
impl<'a> Iterator<(uint, uint)> for Words<'a> {
284+
/// Returns (offset, word)
///
/// `offset` is the index of the word within the vector's storage, counted
/// from the `start` given to `Bitv::words`. It is not a bit position:
/// callers in this file multiply it by `uint::BITS` themselves.
285+
// No lifetime parameter on the method: a method-level `'a` would merely
// shadow the `'a` already introduced by the impl header.
fn next(&mut self) -> Option<(uint, uint)> {
286+
let ret = match self.rep {
287+
NoneIter => None,
288+
OneIter(ref mut it) => it.next(),
289+
VecIter(ref mut it) => it.next().map(|n| *n)
290+
};
291+
// Advanced unconditionally, hence the `- 1` when building the pair below.
self.offset += 1;
292+
match ret {
293+
Some(n) => Some((self.offset - 1, n)),
294+
None => None
295+
}
296+
}
297+
}
298+
271299
impl Bitv {
272300
#[inline]
273301
fn do_op(&mut self, op: Op, other: &Bitv) -> bool {
@@ -295,6 +323,18 @@ impl Bitv {
295323
}
296324
}
297325
}
326+
327+
#[inline]
328+
/// Returns an iterator over this vector's storage words as `(index, word)`
/// pairs, beginning at word index `start`.
///
/// `start` counts whole words, not bits. A small vector has exactly one
/// word, so any `start > 0` yields an empty iterator.
// NOTE(review): for a big vector `start` is passed straight to
// `slice_from`; presumably that fails if `start` exceeds the storage
// length - confirm callers never pass more than the word count.
fn words<'a>(&'a self, start: uint) -> Words<'a> {
329+
Words {
330+
rep: match self.rep {
331+
Small(_) if start > 0 => NoneIter,
332+
Small(ref s) => OneIter(Some(s.bits).move_iter()),
333+
Big(ref b) => VecIter(b.storage.slice_from(start).iter())
334+
},
335+
// Reported offsets continue from `start`, so they stay absolute indices.
offset: start
336+
}
337+
}
298338
}
299339

300340
impl Bitv {
@@ -687,15 +727,8 @@ impl<'a> RandomAccessIterator<bool> for Bits<'a> {
687727
/// It should also be noted that the amount of storage necessary for holding a
688728
/// set of objects is proportional to the maximum of the objects when viewed
689729
/// as a `uint`.
690-
#[deriving(Clone)]
691-
pub struct BitvSet {
692-
size: uint,
693-
694-
// In theory this is a `Bitv` instead of always a `BigBitv`, but knowing that
695-
// there's an array of storage makes our lives a whole lot easier when
696-
// performing union/intersection/etc operations
697-
bitv: BigBitv
698-
}
730+
// New representation: a newtype over `Bitv`. The element count is no longer
// cached in a `size` field; `len` counts the set bits on demand instead.
// NOTE(review): equality is now derived, i.e. delegated to `Bitv`. Confirm
// that two sets with the same members but different `nbits` or storage
// lengths still compare equal, as the hand-written impl allowed.
#[deriving(Clone, PartialEq, Eq)]
731+
pub struct BitvSet(Bitv);
699732

700733
impl Default for BitvSet {
701734
#[inline]
@@ -705,56 +738,87 @@ impl Default for BitvSet {
705738
impl BitvSet {
706739
/// Creates a new bit vector set with initially no contents
707740
pub fn new() -> BitvSet {
708-
BitvSet{ size: 0, bitv: BigBitv::new(vec!(0)) }
741+
// Wraps a `Bitv` built with `Bitv::new(0, false)`; presumably a zero-bit
// vector with all bits clear - confirm against `Bitv::new`.
BitvSet(Bitv::new(0, false))
709742
}
710743

711744
/// Creates a new bit vector set from the given bit vector
712745
pub fn from_bitv(bitv: Bitv) -> BitvSet {
713-
let mut size = 0;
714-
bitv.ones(|_| {
715-
size += 1;
716-
true
717-
});
718-
let Bitv{rep, ..} = bitv;
719-
match rep {
720-
Big(b) => BitvSet{ size: size, bitv: b },
721-
Small(SmallBitv{bits}) =>
722-
BitvSet{ size: size, bitv: BigBitv{ storage: vec!(bits) } },
723-
}
746+
// The vector is wrapped as-is: no counting of set bits and no conversion
// of a small representation into a big one any more.
BitvSet(bitv)
724747
}
725748

726749
/// Returns the capacity in bits for this bit vector. Inserting any
727750
/// element less than this amount will not trigger a resizing.
728-
pub fn capacity(&self) -> uint { self.bitv.storage.len() * uint::BITS }
751+
// Capacity is counted in whole storage words, independently of the wrapped
// vector's `nbits`.
pub fn capacity(&self) -> uint {
752+
let &BitvSet(ref bitv) = self;
753+
match bitv.rep {
754+
// A small vector is reported as exactly one word of capacity.
Small(_) => uint::BITS,
755+
Big(ref s) => s.storage.len() * uint::BITS
756+
}
757+
}
729758

730759
/// Consumes this set to return the underlying bit vector
731760
pub fn unwrap(self) -> Bitv {
732-
let cap = self.capacity();
733-
let BitvSet{bitv, ..} = self;
734-
return Bitv{ nbits:cap, rep: Big(bitv) };
761+
// The wrapped vector is returned unchanged; unlike the removed code above,
// `nbits` is not overwritten with the capacity.
let BitvSet(bitv) = self;
762+
bitv
763+
}
764+
765+
#[inline]
766+
/// Grows the vector to be able to store bits with indices `[0, size - 1]`
///
/// Only the backing storage is touched; `nbits` of the wrapped vector is
/// not modified here.
767+
fn grow(&mut self, size: uint) {
768+
let &BitvSet(ref mut bitv) = self;
769+
// Extract the bits first and reassign `bitv.rep` afterwards, so the
// representation is not replaced while the `match` is still inspecting it.
// NOTE(review): `>=` switches to the big representation already at
// `size == uint::BITS`, although that many bits would still fit in one
// word - confirm this is intended.
let small_to_big = match bitv.rep { Small(s) if size >= uint::BITS => Some(s.bits), _ => None };
770+
if small_to_big.is_some() {
771+
bitv.rep = Big(BigBitv { storage: vec![small_to_big.unwrap()] });
772+
}
773+
match bitv.rep {
774+
Small(_) => {},
775+
Big(ref mut b) => {
776+
// From here on `size` is a word count (bit count rounded up).
let size = (size + uint::BITS - 1) / uint::BITS;
777+
if b.storage.len() < size {
778+
// NOTE(review): if `Vec::grow(n, ..)` appends `n` elements rather than
// growing to length `n`, this allocates `len + size` words instead of
// `size` - harmless for correctness but wasteful; confirm.
b.storage.grow(size, &0);
779+
}
780+
}
781+
};
735782
}
736783

737784
#[inline]
738785
/// Combines `self` with `other` one storage word at a time: every word of
/// `self` that has a counterpart in `other` is replaced by
/// `f(self_word, other_word)`.
///
/// `self` is grown to `other`'s capacity first, so each word index produced
/// by `other` can be stored into. `f` is applied exactly once per word; a
/// second application would undo a non-idempotent operation such as
/// `|a, b| a ^ b`.
fn other_op(&mut self, other: &BitvSet, f: |uint, uint| -> uint) {
739-
fn nbits(mut w: uint) -> uint {
740-
let mut bits = 0;
741-
for _ in range(0u, uint::BITS) {
742-
if w == 0 {
743-
break;
786+
// Expand the vector if necessary
787+
self.grow(other.capacity());
788+
// Unwrap Bitvs
789+
let &BitvSet(ref mut self_bitv) = self;
790+
let &BitvSet(ref other_bitv) = other;
791+
for (i, w) in other_bitv.words(0) {
792+
match self_bitv.rep {
793+
Small(ref mut s) => { s.bits = f(s.bits, w); }
794+
Big(ref mut b) => {
795+
// Read, combine once, write back. The result must not be fed through
// `f` again.
let old = *b.storage.get(i);
796+
let new = f(old, w);
797+
*b.storage.get_mut(i) = new;
744799
}
745-
bits += w & 1;
746-
w >>= 1;
747800
}
748-
return bits;
749-
}
750-
if self.capacity() < other.capacity() {
751-
self.bitv.storage.grow(other.capacity() / uint::BITS, &0);
752801
}
753-
for (i, &w) in other.bitv.storage.iter().enumerate() {
754-
let old = *self.bitv.storage.get(i);
755-
let new = f(old, w);
756-
*self.bitv.storage.get_mut(i) = new;
757-
self.size += nbits(new) - nbits(old);
802+
}
803+
804+
#[inline]
805+
/// Truncate the underlying vector to the least length required
///
/// Trailing all-zero storage words are dropped, but at least one word is
/// always kept. A small vector is left untouched.
806+
pub fn shrink_to_fit(&mut self) {
807+
let &BitvSet(ref mut bitv) = self;
808+
// Two steps: we borrow b as immutable to get the length...
809+
let old_len = match bitv.rep {
810+
Small(_) => 1,
811+
Big(ref b) => b.storage.len()
812+
};
813+
// ...and as mutable to change it.
814+
match bitv.rep {
815+
Small(_) => {},
816+
Big(ref mut b) => {
817+
// Number of zero words at the high end of the storage.
let n = b.storage.iter().rev().take_while(|&&n| n == 0).count();
818+
// `max(.., 1)` keeps one word even when every word is zero.
let trunc_len = cmp::max(old_len - n, 1);
819+
b.storage.truncate(trunc_len);
820+
// `nbits` is reset to a whole number of words matching the new storage.
bitv.nbits = trunc_len * uint::BITS;
821+
}
758822
}
759823
}
760824

@@ -818,29 +882,6 @@ impl BitvSet {
818882
}
819883
}
820884

821-
// Hand-written equality removed by this change. Two sets were equal when
// their cached sizes matched, every word present in both matched, and every
// word beyond the shorter set was zero - so differing storage lengths alone
// did not make sets unequal.
impl cmp::PartialEq for BitvSet {
822-
fn eq(&self, other: &BitvSet) -> bool {
823-
if self.size != other.size {
824-
return false;
825-
}
826-
for (_, w1, w2) in self.commons(other) {
827-
if w1 != w2 {
828-
return false;
829-
}
830-
}
831-
for (_, _, w) in self.outliers(other) {
832-
if w != 0 {
833-
return false;
834-
}
835-
}
836-
return true;
837-
}
838-
839-
fn ne(&self, other: &BitvSet) -> bool { !self.eq(other) }
840-
}
841-
842-
impl cmp::Eq for BitvSet {}
843-
844885
impl fmt::Show for BitvSet {
845886
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
846887
try!(write!(fmt, "{{"));
@@ -866,19 +907,26 @@ impl<S: hash::Writer> hash::Hash<S> for BitvSet {
866907

867908
impl Collection for BitvSet {
868909
#[inline]
869-
fn len(&self) -> uint { self.size }
910+
// Number of elements in the set, computed by counting set bits across the
// storage words on every call (the old version returned a cached field).
fn len(&self) -> uint {
911+
let &BitvSet(ref bitv) = self;
912+
match bitv.rep {
913+
Small(ref s) => s.bits.count_ones(),
914+
Big(ref b) => b.storage.iter().fold(0, |acc, &n| acc + n.count_ones())
915+
}
916+
}
870917
}
871918

872919
impl Mutable for BitvSet {
873920
fn clear(&mut self) {
874-
self.bitv.each_storage(|w| { *w = 0; true });
875-
self.size = 0;
921+
// Delegates to the wrapped vector's own `clear`; there is no separate
// size field left to reset.
let &BitvSet(ref mut bitv) = self;
922+
bitv.clear();
876923
}
877924
}
878925

879926
impl Set<uint> for BitvSet {
880927
fn contains(&self, value: &uint) -> bool {
881-
*value < self.bitv.storage.len() * uint::BITS && self.bitv.get(*value)
928+
let &BitvSet(ref bitv) = self;
929+
// The bound is now the vector's `nbits` rather than the storage capacity;
// the bit is only read when `value` is below that bound.
*value < bitv.nbits && bitv.get(*value)
882930
}
883931

884932
fn is_disjoint(&self, other: &BitvSet) -> bool {
@@ -914,31 +962,24 @@ impl MutableSet<uint> for BitvSet {
914962
if self.contains(&value) {
915963
return false;
916964
}
917-
let nbits = self.capacity();
918-
if value >= nbits {
919-
let newsize = cmp::max(value, nbits * 2) / uint::BITS + 1;
920-
assert!(newsize > self.bitv.storage.len());
921-
self.bitv.storage.grow(newsize, &0);
965+
if value >= self.capacity() {
966+
let new_cap = cmp::max(value + 1, self.capacity() * 2);
967+
self.grow(new_cap);
968+
}
969+
let &BitvSet(ref mut bitv) = self;
970+
if value >= bitv.nbits {
971+
bitv.nbits = value + 1;
922972
}
923-
self.size += 1;
924-
self.bitv.set(value, true);
973+
bitv.set(value, true);
925974
return true;
926975
}
927976

928977
// Clears the bit for `value`. Returns `false` when the value was not in the
// set and `true` when it was removed.
fn remove(&mut self, value: &uint) -> bool {
929978
if !self.contains(value) {
930979
return false;
931980
}
932-
self.size -= 1;
933-
self.bitv.set(*value, false);
934-
935-
// Attempt to truncate our storage
936-
let mut i = self.bitv.storage.len();
937-
while i > 1 && *self.bitv.storage.get(i - 1) == 0 {
938-
i -= 1;
939-
}
940-
self.bitv.storage.truncate(i);
941-
981+
// Storage is no longer truncated on removal; callers that want the
// capacity back call `shrink_to_fit` explicitly.
let &BitvSet(ref mut bitv) = self;
982+
bitv.set(*value, false);
942983
return true;
943984
}
944985
}
@@ -949,12 +990,12 @@ impl BitvSet {
949990
/// w1, w2) where the bit location is the number of bits offset so far,
950991
/// and w1/w2 are the words coming from the two vectors self, other.
951992
fn commons<'a>(&'a self, other: &'a BitvSet)
952-
-> Map<'static, ((uint, &'a uint), &'a Vec<uint>), (uint, uint, uint),
953-
Zip<Enumerate<slice::Items<'a, uint>>, Repeat<&'a Vec<uint>>>> {
954-
let min = cmp::min(self.bitv.storage.len(), other.bitv.storage.len());
955-
self.bitv.storage.slice(0, min).iter().enumerate()
956-
.zip(Repeat::new(&other.bitv.storage))
957-
.map(|((i, &w), o_store)| (i * uint::BITS, w, *o_store.get(i)))
993+
-> Map<((uint, uint), (uint, uint)), (uint, uint, uint),
994+
Zip<Words<'a>, Words<'a>>> {
995+
let &BitvSet(ref self_bitv) = self;
996+
let &BitvSet(ref other_bitv) = other;
997+
// Both word iterators start at word 0 and are zipped, so iteration ends
// with the shorter vector. The word index from `self` is scaled to a bit
// offset; the index reported by `other` is ignored.
self_bitv.words(0).zip(other_bitv.words(0))
998+
.map(|((i, w1), (_, w2))| (i * uint::BITS, w1, w2))
958999
}
9591000

9601001
/// Visits each word in `self` or `other` that extends beyond the other. This
@@ -965,19 +1006,18 @@ impl BitvSet {
9651006
/// is true if the word comes from `self`, and `false` if it comes from
9661007
/// `other`.
9671008
fn outliers<'a>(&'a self, other: &'a BitvSet)
968-
-> Map<'static, ((uint, &'a uint), uint), (bool, uint, uint),
969-
Zip<Enumerate<slice::Items<'a, uint>>, Repeat<uint>>> {
970-
let slen = self.bitv.storage.len();
971-
let olen = other.bitv.storage.len();
1009+
-> Map<(uint, uint), (bool, uint, uint), Words<'a>> {
1010+
// Lengths in whole words, derived from the capacities (a small vector
// counts as one word).
let slen = self.capacity() / uint::BITS;
1011+
let olen = other.capacity() / uint::BITS;
1012+
let &BitvSet(ref self_bitv) = self;
1013+
let &BitvSet(ref other_bitv) = other;
9721014

9731015
if olen < slen {
974-
self.bitv.storage.slice_from(olen).iter().enumerate()
975-
.zip(Repeat::new(olen))
976-
.map(|((i, &w), min)| (true, (i + min) * uint::BITS, w))
1016+
// `self` is longer: visit its words past the end of `other`. `words`
// already reports absolute word indices, so no offset is added back.
self_bitv.words(olen)
1017+
.map(|(i, w)| (true, i * uint::BITS, w))
9771018
} else {
978-
other.bitv.storage.slice_from(slen).iter().enumerate()
979-
.zip(Repeat::new(slen))
980-
.map(|((i, &w), min)| (false, (i + min) * uint::BITS, w))
1019+
// `other` is longer or of equal length; with equal lengths the iterator
// is simply empty.
other_bitv.words(slen)
1020+
.map(|(i, w)| (false, i * uint::BITS, w))
9811021
}
9821022
}
9831023
}
@@ -1600,6 +1640,7 @@ mod tests {
16001640

16011641
assert!(a.insert(1000));
16021642
assert!(a.remove(&1000));
1643+
a.shrink_to_fit();
16031644
assert_eq!(a.capacity(), uint::BITS);
16041645
}
16051646

0 commit comments

Comments
 (0)