diff --git a/src/libcollections/btree.rs b/src/libcollections/btree.rs deleted file mode 100644 index f6011976b65d0..0000000000000 --- a/src/libcollections/btree.rs +++ /dev/null @@ -1,919 +0,0 @@ -// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. -// - -// NB. this is not deprecated for removal, just deprecating the -// current implementation. If the major pain-points are addressed -// (overuse of by-value self and .clone), this can be removed. -#![deprecated = "the current implementation is extremely inefficient, \ - prefer a HashMap, TreeMap or TrieMap"] -#![allow(deprecated)] - -//! Starting implementation of a B-tree for Rust. -//! Structure inspired by Github user davidhalperin's gist. - -// A B-tree contains a root node (which contains a vector of elements), -// a length (the height of the tree), and lower and upper bounds on the -// number of elements that a given node can contain. - -use core::prelude::*; - -use alloc::boxed::Box; -use core::fmt; -use core::fmt::Show; - -use MutableSeq; -use vec::Vec; - -#[allow(missing_doc)] -pub struct BTree { - root: Node, - len: uint, - lower_bound: uint, - upper_bound: uint -} - -impl BTree { - /// Returns new `BTree` with root node (leaf) and user-supplied lower bound - /// The lower bound applies to every node except the root node. - pub fn new(k: K, v: V, lb: uint) -> BTree { - BTree { - root: Node::new_leaf(vec!(LeafElt::new(k, v))), - len: 1, - lower_bound: lb, - upper_bound: 2 * lb - } - } - - /// Helper function for `clone`: returns new BTree with supplied root node, - /// length, and lower bound. For use when the length is known already. 
- fn new_with_node_len(n: Node, - length: uint, - lb: uint) -> BTree { - BTree { - root: n, - len: length, - lower_bound: lb, - upper_bound: 2 * lb - } - } -} - -// We would probably want to remove the dependence on the Clone trait in the future. -// It is here as a crutch to ensure values can be passed around through the tree's nodes -// especially during insertions and deletions. -impl BTree { - /// Returns the value of a given key, which may not exist in the tree. - /// Calls the root node's get method. - pub fn get(self, k: K) -> Option { - return self.root.get(k); - } - - /// An insert method that uses the `clone` method for support. - pub fn insert(mut self, k: K, v: V) -> BTree { - let (a, b) = self.root.clone().insert(k, v, self.upper_bound.clone()); - if b { - match a.clone() { - LeafNode(leaf) => { - self.root = Node::new_leaf(leaf.clone().elts); - } - BranchNode(branch) => { - self.root = Node::new_branch(branch.clone().elts, - branch.clone().rightmost_child); - } - } - } - self - } -} - -impl Clone for BTree { - fn clone(&self) -> BTree { - BTree::new_with_node_len(self.root.clone(), self.len, self.lower_bound) - } -} - -impl PartialEq for BTree { - fn eq(&self, other: &BTree) -> bool { - self.root.cmp(&other.root) == Equal - } -} - -impl Eq for BTree {} - -impl PartialOrd for BTree { - fn partial_cmp(&self, other: &BTree) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for BTree { - /// Returns an ordering based on the root nodes of each `BTree`. - fn cmp(&self, other: &BTree) -> Ordering { - self.root.cmp(&other.root) - } -} - -impl fmt::Show for BTree { - /// Returns a string representation of the `BTree`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.root.fmt(f) - } -} - - -// Node types -// -// A node is either a LeafNode or a BranchNode, which contain either a Leaf or a Branch. -// Branches contain BranchElts, which contain a left child (another node) and a key-value -// pair. 
Branches also contain the rightmost child of the elements in the array. -// Leaves contain LeafElts, which do not have children. -enum Node { - LeafNode(Leaf), - BranchNode(Branch) -} - - -impl Node { - /// Creates a new leaf node given a vector of elements. - fn new_leaf(vec: Vec>) -> Node { - LeafNode(Leaf::new(vec)) - } - - /// Creates a new branch node given a vector of an elements and a pointer to a rightmost child. - fn new_branch(vec: Vec>, right: Box>) - -> Node { - BranchNode(Branch::new(vec, right)) - } - - /// Determines whether the given Node contains a Branch or a Leaf. - /// Used in testing. - fn is_leaf(&self) -> bool { - match self { - &LeafNode(..) => true, - &BranchNode(..) => false - } - } - - /// A binary search function for Nodes. - /// Calls either the Branch's or the Leaf's bsearch function. - fn bsearch_node(&self, k: K) -> Option { - match self { - &LeafNode(ref leaf) => leaf.bsearch_leaf(k), - &BranchNode(ref branch) => branch.bsearch_branch(k) - } - } -} - -impl Node { - /// Returns the corresponding value to the provided key. - /// `get()` is called in different ways on a branch or a leaf. - fn get(&self, k: K) -> Option { - match *self { - LeafNode(ref leaf) => return leaf.get(k), - BranchNode(ref branch) => return branch.get(k) - } - } - - /// Matches on the `Node`, then performs and returns the appropriate insert method. - fn insert(self, k: K, v: V, ub: uint) -> (Node, bool) { - match self { - LeafNode(leaf) => leaf.insert(k, v, ub), - BranchNode(branch) => branch.insert(k, v, ub) - } - } -} - -impl Clone for Node { - /// Returns a new `Node` based on whether or not it is a branch or a leaf. 
- fn clone(&self) -> Node { - match *self { - LeafNode(ref leaf) => { - Node::new_leaf(leaf.elts.clone()) - } - BranchNode(ref branch) => { - Node::new_branch(branch.elts.clone(), - branch.rightmost_child.clone()) - } - } - } -} - -impl PartialEq for Node { - fn eq(&self, other: &Node) -> bool { - match *self{ - BranchNode(ref branch) => { - if other.is_leaf() { - return false; - } - match *other { - BranchNode(ref branch2) => branch.cmp(branch2) == Equal, - LeafNode(..) => false - } - } - LeafNode(ref leaf) => { - match *other { - LeafNode(ref leaf2) => leaf.cmp(leaf2) == Equal, - BranchNode(..) => false - } - } - } - } -} - -impl Eq for Node {} - -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Node) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Node { - /// Implementation of `Ord` for `Node`s. - fn cmp(&self, other: &Node) -> Ordering { - match *self { - LeafNode(ref leaf) => { - match *other { - LeafNode(ref leaf2) => leaf.cmp(leaf2), - BranchNode(_) => Less - } - } - BranchNode(ref branch) => { - match *other { - BranchNode(ref branch2) => branch.cmp(branch2), - LeafNode(_) => Greater - } - } - } - } -} - -impl fmt::Show for Node { - /// Returns a string representation of a `Node`. - /// Will iterate over the Node and show `Key: x, value: y, child: ()` - /// for all elements in the `Node`. `child` only exists if the `Node` contains - /// a branch. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - LeafNode(ref leaf) => leaf.fmt(f), - BranchNode(ref branch) => branch.fmt(f), - } - } -} - - -// A leaf is a vector with elements that contain no children. A leaf also -// does not contain a rightmost child. -struct Leaf { - elts: Vec> -} - -// Vector of values with children, plus a rightmost child (greater than all) -struct Branch { - elts: Vec>, - rightmost_child: Box>, -} - - -impl Leaf { - /// Creates a new `Leaf` from a vector of `LeafElts`. 
- fn new(vec: Vec>) -> Leaf { - Leaf { - elts: vec - } - } - - /// Searches a leaf for a spot for a new element using a binary search. - /// Returns `None` if the element is already in the vector. - fn bsearch_leaf(&self, k: K) -> Option { - let mut high: uint = self.elts.len(); - let mut low: uint = 0; - let mut midpoint: uint = (high - low) / 2 ; - if midpoint == high { - midpoint = 0; - } - loop { - let order = self.elts[midpoint].key.cmp(&k); - match order { - Equal => { - return None; - } - Greater => { - if midpoint > 0 { - if self.elts[midpoint - 1].key.cmp(&k) == Less { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = midpoint / 2; - high = tmp; - continue; - } - } - else { - return Some(0); - } - } - Less => { - if midpoint + 1 < self.elts.len() { - if self.elts[midpoint + 1].key.cmp(&k) == Greater { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (high + low) / 2; - low = tmp; - } - } - else { - return Some(self.elts.len()); - } - } - } - } - } -} - - -impl Leaf { - /// Returns the corresponding value to the supplied key. - fn get(&self, k: K) -> Option { - for s in self.elts.iter() { - let order = s.key.cmp(&k); - match order { - Equal => return Some(s.value.clone()), - _ => {} - } - } - return None; - } - - /// Uses `clone()` to facilitate inserting new elements into a tree. - fn insert(mut self, k: K, v: V, ub: uint) -> (Node, bool) { - let to_insert = LeafElt::new(k, v); - let index: Option = self.bsearch_leaf(to_insert.clone().key); - //Check index to see whether we actually inserted the element into the vector. - match index { - //If the index is None, the new element already exists in the vector. - None => { - return (Node::new_leaf(self.clone().elts), false); - } - //If there is an index, insert at that index. 
- Some(i) => { - if i >= self.elts.len() { - self.elts.push(to_insert.clone()); - } - else { - self.elts.insert(i, to_insert.clone()); - } - } - } - //If we have overfilled the vector (by making its size greater than the - //upper bound), we return a new Branch with one element and two children. - if self.elts.len() > ub { - let midpoint_opt = self.elts.remove(ub / 2); - let midpoint = midpoint_opt.unwrap(); - let (left_leaf, right_leaf) = self.elts.partition(|le| - le.key.cmp(&midpoint.key.clone()) - == Less); - let branch_return = Node::new_branch(vec!(BranchElt::new(midpoint.key.clone(), - midpoint.value.clone(), - box Node::new_leaf(left_leaf))), - box Node::new_leaf(right_leaf)); - return (branch_return, true); - } - (Node::new_leaf(self.elts.clone()), true) - } -} - -impl Clone for Leaf { - /// Returns a new `Leaf` with the same elts. - fn clone(&self) -> Leaf { - Leaf::new(self.elts.clone()) - } -} - -impl PartialEq for Leaf { - fn eq(&self, other: &Leaf) -> bool { - self.elts == other.elts - } -} - -impl Eq for Leaf {} - -impl PartialOrd for Leaf { - fn partial_cmp(&self, other: &Leaf) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Leaf { - /// Returns an ordering based on the first element of each `Leaf`. - fn cmp(&self, other: &Leaf) -> Ordering { - if self.elts.len() > other.elts.len() { - return Greater; - } - if self.elts.len() < other.elts.len() { - return Less; - } - self.elts[0].cmp(&other.elts[0]) - } -} - - -impl fmt::Show for Leaf { - /// Returns a string representation of a `Leaf`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (i, s) in self.elts.iter().enumerate() { - if i != 0 { try!(write!(f, " // ")) } - try!(write!(f, "{}", *s)) - } - Ok(()) - } -} - - -impl Branch { - /// Creates a new `Branch` from a vector of `BranchElts` and a rightmost child (a node). 
- fn new(vec: Vec>, right: Box>) - -> Branch { - Branch { - elts: vec, - rightmost_child: right - } - } - - fn bsearch_branch(&self, k: K) -> Option { - let mut midpoint: uint = self.elts.len() / 2; - let mut high: uint = self.elts.len(); - let mut low: uint = 0u; - if midpoint == high { - midpoint = 0u; - } - loop { - let order = self.elts[midpoint].key.cmp(&k); - match order { - Equal => { - return None; - } - Greater => { - if midpoint > 0 { - if self.elts[midpoint - 1].key.cmp(&k) == Less { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (midpoint - low) / 2; - high = tmp; - continue; - } - } - else { - return Some(0); - } - } - Less => { - if midpoint + 1 < self.elts.len() { - if self.elts[midpoint + 1].key.cmp(&k) == Greater { - return Some(midpoint); - } - else { - let tmp = midpoint; - midpoint = (high - midpoint) / 2; - low = tmp; - } - } - else { - return Some(self.elts.len()); - } - } - } - } - } -} - -impl Branch { - /// Returns the corresponding value to the supplied key. - /// If the key is not there, find the child that might hold it. - fn get(&self, k: K) -> Option { - for s in self.elts.iter() { - let order = s.key.cmp(&k); - match order { - Less => return s.left.get(k), - Equal => return Some(s.value.clone()), - _ => {} - } - } - self.rightmost_child.get(k) - } - - /// An insert method that uses `.clone()` for support. - fn insert(mut self, k: K, v: V, ub: uint) -> (Node, bool) { - let mut new_branch = Node::new_branch(self.clone().elts, self.clone().rightmost_child); - let mut outcome = false; - let index: Option = new_branch.bsearch_node(k.clone()); - //First, find which path down the tree will lead to the appropriate leaf - //for the key-value pair. 
- match index.clone() { - None => { - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - outcome); - } - Some(i) => { - if i == self.elts.len() { - let new_outcome = self.clone().rightmost_child.insert(k.clone(), - v.clone(), - ub.clone()); - new_branch = new_outcome.clone().val0(); - outcome = new_outcome.val1(); - } - else { - let new_outcome = self.elts[i].left.clone().insert(k.clone(), - v.clone(), - ub.clone()); - new_branch = new_outcome.clone().val0(); - outcome = new_outcome.val1(); - } - //Check to see whether a branch or a leaf was returned from the - //tree traversal. - match new_branch.clone() { - //If we have a leaf, we do not need to resize the tree, - //so we can return false. - LeafNode(..) => { - if i == self.elts.len() { - self.rightmost_child = box new_branch.clone(); - } - else { - self.elts.get_mut(i).left = box new_branch.clone(); - } - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - true); - } - //If we have a branch, we might need to refactor the tree. - BranchNode(..) => {} - } - } - } - //If we inserted something into the tree, do the following: - if outcome { - match new_branch.clone() { - //If we have a new leaf node, integrate it into the current branch - //and return it, saying we have inserted a new element. - LeafNode(..) => { - if index.unwrap() == self.elts.len() { - self.rightmost_child = box new_branch; - } - else { - self.elts.get_mut(index.unwrap()).left = box new_branch; - } - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - true); - } - //If we have a new branch node, attempt to insert it into the tree - //as with the key-value pair, then check to see if the node is overfull. 
- BranchNode(branch) => { - let new_elt = branch.elts[0].clone(); - let new_elt_index = self.bsearch_branch(new_elt.clone().key); - match new_elt_index { - None => { - return (Node::new_branch(self.clone().elts, - self.clone().rightmost_child), - false); - } - Some(i) => { - self.elts.insert(i, new_elt); - if i + 1 >= self.elts.len() { - self.rightmost_child = branch.clone().rightmost_child; - } - else { - self.elts.get_mut(i + 1).left = - branch.clone().rightmost_child; - } - } - } - } - } - //If the current node is overfilled, create a new branch with one element - //and two children. - if self.elts.len() > ub { - let midpoint = self.elts.remove(ub / 2).unwrap(); - let (new_left, new_right) = self.clone().elts.partition(|le| - midpoint.key.cmp(&le.key) - == Greater); - new_branch = Node::new_branch( - vec!(BranchElt::new(midpoint.clone().key, - midpoint.clone().value, - box Node::new_branch(new_left, - midpoint.clone().left))), - box Node::new_branch(new_right, self.clone().rightmost_child)); - return (new_branch, true); - } - } - (Node::new_branch(self.elts.clone(), self.rightmost_child.clone()), outcome) - } -} - -impl Clone for Branch { - /// Returns a new branch using the clone methods of the `Branch`'s internal variables. - fn clone(&self) -> Branch { - Branch::new(self.elts.clone(), self.rightmost_child.clone()) - } -} - -impl PartialEq for Branch { - fn eq(&self, other: &Branch) -> bool { - self.elts == other.elts - } -} - -impl Eq for Branch {} - -impl PartialOrd for Branch { - fn partial_cmp(&self, other: &Branch) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Branch { - /// Compares the first elements of two `Branch`es to determine an - /// `Ordering`. 
- fn cmp(&self, other: &Branch) -> Ordering { - if self.elts.len() > other.elts.len() { - return Greater; - } - if self.elts.len() < other.elts.len() { - return Less; - } - self.elts[0].cmp(&other.elts[0]) - } -} - -impl fmt::Show for Branch { - /// Returns a string representation of a `Branch`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for (i, s) in self.elts.iter().enumerate() { - if i != 0 { try!(write!(f, " // ")) } - try!(write!(f, "{}", *s)) - } - write!(f, " // rightmost child: ({}) ", *self.rightmost_child) - } -} - -//A LeafElt contains no left child, but a key-value pair. -struct LeafElt { - key: K, - value: V -} - -//A BranchElt has a left child in insertion to a key-value pair. -struct BranchElt { - left: Box>, - key: K, - value: V -} - -impl LeafElt { - /// Creates a new `LeafElt` from a supplied key-value pair. - fn new(k: K, v: V) -> LeafElt { - LeafElt { - key: k, - value: v - } - } -} - -impl Clone for LeafElt { - /// Returns a new `LeafElt` by cloning the key and value. - fn clone(&self) -> LeafElt { - LeafElt::new(self.key.clone(), self.value.clone()) - } -} - -impl PartialEq for LeafElt { - fn eq(&self, other: &LeafElt) -> bool { - self.key == other.key && self.value == other.value - } -} - -impl Eq for LeafElt {} - -impl PartialOrd for LeafElt { - fn partial_cmp(&self, other: &LeafElt) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for LeafElt { - /// Returns an ordering based on the keys of the `LeafElt`s. - fn cmp(&self, other: &LeafElt) -> Ordering { - self.key.cmp(&other.key) - } -} - -impl fmt::Show for LeafElt { - /// Returns a string representation of a `LeafElt`. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Key: {}, value: {};", self.key, self.value) - } -} - -impl BranchElt { - /// Creates a new `BranchElt` from a supplied key, value, and left child. 
- fn new(k: K, v: V, n: Box>) -> BranchElt { - BranchElt { - left: n, - key: k, - value: v - } - } -} - - -impl Clone for BranchElt { - /// Returns a new `BranchElt` by cloning the key, value, and left child. - fn clone(&self) -> BranchElt { - BranchElt::new(self.key.clone(), - self.value.clone(), - self.left.clone()) - } -} - -impl PartialEq for BranchElt{ - fn eq(&self, other: &BranchElt) -> bool { - self.key == other.key && self.value == other.value - } -} - -impl Eq for BranchElt{} - -impl PartialOrd for BranchElt { - fn partial_cmp(&self, other: &BranchElt) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for BranchElt { - /// Fulfills `Ord` for `BranchElts`. - fn cmp(&self, other: &BranchElt) -> Ordering { - self.key.cmp(&other.key) - } -} - -impl fmt::Show for BranchElt { - /// Formats as a string containing the key, value, and child (which should recur to a - /// leaf). Consider changing in future to be more readable. - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Key: {}, value: {}, (child: {})", - self.key, self.value, *self.left) - } -} - -#[cfg(test)] -mod test_btree { - use std::prelude::*; - - use super::{BTree, Node, LeafElt}; - - use MutableSeq; - - //Tests the functionality of the insert methods (which are unfinished). 
- #[test] - fn insert_test_one() { - let b = BTree::new(1i, "abc".to_string(), 2); - let is_insert = b.insert(2i, "xyz".to_string()); - assert!(is_insert.root.is_leaf()); - } - - #[test] - fn insert_test_two() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3)); - let b = BTree::new_with_node_len(n, 3, 2); - //println!("{}", b.clone().insert(4, "ddd".to_string()).to_string()); - assert!(b.insert(4, "ddd".to_string()).root.is_leaf()); - } - - #[test] - fn insert_test_three() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(4i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - //println!("{}", b.clone().insert(5, "eee".to_string()).to_string()); - assert!(!b.insert(5, "eee".to_string()).root.is_leaf()); - } - - #[test] - fn insert_test_four() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(3i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(4i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let mut b = BTree::new_with_node_len(n, 3, 2); - b = b.clone().insert(5, "eee".to_string()); - b = b.clone().insert(6, "fff".to_string()); - b = b.clone().insert(7, "ggg".to_string()); - b = b.clone().insert(8, "hhh".to_string()); - b = b.clone().insert(0, "omg".to_string()); - //println!("{}", b.clone().to_string()); - assert!(!b.root.is_leaf()); - } - - #[test] - fn bsearch_test_one() { - let b = BTree::new(1i, "abc".to_string(), 2u); - assert_eq!(Some(1), b.root.bsearch_node(2)); - } - - #[test] - 
fn bsearch_test_two() { - let b = BTree::new(1i, "abc".to_string(), 2u); - assert_eq!(Some(0), b.root.bsearch_node(0)); - } - - #[test] - fn bsearch_test_three() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(4i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(5i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - assert_eq!(Some(2), b.root.bsearch_node(3)); - } - - #[test] - fn bsearch_test_four() { - let leaf_elt_1 = LeafElt::new(1i, "aaa".to_string()); - let leaf_elt_2 = LeafElt::new(2i, "bbb".to_string()); - let leaf_elt_3 = LeafElt::new(4i, "ccc".to_string()); - let leaf_elt_4 = LeafElt::new(5i, "ddd".to_string()); - let n = Node::new_leaf(vec!(leaf_elt_1, leaf_elt_2, leaf_elt_3, leaf_elt_4)); - let b = BTree::new_with_node_len(n, 3, 2); - assert_eq!(Some(4), b.root.bsearch_node(800)); - } - - //Tests the functionality of the get method. - #[test] - fn get_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - let val = b.get(1); - assert_eq!(val, Some("abc".to_string())); - } - - //Tests the BTree's clone() method. - #[test] - fn btree_clone_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = b.clone(); - assert!(b.root == b2.root) - } - - //Tests the BTree's cmp() method when one node is "less than" another. - #[test] - fn btree_cmp_test_less() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(2i, "bcd".to_string(), 2); - assert!(&b.cmp(&b2) == &Less) - } - - //Tests the BTree's cmp() method when two nodes are equal. - #[test] - fn btree_cmp_test_eq() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(1i, "bcd".to_string(), 2); - assert!(&b.cmp(&b2) == &Equal) - } - - //Tests the BTree's cmp() method when one node is "greater than" another. 
- #[test] - fn btree_cmp_test_greater() { - let b = BTree::new(1i, "abc".to_string(), 2); - let b2 = BTree::new(2i, "bcd".to_string(), 2); - assert!(&b2.cmp(&b) == &Greater) - } - - //Tests the BTree's to_string() method. - #[test] - fn btree_tostr_test() { - let b = BTree::new(1i, "abc".to_string(), 2); - assert_eq!(b.to_string(), "Key: 1, value: abc;".to_string()) - } - -} diff --git a/src/libcollections/btree/map.rs b/src/libcollections/btree/map.rs new file mode 100644 index 0000000000000..b0ba225462150 --- /dev/null +++ b/src/libcollections/btree/map.rs @@ -0,0 +1,1203 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This implementation is largely based on the high-level description and analysis of B-Trees +// found in *Open Data Structures* (ODS). Although our implementation does not use any of +// the source found in ODS, if one wishes to review the high-level design of this structure, it +// can be freely downloaded at http://opendatastructures.org/. Its contents are as of this +// writing (August 2014) freely licensed under the following Creative Commons Attribution +// License: [CC BY 2.5 CA](http://creativecommons.org/licenses/by/2.5/ca/). + +use core::prelude::*; + +use super::node::*; +use std::hash::{Writer, Hash}; +use core::default::Default; +use core::{iter, fmt, mem}; +use core::fmt::Show; + +use {Deque, Map, MutableMap, Mutable, MutableSeq}; +use ringbuf::RingBuf; + + + +/// A map based on a B-Tree. +#[deriving(Clone)] +pub struct BTreeMap { + root: Node, + length: uint, + depth: uint, + b: uint, +} + +/// An abstract base over-which all other BTree iterators are built. 
+struct AbsEntries { + lca: T, + left: RingBuf, + right: RingBuf, + size: uint, +} + +/// An iterator over a BTreeMap's entries. +pub struct Entries<'a, K, V> { + inner: AbsEntries> +} + +/// A mutable iterator over a BTreeMap's entries. +pub struct MutEntries<'a, K, V> { + inner: AbsEntries> +} + +/// An owning iterator over a BTreeMap's entries. +pub struct MoveEntries { + inner: AbsEntries> +} + +/// An iterator over a BTreeMap's keys. +pub type Keys<'a, K, V> = iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>; + +/// An iterator over a BTreeMap's values. +pub type Values<'a, K, V> = iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>; + +/// A view into a single entry in a map, which may either be vacant or occupied. +pub enum Entry<'a, K:'a, V:'a> { + /// A vacant Entry + Vacant(VacantEntry<'a, K, V>), + /// An occupied Entry + Occupied(OccupiedEntry<'a, K, V>), +} + +/// A vacant Entry. +pub struct VacantEntry<'a, K:'a, V:'a> { + key: K, + stack: stack::SearchStack<'a, K, V>, +} + +/// An occupied Entry. +pub struct OccupiedEntry<'a, K:'a, V:'a> { + stack: stack::SearchStack<'a, K, V>, +} + +impl BTreeMap { + /// Makes a new empty BTreeMap with a reasonable choice for B. + pub fn new() -> BTreeMap { + //FIXME(Gankro): Tune this as a function of size_of? + BTreeMap::with_b(6) + } + + /// Makes a new empty BTreeMap with the given B. + pub fn with_b(b: uint) -> BTreeMap { + assert!(b > 1, "B must be greater than 1"); + BTreeMap { + length: 0, + depth: 1, + root: Node::make_leaf_root(b), + b: b, + } + } +} + +impl Map for BTreeMap { + // Searching in a B-Tree is pretty straightforward. + // + // Start at the root. Try to find the key in the current node. If we find it, return it. + // If it's not in there, follow the edge *before* the smallest key larger than + // the search key. If no such key exists (they're *all* smaller), then just take the last + // edge in the node. 
If we're in a leaf and we don't find our key, then it's not + // in the tree. + fn find(&self, key: &K) -> Option<&V> { + let mut cur_node = &self.root; + loop { + match cur_node.search(key) { + Found(i) => return cur_node.val(i), + GoDown(i) => match cur_node.edge(i) { + None => return None, + Some(next_node) => { + cur_node = next_node; + continue; + } + } + } + } + } +} + +impl MutableMap for BTreeMap { + // See `find` for implementation notes, this is basically a copy-paste with mut's added + fn find_mut(&mut self, key: &K) -> Option<&mut V> { + // temp_node is a Borrowck hack for having a mutable value outlive a loop iteration + let mut temp_node = &mut self.root; + loop { + let cur_node = temp_node; + match cur_node.search(key) { + Found(i) => return cur_node.val_mut(i), + GoDown(i) => match cur_node.edge_mut(i) { + None => return None, + Some(next_node) => { + temp_node = next_node; + continue; + } + } + } + } + } + + // Insertion in a B-Tree is a bit complicated. + // + // First we do the same kind of search described in `find`. But we need to maintain a stack of + // all the nodes/edges in our search path. If we find a match for the key we're trying to + // insert, just swap the vals and return the old ones. However, when we bottom out in a leaf, + // we attempt to insert our key-value pair at the same location we would want to follow another + // edge. + // + // If the node has room, then this is done in the obvious way by shifting elements. However, + // if the node itself is full, we split node into two, and give its median key-value + // pair to its parent to insert the new node with. Of course, the parent may also be + // full, and insertion can propagate until we reach the root. If we reach the root, and + // it is *also* full, then we split the root and place the two nodes under a newly made root. + // + // Note that we subtly deviate from Open Data Structures in our implementation of split. 
+ // ODS describes inserting into the node *regardless* of its capacity, and then + // splitting *afterwards* if it happens to be overfull. However, this is inefficient. + // Instead, we split beforehand, and then insert the key-value pair into the appropriate + // result node. This has two consequences: + // + // 1) While ODS produces a left node of size B-1, and a right node of size B, + // we may potentially reverse this. However, this shouldn't affect the analysis. + // + // 2) While ODS may potentially return the pair we *just* inserted after + // the split, we will never do this. Again, this shouldn't affect the analysis. + + fn swap(&mut self, key: K, mut value: V) -> Option { + // This is a stack of rawptrs to nodes paired with indices, respectively + // representing the nodes and edges of our search path. We have to store rawptrs + // because as far as Rust is concerned, we can mutate aliased data with such a + // stack. It is of course correct, but what it doesn't know is that we will only + // be popping and using these ptrs one at a time in child-to-parent order. The alternative + // to doing this is to take the Nodes from their parents. This actually makes + // borrowck *really* happy and everything is pretty smooth. However, this creates + // *tons* of pointless writes, and requires us to always walk all the way back to + // the root after an insertion, even if we only needed to change a leaf. Therefore, + // we accept this potential unsafety and complexity in the name of performance. + // + // Regardless, the actual dangerous logic is completely abstracted away from BTreeMap + // by the stack module. All it can do is immutably read nodes, and ask the search stack + // to proceed down some edge by index. This makes the search logic we'll be reusing in a + // few different methods much neater, and of course drastically improves safety. 
+ let mut stack = stack::PartialSearchStack::new(self); + + loop { + // Same basic logic as found in `find`, but with PartialSearchStack mediating the + // actual nodes for us + match stack.next().search(&key) { + Found(i) => unsafe { + // Perfect match, swap the values and return the old one + let next = stack.into_next(); + mem::swap(next.unsafe_val_mut(i), &mut value); + return Some(value); + }, + GoDown(i) => { + // We need to keep searching, try to get the search stack + // to go down further + stack = match stack.push(i) { + stack::Done(new_stack) => { + // We've reached a leaf, perform the insertion here + new_stack.insert(key, value); + return None; + } + stack::Grew(new_stack) => { + // We've found the subtree to insert this key/value pair in, + // keep searching + new_stack + } + }; + } + } + } + } + + // Deletion is the most complicated operation for a B-Tree. + // + // First we do the same kind of search described in + // `find`. But we need to maintain a stack of all the nodes/edges in our search path. + // If we don't find the key, then we just return `None` and do nothing. If we do find the + // key, we perform two operations: remove the item, and then possibly handle underflow. + // + // # removing the item + // If the node is a leaf, we just remove the item, and shift + // any items after it back to fill the hole. + // + // If the node is an internal node, we *swap* the item with the smallest item in + // its right subtree (which must reside in a leaf), and then revert to the leaf + // case + // + // # handling underflow + // After removing an item, there may be too few items in the node. We want nodes + // to be mostly full for efficiency, although we make an exception for the root, which + // may have as few as one item. If this is the case, we may first try to steal + // an item from our left or right neighbour. + // + // To steal from the left (right) neighbour, + // we take the largest (smallest) item and child from it. 
We then swap the taken item + // with the item in their mutual parent that separates them, and then insert the + // parent's item and the taken child into the first (last) index of the underflowed node. + // + // However, stealing has the possibility of underflowing our neighbour. If this is the + // case, we instead *merge* with our neighbour. This of course reduces the number of + // children in the parent. Therefore, we also steal the item that separates the now + // merged nodes, and insert it into the merged node. + // + // Merging may cause the parent to underflow. If this is the case, then we must repeat + // the underflow handling process on the parent. If merging merges the last two children + // of the root, then we replace the root with the merged node. + + fn pop(&mut self, key: &K) -> Option { + // See `swap` for a more thorough description of the stuff going on in here + let mut stack = stack::PartialSearchStack::new(self); + loop { + match stack.next().search(key) { + Found(i) => { + // Perfect match. Terminate the stack here, and remove the entry + return Some(stack.seal(i).remove()); + }, + GoDown(i) => { + // We need to keep searching, try to go down the next edge + stack = match stack.push(i) { + stack::Done(_) => return None, // We're at a leaf; the key isn't in here + stack::Grew(new_stack) => { + new_stack + } + }; + } + } + } + } +} + +/// The stack module provides a safe interface for constructing and manipulating a stack of ptrs +/// to nodes. By using this module much better safety guarantees can be made, and more search +/// boilerplate gets cut out. +mod stack { + use core::prelude::*; + use super::BTreeMap; + use super::super::node::*; + use {MutableMap, MutableSeq}; + use vec::Vec; + + type StackItem = (*mut Node, uint); + type Stack = Vec>; + + /// A PartialSearchStack handles the construction of a search stack. 
+ pub struct PartialSearchStack<'a, K:'a, V:'a> { + map: &'a mut BTreeMap, + stack: Stack, + next: *mut Node, + } + + /// A SearchStack represents a full path to an element of interest. It provides methods + /// for manipulating the element at the top of its stack. + pub struct SearchStack<'a, K:'a, V:'a> { + map: &'a mut BTreeMap, + stack: Stack, + top: StackItem, + } + + /// The result of asking a PartialSearchStack to push another node onto itself. Either it + /// Grew, in which case it's still Partial, or it found its last node was actually a leaf, in + /// which case it seals itself and yields a complete SearchStack. + pub enum PushResult<'a, K:'a, V:'a> { + Grew(PartialSearchStack<'a, K, V>), + Done(SearchStack<'a, K, V>), + } + + impl<'a, K, V> PartialSearchStack<'a, K, V> { + /// Creates a new PartialSearchStack from a BTreeMap by initializing the stack with the + /// root of the tree. + pub fn new<'a>(map: &'a mut BTreeMap) -> PartialSearchStack<'a, K, V> { + let depth = map.depth; + + PartialSearchStack { + next: &mut map.root as *mut _, + map: map, + stack: Vec::with_capacity(depth), + } + } + + /// Pushes the requested child of the stack's current top on top of the stack. If the child + /// exists, then a new PartialSearchStack is yielded. Otherwise, a full SearchStack is + /// yielded. + pub fn push(self, edge: uint) -> PushResult<'a, K, V> { + let map = self.map; + let mut stack = self.stack; + let next_ptr = self.next; + let next_node = unsafe { + &mut *next_ptr + }; + let to_insert = (next_ptr, edge); + match next_node.edge_mut(edge) { + None => Done(SearchStack { + map: map, + stack: stack, + top: to_insert, + }), + Some(node) => { + stack.push(to_insert); + Grew(PartialSearchStack { + map: map, + stack: stack, + next: node as *mut _, + }) + }, + } + } + + /// Converts the stack into a mutable reference to its top. + pub fn into_next(self) -> &'a mut Node { + unsafe { + &mut *self.next + } + } + + /// Gets the top of the stack. 
+ pub fn next(&self) -> &Node { + unsafe { + &*self.next + } + } + + /// Converts the PartialSearchStack into a SearchStack. + pub fn seal(self, index: uint) -> SearchStack<'a, K, V> { + SearchStack { + map: self.map, + stack: self.stack, + top: (self.next as *mut _, index), + } + } + } + + impl<'a, K, V> SearchStack<'a, K, V> { + /// Gets a reference to the value the stack points to. + pub fn peek(&self) -> &V { + let (node_ptr, index) = self.top; + unsafe { + (*node_ptr).val(index).unwrap() + } + } + + /// Gets a mutable reference to the value the stack points to. + pub fn peek_mut(&mut self) -> &mut V { + let (node_ptr, index) = self.top; + unsafe { + (*node_ptr).val_mut(index).unwrap() + } + } + + /// Converts the stack into a mutable reference to the value it points to, with a lifetime + /// tied to the original tree. + pub fn into_top(self) -> &'a mut V { + let (node_ptr, index) = self.top; + unsafe { + (*node_ptr).val_mut(index).unwrap() + } + } + + /// Inserts the key and value into the top element in the stack, and if that node has to + /// split recursively inserts the split contents into the next element stack until + /// splits stop. + /// + /// Assumes that the stack represents a search path from the root to a leaf. + /// + /// An &mut V is returned to the inserted value, for callers that want a reference to this. + pub fn insert(self, key: K, val: V) -> &'a mut V { + unsafe { + let map = self.map; + map.length += 1; + + let mut stack = self.stack; + // Insert the key and value into the leaf at the top of the stack + let (node, index) = self.top; + let (mut insertion, inserted_ptr) = { + (*node).insert_as_leaf(index, key, val) + }; + + loop { + match insertion { + Fit => { + // The last insertion went off without a hitch, no splits! We can stop + // inserting now. 
+ return &mut *inserted_ptr; + } + Split(key, val, right) => match stack.pop() { + // The last insertion triggered a split, so get the next element on the + // stack to recursively insert the split node into. + None => { + // The stack was empty; we've split the root, and need to make a + // new one. This is done in-place because we can't move the + // root out of a reference to the tree. + Node::make_internal_root(&mut map.root, map.b, key, val, right); + + map.depth += 1; + return &mut *inserted_ptr; + } + Some((node, index)) => { + // The stack wasn't empty, do the insertion and recurse + insertion = (*node).insert_as_internal(index, key, val, right); + continue; + } + } + } + } + } + } + + /// Removes the key and value in the top element of the stack, then handles underflows as + /// described in BTree's pop function. + pub fn remove(mut self) -> V { + // Ensure that the search stack goes to a leaf. This is necessary to perform deletion + // in a BTree. Note that this may put the tree in an inconsistent state (further + // described in leafify's comments), but this is immediately fixed by + // removing the value we want to remove + self.leafify(); + + let map = self.map; + map.length -= 1; + + let mut stack = self.stack; + + // Remove the key-value pair from the leaf that this search stack points to. + // Then, note if the leaf is underfull, and promptly forget the leaf and its ptr + // to avoid ownership issues. + let (value, mut underflow) = unsafe { + let (leaf_ptr, index) = self.top; + let leaf = &mut *leaf_ptr; + let (_key, value) = leaf.remove_as_leaf(index); + let underflow = leaf.is_underfull(); + (value, underflow) + }; + + loop { + match stack.pop() { + None => { + // We've reached the root, so no matter what, we're done. We manually + // access the root via the tree itself to avoid creating any dangling + // pointers. + if map.root.len() == 0 && !map.root.is_leaf() { + // We've emptied out the root, so make its only child the new root.
+ // If it's a leaf, we just let it become empty. + map.depth -= 1; + map.root = map.root.pop_edge().unwrap(); + } + return value; + } + Some((parent_ptr, index)) => { + if underflow { + // Underflow! Handle it! + unsafe { + let parent = &mut *parent_ptr; + parent.handle_underflow(index); + underflow = parent.is_underfull(); + } + } else { + // All done! + return value; + } + } + } + } + } + + /// Subroutine for removal. Takes a search stack for a key that might terminate at an + /// internal node, and mutates the tree and search stack to *make* it a search stack + /// for that same key that *does* terminate at a leaf. If the mutation occurs, then this + /// leaves the tree in an inconsistent state that must be repaired by the caller by + /// removing the entry in question. Specifically the key-value pair and its successor will + /// become swapped. + fn leafify(&mut self) { + unsafe { + let (node_ptr, index) = self.top; + // First, get ptrs to the found key-value pair + let node = &mut *node_ptr; + let (key_ptr, val_ptr) = { + (node.unsafe_key_mut(index) as *mut _, + node.unsafe_val_mut(index) as *mut _) + }; + + // Try to go into the right subtree of the found key to find its successor + match node.edge_mut(index + 1) { + None => { + // We're a proper leaf stack, nothing to do + } + Some(mut temp_node) => { + //We're not a proper leaf stack, let's get to work.
+ self.stack.push((node_ptr, index + 1)); + loop { + // Walk into the smallest subtree of this node + let node = temp_node; + let node_ptr = node as *mut _; + + if node.is_leaf() { + // This node is a leaf, do the swap and return + self.top = (node_ptr, 0); + node.unsafe_swap(0, &mut *key_ptr, &mut *val_ptr); + break; + } else { + // This node is internal, go deeper + self.stack.push((node_ptr, 0)); + temp_node = node.unsafe_edge_mut(0); + } + } + } + } + } + } + } +} + +impl Collection for BTreeMap { + fn len(&self) -> uint { + self.length + } +} + +impl Mutable for BTreeMap { + fn clear(&mut self) { + let b = self.b; + // avoid recursive destructors by manually traversing the tree + for _ in mem::replace(self, BTreeMap::with_b(b)).into_iter() {}; + } +} + +impl FromIterator<(K, V)> for BTreeMap { + fn from_iter>(iter: T) -> BTreeMap { + let mut map = BTreeMap::new(); + map.extend(iter); + map + } +} + +impl Extendable<(K, V)> for BTreeMap { + #[inline] + fn extend>(&mut self, mut iter: T) { + for (k, v) in iter { + self.insert(k, v); + } + } +} + +impl, V: Hash> Hash for BTreeMap { + fn hash(&self, state: &mut S) { + for elt in self.iter() { + elt.hash(state); + } + } +} + +impl Default for BTreeMap { + fn default() -> BTreeMap { + BTreeMap::new() + } +} + +impl PartialEq for BTreeMap { + fn eq(&self, other: &BTreeMap) -> bool { + self.len() == other.len() && + self.iter().zip(other.iter()).all(|(a, b)| a == b) + } +} + +impl Eq for BTreeMap {} + +impl PartialOrd for BTreeMap { + #[inline] + fn partial_cmp(&self, other: &BTreeMap) -> Option { + iter::order::partial_cmp(self.iter(), other.iter()) + } +} + +impl Ord for BTreeMap { + #[inline] + fn cmp(&self, other: &BTreeMap) -> Ordering { + iter::order::cmp(self.iter(), other.iter()) + } +} + +impl Show for BTreeMap { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(write!(f, "{{")); + + for (i, (k, v)) in self.iter().enumerate() { + if i != 0 { try!(write!(f, ", ")); } + try!(write!(f, "{}: {}", 
*k, *v)); + } + + write!(f, "}}") + } +} + +impl Index for BTreeMap { + fn index(&self, key: &K) -> &V { + self.find(key).expect("no entry found for key") + } +} + +/// Genericises over how to get the correct type of iterator from the correct type +/// of Node ownership. +trait Traverse { + fn traverse(node: N) -> Self; +} + +impl<'a, K, V> Traverse<&'a Node> for Traversal<'a, K, V> { + fn traverse(node: &'a Node) -> Traversal<'a, K, V> { + node.iter() + } +} + +impl<'a, K, V> Traverse<&'a mut Node> for MutTraversal<'a, K, V> { + fn traverse(node: &'a mut Node) -> MutTraversal<'a, K, V> { + node.iter_mut() + } +} + +impl Traverse> for MoveTraversal { + fn traverse(node: Node) -> MoveTraversal { + node.into_iter() + } +} + +/// Represents an operation to perform inside the following iterator methods. +/// This is necessary to use in `next` because we want to modify self.left inside +/// a match that borrows it. Similarly, in `next_back` for self.right. Instead, we use this +/// enum to note what we want to do, and do it after the match. +enum StackOp { + Push(T), + Pop, +} + +impl + DoubleEndedIterator>> + Iterator<(K, V)> for AbsEntries { + // This function is pretty long, but only because there's a lot of cases to consider. + // Our iterator represents two search paths, left and right, to the smallest and largest + // elements we have yet to yield. lca represents the least common ancestor of these two paths, + // above-which we never walk, since everything outside it has already been consumed (or was + // never in the range to iterate). + // + // Note that the design of these iterators permits an *arbitrary* initial pair of min and max, + // making these arbitrary sub-range iterators. However the logic to construct these paths + // efficiently is fairly involved, so this is a FIXME. The sub-range iterators also wouldn't be + // able to accurately predict size, so those iterators can't implement ExactSize. 
+ fn next(&mut self) -> Option<(K, V)> { + loop { + // We want the smallest element, so try to get the top of the left stack + let op = match self.left.back_mut() { + // The left stack is empty, so try to get the next element of the two paths + // LCAs (the left search path is currently a subpath of the right one) + None => match self.lca.next() { + // The lca has been exhausted, walk further down the right path + None => match self.right.pop_front() { + // The right path is exhausted, so we're done + None => return None, + // The right path had something, make that the new LCA + // and restart the whole process + Some(right) => { + self.lca = right; + continue; + } + }, + // The lca yielded an edge, make that the new head of the left path + Some(Edge(next)) => Push(Traverse::traverse(next)), + // The lca yielded an entry, so yield that + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + }, + // The left stack wasn't empty, so continue along the node in its head + Some(iter) => match iter.next() { + // The head of the left path is empty, so Pop it off and restart the process + None => Pop, + // The head of the left path yielded an edge, so make that the new head + // of the left path + Some(Edge(next)) => Push(Traverse::traverse(next)), + // The head of the left path yielded entry, so yield that + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + } + }; + + // Handle any operation on the left stack as necessary + match op { + Push(item) => { self.left.push(item); }, + Pop => { self.left.pop(); }, + } + } + } + + fn size_hint(&self) -> (uint, Option) { + (self.size, Some(self.size)) + } +} + +impl + DoubleEndedIterator>> + DoubleEndedIterator<(K, V)> for AbsEntries { + // next_back is totally symmetric to next + fn next_back(&mut self) -> Option<(K, V)> { + loop { + let op = match self.right.back_mut() { + None => match self.lca.next_back() { + None => match self.left.pop_front() { + None => return None, + Some(left) => { + 
self.lca = left; + continue; + } + }, + Some(Edge(next)) => Push(Traverse::traverse(next)), + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + }, + Some(iter) => match iter.next_back() { + None => Pop, + Some(Edge(next)) => Push(Traverse::traverse(next)), + Some(Elem(k, v)) => { + self.size -= 1; + return Some((k, v)) + } + } + }; + + match op { + Push(item) => { self.right.push(item); }, + Pop => { self.right.pop(); } + } + } + } +} + +impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a V)> { self.inner.next() } + fn size_hint(&self) -> (uint, Option) { self.inner.size_hint() } +} +impl<'a, K, V> DoubleEndedIterator<(&'a K, &'a V)> for Entries<'a, K, V> { + fn next_back(&mut self) -> Option<(&'a K, &'a V)> { self.inner.next_back() } +} +impl<'a, K, V> ExactSize<(&'a K, &'a V)> for Entries<'a, K, V> {} + + +impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { self.inner.next() } + fn size_hint(&self) -> (uint, Option) { self.inner.size_hint() } +} +impl<'a, K, V> DoubleEndedIterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> { + fn next_back(&mut self) -> Option<(&'a K, &'a mut V)> { self.inner.next_back() } +} +impl<'a, K, V> ExactSize<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {} + + +impl Iterator<(K, V)> for MoveEntries { + fn next(&mut self) -> Option<(K, V)> { self.inner.next() } + fn size_hint(&self) -> (uint, Option) { self.inner.size_hint() } +} +impl DoubleEndedIterator<(K, V)> for MoveEntries { + fn next_back(&mut self) -> Option<(K, V)> { self.inner.next_back() } +} +impl ExactSize<(K, V)> for MoveEntries {} + + + +impl<'a, K: Ord, V> VacantEntry<'a, K, V> { + /// Sets the value of the entry with the VacantEntry's key, + /// and returns a mutable reference to it. 
+ pub fn set(self, value: V) -> &'a mut V { + self.stack.insert(self.key, value) + } +} + +impl<'a, K: Ord, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the value in the entry. + pub fn get(&self) -> &V { + self.stack.peek() + } + + /// Gets a mutable reference to the value in the entry. + pub fn get_mut(&mut self) -> &mut V { + self.stack.peek_mut() + } + + /// Converts the entry into a mutable reference to its value. + pub fn into_mut(self) -> &'a mut V { + self.stack.into_top() + } + + /// Sets the value of the entry with the OccupiedEntry's key, + /// and returns the entry's old value. + pub fn set(&mut self, mut value: V) -> V { + mem::swap(self.stack.peek_mut(), &mut value); + value + } + + /// Takes the value of the entry out of the map, and returns it. + pub fn take(self) -> V { + self.stack.remove() + } +} + +impl BTreeMap { + /// Gets an iterator over the entries of the map. + pub fn iter<'a>(&'a self) -> Entries<'a, K, V> { + let len = self.len(); + Entries { + inner: AbsEntries { + lca: Traverse::traverse(&self.root), + left: RingBuf::new(), + right: RingBuf::new(), + size: len, + } + } + } + + /// Gets a mutable iterator over the entries of the map. + pub fn iter_mut<'a>(&'a mut self) -> MutEntries<'a, K, V> { + let len = self.len(); + MutEntries { + inner: AbsEntries { + lca: Traverse::traverse(&mut self.root), + left: RingBuf::new(), + right: RingBuf::new(), + size: len, + } + } + } + + /// Gets an owning iterator over the entries of the map. + pub fn into_iter(self) -> MoveEntries { + let len = self.len(); + MoveEntries { + inner: AbsEntries { + lca: Traverse::traverse(self.root), + left: RingBuf::new(), + right: RingBuf::new(), + size: len, + } + } + } + + /// Gets an iterator over the keys of the map. + pub fn keys<'a>(&'a self) -> Keys<'a, K, V> { + self.iter().map(|(k, _)| k) + } + + /// Gets an iterator over the values of the map. 
+ pub fn values<'a>(&'a self) -> Values<'a, K, V> { + self.iter().map(|(_, v)| v) + } +} + +impl BTreeMap { + /// Gets the given key's corresponding entry in the map for in-place manipulation. + pub fn entry<'a>(&'a mut self, key: K) -> Entry<'a, K, V> { + // same basic logic of `swap` and `pop`, blended together + let mut stack = stack::PartialSearchStack::new(self); + loop { + match stack.next().search(&key) { + Found(i) => { + // Perfect match + return Occupied(OccupiedEntry { + stack: stack.seal(i) + }); + }, + GoDown(i) => { + stack = match stack.push(i) { + stack::Done(new_stack) => { + // Not in the tree, but we've found where it goes + return Vacant(VacantEntry { + stack: new_stack, + key: key, + }); + } + stack::Grew(new_stack) => { + // We've found the subtree this key must go in + new_stack + } + }; + } + } + } + } +} + + + + + +#[cfg(test)] +mod test { + use std::prelude::*; + + use {Map, MutableMap}; + use super::{BTreeMap, Occupied, Vacant}; + + #[test] + fn test_basic_large() { + let mut map = BTreeMap::new(); + let size = 10000u; + assert_eq!(map.len(), 0); + + for i in range(0, size) { + assert_eq!(map.swap(i, 10*i), None); + assert_eq!(map.len(), i + 1); + } + + for i in range(0, size) { + assert_eq!(map.find(&i).unwrap(), &(i*10)); + } + + for i in range(size, size*2) { + assert_eq!(map.find(&i), None); + } + + for i in range(0, size) { + assert_eq!(map.swap(i, 100*i), Some(10*i)); + assert_eq!(map.len(), size); + } + + for i in range(0, size) { + assert_eq!(map.find(&i).unwrap(), &(i*100)); + } + + for i in range(0, size/2) { + assert_eq!(map.pop(&(i*2)), Some(i*200)); + assert_eq!(map.len(), size - i - 1); + } + + for i in range(0, size/2) { + assert_eq!(map.find(&(2*i)), None); + assert_eq!(map.find(&(2*i+1)).unwrap(), &(i*200 + 100)); + } + + for i in range(0, size/2) { + assert_eq!(map.pop(&(2*i)), None); + assert_eq!(map.pop(&(2*i+1)), Some(i*200 + 100)); + assert_eq!(map.len(), size/2 - i - 1); + } + } + + #[test] + fn test_basic_small() { 
+ let mut map = BTreeMap::new(); + assert_eq!(map.pop(&1), None); + assert_eq!(map.find(&1), None); + assert_eq!(map.swap(1u, 1u), None); + assert_eq!(map.find(&1), Some(&1)); + assert_eq!(map.swap(1, 2), Some(1)); + assert_eq!(map.find(&1), Some(&2)); + assert_eq!(map.swap(2, 4), None); + assert_eq!(map.find(&2), Some(&4)); + assert_eq!(map.pop(&1), Some(2)); + assert_eq!(map.pop(&2), Some(4)); + assert_eq!(map.pop(&1), None); + } + + #[test] + fn test_iter() { + let size = 10000u; + + // Forwards + let mut map: BTreeMap = Vec::from_fn(size, |i| (i, i)).into_iter().collect(); + + { + let mut iter = map.iter(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&i, &i)); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.iter_mut(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&i, &mut (i + 0))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.into_iter(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (i, i)); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + } + + #[test] + fn test_iter_rev() { + let size = 10000u; + + // Forwards + let mut map: BTreeMap = Vec::from_fn(size, |i| (i, i)).into_iter().collect(); + + { + let mut iter = map.iter().rev(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&(size - i - 1), &(size - i - 1))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.iter_mut().rev(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (&(size - i - 
1), &mut(size - i - 1))); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + { + let mut iter = map.into_iter().rev(); + for i in range(0, size) { + assert_eq!(iter.size_hint(), (size - i, Some(size - i))); + assert_eq!(iter.next().unwrap(), (size - i - 1, size - i - 1)); + } + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.next(), None); + } + + } + + #[test] + fn test_entry(){ + let xs = [(1i, 10i), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; + + let mut map: BTreeMap = xs.iter().map(|&x| x).collect(); + + // Existing key (insert) + match map.entry(1) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + assert_eq!(view.get(), &10); + assert_eq!(view.set(100), 10); + } + } + assert_eq!(map.find(&1).unwrap(), &100); + assert_eq!(map.len(), 6); + + + // Existing key (update) + match map.entry(2) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + let v = view.get_mut(); + *v *= 10; + } + } + assert_eq!(map.find(&2).unwrap(), &200); + assert_eq!(map.len(), 6); + + // Existing key (take) + match map.entry(3) { + Vacant(_) => unreachable!(), + Occupied(view) => { + assert_eq!(view.take(), 30); + } + } + assert_eq!(map.find(&3), None); + assert_eq!(map.len(), 5); + + + // Inexistent key (insert) + match map.entry(10) { + Occupied(_) => unreachable!(), + Vacant(view) => { + assert_eq!(*view.set(1000), 1000); + } + } + assert_eq!(map.find(&10).unwrap(), &1000); + assert_eq!(map.len(), 6); + } +} + + + + + + +#[cfg(test)] +mod bench { + use test::Bencher; + + use super::BTreeMap; + use deque::bench::{insert_rand_n, insert_seq_n, find_rand_n, find_seq_n}; + + #[bench] + pub fn insert_rand_100(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + insert_rand_n(100, &mut m, b); + } + + #[bench] + pub fn insert_rand_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + insert_rand_n(10_000, &mut m, b); + } + + // Insert seq + #[bench] + pub fn insert_seq_100(b: &mut Bencher) { + 
let mut m : BTreeMap = BTreeMap::new(); + insert_seq_n(100, &mut m, b); + } + + #[bench] + pub fn insert_seq_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + insert_seq_n(10_000, &mut m, b); + } + + // Find rand + #[bench] + pub fn find_rand_100(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_rand_n(100, &mut m, b); + } + + #[bench] + pub fn find_rand_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_rand_n(10_000, &mut m, b); + } + + // Find seq + #[bench] + pub fn find_seq_100(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_seq_n(100, &mut m, b); + } + + #[bench] + pub fn find_seq_10_000(b: &mut Bencher) { + let mut m : BTreeMap = BTreeMap::new(); + find_seq_n(10_000, &mut m, b); + } +} diff --git a/src/libcollections/btree/mod.rs b/src/libcollections/btree/mod.rs new file mode 100644 index 0000000000000..435a91f217eae --- /dev/null +++ b/src/libcollections/btree/mod.rs @@ -0,0 +1,32 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +pub use self::map::BTreeMap; +pub use self::map::Entries; +pub use self::map::MutEntries; +pub use self::map::MoveEntries; +pub use self::map::Keys; +pub use self::map::Values; +pub use self::map::Entry; +pub use self::map::OccupiedEntry; +pub use self::map::VacantEntry; + +pub use self::set::BTreeSet; +pub use self::set::Items; +pub use self::set::MoveItems; +pub use self::set::DifferenceItems; +pub use self::set::UnionItems; +pub use self::set::SymDifferenceItems; +pub use self::set::IntersectionItems; + + +mod node; +mod map; +mod set; diff --git a/src/libcollections/btree/node.rs b/src/libcollections/btree/node.rs new file mode 100644 index 0000000000000..e30b29f8767d3 --- /dev/null +++ b/src/libcollections/btree/node.rs @@ -0,0 +1,552 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This module represents all the internal representation and logic for a B-Tree's node +// with a safe interface, so that BTreeMap itself does not depend on any of these details. + +use core::prelude::*; + +use core::{slice, mem, ptr}; +use core::iter::Zip; +use MutableSeq; + +use vec; +use vec::Vec; + +/// Represents the result of an Insertion: either the item fit, or the node had to split +pub enum InsertionResult { + /// The inserted element fit + Fit, + /// The inserted element did not fit, so the node was split + Split(K, V, Node), +} + +/// Represents the result of a search for a key in a single node +pub enum SearchResult { + /// The element was found at the given index + Found(uint), + /// The element wasn't found, but if it's anywhere, it must be beyond this edge + GoDown(uint), +} + +/// A B-Tree Node. 
We keep keys/edges/values separate to optimize searching for keys. +#[deriving(Clone)] +pub struct Node { + // FIXME(Gankro): This representation is super safe and easy to reason about, but painfully + // inefficient. As three Vecs, each node consists of *9* words: (ptr, cap, size) * 3. In + // theory, if we take full control of allocation like HashMap's RawTable does, + // and restrict leaves to max size 256 (not unreasonable for a btree node) we can cut + // this down to just (ptr, cap: u8, size: u8, is_leaf: bool). With generic + // integer arguments, cap can even move into the type, reducing this just to + // (ptr, size, is_leaf). This could also have cache benefits for very small nodes, as keys + // could bleed into edges and vals. + // + // However doing this would require a fair amount of code to reimplement all + // the Vec logic and iterators. It would also use *way* more unsafe code, which sucks and is + // hard. For now, we accept this cost in the name of correctness and simplicity. + // + // As a compromise, keys and vals could be merged into one Vec<(K, V)>, which would shave + // off 3 words, but possibly hurt our cache efficiency during search, which only cares about + // keys. This would also avoid the Zip we use in our iterator implementations. This is + // probably worth investigating. + // + // Note that this space waste is especially tragic since we store the Nodes by value in their + // parent's edges Vec, so unoccupied spaces in the edges Vec are quite large, and we have + // to shift around a lot more bits during insertion/removal. + + keys: Vec, + edges: Vec>, + vals: Vec, +} + +impl Node { + /// Searches for the given key in the node. If it finds an exact match, + /// `Found` will be yielded with the matching index. If it fails to find an exact match, + /// `GoDown` will be yielded with the index of the subtree the key must lie in.
+ pub fn search(&self, key: &K) -> SearchResult { + // FIXME(Gankro): Tune when to search linear or binary based on B (and maybe K/V). + // For the B configured as of this writing (B = 6), binary search was *significantly* + // worse for uints. + self.search_linear(key) + } + + fn search_linear(&self, key: &K) -> SearchResult { + for (i, k) in self.keys.iter().enumerate() { + match k.cmp(key) { + Less => {}, + Equal => return Found(i), + Greater => return GoDown(i), + } + } + GoDown(self.len()) + } +} + +// Public interface +impl Node { + /// Make a new internal node + pub fn new_internal(capacity: uint) -> Node { + Node { + keys: Vec::with_capacity(capacity), + vals: Vec::with_capacity(capacity), + edges: Vec::with_capacity(capacity + 1), + } + } + + /// Make a new leaf node + pub fn new_leaf(capacity: uint) -> Node { + Node { + keys: Vec::with_capacity(capacity), + vals: Vec::with_capacity(capacity), + edges: Vec::new(), + } + } + + /// Make a leaf root from scratch + pub fn make_leaf_root(b: uint) -> Node { + Node::new_leaf(capacity_from_b(b)) + } + + /// Make an internal root and swap it with an old root + pub fn make_internal_root(left_and_out: &mut Node, b: uint, key: K, value: V, + right: Node) { + let mut node = Node::new_internal(capacity_from_b(b)); + mem::swap(left_and_out, &mut node); + left_and_out.keys.push(key); + left_and_out.vals.push(value); + left_and_out.edges.push(node); + left_and_out.edges.push(right); + } + + + /// How many key-value pairs the node contains + pub fn len(&self) -> uint { + self.keys.len() + } + + /// How many key-value pairs the node can fit + pub fn capacity(&self) -> uint { + self.keys.capacity() + } + + /// If the node has no children (i.e. it is a leaf) + pub fn is_leaf(&self) -> bool { + self.edges.is_empty() + } + + /// if the node has too few elements + pub fn is_underfull(&self) -> bool { + self.len() < min_load_from_capacity(self.capacity()) + } + + /// if the node cannot fit any more elements + pub fn is_full(&self) -> bool { +
self.len() == self.capacity() + } + + /// Swap the given key-value pair with the key-value pair stored in the node's index, + /// without checking bounds. + pub unsafe fn unsafe_swap(&mut self, index: uint, key: &mut K, val: &mut V) { + mem::swap(self.keys.as_mut_slice().unsafe_mut(index), key); + mem::swap(self.vals.as_mut_slice().unsafe_mut(index), val); + } + + /// Get the node's key mutably without any bounds checks. + pub unsafe fn unsafe_key_mut(&mut self, index: uint) -> &mut K { + self.keys.as_mut_slice().unsafe_mut(index) + } + + /// Get the node's value at the given index + pub fn val(&self, index: uint) -> Option<&V> { + self.vals.as_slice().get(index) + } + + /// Get the node's value mutably at the given index + pub fn val_mut(&mut self, index: uint) -> Option<&mut V> { + self.vals.as_mut_slice().get_mut(index) + } + + /// Get the node's value mutably without any bounds checks. + pub unsafe fn unsafe_val_mut(&mut self, index: uint) -> &mut V { + self.vals.as_mut_slice().unsafe_mut(index) + } + + /// Get the node's edge at the given index + pub fn edge(&self, index: uint) -> Option<&Node> { + self.edges.as_slice().get(index) + } + + /// Get the node's edge mutably at the given index + pub fn edge_mut(&mut self, index: uint) -> Option<&mut Node> { + self.edges.as_mut_slice().get_mut(index) + } + + /// Get the node's edge mutably without any bounds checks. + pub unsafe fn unsafe_edge_mut(&mut self, index: uint) -> &mut Node { + self.edges.as_mut_slice().unsafe_mut(index) + } + + /// Pop an edge off the end of the node + pub fn pop_edge(&mut self) -> Option> { + self.edges.pop() + } + + /// Try to insert this key-value pair at the given index in this leaf node + /// If the node is full, we have to split it. + /// + /// Returns a *mut V to the inserted value, because the caller may want this when + /// they're done mutating the tree, but we don't want to borrow anything for now.
+ pub fn insert_as_leaf(&mut self, index: uint, key: K, value: V) -> + (InsertionResult, *mut V) { + if !self.is_full() { + // The element can fit, just insert it + self.insert_fit_as_leaf(index, key, value); + (Fit, unsafe { self.unsafe_val_mut(index) as *mut _ }) + } else { + // The element can't fit, this node is full. Split it into two nodes. + let (new_key, new_val, mut new_right) = self.split(); + let left_len = self.len(); + + let ptr = if index <= left_len { + self.insert_fit_as_leaf(index, key, value); + unsafe { self.unsafe_val_mut(index) as *mut _ } + } else { + new_right.insert_fit_as_leaf(index - left_len - 1, key, value); + unsafe { new_right.unsafe_val_mut(index - left_len - 1) as *mut _ } + }; + + (Split(new_key, new_val, new_right), ptr) + } + } + + /// Try to insert this key-value pair at the given index in this internal node + /// If the node is full, we have to split it. + pub fn insert_as_internal(&mut self, index: uint, key: K, value: V, right: Node) + -> InsertionResult { + if !self.is_full() { + // The element can fit, just insert it + self.insert_fit_as_internal(index, key, value, right); + Fit + } else { + // The element can't fit, this node is full. Split it into two nodes. + let (new_key, new_val, mut new_right) = self.split(); + let left_len = self.len(); + + if index <= left_len { + self.insert_fit_as_internal(index, key, value, right); + } else { + new_right.insert_fit_as_internal(index - left_len - 1, key, value, right); + } + + Split(new_key, new_val, new_right) + } + } + + /// Remove the key-value pair at the given index + pub fn remove_as_leaf(&mut self, index: uint) -> (K, V) { + match (self.keys.remove(index), self.vals.remove(index)) { + (Some(k), Some(v)) => (k, v), + _ => unreachable!(), + } + } + + /// Handle an underflow in this node's child. We favour handling "to the left" because we know + /// we're empty, but our neighbour can be full. 
Handling to the left means when we choose to + /// steal, we pop off the end of our neighbour (always fast) and "unshift" ourselves + /// (always slow, but at least faster since we know we're half-empty). + /// Handling "to the right" reverses these roles. Of course, we merge whenever possible + /// because we want dense nodes, and merging is about equal work regardless of direction. + pub fn handle_underflow(&mut self, underflowed_child_index: uint) { + assert!(underflowed_child_index <= self.len()); + unsafe { + if underflowed_child_index > 0 { + self.handle_underflow_to_left(underflowed_child_index); + } else { + self.handle_underflow_to_right(underflowed_child_index); + } + } + } + + pub fn iter<'a>(&'a self) -> Traversal<'a, K, V> { + let is_leaf = self.is_leaf(); + Traversal { + elems: self.keys.as_slice().iter().zip(self.vals.as_slice().iter()), + edges: self.edges.as_slice().iter(), + head_is_edge: true, + tail_is_edge: true, + has_edges: !is_leaf, + } + } + + pub fn iter_mut<'a>(&'a mut self) -> MutTraversal<'a, K, V> { + let is_leaf = self.is_leaf(); + MutTraversal { + elems: self.keys.as_slice().iter().zip(self.vals.as_mut_slice().iter_mut()), + edges: self.edges.as_mut_slice().iter_mut(), + head_is_edge: true, + tail_is_edge: true, + has_edges: !is_leaf, + } + } + + pub fn into_iter(self) -> MoveTraversal { + let is_leaf = self.is_leaf(); + MoveTraversal { + elems: self.keys.into_iter().zip(self.vals.into_iter()), + edges: self.edges.into_iter(), + head_is_edge: true, + tail_is_edge: true, + has_edges: !is_leaf, + } + } +} + +// Private implementation details +impl Node { + /// Make a node from its raw components + fn from_vecs(keys: Vec, vals: Vec, edges: Vec>) -> Node { + Node { + keys: keys, + vals: vals, + edges: edges, + } + } + + /// We have somehow verified that this key-value pair will fit in this leaf node, + /// so insert under that assumption.
+ fn insert_fit_as_leaf(&mut self, index: uint, key: K, val: V) { + self.keys.insert(index, key); + self.vals.insert(index, val); + } + + /// We have somehow verified that this key-value pair will fit in this internal node, + /// so insert under that assumption + fn insert_fit_as_internal(&mut self, index: uint, key: K, val: V, right: Node) { + self.keys.insert(index, key); + self.vals.insert(index, val); + self.edges.insert(index + 1, right); + } + + /// Node is full, so split it into two nodes, and yield the middle-most key-value pair + /// because we have one too many, and our parent now has one too few + fn split(&mut self) -> (K, V, Node) { + let r_keys = split(&mut self.keys); + let r_vals = split(&mut self.vals); + let r_edges = if self.edges.is_empty() { + Vec::new() + } else { + split(&mut self.edges) + }; + + let right = Node::from_vecs(r_keys, r_vals, r_edges); + // Pop it + let key = self.keys.pop().unwrap(); + let val = self.vals.pop().unwrap(); + + (key, val, right) + } + + /// Right is underflowed. Try to steal from left, + /// but merge left and right if left is low too. + unsafe fn handle_underflow_to_left(&mut self, underflowed_child_index: uint) { + let left_len = self.edges[underflowed_child_index - 1].len(); + if left_len > min_load_from_capacity(self.capacity()) { + self.steal_to_left(underflowed_child_index); + } else { + self.merge_children(underflowed_child_index - 1); + } + } + + /// Left is underflowed. Try to steal from the right, + /// but merge left and right if right is low too. + unsafe fn handle_underflow_to_right(&mut self, underflowed_child_index: uint) { + let right_len = self.edges[underflowed_child_index + 1].len(); + if right_len > min_load_from_capacity(self.capacity()) { + self.steal_to_right(underflowed_child_index); + } else { + self.merge_children(underflowed_child_index); + } + } + + /// Steal! Stealing is roughly analogous to a binary tree rotation. + /// In this case, we're "rotating" right.
+ unsafe fn steal_to_left(&mut self, underflowed_child_index: uint) { + // Take the biggest stuff off left + let (mut key, mut val, edge) = { + let left = self.unsafe_edge_mut(underflowed_child_index - 1); + match (left.keys.pop(), left.vals.pop(), left.edges.pop()) { + (Some(k), Some(v), e) => (k, v, e), + _ => unreachable!(), + } + }; + + // Swap the parent's separating key-value pair with left's + self.unsafe_swap(underflowed_child_index - 1, &mut key, &mut val); + + // Put them at the start of right + { + let right = self.unsafe_edge_mut(underflowed_child_index); + right.keys.insert(0, key); + right.vals.insert(0, val); + match edge { + None => {} + Some(e) => right.edges.insert(0, e) + } + } + } + + /// Steal! Stealing is roughly analogous to a binary tree rotation. + /// In this case, we're "rotating" left. + unsafe fn steal_to_right(&mut self, underflowed_child_index: uint) { + // Take the smallest stuff off right + let (mut key, mut val, edge) = { + let right = self.unsafe_edge_mut(underflowed_child_index + 1); + match (right.keys.remove(0), right.vals.remove(0), right.edges.remove(0)) { + (Some(k), Some(v), e) => (k, v, e), + _ => unreachable!(), + } + }; + + // Swap the parent's separating key-value pair with right's + self.unsafe_swap(underflowed_child_index, &mut key, &mut val); + + // Put them at the end of left + { + let left = self.unsafe_edge_mut(underflowed_child_index); + left.keys.push(key); + left.vals.push(val); + match edge { + None => {} + Some(e) => left.edges.push(e) + } + } + } + + /// Merge! Left and right will be smooshed into one node, along with the key-value + /// pair that separated them in their parent.
+ unsafe fn merge_children(&mut self, left_index: uint) { + // Permanently remove right's index, and the key-value pair that separates + // left and right + let (key, val, right) = { + match (self.keys.remove(left_index), + self.vals.remove(left_index), + self.edges.remove(left_index + 1)) { + (Some(k), Some(v), Some(e)) => (k, v, e), + _ => unreachable!(), + } + }; + + // Give left right's stuff. + let left = self.unsafe_edge_mut(left_index); + left.absorb(key, val, right); + } + + /// Take all the values from right, separated by the given key and value + fn absorb(&mut self, key: K, val: V, right: Node) { + // Just as a sanity check, make sure we can fit this guy in + debug_assert!(self.len() + right.len() <= self.capacity()) + + self.keys.push(key); + self.vals.push(val); + self.keys.extend(right.keys.into_iter()); + self.vals.extend(right.vals.into_iter()); + self.edges.extend(right.edges.into_iter()); + } +} + +/// Takes a Vec, and splits half the elements into a new one. +fn split(left: &mut Vec) -> Vec { + // This function is intended to be called on a full Vec of size 2B - 1 (keys, values), + // or 2B (edges). In the former case, left should get B elements, and right should get + // B - 1. In the latter case, both should get B. Therefore, we can just always take the last + // size / 2 elements from left, and put them on right. This also ensures this method is + // safe, even if the Vec isn't full. Just uninteresting for our purposes.
+ let len = left.len(); + let right_len = len / 2; + let left_len = len - right_len; + let mut right = Vec::with_capacity(left.capacity()); + unsafe { + let left_ptr = left.as_slice().unsafe_get(left_len) as *const _; + let right_ptr = right.as_mut_slice().as_mut_ptr(); + ptr::copy_nonoverlapping_memory(right_ptr, left_ptr, right_len); + left.set_len(left_len); + right.set_len(right_len); + } + right +} + +/// Get the capacity of a node from the order of the parent B-Tree +fn capacity_from_b(b: uint) -> uint { + 2 * b - 1 +} + +/// Get the minimum load of a node from its capacity +fn min_load_from_capacity(cap: uint) -> uint { + // B - 1 + cap / 2 +} + +/// An abstraction over all the different kinds of traversals a node supports +struct AbsTraversal { + elems: Elems, + edges: Edges, + head_is_edge: bool, + tail_is_edge: bool, + has_edges: bool, +} + +/// A single atomic step in a traversal. Either an element is visited, or an edge is followed +pub enum TraversalItem { + Elem(K, V), + Edge(E), +} + +/// A traversal over a node's entries and edges +pub type Traversal<'a, K, V> = AbsTraversal, slice::Items<'a, V>>, + slice::Items<'a, Node>>; + +/// A mutable traversal over a node's entries and edges +pub type MutTraversal<'a, K, V> = AbsTraversal, slice::MutItems<'a, V>>, + slice::MutItems<'a, Node>>; + +/// An owning traversal over a node's entries and edges +pub type MoveTraversal = AbsTraversal, vec::MoveItems>, + vec::MoveItems>>; + + +impl, Edges: Iterator> + Iterator> for AbsTraversal { + + fn next(&mut self) -> Option> { + let head_is_edge = self.head_is_edge; + self.head_is_edge = !head_is_edge; + + if head_is_edge && self.has_edges { + self.edges.next().map(|node| Edge(node)) + } else { + self.elems.next().map(|(k, v)| Elem(k, v)) + } + } +} + +impl, Edges: DoubleEndedIterator> + DoubleEndedIterator> for AbsTraversal { + + fn next_back(&mut self) -> Option> { + let tail_is_edge = self.tail_is_edge; + self.tail_is_edge = !tail_is_edge; + + if tail_is_edge && 
self.has_edges { + self.edges.next_back().map(|node| Edge(node)) + } else { + self.elems.next_back().map(|(k, v)| Elem(k, v)) + } + } +} diff --git a/src/libcollections/btree/set.rs b/src/libcollections/btree/set.rs new file mode 100644 index 0000000000000..b21af89742c94 --- /dev/null +++ b/src/libcollections/btree/set.rs @@ -0,0 +1,433 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is pretty much entirely stolen from TreeSet, since BTreeMap has an identical interface +// to TreeMap + +use core::prelude::*; + +use super::{BTreeMap, Keys, MoveEntries}; +use std::hash::Hash; +use core::default::Default; +use core::{iter, fmt}; +use core::iter::Peekable; +use core::fmt::Show; + +use {Mutable, Set, MutableSet, MutableMap, Map}; + +/// A set based on a B-Tree. +#[deriving(Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] +pub struct BTreeSet{ + map: BTreeMap, +} + +/// An iterator over a BTreeSet's items. +pub type Items<'a, T> = Keys<'a, T, ()>; + +/// An owning iterator over a BTreeSet's items. +pub type MoveItems = iter::Map<'static, (T, ()), T, MoveEntries>; + +/// A lazy iterator producing elements in the set difference (in-order). +pub struct DifferenceItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +/// A lazy iterator producing elements in the set symmetric difference (in-order). +pub struct SymDifferenceItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +/// A lazy iterator producing elements in the set intersection (in-order). 
+pub struct IntersectionItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +/// A lazy iterator producing elements in the set union (in-order). +pub struct UnionItems<'a, T:'a> { + a: Peekable<&'a T, Items<'a, T>>, + b: Peekable<&'a T, Items<'a, T>>, +} + +impl BTreeSet { + /// Makes a new BTreeSet with a reasonable choice of B. + pub fn new() -> BTreeSet { + BTreeSet { map: BTreeMap::new() } + } + + /// Makes a new BTreeSet with the given B. + pub fn with_b(b: uint) -> BTreeSet { + BTreeSet { map: BTreeMap::with_b(b) } + } +} + +impl BTreeSet { + /// Gets an iterator over the BTreeSet's contents. + pub fn iter<'a>(&'a self) -> Items<'a, T> { + self.map.keys() + } + + /// Gets an iterator for moving out the BTreeSet's contents. + pub fn into_iter(self) -> MoveItems { + self.map.into_iter().map(|(k, _)| k) + } +} + +impl BTreeSet { + /// Visits the values representing the difference, in ascending order. + pub fn difference<'a>(&'a self, other: &'a BTreeSet) -> DifferenceItems<'a, T> { + DifferenceItems{a: self.iter().peekable(), b: other.iter().peekable()} + } + + /// Visits the values representing the symmetric difference, in ascending order. + pub fn symmetric_difference<'a>(&'a self, other: &'a BTreeSet) + -> SymDifferenceItems<'a, T> { + SymDifferenceItems{a: self.iter().peekable(), b: other.iter().peekable()} + } + + /// Visits the values representing the intersection, in ascending order. + pub fn intersection<'a>(&'a self, other: &'a BTreeSet) + -> IntersectionItems<'a, T> { + IntersectionItems{a: self.iter().peekable(), b: other.iter().peekable()} + } + + /// Visits the values representing the union, in ascending order.
+ pub fn union<'a>(&'a self, other: &'a BTreeSet) -> UnionItems<'a, T> { + UnionItems{a: self.iter().peekable(), b: other.iter().peekable()} + } +} + +impl Collection for BTreeSet { + fn len(&self) -> uint { + self.map.len() + } +} + +impl Mutable for BTreeSet { + fn clear(&mut self) { + self.map.clear() + } +} + +impl Set for BTreeSet { + fn contains(&self, value: &T) -> bool { + self.map.find(value).is_some() + } + + fn is_disjoint(&self, other: &BTreeSet) -> bool { + self.intersection(other).next().is_none() + } + + fn is_subset(&self, other: &BTreeSet) -> bool { + // Stolen from TreeMap + let mut x = self.iter(); + let mut y = other.iter(); + let mut a = x.next(); + let mut b = y.next(); + while a.is_some() { + if b.is_none() { + return false; + } + + let a1 = a.unwrap(); + let b1 = b.unwrap(); + + match b1.cmp(a1) { + Less => (), + Greater => return false, + Equal => a = x.next(), + } + + b = y.next(); + } + true + } +} + +impl MutableSet for BTreeSet{ + fn insert(&mut self, value: T) -> bool { + self.map.insert(value, ()) + } + + fn remove(&mut self, value: &T) -> bool { + self.map.remove(value) + } +} + +impl FromIterator for BTreeSet { + fn from_iter>(iter: Iter) -> BTreeSet { + let mut set = BTreeSet::new(); + set.extend(iter); + set + } +} + +impl Extendable for BTreeSet { + #[inline] + fn extend>(&mut self, mut iter: Iter) { + for elem in iter { + self.insert(elem); + } + } +} + +impl Default for BTreeSet { + fn default() -> BTreeSet { + BTreeSet::new() + } +} + +impl Show for BTreeSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(write!(f, "{{")); + + for (i, x) in self.iter().enumerate() { + if i != 0 { try!(write!(f, ", ")); } + try!(write!(f, "{}", *x)); + } + + write!(f, "}}") + } +} + +/// Compare `x` and `y`, but return `short` if x is None and `long` if y is None +fn cmp_opt(x: Option<&T>, y: Option<&T>, + short: Ordering, long: Ordering) -> Ordering { + match (x, y) { + (None , _ ) => short, + (_ , None ) => long, + 
(Some(x1), Some(y1)) => x1.cmp(y1), + } +} + +impl<'a, T: Ord> Iterator<&'a T> for DifferenceItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + match cmp_opt(self.a.peek(), self.b.peek(), Less, Less) { + Less => return self.a.next(), + Equal => { self.a.next(); self.b.next(); } + Greater => { self.b.next(); } + } + } + } +} + +impl<'a, T: Ord> Iterator<&'a T> for SymDifferenceItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + match cmp_opt(self.a.peek(), self.b.peek(), Greater, Less) { + Less => return self.a.next(), + Equal => { self.a.next(); self.b.next(); } + Greater => return self.b.next(), + } + } + } +} + +impl<'a, T: Ord> Iterator<&'a T> for IntersectionItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + let o_cmp = match (self.a.peek(), self.b.peek()) { + (None , _ ) => None, + (_ , None ) => None, + (Some(a1), Some(b1)) => Some(a1.cmp(b1)), + }; + match o_cmp { + None => return None, + Some(Less) => { self.a.next(); } + Some(Equal) => { self.b.next(); return self.a.next() } + Some(Greater) => { self.b.next(); } + } + } + } +} + +impl<'a, T: Ord> Iterator<&'a T> for UnionItems<'a, T> { + fn next(&mut self) -> Option<&'a T> { + loop { + match cmp_opt(self.a.peek(), self.b.peek(), Greater, Less) { + Less => return self.a.next(), + Equal => { self.b.next(); return self.a.next() } + Greater => return self.b.next(), + } + } + } +} + + +#[cfg(test)] +mod test { + use std::prelude::*; + + use {Set, MutableSet}; + use super::BTreeSet; + use std::hash; + + #[test] + fn test_clone_eq() { + let mut m = BTreeSet::new(); + + m.insert(1i); + m.insert(2); + + assert!(m.clone() == m); + } + + #[test] + fn test_hash() { + let mut x = BTreeSet::new(); + let mut y = BTreeSet::new(); + + x.insert(1i); + x.insert(2); + x.insert(3); + + y.insert(3i); + y.insert(2); + y.insert(1); + + assert!(hash::hash(&x) == hash::hash(&y)); + } + + fn check(a: &[int], + b: &[int], + expected: &[int], + f: |&BTreeSet, &BTreeSet, f: |&int| -> bool| -> 
bool) { + let mut set_a = BTreeSet::new(); + let mut set_b = BTreeSet::new(); + + for x in a.iter() { assert!(set_a.insert(*x)) } + for y in b.iter() { assert!(set_b.insert(*y)) } + + let mut i = 0; + f(&set_a, &set_b, |x| { + assert_eq!(*x, expected[i]); + i += 1; + true + }); + assert_eq!(i, expected.len()); + } + + #[test] + fn test_intersection() { + fn check_intersection(a: &[int], b: &[int], expected: &[int]) { + check(a, b, expected, |x, y, f| x.intersection(y).all(f)) + } + + check_intersection([], [], []); + check_intersection([1, 2, 3], [], []); + check_intersection([], [1, 2, 3], []); + check_intersection([2], [1, 2, 3], [2]); + check_intersection([1, 2, 3], [2], [2]); + check_intersection([11, 1, 3, 77, 103, 5, -5], + [2, 11, 77, -9, -42, 5, 3], + [3, 5, 11, 77]); + } + + #[test] + fn test_difference() { + fn check_difference(a: &[int], b: &[int], expected: &[int]) { + check(a, b, expected, |x, y, f| x.difference(y).all(f)) + } + + check_difference([], [], []); + check_difference([1, 12], [], [1, 12]); + check_difference([], [1, 2, 3, 9], []); + check_difference([1, 3, 5, 9, 11], + [3, 9], + [1, 5, 11]); + check_difference([-5, 11, 22, 33, 40, 42], + [-12, -5, 14, 23, 34, 38, 39, 50], + [11, 22, 33, 40, 42]); + } + + #[test] + fn test_symmetric_difference() { + fn check_symmetric_difference(a: &[int], b: &[int], + expected: &[int]) { + check(a, b, expected, |x, y, f| x.symmetric_difference(y).all(f)) + } + + check_symmetric_difference([], [], []); + check_symmetric_difference([1, 2, 3], [2], [1, 3]); + check_symmetric_difference([2], [1, 2, 3], [1, 3]); + check_symmetric_difference([1, 3, 5, 9, 11], + [-2, 3, 9, 14, 22], + [-2, 1, 5, 11, 14, 22]); + } + + #[test] + fn test_union() { + fn check_union(a: &[int], b: &[int], + expected: &[int]) { + check(a, b, expected, |x, y, f| x.union(y).all(f)) + } + + check_union([], [], []); + check_union([1, 2, 3], [2], [1, 2, 3]); + check_union([2], [1, 2, 3], [1, 2, 3]); + check_union([1, 3, 5, 9, 11, 16, 19, 24], 
+ [-2, 1, 5, 9, 13, 19], + [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]); + } + + #[test] + fn test_zip() { + let mut x = BTreeSet::new(); + x.insert(5u); + x.insert(12u); + x.insert(11u); + + let mut y = BTreeSet::new(); + y.insert("foo"); + y.insert("bar"); + + let x = x; + let y = y; + let mut z = x.iter().zip(y.iter()); + + // FIXME: #5801: this needs a type hint to compile... + let result: Option<(&uint, & &'static str)> = z.next(); + assert_eq!(result.unwrap(), (&5u, &("bar"))); + + let result: Option<(&uint, & &'static str)> = z.next(); + assert_eq!(result.unwrap(), (&11u, &("foo"))); + + let result: Option<(&uint, & &'static str)> = z.next(); + assert!(result.is_none()); + } + + #[test] + fn test_from_iter() { + let xs = [1i, 2, 3, 4, 5, 6, 7, 8, 9]; + + let set: BTreeSet = xs.iter().map(|&x| x).collect(); + + for x in xs.iter() { + assert!(set.contains(x)); + } + } + + #[test] + fn test_show() { + let mut set: BTreeSet = BTreeSet::new(); + let empty: BTreeSet = BTreeSet::new(); + + set.insert(1); + set.insert(2); + + let set_str = format!("{}", set); + + assert!(set_str == "{1, 2}".to_string()); + assert_eq!(format!("{}", empty), "{}".to_string()); + } +} diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index 9d3be0d14d385..8b9a0ec796e27 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -37,7 +37,7 @@ extern crate alloc; use core::prelude::Option; pub use bitv::{Bitv, BitvSet}; -pub use btree::BTree; +pub use btree::{BTreeMap, BTreeSet}; pub use core::prelude::Collection; pub use dlist::DList; pub use enum_set::EnumSet; diff --git a/src/libstd/collections/mod.rs b/src/libstd/collections/mod.rs index d98d490a84b27..324c0295971e1 100644 --- a/src/libstd/collections/mod.rs +++ b/src/libstd/collections/mod.rs @@ -16,7 +16,7 @@ pub use core_collections::{Collection, Mutable, Map, MutableMap}; pub use core_collections::{Set, MutableSet, Deque, MutableSeq}; -pub use core_collections::{Bitv, BitvSet, BTree, DList, EnumSet}; +pub 
use core_collections::{Bitv, BitvSet, BTreeMap, BTreeSet, DList, EnumSet}; pub use core_collections::{PriorityQueue, RingBuf, SmallIntMap}; pub use core_collections::{TreeMap, TreeSet, TrieMap, TrieSet}; pub use core_collections::{bitv, btree, dlist, enum_set};