From c7b5aae8c4ee88eca838f24701a91c19704b3330 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Sun, 11 Nov 2012 01:01:37 -0800 Subject: [PATCH 1/3] Add an insert_with_key function to the Map trait --- src/libstd/map.rs | 25 ++++++++++++++++++- src/libstd/smallintmap.rs | 7 ++++++ .../class-impl-very-parameterized-trait.rs | 8 ++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/libstd/map.rs b/src/libstd/map.rs index 915202143a1ea..d6feebc016c8b 100644 --- a/src/libstd/map.rs +++ b/src/libstd/map.rs @@ -27,7 +27,15 @@ pub trait Map { * * Returns true if the key did not already exist in the map */ - fn insert(v: K, v: V) -> bool; + fn insert(key: K, value: V) -> bool; + + /** + * Add a value to the map. + * + * If the map contains a value for the key, use the function + * to set a new value. + */ + fn insert_with_key(ff: fn(K, V, V) -> V, key: K, value: V) -> bool; /// Returns true if the map contains a value for the specified key pure fn contains_key(key: K) -> bool; @@ -264,6 +272,14 @@ pub mod chained { } } + fn insert_with_key(ff: fn(K, V, V) -> V, key: K, val: V) -> bool { + // this can be optimized but first lets see if it compiles... + match self.find(key) { + None => return self.insert(key, val), + Some(copy orig) => return self.insert(key, ff(key, orig, val)) + } + } + pure fn get(k: K) -> V { let opt_v = self.find(k); if opt_v.is_none() { @@ -447,6 +463,13 @@ impl @Mut>: } } + fn insert_with_key(ff: fn(K, V, V) -> V, key: K, val: V) -> bool { + match self.find(key) { + None => return self.insert(key, val), + Some(copy orig) => return self.insert(key, ff(key, orig, val)), + } + } + fn remove(key: K) -> bool { do self.borrow_mut |p| { p.remove(&key) diff --git a/src/libstd/smallintmap.rs b/src/libstd/smallintmap.rs index 9dc216a21557a..8439d214ca043 100644 --- a/src/libstd/smallintmap.rs +++ b/src/libstd/smallintmap.rs @@ -103,6 +103,13 @@ impl SmallIntMap: map::Map { pure fn find(key: uint) -> Option { find(self, key) } fn rehash() { fail } + fn insert_with_key(ff: fn(uint, V, V) -> V, key: uint, val: V) -> bool { + match self.find(key) { + None => return self.insert(key, val), + Some(copy orig) => return self.insert(key, ff(key, orig, val)), + } + } + pure fn each(it: fn(key: uint, value: V) -> bool) { self.each_ref(|k, v| it(*k, *v)) } diff --git a/src/test/run-pass/class-impl-very-parameterized-trait.rs b/src/test/run-pass/class-impl-very-parameterized-trait.rs index c11e41eb57f46..c8a08dbf818c0 100644 --- a/src/test/run-pass/class-impl-very-parameterized-trait.rs +++ b/src/test/run-pass/class-impl-very-parameterized-trait.rs @@ -61,6 +61,14 @@ impl cat : Map { else { None } } + fn insert_with_key(ff: fn(+k: int, +v0: T, +v1: T) -> T, +key: int, +val: T) -> bool { + match self.find(key) { + None => return self.insert(key, val), + Some(copy orig) => return self.insert(key, ff(key, orig, val)) + } + } + + fn remove(+k:int) -> bool { match self.find(k) { Some(x) => { From 4cb91e427a82e3d76621992709b9728ea5176757 Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Wed, 14 Nov 2012 21:33:32 -0800 Subject: [PATCH 2/3] Test insert_with_key... --- src/libstd/map.rs | 21 +++++++++++++++++++ src/test/bench/shootout-k-nucleotide-pipes.rs | 5 +---- src/test/bench/shootout-k-nucleotide.rs | 5 +---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/libstd/map.rs b/src/libstd/map.rs index d6feebc016c8b..041346c9cd9ce 100644 --- a/src/libstd/map.rs +++ b/src/libstd/map.rs @@ -773,4 +773,25 @@ mod tests { assert map.get(~"b") == 2; assert map.get(~"c") == 3; } + + #[test] + fn test_insert_with_key() { + let map = map::HashMap::<~str, uint>(); + + fn inc(k: ~str, v0: uint, v1: uint) -> uint { + v0 + v1 + } + + map.insert_with_key(inc, ~"cat", 1); + map.insert_with_key(inc, ~"mongoose", 1); + map.insert_with_key(inc, ~"cat", 7); + map.insert_with_key(inc, ~"ferret", 3); + map.insert_with_key(inc, ~"cat", 2); + + assert 10 == option::get(map.find(~"cat")); + assert 3 == option::get(map.find(~"ferret")); + assert 1 == option::get(map.find(~"mongoose")); + + assert None == map.find(~"unicorn"); + } } diff --git a/src/test/bench/shootout-k-nucleotide-pipes.rs b/src/test/bench/shootout-k-nucleotide-pipes.rs index bf94e11980a60..42198a6d78968 100644 --- a/src/test/bench/shootout-k-nucleotide-pipes.rs +++ b/src/test/bench/shootout-k-nucleotide-pipes.rs @@ -69,10 +69,7 @@ fn find(mm: HashMap<~[u8], uint>, key: ~str) -> uint { // given a map, increment the counter for a key fn update_freq(mm: HashMap<~[u8], uint>, key: &[u8]) { let key = vec::slice(key, 0, key.len()); - match mm.find(key) { - option::None => { mm.insert(key, 1u ); } - option::Some(val) => { mm.insert(key, 1u + val); } - } + mm.insert_with_key(|k,v,v1| {v + v1}, key, 1); } // given a ~[u8], for each window call a function diff --git a/src/test/bench/shootout-k-nucleotide.rs b/src/test/bench/shootout-k-nucleotide.rs index 9bc7abfdff78e..f582ac6737df8 100644 --- a/src/test/bench/shootout-k-nucleotide.rs +++ b/src/test/bench/shootout-k-nucleotide.rs @@ -66,10 +66,7 @@ fn find(mm: HashMap<~[u8], uint>, key: ~str) -> uint { // given a map, increment the counter for a key fn update_freq(mm: HashMap<~[u8], uint>, key: &[u8]) { let key = vec::slice(key, 0, key.len()); - match mm.find(key) { - option::None => { mm.insert(key, 1u ); } - option::Some(val) => { mm.insert(key, 1u + val); } - } + mm.insert_with_key(|k,v,v1| {v + v1}, key, 1); } // given a ~[u8], for each window call a function From 29b72dad8d3ce1a2108720226159cb427e6a27fb Mon Sep 17 00:00:00 2001 From: Kevin Cantu Date: Tue, 20 Nov 2012 23:33:31 -0800 Subject: [PATCH 3/3] Add improvements to insert_with_key This commit adds a lower-level implementation of the generic `insert_with_key` which I expect to be faster. Now insert could be defined with insert_with_key, too, although I'm not sure we want to do that. This also clarifies the tests a bit and adds an `insert_with` function. --- src/libstd/map.rs | 98 ++++++++++++++++--- src/libstd/smallintmap.rs | 40 +++++++- src/test/bench/shootout-k-nucleotide-pipes.rs | 2 +- src/test/bench/shootout-k-nucleotide.rs | 2 +- .../class-impl-very-parameterized-trait.rs | 9 +- 5 files changed, 132 insertions(+), 19 deletions(-) diff --git a/src/libstd/map.rs b/src/libstd/map.rs index 041346c9cd9ce..730b5275e1d79 100644 --- a/src/libstd/map.rs +++ b/src/libstd/map.rs @@ -35,7 +35,16 @@ pub trait Map { * If the map contains a value for the key, use the function * to set a new value. */ - fn insert_with_key(ff: fn(K, V, V) -> V, key: K, value: V) -> bool; + fn insert_with_key(key: K, newval: V, ff: fn(K, V, V) -> V) -> bool; + + /** + * Add a value to the map. + * + * If the map contains a value for the key, use the function + * to set a new value. (Like insert_with_key, but with a function + * of only values.) + */ + fn insert_with(key: K, newval: V, ff: fn(V, V) -> V) -> bool; /// Returns true if the map contains a value for the specified key pure fn contains_key(key: K) -> bool; @@ -272,12 +281,57 @@ pub mod chained { } } - fn insert_with_key(ff: fn(K, V, V) -> V, key: K, val: V) -> bool { - // this can be optimized but first lets see if it compiles... + fn insert_with_key(key: K, newval: V, ff: fn(K, V, V) -> V) -> bool { +/* match self.find(key) { None => return self.insert(key, val), Some(copy orig) => return self.insert(key, ff(key, orig, val)) } +*/ + + let hash = key.hash_keyed(0,0) as uint; + match self.search_tbl(&key, hash) { + NotFound => { + self.count += 1u; + let idx = hash % vec::len(self.chains); + let old_chain = self.chains[idx]; + self.chains[idx] = Some(@Entry { + hash: hash, + key: key, + value: newval, + next: old_chain}); + + // consider rehashing if more 3/4 full + let nchains = vec::len(self.chains); + let load = {num: (self.count + 1u) as int, + den: nchains as int}; + if !util::rational_leq(load, {num:3, den:4}) { + self.rehash(); + } + + return true; + } + FoundFirst(idx, entry) => { + self.chains[idx] = Some(@Entry { + hash: hash, + key: key, + value: ff(key, entry.value, newval), + next: entry.next}); + return false; + } + FoundAfter(prev, entry) => { + prev.next = Some(@Entry { + hash: hash, + key: key, + value: ff(key, entry.value, newval), + next: entry.next}); + return false; + } + } + } + + fn insert_with(key: K, newval: V, ff: fn(V, V) -> V) -> bool { + return self.insert_with_key(key, newval, |_k, v, v1| ff(v,v1)); } pure fn get(k: K) -> V { @@ -463,12 +517,16 @@ impl @Mut>: } } - fn insert_with_key(ff: fn(K, V, V) -> V, key: K, val: V) -> bool { - match self.find(key) { - None => return self.insert(key, val), - Some(copy orig) => return self.insert(key, ff(key, orig, val)), - } - } + fn insert_with_key(key: K, newval: V, ff: fn(K, V, V) -> V) -> bool { + match self.find(key) { + None => return self.insert(key, newval), + Some(copy orig) => return self.insert(key, ff(key, orig, newval)) + } + } + + fn insert_with(key: K, newval: V, ff: fn(V, V) -> V) -> bool { + return self.insert_with_key(key, newval, |_k, v, v1| ff(v,v1)); + } fn remove(key: K) -> bool { do self.borrow_mut |p| { @@ -778,20 +836,30 @@ mod tests { fn test_insert_with_key() { let map = map::HashMap::<~str, uint>(); - fn inc(k: ~str, v0: uint, v1: uint) -> uint { + // given a new key, initialize it with this new count, given + // given an existing key, add more to its count + fn addMoreToCount(_k: ~str, v0: uint, v1: uint) -> uint { + v0 + v1 + } + + fn addMoreToCount_simple(v0: uint, v1: uint) -> uint { v0 + v1 } - map.insert_with_key(inc, ~"cat", 1); - map.insert_with_key(inc, ~"mongoose", 1); - map.insert_with_key(inc, ~"cat", 7); - map.insert_with_key(inc, ~"ferret", 3); - map.insert_with_key(inc, ~"cat", 2); + // count the number of several types of animal, + // adding in groups as we go + map.insert_with(~"cat", 1, addMoreToCount_simple); + map.insert_with_key(~"mongoose", 1, addMoreToCount); + map.insert_with(~"cat", 7, addMoreToCount_simple); + map.insert_with_key(~"ferret", 3, addMoreToCount); + map.insert_with_key(~"cat", 2, addMoreToCount); + // check the total counts assert 10 == option::get(map.find(~"cat")); assert 3 == option::get(map.find(~"ferret")); assert 1 == option::get(map.find(~"mongoose")); + // sadly, no mythical animals were counted! assert None == map.find(~"unicorn"); } } diff --git a/src/libstd/smallintmap.rs b/src/libstd/smallintmap.rs index 8439d214ca043..2fdfa3deea856 100644 --- a/src/libstd/smallintmap.rs +++ b/src/libstd/smallintmap.rs @@ -103,13 +103,17 @@ impl SmallIntMap: map::Map { pure fn find(key: uint) -> Option { find(self, key) } fn rehash() { fail } - fn insert_with_key(ff: fn(uint, V, V) -> V, key: uint, val: V) -> bool { + fn insert_with_key(key: uint, val: V, ff: fn(uint, V, V) -> V) -> bool { match self.find(key) { None => return self.insert(key, val), Some(copy orig) => return self.insert(key, ff(key, orig, val)), } } + fn insert_with(key: uint, newval: V, ff: fn(V, V) -> V) -> bool { + return self.insert_with_key(key, newval, |_k, v, v1| ff(v,v1)); + } + pure fn each(it: fn(key: uint, value: V) -> bool) { self.each_ref(|k, v| it(*k, *v)) } @@ -149,3 +153,37 @@ impl SmallIntMap: ops::Index { pub fn as_map(s: SmallIntMap) -> map::Map { s as map::Map:: } + +#[cfg(test)] +mod tests { + + #[test] + fn test_insert_with_key() { + let map: SmallIntMap = mk(); + + // given a new key, initialize it with this new count, given + // given an existing key, add more to its count + fn addMoreToCount(_k: uint, v0: uint, v1: uint) -> uint { + v0 + v1 + } + + fn addMoreToCount_simple(v0: uint, v1: uint) -> uint { + v0 + v1 + } + + // count integers + map.insert_with(3, 1, addMoreToCount_simple); + map.insert_with_key(9, 1, addMoreToCount); + map.insert_with(3, 7, addMoreToCount_simple); + map.insert_with_key(5, 3, addMoreToCount); + map.insert_with_key(3, 2, addMoreToCount); + + // check the total counts + assert 10 == option::get(map.find(3)); + assert 3 == option::get(map.find(5)); + assert 1 == option::get(map.find(9)); + + // sadly, no sevens were counted + assert None == map.find(7); + } +} diff --git a/src/test/bench/shootout-k-nucleotide-pipes.rs b/src/test/bench/shootout-k-nucleotide-pipes.rs index 42198a6d78968..8030ca2672175 100644 --- a/src/test/bench/shootout-k-nucleotide-pipes.rs +++ b/src/test/bench/shootout-k-nucleotide-pipes.rs @@ -69,7 +69,7 @@ fn find(mm: HashMap<~[u8], uint>, key: ~str) -> uint { // given a map, increment the counter for a key fn update_freq(mm: HashMap<~[u8], uint>, key: &[u8]) { let key = vec::slice(key, 0, key.len()); - mm.insert_with_key(|k,v,v1| {v + v1}, key, 1); + mm.insert_with(key, 1, |v,v1| { v+v1 }); } // given a ~[u8], for each window call a function diff --git a/src/test/bench/shootout-k-nucleotide.rs b/src/test/bench/shootout-k-nucleotide.rs index f582ac6737df8..8efe3c7575fb7 100644 --- a/src/test/bench/shootout-k-nucleotide.rs +++ b/src/test/bench/shootout-k-nucleotide.rs @@ -66,7 +66,7 @@ fn find(mm: HashMap<~[u8], uint>, key: ~str) -> uint { // given a map, increment the counter for a key fn update_freq(mm: HashMap<~[u8], uint>, key: &[u8]) { let key = vec::slice(key, 0, key.len()); - mm.insert_with_key(|k,v,v1| {v + v1}, key, 1); + mm.insert_with(key, 1, |v,v1| { v+v1 }); } // given a ~[u8], for each window call a function diff --git a/src/test/run-pass/class-impl-very-parameterized-trait.rs b/src/test/run-pass/class-impl-very-parameterized-trait.rs index c8a08dbf818c0..ad7be9d86da79 100644 --- a/src/test/run-pass/class-impl-very-parameterized-trait.rs +++ b/src/test/run-pass/class-impl-very-parameterized-trait.rs @@ -61,13 +61,20 @@ impl cat : Map { else { None } } - fn insert_with_key(ff: fn(+k: int, +v0: T, +v1: T) -> T, +key: int, +val: T) -> bool { + fn insert_with_key(+key: int, +val: T, ff: fn(+k: int, +v0: T, +v1: T) -> T) -> bool { match self.find(key) { None => return self.insert(key, val), Some(copy orig) => return self.insert(key, ff(key, orig, val)) } } + fn insert_with(+key: int, +val: T, ff: fn(+v0: T, +v1: T) -> T) -> bool { + match self.find(key) { + None => return self.insert(key, val), + Some(copy orig) => return self.insert(key, ff(orig, val)) + } + } + fn remove(+k:int) -> bool { match self.find(k) {