Skip to content

Commit 35a18f4

Browse files
committed
TST: augment speed test for R merges
1 parent 166061a commit 35a18f4

File tree

1 file changed

+38
-27
lines changed

1 file changed

+38
-27
lines changed

bench/bench_merge.R

Lines changed: 38 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
library(plyr)
2+
library(data.table)
23
N <- 10000
34
indices = rep(NA, N)
45
indices2 = rep(NA, N)
@@ -13,12 +14,16 @@ right <- data.frame(key=indices,
1314
key2=indices2,
1415
value2=rnorm(10000))
1516

16-
left <- data.frame(key=rep(indices[1:1000], 10),
17-
key2=rep(indices2[1:1000], 10),
18-
value=rnorm(100000))
19-
right <- data.frame(key=indices[1:1000],
20-
key2=indices2[1:1000],
21-
value2=rnorm(10000))
17+
right2 <- data.frame(key=rep(indices, 2),
18+
key2=rep(indices2, 2),
19+
value2=rnorm(20000))
20+
21+
## left <- data.frame(key=rep(indices[1:1000], 10),
22+
## key2=rep(indices2[1:1000], 10),
23+
## value=rnorm(100000))
24+
## right <- data.frame(key=indices[1:1000],
25+
## key2=indices2[1:1000],
26+
## value2=rnorm(10000))
2227

2328
timeit <- function(func, niter=10) {
2429
timing = rep(NA, niter)
@@ -29,38 +34,44 @@ timeit <- function(func, niter=10) {
2934
mean(timing)
3035
}
3136

32-
left.join <- function(sort=TRUE) {
33-
result <- merge(left, right, all.x=TRUE, sort=sort)
37+
left.join <- function(sort=FALSE) {
38+
result <- base::merge(left, right, all.x=TRUE, sort=sort)
39+
}
40+
41+
right.join <- function(sort=FALSE) {
42+
result <- base::merge(left, right, all.y=TRUE, sort=sort)
3443
}
3544

36-
right.join <- function(sort=TRUE) {
37-
result <- merge(left, right, all.y=TRUE, sort=sort)
45+
outer.join <- function(sort=FALSE) {
46+
result <- base::merge(left, right, all=TRUE, sort=sort)
3847
}
3948

40-
outer.join <- function(sort=TRUE) {
41-
result <- merge(left, right, all=TRUE, sort=sort)
49+
inner.join <- function(sort=FALSE) {
50+
result <- base::merge(left, right, sort=sort)
4251
}
4352

44-
inner.join <- function(sort=TRUE) {
45-
reuslt <- merge(left, right, sort=sort)
53+
plyr.join <- function(type) {
54+
result <- plyr::join(left, right, by=c("key", "key2"),
55+
type=type, match="first")
4656
}
4757

4858
sort.options <- c(FALSE, TRUE)
4959

50-
results <- matrix(nrow=4, ncol=2)
51-
colnames(results) <- c("dont_sort", "sort")
52-
rownames(results) <- c("inner", "outer", "left", "right")
60+
results <- matrix(nrow=3, ncol=3)
61+
colnames(results) <- c("base::merge", "plyr", "data.table")
62+
rownames(results) <- c("inner", "outer", "left")
5363

54-
join.functions <- c(inner.join, outer.join, left.join, right.join)
55-
for (i in 1:4) {
56-
results[1, 1] <- timeit(function() {inner.join(sort=sort.options[1])})
57-
results[1, 2] <- timeit(function() {inner.join(sort=sort.options[2])})
58-
results[2, 1] <- timeit(function() {outer.join(sort=sort.options[1])})
59-
results[2, 2] <- timeit(function() {outer.join(sort=sort.options[2])})
60-
results[3, 1] <- timeit(function() {left.join(sort=sort.options[1])})
61-
results[3, 2] <- timeit(function() {left.join(sort=sort.options[2])})
62-
results[4, 1] <- timeit(function() {right.join(sort=sort.options[1])})
63-
results[4, 2] <- timeit(function() {right.join(sort=sort.options[2])})
64+
base.functions <- c(inner.join, outer.join, left.join)
65+
plyr.functions <- c(function() plyr.join("inner"),
66+
function() plyr.join("full"),
67+
function() plyr.join("left"))
68+
dt.functions <- c(inner.join, outer.join, left.join)
69+
for (i in 1:3) {
70+
base.func <- base.functions[[i]]
71+
plyr.func <- plyr.functions[[i]]
72+
## dt.func <- dt.functions[[i]]
73+
results[i, 1] <- timeit(base.func)
74+
results[i, 2] <- timeit(plyr.func)
6475
}
6576

6677
## do.something <- function(df, f) {

0 commit comments

Comments
 (0)