1
1
library(plyr )
2
+ library(data.table )
2
3
## Size of the benchmark key vectors.
N <- 10000

## All-NA placeholders of length N for the two join keys.
## NOTE(review): presumably filled in by code outside this view -- confirm.
indices <- rep(NA, N)
indices2 <- rep(NA, N)
@@ -13,12 +14,16 @@ right <- data.frame(key=indices,
13
14
key2 = indices2 ,
14
15
value2 = rnorm(10000 ))
15
16
16
## Variant of the right-hand table in which every (key, key2) pair appears
## twice: both key vectors are repeated with rep(..., 2), so the value column
## is sized as 2 * N to stay in step with them.
right2 <- data.frame(key = rep(indices, 2),
                     key2 = rep(indices2, 2),
                     value2 = rnorm(2 * N))

## Retired alternative inputs built from the first 1000 keys only; kept
## commented out for reference.
## left <- data.frame(key=rep(indices[1:1000], 10),
##                    key2=rep(indices2[1:1000], 10),
##                    value=rnorm(100000))
## right <- data.frame(key=indices[1:1000],
##                     key2=indices2[1:1000],
##                     value2=rnorm(10000))
22
27
23
28
timeit <- function (func , niter = 10 ) {
24
29
timing = rep(NA , niter )
@@ -29,38 +34,44 @@ timeit <- function(func, niter=10) {
29
34
mean(timing )
30
35
}
31
36
32
## Left join of the global `left` and `right` data frames via base::merge
## (all.x = TRUE keeps every row of `left`).
##
## sort: forwarded to merge's `sort` argument; defaults to FALSE.
## Returns the merged data frame invisibly.
left.join <- function(sort = FALSE) {
  result <- base::merge(left, right, all.x = TRUE, sort = sort)
  invisible(result)
}
40
+
41
## Right join of the global `left` and `right` data frames via base::merge
## (all.y = TRUE keeps every row of `right`).
##
## sort: forwarded to merge's `sort` argument; defaults to FALSE.
## Returns the merged data frame invisibly.
right.join <- function(sort = FALSE) {
  result <- base::merge(left, right, all.y = TRUE, sort = sort)
  invisible(result)
}
35
44
36
## Full outer join of the global `left` and `right` data frames via
## base::merge (all = TRUE keeps unmatched rows from both sides).
##
## sort: forwarded to merge's `sort` argument; defaults to FALSE.
## Returns the merged data frame invisibly.
outer.join <- function(sort = FALSE) {
  result <- base::merge(left, right, all = TRUE, sort = sort)
  invisible(result)
}
39
48
40
## Inner join of the global `left` and `right` data frames via base::merge
## (no `all*` flags, so only rows matched on both sides are kept).
##
## sort: forwarded to merge's `sort` argument; defaults to FALSE.
## Returns the merged data frame invisibly.
inner.join <- function(sort = FALSE) {
  result <- base::merge(left, right, sort = sort)
  invisible(result)
}
43
52
44
## Join the global `left` and `right` data frames with plyr::join on the
## `key` and `key2` columns, using match = "first".
##
## type: join type forwarded to plyr::join (the callers in this file pass
##       "inner", "full" and "left").
## Returns the joined data frame invisibly.
plyr.join <- function(type) {
  result <- plyr::join(left, right, by = c("key", "key2"),
                       type = type, match = "first")
  invisible(result)
}
47
57
48
58
## The two values of merge's `sort` flag.
sort.options <- c(FALSE, TRUE)

## Timing table: one row per join type, one column per implementation.
## Cells start as NA and are filled in by the benchmark loop.
results <- matrix(NA, nrow = 3, ncol = 3,
                  dimnames = list(c("inner", "outer", "left"),
                                  c("base::merge", "plyr", "data.table")))
53
63
54
## Benchmark drivers, one entry per row of `results` (inner, outer, left).
base.functions <- list(inner.join, outer.join, left.join)
plyr.functions <- list(function() plyr.join("inner"),
                       function() plyr.join("full"),
                       function() plyr.join("left"))
## Placeholder: currently the same base::merge wrappers as above and not yet
## timed, so the third column of `results` is left as NA.
dt.functions <- list(inner.join, outer.join, left.join)

## Time each join type with every implementation; column 1 holds the
## base::merge timing and column 2 the plyr timing.
for (i in seq_along(base.functions)) {
  base.func <- base.functions[[i]]
  plyr.func <- plyr.functions[[i]]
  ## dt.func <- dt.functions[[i]]
  results[i, 1] <- timeit(base.func)
  results[i, 2] <- timeit(plyr.func)
}
65
76
66
77
# # do.something <- function(df, f) {
0 commit comments