@@ -7,16 +7,23 @@ for (i in 1:N) {
7
7
indices [i ] <- paste(sample(letters , 10 ), collapse = " " )
8
8
indices2 [i ] <- paste(sample(letters , 10 ), collapse = " " )
9
9
}
10
- left <- data.frame (key = rep(indices , 10 ),
11
- key2 = rep(indices2 , 10 ),
12
- value = rnorm(100000 ))
13
- right <- data.frame (key = indices ,
14
- key2 = indices2 ,
15
- value2 = rnorm(10000 ))
10
# Benchmark inputs. `left` repeats each of the first 8000 key pairs ten times;
# `right` holds key pairs 2001..10000 once each, so the two tables overlap
# only partially. The value columns are sized from the key columns.
left <- data.frame(key = rep(indices[1:8000], 10),
                   key2 = rep(indices2[1:8000], 10))
left$value <- rnorm(nrow(left))

right <- data.frame(key = indices[2001:10000],
                    key2 = indices2[2001:10000])
right$value2 <- rnorm(nrow(right))
16
16
17
- right2 <- data.frame (key = rep(indices , 2 ),
18
- key2 = rep(indices2 , 2 ),
19
- value2 = rnorm(20000 ))
17
# `right2` duplicates every key pair of `right`, giving a right-hand table
# with two rows per key.
right2 <- data.frame(key = rep(right$key, 2),
                     key2 = rep(right$key2, 2))
right2$value2 <- rnorm(nrow(right2))
20
+
21
# data.table copies of the benchmark inputs, keyed on both join columns.
# The key names must match the column names exactly ("key", "key2"); a
# leading space in the name would refer to a column that does not exist.
join.keys <- c("key", "key2")
left.dt <- data.table(left, key = join.keys)
right.dt <- data.table(right, key = join.keys)
right2.dt <- data.table(right2, key = join.keys)
24
+
25
+ # left.dt2 <- data.table(left)
26
+ # right.dt2 <- data.table(right)
20
27
21
28
# # left <- data.frame(key=rep(indices[1:1000], 10),
22
29
# # key2=rep(indices2[1:1000], 10),
@@ -47,7 +54,23 @@ outer.join <- function(sort=FALSE) {
47
54
}
48
55
49
56
# Inner join of the global data frames `left` and `right` with base::merge.
# `sort` is forwarded to merge(); the merged data frame is returned invisibly.
inner.join <- function(sort = FALSE) {
  invisible(base::merge(x = left, y = right, all = FALSE, sort = sort))
}
59
+
60
# Left join of `left.dt` with `right.dt` (every row of `left.dt` is kept).
# `sort` is forwarded to merge(); the result is returned invisibly.
left.join.dt <- function(sort = FALSE) {
  invisible(merge(x = left.dt, y = right.dt, all.x = TRUE, sort = sort))
}
63
+
64
# Right join of `left.dt` with `right.dt` (every row of `right.dt` is kept).
# `sort` is forwarded to merge(); the result is returned invisibly.
right.join.dt <- function(sort = FALSE) {
  invisible(merge(x = left.dt, y = right.dt, all.y = TRUE, sort = sort))
}
67
+
68
# Full outer join of `left.dt` with `right.dt` (rows of both tables are kept).
# `sort` is forwarded to merge(); the result is returned invisibly.
outer.join.dt <- function(sort = FALSE) {
  invisible(merge(x = left.dt, y = right.dt, all = TRUE, sort = sort))
}
71
+
72
# Inner join of `left.dt` with `right.dt` (only keys found in both tables).
# `sort` is forwarded to merge(); the result is returned invisibly.
inner.join.dt <- function(sort = FALSE) {
  invisible(merge(x = left.dt, y = right.dt, all = FALSE, sort = sort))
}
52
75
53
76
plyr.join <- function (type ) {
@@ -57,6 +80,8 @@ plyr.join <- function(type) {
57
80
58
81
sort.options <- c(FALSE, TRUE)

# many-to-one

# Timing table: one row per join type, one column per implementation.
# Cells start out as NA and are filled in by the timing loop.
results <- matrix(nrow = 3, ncol = 3,
                  dimnames = list(c(" inner", " outer", " left"),
                                  c(" base::merge", " plyr", " data.table")))
@@ -65,25 +90,68 @@ base.functions <- c(inner.join, outer.join, left.join)
65
90
# Zero-argument wrappers around plyr.join, in the same order as the rows of
# `results` (inner, outer, left).
# NOTE(review): the type strings are reproduced exactly as written, leading
# space included; confirm plyr.join accepts them in this form.
plyr.functions <- list(
  function() plyr.join(" inner"),
  function() plyr.join(" full"),
  function() plyr.join(" left")
)

# data.table joins, in the same inner / outer / left order.
dt.functions <- list(inner.join.dt, outer.join.dt, left.join.dt)
69
94
# Time the i-th join type with each implementation and store the timings in
# row i of `results` (columns: base::merge, plyr, data.table).
for (i in seq_len(3)) {
  base.func <- base.functions[[i]]
  results[i, 1] <- timeit(base.func)

  plyr.func <- plyr.functions[[i]]
  results[i, 2] <- timeit(plyr.func)

  dt.func <- dt.functions[[i]]
  results[i, 3] <- timeit(dt.func)
}
102
+
103
+
104
+ # many-to-many
105
+
106
# Left join of the global data frames `left` and `right2` with base::merge
# (every row of `left` is kept). `sort` is forwarded to merge(); the merged
# data frame is returned invisibly.
left.join <- function(sort = FALSE) {
  invisible(base::merge(x = left, y = right2, all.x = TRUE, sort = sort))
}
109
+
110
# Right join of the global data frames `left` and `right2` with base::merge
# (every row of `right2` is kept). `sort` is forwarded to merge(); the merged
# data frame is returned invisibly.
right.join <- function(sort = FALSE) {
  invisible(base::merge(x = left, y = right2, all.y = TRUE, sort = sort))
}
113
+
114
# Full outer join of the global data frames `left` and `right2` with
# base::merge (rows of both inputs are kept). `sort` is forwarded to merge();
# the merged data frame is returned invisibly.
outer.join <- function(sort = FALSE) {
  invisible(base::merge(x = left, y = right2, all = TRUE, sort = sort))
}
117
+
118
# Inner join of the global data frames `left` and `right2` with base::merge
# (only keys present in both inputs). `sort` is forwarded to merge(); the
# merged data frame is returned invisibly.
inner.join <- function(sort = FALSE) {
  invisible(base::merge(x = left, y = right2, all = FALSE, sort = sort))
}
121
+
122
# Left join of `left.dt` with `right2.dt` (every row of `left.dt` is kept).
# `sort` is forwarded to merge(); the result is returned invisibly.
# This is a many-to-many join, so the result can have more rows than
# nrow(x) + nrow(y). data.table rejects such joins unless
# allow.cartesian = TRUE is passed.
left.join.dt <- function(sort = FALSE) {
  result <- merge(left.dt, right2.dt, all.x = TRUE, sort = sort,
                  allow.cartesian = TRUE)
}
125
+
126
# Right join of `left.dt` with `right2.dt` (every row of `right2.dt` is kept).
# `sort` is forwarded to merge(); the result is returned invisibly.
# This is a many-to-many join, so the result can have more rows than
# nrow(x) + nrow(y). data.table rejects such joins unless
# allow.cartesian = TRUE is passed.
right.join.dt <- function(sort = FALSE) {
  result <- merge(left.dt, right2.dt, all.y = TRUE, sort = sort,
                  allow.cartesian = TRUE)
}
76
129
77
- # # do.something <- function(df, f) {
78
- # # f(df)
79
- # # }
80
- # # df <- matrix(nrow=4, ncol=2)
81
- # # functions <- c(colSums, rowSums)
82
- # # g <- functions[1]
83
- # # do.something(df, function(df) g(df))
130
# Full outer join of `left.dt` with `right2.dt` (rows of both tables are kept).
# `sort` is forwarded to merge(); the result is returned invisibly.
# This is a many-to-many join, so the result can have more rows than
# nrow(x) + nrow(y). data.table rejects such joins unless
# allow.cartesian = TRUE is passed.
outer.join.dt <- function(sort = FALSE) {
  result <- merge(left.dt, right2.dt, all = TRUE, sort = sort,
                  allow.cartesian = TRUE)
}
133
+
134
# Inner join of `left.dt` with `right2.dt` (only keys found in both tables).
# `sort` is forwarded to merge(); the result is returned invisibly.
# This is a many-to-many join, so the result can have more rows than
# nrow(x) + nrow(y). data.table rejects such joins unless
# allow.cartesian = TRUE is passed.
inner.join.dt <- function(sort = FALSE) {
  result <- merge(left.dt, right2.dt, all = FALSE, sort = sort,
                  allow.cartesian = TRUE)
}
137
+
138
sort.options <- c(FALSE, TRUE)

# many-to-many

# Timing table for this section: one row per join type, one column per
# implementation (base::merge and data.table only). Cells start out as NA
# and are filled in by the timing loop.
results <- matrix(nrow = 3, ncol = 2,
                  dimnames = list(c(" inner", " outer", " left"),
                                  c(" base::merge", " data.table")))
145
+
146
# Implementations to time, in the row order of `results` (inner, outer, left).
# Only base::merge and data.table are measured in this section, so no plyr
# wrappers are built or looked up here.
base.functions <- list(inner.join, outer.join, left.join)
dt.functions <- list(inner.join.dt, outer.join.dt, left.join.dt)

# Row i of `results` receives the timings for the i-th join type:
# column 1 is base::merge, column 2 is data.table.
for (i in seq_len(3)) {
  base.func <- base.functions[[i]]
  dt.func <- dt.functions[[i]]
  results[i, 1] <- timeit(base.func)
  results[i, 2] <- timeit(dt.func)
}
0 commit comments