Skip to content

Commit 9e83747

Browse files
committed
aggregate transform
1 parent 95aa244 commit 9e83747

File tree

6 files changed

+571
-2
lines changed

6 files changed

+571
-2
lines changed

lib/aggregate.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/**
2+
* Copyright 2012-2017, Plotly, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the MIT license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
'use strict';

// Thin entry point: re-export the aggregate transform implemented under src/.
module.exports = require('../src/transforms/aggregate');

lib/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Plotly.register([
5656
// https://github.com/plotly/plotly.js/pull/978#pullrequestreview-2403353
5757
//
5858
Plotly.register([
59+
require('./aggregate'),
5960
require('./filter'),
6061
require('./groupby'),
6162
require('./sort')

src/transforms/aggregate.js

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
/**
2+
* Copyright 2012-2017, Plotly, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the MIT license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
'use strict';
10+
11+
var Axes = require('../plots/cartesian/axes');
12+
var Lib = require('../lib');
13+
var PlotSchema = require('../plot_api/plot_schema');
14+
var BADNUM = require('../constants/numerical').BADNUM;
15+
// Module metadata: this module is a transform, exposed under the name 'aggregate'.
exports.moduleType = 'transform';

exports.name = 'aggregate';
19+
/*
 * Attribute schema of the aggregate transform.
 *
 * - enabled: on/off switch for the whole transform
 * - groups: either the name of a data array in the trace, or the grouping
 *   array itself
 * - aggregations: list of {array, func} items, one per data array to reduce
 */
var attrs = exports.attributes = {
    enabled: {
        valType: 'boolean',
        dflt: true,
        description: [
            'Determines whether this aggregate transform is enabled or disabled.'
        ].join(' ')
    },
    groups: {
        // TODO: groupby should support string or array grouping this way too
        // currently groupby only allows a grouping array
        valType: 'string',
        strict: true,
        noBlank: true,
        arrayOk: true,
        dflt: 'x',
        description: [
            'Sets the grouping target to which the aggregation is applied.',
            'Data points with matching group values will be coalesced into',
            'one point, using the supplied aggregation functions to reduce data',
            'in other data arrays.',
            'If a string, *groups* is assumed to be a reference to a data array',
            'in the parent trace object.',
            'To aggregate by nested variables, use *.* to access them.',
            'For example, set `groups` to *marker.color* to aggregate',
            'about the marker color array.',
            'If an array, *groups* is itself the data array by which we aggregate.'
        ].join(' ')
    },
    aggregations: {
        _isLinkedToArray: 'style',
        array: {
            valType: 'string',
            role: 'info',
            description: [
                'A reference to the data array in the parent trace to aggregate.',
                'To aggregate by nested variables, use *.* to access them.',
                // this attribute is `array`, not `groups` (the example was
                // copied from the `groups` description above)
                'For example, set `array` to *marker.color* to aggregate',
                'about the marker color array.',
                'The referenced array must already exist, unless `func` is *count*,',
                'and each array may only be referenced once.'
            ].join(' ')
        },
        func: {
            valType: 'enumerated',
            values: ['count', 'sum', 'avg', 'min', 'max', 'first', 'last'],
            dflt: 'first',
            role: 'info',
            description: [
                'Sets the aggregation function.',
                'All values from the linked `array`, corresponding to the same value',
                'in the `groups` array, are collected and reduced by this function.',
                '*count* is simply the number of values in the `groups` array, so does',
                'not even require the linked array to exist. *first* (*last*) is just',
                'the first (last) linked value.'
            ].join(' ')
        }
    }
};
79+
80+
/**
81+
* Supply transform attributes defaults
82+
*
83+
* @param {object} transformIn
84+
* object linked to trace.transforms[i] with 'func' set to exports.name
85+
* @param {object} traceOut
86+
* the _fullData trace this transform applies to
87+
* @param {object} layout
88+
* the plot's (not-so-full) layout
89+
* @param {object} traceIn
90+
* the input data trace this transform applies to
91+
*
92+
* @return {object} transformOut
93+
* copy of transformIn that contains attribute defaults
94+
*/
95+
exports.supplyDefaults = function(transformIn, traceOut) {
96+
var transformOut = {};
97+
var i;
98+
99+
function coerce(attr, dflt) {
100+
return Lib.coerce(transformIn, transformOut, attrs, attr, dflt);
101+
}
102+
103+
var enabled = coerce('enabled');
104+
105+
if(!enabled) return transformOut;
106+
107+
/*
108+
* Normally _arrayAttrs is calculated during doCalc, but that comes later.
109+
* Anyway this can change due to *count* aggregations (see below) so it's not
110+
* necessarily the same set.
111+
*
112+
* For performance we turn it into an object of truthy values
113+
* we'll use 1 for arrays we haven't aggregated yet, 0 for finished arrays,
114+
* as distinct from undefined which means this array isn't present in the input
115+
* missing arrays can still be aggregate outputs for *count* aggregations.
116+
*/
117+
var arrayAttrArray = PlotSchema.findArrayAttributes(traceOut);
118+
var arrayAttrs = {};
119+
for(i = 0; i < arrayAttrArray.length; i++) arrayAttrs[arrayAttrArray[i]] = 1;
120+
121+
var groups = coerce('groups');
122+
123+
if(!Array.isArray(groups)) {
124+
if(!arrayAttrs[groups]) {
125+
transformOut.enabled = false;
126+
return;
127+
}
128+
arrayAttrs[groups] = 0;
129+
}
130+
131+
var aggregationsIn = transformIn.aggregations;
132+
var aggregationsOut = transformOut.aggregations = [];
133+
134+
if(aggregationsIn) {
135+
for(i = 0; i < aggregationsIn.length; i++) {
136+
var aggregationOut = {};
137+
var array = Lib.coerce(aggregationsIn[i], aggregationOut, attrs.aggregations, 'array');
138+
var func = Lib.coerce(aggregationsIn[i], aggregationOut, attrs.aggregations, 'func');
139+
140+
// add this aggregation to the output only if it's the first instance
141+
// of a valid array attribute - or an unused array attribute with "count"
142+
if(array && (arrayAttrs[array] || (func === 'count' && arrayAttrs[array] === undefined))) {
143+
arrayAttrs[array] = 0;
144+
aggregationsOut.push(aggregationOut);
145+
}
146+
}
147+
}
148+
149+
// any array attributes we haven't yet covered, fill them with the default aggregation
150+
for(i = 0; i < arrayAttrArray.length; i++) {
151+
if(arrayAttrs[arrayAttrArray[i]]) {
152+
aggregationsOut.push({
153+
array: arrayAttrArray[i],
154+
func: attrs.aggregations.func.dflt
155+
});
156+
}
157+
}
158+
159+
return transformOut;
160+
};
161+
162+
163+
exports.calcTransform = function(gd, trace, opts) {
164+
if(!opts.enabled) return;
165+
166+
var groups = opts.groups;
167+
168+
var groupArray = Lib.getTargetArray(trace, {target: groups});
169+
if(!groupArray) return;
170+
171+
var i, vi, groupIndex;
172+
173+
var groupIndices = {};
174+
var groupings = [];
175+
for(i = 0; i < groupArray.length; i++) {
176+
vi = groupArray[i];
177+
groupIndex = groupIndices[vi];
178+
if(groupIndex === undefined) {
179+
groupIndices[vi] = groupings.length;
180+
groupings.push([i]);
181+
}
182+
else groupings[groupIndex].push(i);
183+
}
184+
185+
var aggregations = opts.aggregations;
186+
187+
for(i = 0; i < aggregations.length; i++) {
188+
aggregateOneArray(gd, trace, groupings, aggregations[i]);
189+
}
190+
191+
if(typeof groups === 'string') {
192+
aggregateOneArray(gd, trace, groupings, {array: groups, func: 'first'});
193+
}
194+
};
195+
/**
 * Reduce one data array of the trace to one value per group and write the
 * result back to the same attribute.
 *
 * @param {object} gd
 *  graph div, forwarded to Axes.getDataConversions
 * @param {object} trace
 *  the trace holding the array (modified in place)
 * @param {Array.<Array.<number>>} groupings
 *  for each group, the indices of the points belonging to it
 * @param {object} aggregation
 *  `{array, func}`: attribute string of the array and the aggregation name
 */
function aggregateOneArray(gd, trace, groupings, aggregation) {
    var attr = aggregation.array;
    var targetNP = Lib.nestedProperty(trace, attr);
    var arrayIn = targetNP.get();
    var conversions = Axes.getDataConversions(gd, trace, attr, arrayIn);
    var func = getAggregateFunction(aggregation.func, conversions);

    var groupCount = groupings.length;
    var arrayOut = new Array(groupCount);
    for(var i = 0; i < groupCount; i++) {
        arrayOut[i] = func(arrayIn, groupings[i]);
    }
    targetNP.set(arrayOut);
}
209+
/**
 * Look up the reducer implementing one aggregation.
 *
 * @param {string} func
 *  aggregation name: 'count', 'sum', 'avg', 'min', 'max', 'first' or 'last'
 * @param {object} conversions
 *  object with `d2c` and `c2d` functions; `d2c` is applied to each input
 *  value before reducing, `c2d` to the reduced result
 *
 * @return {function|undefined}
 *  reducer called as `(array, indices)` returning the aggregate of
 *  `array[indices[k]]`; undefined when `func` is not one of the names above
 */
function getAggregateFunction(func, conversions) {
    var d2c = conversions.d2c;
    var c2d = conversions.c2d;

    switch(func) {
        // count, first, and last don't depend on anything about the data
        // point back to pure functions for performance
        case 'count':
            return count;
        case 'first':
            return first;
        case 'last':
            return last;

        case 'sum':
            // This will produce output in all cases even though it's nonsensical
            // for date or category data.
            return function(array, indices) {
                var total = 0;
                for(var i = 0; i < indices.length; i++) {
                    var vi = d2c(array[indices[i]]);
                    // values converting to BADNUM are left out of the sum
                    if(vi !== BADNUM) total += +vi;
                }
                return c2d(total);
            };

        case 'avg':
            // Generally meaningless for category data but it still does something.
            return function(array, indices) {
                var total = 0;
                var cnt = 0;
                for(var i = 0; i < indices.length; i++) {
                    var vi = d2c(array[indices[i]]);
                    if(vi !== BADNUM) {
                        total += +vi;
                        cnt++;
                    }
                }
                // no usable value in this group: nothing to average
                return cnt ? c2d(total / cnt) : BADNUM;
            };

        case 'min':
            return function(array, indices) {
                var out = Infinity;
                for(var i = 0; i < indices.length; i++) {
                    var vi = d2c(array[indices[i]]);
                    if(vi !== BADNUM) out = Math.min(out, +vi);
                }
                // still at the sentinel: report BADNUM rather than Infinity
                return (out === Infinity) ? BADNUM : c2d(out);
            };

        case 'max':
            return function(array, indices) {
                var out = -Infinity;
                for(var i = 0; i < indices.length; i++) {
                    var vi = d2c(array[indices[i]]);
                    if(vi !== BADNUM) out = Math.max(out, +vi);
                }
                // still at the sentinel: report BADNUM rather than -Infinity
                return (out === -Infinity) ? BADNUM : c2d(out);
            };
    }
}
272+
/**
 * 'count' reducer: the number of points in the group.
 *
 * @param {Array} array - the data array (not read, so it may be missing)
 * @param {Array.<number>} indices - indices of the points in the group
 * @return {number} indices.length
 */
function count(array, indices) {
    return indices.length;
}
276+
/**
 * 'first' reducer: the value of the first point in the group.
 *
 * @param {Array} array - the data array
 * @param {Array.<number>} indices - indices of the points in the group
 * @return {*} array[indices[0]] (undefined when indices is empty)
 */
function first(array, indices) {
    return array[indices[0]];
}
280+
/**
 * 'last' reducer: the value of the last point in the group.
 *
 * @param {Array} array - the data array
 * @param {Array.<number>} indices - indices of the points in the group
 * @return {*} array[indices[indices.length - 1]] (undefined when indices is empty)
 */
function last(array, indices) {
    return array[indices[indices.length - 1]];
}

src/transforms/groupby.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@ exports.attributes = {
6363
*
6464
* @param {object} transformIn
6565
* object linked to trace.transforms[i] with 'type' set to exports.name
66-
* @param {object} fullData
67-
* the plot's full data
66+
* @param {object} traceOut
67+
* the _fullData trace this transform applies to
6868
* @param {object} layout
6969
* the plot's (not-so-full) layout
70+
* @param {object} traceIn
71+
* the input data trace this transform applies to
7072
*
7173
* @return {object} transformOut
7274
* copy of transformIn that contains attribute defaults

0 commit comments

Comments
 (0)