diff --git a/lib/aggregate.js b/lib/aggregate.js
new file mode 100644
index 00000000000..aaa0321dfab
--- /dev/null
+++ b/lib/aggregate.js
@@ -0,0 +1,11 @@
+/**
+* Copyright 2012-2017, Plotly, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the MIT license found in the
+* LICENSE file in the root directory of this source tree.
+*/
+
+'use strict';
+
+module.exports = require('../src/transforms/aggregate');
diff --git a/lib/index.js b/lib/index.js
index 8c1b11fff73..140b8030885 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -56,6 +56,7 @@ Plotly.register([
 // https://github.com/plotly/plotly.js/pull/978#pullrequestreview-2403353
 //
 Plotly.register([
+    require('./aggregate'),
     require('./filter'),
     require('./groupby'),
     require('./sort')
diff --git a/src/plots/cartesian/axes.js b/src/plots/cartesian/axes.js
index a09a719b403..8a25ad031c4 100644
--- a/src/plots/cartesian/axes.js
+++ b/src/plots/cartesian/axes.js
@@ -124,7 +124,13 @@ axes.cleanPosition = function(pos, gd, axRef) {
     return cleanPos(pos);
 };
 
-axes.getDataToCoordFunc = function(gd, trace, target, targetArray) {
+/**
+ * Get the pair of converters for the data array that `target` refers to.
+ *
+ * @return {object} {d2c, c2d}: the axis converters when `target` has a
+ *  corresponding axis, String casts for 'ids', Number casts otherwise
+ */
+var getDataConversions = axes.getDataConversions = function(gd, trace, target, targetArray) {
     var ax;
 
     // If target points to an axis, use the type we already have for that
@@ -155,15 +161,24 @@ axes.getDataToCoordFunc = function(gd, trace, target, targetArray) {
 
     // if 'target' has corresponding axis
     // -> use setConvert method
-    if(ax) return ax.d2c;
+    if(ax) return {d2c: ax.d2c, c2d: ax.c2d};
 
     // special case for 'ids'
     // -> cast to String
-    if(d2cTarget === 'ids') return function(v) { return String(v); };
+    if(d2cTarget === 'ids') return {d2c: toString, c2d: toString};
 
     // otherwise (e.g. numeric-array of 'marker.color' or 'marker.size')
     // -> cast to Number
-    return function(v) { return +v; };
+
+    return {d2c: toNum, c2d: toNum};
+};
+
+function toNum(v) { return +v; }
+function toString(v) { return String(v); }
+
+// shorthand for the d2c half of getDataConversions
+axes.getDataToCoordFunc = function(gd, trace, target, targetArray) {
+    return getDataConversions(gd, trace, target, targetArray).d2c;
 };
 
 // empty out types for all axes containing these traces
diff --git a/src/transforms/aggregate.js b/src/transforms/aggregate.js
new file mode 100644
index 00000000000..932e54f4ccc
--- /dev/null
+++ b/src/transforms/aggregate.js
@@ -0,0 +1,453 @@
+/**
+* Copyright 2012-2017, Plotly, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the MIT license found in the
+* LICENSE file in the root directory of this source tree.
+*/
+
+'use strict';
+
+var Axes = require('../plots/cartesian/axes');
+var Lib = require('../lib');
+var PlotSchema = require('../plot_api/plot_schema');
+var BADNUM = require('../constants/numerical').BADNUM;
+
+exports.moduleType = 'transform';
+
+exports.name = 'aggregate';
+
+var attrs = exports.attributes = {
+    enabled: {
+        valType: 'boolean',
+        dflt: true,
+        description: [
+            'Determines whether this aggregate transform is enabled or disabled.'
+        ].join(' ')
+    },
+    groups: {
+        // TODO: groupby should support string or array grouping this way too
+        // currently groupby only allows a grouping array
+        valType: 'string',
+        strict: true,
+        noBlank: true,
+        arrayOk: true,
+        dflt: 'x',
+        description: [
+            'Sets the grouping target to which the aggregation is applied.',
+            'Data points with matching group values will be coalesced into',
+            'one point, using the supplied aggregation functions to reduce data',
+            'in other data arrays.',
+            'If a string, `groups` is assumed to be a reference to a data array',
+            'in the parent trace object.',
+            'To aggregate by nested variables, use *.* to access them.',
+            'For example, set `groups` to *marker.color* to aggregate',
+            'about the marker color array.',
+            'If an array, `groups` is itself the data array by which we aggregate.'
+        ].join(' ')
+    },
+    aggregations: {
+        _isLinkedToArray: 'aggregation',
+        target: {
+            valType: 'string',
+            role: 'info',
+            description: [
+                'A reference to the data array in the parent trace to aggregate.',
+                'To aggregate by nested variables, use *.* to access them.',
+                'For example, set `target` to *marker.color* to aggregate',
+                'over the marker color array.',
+                'The referenced array must already exist, unless `func` is *count*,',
+                'and each array may only be referenced once.'
+            ].join(' ')
+        },
+        func: {
+            valType: 'enumerated',
+            values: ['count', 'sum', 'avg', 'median', 'mode', 'rms', 'stddev', 'min', 'max', 'first', 'last'],
+            dflt: 'first',
+            role: 'info',
+            description: [
+                'Sets the aggregation function.',
+                'All values from the linked `target`, corresponding to the same value',
+                'in the `groups` array, are collected and reduced by this function.',
+                '*count* is simply the number of values in the `groups` array, so does',
+                'not even require the linked array to exist. *first* (*last*) is just',
+                'the first (last) linked value.',
+                'Invalid values are ignored, so for example in *avg* they do not',
+                'contribute to either the numerator or the denominator.',
+                'Any data type (numeric, date, category) may be aggregated with any',
+                'function, even though in certain cases it is unlikely to make sense,',
+                'for example a sum of dates or average of categories.',
+                '*median* will return the average of the two central values if there is',
+                'an even count. *mode* will return the first value to reach the maximum',
+                'count, in case of a tie.'
+            ].join(' ')
+        },
+        funcmode: {
+            valType: 'enumerated',
+            values: ['sample', 'population'],
+            dflt: 'sample',
+            role: 'info',
+            description: [
+                '*stddev* supports two formula variants: *sample* (normalize by N-1)',
+                'and *population* (normalize by N).'
+            ].join(' ')
+        },
+        enabled: {
+            valType: 'boolean',
+            dflt: true,
+            description: [
+                'Determines whether this aggregation function is enabled or disabled.'
+            ].join(' ')
+        }
+    }
+};
+
+var aggAttrs = attrs.aggregations;
+
+/**
+ * Supply transform attributes defaults
+ *
+ * @param {object} transformIn
+ *  object linked to trace.transforms[i] with 'type' set to exports.name
+ * @param {object} traceOut
+ *  the _fullData trace this transform applies to
+ * @param {object} layout
+ *  the plot's (not-so-full) layout
+ * @param {object} traceIn
+ *  the input data trace this transform applies to
+ *
+ * @return {object} transformOut
+ *  copy of transformIn that contains attribute defaults
+ */
+exports.supplyDefaults = function(transformIn, traceOut) {
+    var transformOut = {};
+    var i;
+
+    function coerce(attr, dflt) {
+        return Lib.coerce(transformIn, transformOut, attrs, attr, dflt);
+    }
+
+    var enabled = coerce('enabled');
+
+    if(!enabled) return transformOut;
+
+    /*
+     * Normally _arrayAttrs is calculated during doCalc, but that comes later.
+     * Anyway this can change due to *count* aggregations (see below) so it's not
+     * necessarily the same set.
+     *
+     * For performance we turn it into an object of truthy values
+     * we'll use 1 for arrays we haven't aggregated yet, 0 for finished arrays,
+     * as distinct from undefined which means this array isn't present in the input
+     * missing arrays can still be aggregate outputs for *count* aggregations.
+     */
+    var arrayAttrArray = PlotSchema.findArrayAttributes(traceOut);
+    var arrayAttrs = {};
+    for(i = 0; i < arrayAttrArray.length; i++) arrayAttrs[arrayAttrArray[i]] = 1;
+
+    var groups = coerce('groups');
+
+    if(!Array.isArray(groups)) {
+        if(!arrayAttrs[groups]) {
+            transformOut.enabled = false;
+            return transformOut;
+        }
+        arrayAttrs[groups] = 0;
+    }
+
+    var aggregationsIn = transformIn.aggregations;
+    var aggregationsOut = transformOut.aggregations = [];
+    var aggregationOut;
+
+    function coercei(attr, dflt) {
+        return Lib.coerce(aggregationsIn[i], aggregationOut, aggAttrs, attr, dflt);
+    }
+
+    if(Array.isArray(aggregationsIn)) {
+        for(i = 0; i < aggregationsIn.length; i++) {
+            aggregationOut = {};
+            var target = coercei('target');
+            var func = coercei('func');
+            var enabledi = coercei('enabled');
+
+            // add this aggregation to the output only if it's the first instance
+            // of a valid target attribute - or an unused target attribute with "count"
+            if(enabledi && target && (arrayAttrs[target] || (func === 'count' && arrayAttrs[target] === undefined))) {
+                if(func === 'stddev') coercei('funcmode');
+
+                arrayAttrs[target] = 0;
+                aggregationsOut[i] = aggregationOut;
+            }
+            else aggregationsOut[i] = {enabled: false};
+        }
+    }
+
+    // any array attributes we haven't yet covered, fill them with the default aggregation
+    for(i = 0; i < arrayAttrArray.length; i++) {
+        if(arrayAttrs[arrayAttrArray[i]]) {
+            aggregationsOut.push({
+                target: arrayAttrArray[i],
+                func: aggAttrs.func.dflt,
+                enabled: true
+            });
+        }
+    }
+
+    return transformOut;
+};
+
+
+/**
+ * Apply the transform: coalesce all points sharing a group value into a
+ * single point per group (ordered by first appearance of the group), by
+ * reducing every enabled aggregation target array of `trace` in place.
+ *
+ * @param {object} gd
+ *  the graph div, only passed along to Axes.getDataConversions
+ * @param {object} trace
+ *  the trace to aggregate; its data arrays are overwritten
+ * @param {object} opts
+ *  the transform options, as filled in by supplyDefaults
+ */
+exports.calcTransform = function(gd, trace, opts) {
+    if(!opts.enabled) return;
+
+    var groups = opts.groups;
+
+    var groupArray = Lib.getTargetArray(trace, {target: groups});
+    if(!groupArray) return;
+
+    var i, vi, groupIndex;
+
+    // no prototype, so group values like 'constructor' or 'toString' are
+    // not mistaken for already-seen groups
+    var groupIndices = Object.create(null);
+    var groupings = [];
+    for(i = 0; i < groupArray.length; i++) {
+        vi = groupArray[i];
+        groupIndex = groupIndices[vi];
+        if(groupIndex === undefined) {
+            groupIndices[vi] = groupings.length;
+            groupings.push([i]);
+        }
+        else groupings[groupIndex].push(i);
+    }
+
+    var aggregations = opts.aggregations;
+
+    for(i = 0; i < aggregations.length; i++) {
+        aggregateOneArray(gd, trace, groupings, aggregations[i]);
+    }
+
+    if(typeof groups === 'string') {
+        aggregateOneArray(gd, trace, groupings, {
+            target: groups,
+            func: 'first',
+            enabled: true
+        });
+    }
+};
+
+/**
+ * Replace one data array of `trace` with its aggregated version.
+ *
+ * @param {object} gd
+ *  the graph div
+ * @param {object} trace
+ *  the trace holding the array, modified in place
+ * @param {Array} groupings
+ *  for each output point, the array of input indices it aggregates
+ * @param {object} aggregation
+ *  one aggregation spec: {target, func, enabled[, funcmode]}
+ */
+function aggregateOneArray(gd, trace, groupings, aggregation) {
+    if(!aggregation.enabled) return;
+
+    var attr = aggregation.target;
+    var targetNP = Lib.nestedProperty(trace, attr);
+    var arrayIn = targetNP.get();
+    var conversions = Axes.getDataConversions(gd, trace, attr, arrayIn);
+    var func = getAggregateFunction(aggregation, conversions);
+
+    var arrayOut = new Array(groupings.length);
+    for(var i = 0; i < groupings.length; i++) {
+        arrayOut[i] = func(arrayIn, groupings[i]);
+    }
+    targetNP.set(arrayOut);
+}
+
+/**
+ * Build the reducer for one aggregation.
+ *
+ * @param {object} opts
+ *  the aggregation spec; reads `func` and, for *stddev*, `funcmode`
+ * @param {object} conversions
+ *  {d2c, c2d} as returned by Axes.getDataConversions
+ *
+ * @return {function} reducer(array, indices), giving the aggregate of the
+ *  items of `array` at positions `indices`. Items that d2c maps to BADNUM
+ *  are skipped by the numeric reducers; count, first and last use the
+ *  group indices and raw items only, without any conversion.
+ */
+function getAggregateFunction(opts, conversions) {
+    var func = opts.func;
+    var d2c = conversions.d2c;
+    var c2d = conversions.c2d;
+
+    switch(func) {
+        // count, first, and last don't depend on anything about the data
+        // point back to pure functions for performance
+        case 'count':
+            return count;
+        case 'first':
+            return first;
+        case 'last':
+            return last;
+
+        case 'sum':
+            // This will produce output in all cases even though it's nonsensical
+            // for date or category data.
+            return function(array, indices) {
+                var total = 0;
+                for(var i = 0; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) total += vi;
+                }
+                return c2d(total);
+            };
+
+        case 'avg':
+            // Generally meaningless for category data but it still does something.
+            return function(array, indices) {
+                var total = 0;
+                var cnt = 0;
+                for(var i = 0; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) {
+                        total += vi;
+                        cnt++;
+                    }
+                }
+                return cnt ? c2d(total / cnt) : BADNUM;
+            };
+
+        case 'min':
+            return function(array, indices) {
+                var out = Infinity;
+                for(var i = 0; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) out = Math.min(out, vi);
+                }
+                return (out === Infinity) ? BADNUM : c2d(out);
+            };
+
+        case 'max':
+            return function(array, indices) {
+                var out = -Infinity;
+                for(var i = 0; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) out = Math.max(out, vi);
+                }
+                return (out === -Infinity) ? BADNUM : c2d(out);
+            };
+
+        case 'median':
+            return function(array, indices) {
+                var sortCalc = [];
+                for(var i = 0; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) sortCalc.push(vi);
+                }
+                if(!sortCalc.length) return BADNUM;
+                // compare numerically: the default sort orders by string value
+                sortCalc.sort(function(a, b) { return a - b; });
+                var mid = (sortCalc.length - 1) / 2;
+                return c2d((sortCalc[Math.floor(mid)] + sortCalc[Math.ceil(mid)]) / 2);
+            };
+
+        case 'mode':
+            return function(array, indices) {
+                var counts = Object.create(null);
+                var maxCnt = 0;
+                var out = BADNUM;
+                for(var i = 0; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) {
+                        var counti = counts[vi] = (counts[vi] || 0) + 1;
+                        if(counti > maxCnt) {
+                            maxCnt = counti;
+                            out = vi;
+                        }
+                    }
+                }
+                return maxCnt ? c2d(out) : BADNUM;
+            };
+
+        case 'rms':
+            return function(array, indices) {
+                var total = 0;
+                var cnt = 0;
+                for(var i = 0; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) {
+                        total += vi * vi;
+                        cnt++;
+                    }
+                }
+                return cnt ? c2d(Math.sqrt(total / cnt)) : BADNUM;
+            };
+
+        case 'stddev':
+            return function(array, indices) {
+                // balance numerical stability with performance:
+                // so that we call d2c once per element but don't need to
+                // store them, reference all to the first element
+                var total = 0;
+                var total2 = 0;
+                var cnt = 1;
+                var v0 = BADNUM;
+                var i;
+                for(i = 0; i < indices.length && v0 === BADNUM; i++) {
+                    v0 = d2c(array[indices[i]]);
+                }
+                if(v0 === BADNUM) return BADNUM;
+
+                for(; i < indices.length; i++) {
+                    var vi = d2c(array[indices[i]]);
+                    if(vi !== BADNUM) {
+                        var dv = vi - v0;
+                        total += dv;
+                        total2 += dv * dv;
+                        cnt++;
+                    }
+                }
+
+                // Normalize by N - 1 for *sample* or by N for *population*,
+                // according to `funcmode`.
+                // Note there's no c2d here - that means for dates the result
+                // is a number of milliseconds, and for categories it's a number
+                // of category differences, which is not generically meaningful but
+                // as in other cases we don't forbid it.
+                var norm = (opts.funcmode === 'sample') ? (cnt - 1) : cnt;
+                // this is debatable: should a count of 1 return sample stddev of
+                // 0 or undefined?
+                if(!norm) return 0;
+                return Math.sqrt((total2 - (total * total / cnt)) / norm);
+            };
+    }
+}
+
+// number of points in the group, whatever their values
+function count(array, indices) {
+    return indices.length;
+}
+
+// raw (unconverted) value of the first point in the group
+function first(array, indices) {
+    return array[indices[0]];
+}
+
+// raw (unconverted) value of the last point in the group
+function last(array, indices) {
+    return array[indices[indices.length - 1]];
+}
diff --git a/src/transforms/filter.js b/src/transforms/filter.js
index a9799c65818..e895fa89b91 100644
--- a/src/transforms/filter.js
+++ b/src/transforms/filter.js
@@ -37,13 +37,13 @@ exports.attributes = {
         description: [
             'Sets the filter target by which the filter is applied.',
 
-            'If a string, *target* is assumed to be a reference to a data array',
+            'If a string, `target` is assumed to be a reference to a data array',
             'in the parent trace object.',
             'To filter about nested variables, use *.* to access them.',
             'For example, set `target` to *marker.color* to filter',
             'about the marker color array.',
 
-            'If an array, *target* is then the data array by which the filter is applied.'
+            'If an array, `target` is then the data array by which the filter is applied.'
         ].join(' ')
     },
     operation: {
@@ -83,23 +83,23 @@ exports.attributes = {
         valType: 'any',
         dflt: 0,
         description: [
-            'Sets the value or values by which to filter by.',
+            'Sets the value or values by which to filter.',
 
             'Values are expected to be in the same type as the data linked',
-            'to *target*.',
+            'to `target`.',
 
             'When `operation` is set to one of',
             'the comparison values (' + COMPARISON_OPS + ')',
-            '*value* is expected to be a number or a string.',
+            '`value` is expected to be a number or a string.',
 
             'When `operation` is set to one of the interval values',
             '(' + INTERVAL_OPS + ')',
-            '*value* is expected to be 2-item array where the first item',
+            '`value` is expected to be 2-item array where the first item',
             'is the lower bound and the second item is the upper bound.',
 
             'When `operation`, is set to one of the set values',
             '(' + SET_OPS + ')',
-            '*value* is expected to be an array with as many items as',
+            '`value` is expected to be an array with as many items as',
             'the desired set elements.'
         ].join(' ')
     },
diff --git a/src/transforms/groupby.js b/src/transforms/groupby.js
index 92e00d17fb3..e0ed531799a 100644
--- a/src/transforms/groupby.js
+++ b/src/transforms/groupby.js
@@ -63,10 +63,12 @@ exports.attributes = {
  *
  * @param {object} transformIn
  *  object linked to trace.transforms[i] with 'type' set to exports.name
- * @param {object} fullData
- *  the plot's full data
+ * @param {object} traceOut
+ *  the _fullData trace this transform applies to
  * @param {object} layout
  *  the plot's (not-so-full) layout
+ * @param {object} traceIn
+ *  the input data trace this transform applies to
  *
  * @return {object} transformOut
  *  copy of transformIn that contains attribute defaults
diff --git a/test/jasmine/tests/transform_aggregate_test.js b/test/jasmine/tests/transform_aggregate_test.js
new file mode 100644
index 00000000000..0377963138b
--- /dev/null
+++ b/test/jasmine/tests/transform_aggregate_test.js
@@ -0,0 +1,267 @@
+var Plotly = require('@lib/index');
+
+var createGraphDiv = require('../assets/create_graph_div');
+var destroyGraphDiv = require('../assets/destroy_graph_div');
+var customMatchers = require('../assets/custom_matchers');
+
+describe('aggregate', function() {
+    var gd;
+
+    beforeAll(function() { jasmine.addMatchers(customMatchers);});
+
+    beforeEach(function() { gd = createGraphDiv(); });
+
+    afterEach(destroyGraphDiv);
+
+    it('handles all funcs for numeric data', function() {
+        // throw in some non-numbers, they should get discarded except first/last
+        Plotly.newPlot(gd, [{
+            x: [1, 2, 3, 4, 'fail'],
+            y: [1.1, 2.2, 3.3, 'nope', 5.5],
+            marker: {
+                size: ['2001-01-01', 0.2, 0.1, 0.4, 0.5],
+                color: [2, 4, '', 10, 8],
+                opacity: [0.6, 'boo', 0.2, 0.8, 1.0],
+                line: {
+                    color: [2.2, 3.3, 4.4, 5.5, 'the end']
+                }
+            },
+            transforms: [{
+                type: 'aggregate',
+                groups: ['a', 'b', 'a', 'a', 'a'],
+                aggregations: [
+                    // missing array - the entry is ignored
+                    {target: '', func: 'avg'},
+                    // disabled explicitly
+                    {target: 'x', func: 'avg', enabled: false},
+                    {target: 'x', func: 'sum'},
+                    // non-numerics will not count toward numerator or denominator for avg
+                    {target: 'y', func: 'avg'},
+                    {target: 'marker.size', func: 'min'},
+                    {target: 'marker.color', func: 'max'},
+                    // marker.opacity doesn't have an entry, but it will default to first
+                    // as if it were {target: 'marker.opacity', func: 'first'},
+                    {target: 'marker.line.color', func: 'last'},
+                    // not present in data, but that's OK for count
+                    {target: 'marker.line.width', func: 'count'},
+                    // duplicate entry - discarded
+                    {target: 'x', func: 'min'}
+                ]
+            }]
+        }], {
+            // log axis doesn't change how sum (or avg but not tested) works
+            xaxis: {type: 'log'}
+        });
+
+        var traceOut = gd._fullData[0];
+
+        expect(traceOut.x).toEqual([8, 2]);
+        expect(traceOut.y).toBeCloseToArray([3.3, 2.2], 5);
+        expect(traceOut.marker.size).toEqual([0.1, 0.2]);
+        expect(traceOut.marker.color).toEqual([10, 4]);
+        expect(traceOut.marker.opacity).toEqual([0.6, 'boo']);
+        expect(traceOut.marker.line.color).toEqual(['the end', 3.3]);
+        expect(traceOut.marker.line.width).toEqual([4, 1]);
+    });
+
+    it('handles all funcs except sum for date data', function() {
+        // weird cases handled in another test
+        Plotly.newPlot(gd, [{
+            x: ['2001-01-01', '', '2001-01-03', '2001-01-05', '2001-01-07'],
+            y: ['1995-01-15', '2005-03-15', '1990-12-23', '2001-01-01', 'not a date'],
+            text: ['2001-01-01 12:34', '2001-01-01 12:35', '2001-01-01 12:36', '2001-01-01 12:37', ''],
+            hovertext: ['a', '2001-01-02', '2001-01-03', '2001-01-04', '2001-01-05'],
+            customdata: ['2001-01', 'b', '2001-03', '2001-04', '2001-05'],
+            transforms: [{
+                type: 'aggregate',
+                // groups can be any type, but until we implement binning they
+                // will always compare as strings = so 1 === '1' === 1.0 !== '1.0'
+                groups: [1, 2, '1', 1.0, 1],
+                aggregations: [
+                    {target: 'x', func: 'avg'},
+                    {target: 'y', func: 'min'},
+                    {target: 'text', func: 'max'},
+                    // hovertext doesn't have a func, default to first
+                    {target: 'hovertext'},
+                    {target: 'customdata', func: 'last'},
+                    // not present in data, but that's OK for count
+                    {target: 'marker.line.width', func: 'count'},
+                    // duplicate entry - discarded
+                    {target: 'x', func: 'min'}
+                ]
+            }]
+        }]);
+
+        var traceOut = gd._fullData[0];
+
+        expect(traceOut.x).toEqual(['2001-01-04', undefined]);
+        expect(traceOut.y).toEqual(['1990-12-23', '2005-03-15']);
+        expect(traceOut.text).toEqual(['2001-01-01 12:37', '2001-01-01 12:35']);
+        expect(traceOut.hovertext).toEqual(['a', '2001-01-02']);
+        expect(traceOut.customdata).toEqual(['2001-05', 'b']);
+        expect(traceOut.marker.line.width).toEqual([4, 1]);
+    });
+
+    it('handles all funcs except sum and avg for category data', function() {
+        // weird cases handled in another test
+        Plotly.newPlot(gd, [{
+            x: ['a', 'b', 'c', 'aa', 'd'],
+            y: ['q', 'w', 'e', 'r', 't'],
+            text: ['b', 'b', 'a', 'b', 'a'],
+            hovertext: ['c', 'b', 'a', 'b', 'a'],
+            transforms: [{
+                type: 'aggregate',
+                groups: [1, 2, 1, 1, 1],
+                aggregations: [
+                    {target: 'x', func: 'min'},
+                    {target: 'y', func: 'max'},
+                    {target: 'text', func: 'last'},
+                    // hovertext doesn't have an entry, but it will default to first
+                    // not present in data, but that's OK for count
+                    {target: 'marker.line.width', func: 'count'},
+                    // duplicate entry - discarded
+                    {target: 'x', func: 'max'}
+                ]
+            }]
+        }], {
+            xaxis: {categoryarray: ['aaa', 'aa', 'a', 'b', 'c']}
+        });
+
+        var traceOut = gd._fullData[0];
+
+        // explicit order (only possible for axis data)
+        expect(traceOut.x).toEqual(['aa', 'b']);
+        // implied order from data
+        expect(traceOut.y).toEqual(['t', 'w']);
+        expect(traceOut.text).toEqual(['a', 'b']);
+        expect(traceOut.hovertext).toEqual(['c', 'b']);
+        expect(traceOut.marker.line.width).toEqual([4, 1]);
+    });
+
+    it('allows date and category sums, and category avg, with weird output', function() {
+        // this test is more of an FYI than anything else - it doesn't break but
+        // these results are usually meaningless.
+
+        Plotly.newPlot(gd, [{
+            x: ['2001-01-01', '2001-01-02', '2001-01-03', '2001-01-04'],
+            y: ['a', 'b', 'b', 'c'],
+            text: ['a', 'b', 'a', 'c'],
+            transforms: [{
+                type: 'aggregate',
+                groups: [1, 1, 2, 2],
+                aggregations: [
+                    {target: 'x', func: 'sum'},
+                    {target: 'y', func: 'sum'},
+                    {target: 'text', func: 'avg'}
+                ]
+            }]
+        }]);
+
+        var traceOut = gd._fullData[0];
+
+        // date sums: 1970-01-01 is "zero", there are shifts due to # of leap years
+        // without that shift these would be 2032-01-02 and 2032-01-06
+        expect(traceOut.x).toEqual(['2032-01-03', '2032-01-07']);
+        // category sums: can go off the end of the category array -> gives undefined
+        expect(traceOut.y).toEqual(['b', undefined]);
+        // category average: can result in fractional categories -> rounds (0.5 rounds to 1)
+        expect(traceOut.text).toEqual(['b', 'b']);
+    });
+
+    it('can aggregate on an existing data array', function() {
+        Plotly.newPlot(gd, [{
+            x: [1, 2, 3, 4, 5],
+            y: [2, 4, 6, 8, 10],
+            marker: {size: [10, 10, 20, 20, 10]},
+            transforms: [{
+                type: 'aggregate',
+                groups: 'marker.size',
+                aggregations: [
+                    {target: 'x', func: 'sum'},
+                    {target: 'y', func: 'avg'}
+                ]
+            }]
+        }]);
+
+        var traceOut = gd._fullData[0];
+
+        expect(traceOut.x).toEqual([8, 7]);
+        expect(traceOut.y).toBeCloseToArray([16 / 3, 7], 5);
+        expect(traceOut.marker.size).toEqual([10, 20]);
+    });
+
+    it('handles median, mode, rms, & stddev for numeric data', function() {
+        // again, nothing is going to barf with non-numeric data, but sometimes it
+        // won't make much sense.
+
+        Plotly.newPlot(gd, [{
+            x: [1, 1, 2, 2, 1],
+            y: [1, 2, 3, 4, 5],
+            marker: {
+                size: [1, 2, 3, 4, 5],
+                line: {width: [1, 1, 2, 2, 1]},
+                color: [1, 1, 2, 2, 1]
+            },
+            transforms: [{
+                type: 'aggregate',
+                groups: [1, 2, 1, 1, 1],
+                aggregations: [
+                    {target: 'x', func: 'mode'},
+                    {target: 'y', func: 'median'},
+                    {target: 'marker.size', func: 'rms'},
+                    {target: 'marker.line.width', func: 'stddev', funcmode: 'population'},
+                    {target: 'marker.color', func: 'stddev'}
+                ]
+            }]
+        }]);
+
+        var traceOut = gd._fullData[0];
+
+        // 1 and 2 both have count of 2 in the first group,
+        // but 2 gets to that count first
+        expect(traceOut.x).toEqual([2, 1]);
+        expect(traceOut.y).toBeCloseToArray([3.5, 2], 5);
+        expect(traceOut.marker.size).toBeCloseToArray([Math.sqrt(51 / 4), 2], 5);
+        expect(traceOut.marker.line.width).toBeCloseToArray([0.5, 0], 5);
+        expect(traceOut.marker.color).toBeCloseToArray([Math.sqrt(1 / 3), 0], 5);
+    });
+
+    it('sorts numerically when computing the median', function() {
+        Plotly.newPlot(gd, [{
+            x: [1, 2, 3],
+            y: [9, 10, 2],
+            transforms: [{
+                type: 'aggregate',
+                groups: ['a', 'a', 'a'],
+                aggregations: [
+                    {target: 'y', func: 'median'}
+                ]
+            }]
+        }]);
+
+        var traceOut = gd._fullData[0];
+
+        // a lexicographic sort would order these as [10, 2, 9] and return 2
+        expect(traceOut.y).toEqual([9]);
+    });
+
+    it('accepts group values that match Object.prototype property names', function() {
+        Plotly.newPlot(gd, [{
+            x: [1, 2, 3],
+            y: [1, 2, 3],
+            transforms: [{
+                type: 'aggregate',
+                groups: ['constructor', 'toString', 'constructor'],
+                aggregations: [
+                    {target: 'x', func: 'sum'},
+                    {target: 'y', func: 'count'}
+                ]
+            }]
+        }]);
+
+        var traceOut = gd._fullData[0];
+
+        expect(traceOut.x).toEqual([4, 2]);
+        expect(traceOut.y).toEqual([2, 1]);
+    });
+});
diff --git a/test/jasmine/tests/transform_multi_test.js b/test/jasmine/tests/transform_multi_test.js
index 0592b177b8c..a01b219fa09 100644
--- a/test/jasmine/tests/transform_multi_test.js
+++ b/test/jasmine/tests/transform_multi_test.js
@@ -232,6 +232,10 @@ describe('user-defined transforms:', function() {
 describe('multiple transforms:', function() {
     'use strict';
 
+    var gd;
+
+    beforeEach(function() { gd = createGraphDiv(); });
+
     var mockData0 = [{
         mode: 'markers',
         x: [1, -1, -2, 0, 1, 2, 3],
@@ -278,8 +282,6 @@ describe('multiple transforms:', function() {
     it('Plotly.plot should plot the transform traces', function(done) {
         var data = Lib.extendDeep([], mockData0);
 
-        var gd = createGraphDiv();
-
         Plotly.plot(gd, data).then(function() {
             expect(gd.data.length).toEqual(1);
             expect(gd.data[0].x).toEqual([1, -1, -2, 0, 1, 2, 3]);
@@ -302,8 +304,6 @@ describe('multiple transforms:', function() {
 
         data[0].transforms.slice().reverse();
 
-        var gd = createGraphDiv();
-
         Plotly.plot(gd, data).then(function() {
             expect(gd.data.length).toEqual(1);
             expect(gd.data[0].x).toEqual([1, -1, -2, 0, 1, 2, 3]);
@@ -325,7 +325,6 @@ describe('multiple transforms:', function() {
         var data = Lib.extendDeep([], mockData0);
         data[0].marker = { size: 20 };
 
-        var gd = createGraphDiv();
         var dims = [2, 2];
 
         Plotly.plot(gd, data).then(function() {
@@ -377,8 +376,6 @@ describe('multiple transforms:', function() {
     it('Plotly.extendTraces should work', function(done) {
         var data = Lib.extendDeep([], mockData0);
 
-        var gd = createGraphDiv();
-
         Plotly.plot(gd, data).then(function() {
             expect(gd.data[0].x.length).toEqual(7);
             expect(gd._fullData[0].x.length).toEqual(2);
@@ -405,8 +402,6 @@ describe('multiple transforms:', function() {
     it('Plotly.deleteTraces should work', function(done) {
         var data = Lib.extendDeep([], mockData1);
 
-        var gd = createGraphDiv();
-
         Plotly.plot(gd, data).then(function() {
             assertDims([2, 2, 2, 2]);
 
@@ -425,8 +420,6 @@ describe('multiple transforms:', function() {
     it('toggling trace visibility should work', function(done) {
         var data = Lib.extendDeep([], mockData1);
 
-        var gd = createGraphDiv();
-
         Plotly.plot(gd, data).then(function() {
             assertDims([2, 2, 2, 2]);
 
@@ -446,6 +439,88 @@ describe('multiple transforms:', function() {
         });
     });
 
+    it('executes filter and aggregate in the order given', function() {
+        // filter and aggregate do not commute!
+
+        var trace1 = {
+            x: [0, -5, 7, 4, 5],
+            y: [2, 4, 6, 8, 10],
+            transforms: [{
+                type: 'aggregate',
+                groups: [1, 2, 2, 1, 1],
+                aggregations: [
+                    {target: 'x', func: 'sum'},
+                    {target: 'y', func: 'avg'}
+                ]
+            }, {
+                type: 'filter',
+                target: 'x',
+                operation: '<',
+                value: 5
+            }]
+        };
+
+        var trace2 = Lib.extendDeep({}, trace1);
+        trace2.transforms.reverse();
+
+        Plotly.newPlot(gd, [trace1, trace2]);
+
+        var trace1Out = gd._fullData[0];
+        expect(trace1Out.x).toEqual([2]);
+        expect(trace1Out.y).toEqual([5]);
+
+        var trace2Out = gd._fullData[1];
+        expect(trace2Out.x).toEqual([4, -5]);
+        expect(trace2Out.y).toEqual([5, 4]);
+    });
+
+    it('always executes groupby before aggregate', function() {
+        // aggregate and groupby wouldn't commute, but groupby always happens first
+        // because it has a `transform`, and aggregate has a `calcTransform`
+
+        var trace1 = {
+            x: [1, 2, 3, 4, 5],
+            y: [2, 4, 6, 8, 10],
+            transforms: [{
+                type: 'groupby',
+                groups: [1, 1, 2, 2, 2]
+            }, {
+                type: 'aggregate',
+                groups: [1, 2, 2, 1, 1],
+                aggregations: [
+                    {target: 'x', func: 'sum'},
+                    {target: 'y', func: 'avg'}
+                ]
+            }]
+        };
+
+        var trace2 = Lib.extendDeep({}, trace1);
+        trace2.transforms.reverse();
+
+        Plotly.newPlot(gd, [trace1, trace2]);
+
+        var t1g1 = gd._fullData[0];
+        var t1g2 = gd._fullData[1];
+        var t2g1 = gd._fullData[2];
+        var t2g2 = gd._fullData[3];
+
+        expect(t1g1.x).toEqual([1, 2]);
+        expect(t1g1.y).toEqual([2, 4]);
+        // group 2 has its aggregations switched, since group 2 comes first
+        expect(t1g2.x).toEqual([3, 9]);
+        expect(t1g2.y).toEqual([6, 9]);
+
+        // if we had done aggregation first, we'd implicitly get the first val
+        // for each of the groupby groups, which is [1, 1]
+        // so we'd only make 1 output trace, and it would look like:
+        // {x: [10, 5], y: [20/3, 5]}
+        // (and if we got some other groupby groups values, the most it could do
+        // is break ^^ into two separate traces)
+        expect(t2g1.x).toEqual(t1g1.x);
+        expect(t2g1.y).toEqual(t1g1.y);
+        expect(t2g2.x).toEqual(t1g2.x);
+        expect(t2g2.y).toEqual(t1g2.y);
+    });
 });
 
 describe('invalid transforms', function() {