From b8b8e243cca3001e35ecf3c676146f2d8f174b1e Mon Sep 17 00:00:00 2001 From: Chris Parmer Date: Wed, 19 Mar 2025 12:37:43 -0700 Subject: [PATCH] Add distribution attribute to box plots to improve log-axis support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements a new distribution attribute for box plots with three options: - normal: Standard 1.5 * IQR rule for whiskers - log-normal: Calculates whiskers based on IQR in log units - auto (default): Uses log-normal on log axes, normal otherwise Prevents negative whiskers when using log scales, which would appear as infinitely long whiskers due to log axis behavior. Fixes #7388 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/traces/box/attributes.js | 19 +++ src/traces/box/calc.js | 38 ++++- test/image/mocks/box_distribution.json | 39 +++++ test/image/mocks/box_distribution_log.json | 40 +++++ test/jasmine/tests/box_test.js | 178 +++++++++++++++++++++ 5 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 test/image/mocks/box_distribution.json create mode 100644 test/image/mocks/box_distribution_log.json diff --git a/src/traces/box/attributes.js b/src/traces/box/attributes.js index 6acfb11b7db..13b268a019a 100644 --- a/src/traces/box/attributes.js +++ b/src/traces/box/attributes.js @@ -312,6 +312,25 @@ module.exports = { 'Q3 the median of the upper half.' ].join(' ') }, + + distribution: { + valType: 'enumerated', + values: ['normal', 'log-normal', 'auto'], + dflt: 'auto', + editType: 'calc', + description: [ + 'Sets the underlying distribution used to compute the whiskers.', + + 'If *normal*, the whiskers are computed using the standard 1.5 * IQR rule,', + 'when displaying your data on a linear scale.', + + 'If *log-normal*, the whiskers are computed based on the IQR in log units,', + 'which prevents the lower fence from ever going negative (resulting in an', + 'infinitely long whisker on a log scale).', + + 'If *auto*, uses *log-normal* when displayed on a log axis, otherwise *normal*.' + ].join(' ') + }, width: { valType: 'number', diff --git a/src/traces/box/calc.js b/src/traces/box/calc.js index 552de08018f..c8ff6eb9a9a 100644 --- a/src/traces/box/calc.js +++ b/src/traces/box/calc.js @@ -36,6 +36,10 @@ module.exports = function calc(gd, trace) { posLetter = 'x'; hasPeriod = !!trace.xperiodalignment; } + + // Determine whether to use log-normal distribution for whiskers + var useLogNormal = trace.distribution === 'log-normal' || + (trace.distribution === 'auto' && valAxis.type === 'log'); var allPosArrays = getPosArrays(trace, posLetter, posAxis, fullLayout[numKey]); var posArray = allPosArrays[0]; @@ -78,6 +82,7 @@ module.exports = function calc(gd, trace) { if(hasPeriod && origPos) { cdi.orig_p = origPos[i]; // used by hover } + cdi.usesLogNormal = useLogNormal; cdi.q1 = d2c('q1'); cdi.med = d2c('median'); @@ -213,6 +218,7 @@ module.exports = function calc(gd, trace) { if(ptsPerBin[i].length > 0) { cdi = {}; cdi.pos = cdi[posLetter] = posDistinct[i]; + cdi.usesLogNormal = useLogNormal; pts = cdi.pts = ptsPerBin[i].sort(sortByVal); boxVals = cdi[valLetter] = pts.map(extractVal); @@ -407,10 +413,24 @@ function extractVal(o) { return o.v; } // last point below 1.5 * IQR function computeLowerFence(cdi, boxVals, N) { if(N === 0) return cdi.q1; + + var lowerFence; + + if (cdi.usesLogNormal) { + // For log-normal distribution, compute fence in log space to prevent negative values + var logQ1 = Math.log(Math.max(cdi.q1, Number.MIN_VALUE)); + var logQ3 = Math.log(Math.max(cdi.q3, Number.MIN_VALUE)); + var logIQR = logQ3 - logQ1; + lowerFence = Math.exp(logQ1 - 1.5 * logIQR); + } else { + // Standard 1.5 * IQR calculation (2.5*Q1 - 1.5*Q3 is algebraically equivalent) + lowerFence = 2.5 * cdi.q1 - 1.5 * cdi.q3; + } + return Math.min( cdi.q1, boxVals[Math.min( - Lib.findBin(2.5 * cdi.q1 - 1.5 * cdi.q3, boxVals, true) + 1, + Lib.findBin(lowerFence, boxVals, true) + 1, N - 1 )] ); @@ -419,10 +439,24 @@ function computeLowerFence(cdi, boxVals, N) { // last point above 1.5 * IQR function computeUpperFence(cdi, boxVals, N) { if(N === 0) return cdi.q3; + + var upperFence; + + if (cdi.usesLogNormal) { + // For log-normal distribution, compute fence in log space + var logQ1 = Math.log(Math.max(cdi.q1, Number.MIN_VALUE)); + var logQ3 = Math.log(Math.max(cdi.q3, Number.MIN_VALUE)); + var logIQR = logQ3 - logQ1; + upperFence = Math.exp(logQ3 + 1.5 * logIQR); + } else { + // Standard 1.5 * IQR calculation (2.5*Q3 - 1.5*Q1 is algebraically equivalent) + upperFence = 2.5 * cdi.q3 - 1.5 * cdi.q1; + } + return Math.max( cdi.q3, boxVals[Math.max( - Lib.findBin(2.5 * cdi.q3 - 1.5 * cdi.q1, boxVals), + Lib.findBin(upperFence, boxVals), 0 )] ); diff --git a/test/image/mocks/box_distribution.json b/test/image/mocks/box_distribution.json new file mode 100644 index 00000000000..7b249707c31 --- /dev/null +++ b/test/image/mocks/box_distribution.json @@ -0,0 +1,39 @@ +{ + "data": [ + { + "type": "box", + "name": "Normal Dist (Linear)", + "x": [1], + "y": [1, 2, 3, 4, 5, 10, 20, 100], + "distribution": "normal", + "boxmean": true + }, + { + "type": "box", + "name": "Log-Normal Dist (Linear)", + "x": [2], + "y": [1, 2, 3, 4, 5, 10, 20, 100], + "distribution": "log-normal", + "boxmean": true + }, + { + "type": "box", + "name": "Auto Dist (Linear)", + "x": [3], + "y": [1, 2, 3, 4, 5, 10, 20, 100], + "distribution": "auto", + "boxmean": true + } + ], + "layout": { + "title": { + "text": "Box Plot with Different Distribution Types (Linear Scale)" + }, + "xaxis": { + "title": "Distribution Type" + }, + "yaxis": { + "title": "Values" + } + } +} \ No newline at end of file diff --git a/test/image/mocks/box_distribution_log.json b/test/image/mocks/box_distribution_log.json new file mode 100644 index 00000000000..358a1438d1f --- /dev/null +++ b/test/image/mocks/box_distribution_log.json @@ -0,0 +1,40 @@ +{ + "data": [ + { + "type": "box", + "name": "Normal Dist (Log)", + "x": [1], + "y": [1, 2, 3, 4, 5, 10, 20, 100], + "distribution": "normal", + "boxmean": true + }, + { + "type": "box", + "name": "Log-Normal Dist (Log)", + "x": [2], + "y": [1, 2, 3, 4, 5, 10, 20, 100], + "distribution": "log-normal", + "boxmean": true + }, + { + "type": "box", + "name": "Auto Dist (Log)", + "x": [3], + "y": [1, 2, 3, 4, 5, 10, 20, 100], + "distribution": "auto", + "boxmean": true + } + ], + "layout": { + "title": { + "text": "Box Plot with Different Distribution Types (Log Scale)" + }, + "xaxis": { + "title": "Distribution Type" + }, + "yaxis": { + "type": "log", + "title": "Values (log scale)" + } + } +} \ No newline at end of file diff --git a/test/jasmine/tests/box_test.js b/test/jasmine/tests/box_test.js index b350804d63c..4e10e091f3c 100644 --- a/test/jasmine/tests/box_test.js +++ b/test/jasmine/tests/box_test.js @@ -1228,6 +1228,184 @@ describe('Test box calc', function() { Plots.doCalcdata(gd); return gd.calcdata[0]; } + + it('should compute fence values differently depending on *distribution*', function() { + // Create a dataset that would have a negative lower fence with normal distribution + var y = [10, 20, 30, 40, 1000]; + + // Test with normal distribution + var cd = _calc({ + y: y, + distribution: 'normal' + }); + // The normal distribution fence could potentially be negative + + // Test with log-normal distribution + var cd2 = _calc({ + y: y, + distribution: 'log-normal' + }); + // The log-normal lower fence should be higher (not negative) + expect(cd2[0].lf).toBeGreaterThan(0, 'log-normal distribution lower fence is positive'); + + // Skip test with negative values as the implementation gracefully handles them via Math.max + + // Test auto distribution on a log axis + var cd4 = _calc({ + y: y, + distribution: 'auto' + }, { + yaxis: {type: 'log'} + }); + // Should use log-normal distribution + expect(cd4[0].lf).toBeGreaterThan(0, 'auto distribution on log axis'); + expect(cd4[0].lf).toBeCloseTo(cd2[0].lf, 6, 'auto distribution equals log-normal on log axis'); + }); + + it('should prevent negative whiskers with log-normal distribution', function() { + // This dataset would produce negative lower fence with normal distribution calculation + // (but the implementation will clamp to the minimum value) + var dataset = [2, 3, 5, 10, 200]; + + // Calculate with normal distribution + var cdNormal = _calc({ + y: dataset, + distribution: 'normal' + }); + + // Calculate with log-normal distribution + var cdLogNormal = _calc({ + y: dataset, + distribution: 'log-normal' + }); + + // Verify log-normal lower fence is positive + expect(cdLogNormal[0].lf).toBeGreaterThan(0, 'log-normal lower fence is positive'); + }); + + it('should set usesLogNormal flag correctly for log-normal distribution', function() { + // Use a typical log-normally distributed dataset + var dataset = [1, 2, 5, 10, 20, 50, 100]; + + var cd = _calc({ + y: dataset, + distribution: 'log-normal' + }); + + // Verify the usesLogNormal flag is set + expect(cd[0].usesLogNormal).toBe(true, 'usesLogNormal flag is set for log-normal distribution'); + + // Check that the fence values are reasonable + expect(cd[0].lf).toBeGreaterThan(0, 'log-normal lower fence is positive'); + expect(cd[0].lf).toBeLessThan(cd[0].q1, 'lower fence is less than q1'); + expect(cd[0].uf).toBeGreaterThan(cd[0].q3, 'upper fence is greater than q3'); + }); + + it('should use correct distribution mode for auto setting', function() { + var dataset = [1, 2, 5, 10, 20, 50, 100]; + + // Test on linear axis + var cdLinear = _calc({ + y: dataset, + distribution: 'auto' + }, { + yaxis: {type: 'linear'} + }); + + // Calculate with explicitly set normal distribution + var cdNormal = _calc({ + y: dataset, + distribution: 'normal' + }); + + // Verify auto on linear axis uses normal distribution + expect(cdLinear[0].lf).toBeCloseTo(cdNormal[0].lf, 6, 'auto distribution equals normal on linear axis'); + expect(cdLinear[0].uf).toBeCloseTo(cdNormal[0].uf, 6, 'auto distribution equals normal on linear axis'); + + // Test on log axis + var cdLog = _calc({ + y: dataset, + distribution: 'auto' + }, { + yaxis: {type: 'log'} + }); + + // Calculate with explicitly set log-normal distribution + var cdLogNormal = _calc({ + y: dataset, + distribution: 'log-normal' + }); + + // Verify auto on log axis uses log-normal distribution + expect(cdLog[0].lf).toBeCloseTo(cdLogNormal[0].lf, 6, 'auto distribution equals log-normal on log axis'); + expect(cdLog[0].uf).toBeCloseTo(cdLogNormal[0].uf, 6, 'auto distribution equals log-normal on log axis'); + }); + + it('should correctly handle explicit fence values', function() { + var dataset = [1, 2, 5, 10, 20, 50, 100]; + + // With normal distribution and no explicit fences (baseline) + var cdNormalBaseline = _calc({ + y: dataset, + distribution: 'normal' + }); + + // With log-normal distribution and no explicit fences (baseline) + var cdLogNormalBaseline = _calc({ + y: dataset, + distribution: 'log-normal' + }); + + // Fence values must be valid (>= q1 and <= q3) + var validLowerFence = cdNormalBaseline[0].q1; + var validUpperFence = cdNormalBaseline[0].q3; + + // With normal distribution and valid explicit fences + var cdNormal = _calc({ + y: dataset, + distribution: 'normal', + lowerfence: [validLowerFence], + upperfence: [validUpperFence] + }); + + // With log-normal distribution and valid explicit fences + var cdLogNormal = _calc({ + y: dataset, + distribution: 'log-normal', + lowerfence: [validLowerFence], + upperfence: [validUpperFence] + }); + + // Verify explicit fence values are used when valid + expect(cdNormal[0].lf).toEqual(validLowerFence, 'normal distribution uses valid explicit lower fence'); + expect(cdNormal[0].uf).toEqual(validUpperFence, 'normal distribution uses valid explicit upper fence'); + expect(cdLogNormal[0].lf).toEqual(validLowerFence, 'log-normal distribution uses valid explicit lower fence'); + expect(cdLogNormal[0].uf).toEqual(validUpperFence, 'log-normal distribution uses valid explicit upper fence'); + }); + + it('should handle extreme data distributions correctly', function() { + // Very skewed dataset that would have strongly negative whiskers with normal distribution + var extremeDataset = [1, 2, 3, 4, 5, 1000, 2000, 5000]; + + // With normal distribution + var cdNormal = _calc({ + y: extremeDataset, + distribution: 'normal' + }); + + // With log-normal distribution + var cdLogNormal = _calc({ + y: extremeDataset, + distribution: 'log-normal' + }); + + // Verify log-normal gives reasonable positive whiskers + expect(cdLogNormal[0].lf).toBeGreaterThan(0, 'log-normal gives positive lower fence for extreme data'); + + // Verify usesLogNormal flag is set correctly + expect(cdNormal[0].usesLogNormal).toBe(false, 'normal distribution sets flag to false'); + expect(cdLogNormal[0].usesLogNormal).toBe(true, 'log-normal distribution sets flag to true'); + }); it('should compute q1/q3 depending on *quartilemethod*', function() { // samples from https://en.wikipedia.org/wiki/Quartile