Merge branch 'master' of https://github.com/plotly/plotly.py

emmanuelle · emmanuelle · commit d2f8b7299348 · 2020-06-23T15:06:16.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,8 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 ### Fixed
 
 - Fixed special cases with `px.sunburst` and `px.treemap` with `path` input ([#2524](https://github.com/plotly/plotly.py/pull/2524))
+- Fixed bug in `hover_data` argument of `px` functions, when the column name is changed with `labels` and `hover_data` is a dictionary setting up a specific format for the hover data ([#2544](https://github.com/plotly/plotly.py/pull/2544))
+- Made the Plotly Express `trendline` argument more robust and made it work with datetime `x` values ([#2554](https://github.com/plotly/plotly.py/pull/2554))
 
 ## [4.8.1] - 2020-05-28
 
diff --git a/packages/python/plotly/_plotly_utils/utils.py b/packages/python/plotly/_plotly_utils/utils.py
@@ -247,3 +247,12 @@ def key(v):
         return tuple(v_parts)
 
     return sorted(vals, key=key, reverse=reverse)
+
+
+def _get_int_type():
+    np = get_module("numpy", should_load=False)
+    if np:
+        int_type = (int, np.integer)
+    else:
+        int_type = (int,)
+    return int_type
diff --git a/packages/python/plotly/plotly/basedatatypes.py b/packages/python/plotly/plotly/basedatatypes.py
@@ -9,7 +9,7 @@
 from contextlib import contextmanager
 from copy import deepcopy, copy
 
-from _plotly_utils.utils import _natural_sort_strings
+from _plotly_utils.utils import _natural_sort_strings, _get_int_type
 from .optional_imports import get_module
 
 # Create Undefined sentinel value
@@ -1560,12 +1560,7 @@ def _validate_rows_cols(name, n, vals):
             if len(vals) != n:
                 BaseFigure._raise_invalid_rows_cols(name=name, n=n, invalid=vals)
 
-            try:
-                import numpy as np
-
-                int_type = (int, np.integer)
-            except ImportError:
-                int_type = (int,)
+            int_type = _get_int_type()
 
             if [r for r in vals if not isinstance(r, int_type)]:
                 BaseFigure._raise_invalid_rows_cols(name=name, n=n, invalid=vals)
@@ -1677,14 +1672,19 @@ def add_traces(self, data, rows=None, cols=None, secondary_ys=None):
                   - All remaining properties are passed to the constructor
                     of the specified trace type.
 
-        rows : None or list[int] (default None)
+        rows : None, list[int], or int (default None)
             List of subplot row indexes (starting from 1) for the traces to be
             added. Only valid if figure was created using
             `plotly.tools.make_subplots`
+            If a single integer is passed, all traces will be added to that row.
+
         cols : None or list[int] (default None)
             List of subplot column indexes (starting from 1) for the traces
             to be added. Only valid if figure was created using
             `plotly.tools.make_subplots`
+            If a single integer is passed, all traces will be added to that column.
+
+
         secondary_ys: None or list[boolean] (default None)
             List of secondary_y booleans for traces to be added. See the
             docstring for `add_trace` for more info.
@@ -1723,6 +1723,15 @@ def add_traces(self, data, rows=None, cols=None, secondary_ys=None):
         for ind, new_trace in enumerate(data):
             new_trace._trace_ind = ind + len(self.data)
 
+        # Allow a single integer for rows / cols by repeating it once per trace
+        int_type = _get_int_type()
+
+        if isinstance(rows, int_type):
+            rows = [rows] * len(data)
+
+        if isinstance(cols, int_type):
+            cols = [cols] * len(data)
+
         # Validate rows / cols
         n = len(data)
         BaseFigure._validate_rows_cols("rows", n, rows)
diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -117,6 +117,10 @@ def get_label(args, column):
 
 
 def invert_label(args, column):
+    """Invert mapping.
+    Find the key in dict args["labels"] whose value is `column`.
+    Returns `column` if the value does not exist.
+    """
     reversed_labels = {value: key for (key, value) in args["labels"].items()}
     try:
         return reversed_labels[column]
@@ -273,17 +277,35 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                     attr_value in ["ols", "lowess"]
                     and args["x"]
                     and args["y"]
-                    and len(trace_data) > 1
+                    and len(trace_data[[args["x"], args["y"]]].dropna()) > 1
                 ):
                     import statsmodels.api as sm
 
                     # sorting is bad but trace_specs with "trendline" have no other attrs
                     sorted_trace_data = trace_data.sort_values(by=args["x"])
-                    y = sorted_trace_data[args["y"]]
-                    x = sorted_trace_data[args["x"]]
+                    y = sorted_trace_data[args["y"]].values
+                    x = sorted_trace_data[args["x"]].values
 
+                    x_is_date = False
                     if x.dtype.type == np.datetime64:
                         x = x.astype(int) / 10 ** 9  # convert to unix epoch seconds
+                        x_is_date = True
+                    elif x.dtype.type == np.object_:
+                        try:
+                            x = x.astype(np.float64)
+                        except ValueError:
+                            raise ValueError(
+                                "Could not convert value of 'x' ('%s') into a numeric type. "
+                                "If 'x' contains stringified dates, please convert to a datetime column."
+                                % args["x"]
+                            )
+                    if y.dtype.type == np.object_:
+                        try:
+                            y = y.astype(np.float64)
+                        except ValueError:
+                            raise ValueError(
+                                "Could not convert value of 'y' into a numeric type."
+                            )
 
                     if attr_value == "lowess":
                         # missing ='drop' is the default value for lowess but not for OLS (None)
@@ -294,25 +316,32 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                         hover_header = "<b>LOWESS trendline</b><br><br>"
                     elif attr_value == "ols":
                         fit_results = sm.OLS(
-                            y.values, sm.add_constant(x.values), missing="drop"
+                            y, sm.add_constant(x), missing="drop"
                         ).fit()
                         trace_patch["y"] = fit_results.predict()
                         trace_patch["x"] = x[
                             np.logical_not(np.logical_or(np.isnan(y), np.isnan(x)))
                         ]
                         hover_header = "<b>OLS trendline</b><br>"
-                        hover_header += "%s = %g * %s + %g<br>" % (
-                            args["y"],
-                            fit_results.params[1],
-                            args["x"],
-                            fit_results.params[0],
-                        )
+                        if len(fit_results.params) == 2:
+                            hover_header += "%s = %g * %s + %g<br>" % (
+                                args["y"],
+                                fit_results.params[1],
+                                args["x"],
+                                fit_results.params[0],
+                            )
+                        else:
+                            hover_header += "%s = %g<br>" % (
+                                args["y"],
+                                fit_results.params[0],
+                            )
                         hover_header += (
                             "R<sup>2</sup>=%f<br><br>" % fit_results.rsquared
                         )
+                    if x_is_date:
+                        trace_patch["x"] = pd.to_datetime(trace_patch["x"] * 10 ** 9)
                     mapping_labels[get_label(args, args["x"])] = "%{x}"
                     mapping_labels[get_label(args, args["y"])] = "%{y} <b>(trend)</b>"
-
             elif attr_name.startswith("error"):
                 error_xy = attr_name[:7]
                 arr = "arrayminus" if attr_name.endswith("minus") else "array"
@@ -442,6 +471,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
         mapping_labels_copy = OrderedDict(mapping_labels)
         if args["hover_data"] and isinstance(args["hover_data"], dict):
             for k, v in mapping_labels.items():
+                # Keys of mapping_labels may have been renamed via args["labels"]; invert to look them up in hover_data
                 k_args = invert_label(args, k)
                 if k_args in args["hover_data"]:
                     if args["hover_data"][k_args][0]:
diff --git a/packages/python/plotly/plotly/tests/test_core/test_figure_messages/test_add_traces.py b/packages/python/plotly/plotly/tests/test_core/test_figure_messages/test_add_traces.py
@@ -63,3 +63,33 @@ def test_add_traces(self):
                 {"type": "histogram2dcontour", "line": {"color": "cyan"}},
             ]
         )
+
+
+class TestAddTracesRowsColsDataTypes(TestCase):
+    def test_add_traces_with_iterable(self):
+        import plotly.express as px
+
+        df = px.data.tips()
+        fig = px.scatter(df, x="total_bill", y="tip", color="day")
+        from plotly.subplots import make_subplots
+
+        fig2 = make_subplots(1, 2)
+        fig2.add_traces(fig.data, rows=[1,] * len(fig.data), cols=[1,] * len(fig.data))
+
+        expected_data_length = 4
+
+        self.assertEqual(expected_data_length, len(fig2.data))
+
+    def test_add_traces_with_integers(self):
+        import plotly.express as px
+
+        df = px.data.tips()
+        fig = px.scatter(df, x="total_bill", y="tip", color="day")
+        from plotly.subplots import make_subplots
+
+        fig2 = make_subplots(1, 2)
+        fig2.add_traces(fig.data, rows=1, cols=2)
+
+        expected_data_length = 4
+
+        self.assertEqual(expected_data_length, len(fig2.data))
diff --git a/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py b/packages/python/plotly/plotly/tests/test_core/test_px/test_trendline.py
@@ -1,14 +1,110 @@
 import plotly.express as px
 import numpy as np
+import pandas as pd
+import pytest
+from datetime import datetime
 
 
-def test_trendline_nan_values():
+@pytest.mark.parametrize("mode", ["ols", "lowess"])
+def test_trendline_results_passthrough(mode):
+    df = px.data.gapminder().query("continent == 'Oceania'")
+    fig = px.scatter(df, x="year", y="pop", color="country", trendline=mode)
+    assert len(fig.data) == 4
+    for trace in fig["data"][0::2]:
+        assert "trendline" not in trace.hovertemplate
+    for trendline in fig["data"][1::2]:
+        assert "trendline" in trendline.hovertemplate
+        if mode == "ols":
+            assert "R<sup>2</sup>" in trendline.hovertemplate
+    results = px.get_trendline_results(fig)
+    if mode == "ols":
+        assert len(results) == 2
+        assert results["country"].values[0] == "Australia"
+        assert results["country"].values[0] == "Australia"
+        au_result = results["px_fit_results"].values[0]
+        assert len(au_result.params) == 2
+    else:
+        assert len(results) == 0
+
+
+@pytest.mark.parametrize("mode", ["ols", "lowess"])
+def test_trendline_enough_values(mode):
+    fig = px.scatter(x=[0, 1], y=[0, 1], trendline=mode)
+    assert len(fig.data) == 2
+    assert len(fig.data[1].x) == 2
+    fig = px.scatter(x=[0], y=[0], trendline=mode)
+    assert len(fig.data) == 2
+    assert fig.data[1].x is None
+    fig = px.scatter(x=[0, 1], y=[0, None], trendline=mode)
+    assert len(fig.data) == 2
+    assert fig.data[1].x is None
+    fig = px.scatter(x=[0, 1], y=np.array([0, np.nan]), trendline=mode)
+    assert len(fig.data) == 2
+    assert fig.data[1].x is None
+    fig = px.scatter(x=[0, 1, None], y=[0, None, 1], trendline=mode)
+    assert len(fig.data) == 2
+    assert fig.data[1].x is None
+    fig = px.scatter(
+        x=np.array([0, 1, np.nan]), y=np.array([0, np.nan, 1]), trendline=mode
+    )
+    assert len(fig.data) == 2
+    assert fig.data[1].x is None
+    fig = px.scatter(x=[0, 1, None, 2], y=[1, None, 1, 2], trendline=mode)
+    assert len(fig.data) == 2
+    assert len(fig.data[1].x) == 2
+    fig = px.scatter(
+        x=np.array([0, 1, np.nan, 2]), y=np.array([1, np.nan, 1, 2]), trendline=mode
+    )
+    assert len(fig.data) == 2
+    assert len(fig.data[1].x) == 2
+
+
+@pytest.mark.parametrize("mode", ["ols", "lowess"])
+def test_trendline_nan_values(mode):
     df = px.data.gapminder().query("continent == 'Oceania'")
     start_date = 1970
     df["pop"][df["year"] < start_date] = np.nan
-    modes = ["ols", "lowess"]
-    for mode in modes:
-        fig = px.scatter(df, x="year", y="pop", color="country", trendline=mode)
-        for trendline in fig["data"][1::2]:
-            assert trendline.x[0] >= start_date
-            assert len(trendline.x) == len(trendline.y)
+    fig = px.scatter(df, x="year", y="pop", color="country", trendline=mode)
+    for trendline in fig["data"][1::2]:
+        assert trendline.x[0] >= start_date
+        assert len(trendline.x) == len(trendline.y)
+
+
+def test_no_slope_ols_trendline():
+    fig = px.scatter(x=[0, 1], y=[0, 1], trendline="ols")
+    assert "y = 1" in fig.data[1].hovertemplate  # followed by " * x + (some small number)"
+    results = px.get_trendline_results(fig)
+    params = results["px_fit_results"].iloc[0].params
+    assert np.all(np.isclose(params, [0, 1]))
+
+    fig = px.scatter(x=[1, 1], y=[0, 0], trendline="ols")
+    assert "y = 0" in fig.data[1].hovertemplate
+    results = px.get_trendline_results(fig)
+    params = results["px_fit_results"].iloc[0].params
+    assert np.all(np.isclose(params, [0]))
+
+    fig = px.scatter(x=[1, 2], y=[0, 0], trendline="ols")
+    assert "y = 0" in fig.data[1].hovertemplate
+    fig = px.scatter(x=[0, 0], y=[1, 1], trendline="ols")
+    assert "y = 0 * x + 1" in fig.data[1].hovertemplate
+    fig = px.scatter(x=[0, 0], y=[1, 2], trendline="ols")
+    assert "y = 0 * x + 1.5" in fig.data[1].hovertemplate
+
+
+@pytest.mark.parametrize("mode", ["ols", "lowess"])
+def test_trendline_on_timeseries(mode):
+    df = px.data.stocks()
+
+    with pytest.raises(ValueError) as err_msg:
+        px.scatter(df, x="date", y="GOOG", trendline=mode)
+    assert "Could not convert value of 'x' ('date') into a numeric type." in str(
+        err_msg.value
+    )
+
+    df["date"] = pd.to_datetime(df["date"])
+    fig = px.scatter(df, x="date", y="GOOG", trendline=mode)
+    assert len(fig.data) == 2
+    assert len(fig.data[0].x) == len(fig.data[1].x)
+    assert type(fig.data[0].x[0]) == datetime
+    assert type(fig.data[1].x[0]) == datetime
+    assert np.all(fig.data[0].x == fig.data[1].x)
diff --git a/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py b/packages/python/plotly/plotly/tests/test_core/test_utils/test_utils.py
@@ -70,3 +70,28 @@ def test_numpy_integer_import(self):
         value = get_by_path(fig, data_path)
         expected_value = (1,)
         self.assertEqual(value, expected_value)
+
+    def test_get_numpy_int_type(self):
+        import numpy as np
+        from _plotly_utils.utils import _get_int_type
+
+        int_type_tuple = _get_int_type()
+        expected_tuple = (int, np.integer)
+
+        self.assertEqual(int_type_tuple, expected_tuple)
+
+
+class TestNoNumpyIntegerBaseType(TestCase):
+    def test_no_numpy_int_type(self):
+        import sys
+        from _plotly_utils.utils import _get_int_type
+        from _plotly_utils.optional_imports import get_module
+
+        np = get_module("numpy", should_load=False)
+        if np:
+            sys.modules.pop("numpy")
+
+        int_type_tuple = _get_int_type()
+        expected_tuple = (int,)
+
+        self.assertEqual(int_type_tuple, expected_tuple)