diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fd1cd3d0022c9..382e57e0421c5 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -371,6 +371,8 @@ Other Enhancements - :func:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) - ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`) - ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) +- ``pd.read_csv()`` will now raise a ``csv.Error`` error whenever an end-of-file character is encountered in the middle of a data row (:issue:`15913`) +- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword arg that sorts class labels and the colours assigned to them (:issue:`15908`) .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 812f039f1a2c7..504c55bcfcfd0 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -241,6 +241,26 @@ def test_parallel_coordinates(self): with tm.assert_produces_warning(FutureWarning): parallel_coordinates(df, 'Name', colors=colors) + def test_parallel_coordinates_with_sorted_labels(self): + """ For #15908 """ + from pandas.tools.plotting import parallel_coordinates + + df = DataFrame({"feat": [i for i in range(30)], + "class": [2 for _ in range(10)] + + [3 for _ in range(10)] + + [1 for _ in range(10)]}) + ax = parallel_coordinates(df, 'class', sort_labels=True) + polylines, labels = ax.get_legend_handles_labels() + color_label_tuples = \ + zip([polyline.get_color() for polyline in polylines], labels) + ordered_color_label_tuples = sorted(color_label_tuples, + key=lambda x: x[1]) + prev_next_tupels = zip([i for i in ordered_color_label_tuples[0:-1]], + [i for i in 
ordered_color_label_tuples[1:]]) + for prev, nxt in prev_next_tupels: + # labels and colors are ordered strictly increasing + assert prev[1] < nxt[1] and prev[0] < nxt[0] + @slow def test_radviz(self): from pandas.tools.plotting import radviz diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index f70a2b0b22140..b15ccdacb6ab7 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -694,7 +694,8 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): @deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, use_columns=False, xticks=None, colormap=None, - axvlines=True, axvlines_kwds=None, **kwds): + axvlines=True, axvlines_kwds=None, sort_labels=False, + **kwds): """Parallel coordinates plotting. Parameters @@ -718,6 +719,11 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, If true, vertical lines will be added at each xtick axvlines_kwds: keywords, optional Options to be passed to axvline method for vertical lines + sort_labels: bool, False + Sort class_column labels, useful when assigning colours + + .. versionadded:: 0.20.0 + kwds: keywords Options to pass to matplotlib plotting method @@ -774,6 +780,9 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, colormap=colormap, color_type='random', color=color) + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) colors = dict(zip(classes, color_values)) for i in range(n):