From 146b3b538edc21409e0537399ce0493b25bd6ccf Mon Sep 17 00:00:00 2001 From: skojoian Date: Tue, 12 Mar 2019 03:06:25 -0700 Subject: [PATCH] BUG: Raise ValueError if a column index in usecols is out of bounds. #25623 --- ci/code_checks.sh | 4 +- doc/source/development/index.rst | 3 + doc/source/getting_started/dsintro.rst | 44 --- doc/source/getting_started/index.rst | 3 + doc/source/index.rst.template | 70 ++++- doc/source/reference/index.rst | 65 ++-- .../themes/nature_with_gtoc/layout.html | 4 +- doc/source/user_guide/index.rst | 3 + doc/source/user_guide/io.rst | 4 +- doc/source/whatsnew/index.rst | 2 + doc/source/whatsnew/v0.13.1.rst | 149 +++++++-- doc/source/whatsnew/v0.20.0.rst | 64 +++- doc/source/whatsnew/v0.23.0.rst | 60 +++- doc/source/whatsnew/v0.24.1.rst | 13 +- doc/source/whatsnew/v0.24.2.rst | 79 +++-- doc/source/whatsnew/v0.25.0.rst | 5 +- pandas/_libs/tslibs/timedeltas.pyx | 1 - pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/sparse.py | 1 - pandas/core/base.py | 3 +- pandas/core/config.py | 1 - pandas/core/frame.py | 2 - pandas/core/generic.py | 25 +- pandas/core/groupby/base.py | 4 +- pandas/core/groupby/groupby.py | 11 +- pandas/core/indexes/base.py | 1 - pandas/core/ops.py | 286 ++++++++++++++---- pandas/core/sorting.py | 9 +- pandas/io/common.py | 2 +- pandas/io/feather_format.py | 1 - pandas/io/formats/html.py | 4 +- pandas/io/parsers.py | 18 +- pandas/plotting/_core.py | 5 +- pandas/plotting/_misc.py | 1 - pandas/tests/arrays/sparse/test_array.py | 6 +- pandas/tests/computation/test_eval.py | 3 - pandas/tests/frame/test_asof.py | 53 ++-- pandas/tests/frame/test_combine_concat.py | 33 +- pandas/tests/frame/test_constructors.py | 12 +- pandas/tests/frame/test_join.py | 42 ++- pandas/tests/frame/test_mutate_columns.py | 25 +- pandas/tests/frame/test_to_csv.py | 12 + pandas/tests/internals/test_internals.py | 1 - pandas/tests/io/formats/test_to_html.py | 10 + pandas/tests/io/parser/test_usecols.py | 13 + pandas/tests/io/test_sql.py | 
120 ++++---- pandas/tests/plotting/test_datetimelike.py | 2 +- pandas/tests/plotting/test_frame.py | 12 +- pandas/tests/plotting/test_series.py | 8 +- pandas/tests/resample/test_datetime_index.py | 12 + pandas/tests/series/test_api.py | 7 + pandas/tests/test_sorting.py | 10 +- pandas/util/testing.py | 29 -- scripts/tests/test_validate_docstrings.py | 23 +- scripts/validate_docstrings.py | 42 ++- 55 files changed, 945 insertions(+), 479 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c4840f1e836c4..51df779341ed5 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -241,8 +241,8 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then - MSG='Validate docstrings (GL06, GL07, GL09, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT04, RT05, SA05)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT04,RT05,SA05 + MSG='Validate docstrings (GL03, GL06, GL07, GL09, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT04, RT05, SA05)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL06,GL07,GL09,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT04,RT05,SA05 RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst index d67a6c3a2ca04..a149f31118ed5 100644 --- a/doc/source/development/index.rst +++ b/doc/source/development/index.rst @@ -6,6 +6,9 @@ Development =========== +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + .. toctree:: :maxdepth: 2 diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst index c8a2399739cd5..373cffd30ff14 100644 --- a/doc/source/getting_started/dsintro.rst +++ b/doc/source/getting_started/dsintro.rst @@ -1030,47 +1030,3 @@ method: major_axis=pd.date_range('1/1/2000', periods=5), minor_axis=['a', 'b', 'c', 'd']) panel.to_frame() - - -.. 
_dsintro.deprecate_panel: - -Deprecate Panel ---------------- - -Over the last few years, pandas has increased in both breadth and depth, with new features, -datatype support, and manipulation routines. As a result, supporting efficient indexing and functional -routines for ``Series``, ``DataFrame`` and ``Panel`` has contributed to an increasingly fragmented and -difficult-to-understand code base. - -The 3-D structure of a ``Panel`` is much less common for many types of data analysis, -than the 1-D of the ``Series`` or the 2-D of the ``DataFrame``. Going forward it makes sense for -pandas to focus on these areas exclusively. - -Oftentimes, one can simply use a MultiIndex ``DataFrame`` for easily working with higher dimensional data. - -In addition, the ``xarray`` package was built from the ground up, specifically in order to -support the multi-dimensional analysis that is one of ``Panel`` s main use cases. -`Here is a link to the xarray panel-transition documentation `__. - -.. ipython:: python - :okwarning: - - import pandas.util.testing as tm - p = tm.makePanel() - p - -Convert to a MultiIndex DataFrame. - -.. ipython:: python - :okwarning: - - p.to_frame() - -Alternatively, one can convert to an xarray ``DataArray``. - -.. ipython:: python - :okwarning: - - p.to_xarray() - -You can see the full-documentation for the `xarray package `__. diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 4c5d26461a667..eead28830f861 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -6,6 +6,9 @@ Getting started =============== +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + .. 
toctree:: :maxdepth: 2 diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index d04e9194e71dc..f18c61b5e2f95 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -25,7 +25,7 @@ See the :ref:`overview` for more detail about what's in the library. {% if single_doc and single_doc.endswith('.rst') -%} .. toctree:: - :maxdepth: 2 + :maxdepth: 3 {{ single_doc[:-4] }} {% elif single_doc %} @@ -35,7 +35,8 @@ See the :ref:`overview` for more detail about what's in the library. {{ single_doc }} {% else -%} .. toctree:: - :maxdepth: 2 + :maxdepth: 3 + :hidden: {% endif %} {% if not single_doc -%} @@ -51,4 +52,67 @@ See the :ref:`overview` for more detail about what's in the library. {% if not single_doc -%} development/index whatsnew/index - {% endif -%} +{% endif -%} + + +* :doc:`whatsnew/v0.25.0` +* :doc:`install` +* :doc:`getting_started/index` + + * :doc:`getting_started/overview` + * :doc:`getting_started/10min` + * :doc:`getting_started/basics` + * :doc:`getting_started/dsintro` + * :doc:`getting_started/comparison/index` + * :doc:`getting_started/tutorials` + +* :doc:`user_guide/index` + + * :doc:`user_guide/io` + * :doc:`user_guide/indexing` + * :doc:`user_guide/advanced` + * :doc:`user_guide/merging` + * :doc:`user_guide/reshaping` + * :doc:`user_guide/text` + * :doc:`user_guide/missing_data` + * :doc:`user_guide/categorical` + * :doc:`user_guide/integer_na` + * :doc:`user_guide/visualization` + * :doc:`user_guide/computation` + * :doc:`user_guide/groupby` + * :doc:`user_guide/timeseries` + * :doc:`user_guide/timedeltas` + * :doc:`user_guide/style` + * :doc:`user_guide/options` + * :doc:`user_guide/enhancingperf` + * :doc:`user_guide/sparse` + * :doc:`user_guide/gotchas` + * :doc:`user_guide/cookbook` + +* :doc:`ecosystem` +* :doc:`reference/index` + + * :doc:`reference/io` + * :doc:`reference/general_functions` + * :doc:`reference/series` + * :doc:`reference/frame` + * :doc:`reference/arrays` + * 
:doc:`reference/panel` + * :doc:`reference/indexing` + * :doc:`reference/offset_frequency` + * :doc:`reference/window` + * :doc:`reference/groupby` + * :doc:`reference/resampling` + * :doc:`reference/style` + * :doc:`reference/plotting` + * :doc:`reference/general_utility_functions` + * :doc:`reference/extensions` + +* :doc:`development/index` + + * :doc:`development/contributing` + * :doc:`development/internals` + * :doc:`development/extending` + * :doc:`development/developer` + +* :doc:`whatsnew/index` diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst index ef4676054473a..1e652c9e5497d 100644 --- a/doc/source/reference/index.rst +++ b/doc/source/reference/index.rst @@ -19,6 +19,9 @@ public functions related to data types in pandas. The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + .. toctree:: :maxdepth: 2 @@ -41,40 +44,40 @@ public functions related to data types in pandas. .. This is to prevent warnings in the doc build. We don't want to encourage .. these methods. -.. toctree:: - :hidden: - - api/pandas.DataFrame.blocks - api/pandas.DataFrame.as_matrix - api/pandas.DataFrame.ix - api/pandas.Index.asi8 - api/pandas.Index.data - api/pandas.Index.flags - api/pandas.Index.holds_integer - api/pandas.Index.is_type_compatible - api/pandas.Index.nlevels - api/pandas.Index.sort - api/pandas.Panel.agg - api/pandas.Panel.aggregate - api/pandas.Panel.blocks - api/pandas.Panel.empty - api/pandas.Panel.is_copy - api/pandas.Panel.items - api/pandas.Panel.ix - api/pandas.Panel.major_axis - api/pandas.Panel.minor_axis - api/pandas.Series.asobject - api/pandas.Series.blocks - api/pandas.Series.from_array - api/pandas.Series.ix - api/pandas.Series.imag - api/pandas.Series.real +.. + .. 
toctree:: + + api/pandas.DataFrame.blocks + api/pandas.DataFrame.as_matrix + api/pandas.DataFrame.ix + api/pandas.Index.asi8 + api/pandas.Index.data + api/pandas.Index.flags + api/pandas.Index.holds_integer + api/pandas.Index.is_type_compatible + api/pandas.Index.nlevels + api/pandas.Index.sort + api/pandas.Panel.agg + api/pandas.Panel.aggregate + api/pandas.Panel.blocks + api/pandas.Panel.empty + api/pandas.Panel.is_copy + api/pandas.Panel.items + api/pandas.Panel.ix + api/pandas.Panel.major_axis + api/pandas.Panel.minor_axis + api/pandas.Series.asobject + api/pandas.Series.blocks + api/pandas.Series.from_array + api/pandas.Series.ix + api/pandas.Series.imag + api/pandas.Series.real .. Can't convince sphinx to generate toctree for this class attribute. .. So we do it manually to avoid a warning -.. toctree:: - :hidden: +.. + .. toctree:: - api/pandas.api.extensions.ExtensionDtype.na_value + api/pandas.api.extensions.ExtensionDtype.na_value diff --git a/doc/source/themes/nature_with_gtoc/layout.html b/doc/source/themes/nature_with_gtoc/layout.html index a2106605c5562..b3f13f99f44d4 100644 --- a/doc/source/themes/nature_with_gtoc/layout.html +++ b/doc/source/themes/nature_with_gtoc/layout.html @@ -19,7 +19,7 @@ {%- block sidebar1 %} {%- block sidebartoc %}

{{ _('Table Of Contents') }}

- {{ toctree() }} + {{ toctree(includehidden=True) }} {%- endblock %} {%- block sidebarsearch %}

{{ _('Search') }}

@@ -105,4 +105,4 @@

{{ _('Search') }}

var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s); })(); -{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst index d39cf7103ab63..05df83decbd7e 100644 --- a/doc/source/user_guide/index.rst +++ b/doc/source/user_guide/index.rst @@ -15,6 +15,9 @@ Users brand-new to pandas should start with :ref:`10min`. Further information on any specific method can be obtained in the :ref:`api`. +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + .. toctree:: :maxdepth: 2 diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index b23a0f10e9e2b..1b5d96fa9c146 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1689,7 +1689,7 @@ The ``Series`` and ``DataFrame`` objects have an instance method ``to_csv`` whic allows storing the contents of the object as a comma-separated-values file. The function takes a number of arguments. Only the first is required. -* ``path_or_buf``: A string path to the file to write or a StringIO +* ``path_or_buf``: A string path to the file to write or a file object. If a file object it must be opened with `newline=''` * ``sep`` : Field delimiter for the output file (default ",") * ``na_rep``: A string representation of a missing value (default '') * ``float_format``: Format string for floating point numbers @@ -1702,7 +1702,7 @@ function takes a number of arguments. Only the first is required. * ``mode`` : Python write mode, default 'w' * ``encoding``: a string representing the encoding to use if the contents are non-ASCII, for Python versions prior to 3 -* ``line_terminator``: Character sequence denoting line end (default '\\n') +* ``line_terminator``: Character sequence denoting line end (default `os.linesep`) * ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). 
Note that if you have set a `float_format` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric * ``quotechar``: Character used to quote fields (default '"') * ``doublequote``: Control quoting of ``quotechar`` in fields (default True) diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index d0aab4012ffd5..6c529d2e2e5f3 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -16,6 +16,8 @@ Version 0.24 .. toctree:: :maxdepth: 2 + v0.24.2 + v0.24.1 v0.24.0 Version 0.23 diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst index 8a89450be2f48..161b0ef395f05 100644 --- a/doc/source/whatsnew/v0.13.1.rst +++ b/doc/source/whatsnew/v0.13.1.rst @@ -222,60 +222,155 @@ Enhancements - Panel :meth:`~pandas.Panel.apply` will work on non-ufuncs. See :ref:`the docs`. - .. ipython:: python + .. code-block:: ipython + + In [28]: import pandas.util.testing as tm + + In [29]: panel = tm.makePanel(5) - import pandas.util.testing as tm - panel = tm.makePanel(5) - panel - panel['ItemA'] + In [30]: panel + Out[30]: + + Dimensions: 3 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: A to D + + In [31]: panel['ItemA'] + Out[31]: + A B C D + 2000-01-03 -0.673690 0.577046 -1.344312 -1.469388 + 2000-01-04 0.113648 -1.715002 0.844885 0.357021 + 2000-01-05 -1.478427 -1.039268 1.075770 -0.674600 + 2000-01-06 0.524988 -0.370647 -0.109050 -1.776904 + 2000-01-07 0.404705 -1.157892 1.643563 -0.968914 + + [5 rows x 4 columns] Specifying an ``apply`` that operates on a Series (to return a single element) - .. ipython:: python + .. 
code-block:: ipython + + In [32]: panel.apply(lambda x: x.dtype, axis='items') + Out[32]: + A B C D + 2000-01-03 float64 float64 float64 float64 + 2000-01-04 float64 float64 float64 float64 + 2000-01-05 float64 float64 float64 float64 + 2000-01-06 float64 float64 float64 float64 + 2000-01-07 float64 float64 float64 float64 - panel.apply(lambda x: x.dtype, axis='items') + [5 rows x 4 columns] A similar reduction type operation - .. ipython:: python + .. code-block:: ipython + + In [33]: panel.apply(lambda x: x.sum(), axis='major_axis') + Out[33]: + ItemA ItemB ItemC + A -1.108775 -1.090118 -2.984435 + B -3.705764 0.409204 1.866240 + C 2.110856 2.960500 -0.974967 + D -4.532785 0.303202 -3.685193 - panel.apply(lambda x: x.sum(), axis='major_axis') + [4 rows x 3 columns] This is equivalent to - .. ipython:: python + .. code-block:: ipython + + In [34]: panel.sum('major_axis') + Out[34]: + ItemA ItemB ItemC + A -1.108775 -1.090118 -2.984435 + B -3.705764 0.409204 1.866240 + C 2.110856 2.960500 -0.974967 + D -4.532785 0.303202 -3.685193 - panel.sum('major_axis') + [4 rows x 3 columns] A transformation operation that returns a Panel, but is computing the z-score across the major_axis - .. ipython:: python + .. 
code-block:: ipython - result = panel.apply(lambda x: (x - x.mean()) / x.std(), - axis='major_axis') - result - result['ItemA'] + In [35]: result = panel.apply(lambda x: (x - x.mean()) / x.std(), + ....: axis='major_axis') + ....: + + In [36]: result + Out[36]: + + Dimensions: 3 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: A to D + + In [37]: result['ItemA'] # noqa E999 + Out[37]: + A B C D + 2000-01-03 -0.535778 1.500802 -1.506416 -0.681456 + 2000-01-04 0.397628 -1.108752 0.360481 1.529895 + 2000-01-05 -1.489811 -0.339412 0.557374 0.280845 + 2000-01-06 0.885279 0.421830 -0.453013 -1.053785 + 2000-01-07 0.742682 -0.474468 1.041575 -0.075499 + + [5 rows x 4 columns] - Panel :meth:`~pandas.Panel.apply` operating on cross-sectional slabs. (:issue:`1148`) - .. ipython:: python + .. code-block:: ipython - def f(x): - return ((x.T - x.mean(1)) / x.std(1)).T + In [38]: def f(x): + ....: return ((x.T - x.mean(1)) / x.std(1)).T + ....: - result = panel.apply(f, axis=['items', 'major_axis']) - result - result.loc[:, :, 'ItemA'] + In [39]: result = panel.apply(f, axis=['items', 'major_axis']) - This is equivalent to the following + In [40]: result + Out[40]: + + Dimensions: 4 (items) x 5 (major_axis) x 3 (minor_axis) + Items axis: A to D + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: ItemA to ItemC - .. ipython:: python + In [41]: result.loc[:, :, 'ItemA'] + Out[41]: + A B C D + 2000-01-03 0.012922 -0.030874 -0.629546 -0.757034 + 2000-01-04 0.392053 -1.071665 0.163228 0.548188 + 2000-01-05 -1.093650 -0.640898 0.385734 -1.154310 + 2000-01-06 1.005446 -1.154593 -0.595615 -0.809185 + 2000-01-07 0.783051 -0.198053 0.919339 -1.052721 + + [5 rows x 4 columns] - result = pd.Panel({ax: f(panel.loc[:, :, ax]) for ax in panel.minor_axis}) + This is equivalent to the following + + .. 
code-block:: ipython - result - result.loc[:, :, 'ItemA'] + In [42]: result = pd.Panel({ax: f(panel.loc[:, :, ax]) for ax in panel.minor_axis}) + + In [43]: result + Out[43]: + + Dimensions: 4 (items) x 5 (major_axis) x 3 (minor_axis) + Items axis: A to D + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: ItemA to ItemC + + In [44]: result.loc[:, :, 'ItemA'] + Out[44]: + A B C D + 2000-01-03 0.012922 -0.030874 -0.629546 -0.757034 + 2000-01-04 0.392053 -1.071665 0.163228 0.548188 + 2000-01-05 -1.093650 -0.640898 0.385734 -1.154310 + 2000-01-06 1.005446 -1.154593 -0.595615 -0.809185 + 2000-01-07 0.783051 -0.198053 0.919339 -1.052721 + + [5 rows x 4 columns] Performance ~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index c720e075012eb..26fdee4685c4b 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -45,11 +45,6 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ -.. ipython:: python - :suppress: - - import pandas.util.testing as tm - .. _whatsnew_0200.enhancements.agg: ``agg`` API for DataFrame/Series @@ -1363,24 +1358,65 @@ Deprecate Panel with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. Pandas provides a :meth:`~Panel.to_xarray` method to automate this conversion. For more details see :ref:`Deprecate Panel ` documentation. (:issue:`13563`). -.. ipython:: python - :okwarning: +.. code-block:: ipython - p = tm.makePanel() - p + In [133]: import pandas.util.testing as tm + + In [134]: p = tm.makePanel() + + In [135]: p + Out[135]: + + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D Convert to a MultiIndex DataFrame -.. ipython:: python +.. 
code-block:: ipython - p.to_frame() + In [136]: p.to_frame() + Out[136]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.628776 -1.409432 0.209395 + B 0.988138 -1.347533 -0.896581 + C -0.938153 1.272395 -0.161137 + D -0.223019 -0.591863 -1.051539 + 2000-01-04 A 0.186494 1.422986 -0.592886 + B -0.072608 0.363565 1.104352 + C -1.239072 -1.449567 0.889157 + D 2.123692 -0.414505 -0.319561 + 2000-01-05 A 0.952478 -2.147855 -1.473116 + B -0.550603 -0.014752 -0.431550 + C 0.139683 -1.195524 0.288377 + D 0.122273 -1.425795 -0.619993 + + [12 rows x 3 columns] Convert to an xarray DataArray -.. ipython:: python - :okwarning: +.. code-block:: ipython - p.to_xarray() + In [137]: p.to_xarray() + Out[137]: + + array([[[ 0.628776, 0.988138, -0.938153, -0.223019], + [ 0.186494, -0.072608, -1.239072, 2.123692], + [ 0.952478, -0.550603, 0.139683, 0.122273]], + + [[-1.409432, -1.347533, 1.272395, -0.591863], + [ 1.422986, 0.363565, -1.449567, -0.414505], + [-2.147855, -0.014752, -1.195524, -1.425795]], + + [[ 0.209395, -0.896581, -0.161137, -1.051539], + [-0.592886, 1.104352, 0.889157, -0.319561], + [-1.473116, -0.43155 , 0.288377, -0.619993]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' .. _whatsnew_0200.api_breaking.deprecate_group_agg_dict: diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index e52a36a922bd9..7ec5a39c3d384 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -646,29 +646,65 @@ Deprecate Panel with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` or with the `xarray package `__. Pandas provides a :meth:`~Panel.to_xarray` method to automate this conversion. For more details see :ref:`Deprecate Panel ` documentation. (:issue:`13563`, :issue:`18324`). -.. ipython:: python - :suppress: +.. 
code-block:: ipython - import pandas.util.testing as tm + In [75]: import pandas.util.testing as tm -.. ipython:: python - :okwarning: + In [76]: p = tm.makePanel() - p = tm.makePanel() - p + In [77]: p + Out[77]: + + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D Convert to a MultiIndex DataFrame -.. ipython:: python +.. code-block:: ipython - p.to_frame() + In [78]: p.to_frame() + Out[78]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.469112 0.721555 0.404705 + B -1.135632 0.271860 -1.039268 + C 0.119209 0.276232 -1.344312 + D -2.104569 0.113648 -0.109050 + 2000-01-04 A -0.282863 -0.706771 0.577046 + B 1.212112 -0.424972 -0.370647 + C -1.044236 -1.087401 0.844885 + D -0.494929 -1.478427 1.643563 + 2000-01-05 A -1.509059 -1.039575 -1.715002 + B -0.173215 0.567020 -1.157892 + C -0.861849 -0.673690 1.075770 + D 1.071804 0.524988 -1.469388 + + [12 rows x 3 columns] Convert to an xarray DataArray -.. ipython:: python - :okwarning: +.. code-block:: ipython - p.to_xarray() + In [79]: p.to_xarray() + Out[79]: + + array([[[ 0.469112, -1.135632, 0.119209, -2.104569], + [-0.282863, 1.212112, -1.044236, -0.494929], + [-1.509059, -0.173215, -0.861849, 1.071804]], + + [[ 0.721555, 0.27186 , 0.276232, 0.113648], + [-0.706771, -0.424972, -1.087401, -1.478427], + [-1.039575, 0.56702 , -0.67369 , 0.524988]], + + [[ 0.404705, -1.039268, -1.344312, -0.10905 ], + [ 0.577046, -0.370647, 0.844885, 1.643563], + [-1.715002, -1.157892, 1.07577 , -1.469388]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' .. 
_whatsnew_0230.api_breaking.core_common: diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index be0a2eb682e87..8f963f1285e1b 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -82,4 +82,15 @@ Bug Fixes Contributors ~~~~~~~~~~~~ -.. contributors:: v0.24.0..v0.24.1 +.. Including the contributors hardcoded for this release, as backporting with + MeeseeksDev loses the commit authors + +A total of 7 people contributed patches to this release. People with a "+" by their names contributed a patch for the first time. + +* Alex Buchkovsky +* Roman Yurchak +* h-vetinari +* jbrockmendel +* Jeremy Schendel +* Joris Van den Bossche +* Tom Augspurger diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 2c6d1e01ed89b..0af2427ead512 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -18,7 +18,7 @@ including other versions of pandas. .. _whatsnew_0242.regressions: Fixed Regressions -^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`) - Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) @@ -32,80 +32,69 @@ Fixed Regressions - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) - -.. _whatsnew_0242.enhancements: - -Enhancements -^^^^^^^^^^^^ - -- -- +- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compress (:issue:`25311`) .. 
_whatsnew_0242.bug_fixes: Bug Fixes ~~~~~~~~~ -**Conversion** - -- -- -- - -**Indexing** - -- -- -- - **I/O** - Better handling of terminal printing when the terminal dimensions are not known (:issue:`25080`) - Bug in reading a HDF5 table-format ``DataFrame`` created in Python 2, in Python 3 (:issue:`24925`) - Bug in reading a JSON with ``orient='table'`` generated by :meth:`DataFrame.to_json` with ``index=False`` (:issue:`25170`) - Bug where float indexes could have misaligned values when printing (:issue:`25061`) -- - -**Categorical** - -- -- -- - -**Timezones** - -- -- -- - -**Timedelta** - -- -- -- **Reshaping** - Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) -- **Visualization** - Bug in :meth:`Series.plot` where a secondary y axis could not be set to log scale (:issue:`25545`) -- -- **Other** - Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) - Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) - Bug in ``IntervalTree`` where a ``RecursionError`` occurs upon construction due to an overflow when adding endpoints, which also causes :class:`IntervalIndex` to crash during indexing operations (:issue:`25485`) -- +- Bug in :attr:`Series.size` raising for some extension-array-backed ``Series``, rather than returning the size (:issue:`25580`) +- Bug in resampling raising for nullable integer-dtype columns (:issue:`25580`) .. _whatsnew_0242.contributors: Contributors ~~~~~~~~~~~~ -.. contributors:: v0.24.1..v0.24.2 +.. 
Including the contributors hardcoded for this release, as backporting with + MeeseeksDev loses the commit authors + +A total of 25 people contributed patches to this release. People with a "+" by their names contributed a patch for the first time. + +* Albert Villanova del Moral +* Arno Veenstra + +* chris-b1 +* Devin Petersohn + +* EternalLearner42 + +* Flavien Lambert + +* gfyoung +* Gioia Ballin +* jbrockmendel +* Jeff Reback +* Jeremy Schendel +* Johan von Forstner + +* Joris Van den Bossche +* Josh +* Justin Zheng +* Matthew Roeschke +* Max Bolingbroke + +* rbenes + +* Sterling Paramore + +* Tao He + +* Thomas A Caswell +* Tom Augspurger +* Vibhu Agarwal + +* William Ayd +* Zach Angell diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ea08a0a6fe07b..477f4bacc8cf6 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -121,7 +121,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Bug in :func:`to_datetime` which would raise an (incorrect) ``ValueError`` when called with a date far into the future and the ``format`` argument specified instead of raising ``OutOfBoundsDatetime`` (:issue:`23830`) -- +- Bug in an error message in :meth:`DataFrame.plot`. 
Improved the error message if non-numerics are passed to :meth:`DataFrame.plot` (:issue:`25481`) +- Bug in ``read_csv`` which would not raise a ``ValueError`` when a column index in ``usecols`` was out of bounds (:issue:`25623`) - Categorical @@ -214,7 +215,7 @@ I/O - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) -- +- :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AssertionError`` (:issue:`25608`) - - diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 6e40063fb925a..37aa05659b70f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1156,7 +1156,6 @@ class Timedelta(_Timedelta): Notes ----- The ``.value`` attribute is always in ns. 
- """ def __new__(cls, object value=_no_input, unit=None, **kwargs): cdef _Timedelta td_base diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 75cf658423210..89f2b9961a4d7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -119,7 +119,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = "\n{}\n".format(docstring) + f.__doc__ = docstring return property(f) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 9be2c9af169e8..fd7149edc8d7c 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -541,7 +541,6 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): 3. ``data.dtype.fill_value`` if `fill_value` is None and `dtype` is not a ``SparseDtype`` and `data` is a ``SparseArray``. - kind : {'integer', 'block'}, default 'integer' The type of storage for sparse locations. diff --git a/pandas/core/base.py b/pandas/core/base.py index f896596dd5216..9fc950b9e7b43 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -762,7 +762,7 @@ def size(self): """ Return the number of elements in the underlying data. """ - return self._values.size + return len(self._values) @property def flags(self): @@ -870,7 +870,6 @@ def to_numpy(self, dtype=None, copy=False): .. versionadded:: 0.24.0 - Parameters ---------- dtype : str or numpy.dtype, optional diff --git a/pandas/core/config.py b/pandas/core/config.py index 01664fffb1e27..b6264a5257dcb 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -651,7 +651,6 @@ def _build_option_description(k): .format(rkey=d.rkey if d.rkey else '')) s += u(')') - s += '\n\n' return s diff --git a/pandas/core/frame.py b/pandas/core/frame.py index eadffb779734f..3996728a1cc90 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2184,7 +2184,6 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, Convert URLs to HTML links. .. 
versionadded:: 0.24.0 - %(returns)s See Also -------- @@ -6027,7 +6026,6 @@ def unstack(self, level=-1, fill_value=None): columns, considered measured variables (`value_vars`), are "unpivoted" to the row axis, leaving just two non-identifier columns, 'variable' and 'value'. - %(versionadded)s Parameters ---------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0b81576404e2f..f23aac9ad3a52 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -948,7 +948,6 @@ def swaplevel(self, i=-2, j=-1, axis=0): The indexes ``i`` and ``j`` are now optional, and default to the two innermost levels of the index. - """ axis = self._get_axis_number(axis) result = self.copy() @@ -2920,7 +2919,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, ---------- path_or_buf : str or file handle, default None File path or object, if None is provided the result is returned as - a string. + a string. If a file object is passed it should be opened with + `newline=''`, disabling universal newlines. .. versionchanged:: 0.24.0 @@ -4534,11 +4534,11 @@ def filter(self, items=None, like=None, regex=None, axis=None): Parameters ---------- items : list-like - List of axis to restrict to (must not all be present). + Keep labels from axis which are in items. like : string - Keep axis where "arg in col == True". + Keep labels from axis for which "like in label == True". regex : string (regular expression) - Keep axis with re.search(regex, col) == True. + Keep labels from axis for which re.search(regex, label) == True. axis : int or string axis name The axis to filter on. By default this is the info axis, 'index' for Series, 'columns' for DataFrame. @@ -4561,7 +4561,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): Examples -------- - >>> df = pd.DataFrame(np.array(([1,2,3], [4,5,6])), + >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), ... index=['mouse', 'rabbit'], ... 
columns=['one', 'two', 'three']) @@ -4951,9 +4951,7 @@ def pipe(self, func, *args, **kwargs): _shared_docs['aggregate'] = dedent(""" Aggregate using one or more operations over the specified axis. - %(versionadded)s - Parameters ---------- func : function, str, list or dict @@ -4983,17 +4981,13 @@ def pipe(self, func, *args, **kwargs): * DataFrame : when DataFrame.agg is called with several functions Return scalar, Series or DataFrame. - %(see_also)s - Notes ----- `agg` is an alias for `aggregate`. Use the alias. A passed user-defined-function will be passed a Series for evaluation. - - %(examples)s - """) + %(examples)s""") _shared_docs['transform'] = (""" Call ``func`` on self producing a %(klass)s with transformed values @@ -10307,7 +10301,7 @@ def _doc_parms(cls): Returns ------- -%(name1)s or %(name2)s (if level specified) +%(name1)s or %(name2)s (if level specified)\ %(see_also)s %(examples)s\ """ @@ -10464,8 +10458,7 @@ def _doc_parms(cls): %(name2)s.cumsum : Return cumulative sum over %(name2)s axis. %(name2)s.cumprod : Return cumulative product over %(name2)s axis. -%(examples)s -""" +%(examples)s""" _cummin_examples = """\ Examples diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index ebba4a0a9395d..903c898b68873 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -126,9 +126,7 @@ class where members are defined. property_wrapper_template = \ """@property def %(name)s(self) : - \""" - %(doc)s - \""" + \"""%(doc)s\""" return self.__getattr__('%(name)s')""" for name in whitelist: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 36dcb692bb079..3d0a6023ac29f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -221,8 +221,7 @@ class providing the base-class of operations. 
Examples -------- -%(examples)s -""" +%(examples)s""" _transform_template = """ Call function producing a like-indexed %(klass)s on each group and @@ -1106,9 +1105,7 @@ def mean(self, *args, **kwargs): Returns ------- pandas.Series or pandas.DataFrame - %(see_also)s - Examples -------- >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], @@ -1564,9 +1561,7 @@ def nth(self, n, dropna=None): dropna : None or str, optional apply the specified dropna operation before counting which row is the nth row. Needs to be None, 'any' or 'all' - %(see_also)s - Examples -------- @@ -2139,9 +2134,7 @@ def head(self, n=5): Essentially equivalent to ``.apply(lambda x: x.head(n))``, except ignores as_index flag. - %(see_also)s - Examples -------- @@ -2167,9 +2160,7 @@ def tail(self, n=5): Essentially equivalent to ``.apply(lambda x: x.tail(n))``, except ignores as_index flag. - %(see_also)s - Examples -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dee181fc1c569..29b9a47a92a48 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3104,7 +3104,6 @@ def reindex(self, target, method=None, level=None, limit=None, Resulting index. indexer : np.ndarray or None Indices of output values in original index. - """ # GH6552: preserve names when reindexing to non-named target # (i.e. neither Index nor Series). 
diff --git a/pandas/core/ops.py b/pandas/core/ops.py index dbdabecafae3a..4d88ce6836ca4 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -384,57 +384,252 @@ def _get_op_name(op, special): # ----------------------------------------------------------------------------- # Docstring Generation and Templates +_add_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.add(b, fill_value=0) +a 2.0 +b 1.0 +c 1.0 +d 1.0 +e NaN +dtype: float64 +""" + +_sub_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.subtract(b, fill_value=0) +a 0.0 +b 1.0 +c 1.0 +d -1.0 +e NaN +dtype: float64 +""" + +_mul_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.multiply(b, fill_value=0) +a 1.0 +b 0.0 +c 0.0 +d 0.0 +e NaN +dtype: float64 +""" + +_div_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.divide(b, fill_value=0) +a 1.0 +b inf +c inf +d 0.0 +e NaN +dtype: float64 +""" + +_floordiv_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = 
pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.floordiv(b, fill_value=0) +a 1.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" + +_mod_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.mod(b, fill_value=0) +a 0.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" +_pow_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.pow(b, fill_value=0) +a 1.0 +b 1.0 +c 1.0 +d 0.0 +e NaN +dtype: float64 +""" + _op_descriptions = { # Arithmetic Operators 'add': {'op': '+', 'desc': 'Addition', - 'reverse': 'radd'}, + 'reverse': 'radd', + 'series_examples': _add_example_SERIES}, 'sub': {'op': '-', 'desc': 'Subtraction', - 'reverse': 'rsub'}, + 'reverse': 'rsub', + 'series_examples': _sub_example_SERIES}, 'mul': {'op': '*', 'desc': 'Multiplication', 'reverse': 'rmul', + 'series_examples': _mul_example_SERIES, 'df_examples': None}, 'mod': {'op': '%', 'desc': 'Modulo', - 'reverse': 'rmod'}, + 'reverse': 'rmod', + 'series_examples': _mod_example_SERIES}, 'pow': {'op': '**', 'desc': 'Exponential power', 'reverse': 'rpow', + 'series_examples': _pow_example_SERIES, 'df_examples': None}, 'truediv': {'op': '/', 'desc': 'Floating division', 'reverse': 'rtruediv', + 'series_examples': _div_example_SERIES, 'df_examples': None}, 'floordiv': {'op': '//', 'desc': 'Integer division', 'reverse': 'rfloordiv', + 'series_examples': _floordiv_example_SERIES, 'df_examples': None}, 'divmod': {'op': 'divmod', 'desc': 'Integer division and modulo', 'reverse': 'rdivmod', 
+ 'series_examples': None, 'df_examples': None}, # Comparison Operators 'eq': {'op': '==', 'desc': 'Equal to', - 'reverse': None}, + 'reverse': None, + 'series_examples': None}, 'ne': {'op': '!=', 'desc': 'Not equal to', - 'reverse': None}, + 'reverse': None, + 'series_examples': None}, 'lt': {'op': '<', 'desc': 'Less than', - 'reverse': None}, + 'reverse': None, + 'series_examples': None}, 'le': {'op': '<=', 'desc': 'Less than or equal to', - 'reverse': None}, + 'reverse': None, + 'series_examples': None}, 'gt': {'op': '>', 'desc': 'Greater than', - 'reverse': None}, + 'reverse': None, + 'series_examples': None}, 'ge': {'op': '>=', 'desc': 'Greater than or equal to', - 'reverse': None} + 'reverse': None, + 'series_examples': None} } _op_names = list(_op_descriptions.keys()) @@ -472,51 +667,6 @@ def _get_op_name(op, special): See Also -------- Series.{reverse} - -Examples --------- ->>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) ->>> a -a 1.0 -b 1.0 -c 1.0 -d NaN -dtype: float64 ->>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) ->>> b -a 1.0 -b NaN -d 1.0 -e NaN -dtype: float64 ->>> a.add(b, fill_value=0) -a 2.0 -b 1.0 -c 1.0 -d 1.0 -e NaN -dtype: float64 ->>> a.subtract(b, fill_value=0) -a 0.0 -b 1.0 -c 1.0 -d -1.0 -e NaN -dtype: float64 ->>> a.multiply(b) -a 1.0 -b NaN -c NaN -d NaN -e NaN -dtype: float64 ->>> a.divide(b, fill_value=0) -a 1.0 -b inf -c inf -d 0.0 -e NaN -dtype: float64 """ _arith_doc_FRAME = """ @@ -906,16 +1056,32 @@ def _make_flex_doc(op_name, typ): if typ == 'series': base_doc = _flex_doc_SERIES - doc = base_doc.format(desc=op_desc['desc'], op_name=op_name, - equiv=equiv, reverse=op_desc['reverse']) + doc_no_examples = base_doc.format( + desc=op_desc['desc'], + op_name=op_name, + equiv=equiv, + reverse=op_desc['reverse'] + ) + if op_desc['series_examples']: + doc = doc_no_examples + op_desc['series_examples'] + else: + doc = doc_no_examples elif typ == 'dataframe': base_doc = _flex_doc_FRAME - doc = 
base_doc.format(desc=op_desc['desc'], op_name=op_name, - equiv=equiv, reverse=op_desc['reverse']) + doc = base_doc.format( + desc=op_desc['desc'], + op_name=op_name, + equiv=equiv, + reverse=op_desc['reverse'] + ) elif typ == 'panel': base_doc = _flex_doc_PANEL - doc = base_doc.format(desc=op_desc['desc'], op_name=op_name, - equiv=equiv, reverse=op_desc['reverse']) + doc = base_doc.format( + desc=op_desc['desc'], + op_name=op_name, + equiv=equiv, + reverse=op_desc['reverse'] + ) else: raise AssertionError('Invalid typ argument.') return doc diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index ef69939d6e978..0b5b017bec9ac 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -1,4 +1,5 @@ """ miscellaneous sorting / groupby utilities """ +import warnings import numpy as np @@ -254,7 +255,13 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): sorted_idx = np.roll(sorted_idx, cnt_null) return sorted_idx - items = np.asanyarray(items) + with warnings.catch_warnings(): + # https://github.com/pandas-dev/pandas/issues/25439 + # can be removed once ExtensionArrays are properly handled by nargsort + warnings.filterwarnings( + "ignore", category=FutureWarning, + message="Converting timezone-aware DatetimeArray to") + items = np.asanyarray(items) idx = np.arange(len(items)) mask = isna(items) non_nans = items[~mask] diff --git a/pandas/io/common.py b/pandas/io/common.py index ad054d77b3bc8..c1cacf39c5b08 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -434,7 +434,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, if (compat.PY3 and is_text and (compression or isinstance(f, need_text_wrapping))): from io import TextIOWrapper - f = TextIOWrapper(f, encoding=encoding) + f = TextIOWrapper(f, encoding=encoding, newline='') handles.append(f) if memory_map and hasattr(f, 'fileno'): diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index d76e6b75d3762..b2c6dff4338b6 100644 
--- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -110,7 +110,6 @@ def read_feather(path, columns=None, use_threads=True): Returns ------- type of object stored in file - """ feather, pyarrow = _try_import() diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 66d13bf2668f9..a543b21f287ec 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -163,8 +163,8 @@ def _write_table(self, indent=0): if isinstance(self.classes, str): self.classes = self.classes.split() if not isinstance(self.classes, (list, tuple)): - raise AssertionError('classes must be list or tuple, not {typ}' - .format(typ=type(self.classes))) + raise TypeError('classes must be a string, list, or tuple, ' + 'not {typ}'.format(typ=type(self.classes))) _classes.extend(self.classes) if self.table_id is None: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4163a571df800..0b91c719ff2b5 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1894,6 +1894,11 @@ def __init__(self, src, **kwds): not set(usecols).issubset(self.orig_names)): _validate_usecols_names(usecols, self.orig_names) + # GH25623 + elif self.usecols_dtype == 'integer': + indices = lrange(self._reader.table_width) + _validate_usecols_names(usecols, indices) + if len(self.names) > len(usecols): self.names = [n for i, n in enumerate(self.names) if (i in usecols or n in usecols)] @@ -2197,7 +2202,8 @@ def __init__(self, f, **kwds): self.skipinitialspace = kwds['skipinitialspace'] self.lineterminator = kwds['lineterminator'] self.quoting = kwds['quoting'] - self.usecols, _ = _validate_usecols_arg(kwds['usecols']) + self.usecols, self.usecols_dtype = _validate_usecols_arg( + kwds['usecols']) self.skip_blank_lines = kwds['skip_blank_lines'] self.warn_bad_lines = kwds['warn_bad_lines'] @@ -2588,6 +2594,12 @@ def _infer_columns(self): if clear_buffer: self._clear_buffer() + # GH25623 + if self.usecols_dtype == "integer": + for col in columns: + indices = 
lrange(len(col)) + _validate_usecols_names(self.usecols, indices) + if names is not None: if ((self.usecols is not None and len(names) != len(self.usecols)) or @@ -2623,6 +2635,10 @@ def _infer_columns(self): ncols = len(line) num_original_columns = ncols + # GH25623 + if self.usecols_dtype == "integer": + _validate_usecols_names(self.usecols, lrange(ncols)) + if not names: if self.prefix: columns = [['%s%d' % (self.prefix, i) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 0ea92a57ac3f8..b9ec4d58db739 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -361,10 +361,9 @@ def _compute_plot_data(self): except AttributeError: is_empty = not len(numeric_data) - # no empty frames or series allowed + # no non-numeric frames or series allowed if is_empty: - raise TypeError('Empty {0!r}: no numeric data to ' - 'plot'.format(numeric_data.__class__.__name__)) + raise TypeError('no numeric data to plot') self.data = numeric_data diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 5171ea68fd497..b8073c89892c5 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -303,7 +303,6 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None, Returns ------- class:`matplotlip.axis.Axes` - """ from math import sqrt, pi import matplotlib.pyplot as plt diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 9c13a20726553..11b5bcf702e75 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -1091,11 +1091,11 @@ def test_from_coo(self): row = [0, 3, 1, 0] col = [0, 3, 1, 2] data = [4, 5, 7, 9] - sp_array = sparse.coo_matrix(data, (row, col)) + sp_array = sparse.coo_matrix((data, (row, col))) result = pd.Series.sparse.from_coo(sp_array) - index = pd.MultiIndex.from_product([[0], [0, 1, 2, 3]]) - expected = pd.Series(data, index=index, dtype='Sparse[int]') + index = pd.MultiIndex.from_arrays([[0, 0, 1, 
3], [0, 2, 1, 3]]) + expected = pd.Series([4, 9, 7, 5], index=index, dtype='Sparse[int]') tm.assert_series_equal(result, expected) def test_to_coo(self): diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index a14d8e4471c23..062d1876141f8 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -253,9 +253,6 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): # local_dict={'lhs': lhs, 'rhs': rhs}, # engine=self.engine, parser=self.parser) # except AssertionError: - # import ipdb - # - # ipdb.set_trace() # raise else: expected = _eval_single_bin( diff --git a/pandas/tests/frame/test_asof.py b/pandas/tests/frame/test_asof.py index 0947e6f252dab..4ba3431d102df 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -6,21 +6,26 @@ from pandas import DataFrame, Series, Timestamp, date_range, to_datetime import pandas.util.testing as tm -from .common import TestData +@pytest.fixture +def date_range_frame(): + """ + Fixture for DataFrame of ints with date_range index -class TestFrameAsof(TestData): - def setup_method(self, method): - self.N = N = 50 - self.rng = date_range('1/1/1990', periods=N, freq='53s') - self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, - index=self.rng) + Columns are ['A', 'B']. 
+ """ + N = 50 + rng = date_range('1/1/1990', periods=N, freq='53s') + return DataFrame({'A': np.arange(N), 'B': np.arange(N)}, index=rng) - def test_basic(self): - df = self.df.copy() + +class TestFrameAsof(): + + def test_basic(self, date_range_frame): + df = date_range_frame + N = 50 df.loc[15:30, 'A'] = np.nan - dates = date_range('1/1/1990', periods=self.N * 3, - freq='25s') + dates = date_range('1/1/1990', periods=N * 3, freq='25s') result = df.asof(dates) assert result.notna().all(1).all() @@ -35,11 +40,9 @@ def test_basic(self): rs = result[mask] assert (rs == 14).all(1).all() - def test_subset(self): + def test_subset(self, date_range_frame): N = 10 - rng = date_range('1/1/1990', periods=N, freq='53s') - df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, - index=rng) + df = date_range_frame.iloc[:N].copy() df.loc[4:8, 'A'] = np.nan dates = date_range('1/1/1990', periods=N * 3, freq='25s') @@ -54,20 +57,18 @@ def test_subset(self): expected = df.asof(dates) tm.assert_frame_equal(result, expected) - # B gives self.df.asof + # B gives df.asof result = df.asof(dates, subset='B') expected = df.resample('25s', closed='right').ffill().reindex(dates) expected.iloc[20:] = 9 tm.assert_frame_equal(result, expected) - def test_missing(self): + def test_missing(self, date_range_frame): # GH 15118 # no match found - `where` value before earliest date in index N = 10 - rng = date_range('1/1/1990', periods=N, freq='53s') - df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, - index=rng) + df = date_range_frame.iloc[:N].copy() result = df.asof('1989-12-31') expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31')) @@ -78,7 +79,7 @@ def test_missing(self): columns=['A', 'B'], dtype='float64') tm.assert_frame_equal(result, expected) - def test_all_nans(self): + def test_all_nans(self, date_range_frame): # GH 15713 # DataFrame is all nans result = DataFrame([np.nan]).asof([0]) @@ -86,14 +87,16 @@ def test_all_nans(self): tm.assert_frame_equal(result, expected) 
# testing non-default indexes, multiple inputs - dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') - result = DataFrame(np.nan, index=self.rng, columns=['A']).asof(dates) + N = 150 + rng = date_range_frame.index + dates = date_range('1/1/1990', periods=N, freq='25s') + result = DataFrame(np.nan, index=rng, columns=['A']).asof(dates) expected = DataFrame(np.nan, index=dates, columns=['A']) tm.assert_frame_equal(result, expected) # testing multiple columns - dates = date_range('1/1/1990', periods=self.N * 3, freq='25s') - result = DataFrame(np.nan, index=self.rng, + dates = date_range('1/1/1990', periods=N, freq='25s') + result = DataFrame(np.nan, index=rng, columns=['A', 'B', 'C']).asof(dates) expected = DataFrame(np.nan, index=dates, columns=['A', 'B', 'C']) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index c2364dc135a9a..c803d15a690c4 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -11,12 +11,11 @@ import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestDataFrameConcatCommon(TestData): +class TestDataFrameConcatCommon(): def test_concat_multiple_frames_dtypes(self): @@ -515,7 +514,7 @@ def test_concat_astype_dup_col(self): tm.assert_frame_equal(result, expected) -class TestDataFrameCombineFirst(TestData): +class TestDataFrameCombineFirst(): def test_combine_first_mixed(self): a = Series(['a', 'b'], index=lrange(2)) @@ -531,22 +530,22 @@ def test_combine_first_mixed(self): combined = f.combine_first(g) tm.assert_frame_equal(combined, exp) - def test_combine_first(self): + def test_combine_first(self, float_frame): # disjoint - head, tail = self.frame[:5], self.frame[5:] + head, tail = float_frame[:5], 
float_frame[5:] combined = head.combine_first(tail) - reordered_frame = self.frame.reindex(combined.index) + reordered_frame = float_frame.reindex(combined.index) assert_frame_equal(combined, reordered_frame) - assert tm.equalContents(combined.columns, self.frame.columns) + assert tm.equalContents(combined.columns, float_frame.columns) assert_series_equal(combined['A'], reordered_frame['A']) # same index - fcopy = self.frame.copy() + fcopy = float_frame.copy() fcopy['A'] = 1 del fcopy['C'] - fcopy2 = self.frame.copy() + fcopy2 = float_frame.copy() fcopy2['B'] = 0 del fcopy2['D'] @@ -570,20 +569,20 @@ def test_combine_first(self): assert (combined['A'][:10] == 0).all() # no overlap - f = self.frame[:10] - g = self.frame[10:] + f = float_frame[:10] + g = float_frame[10:] combined = f.combine_first(g) assert_series_equal(combined['A'].reindex(f.index), f['A']) assert_series_equal(combined['A'].reindex(g.index), g['A']) # corner cases - comb = self.frame.combine_first(self.empty) - assert_frame_equal(comb, self.frame) + comb = float_frame.combine_first(DataFrame({})) + assert_frame_equal(comb, float_frame) - comb = self.empty.combine_first(self.frame) - assert_frame_equal(comb, self.frame) + comb = DataFrame({}).combine_first(float_frame) + assert_frame_equal(comb, float_frame) - comb = self.frame.combine_first(DataFrame(index=["faz", "boo"])) + comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) assert "faz" in comb.index # #2525 @@ -850,7 +849,7 @@ def test_concat_datetime_datetime64_frame(self): pd.concat([df1, df2_obj]) -class TestDataFrameUpdate(TestData): +class TestDataFrameUpdate(): def test_update_nan(self): # #15593 #15617 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index fc642d211b30c..1d5cbfec8de52 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -21,7 +21,7 @@ import pandas as pd from pandas import ( Categorical, DataFrame, Index, 
MultiIndex, Series, Timedelta, Timestamp, - compat, date_range, isna) + _np_version_under1p13, compat, date_range, isna) from pandas.tests.frame.common import TestData import pandas.util.testing as tm @@ -684,6 +684,8 @@ def test_constructor_ndarray(self): frame = DataFrame(['foo', 'bar'], index=[0, 1], columns=['A']) assert len(frame) == 2 + @pytest.mark.skipif(PY2 and _np_version_under1p13, + reason="old numpy & py2") def test_constructor_maskedarray(self): self._check_basic_constructor(ma.masked_all) @@ -700,6 +702,8 @@ def test_constructor_maskedarray(self): frame = DataFrame(mat, columns=['A', 'B', 'C'], index=[1, 2]) assert np.all(~np.asarray(frame == frame)) + @pytest.mark.skipif(PY2 and _np_version_under1p13, + reason="old numpy & py2") def test_constructor_maskedarray_nonfloat(self): # masked int promoted to float mat = ma.masked_all((2, 3), dtype=int) @@ -767,6 +771,8 @@ def test_constructor_maskedarray_nonfloat(self): assert frame['A'][1] is True assert frame['C'][2] is False + @pytest.mark.skipif(PY2 and _np_version_under1p13, + reason="old numpy & py2") def test_constructor_maskedarray_hardened(self): # Check numpy masked arrays with hard masks -- from GH24574 mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask() @@ -789,6 +795,8 @@ def test_constructor_maskedarray_hardened(self): dtype=float) tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(PY2 and _np_version_under1p13, + reason="old numpy & py2") def test_constructor_maskedrecarray_dtype(self): # Ensure constructor honors dtype data = np.ma.array( @@ -800,6 +808,8 @@ def test_constructor_maskedrecarray_dtype(self): columns=['date', 'price']) tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(PY2 and _np_version_under1p13, + reason="old numpy & py2") def test_constructor_mrecarray(self): # Ensure mrecarray produces frame identical to dict of masked arrays # from GH3479 diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 
0508658766cd3..2c9fde652493d 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -4,7 +4,6 @@ import pytest from pandas import DataFrame, Index, period_range -from pandas.tests.frame.common import TestData import pandas.util.testing as tm @@ -16,11 +15,6 @@ def frame_with_period_index(): index=period_range(start='2000', freq='A', periods=4)) -@pytest.fixture -def frame(): - return TestData().frame - - @pytest.fixture def left(): return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) @@ -63,11 +57,11 @@ def test_join(left, right, how, sort, expected): tm.assert_frame_equal(result, expected) -def test_join_index(frame): +def test_join_index(float_frame): # left / right - f = frame.loc[frame.index[:10], ['A', 'B']] - f2 = frame.loc[frame.index[5:], ['C', 'D']].iloc[::-1] + f = float_frame.loc[float_frame.index[:10], ['A', 'B']] + f2 = float_frame.loc[float_frame.index[5:], ['C', 'D']].iloc[::-1] joined = f.join(f2) tm.assert_index_equal(f.index, joined.index) @@ -91,7 +85,7 @@ def test_join_index(frame): # outer joined = f.join(f2, how='outer') - tm.assert_index_equal(joined.index, frame.index.sort_values()) + tm.assert_index_equal(joined.index, float_frame.index.sort_values()) tm.assert_index_equal(joined.columns, expected_columns) with pytest.raises(ValueError, match='join method'): @@ -101,16 +95,16 @@ def test_join_index(frame): msg = 'columns overlap but no suffix' for how in ('outer', 'left', 'inner'): with pytest.raises(ValueError, match=msg): - frame.join(frame, how=how) + float_frame.join(float_frame, how=how) -def test_join_index_more(frame): - af = frame.loc[:, ['A', 'B']] - bf = frame.loc[::2, ['C', 'D']] +def test_join_index_more(float_frame): + af = float_frame.loc[:, ['A', 'B']] + bf = float_frame.loc[::2, ['C', 'D']] expected = af.copy() - expected['C'] = frame['C'][::2] - expected['D'] = frame['D'][::2] + expected['C'] = float_frame['C'][::2] + expected['D'] = float_frame['D'][::2] result = af.join(bf) 
tm.assert_frame_equal(result, expected) @@ -122,28 +116,28 @@ def test_join_index_more(frame): tm.assert_frame_equal(result, expected.loc[:, result.columns]) -def test_join_index_series(frame): - df = frame.copy() - s = df.pop(frame.columns[-1]) +def test_join_index_series(float_frame): + df = float_frame.copy() + s = df.pop(float_frame.columns[-1]) joined = df.join(s) # TODO should this check_names ? - tm.assert_frame_equal(joined, frame, check_names=False) + tm.assert_frame_equal(joined, float_frame, check_names=False) s.name = None with pytest.raises(ValueError, match='must have a name'): df.join(s) -def test_join_overlap(frame): - df1 = frame.loc[:, ['A', 'B', 'C']] - df2 = frame.loc[:, ['B', 'C', 'D']] +def test_join_overlap(float_frame): + df1 = float_frame.loc[:, ['A', 'B', 'C']] + df2 = float_frame.loc[:, ['B', 'C', 'D']] joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2') df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1') df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2') - no_overlap = frame.loc[:, ['A', 'D']] + no_overlap = float_frame.loc[:, ['A', 'D']] expected = df1_suf.join(df2_suf).join(no_overlap) # column order not necessarily sorted diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 6bef7e3f65b21..211173371ac7e 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -8,14 +8,13 @@ from pandas.compat import PY36, lrange, range from pandas import DataFrame, Index, MultiIndex, Series -from pandas.tests.frame.common import TestData import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal # Column add, remove, delete. 
-class TestDataFrameMutateColumns(TestData): +class TestDataFrameMutateColumns(): def test_assign(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) @@ -193,9 +192,9 @@ def test_insert(self): exp = DataFrame(data={'X': ['x', 'y', 'z']}, index=['A', 'B', 'C']) assert_frame_equal(df, exp) - def test_delitem(self): - del self.frame['A'] - assert 'A' not in self.frame + def test_delitem(self, float_frame): + del float_frame['A'] + assert 'A' not in float_frame def test_delitem_multiindex(self): midx = MultiIndex.from_product([['A', 'B'], [1, 2]]) @@ -223,16 +222,16 @@ def test_delitem_multiindex(self): with pytest.raises(KeyError): del df['A'] - def test_pop(self): - self.frame.columns.name = 'baz' + def test_pop(self, float_frame): + float_frame.columns.name = 'baz' - self.frame.pop('A') - assert 'A' not in self.frame + float_frame.pop('A') + assert 'A' not in float_frame - self.frame['foo'] = 'bar' - self.frame.pop('foo') - assert 'foo' not in self.frame - assert self.frame.columns.name == 'baz' + float_frame['foo'] = 'bar' + float_frame.pop('foo') + assert 'foo' not in float_frame + assert float_frame.columns.name == 'baz' # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[ diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 54a8712a9c645..59bf3d00f979c 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -1221,3 +1221,15 @@ def test_multi_index_header(self): '1,5,6,7,8'] expected = tm.convert_rows_list_to_csv_str(expected_rows) assert result == expected + + def test_gz_lineend(self): + # GH 25311 + df = pd.DataFrame({'a': [1, 2]}) + expected_rows = ['a', '1', '2'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + with ensure_clean('__test_gz_lineend.csv.gz') as path: + df.to_csv(path, index=False) + with tm.decompress_file(path, compression='gzip') as f: + result = f.read().decode('utf-8') + + assert result == expected diff --git 
a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index bda486411e01e..4129184373a2a 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -865,7 +865,6 @@ class TestIndexing(object): def test_get_slice(self): def assert_slice_ok(mgr, axis, slobj): - # import pudb; pudb.set_trace() mat = mgr.as_array() # we maybe using an ndarray to test slicing and diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 428f1411a10a6..9cb2704f65587 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -623,3 +623,13 @@ def test_ignore_display_max_colwidth(method, expected, max_colwidth): result = getattr(df, method)() expected = expected(max_colwidth) assert expected in result + + +@pytest.mark.parametrize("classes", [True, 0]) +def test_to_html_invalid_classes_type(classes): + # GH 25608 + df = DataFrame() + msg = "classes must be a string, list, or tuple" + + with pytest.raises(TypeError, match=msg): + df.to_html(classes=classes) diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py index 652f78d198ee8..deb664a6c2051 100644 --- a/pandas/tests/io/parser/test_usecols.py +++ b/pandas/tests/io/parser/test_usecols.py @@ -21,6 +21,19 @@ "expected but not found: {0}") +@pytest.mark.parametrize("names, usecols", [ + (None, [0, 3]), + (["a", "b"], [-1, 1]), + (None, [4]), + (["a", "b"], [0, 4]) +]) +def test_usecols_index_out_of_bounds(all_parsers, names, usecols): + data = "a,b,c\n1,2,3\n4,5,6" + parser = all_parsers + + with pytest.raises(ValueError, match=_msg_validate_usecols_names): + parser.read_csv(StringIO(data), names=names, usecols=usecols) + def test_raise_on_mixed_dtype_usecols(all_parsers): # See gh-12678 data = """a,b,c diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 806bd7f2b7c93..d51d9418a370b 100644 --- 
a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -28,7 +28,7 @@ import pytest import pandas.compat as compat -from pandas.compat import PY36, lrange, range, string_types +from pandas.compat import PY2, PY36, lrange, range, string_types from pandas.core.dtypes.common import ( is_datetime64_dtype, is_datetime64tz_dtype) @@ -400,8 +400,10 @@ def _to_sql_fail(self): self.test_frame1, 'test_frame1', if_exists='fail') assert self.pandasSQL.has_table('test_frame1') - pytest.raises(ValueError, self.pandasSQL.to_sql, - self.test_frame1, 'test_frame1', if_exists='fail') + msg = "Table 'test_frame1' already exists" + with pytest.raises(ValueError, match=msg): + self.pandasSQL.to_sql( + self.test_frame1, 'test_frame1', if_exists='fail') self.drop_table('test_frame1') @@ -563,8 +565,10 @@ def test_to_sql_fail(self): self.conn, if_exists='fail') assert sql.has_table('test_frame2', self.conn) - pytest.raises(ValueError, sql.to_sql, self.test_frame1, - 'test_frame2', self.conn, if_exists='fail') + msg = "Table 'test_frame2' already exists" + with pytest.raises(ValueError, match=msg): + sql.to_sql(self.test_frame1, 'test_frame2', + self.conn, if_exists='fail') def test_to_sql_replace(self): sql.to_sql(self.test_frame1, 'test_frame3', @@ -699,10 +703,11 @@ def test_timedelta(self): result = sql.read_sql_query('SELECT * FROM test_timedelta', self.conn) tm.assert_series_equal(result['foo'], df['foo'].astype('int64')) - def test_complex(self): + def test_complex_raises(self): df = DataFrame({'a': [1 + 1j, 2j]}) - # Complex data type should raise error - pytest.raises(ValueError, df.to_sql, 'test_complex', self.conn) + msg = "Complex datatypes not supported" + with pytest.raises(ValueError, match=msg): + df.to_sql('test_complex', self.conn) @pytest.mark.parametrize("index_name,index_label,expected", [ # no index name, defaults to 'index' @@ -758,10 +763,11 @@ def test_to_sql_index_label_multiindex(self): frame = sql.read_sql_query('SELECT * FROM test_index_label', 
self.conn) assert frame.columns[:2].tolist() == ['C', 'D'] - # wrong length of index_label - pytest.raises(ValueError, sql.to_sql, temp_frame, - 'test_index_label', self.conn, if_exists='replace', - index_label='C') + msg = ("Length of 'index_label' should match number of levels, which" + " is 2") + with pytest.raises(ValueError, match=msg): + sql.to_sql(temp_frame, 'test_index_label', self.conn, + if_exists='replace', index_label='C') def test_multiindex_roundtrip(self): df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')], @@ -866,6 +872,8 @@ def test_escaped_table_name(self): @pytest.mark.single +@pytest.mark.skipif( + not SQLALCHEMY_INSTALLED, reason='SQLAlchemy not installed') class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): """ Test the public API as it would be used directly @@ -878,10 +886,7 @@ class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): mode = 'sqlalchemy' def connect(self): - if SQLALCHEMY_INSTALLED: - return sqlalchemy.create_engine('sqlite:///:memory:') - else: - pytest.skip('SQLAlchemy not installed') + return sqlalchemy.create_engine('sqlite:///:memory:') def test_read_table_columns(self): # test columns argument in read_table @@ -1091,20 +1096,21 @@ def test_sql_open_close(self): tm.assert_frame_equal(self.test_frame3, result) + @pytest.mark.skipif(SQLALCHEMY_INSTALLED, reason='SQLAlchemy is installed') def test_con_string_import_error(self): - if not SQLALCHEMY_INSTALLED: - conn = 'mysql://root@localhost/pandas_nosetest' - pytest.raises(ImportError, sql.read_sql, "SELECT * FROM iris", - conn) - else: - pytest.skip('SQLAlchemy is installed') + conn = 'mysql://root@localhost/pandas_nosetest' + msg = "Using URI string without sqlalchemy installed" + with pytest.raises(ImportError, match=msg): + sql.read_sql("SELECT * FROM iris", conn) def test_read_sql_delegate(self): iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn) iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn) tm.assert_frame_equal(iris_frame1, 
iris_frame2) - pytest.raises(sql.DatabaseError, sql.read_sql, 'iris', self.conn) + msg = "Execution failed on sql 'iris': near \"iris\": syntax error" + with pytest.raises(sql.DatabaseError, match=msg): + sql.read_sql('iris', self.conn) def test_safe_names_warning(self): # GH 6798 @@ -1260,9 +1266,10 @@ def test_read_table_columns(self): tm.equalContents( iris_frame.columns.values, ['SepalLength', 'SepalLength']) - def test_read_table_absent(self): - pytest.raises( - ValueError, sql.read_sql_table, "this_doesnt_exist", con=self.conn) + def test_read_table_absent_raises(self): + msg = "Table this_doesnt_exist not found" + with pytest.raises(ValueError, match=msg): + sql.read_sql_table("this_doesnt_exist", con=self.conn) def test_default_type_conversion(self): df = sql.read_sql_table("types_test_data", self.conn) @@ -1601,8 +1608,9 @@ def test_dtype(self): meta.reflect() sqltype = meta.tables['dtype_test2'].columns['B'].type assert isinstance(sqltype, sqlalchemy.TEXT) - pytest.raises(ValueError, df.to_sql, - 'error', self.conn, dtype={'B': str}) + msg = "The type of B is not a SQLAlchemy type" + with pytest.raises(ValueError, match=msg): + df.to_sql('error', self.conn, dtype={'B': str}) # GH9083 df.to_sql('dtype_test3', self.conn, dtype={'B': sqlalchemy.String(10)}) @@ -1887,8 +1895,9 @@ def test_schema_support(self): res4 = sql.read_sql_table('test_schema_other', self.conn, schema='other') tm.assert_frame_equal(df, res4) - pytest.raises(ValueError, sql.read_sql_table, 'test_schema_other', - self.conn, schema='public') + msg = "Table test_schema_other not found" + with pytest.raises(ValueError, match=msg): + sql.read_sql_table('test_schema_other', self.conn, schema='public') # different if_exists options @@ -2104,6 +2113,7 @@ def _get_sqlite_column_type(self, table, column): return ctype raise ValueError('Table %s, column %s not found' % (table, column)) + @pytest.mark.skipif(PY2, reason="pytest.raises match regex fails") def test_dtype(self): if self.flavor == 
'mysql': pytest.skip('Not applicable to MySQL legacy') @@ -2120,8 +2130,9 @@ def test_dtype(self): assert self._get_sqlite_column_type( 'dtype_test2', 'B') == 'STRING' - pytest.raises(ValueError, df.to_sql, - 'error', self.conn, dtype={'B': bool}) + msg = r"B \(<class 'bool'>\) not a string" + with pytest.raises(ValueError, match=msg): + df.to_sql('error', self.conn, dtype={'B': bool}) # single dtype df.to_sql('single_dtype_test', self.conn, dtype='STRING') @@ -2153,8 +2164,9 @@ def test_illegal_names(self): # For sqlite, these should work fine df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) - # Raise error on blank - pytest.raises(ValueError, df.to_sql, "", self.conn) + msg = "Empty table or column name specified" + with pytest.raises(ValueError, match=msg): + df.to_sql("", self.conn) for ndx, weird_name in enumerate( ['test_weird_name]', 'test_weird_name[', @@ -2383,25 +2395,19 @@ def clean_up(test_table_to_drop): """ self.drop_table(test_table_to_drop) - # test if invalid value for if_exists raises appropriate error - pytest.raises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='notvalidvalue') + msg = "'notvalidvalue' is not valid for if_exists" + with pytest.raises(ValueError, match=msg): + sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, + if_exists='notvalidvalue') clean_up(table_name) # test if_exists='fail' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail') - pytest.raises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='fail') - + msg = "Table 'table_if_exists' already exists" + with pytest.raises(ValueError, match=msg): + sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, + if_exists='fail') # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='replace', index=False) @@ -2647,23 +2653,17 @@ def clean_up(test_table_to_drop):
self.drop_table(test_table_to_drop) # test if invalid value for if_exists raises appropriate error - pytest.raises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='notvalidvalue') + with pytest.raises(ValueError, match=""): + sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, + if_exists='notvalidvalue') clean_up(table_name) # test if_exists='fail' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, if_exists='fail', index=False) - pytest.raises(ValueError, - sql.to_sql, - frame=df_if_exists_1, - con=self.conn, - name=table_name, - if_exists='fail') + with pytest.raises(ValueError, match=""): + sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, + if_exists='fail') # test if_exists='replace' sql.to_sql(frame=df_if_exists_1, con=self.conn, name=table_name, diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 6702ad6cfb761..b9a29cc4ac27e 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -97,7 +97,7 @@ def test_nonnumeric_exclude(self): assert len(ax.get_lines()) == 1 # B was plotted self.plt.close(fig) - msg = "Empty 'DataFrame': no numeric data to plot" + msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): df['A'].plot() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 98b241f5c8206..28806bb67c896 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -485,7 +485,9 @@ def test_subplots_timeseries_y_axis(self): ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") assert (ax_datetime_all_tz.get_lines()[0].get_data()[1] == testdata["datetime_all_tz"].values).all() - with pytest.raises(TypeError): + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): testdata.plot(y="text") @pytest.mark.xfail(reason='not support for period, categorical, ' 
@@ -2219,7 +2221,9 @@ def test_all_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with pytest.raises(TypeError): + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): df.plot(kind=kind) @pytest.mark.slow @@ -2230,7 +2234,9 @@ def test_partially_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with pytest.raises(TypeError): + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): df.plot(kind=kind) with tm.RNGContext(42): diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index a234ea8f9416b..aa78f38b75a10 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -706,7 +706,9 @@ def test_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with pytest.raises(TypeError): + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): s.plot(kind=kind, ax=ax) @pytest.mark.slow @@ -723,7 +725,9 @@ def test_partially_invalid_plot_data(self): for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue - with pytest.raises(TypeError): + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): s.plot(kind=kind, ax=ax) def test_invalid_kind(self): diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index ce675893d9907..ec05595536de4 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -101,6 +101,18 @@ def test_resample_basic(series, closed, expected): assert_series_equal(result, expected) +def test_resample_integerarray(): + # GH 25580, resample on IntegerArray + ts = pd.Series(range(9), + index=pd.date_range('1/1/2000', periods=9, freq='T'), + dtype='Int64') + result = 
ts.resample('3T').sum() + expected = Series([3, 12, 21], + index=pd.date_range('1/1/2000', periods=3, freq='3T'), + dtype="Int64") + assert_series_equal(result, expected) + + def test_resample_basic_grouper(series): s = series result = s.resample('5Min').last() diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 1f2e2b179c687..3ad9d54175f31 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -493,6 +493,13 @@ def test_tab_complete_warning(self, ip): with provisionalcompleter('ignore'): list(ip.Completer.completions('s.', 1)) + def test_integer_series_size(self): + # GH 25580 + s = Series(range(9)) + assert s.size == 9 + s = Series(range(9), dtype="Int64") + assert s.size == 9 + class TestCategoricalSeries(object): diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 7528566e8326e..fa8fbddd59118 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -9,7 +9,8 @@ from pandas.compat import PY2 -from pandas import DataFrame, MultiIndex, Series, compat, concat, merge +from pandas import ( + DataFrame, MultiIndex, Series, compat, concat, merge, to_datetime) from pandas.core import common as com from pandas.core.sorting import ( decons_group_index, get_group_index, is_int64_overflow_possible, @@ -183,6 +184,13 @@ def test_nargsort(self): exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + def test_nargsort_datetimearray_warning(self): + # https://github.com/pandas-dev/pandas/issues/25439 + # can be removed once the FutureWarning for np.array(DTA) is removed + data = to_datetime([0, 2, 0, 1]).tz_localize('Europe/Brussels') + with tm.assert_produces_warning(None): + nargsort(data) + class TestMerge(object): diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a5ae1f6a4d960..6e88cd7f72dcd 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ 
-638,35 +638,6 @@ def set_defaultencoding(encoding): sys.setdefaultencoding(orig) -# ----------------------------------------------------------------------------- -# Console debugging tools - - -def debug(f, *args, **kwargs): - from pdb import Pdb as OldPdb - try: - from IPython.core.debugger import Pdb - kw = dict(color_scheme='Linux') - except ImportError: - Pdb = OldPdb - kw = {} - pdb = Pdb(**kw) - return pdb.runcall(f, *args, **kwargs) - - -def pudebug(f, *args, **kwargs): - import pudb - return pudb.runcall(f, *args, **kwargs) - - -def set_trace(): - from IPython.core.debugger import Pdb - try: - Pdb(color_scheme='Linux').set_trace(sys._getframe().f_back) - except Exception: - from pdb import Pdb as OldPdb - OldPdb().set_trace(sys._getframe().f_back) - # ----------------------------------------------------------------------------- # contextmanager to ensure the file cleanup diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 09fb5a30cbc3b..120f8d79819ff 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -231,6 +231,27 @@ def good_imports(self): """ pass + def no_returns(self): + """ + Say hello and have no returns. + """ + pass + + def empty_returns(self): + """ + Say hello and always return None. + + Since this function never returns a value, this + docstring doesn't need a return section. + """ + def say_hello(): + return "Hello World!" 
+ say_hello() + if True: + return + else: + return None + class BadGenericDocStrings(object): """Everything here has a bad docstring @@ -785,7 +806,7 @@ def test_good_class(self, capsys): @pytest.mark.parametrize("func", [ 'plot', 'sample', 'random_letters', 'sample_values', 'head', 'head1', - 'contains', 'mode', 'good_imports']) + 'contains', 'mode', 'good_imports', 'no_returns', 'empty_returns']) def test_good_functions(self, capsys, func): errors = validate_one(self._import_path( klass='GoodDocStrings', func=func))['errors'] diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 20f32124a2532..1c45c79ba7fba 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -26,6 +26,8 @@ import importlib import doctest import tempfile +import ast +import textwrap import flake8.main.application @@ -490,9 +492,45 @@ def yields(self): @property def method_source(self): try: - return inspect.getsource(self.obj) + source = inspect.getsource(self.obj) except TypeError: return '' + return textwrap.dedent(source) + + @property + def method_returns_something(self): + ''' + Check if the docstrings method can return something. + + Bare returns, returns valued None and returns from nested functions are + disregarded. + + Returns + ------- + bool + Whether the docstrings method can return something. + ''' + + def get_returns_not_on_nested_functions(node): + returns = [node] if isinstance(node, ast.Return) else [] + for child in ast.iter_child_nodes(node): + # Ignore nested functions and their subtrees. + if not isinstance(child, ast.FunctionDef): + child_returns = get_returns_not_on_nested_functions(child) + returns.extend(child_returns) + return returns + + tree = ast.parse(self.method_source).body + if tree: + returns = get_returns_not_on_nested_functions(tree[0]) + return_values = [r.value for r in returns] + # Replace NameConstant nodes valued None with None.
+ for i, v in enumerate(return_values): + if isinstance(v, ast.NameConstant) and v.value is None: + return_values[i] = None + return any(return_values) + else: + return False @property def first_line_ends_in_dot(self): @@ -691,7 +729,7 @@ def get_validation_data(doc): if doc.is_function_or_method: if not doc.returns: - if 'return' in doc.method_source: + if doc.method_returns_something: errs.append(error('RT01')) else: if len(doc.returns) == 1 and doc.returns[0][1]: