From cb8f6c6da9d390ef928390da621b716e2cbf2b1f Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 29 Mar 2020 11:56:10 -0700
Subject: [PATCH 01/29] REF: reshape.concat operate on arrays, not
 SingleBlockManagers

---
 pandas/core/arrays/numpy_.py         |  5 +++++
 pandas/core/internals/managers.py    | 25 -------------------------
 pandas/core/reshape/concat.py        | 12 +++++++-----
 pandas/tests/extension/test_numpy.py | 17 ++++++++++++-----
 4 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index 3058e1d6073f3..e46125a87bf8c 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -436,6 +436,11 @@ def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
     # ------------------------------------------------------------------------
     # Additional Methods
 
+    def astype(self, dtype, copy=True):
+        if dtype is self.dtype:
+            return self.copy() if copy else self
+        return super().astype(dtype, copy=copy)
+
     def to_numpy(
         self, dtype=None, copy: bool = False, na_value=lib.no_default
     ) -> np.ndarray:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 9630abf61f692..5e05398f0e917 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1633,31 +1633,6 @@ def fast_xs(self, loc):
         """
         raise NotImplementedError("Use series._values[loc] instead")
 
-    def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager":
-        """
-        Concatenate a list of SingleBlockManagers into a single
-        SingleBlockManager.
-
-        Used for pd.concat of Series objects with axis=0.
-
-        Parameters
-        ----------
-        to_concat : list of SingleBlockManagers
-        new_axis : Index of the result
-
-        Returns
-        -------
-        SingleBlockManager
-        """
-
-        blocks = [obj.blocks[0] for obj in to_concat]
-        values = concat_compat([x.values for x in blocks])
-
-        new_block = make_block(values, placement=slice(0, len(values), 1))
-
-        mgr = SingleBlockManager(new_block, new_axis)
-        return mgr
-
     # --------------------------------------------------------------------
     # Constructor Helpers
 
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index b4497ce1780e6..59c39128dcd14 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -9,6 +9,7 @@
 
 from pandas._typing import FrameOrSeriesUnion, Label
 
+from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
 from pandas import DataFrame, Index, MultiIndex, Series
@@ -456,12 +457,13 @@ def get_result(self):
         # stack blocks
         if self.axis == 0:
             name = com.consensus_name_attr(self.objs)
-
-            mgr = self.objs[0]._data.concat(
-                [x._data for x in self.objs], self.new_axes
-            )
             cons = self.objs[0]._constructor
-            return cons(mgr, name=name).__finalize__(self, method="concat")
+
+            arrs = [ser._values for ser in self.objs]
+
+            res = concat_compat(arrs, axis=0)
+            result = cons(res, index=self.new_axes[0], name=name, dtype=res.dtype)
+            return result.__finalize__(self, method="concat")
 
         # combine as columns in a frame
         else:
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index 61c5925383f88..4649b7619e1c6 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -130,7 +130,18 @@ def skip_numpy_object(dtype):
 
 
 class BaseNumPyTests:
-    pass
+    @classmethod
+    def assert_series_equal(cls, left, right, *args, **kwargs):
+        # FIXME: kludge because we are patching is_extension_array_dtype
+        # with monkeypatch, needed for test_loc_iloc_frame_single_dtype
+        # in the object-dtype case
+        ld = left.dtype
+        rd = right.dtype
+        if isinstance(ld, PandasDtype) and ld.numpy_dtype == object:
+            if isinstance(rd, np.dtype) and rd == object:
+                # Call these close enough
+                left = left.astype(rd)
+        tm.assert_series_equal(left, right, *args, **kwargs)
 
 
 class TestCasting(BaseNumPyTests, base.BaseCastingTests):
@@ -170,10 +181,6 @@ def test_take_series(self, data):
         # ValueError: PandasArray must be 1-dimensional.
         super().test_take_series(data)
 
-    @pytest.mark.xfail(reason="astype doesn't recognize data.dtype")
-    def test_loc_iloc_frame_single_dtype(self, data):
-        super().test_loc_iloc_frame_single_dtype(data)
-
 
 class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
     @skip_nested
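[Illustration] A minimal sketch of the code path this refactor changes, assuming a build with the patch applied (the session itself is hypothetical, not part of the patch):

    import pandas as pd

    ser1 = pd.Series([1, 2], name="x")
    ser2 = pd.Series([3.0, 4.0], name="x")
    # axis=0 Series concat now extracts ._values and lets concat_compat
    # choose the result dtype instead of routing through
    # SingleBlockManager.concat; numpy promotion gives float64 here.
    res = pd.concat([ser1, ser2], ignore_index=True)
    assert res.dtype == "float64"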
From e008f4008850d60668737de64173d5cfc9ef765b Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 30 Mar 2020 18:42:10 -0700
Subject: [PATCH 02/29] xfail more selectively

---
 pandas/tests/extension/test_numpy.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index 4649b7619e1c6..12f7c60349ad2 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -130,18 +130,7 @@ def skip_numpy_object(dtype):
 
 
 class BaseNumPyTests:
-    @classmethod
-    def assert_series_equal(cls, left, right, *args, **kwargs):
-        # FIXME: kludge because we are patching is_extension_array_dtype
-        # with monkeypatch, needed for test_loc_iloc_frame_single_dtype
-        # in the object-dtype case
-        ld = left.dtype
-        rd = right.dtype
-        if isinstance(ld, PandasDtype) and ld.numpy_dtype == object:
-            if isinstance(rd, np.dtype) and rd == object:
-                # Call these close enough
-                left = left.astype(rd)
-        tm.assert_series_equal(left, right, *args, **kwargs)
+    pass
 
 
 class TestCasting(BaseNumPyTests, base.BaseCastingTests):
@@ -170,10 +181,6 @@ def test_take_series(self, data):
         # ValueError: PandasArray must be 1-dimensional.
         super().test_take_series(data)
 
+    def test_loc_iloc_frame_single_dtype(self, data):
+        if data.dtype.numpy_dtype == object:
+            # GH#33125
+            pytest.xfail(reason="astype doesn't recognize data.dtype")
+        super().test_loc_iloc_frame_single_dtype(data)
+
 
 class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
     @skip_nested

From 3f0ee1b3dc1afb45bf0bd8682871c6cf5d9ac161 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 6 Apr 2020 13:41:32 -0700
Subject: [PATCH 03/29] Revert PandasArray.astype patch

---
 pandas/core/arrays/numpy_.py         |  5 -----
 pandas/tests/extension/test_numpy.py | 25 ++++++++++++++++++++++---
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index e46125a87bf8c..3058e1d6073f3 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -436,11 +436,6 @@ def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
     # ------------------------------------------------------------------------
     # Additional Methods
 
-    def astype(self, dtype, copy=True):
-        if dtype is self.dtype:
-            return self.copy() if copy else self
-        return super().astype(dtype, copy=copy)
-
     def to_numpy(
         self, dtype=None, copy: bool = False, na_value=lib.no_default
     ) -> np.ndarray:
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index 12f7c60349ad2..aa5a99282131a 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -171,9 +171,10 @@ def test_take_series(self, data):
         super().test_take_series(data)
 
     def test_loc_iloc_frame_single_dtype(self, data):
-        if data.dtype.numpy_dtype == object:
+        npdtype = data.dtype.numpy_dtype
+        if npdtype == object or npdtype == np.float64:
             # GH#33125
-            pytest.xfail(reason="astype doesn't recognize data.dtype")
+            pytest.xfail(reason="GH#33125 astype doesn't recognize data.dtype")
         super().test_loc_iloc_frame_single_dtype(data)
 
 
 class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
     @skip_nested
     def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
         # ValueError: Names should be list-like for a MultiIndex
+        if data_for_grouping.dtype.numpy_dtype == np.float64:
+            pytest.xfail(reason="GH#33125 astype doesn't recognize data.dtype")
         super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
 
 
@@ -278,7 +281,11 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
 
 
 class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
-    pass
+    @pytest.mark.xfail(
+        reason="GH#33125 PandasArray.astype does not recognize PandasDtype"
+    )
+    def test_series_repr(self, data):
+        super().test_series_repr(data)
 
 
 @skip_nested
@@ -323,6 +330,18 @@ class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
     def test_concat_mixed_dtypes(self, data):
         super().test_concat_mixed_dtypes(data)
 
+    @pytest.mark.xfail(
+        reason="GH#33125 PandasArray.astype does not recognize PandasDtype"
+    )
+    def test_concat(self, data, in_frame):
+        super().test_concat(data, in_frame)
+
+    @pytest.mark.xfail(
+        reason="GH#33125 PandasArray.astype does not recognize PandasDtype"
+    )
+    def test_concat_all_na_block(self, data_missing, in_frame):
+        super().test_concat_all_na_block(data_missing, in_frame)
+
     @skip_nested
     def test_merge(self, data, na_value):
         # Fails creating expected
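[Illustration] The xfails above share one root cause; a rough sketch of the kind of call that misbehaves per GH#33125 (hypothetical session; assumes the public PandasArray export):

    import numpy as np
    from pandas.arrays import PandasArray

    arr = PandasArray(np.array([1.0, 2.0]))
    # astype to the array's own PandasDtype should be a cheap copy, but
    # PandasArray.astype does not recognize PandasDtype, which is what the
    # reverted special-case above worked around.
    arr.astype(arr.dtype)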
From 2da47dec41a5f8fcb353bcd2302db32b9cb11b82 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com>
Date: Mon, 6 Apr 2020 23:42:00 +0300
Subject: [PATCH 04/29] DOC: Fix examples in `pandas/core/strings.py` (#33328)

---
 ci/code_checks.sh      |  4 ++++
 pandas/core/strings.py | 51 ++++++++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index cd9e4384fd0d9..1bdbbb54a0aac 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -296,6 +296,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/series.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests strings.py' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/strings.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # Directories
 
     MSG='Doctests arrays'; echo $MSG
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 59b8b37f72695..52d9a81489db4 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -652,9 +652,9 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
     To get the idea:
 
     >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)
-    0    <_sre.SRE_Match object; span=(0, 1), match='f'>oo
-    1    <_sre.SRE_Match object; span=(0, 1), match='f'>uz
-    2                                                  NaN
+    0    <re.Match object; span=(0, 1), match='f'>oo
+    1    <re.Match object; span=(0, 1), match='f'>uz
+    2                                             NaN
     dtype: object
 
     Reverse every lowercase alphabetic word:
@@ -2076,8 +2076,18 @@ class StringMethods(NoNewAttributesMixin):
 
     Examples
     --------
-    >>> s.str.split('_')
-    >>> s.str.replace('_', '')
+    >>> s = pd.Series(["A_Str_Series"])
+    >>> s
+    0    A_Str_Series
+    dtype: object
+
+    >>> s.str.split("_")
+    0    [A, Str, Series]
+    dtype: object
+
+    >>> s.str.replace("_", "")
+    0    AStrSeries
+    dtype: object
    """

    def __init__(self, data):
@@ -2583,9 +2593,14 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
         Examples
         --------
-        >>> s = pd.Series(["this is a regular sentence",
-        ...                "https://docs.python.org/3/tutorial/index.html",
-        ...                np.nan])
+        >>> s = pd.Series(
+        ...     [
+        ...         "this is a regular sentence",
+        ...         "https://docs.python.org/3/tutorial/index.html",
+        ...         np.nan
+        ...     ]
+        ... )
+        >>> s
         0                       this is a regular sentence
         1    https://docs.python.org/3/tutorial/index.html
         2                                              NaN
         dtype: object
@@ -2625,7 +2640,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
 
         The `pat` parameter can be used to split by other characters.
 
-        >>> s.str.split(pat = "/")
+        >>> s.str.split(pat="/")
         0                         [this is a regular sentence]
         1    [https:, , docs.python.org, 3, tutorial, index...
         2                                                  NaN
         dtype: object
@@ -2636,14 +2651,10 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
         the columns during the split.
 
         >>> s.str.split(expand=True)
-                                                       0     1     2        3
-        0                                           this    is     a  regular
-        1  https://docs.python.org/3/tutorial/index.html  None  None     None
-        2                                            NaN   NaN   NaN      NaN  \
-                  4
-        0  sentence
-        1      None
-        2       NaN
+                                                       0     1     2        3         4
+        0                                           this    is     a  regular  sentence
+        1  https://docs.python.org/3/tutorial/index.html  None  None     None      None
+        2                                            NaN   NaN   NaN      NaN       NaN
 
         For slightly more complex use cases like splitting the html document name
         from a url, a combination of parameter settings can be used.
 
@@ -2658,7 +2669,9 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
         expressions.
 
         >>> s = pd.Series(["1+1=2"])
-
+        >>> s
+        0    1+1=2
+        dtype: object
         >>> s.str.split(r"\+|=", expand=True)
              0    1    2
         0    1    1    2
@@ -2750,7 +2763,7 @@ def rsplit(self, pat=None, n=-1, expand=False):
 
         >>> idx.str.partition()
         MultiIndex([('X', ' ', '123'),
                     ('Y', ' ', '999')],
-                   dtype='object')
+                   )
 
         Or an index with tuples with ``expand=False``:

From 9585a4140370e75afa12616ec0d9b02276a6c4d0 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Mon, 6 Apr 2020 22:52:52 +0200
Subject: [PATCH 05/29] DOC: do not include type hints in signature in html
 docs (#33312)

---
 doc/source/conf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/conf.py b/doc/source/conf.py
index 35833627f6c05..d24483abd28e1 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -109,6 +109,7 @@
     )
 )
 autosummary_generate = True if pattern is None else ["index"]
+autodoc_typehints = "none"
 
 # numpydoc
 numpydoc_attributes_as_param_list = False

From ed862c01fee1dd2d87de2dcf7f69eb7a4f2177aa Mon Sep 17 00:00:00 2001
From: Kaiqi Dong
Date: Mon, 6 Apr 2020 23:15:18 +0200
Subject: [PATCH 06/29] BUG: DataFrame fail to construct when data is list and
 columns is nested list for MI (#32202)

---
 doc/source/whatsnew/v1.1.0.rst          |   1 +
 pandas/core/internals/construction.py   | 114 ++++++++++++++++++++----
 pandas/tests/frame/test_constructors.py |  26 ++++++
 3 files changed, 123 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 7cb7db27ae603..2df732d67b5da 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -471,6 +471,7 @@ Other
   instead of ``TypeError: Can only append a Series if ignore_index=True or if the Series has a name`` (:issue:`30871`)
 - Set operations on an object-dtype :class:`Index` now always return object-dtype results (:issue:`31401`)
 - Bug in :meth:`AbstractHolidayCalendar.holidays` when no rules were defined (:issue:`31415`)
+- Bug in :class:`DataFrame` when initializing a frame with lists and assigning ``columns`` with nested list for ``MultiIndex`` (:issue:`32173`)
 - Bug in :meth:`DataFrame.to_records` incorrectly losing timezone information in timezone-aware ``datetime64`` columns (:issue:`32535`)
 - Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if left object is a different subclass with ``check_series_type=True`` (:issue:`32670`).
 - :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:32538`)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index fc7da4155db36..5c9e4b96047ee 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -3,12 +3,13 @@
 constructors before passing them to a BlockManager.
 """
 from collections import abc
-from typing import Tuple
+from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import numpy.ma as ma
 
 from pandas._libs import lib
+from pandas._typing import Axis, Dtype, Scalar
 
 from pandas.core.dtypes.cast import (
     construct_1d_arraylike_from_scalar,
@@ -522,7 +523,12 @@ def to_arrays(data, columns, coerce_float=False, dtype=None):
     return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
 
 
-def _list_to_arrays(data, columns, coerce_float=False, dtype=None):
+def _list_to_arrays(
+    data: List[Scalar],
+    columns: Union[Index, List],
+    coerce_float: bool = False,
+    dtype: Optional[Dtype] = None,
+) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
     if len(data) > 0 and isinstance(data[0], tuple):
         content = list(lib.to_object_array_tuples(data).T)
     else:
         content = list(lib.to_object_array(data).T)
     # gh-26429 do not raise user-facing AssertionError
     try:
-        result = _convert_object_array(
-            content, columns, dtype=dtype, coerce_float=coerce_float
-        )
+        columns = _validate_or_indexify_columns(content, columns)
+        result = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
     except AssertionError as e:
         raise ValueError(e) from e
-    return result
+    return result, columns
 
 
-def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
+def _list_of_series_to_arrays(
+    data: List,
+    columns: Union[Index, List],
+    coerce_float: bool = False,
+    dtype: Optional[Dtype] = None,
+) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
     if columns is None:
         # We know pass_data is non-empty because data[0] is a Series
         pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))]
         columns = get_objs_combined_axis(pass_data, sort=False)
 
-    indexer_cache = {}
+    indexer_cache: Dict[int, Scalar] = {}
 
     aligned_values = []
     for s in data:
@@ -564,14 +574,19 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
 
     if values.dtype == np.object_:
         content = list(values.T)
-        return _convert_object_array(
-            content, columns, dtype=dtype, coerce_float=coerce_float
-        )
+        columns = _validate_or_indexify_columns(content, columns)
+        content = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
+        return content, columns
     else:
         return values.T, columns
 
 
-def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None):
+def _list_of_dict_to_arrays(
+    data: List,
+    columns: Union[Index, List],
+    coerce_float: bool = False,
+    dtype: Optional[Dtype] = None,
+) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
     """
     Convert list of dicts to numpy arrays
 
@@ -603,22 +618,85 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None):
     data = [(type(d) is dict) and d or dict(d) for d in data]
 
     content = list(lib.dicts_to_array(data, list(columns)).T)
-    return _convert_object_array(
-        content, columns, dtype=dtype, coerce_float=coerce_float
-    )
+    columns = _validate_or_indexify_columns(content, columns)
+    content = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
+    return content, columns
 
 
-def _convert_object_array(content, columns, coerce_float=False, dtype=None):
+def _validate_or_indexify_columns(
+    content: List, columns: Union[Index, List, None]
+) -> Union[Index, List[Axis]]:
+    """
+    If columns is None, make numbers as column names; otherwise, validate that
+    columns have valid length.
+
+    Parameters
+    ----------
+    content: list of data
+    columns: Iterable or None
+
+    Returns
+    -------
+    columns: If columns is Iterable, return as is; if columns is None, assign
+    positional column index value as columns.
+
+    Raises
+    ------
+    1. AssertionError when content is not composed of list of lists, and if
+        length of columns is not equal to length of content.
+    2. ValueError when content is list of lists, but length of each sub-list
+        is not equal
+    3. ValueError when content is list of lists, but length of sub-list is
+        not equal to length of content
+    """
     if columns is None:
         columns = ibase.default_index(len(content))
     else:
+
+        # Add mask for data which is composed of list of lists
+        is_mi_list = isinstance(columns, list) and all(
+            isinstance(col, list) for col in columns
+        )
+
-        if len(columns) != len(content):  # pragma: no cover
+        if not is_mi_list and len(columns) != len(content):  # pragma: no cover
             # caller's responsibility to check for this...
             raise AssertionError(
                 f"{len(columns)} columns passed, passed data had "
                 f"{len(content)} columns"
             )
+        elif is_mi_list:
+
+            # check if nested list column, length of each sub-list should be equal
+            if len({len(col) for col in columns}) > 1:
+                raise ValueError(
+                    "Length of columns passed for MultiIndex columns is different"
+                )
+
+            # if columns is not empty and length of sublist is not equal to content
+            elif columns and len(columns[0]) != len(content):
+                raise ValueError(
+                    f"{len(columns[0])} columns passed, passed data had "
+                    f"{len(content)} columns"
+                )
+    return columns
+
+
+def _convert_object_array(
+    content: List[Scalar], coerce_float: bool = False, dtype: Optional[Dtype] = None
+) -> List[Scalar]:
+    """
+    Internal function to convert object array.
+
+    Parameters
+    ----------
+    content: list of processed data records
+    coerce_float: bool, to coerce floats or not, default is False
+    dtype: np.dtype, default is None
+
+    Returns
+    -------
+    arrays: casted content if not object dtype, otherwise return as is in list.
+    """
     # provide soft conversion of object dtypes
     def convert(arr):
         if dtype != object and dtype != np.object:
@@ -628,7 +706,7 @@ def convert(arr):
 
     arrays = [convert(arr) for arr in content]
 
-    return arrays, columns
+    return arrays
 
 
 # ---------------------------------------------------------------------
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index fc27f19490a9b..baac87755c6d2 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1063,6 +1063,32 @@ def test_constructor_list_of_lists(self):
         result = DataFrame(data)
         tm.assert_frame_equal(result, expected)
 
+    def test_constructor_list_like_data_nested_list_column(self):
+        # GH 32173
+        arrays = [list("abcd"), list("cdef")]
+        result = pd.DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
+
+        mi = MultiIndex.from_arrays(arrays)
+        expected = pd.DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=mi)
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_constructor_wrong_length_nested_list_column(self):
+        # GH 32173
+        arrays = [list("abc"), list("cde")]
+
+        msg = "3 columns passed, passed data had 4"
+        with pytest.raises(ValueError, match=msg):
+            DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
+
+    def test_constructor_unequal_length_nested_list_column(self):
+        # GH 32173
+        arrays = [list("abcd"), list("cde")]
+
+        msg = "Length of columns passed for MultiIndex columns is different"
+        with pytest.raises(ValueError, match=msg):
+            DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays)
+
     def test_constructor_sequence_like(self):
         # GH 3783
         # collections.Squence like
expected["b"].cat.set_categories([1, 2, 3]) + + final_data = np.array(final_data) + + a = pd.Categorical(final_data[:, 0], categories=[3, 2]) + + excat = [3, 2] if replace_dict["b"] == 1 else [1, 3] + b = pd.Categorical(final_data[:, 1], categories=excat) + + expected = DataFrame({"a": a, "b": b}) result = df.replace(replace_dict, 3) tm.assert_frame_equal(result, expected) with pytest.raises(AssertionError): From 2b322d2030e15ae7af298653fd5a8f53c612464e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 14:22:21 -0700 Subject: [PATCH 08/29] REF: BlockManager.delete -> idelete (#33332) --- pandas/core/generic.py | 3 ++- pandas/core/internals/managers.py | 15 ++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fac4ca6768ece..3363d22686f96 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3713,7 +3713,8 @@ def __delitem__(self, key) -> None: # If the above loop ran and didn't delete anything because # there was no match, this call should raise the appropriate # exception: - self._mgr.delete(key) + loc = self.axes[-1].get_loc(key) + self._mgr.idelete(loc) # delete from the caches try: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c6ce4aea9fa40..c98c21dfcc80e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1007,12 +1007,10 @@ def iget(self, i: int) -> "SingleBlockManager": self.axes[1], ) - def delete(self, item): + def idelete(self, indexer): """ - Delete selected item (items if non-unique) in-place. + Delete selected locations in-place (new block and array, same BlockManager) """ - indexer = self.items.get_loc(item) - is_deleted = np.zeros(self.shape[0], dtype=np.bool_) is_deleted[indexer] = True ref_loc_offset = -is_deleted.cumsum() @@ -1606,15 +1604,14 @@ def _consolidate_check(self): def _consolidate_inplace(self): pass - def delete(self, item): + def idelete(self, indexer): """ - Delete single item from SingleBlockManager. + Delete single location from SingleBlockManager. Ensures that self.blocks doesn't become empty. 
""" - loc = self.items.get_loc(item) - self._block.delete(loc) - self.axes[0] = self.axes[0].delete(loc) + self._block.delete(indexer) + self.axes[0] = self.axes[0].delete(indexer) def fast_xs(self, loc): """ From d4d75387b88a78658e53807c2b5860cfcd555687 Mon Sep 17 00:00:00 2001 From: rebecca-palmer Date: Mon, 6 Apr 2020 22:34:55 +0100 Subject: [PATCH 09/29] TST: Don't use 'is' on strings to avoid SyntaxWarning (#33322) --- pandas/tests/frame/test_alter_axes.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 961c18749f055..b28e8a5b347aa 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -234,9 +234,16 @@ def test_set_index_pass_arrays_duplicate( # need to adapt first drop for case that both keys are 'A' -- # cannot drop the same column twice; - # use "is" because == would give ambiguous Boolean error for containers + # plain == would give ambiguous Boolean error for containers first_drop = ( - False if (keys[0] is "A" and keys[1] is "A") else drop # noqa: F632 + False + if ( + isinstance(keys[0], str) + and keys[0] == "A" + and isinstance(keys[1], str) + and keys[1] == "A" + ) + else drop ) # to test against already-tested behaviour, we add sequentially, # hence second append always True; must wrap keys in list, otherwise From e3eb29cf44fcd2e2da249d2872b45c4b2c0c4bd8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 14:39:20 -0700 Subject: [PATCH 10/29] CLN: remove fill_tuple kludge (#33310) --- pandas/core/internals/blocks.py | 13 ++++++------- pandas/core/internals/managers.py | 23 +++++++++-------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fe58fd3af966c..c23f78d845cfd 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1241,7 +1241,7 @@ def func(x): blocks = [self.make_block_same_class(interp_values)] return self._maybe_downcast(blocks, downcast) - def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_tuple=None): + def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_value=lib.no_default): """ Take values according to indexer and return them as a block.bb @@ -1252,11 +1252,10 @@ def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_tuple=None): values = self.values - if fill_tuple is None: + if fill_value is lib.no_default: fill_value = self.fill_value allow_fill = False else: - fill_value = fill_tuple[0] allow_fill = True new_values = algos.take_nd( @@ -1721,14 +1720,14 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs): # we are expected to return a 2-d ndarray return values.reshape(1, len(values)) - def take_nd(self, indexer, axis: int = 0, new_mgr_locs=None, fill_tuple=None): + def take_nd( + self, indexer, axis: int = 0, new_mgr_locs=None, fill_value=lib.no_default + ): """ Take values according to indexer and return them as a block. 
""" - if fill_tuple is None: + if fill_value is lib.no_default: fill_value = None - else: - fill_value = fill_tuple[0] # axis doesn't matter; we are really a single-dim object # but are passed the axis depending on the calling routing diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c98c21dfcc80e..9191c2f0a0a76 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1297,14 +1297,14 @@ def reindex_indexer( raise IndexError("Requested axis not found in manager") if axis == 0: - new_blocks = self._slice_take_blocks_ax0(indexer, fill_tuple=(fill_value,)) + new_blocks = self._slice_take_blocks_ax0(indexer, fill_value=fill_value) else: new_blocks = [ blk.take_nd( indexer, axis=axis, - fill_tuple=( - fill_value if fill_value is not None else blk.fill_value, + fill_value=( + fill_value if fill_value is not None else blk.fill_value ), ) for blk in self.blocks @@ -1315,7 +1315,7 @@ def reindex_indexer( return type(self).from_blocks(new_blocks, new_axes) - def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): + def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): """ Slice/take blocks along axis=0. @@ -1325,7 +1325,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): ------- new_blocks : list of Block """ - allow_fill = fill_tuple is not None + allow_fill = fill_value is not lib.no_default sl_type, slobj, sllen = _preprocess_slice_or_indexer( slice_or_indexer, self.shape[0], allow_fill=allow_fill @@ -1337,16 +1337,15 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): if sl_type in ("slice", "mask"): return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: - if allow_fill and fill_tuple[0] is None: + if allow_fill and fill_value is None: _, fill_value = maybe_promote(blk.dtype) - fill_tuple = (fill_value,) return [ blk.take_nd( slobj, axis=0, new_mgr_locs=slice(0, sllen), - fill_tuple=fill_tuple, + fill_value=fill_value, ) ] @@ -1369,8 +1368,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): blocks = [] for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=True): if blkno == -1: - # If we've got here, fill_tuple was not None. 
- fill_value = fill_tuple[0] + # If we've got here, fill_value was not lib.no_default blocks.append( self._make_na_block(placement=mgr_locs, fill_value=fill_value) @@ -1391,10 +1389,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): else: blocks.append( blk.take_nd( - blklocs[mgr_locs.indexer], - axis=0, - new_mgr_locs=mgr_locs, - fill_tuple=None, + blklocs[mgr_locs.indexer], axis=0, new_mgr_locs=mgr_locs, ) ) From fcfa7c47a8471c3287d903f5fe79bd38beda37c7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 14:53:19 -0700 Subject: [PATCH 11/29] TST: misplaced reduction/indexing tests (#33307) --- pandas/tests/frame/test_analytics.py | 25 +++++++++++++++++++ pandas/tests/frame/test_timeseries.py | 23 ----------------- .../tests/indexes/datetimes/test_indexing.py | 7 ++++++ pandas/tests/series/indexing/test_datetime.py | 15 ----------- pandas/tests/series/test_reductions.py | 11 ++++++++ 5 files changed, 43 insertions(+), 38 deletions(-) create mode 100644 pandas/tests/series/test_reductions.py diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 6525e93d89fce..e1fc7e9d7c5b8 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1274,3 +1274,28 @@ def test_series_broadcasting(self): df_nan.clip(lower=s, axis=0) for op in ["lt", "le", "gt", "ge", "eq", "ne"]: getattr(df, op)(s_nan, axis=0) + + +class TestDataFrameReductions: + def test_min_max_dt64_with_NaT(self): + # Both NaT and Timestamp are in DataFrame. + df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) + + res = df.min() + exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + # GH12941, only NaTs are in DataFrame. + df = pd.DataFrame({"foo": [pd.NaT, pd.NaT]}) + + res = df.min() + exp = pd.Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = pd.Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 452af895e4967..dea921a92ae37 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -54,29 +54,6 @@ def test_frame_append_datetime64_col_other_units(self): assert (tmp["dates"].values == ex_vals).all() - def test_operation_on_NaT(self): - # Both NaT and Timestamp are in DataFrame. - df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) - - res = df.min() - exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"]) - tm.assert_series_equal(res, exp) - - res = df.max() - exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"]) - tm.assert_series_equal(res, exp) - - # GH12941, only NaTs are in DataFrame. 
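[Illustration] A user-level call that exercises the cleaned-up plumbing (hypothetical session):

    import pandas as pd

    ser = pd.Series([1.0, 2.0], index=["a", "b"])
    # reindex now threads a scalar fill_value (lib.no_default as the
    # "no fill" sentinel) through _slice_take_blocks_ax0/take_nd instead
    # of the old 1-tuple fill_tuple.
    res = ser.reindex(["a", "c"], fill_value=0.0)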
From fcfa7c47a8471c3287d903f5fe79bd38beda37c7 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 6 Apr 2020 14:53:19 -0700
Subject: [PATCH 11/29] TST: misplaced reduction/indexing tests (#33307)

---
 pandas/tests/frame/test_analytics.py          | 25 +++++++++++++++++++
 pandas/tests/frame/test_timeseries.py         | 23 -----------------
 .../tests/indexes/datetimes/test_indexing.py  |  7 ++++++
 pandas/tests/series/indexing/test_datetime.py | 15 -----------
 pandas/tests/series/test_reductions.py        | 11 ++++++++
 5 files changed, 43 insertions(+), 38 deletions(-)
 create mode 100644 pandas/tests/series/test_reductions.py

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 6525e93d89fce..e1fc7e9d7c5b8 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -1274,3 +1274,28 @@ def test_series_broadcasting(self):
             df_nan.clip(lower=s, axis=0)
         for op in ["lt", "le", "gt", "ge", "eq", "ne"]:
             getattr(df, op)(s_nan, axis=0)
+
+
+class TestDataFrameReductions:
+    def test_min_max_dt64_with_NaT(self):
+        # Both NaT and Timestamp are in DataFrame.
+        df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]})
+
+        res = df.min()
+        exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
+        tm.assert_series_equal(res, exp)
+
+        res = df.max()
+        exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
+        tm.assert_series_equal(res, exp)
+
+        # GH12941, only NaTs are in DataFrame.
+        df = pd.DataFrame({"foo": [pd.NaT, pd.NaT]})
+
+        res = df.min()
+        exp = pd.Series([pd.NaT], index=["foo"])
+        tm.assert_series_equal(res, exp)
+
+        res = df.max()
+        exp = pd.Series([pd.NaT], index=["foo"])
+        tm.assert_series_equal(res, exp)
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 452af895e4967..dea921a92ae37 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -54,29 +54,6 @@ def test_frame_append_datetime64_col_other_units(self):
 
         assert (tmp["dates"].values == ex_vals).all()
 
-    def test_operation_on_NaT(self):
-        # Both NaT and Timestamp are in DataFrame.
-        df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]})
-
-        res = df.min()
-        exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
-        tm.assert_series_equal(res, exp)
-
-        res = df.max()
-        exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"])
-        tm.assert_series_equal(res, exp)
-
-        # GH12941, only NaTs are in DataFrame.
-        df = pd.DataFrame({"foo": [pd.NaT, pd.NaT]})
-
-        res = df.min()
-        exp = pd.Series([pd.NaT], index=["foo"])
-        tm.assert_series_equal(res, exp)
-
-        res = df.max()
-        exp = pd.Series([pd.NaT], index=["foo"])
-        tm.assert_series_equal(res, exp)
-
     def test_datetime_assignment_with_NaT_and_diff_time_units(self):
         # GH 7492
         data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index 5882f5c77428b..58e2afc869e02 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -476,6 +476,13 @@ def test_get_loc_reasonable_key_error(self):
             index.get_loc("1/1/2000")
 
 
+class TestContains:
+    def test_index_dupes_contains(self):
+        d = datetime(2011, 12, 5, 20, 30)
+        ix = DatetimeIndex([d, d])
+        assert d in ix
+
+
 class TestDatetimeIndex:
     @pytest.mark.parametrize(
         "null", [None, np.nan, np.datetime64("NaT"), pd.NaT, pd.NA]
diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
index b5d04fd499c08..18c11f2b9eb61 100644
--- a/pandas/tests/series/indexing/test_datetime.py
+++ b/pandas/tests/series/indexing/test_datetime.py
@@ -464,12 +464,6 @@ def test_index_unique(dups):
     assert idx.nunique(dropna=False) == 21
 
 
-def test_index_dupes_contains():
-    d = datetime(2011, 12, 5, 20, 30)
-    ix = DatetimeIndex([d, d])
-    assert d in ix
-
-
 def test_duplicate_dates_indexing(dups):
     ts = dups
 
@@ -705,15 +699,6 @@ def test_set_none_nan():
     assert series[6] is NaT
 
 
-def test_nat_operations():
-    # GH 8617
-    s = Series([0, pd.NaT], dtype="m8[ns]")
-    exp = s[0]
-    assert s.median() == exp
-    assert s.min() == exp
-    assert s.max() == exp
-
-
 def test_setitem_tuple_with_datetimetz():
     # GH 20441
     arr = date_range("2017", periods=4, tz="US/Eastern")
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
new file mode 100644
index 0000000000000..be9330a14f9c9
--- /dev/null
+++ b/pandas/tests/series/test_reductions.py
@@ -0,0 +1,11 @@
+import pandas as pd
+from pandas import Series
+
+
+def test_reductions_td64_with_nat():
+    # GH#8617
+    ser = Series([0, pd.NaT], dtype="m8[ns]")
+    exp = ser[0]
+    assert ser.median() == exp
+    assert ser.min() == exp
+    assert ser.max() == exp
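[Illustration] The relocated reduction test's behavior, restated from the new test above:

    import pandas as pd
    from pandas import Series

    ser = Series([0, pd.NaT], dtype="m8[ns]")
    # NaT is skipped by default, so each reduction returns ser[0]
    assert ser.min() == ser[0]
    assert ser.max() == ser[0]
    assert ser.median() == ser[0]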
From 7a468b01726dadb7f75c5b427822fe6da7e9c753 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Mon, 6 Apr 2020 16:58:58 -0500
Subject: [PATCH 12/29] BUG: Don't raise on value_counts for empty Int64
 (#33339)

---
 doc/source/whatsnew/v1.1.0.rst               |  2 +-
 pandas/core/arrays/integer.py                |  3 ++-
 pandas/tests/arrays/integer/test_function.py | 10 ++++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 2df732d67b5da..92cfa6812ddd7 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -461,7 +461,7 @@ Sparse
 ExtensionArray
 ^^^^^^^^^^^^^^
 
--
+- Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`)
 -
 
 
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 4f3c68aa03b16..f5189068d5da1 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -499,7 +499,8 @@ def _values_for_argsort(self) -> np.ndarray:
         ExtensionArray.argsort
         """
         data = self._data.copy()
-        data[self._mask] = data.min() - 1
+        if self._mask.any():
+            data[self._mask] = data.min() - 1
         return data
 
     @classmethod
diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py
index 58913189593a9..bdf902d1aca62 100644
--- a/pandas/tests/arrays/integer/test_function.py
+++ b/pandas/tests/arrays/integer/test_function.py
@@ -103,6 +103,16 @@ def test_value_counts_na():
     tm.assert_series_equal(result, expected)
 
 
+def test_value_counts_empty():
+    # https://github.com/pandas-dev/pandas/issues/33317
+    s = pd.Series([], dtype="Int64")
+    result = s.value_counts()
+    # TODO: The dtype of the index seems wrong (it's int64 for non-empty)
+    idx = pd.Index([], dtype="object")
+    expected = pd.Series([], index=idx, dtype="Int64")
+    tm.assert_series_equal(result, expected)
+
+
 # TODO(jreback) - these need testing / are broken
 
 # shift

From 0a2b9cdb54fde4f9e45ff10dd05bec7c238c19dc Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Mon, 6 Apr 2020 17:01:56 -0500
Subject: [PATCH 13/29] REGR: Fix bug when replacing categorical value with
 self (#33292)

---
 doc/source/whatsnew/v1.1.0.rst                | 1 +
 pandas/core/arrays/categorical.py             | 2 ++
 pandas/tests/arrays/categorical/test_algos.py | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 92cfa6812ddd7..170d0f7110aa4 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -294,6 +294,7 @@ Categorical
 - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`)
 - Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`)
 - :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`)
+- Bug where :meth:`Categorical.replace` would replace with ``NaN`` whenever the new value and replacement value were equal (:issue:`33288`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index ad82d68baa5b3..c9b8db28e0cf6 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2447,6 +2447,8 @@ def replace(self, to_replace, value, inplace: bool = False):
         # other cases, like if both to_replace and value are list-like or if
         # to_replace is a dict, are handled separately in NDFrame
         for replace_value, new_value in replace_dict.items():
+            if new_value == replace_value:
+                continue
             if replace_value in cat.categories:
                 if isna(new_value):
                     cat.remove_categories(replace_value, inplace=True)
diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py
index 10c454f7c479a..325fa476d70e6 100644
--- a/pandas/tests/arrays/categorical/test_algos.py
+++ b/pandas/tests/arrays/categorical/test_algos.py
@@ -64,6 +64,8 @@ def test_isin_cats():
     [
         ("b", "c", ["a", "c"], "Categorical.categories are different"),
         ("c", "d", ["a", "b"], None),
+        # https://github.com/pandas-dev/pandas/issues/33288
+        ("a", "a", ["a", "b"], None),
         ("b", None, ["a", None], "Categorical.categories length are different"),
     ],
 )
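[Illustration] The regression fixed above, sketched (assumes the patch is applied):

    import pandas as pd

    ser = pd.Series(["a", "b"], dtype="category")
    # Replacing a value with itself previously went through the
    # remove-/add-categories path and produced NaN; it is now a no-op
    # (GH#33288).
    res = ser.replace("a", "a")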
From 5a38119dcc363202845785fcf4b6dc7b54e1dd69 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 6 Apr 2020 17:04:40 -0500
Subject: [PATCH 14/29] Pass method in __finalize__ (#33273)

---
 pandas/core/base.py    |  2 +-
 pandas/core/frame.py   |  8 ++---
 pandas/core/generic.py | 77 +++++++++++++++++++++++++-----------------
 pandas/core/series.py  | 62 ++++++++++++++++++++++------------
 4 files changed, 92 insertions(+), 57 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index a28a2c9594341..5945d8a4b432d 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1521,4 +1521,4 @@ def duplicated(self, keep="first"):
         else:
             return self._constructor(
                 duplicated(self, keep=keep), index=self.index
-            ).__finalize__(self)
+            ).__finalize__(self, method="duplicated")
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 67523facb7b7d..aedbba755227d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2515,7 +2515,7 @@ def transpose(self, *args, copy: bool = False) -> "DataFrame":
             new_values, index=self.columns, columns=self.index
         )
 
-        return result.__finalize__(self)
+        return result.__finalize__(self, method="transpose")
 
     @property
     def T(self) -> "DataFrame":
@@ -4470,7 +4470,7 @@ def _maybe_casted_values(index, labels=None):
     @Appender(_shared_docs["isna"] % _shared_doc_kwargs)
     def isna(self) -> "DataFrame":
         result = self._constructor(self._data.isna(func=isna))
-        return result.__finalize__(self)
+        return result.__finalize__(self, method="isna")
 
     @Appender(_shared_docs["isna"] % _shared_doc_kwargs)
     def isnull(self) -> "DataFrame":
@@ -4798,7 +4798,7 @@ def sort_values(
         if inplace:
             return self._update_inplace(result)
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="sort_values")
 
     def sort_index(
         self,
@@ -4934,7 +4934,7 @@ def sort_index(
         if inplace:
             return self._update_inplace(result)
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="sort_index")
 
     def value_counts(
         self,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 3363d22686f96..052a4adddca27 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -590,7 +590,9 @@ def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries:
         if copy:
             new_values = new_values.copy()
 
-        return self._constructor(new_values, *new_axes).__finalize__(self)
+        return self._constructor(new_values, *new_axes).__finalize__(
+            self, method="swapaxes"
+        )
 
     def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
         """
@@ -993,7 +995,7 @@ def rename(
             self._update_inplace(result)
             return None
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="rename")
 
     @rewrite_axis_style_signature("mapper", [("copy", True), ("inplace", False)])
     def rename_axis(self, mapper=lib.no_default, **kwargs):
@@ -1357,7 +1359,7 @@ def __invert__(self):
             return self
 
         new_data = self._mgr.apply(operator.invert)
-        result = self._constructor(new_data).__finalize__(self)
+        result = self._constructor(new_data).__finalize__(self, method="__invert__")
         return result
 
     def __nonzero__(self):
@@ -1802,7 +1804,9 @@ def __array_wrap__(self, result, context=None):
             # ptp also requires the item_from_zerodim
             return result
         d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
-        return self._constructor(result, **d).__finalize__(self)
+        return self._constructor(result, **d).__finalize__(
+            self, method="__array_wrap__"
+        )
 
     # ideally we would define this to avoid the getattr checks, but
     # is slower
@@ -3361,7 +3365,7 @@ class max_speed
         new_data = self._mgr.take(
             indices, axis=self._get_block_manager_axis(axis), verify=True
         )
-        return self._constructor(new_data).__finalize__(self)
+        return self._constructor(new_data).__finalize__(self, method="take")
 
     def _take_with_is_copy(self: FrameOrSeries, indices, axis=0) -> FrameOrSeries:
         """
@@ -4431,7 +4435,7 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries:
         # perform the reindex on the axes
         return self._reindex_axes(
             axes, level, limit, tolerance, method, fill_value, copy
-        ).__finalize__(self)
+        ).__finalize__(self, method="reindex")
 
     def _reindex_axes(
         self: FrameOrSeries, axes, level, limit, tolerance, method, fill_value, copy
@@ -5130,7 +5134,7 @@ def pipe(self, func, *args, **kwargs):
     # Attribute access
 
     def __finalize__(
-        self: FrameOrSeries, other, method=None, **kwargs
+        self: FrameOrSeries, other, method: Optional[str] = None, **kwargs
     ) -> FrameOrSeries:
         """
         Propagate metadata from other to self.
@@ -5139,9 +5143,14 @@ def __finalize__(
         ---------
         other : the object from which to get the attributes that we are going
             to propagate
-        method : optional, a passed method name ; possibly to take different
-            types of propagation actions based on this
+        method : str, optional
+            A passed method name providing context on where ``__finalize__``
+            was called.
+
+            .. warning::
+
+               The value passed as `method` is not currently considered
+               stable across pandas releases.
         """
         if isinstance(other, NDFrame):
             for name in other.attrs:
@@ -5294,10 +5303,10 @@ def _check_inplace_setting(self, value) -> bool_t:
         return True
 
     def _get_numeric_data(self):
-        return self._constructor(self._mgr.get_numeric_data()).__finalize__(self)
+        return self._constructor(self._mgr.get_numeric_data()).__finalize__(self,)
 
     def _get_bool_data(self):
-        return self._constructor(self._mgr.get_bool_data()).__finalize__(self)
+        return self._constructor(self._mgr.get_bool_data()).__finalize__(self,)
 
     # ----------------------------------------------------------------------
     # Internal Interface Methods
@@ -5563,8 +5572,8 @@ def astype(
 
         else:
             # else, only a single dtype is given
-            new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
-            return self._constructor(new_data).__finalize__(self)
+            new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
+            return self._constructor(new_data).__finalize__(self, method="astype")
 
         # GH 19920: retain column metadata after concat
         result = pd.concat(results, axis=1, copy=False)
@@ -5678,7 +5687,7 @@ def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
         """
         data = self._mgr.copy(deep=deep)
         self._clear_item_cache()
-        return self._constructor(data).__finalize__(self)
+        return self._constructor(data).__finalize__(self, method="copy")
 
     def __copy__(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
         return self.copy(deep=deep)
@@ -5784,7 +5793,7 @@ def infer_objects(self: FrameOrSeries) -> FrameOrSeries:
             self._mgr.convert(
                 datetime=True, numeric=False, timedelta=True, coerce=False, copy=True
             )
-        ).__finalize__(self)
+        ).__finalize__(self, method="infer_objects")
 
     def convert_dtypes(
         self: FrameOrSeries,
@@ -6111,7 +6120,7 @@ def fillna(
         if inplace:
             return self._update_inplace(result)
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="fillna")
 
     def ffill(
         self: FrameOrSeries,
@@ -6627,7 +6636,7 @@ def replace(
         if inplace:
             return self._update_inplace(result)
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="replace")
 
     _shared_docs[
         "interpolate"
@@ -6893,7 +6902,7 @@ def interpolate(
         if inplace:
             return self._update_inplace(result)
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="interpolate")
 
     # ----------------------------------------------------------------------
     # Timeseries methods Methods
@@ -7131,11 +7140,11 @@ def asof(self, where, subset=None):
 
     @Appender(_shared_docs["isna"] % _shared_doc_kwargs)
     def isna(self: FrameOrSeries) -> FrameOrSeries:
-        return isna(self).__finalize__(self)
+        return isna(self).__finalize__(self, method="isna")
 
     @Appender(_shared_docs["isna"] % _shared_doc_kwargs)
     def isnull(self: FrameOrSeries) -> FrameOrSeries:
-        return isna(self).__finalize__(self)
+        return isna(self).__finalize__(self, method="isnull")
 
     _shared_docs[
         "notna"
@@ -7201,11 +7210,11 @@ def isnull(self: FrameOrSeries) -> FrameOrSeries:
 
     @Appender(_shared_docs["notna"] % _shared_doc_kwargs)
     def notna(self: FrameOrSeries) -> FrameOrSeries:
-        return notna(self).__finalize__(self)
+        return notna(self).__finalize__(self, method="notna")
 
     @Appender(_shared_docs["notna"] % _shared_doc_kwargs)
     def notnull(self: FrameOrSeries) -> FrameOrSeries:
-        return notna(self).__finalize__(self)
+        return notna(self).__finalize__(self, method="notnull")
 
     def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
         if (lower is not None and np.any(isna(lower))) or (
@@ -8229,7 +8238,7 @@ def ranker(data):
                 pct=pct,
             )
             ranks = self._constructor(ranks, **data._construct_axes_dict())
-            return ranks.__finalize__(self)
+            return ranks.__finalize__(self, method="rank")
 
         # if numeric_only is None, and we can't get anything, we try with
         # numeric_only=True
@@ -8436,7 +8445,10 @@ def _align_frame(
             left.index = join_index
             right.index = join_index
 
-        return left.__finalize__(self), right.__finalize__(other)
+        return (
+            left.__finalize__(self),
+            right.__finalize__(other),
+        )
 
     def _align_series(
         self,
@@ -8520,7 +8532,10 @@ def _align_series(
             left.index = join_index
             right.index = join_index
 
-        return left.__finalize__(self), right.__finalize__(other)
+        return (
+            left.__finalize__(self),
+            right.__finalize__(other),
+        )
 
     def _where(
         self,
@@ -8933,7 +8948,7 @@ def shift(
         else:
             return self.tshift(periods, freq)
 
-        return self._constructor(new_data).__finalize__(self)
+        return self._constructor(new_data).__finalize__(self, method="shift")
 
     def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries:
         """
@@ -8970,7 +8985,7 @@ def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries:
         shifted_axis = self._get_axis(axis)[islicer]
         new_obj.set_axis(shifted_axis, axis=axis, inplace=True)
 
-        return new_obj.__finalize__(self)
+        return new_obj.__finalize__(self, method="slice_shift")
 
     def tshift(
         self: FrameOrSeries, periods: int = 1, freq=None, axis: Axis = 0
@@ -9030,7 +9045,7 @@ def tshift(
 
         result = self.copy()
         result.set_axis(new_ax, axis, inplace=True)
-        return result.__finalize__(self)
+        return result.__finalize__(self, method="tshift")
 
     def truncate(
         self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True
@@ -9241,7 +9256,7 @@ def _tz_convert(ax, tz):
         result = self.copy(deep=copy)
         result = result.set_axis(ax, axis=axis, inplace=False)
-        return result.__finalize__(self)
+        return result.__finalize__(self, method="tz_convert")
 
     def tz_localize(
         self: FrameOrSeries,
@@ -9410,7 +9425,7 @@ def _tz_localize(ax, tz, ambiguous, nonexistent):
         result = self.copy(deep=copy)
         result = result.set_axis(ax, axis=axis, inplace=False)
-        return result.__finalize__(self)
+        return result.__finalize__(self, method="tz_localize")
 
     # ----------------------------------------------------------------------
     # Numeric Methods
@@ -11189,7 +11204,7 @@ def block_accum_func(blk_values):
 
             d = self._construct_axes_dict()
             d["copy"] = False
-            return self._constructor(result, **d).__finalize__(self)
+            return self._constructor(result, **d).__finalize__(self, method=name)
 
         return set_function_name(cum_func, name, cls)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5ed8241101925..ccb1ec25b5ba4 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -661,7 +661,7 @@ def view(self, dtype=None) -> "Series":
         """
         return self._constructor(
             self._values.view(dtype), index=self.index
-        ).__finalize__(self)
+        ).__finalize__(self, method="view")
 
     # ----------------------------------------------------------------------
     # NDArray Compat
@@ -829,7 +829,7 @@ def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
 
         return self._constructor(
             new_values, index=new_index, fastpath=True
-        ).__finalize__(self)
+        ).__finalize__(self, method="take")
 
     def _take_with_is_copy(self, indices, axis=0):
         """
@@ -962,12 +962,12 @@ def _get_values_tuple(self, key):
         # If key is contained, would have returned by now
         indexer, new_index = self.index.get_loc_level(key)
         return self._constructor(self._values[indexer], index=new_index).__finalize__(
-            self
+            self,
         )
 
     def _get_values(self, indexer):
         try:
-            return self._constructor(self._mgr.get_slice(indexer)).__finalize__(self)
+            return self._constructor(self._mgr.get_slice(indexer)).__finalize__(self,)
         except ValueError:
             # mpl compat if we look up e.g. ser[:, np.newaxis];
             #  see tests.series.timeseries.test_mpl_compat_hack
@@ -1181,7 +1181,9 @@ def repeat(self, repeats, axis=None) -> "Series":
         nv.validate_repeat(tuple(), dict(axis=axis))
         new_index = self.index.repeat(repeats)
         new_values = self._values.repeat(repeats)
-        return self._constructor(new_values, index=new_index).__finalize__(self)
+        return self._constructor(new_values, index=new_index).__finalize__(
+            self, method="repeat"
+        )
 
     def reset_index(self, level=None, drop=False, name=None, inplace=False):
         """
@@ -1308,7 +1310,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False):
             else:
                 return self._constructor(
                     self._values.copy(), index=new_index
-                ).__finalize__(self)
+                ).__finalize__(self, method="reset_index")
         elif inplace:
             raise TypeError(
                 "Cannot reset_index inplace on a Series to create a DataFrame"
@@ -1707,7 +1709,9 @@ def count(self, level=None):
 
         obs = level_codes[notna(self._values)]
         out = np.bincount(obs, minlength=len(lev) or None)
-        return self._constructor(out, index=lev, dtype="int64").__finalize__(self)
+        return self._constructor(out, index=lev, dtype="int64").__finalize__(
+            self, method="count"
+        )
 
     def mode(self, dropna=True) -> "Series":
         """
@@ -2130,7 +2134,9 @@ def round(self, decimals=0, *args, **kwargs) -> "Series":
         """
         nv.validate_round(args, kwargs)
         result = self._values.round(decimals)
-        result = self._constructor(result, index=self.index).__finalize__(self)
+        result = self._constructor(result, index=self.index).__finalize__(
+            self, method="round"
+        )
 
         return result
 
@@ -2352,7 +2358,9 @@ def diff(self, periods: int = 1) -> "Series":
         dtype: float64
         """
         result = algorithms.diff(self.array, periods)
-        return self._constructor(result, index=self.index).__finalize__(self)
+        return self._constructor(result, index=self.index).__finalize__(
+            self, method="diff"
+        )
 
     def autocorr(self, lag=1) -> float:
         """
@@ -2469,7 +2477,7 @@ def dot(self, other):
         if isinstance(other, ABCDataFrame):
             return self._constructor(
                 np.dot(lvals, rvals), index=other.columns
-            ).__finalize__(self)
+            ).__finalize__(self, method="dot")
         elif isinstance(other, Series):
             return np.dot(lvals, rvals)
         elif isinstance(rvals, np.ndarray):
@@ -2994,7 +3002,7 @@ def _try_kind_sort(arr):
         if inplace:
             self._update_inplace(result)
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="sort_values")
 
     def sort_index(
         self,
@@ -3172,7 +3180,7 @@ def sort_index(
         if inplace:
             self._update_inplace(result)
         else:
-            return result.__finalize__(self)
+            return result.__finalize__(self, method="sort_index")
 
     def argsort(self, axis=0, kind="quicksort", order=None) -> "Series":
         """
@@ -3206,11 +3214,13 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> "Series":
             result = Series(-1, index=self.index, name=self.name, dtype="int64")
             notmask = ~mask
             result[notmask] = np.argsort(values[notmask], kind=kind)
-            return self._constructor(result, index=self.index).__finalize__(self)
+            return self._constructor(result, index=self.index).__finalize__(
+                self, method="argsort"
+            )
         else:
             return self._constructor(
                 np.argsort(values, kind=kind), index=self.index, dtype="int64"
-            ).__finalize__(self)
+            ).__finalize__(self, method="argsort")
 
     def nlargest(self, n=5, keep="first") -> "Series":
         """
@@ -3428,7 +3438,7 @@ def swaplevel(self, i=-2, j=-1, copy=True) -> "Series":
         assert isinstance(self.index, ABCMultiIndex)
         new_index = self.index.swaplevel(i, j)
         return self._constructor(self._values, index=new_index, copy=copy).__finalize__(
-            self
+            self, method="swaplevel"
         )
 
     def reorder_levels(self, order) -> "Series":
@@ -3632,7 +3642,9 @@ def map(self, arg, na_action=None) -> "Series":
         dtype: object
         """
         new_values = super()._map_values(arg, na_action=na_action)
-        return self._constructor(new_values, index=self.index).__finalize__(self)
+        return self._constructor(new_values, index=self.index).__finalize__(
+            self, method="map"
+        )
 
     def _gotitem(self, key, ndim, subset=None) -> "Series":
         """
@@ -3819,7 +3831,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
         """
         if len(self) == 0:
             return self._constructor(dtype=self.dtype, index=self.index).__finalize__(
-                self
+                self, method="apply"
             )
 
         # dispatch to agg
@@ -3856,7 +3868,9 @@ def f(x):
             # so extension arrays can be used
             return self._constructor_expanddim(pd.array(mapped), index=self.index)
         else:
-            return self._constructor(mapped, index=self.index).__finalize__(self)
+            return self._constructor(mapped, index=self.index).__finalize__(
+                self, method="apply"
+            )
 
     def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
@@ -4297,7 +4311,9 @@ def isin(self, values) -> "Series":
         Name: animal, dtype: bool
         """
         result = algorithms.isin(self, values)
-        return self._constructor(result, index=self.index).__finalize__(self)
+        return self._constructor(result, index=self.index).__finalize__(
+            self, method="isin"
+        )
 
     def between(self, left, right, inclusive=True) -> "Series":
         """
@@ -4533,7 +4549,9 @@ def to_timestamp(self, freq=None, how="start", copy=True) -> "Series":
         assert isinstance(self.index, (ABCDatetimeIndex, ABCPeriodIndex))
         new_index = self.index.to_timestamp(freq=freq, how=how)
-        return self._constructor(new_values, index=new_index).__finalize__(self)
+        return self._constructor(new_values, index=new_index).__finalize__(
+            self, method="to_timestamp"
+        )
 
     def to_period(self, freq=None, copy=True) -> "Series":
         """
@@ -4558,7 +4576,9 @@ def to_period(self, freq=None, copy=True) -> "Series":
         assert isinstance(self.index, ABCDatetimeIndex)
         new_index = self.index.to_period(freq=freq)
-        return self._constructor(new_values, index=new_index).__finalize__(self)
+        return self._constructor(new_values, index=new_index).__finalize__(
+            self, method="to_period"
+        )
 
     # ----------------------------------------------------------------------
     # Add index
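[Illustration] What threading `method` enables, sketched (attrs propagation shown; the method string itself is explicitly unstable per the docstring above):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]})
    df.attrs["source"] = "demo"
    # __finalize__ now receives e.g. method="copy", giving subclasses and
    # future metadata hooks context; attrs propagate as before.
    assert df.copy().attrs["source"] == "demo"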
4f1fb462636e815f6798ef43f8eb25ad794c5773 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Tue, 7 Apr 2020 01:10:18 +0300 Subject: [PATCH 15/29] DOC: Added an example for each series.dt field accessor (#33259) --- pandas/core/arrays/datetimes.py | 128 ++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index be2ac8c22bc8a..b9f9edcebad5b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1239,6 +1239,22 @@ def date(self): "Y", """ The year of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="Y") + ... ) + >>> datetime_series + 0 2000-12-31 + 1 2001-12-31 + 2 2002-12-31 + dtype: datetime64[ns] + >>> datetime_series.dt.year + 0 2000 + 1 2001 + 2 2002 + dtype: int64 """, ) month = _field_accessor( @@ -1246,6 +1262,22 @@ def date(self): "M", """ The month as January=1, December=12. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="M") + ... ) + >>> datetime_series + 0 2000-01-31 + 1 2000-02-29 + 2 2000-03-31 + dtype: datetime64[ns] + >>> datetime_series.dt.month + 0 1 + 1 2 + 2 3 + dtype: int64 """, ) day = _field_accessor( @@ -1253,6 +1285,22 @@ def date(self): "D", """ The day of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="D") + ... ) + >>> datetime_series + 0 2000-01-01 + 1 2000-01-02 + 2 2000-01-03 + dtype: datetime64[ns] + >>> datetime_series.dt.day + 0 1 + 1 2 + 2 3 + dtype: int64 """, ) hour = _field_accessor( @@ -1260,6 +1308,22 @@ def date(self): "h", """ The hours of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="h") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 01:00:00 + 2 2000-01-01 02:00:00 + dtype: datetime64[ns] + >>> datetime_series.dt.hour + 0 0 + 1 1 + 2 2 + dtype: int64 """, ) minute = _field_accessor( @@ -1267,6 +1331,22 @@ def date(self): "m", """ The minutes of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="T") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:01:00 + 2 2000-01-01 00:02:00 + dtype: datetime64[ns] + >>> datetime_series.dt.minute + 0 0 + 1 1 + 2 2 + dtype: int64 """, ) second = _field_accessor( @@ -1274,6 +1354,22 @@ def date(self): "s", """ The seconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="s") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00 + 1 2000-01-01 00:00:01 + 2 2000-01-01 00:00:02 + dtype: datetime64[ns] + >>> datetime_series.dt.second + 0 0 + 1 1 + 2 2 + dtype: int64 """, ) microsecond = _field_accessor( @@ -1281,6 +1377,22 @@ def date(self): "us", """ The microseconds of the datetime. + + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="us") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00.000000 + 1 2000-01-01 00:00:00.000001 + 2 2000-01-01 00:00:00.000002 + dtype: datetime64[ns] + >>> datetime_series.dt.microsecond + 0 0 + 1 1 + 2 2 + dtype: int64 """, ) nanosecond = _field_accessor( @@ -1288,6 +1400,22 @@ def date(self): "ns", """ The nanoseconds of the datetime. 
+ + Examples + -------- + >>> datetime_series = pd.Series( + ... pd.date_range("2000-01-01", periods=3, freq="ns") + ... ) + >>> datetime_series + 0 2000-01-01 00:00:00.000000000 + 1 2000-01-01 00:00:00.000000001 + 2 2000-01-01 00:00:00.000000002 + dtype: datetime64[ns] + >>> datetime_series.dt.nanosecond + 0 0 + 1 1 + 2 2 + dtype: int64 """, ) weekofyear = _field_accessor( From 8150c11db0c21cb8604c7133d2571070cb725787 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 15:16:27 -0700 Subject: [PATCH 16/29] BUG: Timestamp+- ndarray[td64] (#33296) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/tslibs/c_timestamp.pyx | 14 ++++ .../tests/scalar/timestamp/test_arithmetic.py | 73 +++++++++++++------ 3 files changed, 65 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 170d0f7110aa4..cbfc6d63e8ea3 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -306,6 +306,7 @@ Datetimelike - :class:`Timestamp` raising confusing error message when year, month or day is missing (:issue:`31200`) - Bug in :class:`DatetimeIndex` constructor incorrectly accepting ``bool``-dtyped inputs (:issue:`32668`) - Bug in :meth:`DatetimeIndex.searchsorted` not accepting a ``list`` or :class:`Series` as its argument (:issue:`32762`) +- Bug in :class:`Timestamp` arithmetic when adding or subtracting a ``np.ndarray`` with ``timedelta64`` dtype (:issue:`33296`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 3c30460a74ece..04fadf220388f 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -253,6 +253,13 @@ cdef class _Timestamp(datetime): elif is_array(other): if other.dtype.kind in ['i', 'u']: raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 + other + return np.asarray( + [self + other[n] for n in range(len(other))], + dtype=object, + ) # index/series like elif hasattr(other, '_typ'): @@ -275,6 +282,13 @@ cdef class _Timestamp(datetime): elif is_array(other): if other.dtype.kind in ['i', 'u']: raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 - other + return np.asarray( + [self - other[n] for n in range(len(other))], + dtype=object, + ) typ = getattr(other, '_typ', None) if typ is not None: diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index ee70d1d0432fc..b038ee1aee106 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -6,6 +6,7 @@ from pandas.errors import OutOfBoundsDatetime from pandas import Timedelta, Timestamp +import pandas._testing as tm from pandas.tseries import offsets from pandas.tseries.frequencies import to_offset @@ -177,29 +178,6 @@ def test_timestamp_add_timedelta64_unit(self, other, expected_difference): valdiff = result.value - ts.value assert valdiff == expected_difference - @pytest.mark.parametrize("ts", [Timestamp.now(), Timestamp.now("utc")]) - @pytest.mark.parametrize( - "other", - [ - 1, - np.int64(1), - np.array([1, 2], dtype=np.int32), - np.array([3, 4], dtype=np.uint64), - ], - ) - def test_add_int_no_freq_raises(self, ts, other): - msg = "Addition/subtraction of integers and integer-arrays" - with pytest.raises(TypeError, match=msg): - ts + other - with pytest.raises(TypeError, match=msg): - other + ts - - with 
pytest.raises(TypeError, match=msg): - ts - other - msg = "unsupported operand type" - with pytest.raises(TypeError, match=msg): - other - ts - @pytest.mark.parametrize( "ts", [ @@ -229,3 +207,52 @@ def test_add_int_with_freq(self, ts, other): msg = "unsupported operand type" with pytest.raises(TypeError, match=msg): other - ts + + @pytest.mark.parametrize("shape", [(6,), (2, 3,)]) + def test_addsub_m8ndarray(self, shape): + # GH#33296 + ts = Timestamp("2020-04-04 15:45") + other = np.arange(6).astype("m8[h]").reshape(shape) + + result = ts + other + + ex_stamps = [ts + Timedelta(hours=n) for n in range(6)] + expected = np.array([x.asm8 for x in ex_stamps], dtype="M8[ns]").reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + result = other + ts + tm.assert_numpy_array_equal(result, expected) + + result = ts - other + ex_stamps = [ts - Timedelta(hours=n) for n in range(6)] + expected = np.array([x.asm8 for x in ex_stamps], dtype="M8[ns]").reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timestamp'" + with pytest.raises(TypeError, match=msg): + other - ts + + @pytest.mark.parametrize("shape", [(6,), (2, 3,)]) + def test_addsub_m8ndarray_tzaware(self, shape): + # GH#33296 + ts = Timestamp("2020-04-04 15:45", tz="US/Pacific") + + other = np.arange(6).astype("m8[h]").reshape(shape) + + result = ts + other + + ex_stamps = [ts + Timedelta(hours=n) for n in range(6)] + expected = np.array(ex_stamps).reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + result = other + ts + tm.assert_numpy_array_equal(result, expected) + + result = ts - other + ex_stamps = [ts - Timedelta(hours=n) for n in range(6)] + expected = np.array(ex_stamps).reshape(shape) + tm.assert_numpy_array_equal(result, expected) + + msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timestamp'" + with pytest.raises(TypeError, match=msg): + other - ts From 9585ae424c2eb0b05d94232ca5b5df09111c14cd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 15:17:18 -0700 Subject: [PATCH 17/29] BUG: 2D indexing on DTA/TDA/PA (#33290) --- pandas/core/arrays/datetimelike.py | 12 ++-------- pandas/core/indexes/extension.py | 5 +++- pandas/tests/arrays/test_datetimelike.py | 29 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c0bbbebac7c33..4fabd8f558fee 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -550,10 +550,7 @@ def __getitem__(self, key): key = np.asarray(key, dtype=bool) key = check_array_indexer(self, key) - if key.all(): - key = slice(0, None, None) - else: - key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + key = lib.maybe_booleans_to_slice(key.view(np.uint8)) elif isinstance(key, list) and len(key) == 1 and isinstance(key[0], slice): # see https://github.com/pandas-dev/pandas/issues/31299, need to allow # this for now (would otherwise raise in check_array_indexer) @@ -561,7 +558,7 @@ def __getitem__(self, key): else: key = check_array_indexer(self, key) - is_period = is_period_dtype(self) + is_period = is_period_dtype(self.dtype) if is_period: freq = self.freq else: @@ -577,11 +574,6 @@ def __getitem__(self, key): freq = self.freq result = getitem(key) - if result.ndim > 1: - # To support MPL which performs slicing with 2 dim - # even though it only has 1 dim by definition - return result - return self._simple_new(result, 
dtype=self.dtype, freq=freq) def __setitem__( diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index f38a4fb83c64f..c752990531b34 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -214,7 +214,10 @@ class ExtensionIndex(Index): def __getitem__(self, key): result = self._data[key] if isinstance(result, type(self._data)): - return type(self)(result, name=self.name) + if result.ndim == 1: + return type(self)(result, name=self.name) + # Unpack to ndarray for MPL compat + result = result._data # Includes cases where we get a 2D ndarray back for MPL compat deprecate_ndim_indexing(result) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 83995ab26cb56..fe35344f46688 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -60,6 +60,12 @@ def timedelta_index(request): class SharedTests: index_cls: Type[Union[DatetimeIndex, PeriodIndex, TimedeltaIndex]] + @pytest.fixture + def arr1d(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + return arr + def test_compare_len1_raises(self): # make sure we raise when comparing with different lengths, specific # to the case where one has length-1, which numpy would broadcast @@ -204,6 +210,18 @@ def test_searchsorted(self): result = arr.searchsorted(pd.NaT) assert result == 0 + def test_getitem_2d(self, arr1d): + # 2d slicing on a 1D array + expected = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype) + result = arr1d[:, np.newaxis] + tm.assert_equal(result, expected) + + # Lookup on a 2D array + arr2d = expected + expected = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype) + result = arr2d[:3, 0] + tm.assert_equal(result, expected) + def test_setitem(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 arr = self.array_cls(data, freq="D") @@ -265,6 +283,13 @@ class TestDatetimeArray(SharedTests): array_cls = DatetimeArray dtype = pd.Timestamp + @pytest.fixture + def arr1d(self, tz_naive_fixture): + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01 01:01:00", periods=3, freq="H", tz=tz) + dta = dti._data + return dta + def test_round(self, tz_naive_fixture): # GH#24064 tz = tz_naive_fixture @@ -645,6 +670,10 @@ class TestPeriodArray(SharedTests): array_cls = PeriodArray dtype = pd.Period + @pytest.fixture + def arr1d(self, period_index): + return period_index._data + def test_from_pi(self, period_index): pi = period_index arr = PeriodArray(pi) From c05d28b9b25918ab1013db947e9beada9d55fce9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 16:19:10 -0700 Subject: [PATCH 18/29] REF: dispatch TDBlock.to_native_types to TDA._format_native_types (#33270) --- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/construction.py | 2 +- pandas/core/indexes/accessors.py | 16 ++++++++-------- pandas/core/internals/blocks.py | 22 +++------------------- pandas/core/tools/timedeltas.py | 4 ++-- pandas/io/formats/format.py | 5 ----- pandas/tests/frame/test_dtypes.py | 7 +------ pandas/tests/io/formats/test_format.py | 6 +++--- pandas/tests/series/test_dtypes.py | 11 +++++++---- 9 files changed, 26 insertions(+), 49 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a9c8977991740..8c93dca783113 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -389,7 +389,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): 
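# A minimal sketch of the behavior this refactor standardizes (values are
# arbitrary): timedelta strings rendered via _format_native_types now always
# carry the days component, matching the docstring updates further down.
import numpy as np
import pandas as pd

ser = pd.Series(pd.to_timedelta(np.arange(3), unit="s"))
print(ser.astype(str).tolist())
# ['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02']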
from pandas.io.formats.format import _get_format_timedelta64 formatter = _get_format_timedelta64(self._data, na_rep) - return np.array([formatter(x) for x in self._data]) + return np.array([formatter(x) for x in self._data.ravel()]).reshape(self.shape) # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/construction.py b/pandas/core/construction.py index c9754ff588896..2d60ad9ba50bf 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -187,7 +187,7 @@ def array( >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]') - ['01:00:00', '02:00:00'] + ['0 days 01:00:00', '0 days 02:00:00'] Length: 2, dtype: timedelta64[ns] Examples diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 2908d468bcae0..d2cee5d94422c 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -241,9 +241,9 @@ class TimedeltaProperties(Properties): ... pd.timedelta_range(start="1 second", periods=3, freq="S") ... ) >>> seconds_series - 0 00:00:01 - 1 00:00:02 - 2 00:00:03 + 0 0 days 00:00:01 + 1 0 days 00:00:02 + 2 0 days 00:00:03 dtype: timedelta64[ns] >>> seconds_series.dt.seconds 0 1 @@ -301,11 +301,11 @@ def components(self): -------- >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s')) >>> s - 0 00:00:00 - 1 00:00:01 - 2 00:00:02 - 3 00:00:03 - 4 00:00:04 + 0 0 days 00:00:00 + 1 0 days 00:00:01 + 2 0 days 00:00:02 + 3 0 days 00:00:03 + 4 0 days 00:00:04 dtype: timedelta64[ns] >>> s.dt.components days hours minutes seconds milliseconds microseconds nanoseconds diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c23f78d845cfd..ba2fd037901a2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2357,26 +2357,10 @@ def fillna(self, value, **kwargs): ) return super().fillna(value, **kwargs) - def to_native_types(self, na_rep=None, quoting=None, **kwargs): + def to_native_types(self, na_rep="NaT", **kwargs): """ convert to our native types format """ - values = self.values - mask = isna(values) - - rvalues = np.empty(values.shape, dtype=object) - if na_rep is None: - na_rep = "NaT" - rvalues[mask] = na_rep - imask = (~mask).ravel() - - # FIXME: - # should use the formats.format.Timedelta64Formatter here - # to figure what format to pass to the Timedelta - # e.g. 
to not show the decimals say - rvalues.flat[imask] = np.array( - [Timedelta(val)._repr_base(format="all") for val in values.ravel()[imask]], - dtype=object, - ) - return rvalues + tda = self.array_values() + return tda._format_native_types(na_rep, **kwargs) class BoolBlock(NumericBlock): diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 960a82caafeeb..48f30acf269da 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -69,8 +69,8 @@ def to_timedelta(arg, unit="ns", errors="raise"): Converting numbers by specifying the `unit` keyword argument: >>> pd.to_timedelta(np.arange(5), unit='s') - TimedeltaIndex(['00:00:00', '00:00:01', '00:00:02', - '00:00:03', '00:00:04'], + TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02', + '0 days 00:00:03', '0 days 00:00:04'], dtype='timedelta64[ns]', freq=None) >>> pd.to_timedelta(np.arange(5), unit='d') TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a9e668312d751..59542a8da535e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1672,14 +1672,9 @@ def _get_format_timedelta64( even_days = ( np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0 ) - all_sub_day = ( - np.logical_and(consider_values, np.abs(values_int) >= one_day_nanos).sum() == 0 - ) if even_days: format = None - elif all_sub_day: - format = "sub_day" else: format = "long" diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 2cda4ba16f7ce..27ebee4aaaccf 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -248,12 +248,7 @@ def test_astype_str(self): { "a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))), "b": list(map(str, map(Timestamp, b._values))), - "c": list( - map( - str, - map(lambda x: Timedelta(x)._repr_base(format="all"), c._values), - ) - ), + "c": list(map(lambda x: Timedelta(x)._repr_base(), c._values)), "d": list(map(str, d._values)), "e": list(map(str, e._values)), } diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 1a5d122d732a9..f3c3344992942 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3003,13 +3003,13 @@ def test_days_neg(self): def test_subdays(self): y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="s") result = fmt.Timedelta64Formatter(y, box=True).get_result() - assert result[0].strip() == "'00:00:00'" - assert result[1].strip() == "'00:00:01'" + assert result[0].strip() == "'0 days 00:00:00'" + assert result[1].strip() == "'0 days 00:00:01'" def test_subdays_neg(self): y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="s") result = fmt.Timedelta64Formatter(-y, box=True).get_result() - assert result[0].strip() == "'00:00:00'" + assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'-1 days +23:59:59'" def test_zero(self): diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 2f2a663d559d0..05e708e575a64 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -132,7 +132,7 @@ def test_astype_str_map(self, dtype, series): expected = series.map(str) tm.assert_series_equal(result, expected) - def test_astype_str_cast(self): + def test_astype_str_cast_dt64(self): # see gh-9757 ts = Series([Timestamp("2010-01-04 00:00:00")]) s = ts.astype(str) @@ -146,11 
+146,14 @@ def test_astype_str_cast(self): expected = Series([str("2010-01-04 00:00:00-05:00")]) tm.assert_series_equal(s, expected) + def test_astype_str_cast_td64(self): + # see gh-9757 + td = Series([Timedelta(1, unit="d")]) - s = td.astype(str) + ser = td.astype(str) - expected = Series([str("1 days 00:00:00.000000000")]) - tm.assert_series_equal(s, expected) + expected = Series([str("1 days")]) + tm.assert_series_equal(ser, expected) def test_astype_unicode(self): # see gh-7758: A bit of magic is required to set From 047e5d7620644e3493aac02f85fc2e78f2ed586b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 16:20:24 -0700 Subject: [PATCH 19/29] REF: put concatenate_block_managers in internals.concat (#33231) --- pandas/core/internals/__init__.py | 7 ++-- pandas/core/internals/concat.py | 58 ++++++++++++++++++++++++++++--- pandas/core/internals/managers.py | 47 ------------------------- 3 files changed, 55 insertions(+), 57 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index bc45b7c74ecc1..1090f862acb8a 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -14,11 +14,8 @@ _safe_reshape, make_block, ) -from pandas.core.internals.managers import ( - BlockManager, - SingleBlockManager, - concatenate_block_managers, -) +from pandas.core.internals.concat import concatenate_block_managers +from pandas.core.internals.managers import BlockManager, SingleBlockManager __all__ = [ "Block", diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 6839d138fbf73..720e6799a3bf3 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -23,9 +23,57 @@ from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos +from pandas.core.internals.blocks import make_block +from pandas.core.internals.managers import BlockManager -def get_mgr_concatenation_plan(mgr, indexers): +def concatenate_block_managers( + mgrs_indexers, axes, concat_axis: int, copy: bool +) -> BlockManager: + """ + Concatenate block managers into one. + + Parameters + ---------- + mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples + axes : list of Index + concat_axis : int + copy : bool + + Returns + ------- + BlockManager + """ + concat_plans = [ + _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers + ] + concat_plan = _combine_concat_plans(concat_plans, concat_axis) + blocks = [] + + for placement, join_units in concat_plan: + + if len(join_units) == 1 and not join_units[0].indexers: + b = join_units[0].block + values = b.values + if copy: + values = values.copy() + else: + values = values.view() + b = b.make_block_same_class(values, placement=placement) + elif _is_uniform_join_units(join_units): + b = join_units[0].block.concat_same_type([ju.block for ju in join_units]) + b.mgr_locs = placement + else: + b = make_block( + _concatenate_join_units(join_units, concat_axis, copy=copy), + placement=placement, + ) + blocks.append(b) + + return BlockManager(blocks, axes) + + +def _get_mgr_concatenation_plan(mgr, indexers): """ Construct concatenation plan for given block manager and indexers. @@ -232,7 +280,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): return values -def concatenate_join_units(join_units, concat_axis, copy): +def _concatenate_join_units(join_units, concat_axis, copy): """ Concatenate values from several join units along selected axis. 
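# A minimal sketch of the code path these relocated helpers serve (frames
# below are arbitrary examples): pd.concat routes same-dtype blocks through
# the uniform join-unit fast path, while mixed dtypes fall back to
# concat_compat.
import pandas as pd

df1 = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
df2 = pd.DataFrame({"a": [3, 4], "b": ["z", "w"]})
res = pd.concat([df1, df2], ignore_index=True)
print(res.dtypes)  # per-column dtypes survive the block-manager concatenation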
""" @@ -371,11 +419,11 @@ def _get_empty_dtype_and_na(join_units): raise AssertionError(msg) -def is_uniform_join_units(join_units) -> bool: +def _is_uniform_join_units(join_units) -> bool: """ Check if the join units consist of blocks of uniform type that can be concatenated using Block.concat_same_type instead of the generic - concatenate_join_units (which uses `concat_compat`). + _concatenate_join_units (which uses `concat_compat`). """ return ( @@ -429,7 +477,7 @@ def _trim_join_unit(join_unit, length): return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape) -def combine_concat_plans(plans, concat_axis): +def _combine_concat_plans(plans, concat_axis): """ Combine multiple concatenation plans into one. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9191c2f0a0a76..b0363dd21f616 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -47,12 +47,6 @@ get_block_type, make_block, ) -from pandas.core.internals.concat import ( # all for concatenate_block_managers - combine_concat_plans, - concatenate_join_units, - get_mgr_concatenation_plan, - is_uniform_join_units, -) from pandas.io.formats.printing import pprint_thing @@ -1974,44 +1968,3 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): if not allow_fill: indexer = maybe_convert_indices(indexer, length) return "fancy", indexer, len(indexer) - - -def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): - """ - Concatenate block managers into one. - - Parameters - ---------- - mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples - axes : list of Index - concat_axis : int - copy : bool - - """ - concat_plans = [ - get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers - ] - concat_plan = combine_concat_plans(concat_plans, concat_axis) - blocks = [] - - for placement, join_units in concat_plan: - - if len(join_units) == 1 and not join_units[0].indexers: - b = join_units[0].block - values = b.values - if copy: - values = values.copy() - else: - values = values.view() - b = b.make_block_same_class(values, placement=placement) - elif is_uniform_join_units(join_units): - b = join_units[0].block.concat_same_type([ju.block for ju in join_units]) - b.mgr_locs = placement - else: - b = make_block( - concatenate_join_units(join_units, concat_axis, copy=copy), - placement=placement, - ) - blocks.append(b) - - return BlockManager(blocks, axes) From 0e382f2f305e4e8a9fa476d5aff4299a9e3e02f6 Mon Sep 17 00:00:00 2001 From: mproszewska <38814059+mproszewska@users.noreply.github.com> Date: Tue, 7 Apr 2020 01:22:59 +0200 Subject: [PATCH 20/29] TST: Add tests for duplicated and drop_duplicates (#32575) --- .../indexes/categorical/test_category.py | 75 +++++++++++++++++-- pandas/tests/indexes/conftest.py | 9 +++ pandas/tests/indexes/datetimes/test_ops.py | 71 +++++++----------- pandas/tests/indexes/period/test_ops.py | 49 ++++++------ pandas/tests/indexes/timedeltas/test_ops.py | 50 +++++++------ 5 files changed, 159 insertions(+), 95 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 543edc6b66ff2..83fe21fd20bfe 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -292,16 +292,81 @@ def test_is_monotonic(self, data, non_lexsorted_data): assert c.is_monotonic_decreasing is False def test_has_duplicates(self): - idx = CategoricalIndex([0, 0, 0], 
name="foo") assert idx.is_unique is False assert idx.has_duplicates is True - def test_drop_duplicates(self): + idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo") + assert idx.is_unique is False + assert idx.has_duplicates is True - idx = CategoricalIndex([0, 0, 0], name="foo") - expected = CategoricalIndex([0], name="foo") - tm.assert_index_equal(idx.drop_duplicates(), expected) + idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo") + assert idx.is_unique is True + assert idx.has_duplicates is False + + @pytest.mark.parametrize( + "data, categories, expected", + [ + ( + [1, 1, 1], + [1, 2, 3], + { + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, + ), + ( + [1, 1, 1], + list("abc"), + { + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, + ), + ( + [2, "a", "b"], + list("abc"), + { + "first": np.zeros(shape=(3), dtype=np.bool), + "last": np.zeros(shape=(3), dtype=np.bool), + False: np.zeros(shape=(3), dtype=np.bool), + }, + ), + ( + list("abb"), + list("abc"), + { + "first": np.array([False, False, True]), + "last": np.array([False, True, False]), + False: np.array([False, True, True]), + }, + ), + ], + ) + def test_drop_duplicates(self, data, categories, expected): + + idx = CategoricalIndex(data, categories=categories, name="foo") + for keep, e in expected.items(): + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e) + e = idx[~e] + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, e) + + @pytest.mark.parametrize( + "data, categories, expected_data, expected_categories", + [ + ([1, 1, 1], [1, 2, 3], [1], [1]), + ([1, 1, 1], list("abc"), [np.nan], []), + ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan], [1, 2]), + ([2, "a", "b"], list("abc"), [np.nan, "a", "b"], ["a", "b"]), + ], + ) + def test_unique(self, data, categories, expected_data, expected_categories): + + idx = CategoricalIndex(data, categories=categories) + expected = CategoricalIndex(expected_data, categories=expected_categories) tm.assert_index_equal(idx.unique(), expected) def test_repr_roundtrip(self): diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index a9fb228073ab4..fb17e1df6341b 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -16,3 +16,12 @@ def sort(request): in in the Index setops methods. """ return request.param + + +@pytest.fixture(params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]) +def freq_sample(request): + """ + Valid values for 'freq' parameter used to create date_range and + timedelta_range.. 
+ """ + return request.param diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index cbf6b7b63bd50..c55b0481c1041 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -264,9 +264,9 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - def test_drop_duplicates_metadata(self): + def test_drop_duplicates_metadata(self, freq_sample): # GH 10115 - idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -277,57 +277,38 @@ def test_drop_duplicates_metadata(self): tm.assert_index_equal(idx, result) assert result.freq is None - def test_drop_duplicates(self): + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], + ) + def test_drop_duplicates(self, freq_sample, keep, expected, index): # to check Index/Series compat - base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") - idx = base.append(base[:5]) + idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") + idx = idx.append(idx[:5]) - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] - res = idx.drop_duplicates(keep="last") - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep="last") - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) - @pytest.mark.parametrize( - "freq", - [ - "A", - "2A", - "-2A", - "Q", - "-1Q", - "M", - "-1M", - "D", - "3D", - "-3D", - "W", - "-1W", - "H", - "2H", - "-2H", - "T", - "2T", - "S", - "-3S", - ], - ) - def test_infer_freq(self, freq): + def test_infer_freq(self, freq_sample): # GH 11018 - idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10) + idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) result = pd.DatetimeIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) - assert result.freq == freq + assert result.freq == freq_sample def test_nat(self, tz_naive_fixture): tz = tz_naive_fixture diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 196946e696c8d..fc44226f9d72f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -81,9 +81,10 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), exp_idx) - def test_drop_duplicates_metadata(self): + @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", 
"3S"]) + def test_drop_duplicates_metadata(self, freq): # GH 10115 - idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -93,26 +94,32 @@ def test_drop_duplicates_metadata(self): tm.assert_index_equal(idx, result) assert idx.freq == result.freq - def test_drop_duplicates(self): + @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], + ) + def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat - base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep="last") - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep="last") - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") + idx = idx.append(idx[:5]) + + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] + + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) + + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) def test_order_compat(self): def _check_freq(index, expected_index): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 4af5df6e2cc55..aa1bf997fc66b 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -134,9 +134,9 @@ def test_order(self): tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - def test_drop_duplicates_metadata(self): + def test_drop_duplicates_metadata(self, freq_sample): # GH 10115 - idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx") + idx = pd.timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -147,36 +147,38 @@ def test_drop_duplicates_metadata(self): tm.assert_index_equal(idx, result) assert result.freq is None - def test_drop_duplicates(self): + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], + ) + def test_drop_duplicates(self, freq_sample, keep, expected, index): # to check Index/Series compat - base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx") - idx = base.append(base[:5]) + idx = pd.timedelta_range("1 day", 
periods=10, freq=freq_sample, name="idx") + idx = idx.append(idx[:5]) - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] - res = idx.drop_duplicates(keep="last") - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep="last") - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) - @pytest.mark.parametrize( - "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] - ) - def test_infer_freq(self, freq): + def test_infer_freq(self, freq_sample): # GH#11018 - idx = pd.timedelta_range("1", freq=freq, periods=10) + idx = pd.timedelta_range("1", freq=freq_sample, periods=10) result = pd.TimedeltaIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) - assert result.freq == freq + assert result.freq == freq_sample def test_repeat(self): index = pd.timedelta_range("1 days", periods=2, freq="D") From 717662bf1e9141d6d3e752ea9a0c8c5ca966b284 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Mon, 6 Apr 2020 16:26:48 -0700 Subject: [PATCH 21/29] Ods loses spaces 32207 (#33233) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/io/excel/_odfreader.py | 27 +++++++++++++++++++- pandas/tests/io/data/excel/test_spaces.ods | Bin 0 -> 9263 bytes pandas/tests/io/data/excel/test_spaces.xls | Bin 0 -> 5632 bytes pandas/tests/io/data/excel/test_spaces.xlsb | Bin 0 -> 8036 bytes pandas/tests/io/data/excel/test_spaces.xlsm | Bin 0 -> 4848 bytes pandas/tests/io/data/excel/test_spaces.xlsx | Bin 0 -> 8622 bytes pandas/tests/io/excel/test_readers.py | 18 +++++++++++++ 8 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/io/data/excel/test_spaces.ods create mode 100644 pandas/tests/io/data/excel/test_spaces.xls create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsb create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsm create mode 100644 pandas/tests/io/data/excel/test_spaces.xlsx diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index cbfc6d63e8ea3..6bb22f4c16aa1 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -414,6 +414,7 @@ I/O - Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`) - Bug in :meth:`read_csv` was raising an ``IndexError`` when header=None and 2 extra data columns - Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`) +- Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. 
(:issue:`32207`) Plotting ^^^^^^^^ diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 7af776dc1a10f..739c77d1c0b99 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -171,7 +171,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: cell_value = cell.attributes.get((OFFICENS, "value")) return float(cell_value) elif cell_type == "string": - return str(cell) + return self._get_cell_string_value(cell) elif cell_type == "currency": cell_value = cell.attributes.get((OFFICENS, "value")) return float(cell_value) @@ -182,3 +182,28 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: return pd.to_datetime(str(cell)).time() else: raise ValueError(f"Unrecognized type {cell_type}") + + def _get_cell_string_value(self, cell) -> str: + """ + Find and decode OpenDocument text:s tags that represent + a run length encoded sequence of space characters. + """ + from odf.element import Text, Element + from odf.text import S, P + from odf.namespaces import TEXTNS + + text_p = P().qname + text_s = S().qname + + p = cell.childNodes[0] + + value = [] + if p.qname == text_p: + for k, fragment in enumerate(p.childNodes): + if isinstance(fragment, Text): + value.append(fragment.data) + elif isinstance(fragment, Element): + if fragment.qname == text_s: + spaces = int(fragment.attributes.get((TEXTNS, "c"), 1)) + value.append(" " * spaces) + return "".join(value) diff --git a/pandas/tests/io/data/excel/test_spaces.ods b/pandas/tests/io/data/excel/test_spaces.ods new file mode 100644 index 0000000000000000000000000000000000000000..375e839c8c22105e09c86ec28d65268435119a1c GIT binary patch literal 9263 zcmdsdbzD^2*FGq%C?z0*N=Zse4kDcn-Q6(65HqB7mvl+DGIV##5F_1P(j}lY@`HPS zzIyN1d*Ao{_g(YZbM~1z>)HG4v(8$3J$uVZ-$o%oLc%~os*h2X^Rwg$W;N`mvet)yAWYUa#$Y3BLwhS@Fq8>mV`r>y1Tix< zhRXc~vu4DT8XFS{>8220V=9^1TN!}$L6#6^=#MIs4cIhDPDUIXiv;WX5^PBc5rymH z8WIvR62|T8ng?lQG7{3Q5V_Y%qS$z(gt)lWDI)b6C?*1w z5`Qf%Edo?}Eu#Qblm{wnD2R(GN{hAMZZf)ii>&|pOBmolbDno_AVnP zB|9Z8BR)MRJ_DANk&~1KOOFlBObE|TipfY#&dEqF$WG45$GaS%grp#%P1-; zN`GITRajnFTvAw43@fR~FRv}GYc6@8US5z>T9{W=R#;aKE2}84sH&-`ZK$a%t*@=B zudgqvX)UjBt7&SlYJpd`bTzfZ>)U&py84^Dhg+(P+Uv^On(98bHukhsBiiZ*I-3T& zn%mo2+TmTDJ)gSa9iO^85C}xar{SKy;oiZKPs3Bgh_<2Lo{8R$k%8Xnk=~J!k-@Qv zk;&P~v7zaSk?HB_v6-dmxy6~q<(Z}Rg{h&%x#^{a+0UOpFRpH`Z0@YDu54|rZf z5)#@=NfALMr>UK(Ek%4)Vst%CmvYqU(kgi-WI^m9xri7%AqpJ4&kFJhLmF&m1k7(c zBE`@d6CY%MLP2N97OWVezf0Ibg~9-mkP?&hp4JD}wQrxksS82{b>2VHoVai>;i;!d zy5FD7wdU5ZKOT9)J=R#_DB;<+ER7iNFRsGqKc#9lqm8@Jm;0=yv&T)_pKBZoP)FwN z9?SXm5_mq^dk@U8cR0sH^Og&BGr4AiI%}xOXl|ofS)9ZgV^H0uB3FttbFAAR61q7> zpc|({DutgOozvY$LH=E|ZSZ41-V&wn+vo@jA9 zj$7|o>*^;3F--T@r%%eda-Z0QE9}b8Y99!UbEG-B?S!wLx4xz9ai_arC6 zfJa8i7yRrGS@%ZnYN+9o%O3Qz_}^;_UiY30uAStoS}Wy_;fwlI;IEHo?N4; zc5S4>Fy&TluyX|H+RNuhR5~Ca0j6auWV0OyP4JfrX*kImYvK%!m(D(+Q;!Mzt3Z#M zPJ*0XU95hT$=OZCfgk35U|ZalnsB)vm-oJHiHtJWAO0wr!MJTPlLq4N$9mtb%+6Fn zshV~LmbEz*{I&ZlURIvkXwJwAx8I@`N58LGro%xkyM$WXK?eY#JIsjde>mTHUvQfS zqpOzc<7|`J_kya!oeR5)&2s<~ir@NnQ4PErb!BWgjN$xK_F9xJi%7~Ct08aC^A9x~ zTC3BamJ(4A`R{coH=bop#m5|v^*>Q+9Ae;GQ>9D;fbzhX24~cUd9trx7vkB;$jMyA zo1Ril8WlOrP3}0icLGM6pa$mQmVTJslf~onR3Eb*X?zZ=)_DH1Tv3>u-FdwUR=gV$ zaG~BPju(plzGTQuYgG6!az}j&MvIND@o|D#>{gO)R#G+n5a@dzRdpf~2YS zGl*$*L~NNXRhT}1m zgYF!}Je`#9i&O*z$X$10Ls*#r(($Ocn9`Cb)( zFspN@55spHn~Z8mB1=tx@<&`81{1{EYm30Qf~T<0*&p_$iO7S`TvcA=R!2s8Zk)F* zr1)#`zC$F;Ne8>jo`1zK*0G)HydQ0)8!taH&9!?xU5Lg3)2y|AaShcsD+!M zxv(~U^?Y+F$p>Omx-xk7=84iI*{#5_&~t0WOdE~nB_+t`kZ{g7Y>DoRDeCM7QmmS< 
zgA9i@zBy&tBBo}HLOMIDKW#@L5Y-nvJVb6ivV;yt*9{41L>5kExL#arxqR1lCyqGeUEemo54GFc`UyZ)w{iNUpcIXq z?x1Ou$3hc(fypI?`FyN>JNI;KuP!vh<{#jY;LTfGaa;c|-d8l*u#~EcziOD*xFlq2uC>wLSh&!2Aa$-?;g-E0MuyNyL9v3tB4K| z&GwxRt{bK!lrvUmo2TOg+PK{CUeF!u-PX||5b4;X<;j|)t_q;NJ6IS$tU#ZEsfTUA z3)RU1-Ax>Zh*b!>zh6gC_F)Ow-&>fcCBW-Ip;k`(el;c$9p82JfV{cTpHED%nNro4 z2RUn#-6ewS3abMb1wG?~1=h>=ZYJrxWKdm?+b6=RgB(=rif!@Ks~ak}`@a_Qm>HK0 zmqpl7fg4)^!i{1%1I4%}k1}5k(^j(aqPbBqe#*+_*BM1I!n8HJq;@K%s3~t-edUJOm#X>tGj;ddkB^%zK5?WLZgW??dk!iAdmyL(^G3-5K_T zbI!$Vs=kxHgFa20wZM&HaWAkyCmg@e%k92kJHwbWj4R{7Kqlk;P$ozzf<~F_mV%z! z`-)+lM=vva}yJl~i*R&S9=S)>c%9xdRtEdQ@{{uwKo`ZCKh|D=Y)nVQRg z0{H@lqRXE}g(Ib{qlxEVieAy}x@n+)NiJAqC3`e~h${@Dh$Sgtl2t}0?#XtHdG!+I zvAi6AWv*t~r_IFLFwYBB{UdzPNS|4iszisO}rL_O|wn%={LmO)wdz%~8f4Q?K zztIC(>6;oun1w)4D}5WtzbOB%Q3zBYYX9F1=kFebzM-M9rSbK1vHrKg`CW@Q55h(t zY;5`ed-_jd!b%?uGBJigne2>AdZT;6J}fxCdwhPzeG>xAsHk?dJ+zQhErg^6(su-Rm zlI@%O#y!0|G{VIjVfU2jFUIK41U-$SzeMswMq&wK-4&`}TGj^6b z^%{qLGWqnrFog9_%mv=fUC~)+EZNoanRksfbJ;a?u^;Z(pQnt`OJ>`Z-K+;VU#)#- z>c4upR9xX8Vkyv&8avx8$v}!Lh`wOd;U4f-nq6CLu)YfhS|6d zvZHE1>t#-y#$#0s`hiEiCzJx2Q***0FR29vExmL=QBgtEqF&jI=kt!VQfw=gWLBCL z7 zI&I*L=J@QPG)O0*Zapgmz5^Nf^-5*d^pf^MI}mdWA%p4wRpQcJDw_K0g^B6?tfbBD zQcB&jB!0-9u*P@fy-KFxe%bKd+SE(|O7I31;7^CSF zfz$Vki%`uifrNWCFwQbj*5KOD?rb4zx`^1xZEGtpE?~bf2f)pZ)XP!+P-YuV)1Bs z%uXsfu?yMf>KI6~(~f71@rtqpEemcEK!HBs8uT*7ddu0PGzpxe@q7&PKN|6@hQA2?bfRW4G3u%2bS}-oSl6 z+`@(|qf|>4m8UFzW|*>#U`P3?c#I2hqDI8LaKJ+Sj$!WHbqvop0}* z=}8*wMP(%pQb&z`G6Rv{UTi5g3BQwY&Obk8m(4&!5J~Ickh?kz0@$TEy!xwA4|^99MK0-})wV-HGjyxlb3cfq!4m^baMHg%zZf46<} zI?+zY4370WjmQ{jts<(#*4=DY;2GSt1I?=;XNnh0R-pg3{+OdFdC>r)>oVN(xb4{& zIsx%r9~&L&MdpagD{szHU+n4!1n%d8aV)aQr~~eCnxRHy>qnz0&4sm z5wFah>}Gdyg{V9(aa;5E3HVgFr!)JzwSXqNcvUXUM^-^ybh1Q!M%w_&D1*mCFLb!C zT-}#-xK$BpOwI)IuUE-ClYFy>dRLxPo?`e8yMlR%`oyv!VRniAiZKzcO#8{ERL}iZ zIbDhZ*q&=l%GqL9~>m;o$!@- zF~aXx36$dM4i0P`c^af4-;2_Xd}|lhK$_bXm$x#``8Zj7x+cJ+LCnA=6=d^LId}1k z(mVeuOYhHhG)+WvZ`Ma%%{5Q2#Y*^x<&<#1O429wxUi?<){<3o>5&z;r)FA?(_B_3 zh4${XIeJS`Fmm%_bOtU4+onPz(Q1OwG2$(id*aP9l6OZhKH*a>3#s^vw9^3eXg)~U zKgS=_g~6(Q;S_fP8G3yqK4pHHw$(A)sUxdp?HB!g*kxaczG?g zcK!ulT2GOj>-Y6Me$_&H7Hh&~wGhw(BO;c4sl;3i1D}g3VFe-0zO39!c~}st#l1)S zI&JE7kS0bbs;xpv2)_qR)YnhDe7c9741$VrjAHGtk?O8W z=d7UVpx`%c=|}4rkXD%W%)rP&I-hHTCemvqoXbs7=^9GCr2afzw@pV7M^;=kZ9ck* z=mN42Zw50AByuXNYJ2Ez5!**JH@tWhW00m05~#m6ueO6-SbXjc0Z1H%p)Kkt~$ z(-0khX*7EUKVK+0Zl!d7l00zUuh;8?cP7xN1(6IJDtrq&fl-kZ6ykg*xl^fYD}9Ud zM6V&7BG@@Nq28c~AUG+e;*1Ka>L$fNl))=}x=bWzo0ZH?5|*lUvYGFZTJ6m}nx=wt ziI29Ay~cLQZ*+XFEXVSZ87qk|xLFu9=S{Zy)J>>UsTbF-*i)UC{IcS0_Q&cQhilpXw{ zeWFsV``F~hoB#kF^WbE;$RKM3PAEWiRuS)j8E>N$(EW_=Rg7V=-@?)byY~@9qJZ+! z8Hf6KTJy?_)#;2|+&{YBe&M4YlK5j{tf8=OKkp#+l{Pnop{!=Vd{+8SO*H(rS$*gC zBY9BL<7_Jz@RZOc2(ZKf&I}f>+>?&To_G$B$}(8;=5nR9aBuF93K~W>pJ&Ot^F~c> z#y8+;r!Rx*xn9vC%2Wdh-4q2kPfKxK)@G5hw(q82)BI4O#K(bF36x&EbqP70ybr5|nerfBD8gJ-p!1{2CtaOQ z54^76`O`!k9~~WSZEcrccqX+)!~jM;mVsNV8aJ+i^UkRMQZC7*&q0opoJo?!%1SnA zzq!RbIT$R`eE_AnV_AE|olz(7u^y)xt3@hG5FJ;Ln{FA5K7tdGz;Xt&WA+qPk_O*48ReJ>N+5pXkZB9gWJ zYsc9wx6;Js7ohTlobcu?$ZnOH5VeMH*I)*lNC>(hMH%oS`t)*VLg;2WkP^9^%ELx>r~`>?mif?+iL?N#p- zzNi_ozm0^D)je7!1Rk)Sf&Szj;9G7AZ7XgC3on%Ro?%6qVOzC7$s|HTW2<~Hus?$a zKq^X?IL_e4B2-aLVq@v8C1j%hg+jmPurcQ{#LKW*=RE-aA)hW(X-H}eZ3d})Y9!75 zimm4yV?cTR8B08n!O>=cF9VJn=8b{QJqf118Yq?KU<47M6BsO)zY%Z5V3w%e}>?6ZjqG^ymBZGIgy97MVm(zPaQ+p`_mi&;Zdg01xOs5jAI^Ik$w9V2McgUPvLgt+Hrv&WxHv!n>3QNwc+%qnHL|j~^Yi z&a0zNxLB21OdK!fe>bY%F&}2AK*Q_akc)Lp02{a4iEJpBNoH=6OPc50_nfM(MW;F? 
zViwQQxGrJvF_XHRPZ?E6y?|sJ*an z$E8;Wv6q8dCOEBcJ-%g@Z(fIDmVnhd>F|O|=EG?Ma$0zwVqiLy_ZAMmbOC8t?UkUh zHr@*|=ayZs2h;iYJ`U(+M|plI0PIIi-8P{Y49;i_blMx7VCrN|t{{|smcVvT#i^3^X7;2eG>qHd_^pAvt)-v296 z{X_G|UW1=k{z+8*Ja5PTPx9%nwAEkl|EK1kI}CoFU;YN^Cyn(#MPa}9pZ)nsYyD4A zK)*rxNpt-($_)ebQ__Be@+4*H0P$4bFeiVE+vE*G>`K|IY#Xl@|MFoEu{7 zr@a0R&R=P=e@6OikNkgw^eb)lC(aKe{*@E@s}VO{781WBH!C$hq8_wvb zT;lwNLHfJmuc7oNnfoaz*RJztYWH`&U%l^5p#3R4*JF9TTj!6k`**Ehy~mAb{we&| c1oChFw45~Rb)-i^!oB_&Upt+OKkVoK0Yt*vY5)KL literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xls b/pandas/tests/io/data/excel/test_spaces.xls new file mode 100644 index 0000000000000000000000000000000000000000..316db172360d0d7b849e640136da4a091fbab64e GIT binary patch literal 5632 zcmeHLZ)jUp6hALb+Sj#f(ypDlx%#Xw>((lE6P(Cwwr<)9V`ZxdGSD=AZL_`pA<1A0 zVm3EHa59q(bJh)l!*<8z`H(dPu~FGO@XAHk*S{T%jAt z0<+#+=|ex97y#4fRbXP(UVZ~DX=SQqSm1z+;+GOj_Q(kGgggb0mriT3WAfFNBAFjA zO6k8;pB4S{7FfzwY|pnp<2U1Y1-S71GuHF`uL9SAYr(6*b>JWvSAf)m8^CM8jo>Em zTJTNao58n$*MZlAZv}4vQ~zyV&T3FKjuf<_-*UA!`)g%_ z_v^#oTNq!h0vC>|`o7>Y!I@D4w+NH<+-aDcl(M zGFl|ZqA@#(T8EWNMB`3;WYEHmVs%)K9qmUA*TdFaH_IA6>ZIahj%`_~_^_SK;`T8m zg1PhdiJ47c6g=W>L-O&8azj~YK`5)mlKWu_9{ze=eh9-|f9uM3`}%r1pHMsvBu5+R z6xioDTBiJ4RH9AO6WX^C2NylUUE!Tgm=SCh?&^sS+E%#dIqh~*(=dkIrgXV;Q(n0h zB{0 z2Yn54K|gdO+N(#?H_+@y`v#7ycOw*4>4(uEj>6#lroj>n-p>@{Ml6R6yoM?uMEQ_O zTj0LaR374+Xa}RRBZj!<(vL`di?J7p|h`I{q5J@*$pgEfE@zH=u9Q#5SX%DTQMBX z%SOxj{ipLU*u#rWlqY?Xjs`u3ug`=Wkv{znJ>r68qz$kZ?ZthRJA$Mrs>xf;X=h85vsH@pcmzEH*(23t_q<|ch zqgekiUcLPG-;4hO45&|tr0^OZ1yY<^J^hTMrLO;W^!>>jG9bI5A*OS%^*<2h&1d`H XSpP=oPI>y5BP-Yb`QP9Fwf;W=%ZE3X literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_spaces.xlsb b/pandas/tests/io/data/excel/test_spaces.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e38b6c2d8f17061f1bf5486d18aaea6013639578 GIT binary patch literal 8036 zcmeHMg{B_*Vm?h+(KX^<}IuBGcPEue(JqU1`6fP|EEsz?Yd-6b8;NXLRm z{Z_sAs_5@8_}?)2Kt(?P-}XQ3fjF%$)n;CTT83Tx*b-LH zE)=@qN-S0)vOF0=o6#8o4eH6g8m2Q}!8K|1s(eUz{1|@)|4fgqrY`fi#QVYVh%3hd zOlI*z*~Zoz4(G{Y<>*JKok<5Xaf)*6 zT8@Rw*K*W3c&UH^?Kq(31YJ!Gp(==ZK$N@%q0KQGc`g0UtZa?fdz^97K-sn3C`S`@ zTTgD$u>~>KFY5O4);Hi@`mt&nl)DQ}uc|^ajYA{ru2w(vp=wkyAI0!&tkb{Z!9&qh zmF}|9A3jU~nplbPVj1)n4xd``$(InVD$Xpr%rp|0eBfSxD9ua7r zNxN@4$AkhA0`@XokqtN}q^>EK%uR!W;6zdWKHS+^^eS2p8=r~>T3g**<)6qrlZ|+Y zYMRQ5i{0Adom!*g=I*q-A|AD>s~@M`MJGBUB8y^J;|}>Sn53kD?OuC*-k~vWCJgV-?>|3?4^uR z2y)QgFq9WG&AN_Dor@c>`Z^^_euj*Ka_9EIvS;Q+NlQ2}h2kRvRMTY<4UmJt)ROY3`CBd z>jemY&*UMZx1OL_z1wWA-OX8XR#b<{<-F-?PjA6Z+=9wtg9Ux3(~Xvn>muB)vi1qL z^s+-meSw+R!(N_>d*!Wvm=jDoqPb%l69czQqye#0Q(9q|3i~?TY46%AU|JGs31UIu zbmo)lcdiNHx4>wk=(alO_Bz_so-7Kmo#QS#EATJ6%bYavrB0q2E^oTl43sO zG|XDQE0(Ma*Cv}=iRNofAzH*st%h%{=ZQ(89N-ySZx5<+P zp}crAgx7iWp3Jun zzVw1Tao`UP{-QVn2iVNVs9ipuY7m2o4b!A*zGGVU&7ugN=uT+ z>Tcn66uE0+uZ>_U`pJ~)DCZMF#m%$?(y{4qJ$La74}|>uqCCYUo=TCB$cFneGs2}b zsFM8q99()++@k4=3snG?4ERmbE^;w@rna(dA8&IwYGf#D3#TX(AT-3G9T2Ctr8?*k z3^m>g8oOsAfvP*_fVWZxBNy6lE2k<=Z;Elfm3bXFtN%KoP0+syA=W73T|VUjG1jLL z^K!APbTgN;`-F&SGF40Hh=~5I1FKQmuq~W4hpTiIl!={Kdke|BWP2pWb2(tF$K{pL z7RvfK7_=3?Iab$>d(Bd1pf(a@LFvdY*n&qJDp@c+0QHHwE4r6mI<1YP(~waQ7qAS4 z3!b^o3%mLB(O~yz4+pvQTrh~|r--ViYDF=KA^rEeXB)`~>I1O=fL72~Y5y&O-EF~6 zV4iPBzOT}LL%-(<&_trUOqSVJ@nnwW+~>{b|FL4AzY% zzC#HyLae-nyPDkionXEYb4KS?m9GdaxW2^s+CV-1l5ARgSw25IxbyhY zBc59iWG{}F428N@LByGm=t4T7!H3U`n0ZBu)D5sARGYk@7|cZjgbpN z{I3V(+|#q`Z22%=hzQ1W2pGH@wk7GNUSi-a{m?(mO-PFtSR$E!ZiXd(9ntuh)FfEb z?@chi@8-<=4<(Xp^<}{>IyJ%Rc59>QP}rh7mN5<`;@rAQ#Hx2SgLvj@utr4!8;?4{ z6(9F}wq?%P6ALamH40i32^2OMv9joauVrwEd5!Y5<>GIh 
z>b{T=w~Nk1i@>vM0VY9H=~~)|@2Mt~FU(>T)uZ^Fd_g=<6tD9+-hZxJGE;l1EBIOm z+(DgbW!`zVJ3V`G)b4sPY$!vflwvxDZz zl8eL1Bhy=zULv>nJ`xjIew=PwnPDJpMx{R8#c4=^GfKf>5`?Rsc{H8vs57PvpPCS* zPE9OS1!= zFQryv;B$#pPe5-b%6oQ+OWN`rM$r7M8O#kqpFrY`!#5VFD`Bjj`6=m1$(Z6&Hz2W8 zCC9_?Du z3iQM$`NAOtvNO`m#yghQ+XRTK4|I*W}}Qw9bMxeEKmnvycz1e(WuPxFH%qq(@AlfB%Qi_yMX#!+{C@aZ_85+`P;0Wk>* zzhuAJn?Ww9-WHFHmhp%MNAA?rOl1gVI$!yh)Feh)#|PCDX*&%=hr*uCZFb&MkBj0w zI&^GI6-B1(T@OE=V@)Qb7%<`UX~CvbJf=sCyl$Rf`0G}<@R%OmIJp(IZqgb?vrv=R zp{gUM%5$%hr+++eDC!{Y?u%o%Z?xa?qrozxdwz@u`OyKU0u_XK>>~bC;rk5l!RXPi zx*O3+of~5lc7>n5Af$+7sC<-_@v0<_Qzqv=T`d!_tHsN()S7E)#q*YeN@}+Xv^`!u z)6ogE;xob7|4TR<##S@Hsk#k8ByUQ+D0jwY9q)?aWv);dre;ijE(vDz&jHH0#q@KW zuyRgZCFRO~u<(kSRD$aW_jK`aF6@3AJ{kcEop40(p1>tKJAl&FV`2V-OyCJl_iO{YZrt*(LL%W;SxQkQjPe?XCk$MjVJ&PU>LnqW>OP6 z)xOaM1Byt{D_$vns+xSof}Zb)HC(jb;K3GV@+wv4TWyCJg7eD7i5Q)8$Hn(+)lYZ* zqH+h_-|=N~rq)Y^s=0;i8*)AHD1}*ZxE={7r~0zEx6smImzC;kW2O+IkYSAsC-b8U znfuUl%)1oug6&3Y9}n`yMKRPwsOxBSTctI5u=lO4zMjHtHLSMg%`=A;5~Oiz1>TIb z*B+|1e@S(yGmQ^q>!MJo$V0kbLwhK-rI11pY}S>a%dv-95E)LF`|O&S%Jd}L>$(mP zLw_j~ruTDiJIP%#?Qb(F0&?nQVFm3EGg=NXF%0#5-|Pe^bW)`bM^UvDIgVz0RK zdS`dyLnbwV()%i=`CLUM-93g*o_i#)HbBkPwRov_M7^FOL7}#VzAyo&_oP0|6-6j& zGmoD}UDOhJ20!51NMtW^)x-THnLUjA4xuT8w?1O7SA=)JicDS4ibS{hpymO(F1dTE z@x=OW>XQ?1HzhaA&iNZdWcy$hBhx8ehZCNyJE#gPK3RhHqmy-V`NrE(ijC=`>{Ta` zCU*U+LReXn;*M2!dHlAOgM*#xXaz!xG_xkQ6NBVr2VAO@deU{)F?*aH$lXn?obU>7 ztI$5cER`WoW-%P(H^PQ zG3QRwx6Ry5Qx14rq4&njV;t6KBr4+J0tt~|#l1s>)8^Q2;%D)U^U4tIY1Svvj_D2t z>TtnZeHDS*U8C!wtZeO9N87E;SOSM!z1R9GE%KT4{fM-$k)kQFtw}qS0Wsp3ndfVs zSzOh#@2*g_EfIbyR=xSWHyRU9E$4b_;O&~L!(cVU9<8YN^DKD2fFLGyZpY+>R$bAH zG`?*H(TP_fJ~Mw0PZFV9W2Dg%It zH7A0pSossR8oj60^A<|4j*Nq}M8_gWPcw#Q{naC8xj=3gwjIW}INSKBS6FoboHd9;A&#%DYSK66~YFpqA&QNQbZ9WE_+e;WDw-g}c@kRyg6bu9V!k^dCC zA5pWnvP$Y^eEuWXgiDS#7~qt5$LEn(-EbZLvVAD17N=m|3{; zX_Qcr455M-_dcy4sD?jqi*U1+R za{qgJ^>?l#wZA#UN5R<&?ENj*2VrIaR18o8Dql=MXyBa~AzJpP_JDvIi83uv8W64< zn3NbB*mw<^dxH;&^@qRPy7jz78A&S>WOxXXT>vW=OLd5gs~eA{3k3Xkm;1lTKhkxN zV1`P~yyT(V7#GqV{?BPrKJ!CVYWRiueFzig{a7J0IIKdW^~yL~KQ zJvADCgfRh?TV}fq$$_cvbxg(8ddJ(TKN9+%_SU{Hm9WGZgdROM>9)2~ZRNF9@+ zB9F)~KzUCQifbS9VU88Lqc$2;-17n})apZF$Yz}Hkfc}dvLjyZ;S4l?tlFucIXHAb zdS`NIp=(FWwEck&o_g$Ga!Hu>l*YFO(!rrNqGt!}_8U(r599z1n~$sV*tc^QD9%$w z#^%I`0)2Z!BevSCQm3X9Ike#BRA0)W#CuZe&14HDlU_}c%j>9 zhqQRplp@pWc!7nP@}dW&7AP=J8{2y^=)09cK!7oNac^ipP0 z%Eq;E^L;AwaxX!y50B-LFz{%g-h)8iBylP75t?LPM!n*8s0G9banO+CQHe%dzY!H4 zSXHCcA?g4%&3}>7Lue`8^0bUw9bWA%thb}s3w|u}+HAHwD~mfmCD+~|GVEq_f8;K< zVjA!hVRvbLvHnV;=9C|Kih5oK!f`WMjSYlZ+R zCVk#(ZLY&BCmFF+=};devBy_&|a`VDo9%zm~? 
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/test_spaces.xlsm b/pandas/tests/io/data/excel/test_spaces.xlsm
new file mode 100644
index 0000000000000000000000000000000000000000..a41ebe5bb0e655ed2c39c52a43baadeed592ae94
GIT binary patch
literal 4848
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/excel/test_spaces.xlsx b/pandas/tests/io/data/excel/test_spaces.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..9071543c4739bd5c1d6a886f7f4853de777631d9
GIT binary patch
literal 8622
[base85-encoded binary payload omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index b1502ed3f3c09..99447c03e89af 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -464,6 +464,24 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)
 
+    def test_reader_spaces(self, read_ext):
+        # see gh-32207
+        basename = "test_spaces"
+
+        actual = pd.read_excel(basename + read_ext)
+        expected = DataFrame(
+            {
+                "testcol": [
+                    "this is great",
+                    "4    spaces",
+                    "1 trailing ",
+                    " 1 leading",
+                    "2  spaces  multiple  times",
+                ]
+            }
+        )
+        tm.assert_frame_equal(actual, expected)
+
     def test_reading_all_sheets(self, read_ext):
         # Test reading all sheetnames by setting sheetname to None,
         # Ensure a dict is returned.
From 9c1984c5ce7648eb5a613637791492030801d43a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 7 Apr 2020 01:28:00 +0200
Subject: [PATCH 22/29] PERF: masked ops for reductions (min/max) (#33261)

---
 doc/source/whatsnew/v1.1.0.rst               |  2 +-
 pandas/core/array_algos/masked_reductions.py | 41 +++++++++++++
 pandas/core/arrays/boolean.py                |  8 +--
 pandas/core/arrays/integer.py                |  7 ++-
 pandas/tests/arrays/integer/test_dtypes.py   |  2 +-
 pandas/tests/reductions/test_reductions.py   | 62 ++++++++++++++------
 6 files changed, 95 insertions(+), 27 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 6bb22f4c16aa1..f74182f6a59c0 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -276,7 +276,7 @@ Performance improvements
   sparse values from ``scipy.sparse`` matrices using the
   :meth:`DataFrame.sparse.from_spmatrix` constructor
   (:issue:`32821`, :issue:`32825`, :issue:`32826`, :issue:`32856`,
   :issue:`32858`).
-- Performance improvement in :meth:`Series.sum` for nullable (integer and boolean) dtypes (:issue:`30982`).
+- Performance improvement in reductions (sum, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`).
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py
index 0fb2605b554c2..b3723340cefd6 100644
--- a/pandas/core/array_algos/masked_reductions.py
+++ b/pandas/core/array_algos/masked_reductions.py
@@ -45,3 +45,44 @@ def sum(
             return np.sum(values[~mask])
     else:
         return np.sum(values, where=~mask)
+
+
+def _minmax(func, values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+    """
+    Reduction for 1D masked array.
+
+    Parameters
+    ----------
+    func : np.min or np.max
+    values : np.ndarray
+        Numpy array with the values (can be of any dtype that supports the
+        operation).
+    mask : np.ndarray
+        Boolean numpy array (True values indicate missing values).
+    skipna : bool, default True
+        Whether to skip NA.
+    """
+    if not skipna:
+        if mask.any():
+            return libmissing.NA
+        else:
+            if values.size:
+                return func(values)
+            else:
+                # min/max with empty array raise in numpy, pandas returns NA
+                return libmissing.NA
+    else:
+        subset = values[~mask]
+        if subset.size:
+            return func(values[~mask])
+        else:
+            # min/max with empty array raise in numpy, pandas returns NA
+            return libmissing.NA
+
+
+def min(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+    return _minmax(np.min, values=values, mask=mask, skipna=skipna)
+
+
+def max(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
+    return _minmax(np.max, values=values, mask=mask, skipna=skipna)
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 442d4ca8cef6d..e85534def6b97 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -696,8 +696,9 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         data = self._data
         mask = self._mask
 
-        if name == "sum":
-            return masked_reductions.sum(data, mask, skipna=skipna, **kwargs)
+        if name in {"sum", "min", "max"}:
+            op = getattr(masked_reductions, name)
+            return op(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         if self._hasna:
@@ -715,9 +716,6 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
             if int_result == result:
                 result = int_result
 
-        elif name in ["min", "max"] and notna(result):
-            result = np.bool_(result)
-
         return result
 
     def _maybe_mask_result(self, result, mask, other, op_name: str):
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index f5189068d5da1..d47a396bbb14e 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -562,8 +562,9 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         data = self._data
         mask = self._mask
 
-        if name == "sum":
-            return masked_reductions.sum(data, mask, skipna=skipna, **kwargs)
+        if name in {"sum", "min", "max"}:
+            op = getattr(masked_reductions, name)
+            return op(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         # (we explicitly use NaN within reductions)
@@ -582,7 +583,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
 
         # if we have a preservable numeric op,
         # provide coercion back to an integer type if possible
-        elif name in ["min", "max", "prod"]:
+        elif name == "prod":
             # GH#31409 more performant than casting-then-checking
             result = com.cast_scalar_indexer(result)
 
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
index ee1ec86745246..515013e95c717 100644
--- a/pandas/tests/arrays/integer/test_dtypes.py
+++ b/pandas/tests/arrays/integer/test_dtypes.py
@@ -34,7 +34,7 @@ def test_preserve_dtypes(op):
 
     # op
     result = getattr(df.C, op)()
-    if op == "sum":
+    if op in {"sum", "min", "max"}:
         assert isinstance(result, np.int64)
     else:
         assert isinstance(result, int)
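In user-facing terms, the new masked min/max paths give the semantics checked in the test diff below. A small sketch for orientation (illustration only, not part of the patch):

# Sketch: nullable-dtype min/max semantics added by this patch.
import pandas as pd

s = pd.Series([1, None, 3], dtype="Int64")
assert s.min() == 1                     # NA skipped by default
assert s.max(skipna=False) is pd.NA     # with skipna=False any NA propagates
assert pd.Series([], dtype="Int64").min() is pd.NA  # empty returns NA, not an error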
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 962b105d1e8fc..8fb035e085d40 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -65,27 +65,58 @@ def test_ops(self, opname, obj):
         assert result.value == expected
 
     @pytest.mark.parametrize("opname", ["max", "min"])
-    def test_nanops(self, opname, index_or_series):
+    @pytest.mark.parametrize(
+        "dtype, val",
+        [
+            ("object", 2.0),
+            ("float64", 2.0),
+            ("datetime64[ns]", datetime(2011, 11, 1)),
+            ("Int64", 2),
+            ("boolean", True),
+        ],
+    )
+    def test_nanminmax(self, opname, dtype, val, index_or_series):
         # GH#7261
         klass = index_or_series
-        arg_op = "arg" + opname if klass is Index else "idx" + opname
 
-        obj = klass([np.nan, 2.0])
-        assert getattr(obj, opname)() == 2.0
+        if dtype in ["Int64", "boolean"] and klass == pd.Index:
+            pytest.skip("EAs can't yet be stored in an index")
 
-        obj = klass([np.nan])
-        assert pd.isna(getattr(obj, opname)())
-        assert pd.isna(getattr(obj, opname)(skipna=False))
+        def check_missing(res):
+            if dtype == "datetime64[ns]":
+                return res is pd.NaT
+            elif dtype == "Int64":
+                return res is pd.NA
+            else:
+                return pd.isna(res)
 
-        obj = klass([], dtype=object)
-        assert pd.isna(getattr(obj, opname)())
-        assert pd.isna(getattr(obj, opname)(skipna=False))
+        obj = klass([None], dtype=dtype)
+        assert check_missing(getattr(obj, opname)())
+        assert check_missing(getattr(obj, opname)(skipna=False))
 
-        obj = klass([pd.NaT, datetime(2011, 11, 1)])
-        # check DatetimeIndex monotonic path
-        assert getattr(obj, opname)() == datetime(2011, 11, 1)
-        assert getattr(obj, opname)(skipna=False) is pd.NaT
+        obj = klass([], dtype=dtype)
+        assert check_missing(getattr(obj, opname)())
+        assert check_missing(getattr(obj, opname)(skipna=False))
+
+        if dtype == "object":
+            # generic test with object only works for empty / all NaN
+            return
+
+        obj = klass([None, val], dtype=dtype)
+        assert getattr(obj, opname)() == val
+        assert check_missing(getattr(obj, opname)(skipna=False))
 
+        obj = klass([None, val, None], dtype=dtype)
+        assert getattr(obj, opname)() == val
+        assert check_missing(getattr(obj, opname)(skipna=False))
+
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    def test_nanargminmax(self, opname, index_or_series):
+        # GH#7261
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1)])
         assert getattr(obj, arg_op)() == 1
         result = getattr(obj, arg_op)(skipna=False)
         if klass is Series:
@@ -95,9 +126,6 @@ def test_nanops(self, opname, index_or_series):
 
         obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
         # check DatetimeIndex non-monotonic path
-        assert getattr(obj, opname)(), datetime(2011, 11, 1)
-        assert getattr(obj, opname)(skipna=False) is pd.NaT
-
         assert getattr(obj, arg_op)() == 1
         result = getattr(obj, arg_op)(skipna=False)
         if klass is Series:
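The datetime cases split out into test_nanargminmax above boil down to behavior like the following (a sketch, not part of the patch):

# Sketch: NaT handling covered by the reworked reduction tests.
from datetime import datetime
import pandas as pd

s = pd.Series([pd.NaT, datetime(2011, 11, 1)])
assert s.min() == datetime(2011, 11, 1)  # NaT skipped by default
assert s.min(skipna=False) is pd.NaT     # NaT propagates when not skipping
assert s.idxmax() == 1                   # label of the only non-missing value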
From efce8fcee2de9c0ebd18700033bd61eb61c7302f Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 6 Apr 2020 17:14:46 -0700
Subject: [PATCH 23/29] REF: do concat on values, avoid blocks

---
 pandas/core/internals/concat.py               | 5 ++++-
 pandas/core/internals/managers.py             | 2 ++
 pandas/tests/extension/test_external_block.py | 6 ------
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 720e6799a3bf3..3f06d80714623 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -1,6 +1,7 @@
 # TODO: Needs a better name; too many modules are already called "concat"
 from collections import defaultdict
 import copy
+from typing import List
 
 import numpy as np
 
@@ -419,13 +420,15 @@ def _get_empty_dtype_and_na(join_units):
         raise AssertionError(msg)
 
 
-def _is_uniform_join_units(join_units) -> bool:
+def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool:
     """
     Check if the join units consist of blocks of uniform type that can
     be concatenated using Block.concat_same_type instead of the generic
     _concatenate_join_units (which uses `concat_compat`).
 
     """
+    # TODO: require dtype match in addition to same type?  e.g. DatetimeTZBlock
+    #  cannot necessarily join
     return (
         # all blocks need to have the same type
         all(type(ju.block) is type(join_units[0].block) for ju in join_units)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index b0363dd21f616..2caab9f91cb50 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -20,6 +20,7 @@ from pandas.core.dtypes.common import (
     DT64NS_DTYPE,
     is_datetimelike_v_numeric,
+    is_dtype_equal,
     is_extension_array_dtype,
     is_list_like,
     is_numeric_v_string_like,
@@ -42,6 +43,7 @@
     DatetimeTZBlock,
     ExtensionBlock,
     ObjectValuesExtensionBlock,
+    _block_shape,
     _extend_blocks,
     _safe_reshape,
     get_block_type,
diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py
index 9925fd51561ae..1843126898f3d 100644
--- a/pandas/tests/extension/test_external_block.py
+++ b/pandas/tests/extension/test_external_block.py
@@ -32,12 +32,6 @@ def df():
     return pd.DataFrame(block_manager)
 
 
-def test_concat_dataframe(df):
-    # GH17728
-    res = pd.concat([df, df])
-    assert isinstance(res._mgr.blocks[1], CustomBlock)
-
-
 def test_concat_axis1(df):
     # GH17954
     df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]})

From 362e86c4a1ae46748e56112fb68c4d15de1570dc Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Mon, 6 Apr 2020 19:18:57 -0500
Subject: [PATCH 24/29] CLN: Clean nanops.get_corr_func (#33244)

---
 pandas/core/nanops.py | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 822ab775e7e46..9494248a423a8 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1332,30 +1332,33 @@ def nancorr(
 
 
 def get_corr_func(method):
-    if method in ["kendall", "spearman"]:
-        from scipy.stats import kendalltau, spearmanr
-    elif method in ["pearson"]:
-        pass
-    elif callable(method):
-        return method
-    else:
-        raise ValueError(
-            f"Unknown method '{method}', expected one of 'kendall', 'spearman'"
-        )
+    if method == "kendall":
+        from scipy.stats import kendalltau
+
+        def func(a, b):
+            return kendalltau(a, b)[0]
 
-    def _pearson(a, b):
-        return np.corrcoef(a, b)[0, 1]
+        return func
+    elif method == "spearman":
+        from scipy.stats import spearmanr
 
-    def _kendall(a, b):
-        # kendallttau returns a tuple of the tau statistic and pvalue
-        rs = kendalltau(a, b)
-        return rs[0]
+        def func(a, b):
+            return spearmanr(a, b)[0]
 
-    def _spearman(a, b):
-        return spearmanr(a, b)[0]
+        return func
+    elif method == "pearson":
 
-    _cor_methods = {"pearson": _pearson, "kendall": _kendall, "spearman": _spearman}
-    return _cor_methods[method]
+        def func(a, b):
+            return np.corrcoef(a, b)[0, 1]
+
+        return func
+    elif callable(method):
+        return method
+
+    raise ValueError(
+        f"Unknown method '{method}', expected one of "
+        "'kendall', 'spearman', 'pearson', or callable"
+    )
 
 
 @disallow("M8", "m8")
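For context, Series.corr reaches get_corr_func via nanops.nancorr, so the cleaned-up branches map one-to-one onto the public method argument. A sketch (not part of the patch; the non-pearson named methods require scipy):

# Sketch: the public surface behind get_corr_func.
import pandas as pd

s1 = pd.Series([1.0, 2.0, 3.0, 4.0])
s2 = pd.Series([1.0, 2.0, 3.0, 5.0])

s1.corr(s2)                     # "pearson" is the default
s1.corr(s2, method="spearman")  # dispatches to scipy.stats.spearmanr
s1.corr(s2, method=lambda a, b: float((a == b).mean()))  # callables pass through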
From 3ad2110d455f3519b5b1b6259df070809314eca9 Mon Sep 17 00:00:00 2001
From: Bharat Raghunathan
Date: Tue, 7 Apr 2020 05:53:20 +0530
Subject: [PATCH 25/29] [DOC]: Mention default behaviour of index_col in readcsv (#32977)

---
 doc/source/user_guide/io.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index a4cc1f9ee02ca..d721e00a0a0b6 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -109,6 +109,11 @@ index_col : int, str, sequence of int / str, or False, default ``None``
   Note: ``index_col=False`` can be used to force pandas to *not* use the first
   column as the index, e.g. when you have a malformed file with delimiters at
   the end of each line.
+
+  The default value of ``None`` instructs pandas to guess. If the number of
+  fields in the column header row is equal to the number of fields in the body
+  of the data file, then a default index is used. If it is one larger, then
+  the first field is used as an index.
 usecols : list-like or callable, default ``None``
   Return a subset of the columns. If list-like, all elements must either
   be positional (i.e. integer indices into the document columns) or strings
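The guessing rule documented above is easy to see directly (a sketch, not part of the patch):

# Sketch: the index_col=None guessing described in the new io.rst text.
from io import StringIO
import pandas as pd

# Header and body rows have the same number of fields -> default RangeIndex.
pd.read_csv(StringIO("a,b\n1,2\n3,4"))

# Body rows carry one extra field -> the first field becomes the index.
pd.read_csv(StringIO("a,b\n10,1,2\n20,3,4"))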
From 3ee836308aad4c58014c16a40f976852e75b3837 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 10 Apr 2020 20:21:51 -0700
Subject: [PATCH 26/29] Remove Block.concat_same_type

---
 pandas/core/internals/blocks.py   | 55 -------------------------------
 pandas/core/internals/concat.py   | 14 ++++++--
 pandas/core/internals/managers.py |  2 --
 3 files changed, 12 insertions(+), 59 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index d8875b38ed738..e6d7397f90b65 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -48,7 +48,6 @@
     is_timedelta64_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.concat import concat_categorical, concat_datetime
 from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
@@ -110,7 +109,6 @@ class Block(PandasObject):
     _can_consolidate = True
     _verify_integrity = True
     _validate_ndim = True
-    _concatenator = staticmethod(np.concatenate)
 
     def __init__(self, values, placement, ndim=None):
         self.ndim = self._check_ndim(values, ndim)
@@ -309,16 +307,6 @@ def shape(self):
     def dtype(self):
         return self.values.dtype
 
-    def concat_same_type(self, to_concat):
-        """
-        Concatenate list of single blocks of the same type.
-        """
-        values = self._concatenator(
-            [blk.values for blk in to_concat], axis=self.ndim - 1
-        )
-        placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-        return self.make_block_same_class(values, placement=placement)
-
     def iget(self, i):
         return self.values[i]
 
@@ -1772,14 +1760,6 @@ def _slice(self, slicer):
 
         return self.values[slicer]
 
-    def concat_same_type(self, to_concat):
-        """
-        Concatenate list of single blocks of the same type.
-        """
-        values = self._holder._concat_same_type([blk.values for blk in to_concat])
-        placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-        return self.make_block_same_class(values, placement=placement)
-
     def fillna(self, value, limit=None, inplace=False, downcast=None):
         values = self.values if inplace else self.values.copy()
         values = values.fillna(value=value, limit=limit)
@@ -2261,20 +2241,6 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]:
             new_values = new_values.astype("timedelta64[ns]")
         return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]
 
-    def concat_same_type(self, to_concat):
-        # need to handle concat([tz1, tz2]) here, since DatetimeArray
-        # only handles cases where all the tzs are the same.
-        # Instead of placing the condition here, it could also go into the
-        # is_uniform_join_units check, but I'm not sure what is better.
-        if len({x.dtype for x in to_concat}) > 1:
-            values = concat_datetime([x.values for x in to_concat])
-
-            values = values.astype(object, copy=False)
-            placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-
-            return self.make_block(values, placement=placement)
-        return super().concat_same_type(to_concat)
-
     def fillna(self, value, limit=None, inplace=False, downcast=None):
         # We support filling a DatetimeTZ with a `value` whose timezone
         # is different by coercing to object.
@@ -2645,7 +2611,6 @@ class CategoricalBlock(ExtensionBlock):
     is_categorical = True
     _verify_integrity = True
     _can_hold_na = True
-    _concatenator = staticmethod(concat_categorical)
 
     should_store = Block.should_store
 
@@ -2659,26 +2624,6 @@ def __init__(self, values, placement, ndim=None):
     def _holder(self):
         return Categorical
 
-    def concat_same_type(self, to_concat):
-        """
-        Concatenate list of single blocks of the same type.
-
-        Note that this CategoricalBlock._concat_same_type *may* not
-        return a CategoricalBlock. When the categories in `to_concat`
-        differ, this will return an object ndarray.
-
-        If / when we decide we don't like that behavior:
-
-        1. Change Categorical._concat_same_type to use union_categoricals
-        2. Delete this method.
-        """
-        values = self._concatenator(
-            [blk.values for blk in to_concat], axis=self.ndim - 1
-        )
-        placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-        # not using self.make_block_same_class as values can be object dtype
-        return self.make_block(values, placement=placement)
-
     def replace(
         self,
         to_replace,
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 3f06d80714623..97edfec3985b0 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -62,8 +62,18 @@ def concatenate_block_managers(
                 values = values.view()
             b = b.make_block_same_class(values, placement=placement)
         elif _is_uniform_join_units(join_units):
-            b = join_units[0].block.concat_same_type([ju.block for ju in join_units])
-            b.mgr_locs = placement
+            blk = join_units[0].block
+            vals = [ju.block.values for ju in join_units]
+            if not blk.is_extension:
+                values = concat_compat(vals, axis=blk.ndim - 1)
+            elif blk.is_datetimetz or blk.is_categorical:
+                # These can have the same type but multiple dtypes,
+                # we concatting does not necessarily preserve dtype
+                values = concat_compat(vals, axis=blk.ndim - 1)
+            elif blk.is_extension:
+                values = blk._holder._concat_same_type(vals)
+
+            b = make_block(values, placement=placement, ndim=blk.ndim)
         else:
             b = make_block(
                 _concatenate_join_units(join_units, concat_axis, copy=copy),
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 436ef140c1a3b..f3b4ebad9cec1 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -19,7 +19,6 @@
 from pandas.core.dtypes.common import (
     DT64NS_DTYPE,
     is_datetimelike_v_numeric,
-    is_dtype_equal,
     is_extension_array_dtype,
     is_list_like,
     is_numeric_v_string_like,
@@ -43,7 +42,6 @@
     DatetimeTZBlock,
     ExtensionBlock,
     ObjectValuesExtensionBlock,
-    _block_shape,
     _extend_blocks,
     _safe_reshape,
     get_block_type,
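Behavior the refactored path has to preserve: same-typed concatenation keeps the dtype, including tz-aware datetimes and categoricals. A sketch (not part of the patch):

# Sketch: dtype preservation through the uniform-join-units path.
import pandas as pd

tz = pd.Series(pd.date_range("2020-01-01", periods=2, tz="UTC"))
assert pd.concat([tz, tz], ignore_index=True).dtype == tz.dtype  # tz kept

cat = pd.Series(pd.Categorical(["a", "b"]))
assert pd.concat([cat, cat], ignore_index=True).dtype == "category"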
From 41d6da0c351795b3983b8bf9f480824612821318 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 11 Apr 2020 13:45:49 -0700
Subject: [PATCH 27/29] use concat_compat

---
 pandas/core/internals/concat.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 97edfec3985b0..a57bd46d0e033 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -64,14 +64,15 @@ def concatenate_block_managers(
         elif _is_uniform_join_units(join_units):
             blk = join_units[0].block
             vals = [ju.block.values for ju in join_units]
+
             if not blk.is_extension:
                 values = concat_compat(vals, axis=blk.ndim - 1)
             elif blk.is_datetimetz or blk.is_categorical:
                 # These can have the same type but multiple dtypes,
                 # we concatting does not necessarily preserve dtype
                 values = concat_compat(vals, axis=blk.ndim - 1)
-            elif blk.is_extension:
-                values = blk._holder._concat_same_type(vals)
+            else:
+                values = concat_compat(vals)
 
             b = make_block(values, placement=placement, ndim=blk.ndim)
         else:

From 2e070ca6aa7a95caa22111375109062ba93398a7 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 12 Apr 2020 14:50:07 -0700
Subject: [PATCH 28/29] combine cases

---
 pandas/core/internals/concat.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index a57bd46d0e033..37e081aeba3f6 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -65,13 +65,12 @@ def concatenate_block_managers(
             blk = join_units[0].block
             vals = [ju.block.values for ju in join_units]
 
-            if not blk.is_extension:
-                values = concat_compat(vals, axis=blk.ndim - 1)
-            elif blk.is_datetimetz or blk.is_categorical:
-                # These can have the same type but multiple dtypes,
-                # we concatting does not necessarily preserve dtype
+            if not blk.is_extension or blk.is_datetimetz or blk.is_categorical:
+                # datetimetz and categorical can have the same type but multiple
+                # dtypes, concatting does not necessarily preserve dtype
                 values = concat_compat(vals, axis=blk.ndim - 1)
             else:
+                # TODO(EA2D): special-casing not needed with 2D EAs
                 values = concat_compat(vals)
 
             b = make_block(values, placement=placement, ndim=blk.ndim)

From 675a94822d811ae04b22d04e955cd6c396f8302c Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 14 Apr 2020 07:05:55 -0700
Subject: [PATCH 29/29] Dummy commit to force CI
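After the consolidation above, concat_compat also owns the fallback the removed CategoricalBlock docstring used to warn about: concatenating categoricals whose categories differ yields object dtype, not category. A closing sketch (not part of the patch):

# Sketch: mismatched categories fall back to object dtype.
import pandas as pd

a = pd.Series(pd.Categorical(["x", "y"]))
b = pd.Series(pd.Categorical(["y", "z"]))
assert pd.concat([a, b], ignore_index=True).dtype == object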