Skip to content

Commit 81bc116

Browse files
Merge remote-tracking branch 'upstream/master' into numba
2 parents cc20a71 + 5bf5ae8 commit 81bc116

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+553
-588
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ jobs:
153153
run: |
154154
source activate pandas-dev
155155
pytest pandas/tests/frame/methods --array-manager
156+
pytest pandas/tests/arithmetic/ --array-manager
156157
157158
# indexing subset (temporary since other tests don't pass yet)
158159
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager

asv_bench/benchmarks/arithmetic.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,16 +116,26 @@ class FrameWithFrameWide:
116116
operator.add,
117117
operator.floordiv,
118118
operator.gt,
119-
]
119+
],
120+
[
121+
# (n_rows, n_columns)
122+
(1_000_000, 10),
123+
(100_000, 100),
124+
(10_000, 1000),
125+
(1000, 10_000),
126+
],
120127
]
121-
param_names = ["op"]
128+
param_names = ["op", "shape"]
122129

123-
def setup(self, op):
130+
def setup(self, op, shape):
124131
# we choose dtypes so as to make the blocks
125132
# a) not perfectly match between right and left
126133
# b) appreciably bigger than single columns
127-
n_cols = 2000
128-
n_rows = 500
134+
n_rows, n_cols = shape
135+
136+
if op is operator.floordiv:
137+
# floordiv is much slower than the other operations -> use less data
138+
n_rows = n_rows // 10
129139

130140
# construct dataframe with 2 blocks
131141
arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8")
@@ -137,7 +147,7 @@ def setup(self, op):
137147
df._consolidate_inplace()
138148

139149
# TODO: GH#33198 the setting here shouldn't need two steps
140-
arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8")
150+
arr1 = np.random.randn(n_rows, max(n_cols // 4, 3)).astype("f8")
141151
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
142152
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")
143153
df2 = pd.concat(
@@ -151,11 +161,11 @@ def setup(self, op):
151161
self.left = df
152162
self.right = df2
153163

154-
def time_op_different_blocks(self, op):
164+
def time_op_different_blocks(self, op, shape):
155165
# blocks (and dtypes) are not aligned
156166
op(self.left, self.right)
157167

158-
def time_op_same_blocks(self, op):
168+
def time_op_same_blocks(self, op, shape):
159169
# blocks (and dtypes) are aligned
160170
op(self.left, self.left)
161171

asv_bench/benchmarks/groupby.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,29 @@ def time_dtype_as_field(self, dtype, method, application):
461461
self.as_field_method()
462462

463463

464+
class GroupByCythonAgg:
465+
"""
466+
Benchmarks specifically targetting our cython aggregation algorithms
467+
(using a big enough dataframe with simple key, so a large part of the
468+
time is actually spent in the grouped aggregation).
469+
"""
470+
471+
param_names = ["dtype", "method"]
472+
params = [
473+
["float64"],
474+
["sum", "prod", "min", "max", "mean", "median", "var", "first", "last"],
475+
]
476+
477+
def setup(self, dtype, method):
478+
N = 1_000_000
479+
df = DataFrame(np.random.randn(N, 10), columns=list("abcdefghij"))
480+
df["key"] = np.random.randint(0, 100, size=N)
481+
self.df = df
482+
483+
def time_frame_agg(self, dtype, method):
484+
self.df.groupby("key").agg(method)
485+
486+
464487
class RankWithTies:
465488
# GH 21237
466489
param_names = ["dtype", "tie_method"]

asv_bench/benchmarks/libs.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
Benchmarks for code in pandas/_libs, excluding pandas/_libs/tslibs,
3+
which has its own directory
4+
"""
5+
import numpy as np
6+
7+
from pandas._libs.lib import (
8+
is_list_like,
9+
is_scalar,
10+
)
11+
12+
from pandas import (
13+
NA,
14+
NaT,
15+
)
16+
17+
# TODO: share with something in pd._testing?
18+
scalars = [
19+
0,
20+
1.0,
21+
1 + 2j,
22+
True,
23+
"foo",
24+
b"bar",
25+
None,
26+
np.datetime64(123, "ns"),
27+
np.timedelta64(123, "ns"),
28+
NaT,
29+
NA,
30+
]
31+
zero_dims = [np.array("123")]
32+
listlikes = [np.array([1, 2, 3]), {0: 1}, {1, 2, 3}, [1, 2, 3], (1, 2, 3)]
33+
34+
35+
class ScalarListLike:
36+
params = scalars + zero_dims + listlikes
37+
38+
def time_is_list_like(self, param):
39+
is_list_like(param)
40+
41+
def time_is_scalar(self, param):
42+
is_scalar(param)

doc/make.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class DocBuilder:
3939

4040
def __init__(
4141
self,
42-
num_jobs=0,
42+
num_jobs="auto",
4343
include_api=True,
4444
whatsnew=False,
4545
single_doc=None,
@@ -135,7 +135,7 @@ def _sphinx_build(self, kind: str):
135135

136136
cmd = ["sphinx-build", "-b", kind]
137137
if self.num_jobs:
138-
cmd += ["-j", str(self.num_jobs)]
138+
cmd += ["-j", self.num_jobs]
139139
if self.warnings_are_errors:
140140
cmd += ["-W", "--keep-going"]
141141
if self.verbosity:
@@ -304,7 +304,7 @@ def main():
304304
"command", nargs="?", default="html", help=f"command to run: {joined}"
305305
)
306306
argparser.add_argument(
307-
"--num-jobs", type=int, default=0, help="number of jobs used by sphinx-build"
307+
"--num-jobs", default="auto", help="number of jobs used by sphinx-build"
308308
)
309309
argparser.add_argument(
310310
"--no-api", default=False, help="omit api and autosummary", action="store_true"

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@
423423
if include_api:
424424
intersphinx_mapping = {
425425
"dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),
426-
"matplotlib": ("https://matplotlib.org/", None),
426+
"matplotlib": ("https://matplotlib.org/stable/", None),
427427
"numpy": ("https://numpy.org/doc/stable/", None),
428428
"pandas-gbq": ("https://pandas-gbq.readthedocs.io/en/latest/", None),
429429
"py": ("https://pylib.readthedocs.io/en/latest/", None),

doc/source/development/contributing.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,8 @@ For comparison, a full documentation build may take 15 minutes, but a single
612612
section may take 15 seconds. Subsequent builds, which only process portions
613613
you have changed, will be faster.
614614

615-
You can also specify to use multiple cores to speed up the documentation build::
615+
The build will automatically use the number of cores available on your machine
616+
to speed up the documentation build. You can override this::
616617

617618
python make.py html --num-jobs 4
618619

doc/source/user_guide/gotchas.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ testing for membership in the list of column names.
183183
Mutating with User Defined Function (UDF) methods
184184
-------------------------------------------------
185185

186+
This section applies to pandas methods that take a UDF. In particular, the methods
187+
``.apply``, ``.aggregate``, ``.transform``, and ``.filter``.
188+
186189
It is a general rule in programming that one should not mutate a container
187190
while it is being iterated over. Mutation will invalidate the iterator,
188191
causing unexpected behavior. Consider the example:
@@ -246,7 +249,6 @@ not apply to the container being iterated over.
246249
df = pd.DataFrame({"a": [1, 2, 3], 'b': [4, 5, 6]})
247250
df.apply(f, axis="columns")
248251
249-
250252
``NaN``, Integer ``NA`` values and ``NA`` type promotions
251253
---------------------------------------------------------
252254

doc/source/user_guide/io.rst

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2853,14 +2853,12 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
28532853
The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading
28542854
old-style ``.xls`` files.
28552855

2856-
Before pandas 1.2.0, the default argument ``engine=None`` to :func:`~pandas.read_excel`
2856+
Before pandas 1.3.0, the default argument ``engine=None`` to :func:`~pandas.read_excel`
28572857
would result in using the ``xlrd`` engine in many cases, including new
2858-
Excel 2007+ (``.xlsx``) files.
2859-
If `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ is installed,
2860-
many of these cases will now default to using the ``openpyxl`` engine.
2861-
See the :func:`read_excel` documentation for more details.
2858+
Excel 2007+ (``.xlsx``) files. pandas will now default to using the
2859+
`openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ engine.
28622860

2863-
Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+
2861+
It is strongly encouraged to install ``openpyxl`` to read Excel 2007+
28642862
(``.xlsx``) files.
28652863
**Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.**
28662864
This is no longer supported, switch to using ``openpyxl`` instead.

doc/source/whatsnew/v1.3.0.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ including other versions of pandas.
88

99
{{ header }}
1010

11+
.. warning::
12+
13+
When reading new Excel 2007+ (``.xlsx``) files, the default argument
14+
``engine=None`` to :func:`~pandas.read_excel` will now result in using the
15+
`openpyxl <https://openpyxl.readthedocs.io/en/stable/>`_ engine in all cases
16+
when the option :attr:`io.excel.xlsx.reader` is set to ``"auto"``.
17+
Previously, some cases would use the
18+
`xlrd <https://xlrd.readthedocs.io/en/latest/>`_ engine instead. See
19+
:ref:`What's new 1.2.0 <whatsnew_120>` for background on this change.
20+
1121
.. ---------------------------------------------------------------------------
1222
1323
Enhancements
@@ -464,6 +474,7 @@ Other
464474
- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
465475
- Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`)
466476
- :class:`Styler` rendered HTML output minor alterations to support w3 good code standard (:issue:`39626`)
477+
- Bug in :class:`Styler` where rendered HTML was missing a column class identifier for certain header cells (:issue:`39716`)
467478
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
468479

469480

pandas/_libs/lib.pyx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,11 +1059,12 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool:
10591059

10601060
cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
10611061
return (
1062-
isinstance(obj, abc.Iterable)
1062+
# equiv: `isinstance(obj, abc.Iterable)`
1063+
hasattr(obj, "__iter__") and not isinstance(obj, type)
10631064
# we do not count strings/unicode/bytes as list-like
10641065
and not isinstance(obj, (str, bytes))
10651066
# exclude zero-dimensional numpy arrays, effectively scalars
1066-
and not (util.is_array(obj) and obj.ndim == 0)
1067+
and not cnp.PyArray_IsZeroDim(obj)
10671068
# exclude sets if allow_sets is False
10681069
and not (allow_sets is False and isinstance(obj, abc.Set))
10691070
)

pandas/_testing/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,10 @@ def box_expected(expected, box_cls, transpose=True):
215215
if transpose:
216216
# for vector operations, we need a DataFrame to be a single-row,
217217
# not a single-column, in order to operate against non-DataFrame
218-
# vectors of the same length.
218+
# vectors of the same length. But convert to two rows to avoid
219+
# single-row special cases in datetime arithmetic
219220
expected = expected.T
221+
expected = pd.concat([expected] * 2, ignore_index=True)
220222
elif box_cls is PeriodArray:
221223
# the PeriodArray constructor is not as flexible as period_array
222224
expected = period_array(expected)

pandas/core/array_algos/transforms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray
1919
new_values = new_values.T
2020
axis = new_values.ndim - axis - 1
2121

22-
if np.prod(new_values.shape):
22+
if new_values.size:
2323
new_values = np.roll(new_values, ensure_platform_int(periods), axis=axis)
2424

2525
axis_indexer = [slice(None)] * values.ndim

pandas/core/arrays/numpy_.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
)
1919
from pandas.compat.numpy import function as nv
2020

21+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
2122
from pandas.core.dtypes.dtypes import PandasDtype
2223
from pandas.core.dtypes.missing import isna
2324

@@ -97,6 +98,14 @@ def _from_sequence(
9798
dtype = dtype._dtype
9899

99100
result = np.asarray(scalars, dtype=dtype)
101+
if (
102+
result.ndim > 1
103+
and not hasattr(scalars, "dtype")
104+
and (dtype is None or dtype == object)
105+
):
106+
# e.g. list-of-tuples
107+
result = construct_1d_object_array_from_listlike(scalars)
108+
100109
if copy and result is scalars:
101110
result = result.copy()
102111
return cls(result)

pandas/core/computation/expressions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):
8080
if op_str is not None:
8181

8282
# required min elements (otherwise we are adding overhead)
83-
if np.prod(a.shape) > _MIN_ELEMENTS:
83+
if a.size > _MIN_ELEMENTS:
8484
# check for dtype compatibility
8585
dtypes: Set[str] = set()
8686
for o in [a, b]:

pandas/core/dtypes/cast.py

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -609,24 +609,10 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
609609
return np.dtype(object), fill_value
610610

611611
elif issubclass(dtype.type, np.timedelta64):
612-
if (
613-
is_integer(fill_value)
614-
or is_float(fill_value)
615-
or isinstance(fill_value, str)
616-
):
617-
# TODO: What about str that can be a timedelta?
618-
dtype = np.dtype(np.object_)
619-
else:
620-
try:
621-
fv = Timedelta(fill_value)
622-
except ValueError:
623-
dtype = np.dtype(np.object_)
624-
else:
625-
if fv is NaT:
626-
# NaT has no `to_timedelta64` method
627-
fill_value = np.timedelta64("NaT", "ns")
628-
else:
629-
fill_value = fv.to_timedelta64()
612+
inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
613+
if inferred == dtype:
614+
return dtype, fv
615+
return np.dtype(object), fill_value
630616

631617
elif is_float(fill_value):
632618
if issubclass(dtype.type, np.bool_):
@@ -782,11 +768,12 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
782768

783769
elif isinstance(val, (np.timedelta64, timedelta)):
784770
try:
785-
val = Timedelta(val).value
771+
val = Timedelta(val)
786772
except (OutOfBoundsTimedelta, OverflowError):
787773
dtype = np.dtype(object)
788774
else:
789775
dtype = np.dtype("m8[ns]")
776+
val = np.timedelta64(val.value, "ns")
790777

791778
elif is_bool(val):
792779
dtype = np.dtype(np.bool_)
@@ -1546,7 +1533,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
15461533
value = iNaT
15471534

15481535
# we have an array of datetime or timedeltas & nulls
1549-
elif np.prod(value.shape) or not is_dtype_equal(value.dtype, dtype):
1536+
elif value.size or not is_dtype_equal(value.dtype, dtype):
15501537
_disallow_mismatched_datetimelike(value, dtype)
15511538

15521539
try:

pandas/core/dtypes/missing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def array_equivalent(
439439

440440
# NaNs can occur in float and complex arrays.
441441
if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype):
442-
if not (np.prod(left.shape) and np.prod(right.shape)):
442+
if not (left.size and right.size):
443443
return True
444444
return ((left == right) | (isna(left) & isna(right))).all()
445445

pandas/core/frame.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4066,8 +4066,8 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
40664066
.. deprecated:: 1.2.0
40674067
DataFrame.lookup is deprecated,
40684068
use DataFrame.melt and DataFrame.loc instead.
4069-
For an example see :meth:`~pandas.DataFrame.lookup`
4070-
in the user guide.
4069+
For further details see
4070+
:ref:`Looking up values by index/column labels <indexing.lookup>`.
40714071
40724072
Parameters
40734073
----------

0 commit comments

Comments
 (0)