Commit ce8637a

Merge remote-tracking branch 'upstream/main' into test-build-test
2 parents f3c1000 + 8117a55 commit ce8637a

43 files changed: +665 -478 lines changed

asv_bench/benchmarks/array.py

Lines changed: 3 additions & 0 deletions
@@ -90,6 +90,9 @@ def time_setitem_list(self, multiple_chunks):
     def time_setitem_slice(self, multiple_chunks):
         self.array[::10] = "foo"
 
+    def time_setitem_null_slice(self, multiple_chunks):
+        self.array[:] = "foo"
+
     def time_tolist(self, multiple_chunks):
         self.array.tolist()

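For context, a rough sketch of the operation the new ``time_setitem_null_slice`` benchmark exercises: assigning a scalar through the null slice ``[:]``. The benchmark's ``self.array`` is built in a ``setup`` method outside this hunk, so the pyarrow-backed string array below is an assumption for illustration only.

>>> import pandas as pd
>>> arr = pd.array(["a", "b", None] * 1000, dtype="string[pyarrow]")  # assumed stand-in for self.array
>>> arr[:] = "foo"     # null slice: replaces every element
>>> arr[::10] = "foo"  # the existing time_setitem_slice benchmark covers strided slices

The v2.0.0 whatsnew hunk further down notes a fast path for exactly this null-slice case in ``ArrowExtensionArray.__setitem__`` (:issue:`50248`).
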
asv_bench/benchmarks/reshape.py

Lines changed: 10 additions & 5 deletions
@@ -15,12 +15,17 @@
 
 
 class Melt:
-    def setup(self):
-        self.df = DataFrame(np.random.randn(10000, 3), columns=["A", "B", "C"])
-        self.df["id1"] = np.random.randint(0, 10, 10000)
-        self.df["id2"] = np.random.randint(100, 1000, 10000)
+    params = ["float64", "Float64"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        self.df = DataFrame(
+            np.random.randn(100_000, 3), columns=["A", "B", "C"], dtype=dtype
+        )
+        self.df["id1"] = pd.Series(np.random.randint(0, 10, 10000))
+        self.df["id2"] = pd.Series(np.random.randint(100, 1000, 10000))
 
-    def time_melt_dataframe(self):
+    def time_melt_dataframe(self, dtype):
         melt(self.df, id_vars=["id1", "id2"])
 

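A minimal sketch of what the parametrized ``Melt`` benchmark now measures: melting a frame whose value columns use the nullable ``Float64`` dtype as well as plain ``float64``. The column names mirror the benchmark; the tiny frame size is only for illustration.

>>> import numpy as np
>>> import pandas as pd
>>> df = pd.DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"], dtype="Float64")
>>> df["id1"] = np.random.randint(0, 10, 5)
>>> df["id2"] = np.random.randint(100, 1000, 5)
>>> melted = pd.melt(df, id_vars=["id1", "id2"])
>>> print(melted["value"].dtype)  # with the DataFrame.melt fix noted in v2.0.0 (GH 41570), the extension dtype is kept
Float64
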
ci/deps/actions-38-downstream_compat.yaml

Lines changed: 0 additions & 1 deletion
@@ -56,7 +56,6 @@ dependencies:
   - zstandard
 
   # downstream packages
-  - aiobotocore
   - botocore
   - cftime
   - dask

doc/source/whatsnew/v1.5.3.rst

Lines changed: 1 addition & 0 deletions
@@ -37,6 +37,7 @@ Bug fixes
 
 Other
 ~~~~~
+- Reverted deprecation (:issue:`45324`) of behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` slicing with an integer :class:`Index`; this will remain positional (:issue:`49612`)
 -
 
 .. ---------------------------------------------------------------------------

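To make the reverted deprecation concrete, here is a short illustration of what "slicing remains positional" means for a :class:`Series` with an integer :class:`Index` (the values are chosen arbitrarily):

>>> import pandas as pd
>>> ser = pd.Series([10, 20, 30], index=[4, 5, 6])
>>> ser[1:]      # positional: rows 1 and 2, not labels >= 1
5    20
6    30
dtype: int64
>>> ser.loc[5:]  # label-based slicing remains available via .loc
5    20
6    30
dtype: int64
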
doc/source/whatsnew/v2.0.0.rst

Lines changed: 6 additions & 1 deletion
@@ -42,7 +42,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
 Additionally a new global configuration, ``mode.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
 to select the nullable dtypes implementation.
 
-* :func:`read_csv` (with ``engine="pyarrow"``)
+* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
 * :func:`read_excel`
 * :func:`read_parquet`
 * :func:`read_orc`
@@ -736,6 +736,7 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`, :issue:`49577`)
 - Performance improvement in :meth:`MultiIndex.putmask` (:issue:`49830`)
 - Performance improvement in :meth:`Index.union` and :meth:`MultiIndex.union` when index contains duplicates (:issue:`48900`)
+- Performance improvement in :meth:`Series.rank` for pyarrow-backed dtypes (:issue:`50264`)
 - Performance improvement in :meth:`Series.fillna` for extension array dtypes (:issue:`49722`, :issue:`50078`)
 - Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
 - Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
@@ -748,6 +749,7 @@ Performance improvements
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
 - Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
+- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
 - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
 - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
@@ -831,6 +833,7 @@ Interval
 
 Indexing
 ^^^^^^^^
+- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`)
 - Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
 - Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
 - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)
@@ -870,6 +873,7 @@ I/O
 - Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`)
 - Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`)
 - Bug when a pickling a subset PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`)
+- Bug in :func:`read_sql_query` ignoring ``dtype`` argument when ``chunksize`` is specified and result is empty (:issue:`50245`)
 - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
 - Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`)
 - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
@@ -906,6 +910,7 @@ Reshaping
 ^^^^^^^^^
 - Bug in :meth:`DataFrame.pivot_table` raising ``TypeError`` for nullable dtype and ``margins=True`` (:issue:`48681`)
 - Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`)
+- Bug in :meth:`DataFrame.melt` losing extension array dtype (:issue:`41570`)
 - Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`)
 - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`)
 - Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`)

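A hedged sketch of how the expanded ``use_nullable_dtypes`` support described in the v2.0.0 notes above is meant to be combined with the ``mode.nullable_backend`` option. The file name is a placeholder, and ``"pyarrow"`` as the option value is an assumption based on the surrounding notes rather than something shown in this diff:

>>> import pandas as pd
>>> with pd.option_context("mode.nullable_backend", "pyarrow"):
...     df = pd.read_csv("data.csv", use_nullable_dtypes=True, engine="python")  # "data.csv" is a placeholder path
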
environment.yml

Lines changed: 1 addition & 2 deletions
@@ -60,7 +60,6 @@ dependencies:
   - zstandard
 
   # downstream packages
-  - aiobotocore<2.0.0  # GH#44311 pinned to fix docbuild
   - dask-core
   - seaborn-base
 
@@ -69,7 +68,7 @@ dependencies:
   - flask
 
   # benchmarks
-  - asv
+  - asv>=0.5.1
 
   # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms.
   - c-compiler

pandas/_libs/tslib.pyi

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ def format_array_from_datetime(
     reso: int = ...,  # NPY_DATETIMEUNIT
 ) -> npt.NDArray[np.object_]: ...
 def array_with_unit_to_datetime(
-    values: np.ndarray,
+    values: npt.NDArray[np.object_],
     unit: str,
     errors: str = ...,
 ) -> tuple[np.ndarray, tzinfo | None]: ...

pandas/_libs/tslib.pyx

Lines changed: 5 additions & 52 deletions
@@ -18,7 +18,6 @@ import_datetime()
 
 cimport numpy as cnp
 from numpy cimport (
-    float64_t,
     int64_t,
     ndarray,
 )
@@ -231,7 +230,7 @@ def format_array_from_datetime(
 
 
 def array_with_unit_to_datetime(
-    ndarray values,
+    ndarray[object] values,
     str unit,
     str errors="coerce"
 ):
@@ -266,70 +265,24 @@ def array_with_unit_to_datetime(
     cdef:
         Py_ssize_t i, n=len(values)
         int64_t mult
-        int prec = 0
-        ndarray[float64_t] fvalues
         bint is_ignore = errors=="ignore"
         bint is_coerce = errors=="coerce"
        bint is_raise = errors=="raise"
-        bint need_to_iterate = True
         ndarray[int64_t] iresult
         ndarray[object] oresult
-        ndarray mask
         object tz = None
 
     assert is_ignore or is_coerce or is_raise
 
     if unit == "ns":
-        if issubclass(values.dtype.type, (np.integer, np.float_)):
-            result = values.astype("M8[ns]", copy=False)
-        else:
-            result, tz = array_to_datetime(
-                values.astype(object, copy=False),
-                errors=errors,
-            )
+        result, tz = array_to_datetime(
+            values.astype(object, copy=False),
+            errors=errors,
+        )
         return result, tz
 
     mult, _ = precision_from_unit(unit)
 
-    if is_raise:
-        # try a quick conversion to i8/f8
-        # if we have nulls that are not type-compat
-        # then need to iterate
-
-        if values.dtype.kind in ["i", "f", "u"]:
-            iresult = values.astype("i8", copy=False)
-            # fill missing values by comparing to NPY_NAT
-            mask = iresult == NPY_NAT
-            # Trying to Convert NaN to integer results in undefined
-            # behaviour, so handle it explicitly (see GH #48705)
-            if values.dtype.kind == "f":
-                mask |= values != values
-            iresult[mask] = 0
-            fvalues = iresult.astype("f8") * mult
-            need_to_iterate = False
-
-        if not need_to_iterate:
-            # check the bounds
-            if (fvalues < Timestamp.min.value).any() or (
-                (fvalues > Timestamp.max.value).any()
-            ):
-                raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
-
-            if values.dtype.kind in ["i", "u"]:
-                result = (iresult * mult).astype("M8[ns]")
-
-            elif values.dtype.kind == "f":
-                fresult = (values * mult).astype("f8")
-                fresult[mask] = 0
-                if prec:
-                    fresult = round(fresult, prec)
-                result = fresult.astype("M8[ns]", copy=False)
-
-            iresult = result.view("i8")
-            iresult[mask] = NPY_NAT
-
-            return result, tz
-
     result = np.empty(n, dtype="M8[ns]")
     iresult = result.view("i8")
 

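The refactoring above narrows ``array_with_unit_to_datetime`` to object-dtype input (matching the ``tslib.pyi`` stub change), dropping the numeric fast paths it used to carry. From the user side it backs unit-based conversion along these lines; which internal code path :func:`to_datetime` dispatches to is an implementation detail, so treat this as an approximate illustration:

>>> import pandas as pd
>>> pd.to_datetime([1670599800, None], unit="s", errors="coerce")
DatetimeIndex(['2022-12-09 15:30:00', 'NaT'], dtype='datetime64[ns]', freq=None)
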
pandas/_libs/tslibs/np_datetime.pyx

Lines changed: 7 additions & 7 deletions
@@ -312,10 +312,10 @@ cpdef ndarray astype_overflowsafe(
     """
     if values.descr.type_num == dtype.type_num == cnp.NPY_DATETIME:
         # i.e. dtype.kind == "M"
-        pass
+        dtype_name = "datetime64"
     elif values.descr.type_num == dtype.type_num == cnp.NPY_TIMEDELTA:
         # i.e. dtype.kind == "m"
-        pass
+        dtype_name = "timedelta64"
     else:
         raise TypeError(
             "astype_overflowsafe values.dtype and dtype must be either "
@@ -326,14 +326,14 @@ cpdef ndarray astype_overflowsafe(
         NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype)
         NPY_DATETIMEUNIT to_unit = get_unit_from_dtype(dtype)
 
-    if (
-        from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
-        or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
-    ):
+    if from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
+        raise TypeError(f"{dtype_name} values must have a unit specified")
+
+    if to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
         # without raising explicitly here, we end up with a SystemError
         # built-in function [...] returned a result with an error
         raise ValueError(
-            "datetime64/timedelta64 values and dtype must have a unit specified"
+            f"{dtype_name} dtype must have a unit specified"
         )
 
     if from_unit == to_unit:

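The net effect of the ``astype_overflowsafe`` change above is a dtype-specific message when a unit is missing. A sketch calling the internal helper directly; this is private API shown only for illustration, and the import path is an assumption based on this file's location:

>>> import numpy as np
>>> from pandas._libs.tslibs.np_datetime import astype_overflowsafe
>>> values = np.array(["2022-12-09"], dtype="datetime64[ns]")
>>> astype_overflowsafe(values, np.dtype("datetime64"))  # target dtype carries no unit
Traceback (most recent call last):
    ...
ValueError: datetime64 dtype must have a unit specified
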
pandas/_libs/tslibs/offsets.pyx

Lines changed: 60 additions & 17 deletions
@@ -1494,11 +1494,29 @@ cdef class BusinessDay(BusinessMixin):
     """
     DateOffset subclass representing possibly n business days.
 
+    Parameters
+    ----------
+    n : int, default 1
+        The number of days represented.
+    normalize : bool, default False
+        Normalize start/end dates to midnight.
+
     Examples
     --------
-    >>> ts = pd.Timestamp(2022, 8, 5)
-    >>> ts + pd.offsets.BusinessDay()
-    Timestamp('2022-08-08 00:00:00')
+    You can use the parameter ``n`` to represent a shift of n business days.
+
+    >>> ts = pd.Timestamp(2022, 12, 9, 15)
+    >>> ts.strftime('%a %d %b %Y %H:%M')
+    'Fri 09 Dec 2022 15:00'
+    >>> (ts + pd.offsets.BusinessDay(n=5)).strftime('%a %d %b %Y %H:%M')
+    'Fri 16 Dec 2022 15:00'
+
+    Passing the parameter ``normalize`` equal to True, you shift the start
+    of the next business day to midnight.
+
+    >>> ts = pd.Timestamp(2022, 12, 9, 15)
+    >>> ts + pd.offsets.BusinessDay(normalize=True)
+    Timestamp('2022-12-12 00:00:00')
     """
     _period_dtype_code = PeriodDtypeCode.B
     _prefix = "B"
@@ -1610,29 +1628,53 @@ cdef class BusinessHour(BusinessMixin):
     Parameters
     ----------
     n : int, default 1
-        The number of months represented.
+        The number of hours represented.
     normalize : bool, default False
         Normalize start/end dates to midnight before generating date range.
-    weekmask : str, Default 'Mon Tue Wed Thu Fri'
-        Weekmask of valid business days, passed to ``numpy.busdaycalendar``.
     start : str, time, or list of str/time, default "09:00"
         Start time of your custom business hour in 24h format.
     end : str, time, or list of str/time, default: "17:00"
         End time of your custom business hour in 24h format.
 
     Examples
     --------
-    >>> from datetime import time
+    You can use the parameter ``n`` to represent a shift of n hours.
+
+    >>> ts = pd.Timestamp(2022, 12, 9, 8)
+    >>> ts + pd.offsets.BusinessHour(n=5)
+    Timestamp('2022-12-09 14:00:00')
+
+    You can also change the start and the end of business hours.
+
     >>> ts = pd.Timestamp(2022, 8, 5, 16)
-    >>> ts + pd.offsets.BusinessHour()
-    Timestamp('2022-08-08 09:00:00')
     >>> ts + pd.offsets.BusinessHour(start="11:00")
     Timestamp('2022-08-08 11:00:00')
-    >>> ts + pd.offsets.BusinessHour(end=time(19, 0))
-    Timestamp('2022-08-05 17:00:00')
-    >>> ts + pd.offsets.BusinessHour(start=[time(9, 0), "20:00"],
-    ...                              end=["17:00", time(22, 0)])
-    Timestamp('2022-08-05 20:00:00')
+
+    >>> from datetime import time as dt_time
+    >>> ts = pd.Timestamp(2022, 8, 5, 22)
+    >>> ts + pd.offsets.BusinessHour(end=dt_time(19, 0))
+    Timestamp('2022-08-08 10:00:00')
+
+    Passing the parameter ``normalize`` equal to True, you shift the start
+    of the next business hour to midnight.
+
+    >>> ts = pd.Timestamp(2022, 12, 9, 8)
+    >>> ts + pd.offsets.BusinessHour(normalize=True)
+    Timestamp('2022-12-09 00:00:00')
+
+    You can divide your business day hours into several parts.
+
+    >>> import datetime as dt
+    >>> freq = pd.offsets.BusinessHour(start=["06:00", "10:00", "15:00"],
+    ...                                end=["08:00", "12:00", "17:00"])
+    >>> pd.date_range(dt.datetime(2022, 12, 9), dt.datetime(2022, 12, 13), freq=freq)
+    DatetimeIndex(['2022-12-09 06:00:00', '2022-12-09 07:00:00',
+                   '2022-12-09 10:00:00', '2022-12-09 11:00:00',
+                   '2022-12-09 15:00:00', '2022-12-09 16:00:00',
+                   '2022-12-12 06:00:00', '2022-12-12 07:00:00',
+                   '2022-12-12 10:00:00', '2022-12-12 11:00:00',
+                   '2022-12-12 15:00:00', '2022-12-12 16:00:00'],
+                  dtype='datetime64[ns]', freq='BH')
     """
 
     _prefix = "BH"
@@ -3536,6 +3578,7 @@ cdef class CustomBusinessDay(BusinessDay):
     Parameters
     ----------
     n : int, default 1
+        The number of days represented.
     normalize : bool, default False
         Normalize start/end dates to midnight before generating date range.
     weekmask : str, Default 'Mon Tue Wed Thu Fri'
@@ -3624,7 +3667,7 @@ cdef class CustomBusinessHour(BusinessHour):
     Parameters
     ----------
     n : int, default 1
-        The number of months represented.
+        The number of hours represented.
     normalize : bool, default False
         Normalize start/end dates to midnight before generating date range.
     weekmask : str, Default 'Mon Tue Wed Thu Fri'
@@ -3662,7 +3705,7 @@ cdef class CustomBusinessHour(BusinessHour):
     >>> ts + pd.offsets.CustomBusinessHour(end=dt_time(19, 0))
     Timestamp('2022-08-08 10:00:00')
 
-    In the example below we divide our business day hours into several parts.
+    You can divide your business day hours into several parts.
 
     >>> import datetime as dt
    >>> freq = pd.offsets.CustomBusinessHour(start=["06:00", "10:00", "15:00"],
@@ -3692,7 +3735,7 @@ cdef class CustomBusinessHour(BusinessHour):
            'Fri 16 Dec 2022 12:00'],
           dtype='object')
 
-    In the example below we define custom holidays by using NumPy business day calendar.
+    Using NumPy business day calendar you can define custom holidays.
 
     >>> import datetime as dt
     >>> bdc = np.busdaycalendar(holidays=['2022-12-12', '2022-12-14'])

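The truncated last hunk introduces a ``np.busdaycalendar`` with two holidays. A hedged continuation of that idea, using the ``calendar`` argument of :class:`CustomBusinessDay`; the argument name and the expected output are assumptions, not part of this diff:

>>> import numpy as np
>>> import pandas as pd
>>> bdc = np.busdaycalendar(holidays=['2022-12-12', '2022-12-14'])
>>> freq = pd.offsets.CustomBusinessDay(calendar=bdc)
>>> pd.date_range('2022-12-09', '2022-12-15', freq=freq)
DatetimeIndex(['2022-12-09', '2022-12-13', '2022-12-15'], dtype='datetime64[ns]', freq='C')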