pandas-dev
diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py
Lines changed: 3 additions & 0 deletions
diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
Lines changed: 10 additions & 5 deletions
diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
Lines changed: 11 additions & 3 deletions
diff --git a/environment.yml b/environment.yml
Lines changed: 2 additions & 3 deletions
diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
Lines changed: 11 additions & 2 deletions
diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
Lines changed: 5 additions & 52 deletions
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
Lines changed: 7 additions & 7 deletions
@@ -90,6 +90,9 @@ def time_setitem_list(self, multiple_chunks):
     def time_setitem_slice(self, multiple_chunks):
         self.array[::10] = "foo"
 
+    def time_setitem_null_slice(self, multiple_chunks):
+        self.array[:] = "foo"  # null slice: every element is assigned in one call
+
     def time_tolist(self, multiple_chunks):
         self.array.tolist()
 
 
@@ -15,12 +15,17 @@
 
 
 class Melt:
-    def setup(self):
-        self.df = DataFrame(np.random.randn(10000, 3), columns=["A", "B", "C"])
-        self.df["id1"] = np.random.randint(0, 10, 10000)
-        self.df["id2"] = np.random.randint(100, 1000, 10000)
+    params = ["float64", "Float64"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        self.df = DataFrame(
+            np.random.randn(100_000, 3), columns=["A", "B", "C"], dtype=dtype
+        )
+        self.df["id1"] = pd.Series(np.random.randint(0, 10, 10000))
+        self.df["id2"] = pd.Series(np.random.randint(100, 1000, 10000))
 
-    def time_melt_dataframe(self):
+    def time_melt_dataframe(self, dtype):
         melt(self.df, id_vars=["id1", "id2"])
 
 
 
@@ -56,7 +56,6 @@ dependencies:
   - zstandard
 
   # downstream packages
-  - aiobotocore
   - botocore
   - cftime
   - dask
 
@@ -38,11 +38,13 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
 * :func:`read_csv`
 * :func:`read_excel`
 * :func:`read_sql`
+* :func:`read_sql_query`
+* :func:`read_sql_table`
 
 Additionally a new global configuration, ``mode.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
 to select the nullable dtypes implementation.
 
-* :func:`read_csv` (with ``engine="pyarrow"``)
+* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``)
 * :func:`read_excel`
 * :func:`read_parquet`
 * :func:`read_orc`
@@ -394,7 +396,7 @@ If installed, we now require:
 +-----------------+-----------------+----------+---------+
 | Package         | Minimum Version | Required | Changed |
 +=================+=================+==========+=========+
-| mypy (dev)      | 0.990           |          |    X    |
+| mypy (dev)      | 0.991           |          |    X    |
 +-----------------+-----------------+----------+---------+
 | python-dateutil | 2.8.2           |    X     |    X    |
 +-----------------+-----------------+----------+---------+
@@ -736,6 +738,7 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`, :issue:`49577`)
 - Performance improvement in :meth:`MultiIndex.putmask` (:issue:`49830`)
 - Performance improvement in :meth:`Index.union` and :meth:`MultiIndex.union` when index contains duplicates (:issue:`48900`)
+- Performance improvement in :meth:`Series.rank` for pyarrow-backed dtypes (:issue:`50264`)
 - Performance improvement in :meth:`Series.fillna` for extension array dtypes (:issue:`49722`, :issue:`50078`)
 - Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
 - Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
@@ -748,6 +751,7 @@ Performance improvements
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
 - Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
+- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
 - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`)
 - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`)
@@ -831,8 +835,10 @@ Interval
 
 Indexing
 ^^^^^^^^
+- Bug in :meth:`DataFrame.__setitem__` raising when indexer is a :class:`DataFrame` with ``boolean`` dtype (:issue:`47125`)
 - Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
 - Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`)
+- Bug in :meth:`Series.loc` raising an error when the end of a slice indexer is out of bounds (:issue:`50161`)
 - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)
 - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`)
 - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`)
@@ -870,12 +876,13 @@ I/O
 - Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`)
 - Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`)
 - Bug when a pickling a subset PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`)
+- Bug in :func:`read_sql_query` ignoring ``dtype`` argument when ``chunksize`` is specified and result is empty (:issue:`50245`)
 - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
 - Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`)
 - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
 - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
 - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
--
+- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
 
 Period
 ^^^^^^
@@ -906,6 +913,7 @@ Reshaping
 ^^^^^^^^^
 - Bug in :meth:`DataFrame.pivot_table` raising ``TypeError`` for nullable dtype and ``margins=True`` (:issue:`48681`)
 - Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`)
+- Bug in :meth:`DataFrame.melt` losing extension array dtype (:issue:`41570`)
 - Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`)
 - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`)
 - Bug in :meth:`DataFrame.pivot_table` raising ``ValueError`` with parameter ``margins=True`` when result is an empty :class:`DataFrame` (:issue:`49240`)
 
@@ -60,7 +60,6 @@ dependencies:
   - zstandard
 
   # downstream packages
-  - aiobotocore<2.0.0  # GH#44311 pinned to fix docbuild
   - dask-core
   - seaborn-base
 
@@ -69,7 +68,7 @@ dependencies:
   - flask
 
   # benchmarks
-  - asv
+  - asv>=0.5.1
 
   # The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms.
   - c-compiler
@@ -81,7 +80,7 @@ dependencies:
   - flake8=6.0.0
   - flake8-bugbear=22.7.1 # used by flake8, find likely bugs
   - isort>=5.2.1  # check that imports are in the right order
-  - mypy=0.990
+  - mypy=0.991
   - pre-commit>=2.15.0
   - pycodestyle  # used by flake8
   - pyupgrade
 
@@ -332,9 +332,18 @@ static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
     return PyBytes_AS_STRING(obj);
 }
 
-static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
+static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
                              size_t *_outLen) {
-    return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
+    char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj,
+                                                    (Py_ssize_t *)_outLen);
+    if (encoded == NULL) {
+        /* UTF-8 conversion failed and returned NULL.
+           Set errorMsg so the encoder stops instead of using the NULL
+           buffer, and let the Python exception propagate to the caller. */
+        JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder;
+        enc->errorMsg = "Encoding failed.";
+    }
+    return encoded;
 }
 
 /* JSON callback. returns a char* and mutates the pointer to *len */
 
@@ -12,7 +12,7 @@ def format_array_from_datetime(
     reso: int = ...,  # NPY_DATETIMEUNIT
 ) -> npt.NDArray[np.object_]: ...
 def array_with_unit_to_datetime(
-    values: np.ndarray,
+    values: npt.NDArray[np.object_],
     unit: str,
     errors: str = ...,
 ) -> tuple[np.ndarray, tzinfo | None]: ...
 
@@ -18,7 +18,6 @@ import_datetime()
 
 cimport numpy as cnp
 from numpy cimport (
-    float64_t,
     int64_t,
     ndarray,
 )
@@ -231,7 +230,7 @@ def format_array_from_datetime(
 
 
 def array_with_unit_to_datetime(
-    ndarray values,
+    ndarray[object] values,
     str unit,
     str errors="coerce"
 ):
@@ -266,70 +265,24 @@ def array_with_unit_to_datetime(
     cdef:
         Py_ssize_t i, n=len(values)
         int64_t mult
-        int prec = 0
-        ndarray[float64_t] fvalues
         bint is_ignore = errors=="ignore"
         bint is_coerce = errors=="coerce"
         bint is_raise = errors=="raise"
-        bint need_to_iterate = True
         ndarray[int64_t] iresult
         ndarray[object] oresult
-        ndarray mask
         object tz = None
 
     assert is_ignore or is_coerce or is_raise
 
     if unit == "ns":
-        if issubclass(values.dtype.type, (np.integer, np.float_)):
-            result = values.astype("M8[ns]", copy=False)
-        else:
-            result, tz = array_to_datetime(
-                values.astype(object, copy=False),
-                errors=errors,
-            )
+        result, tz = array_to_datetime(
+            values.astype(object, copy=False),
+            errors=errors,
+        )
         return result, tz
 
     mult, _ = precision_from_unit(unit)
 
-    if is_raise:
-        # try a quick conversion to i8/f8
-        # if we have nulls that are not type-compat
-        # then need to iterate
-
-        if values.dtype.kind in ["i", "f", "u"]:
-            iresult = values.astype("i8", copy=False)
-            # fill missing values by comparing to NPY_NAT
-            mask = iresult == NPY_NAT
-            # Trying to Convert NaN to integer results in undefined
-            # behaviour, so handle it explicitly (see GH #48705)
-            if values.dtype.kind == "f":
-                mask |= values != values
-            iresult[mask] = 0
-            fvalues = iresult.astype("f8") * mult
-            need_to_iterate = False
-
-        if not need_to_iterate:
-            # check the bounds
-            if (fvalues < Timestamp.min.value).any() or (
-                (fvalues > Timestamp.max.value).any()
-            ):
-                raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
-
-            if values.dtype.kind in ["i", "u"]:
-                result = (iresult * mult).astype("M8[ns]")
-
-            elif values.dtype.kind == "f":
-                fresult = (values * mult).astype("f8")
-                fresult[mask] = 0
-                if prec:
-                    fresult = round(fresult, prec)
-                result = fresult.astype("M8[ns]", copy=False)
-
-            iresult = result.view("i8")
-            iresult[mask] = NPY_NAT
-
-            return result, tz
-
     result = np.empty(n, dtype="M8[ns]")
     iresult = result.view("i8")
 
 
@@ -312,10 +312,10 @@ cpdef ndarray astype_overflowsafe(
     """
     if values.descr.type_num == dtype.type_num == cnp.NPY_DATETIME:
         # i.e. dtype.kind == "M"
-        pass
+        dtype_name = "datetime64"
     elif values.descr.type_num == dtype.type_num == cnp.NPY_TIMEDELTA:
         # i.e. dtype.kind == "m"
-        pass
+        dtype_name = "timedelta64"
     else:
         raise TypeError(
             "astype_overflowsafe values.dtype and dtype must be either "
@@ -326,14 +326,14 @@ cpdef ndarray astype_overflowsafe(
         NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype)
         NPY_DATETIMEUNIT to_unit = get_unit_from_dtype(dtype)
 
-    if (
-        from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
-        or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
-    ):
+    if from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
+        raise TypeError(f"{dtype_name} values must have a unit specified")
+
+    if to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
         # without raising explicitly here, we end up with a SystemError
         # built-in function [...] returned a result with an error
         raise ValueError(
-            "datetime64/timedelta64 values and dtype must have a unit specified"
+            f"{dtype_name} dtype must have a unit specified"
         )
 
     if from_unit == to_unit: