Commit 8639ad3

Author: Marco Gorelli (committed)
Merge remote-tracking branch 'upstream/main' into pr/nikitaved-qssummer/format_iso
2 parents c520a51 + b48735a


66 files changed: +293 additions, −754 deletions

doc/source/whatsnew/v2.0.0.rst (19 additions, 1 deletion)

@@ -192,10 +192,24 @@ Removal of prior version deprecations/changes
 - Removed argument ``sort_columns`` in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`)
 - Removed argument ``is_copy`` from :meth:`DataFrame.take` and :meth:`Series.take` (:issue:`30615`)
 - Removed argument ``kind`` from :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer` and :meth:`Index.slice_locs` (:issue:`41378`)
+- Disallow subclass-specific keywords (e.g. "freq", "tz", "names", "closed") in the :class:`Index` constructor (:issue:`38597`)
 - Removed argument ``inplace`` from :meth:`Categorical.remove_unused_categories` (:issue:`37918`)
 - Disallow passing non-round floats to :class:`Timestamp` with ``unit="M"`` or ``unit="Y"`` (:issue:`47266`)
 - Remove keywords ``convert_float`` and ``mangle_dupe_cols`` from :func:`read_excel` (:issue:`41176`)
 - Disallow passing non-keyword arguments to :func:`read_excel` except ``io`` and ``sheet_name`` (:issue:`34418`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.drop_duplicates` except for ``subset`` (:issue:`41485`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` (:issue:`41506`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` except for ``method`` (:issue:`41510`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44896`)
+- Disallow passing non-keyword arguments to :meth:`Index.set_names` except for ``names`` (:issue:`41551`)
+- Disallow passing non-keyword arguments to :meth:`Index.join` except for ``other`` (:issue:`46518`)
+- Disallow passing non-keyword arguments to :func:`concat` except for ``objs`` (:issue:`41485`)
+- Disallow passing non-keyword arguments to :func:`pivot` except for ``data`` (:issue:`48301`)
+- Disallow passing non-keyword arguments to :meth:`DataFrame.pivot` (:issue:`48301`)
+- Disallow passing non-keyword arguments to :func:`read_json` except for ``path_or_buf`` (:issue:`27573`)
+- Disallow passing non-keyword arguments to :func:`read_sas` except for ``filepath_or_buffer`` (:issue:`47154`)
+- Disallow passing non-keyword arguments to :func:`read_stata` except for ``filepath_or_buffer`` (:issue:`48128`)
+- Disallow passing non-keyword arguments to :func:`read_xml` except for ``path_or_buffer`` (:issue:`45133`)
 - Disallow passing non-keyword arguments to :meth:`Series.mask` and :meth:`DataFrame.mask` except ``cond`` and ``other`` (:issue:`41580`)
 - Disallow passing non-keyword arguments to :meth:`DataFrame.to_stata` except for ``path`` (:issue:`48128`)
 - Disallow passing non-keyword arguments to :meth:`DataFrame.where` and :meth:`Series.where` except for ``cond`` and ``other`` (:issue:`41523`)
@@ -228,16 +242,20 @@ Removal of prior version deprecations/changes
 - Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`)
 - Removed :meth:`Series.str.__iter__` (:issue:`28277`)
 - Removed ``pandas.SparseArray`` in favor of :class:`arrays.SparseArray` (:issue:`30642`)
-- Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame`` (:issue:`30642`)
+- Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame``, including pickle support (:issue:`30642`)
 - Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
 - Enforced disallowing a tuple of column labels into :meth:`.DataFrameGroupBy.__getitem__` (:issue:`30546`)
 - Removed setting Categorical._codes directly (:issue:`41429`)
 - Enforced :meth:`Rolling.count` with ``min_periods=None`` to default to the size of the window (:issue:`31302`)
 - Renamed ``fname`` to ``path`` in :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata` and :meth:`DataFrame.to_feather` (:issue:`30338`)
+- Enforced disallowing indexing a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 2)]]``). Either convert the list to a tuple, or pass the slice directly instead (:issue:`31333`)
 - Enforced the ``display.max_colwidth`` option to not accept negative integers (:issue:`31569`)
 - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`)
 - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
 - Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
+- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``: when the data cannot be cast losslessly, the floating-point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
+- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
+- Changed behavior of the :class:`Series` constructor: it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
 - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)

 .. ---------------------------------------------------------------------------
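The long run of "Disallow passing non-keyword arguments" entries above all enforce the same pattern: one or two leading parameters stay positional and everything else becomes keyword-only. A minimal sketch of that enforcement, assuming a generic decorator (the name `enforce_keyword_only` and the stand-in `concat` below are illustrative, not pandas' actual implementation):

```python
import functools


def enforce_keyword_only(*allowed_positional):
    """Reject positional use of any parameter beyond the allowed ones."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if len(args) > len(allowed_positional):
                raise TypeError(
                    f"{func.__name__}() takes at most "
                    f"{len(allowed_positional)} positional argument(s): "
                    f"{', '.join(allowed_positional)}"
                )
            return func(*args, **kwargs)
        return wrapper
    return decorator


# Hypothetical stand-in for pd.concat: only ``objs`` may be positional.
@enforce_keyword_only("objs")
def concat(objs, axis=0, join="outer"):
    return list(objs), axis, join
```

With this sketch, `concat([1, 2], axis=1)` still works, while `concat([1, 2], 1)` raises `TypeError`. In plain Python 3 the same effect is achieved statically with a bare `*` in the signature, e.g. `def concat(objs, *, axis=0, join="outer")`.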

pandas/_libs/lib.pyi (1 addition, 1 deletion)

@@ -158,7 +158,7 @@ def ensure_string_array(
 ) -> npt.NDArray[np.object_]: ...
 def infer_datetimelike_array(
     arr: npt.NDArray[np.object_],
-) -> tuple[str, bool]: ...
+) -> str: ...
 def convert_nans_to_NA(
     arr: npt.NDArray[np.object_],
 ) -> npt.NDArray[np.object_]: ...

pandas/_libs/lib.pyx (15 additions, 35 deletions)

@@ -95,7 +95,6 @@ from pandas._libs.util cimport (
     is_nan,
 )

-from pandas._libs.tslib import array_to_datetime
 from pandas._libs.tslibs import (
     OutOfBoundsDatetime,
     OutOfBoundsTimedelta,
@@ -1583,25 +1582,19 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
     Returns
     -------
     str: {datetime, timedelta, date, nat, mixed}
-    bool
     """
     cdef:
         Py_ssize_t i, n = len(arr)
         bint seen_timedelta = False, seen_date = False, seen_datetime = False
         bint seen_tz_aware = False, seen_tz_naive = False
-        bint seen_nat = False, seen_str = False
+        bint seen_nat = False
         bint seen_period = False, seen_interval = False
-        list objs = []
         object v

     for i in range(n):
         v = arr[i]
         if isinstance(v, str):
-            objs.append(v)
-            seen_str = True
-
-            if len(objs) == 3:
-                break
+            return "mixed"

         elif v is None or util.is_nan(v):
             # nan or None
@@ -1619,7 +1612,7 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
             seen_tz_aware = True

             if seen_tz_naive and seen_tz_aware:
-                return "mixed", seen_str
+                return "mixed"
         elif util.is_datetime64_object(v):
             # np.datetime64
             seen_datetime = True
@@ -1635,43 +1628,30 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
                 seen_interval = True
                 break
         else:
-            return "mixed", seen_str
+            return "mixed"

     if seen_period:
         if is_period_array(arr):
-            return "period", seen_str
-        return "mixed", seen_str
+            return "period"
+        return "mixed"

     if seen_interval:
         if is_interval_array(arr):
-            return "interval", seen_str
-        return "mixed", seen_str
+            return "interval"
+        return "mixed"

     if seen_date and not (seen_datetime or seen_timedelta):
-        return "date", seen_str
+        return "date"
     elif seen_datetime and not seen_timedelta:
-        return "datetime", seen_str
+        return "datetime"
     elif seen_timedelta and not seen_datetime:
-        return "timedelta", seen_str
+        return "timedelta"
+    elif seen_datetime and seen_timedelta:
+        return "mixed"
     elif seen_nat:
-        return "nat", seen_str
+        return "nat"

-    # short-circuit by trying to
-    # actually convert these strings
-    # this is for performance as we don't need to try
-    # convert *every* string array
-    if len(objs):
-        try:
-            # require_iso8601 as in maybe_infer_to_datetimelike
-            array_to_datetime(objs, errors="raise", require_iso8601=True)
-            return "datetime", seen_str
-        except (ValueError, TypeError):
-            pass
-
-    # we are *not* going to infer from strings
-    # for timedelta as too much ambiguity
-
-    return "mixed", seen_str
+    return "mixed"


 cdef inline bint is_timedelta(object o):
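The `lib.pyx` and `lib.pyi` hunks above simplify `infer_datetimelike_array` in two ways: any string element now short-circuits to `"mixed"` (the old path tried to parse up to three sample strings as ISO-8601 datetimes), and the `seen_str` flag disappears, so the function returns a plain `str` instead of a `(str, bool)` tuple. A pure-Python sketch of the new control flow, assuming only a few scalar types for brevity (the real Cython version also handles `np.datetime64`, `Period`, `Interval`, timezone mixing, etc.):

```python
import datetime


def infer_datetimelike_kind(values):
    """Simplified sketch of the post-commit inference logic."""
    seen_datetime = seen_timedelta = seen_date = seen_nat = False
    for v in values:
        if isinstance(v, str):
            return "mixed"  # strings are no longer parsed as datetimes
        elif v is None:
            seen_nat = True  # treat missing values as NaT
        elif isinstance(v, datetime.datetime):
            seen_datetime = True  # check before date: datetime subclasses date
        elif isinstance(v, datetime.timedelta):
            seen_timedelta = True
        elif isinstance(v, datetime.date):
            seen_date = True
        else:
            return "mixed"
    if seen_date and not (seen_datetime or seen_timedelta):
        return "date"
    if seen_datetime and not seen_timedelta:
        return "datetime"
    if seen_timedelta and not seen_datetime:
        return "timedelta"
    if seen_datetime and seen_timedelta:
        return "mixed"
    if seen_nat:
        return "nat"
    return "mixed"
```

Note the new `elif seen_datetime and seen_timedelta: return "mixed"` branch mirrors the added case in the diff: mixing datetimes and timedeltas no longer falls through to the string-parsing fallback.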

pandas/_libs/src/klib/khash.h (19 additions, 18 deletions)

@@ -47,6 +47,16 @@ int main() {
 */

 /*
+  2013-05-02 (0.2.8):
+
+	* Use quadratic probing. When the capacity is power of 2, stepping function
+	  i*(i+1)/2 guarantees to traverse each bucket. It is better than double
+	  hashing on cache performance and is more robust than linear probing. In
+	  theory, double hashing should be more robust than quadratic probing.
+	  However, my implementation is probably not for large hash tables, because
+	  the second hash function is closely tied to the first hash function,
+	  which reduces the effectiveness of double hashing.
+
+	Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
+
  2011-09-16 (0.2.6):

	* The capacity is a power of 2. This seems to dramatically improve the
@@ -107,7 +117,7 @@ int main() {
  Generic hash table library.
 */

-#define AC_VERSION_KHASH_H "0.2.6"
+#define AC_VERSION_KHASH_H "0.2.8"

 #include <stdlib.h>
 #include <string.h>
@@ -177,7 +187,6 @@ typedef khuint_t khiter_t;
 #define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i)
 #define __ac_set_isdel_true(flag, i) ((void)0)

-
 // specializations of https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
 khuint32_t PANDAS_INLINE murmur2_32to32(khuint32_t k){
     const khuint32_t SEED = 0xc70f6907UL;
@@ -252,13 +261,6 @@ khuint32_t PANDAS_INLINE murmur2_64to32(khuint64_t k){
     return murmur2_32_32to32(k1, k2);
 }

-
-#ifdef KHASH_LINEAR
-#define __ac_inc(k, m) 1
-#else
-#define __ac_inc(k, m) (murmur2_32to32(k) | 1) & (m)
-#endif
-
 #define __ac_fsize(m) ((m) < 32? 1 : (m)>>5)

 #ifndef kroundup32
@@ -310,12 +312,12 @@ static const double __ac_HASH_UPPER = 0.77;
 	SCOPE khuint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
 	{ \
 		if (h->n_buckets) { \
-			khuint_t inc, k, i, last, mask; \
+			khuint_t k, i, last, mask, step=0; \
 			mask = h->n_buckets - 1; \
 			k = __hash_func(key); i = k & mask; \
-			inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
+			last = i; \
 			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
-				i = (i + inc) & mask; \
+				i = (i + ++step) & mask; \
 				if (i == last) return h->n_buckets; \
 			} \
 			return __ac_iseither(h->flags, i)? h->n_buckets : i; \
@@ -348,11 +350,10 @@ static const double __ac_HASH_UPPER = 0.77;
 				if (kh_is_map) val = h->vals[j]; \
 				__ac_set_isempty_true(h->flags, j); \
 				while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
-					khuint_t inc, k, i; \
+					khuint_t k, i, step=0; \
 					k = __hash_func(key); \
 					i = k & new_mask; \
-					inc = __ac_inc(k, new_mask); \
-					while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
+					while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
 					__ac_set_isempty_false(new_flags, i); \
 					if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
 						{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
@@ -385,14 +386,14 @@ static const double __ac_HASH_UPPER = 0.77;
 			else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
 		} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
 		{ \
-			khuint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
+			khuint_t k, i, site, last, mask = h->n_buckets - 1, step=0; \
 			x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
 			if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
 			else { \
-				inc = __ac_inc(k, mask); last = i; \
+				last = i; \
 				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
 					if (__ac_isdel(h->flags, i)) site = i; \
-					i = (i + inc) & mask; \
+					i = (i + (++step)) & mask; \
 					if (i == last) { x = site; break; } \
 				} \
 				if (x == h->n_buckets) { \
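The khash.h hunks above replace the `__ac_inc` double-hashing increment with quadratic probing: each probe advances by a growing step (`i = (i + (++step)) & mask`), so the offsets from the starting bucket are the triangular numbers k*(k+1)/2, which visit every bucket exactly once when the capacity is a power of two (the property the new changelog entry cites). A small Python check of that coverage property, mirroring the C loop:

```python
def probe_sequence(h, n_buckets):
    """Buckets visited by the quadratic scheme ``i = (i + ++step) & mask``.

    The cumulative steps 1, 2, 3, ... put the k-th probe at
    ``(start + k*(k+1)/2) & mask``; for power-of-two ``n_buckets`` these
    offsets are pairwise distinct, so the probe covers the whole table.
    """
    mask = n_buckets - 1
    i = h & mask  # starting bucket, as in `i = k & mask`
    step = 0
    seen = []
    for _ in range(n_buckets):
        seen.append(i)
        step += 1
        i = (i + step) & mask  # the `(i + (++step)) & mask` update
    return seen


# Full traversal for several power-of-two capacities:
for n in (8, 64, 1024):
    assert len(set(probe_sequence(12345, n))) == n
```

With the old `KHASH_LINEAR`/murmur-based `__ac_inc` removed, the `if (i == last)` table-full check still works because the sequence is a permutation of the buckets.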

pandas/compat/pickle_compat.py (1 addition, 70 deletions)

@@ -7,11 +7,7 @@
 import copy
 import io
 import pickle as pkl
-from typing import (
-    TYPE_CHECKING,
-    Generator,
-)
-import warnings
+from typing import Generator

 import numpy as np
@@ -26,12 +22,6 @@
 )
 from pandas.core.internals import BlockManager

-if TYPE_CHECKING:
-    from pandas import (
-        DataFrame,
-        Series,
-    )
-

 def load_reduce(self):
     stack = self.stack
@@ -68,49 +58,6 @@ def load_reduce(self):
         raise


-_sparse_msg = """\
-
-Loading a saved '{cls}' as a {new} with sparse values.
-'{cls}' is now removed. You should re-save this dataset in its new format.
-"""
-
-
-class _LoadSparseSeries:
-    # To load a SparseSeries as a Series[Sparse]
-
-    # https://github.com/python/mypy/issues/1020
-    # error: Incompatible return type for "__new__" (returns "Series", but must return
-    # a subtype of "_LoadSparseSeries")
-    def __new__(cls) -> Series:  # type: ignore[misc]
-        from pandas import Series
-
-        warnings.warn(
-            _sparse_msg.format(cls="SparseSeries", new="Series"),
-            FutureWarning,
-            stacklevel=6,
-        )
-
-        return Series(dtype=object)
-
-
-class _LoadSparseFrame:
-    # To load a SparseDataFrame as a DataFrame[Sparse]
-
-    # https://github.com/python/mypy/issues/1020
-    # error: Incompatible return type for "__new__" (returns "DataFrame", but must
-    # return a subtype of "_LoadSparseFrame")
-    def __new__(cls) -> DataFrame:  # type: ignore[misc]
-        from pandas import DataFrame
-
-        warnings.warn(
-            _sparse_msg.format(cls="SparseDataFrame", new="DataFrame"),
-            FutureWarning,
-            stacklevel=6,
-        )
-
-        return DataFrame()
-
-
 # If classes are moved, provide compat here.
 _class_locations_map = {
     ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
@@ -144,14 +91,6 @@
         "pandas.core.arrays.sparse",
         "SparseArray",
     ),
-    ("pandas.sparse.series", "SparseSeries"): (
-        "pandas.compat.pickle_compat",
-        "_LoadSparseSeries",
-    ),
-    ("pandas.sparse.frame", "SparseDataFrame"): (
-        "pandas.core.sparse.frame",
-        "_LoadSparseFrame",
-    ),
     ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
     ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
     ("pandas.indexes.numeric", "Int64Index"): (
@@ -183,14 +122,6 @@
         "pandas.core.indexes.numeric",
         "Float64Index",
     ),
-    ("pandas.core.sparse.series", "SparseSeries"): (
-        "pandas.compat.pickle_compat",
-        "_LoadSparseSeries",
-    ),
-    ("pandas.core.sparse.frame", "SparseDataFrame"): (
-        "pandas.compat.pickle_compat",
-        "_LoadSparseFrame",
-    ),
 }
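With the `_LoadSparseSeries`/`_LoadSparseFrame` shims gone, `_class_locations_map` only redirects classes that still exist under a new module path; pickles of the removed classes now fail to load. A sketch of how such a map plugs into unpickling via `Unpickler.find_class` (the map contents and class names here are made up for illustration; pandas' own loader is more involved):

```python
import io
import pickle
from collections import OrderedDict

# Hypothetical compat map in the spirit of _class_locations_map:
# old (module, name) -> new (module, name).
_compat_map = {
    ("old_pkg.arrays", "SparseArray"): ("collections", "OrderedDict"),
}


class CompatUnpickler(pickle.Unpickler):
    """Redirect moved classes at load time.

    Entries deleted from the map (as with the SparseSeries/SparseDataFrame
    shims in this commit) simply fall through to the normal import, so a
    stale pickle fails loudly instead of loading a placeholder object.
    """

    def find_class(self, module, name):
        module, name = _compat_map.get((module, name), (module, name))
        return super().find_class(module, name)


# The remapped lookup resolves to the new location:
resolved = CompatUnpickler(io.BytesIO(b"")).find_class("old_pkg.arrays", "SparseArray")
assert resolved is OrderedDict
```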

pandas/core/arrays/categorical.py (1 addition, 1 deletion)

@@ -133,7 +133,7 @@

 def _cat_compare_op(op):
     opname = f"__{op.__name__}__"
-    fill_value = True if op is operator.ne else False
+    fill_value = op is operator.ne

     @unpack_zerodim_and_defer(opname)
     def func(self, other):
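The one-line `categorical.py` change is a pure simplification: `True if cond else False` is just `cond` whenever the condition is already a `bool`, as an `is` comparison always is:

```python
import operator

# The ternary with boolean literals is equivalent to the condition itself:
for op in (operator.ne, operator.eq, operator.lt):
    old_style = True if op is operator.ne else False
    new_style = op is operator.ne
    assert old_style == new_style and isinstance(new_style, bool)
```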
