
Commit 3c63b6f

Merge remote-tracking branch 'upstream/master' into docfix-multiindex-set_levels
2 parents: 5e84250 + 7670262

20 files changed, +95 -62 lines


doc/source/whatsnew/v1.0.0.rst

Lines changed: 3 additions & 0 deletions
@@ -836,6 +836,7 @@ Interval
 
 - Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`)
 - Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`)
+- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`)
 
 Indexing
 ^^^^^^^^
@@ -921,6 +922,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`)
 - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
 - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
+- Bug in :meth:`GroupBy.pct_change` and :meth:`SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
 
 Reshaping
 ^^^^^^^^^
@@ -963,6 +965,7 @@ Other
 - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
 - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
 - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`)
+- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`)
 - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`)
 - Fix :class:`AbstractHolidayCalendar` to return correct results for
   years after 2030 (now goes up to 2200) (:issue:`27790`)

pandas/_libs/hashing.pyx

Lines changed: 6 additions & 0 deletions
@@ -70,6 +70,12 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
             # null, stringify and encode
             data = <bytes>str(val).encode(encoding)
 
+        elif isinstance(val, tuple):
+            # GH#28969 we could have a tuple, but need to ensure that
+            # the tuple entries are themselves hashable before converting
+            # to str
+            hash(val)
+            data = <bytes>str(val).encode(encoding)
         else:
             raise TypeError(f"{val} of type {type(val)} is not a valid type "
                             "for hashing, must be string or null")

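The new branch calls hash(val) purely as a hashability check before stringifying, so tuples of hashable values hash cleanly while tuples holding unhashable objects still fail loudly. A minimal sketch of the resulting behavior (not part of the commit; exact hash values depend on the hash key, so none are asserted here):

import pandas as pd
from pandas.util import hash_pandas_object

# Tuples of hashable values now hash instead of raising (GH#28969).
df = pd.DataFrame({"data": [(1,), (2,)]})
print(hash_pandas_object(df))  # uint64 Series, one hash per row

# Tuples containing unhashable values are still rejected up front by hash(val).
try:
    hash_pandas_object(pd.DataFrame({"data": [(1, []), (2, {})]}))
except TypeError as err:
    print(err)  # unhashable type: 'list'
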
pandas/compat/pickle_compat.py

Lines changed: 2 additions & 2 deletions
@@ -169,9 +169,9 @@ def __new__(cls) -> "DataFrame":  # type: ignore
 
 
 # our Unpickler sub-class to override methods and some dispatcher
-# functions for compat
-
+# functions for compat and uses a non-public class of the pickle module.
 
+# error: Name 'pkl._Unpickler' is not defined
 class Unpickler(pkl._Unpickler):  # type: ignore
     def find_class(self, module, name):
         # override superclass

pandas/core/arrays/sparse/dtype.py

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ class SparseDtype(ExtensionDtype):
     # hash(nan) is (sometimes?) 0.
     _metadata = ("_dtype", "_fill_value", "_is_na_fill_value")
 
-    def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None:
+    def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None):
 
         if isinstance(dtype, type(self)):
             if fill_value is None:

pandas/core/computation/align.py

Lines changed: 16 additions & 9 deletions
@@ -2,10 +2,12 @@
 """
 
 from functools import partial, wraps
+from typing import Dict, Optional, Sequence, Tuple, Type, Union
 import warnings
 
 import numpy as np
 
+from pandas._typing import FrameOrSeries
 from pandas.errors import PerformanceWarning
 
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
@@ -15,22 +17,27 @@
 from pandas.core.computation.common import result_type_many
 
 
-def _align_core_single_unary_op(term):
+def _align_core_single_unary_op(
+    term,
+) -> Tuple[Union[partial, Type[FrameOrSeries]], Optional[Dict[str, int]]]:
+
+    typ: Union[partial, Type[FrameOrSeries]]
+    axes: Optional[Dict[str, int]] = None
+
     if isinstance(term.value, np.ndarray):
         typ = partial(np.asanyarray, dtype=term.value.dtype)
     else:
         typ = type(term.value)
-    ret = (typ,)
+        if hasattr(term.value, "axes"):
+            axes = _zip_axes_from_type(typ, term.value.axes)
 
-    if not hasattr(term.value, "axes"):
-        ret += (None,)
-    else:
-        ret += (_zip_axes_from_type(typ, term.value.axes),)
-    return ret
+    return typ, axes
 
 
-def _zip_axes_from_type(typ, new_axes):
-    axes = {ax_name: new_axes[ax_ind] for ax_ind, ax_name in typ._AXIS_NAMES.items()}
+def _zip_axes_from_type(
+    typ: Type[FrameOrSeries], new_axes: Sequence[int]
+) -> Dict[str, int]:
+    axes = {name: new_axes[i] for i, name in typ._AXIS_NAMES.items()}
     return axes
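
A rough, hypothetical illustration of what the refactored helper now returns (not from the commit; FakeTerm is a stand-in for the expression engine's term objects, and the snippet relies on internal pandas APIs as they exist in this changeset):

from collections import namedtuple

import numpy as np
import pandas as pd

from pandas.core.computation.align import _align_core_single_unary_op

FakeTerm = namedtuple("FakeTerm", ["value"])  # hypothetical stand-in for a Term

# ndarray terms: a reconstruction callable and no axes mapping
typ, axes = _align_core_single_unary_op(FakeTerm(np.array([1.0, 2.0])))
print(typ, axes)  # functools.partial(asanyarray, dtype=float64) None

# pandas terms: the concrete class plus an axis-name -> axis mapping
typ, axes = _align_core_single_unary_op(FakeTerm(pd.Series([1, 2])))
print(typ, axes)  # <class 'pandas.core.series.Series'> {'index': RangeIndex(...)}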

pandas/core/config_init.py

Lines changed: 3 additions & 2 deletions
@@ -300,14 +300,15 @@ def table_schema_cb(key):
     _enable_data_resource_formatter(cf.get_option(key))
 
 
-def is_terminal():
+def is_terminal() -> bool:
     """
     Detect if Python is running in a terminal.
 
     Returns True if Python is running in a terminal or False if not.
     """
     try:
-        ip = get_ipython()
+        # error: Name 'get_ipython' is not defined
+        ip = get_ipython()  # type: ignore
     except NameError:  # assume standard Python interpreter in a terminal
         return True
     else:

pandas/core/dtypes/common.py

Lines changed: 8 additions & 1 deletion
@@ -633,7 +633,14 @@ def is_string_dtype(arr_or_dtype) -> bool:
 
     # TODO: gh-15585: consider making the checks stricter.
     def condition(dtype) -> bool:
-        return dtype.kind in ("O", "S", "U") and not is_period_dtype(dtype)
+        return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype)
+
+    def is_excluded_dtype(dtype) -> bool:
+        """
+        These have kind = "O" but aren't string dtypes so need to be explicitly excluded
+        """
+        is_excluded_checks = (is_period_dtype, is_interval_dtype)
+        return any(is_excluded(dtype) for is_excluded in is_excluded_checks)
 
     return _is_dtype(arr_or_dtype, condition)
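
A quick check of the new exclusion logic, assuming a pandas build that includes this change; both period and interval dtypes report kind "O" yet are not string dtypes:

import pandas as pd
from pandas.api.types import is_string_dtype

print(is_string_dtype(pd.PeriodDtype("D")))    # False (already excluded before)
print(is_string_dtype(pd.IntervalDtype()))     # False (newly excluded, GH 30568)
print(is_string_dtype(pd.Series(["a", "b"])))  # True, plain object-dtype strings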

pandas/core/dtypes/dtypes.py

Lines changed: 1 addition & 1 deletion
@@ -974,7 +974,7 @@ class IntervalDtype(PandasExtensionDtype):
     """
 
     name = "interval"
-    kind: Optional[str_type] = None
+    kind: str_type = "O"
     str = "|O08"
     base = np.dtype("O")
     num = 103
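
With this change the kind attribute matches the object kind of the backing NumPy dtype, so generic dtype checks no longer need a None special case. A small sketch, not part of the commit:

import pandas as pd

dtype = pd.IntervalDtype("int64")
print(dtype.kind)  # "O", previously None
print(dtype.base)  # object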

pandas/core/dtypes/generic.py

Lines changed: 4 additions & 1 deletion
@@ -4,7 +4,10 @@
 # define abstract base classes to enable isinstance type checking on our
 # objects
 def create_pandas_abc_type(name, attr, comp):
-    @classmethod
+
+    # https://github.com/python/mypy/issues/1006
+    # error: 'classmethod' used with a non-method
+    @classmethod  # type: ignore
     def _check(cls, inst) -> bool:
         return getattr(inst, attr, "_typ") in comp
 

pandas/core/groupby/generic.py

Lines changed: 3 additions & 0 deletions
@@ -759,6 +759,9 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
                     periods=periods, fill_method=fill_method, limit=limit, freq=freq
                 )
             )
+        if fill_method is None:  # GH30463
+            fill_method = "pad"
+            limit = 0
         filled = getattr(self, fill_method)(limit=limit)
         fill_grp = filled.groupby(self.grouper.codes)
         shifted = fill_grp.shift(periods=periods, freq=freq)
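
A hedged example of the fixed behavior (GH 30463): before this hunk the call below raised TypeError because getattr(self, None) is not a valid fill method; now fill_method=None is treated as "do not fill NaNs before computing the change".

import numpy as np
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "vals": [1.0, np.nan, 2.0, 4.0]})

# Previously: TypeError; now: percentage change computed without pre-filling NaNs.
print(df.groupby("key")["vals"].pct_change(fill_method=None))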

pandas/core/groupby/groupby.py

Lines changed: 5 additions & 2 deletions
@@ -2340,6 +2340,9 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
                     axis=axis,
                 )
             )
+        if fill_method is None:  # GH30463
+            fill_method = "pad"
+            limit = 0
         filled = getattr(self, fill_method)(limit=limit)
         fill_grp = filled.groupby(self.grouper.codes)
         shifted = fill_grp.shift(periods=periods, freq=freq)
@@ -2508,9 +2511,9 @@ def get_groupby(
     squeeze: bool = False,
     observed: bool = False,
     mutated: bool = False,
-):
+) -> GroupBy:
 
-    klass: Union[Type["SeriesGroupBy"], Type["DataFrameGroupBy"]]
+    klass: Type[GroupBy]
     if isinstance(obj, Series):
         from pandas.core.groupby.generic import SeriesGroupBy
 

pandas/core/indexes/category.py

Lines changed: 2 additions & 1 deletion
@@ -1,5 +1,5 @@
 import operator
-from typing import Any
+from typing import Any, List
 
 import numpy as np
 
@@ -583,6 +583,7 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
 
         target = ibase.ensure_index(target)
 
+        missing: List[int]
         if self.equals(target):
             indexer = None
             missing = []

pandas/core/util/hashing.py

Lines changed: 2 additions & 1 deletion
@@ -85,11 +85,12 @@ def hash_pandas_object(
     if isinstance(obj, ABCMultiIndex):
         return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False)
 
-    if isinstance(obj, ABCIndexClass):
+    elif isinstance(obj, ABCIndexClass):
         h = hash_array(obj.values, encoding, hash_key, categorize).astype(
             "uint64", copy=False
         )
         h = Series(h, index=obj, dtype="uint64", copy=False)
+
     elif isinstance(obj, ABCSeries):
         h = hash_array(obj.values, encoding, hash_key, categorize).astype(
             "uint64", copy=False

pandas/io/json/_normalize.py

Lines changed: 5 additions & 5 deletions
@@ -112,7 +112,7 @@ def nested_to_record(
 def _json_normalize(
     data: Union[Dict, List[Dict]],
     record_path: Optional[Union[str, List]] = None,
-    meta: Optional[Union[str, List]] = None,
+    meta: Optional[Union[str, List[Union[str, List[str]]]]] = None,
     meta_prefix: Optional[str] = None,
     record_prefix: Optional[str] = None,
     errors: Optional[str] = "raise",
@@ -265,21 +265,21 @@ def _pull_field(js, spec):
     elif not isinstance(meta, list):
         meta = [meta]
 
-    meta = [m if isinstance(m, list) else [m] for m in meta]
+    _meta = [m if isinstance(m, list) else [m] for m in meta]
 
     # Disastrously inefficient for now
     records: List = []
     lengths = []
 
     meta_vals: DefaultDict = defaultdict(list)
-    meta_keys = [sep.join(val) for val in meta]
+    meta_keys = [sep.join(val) for val in _meta]
 
     def _recursive_extract(data, path, seen_meta, level=0):
         if isinstance(data, dict):
             data = [data]
         if len(path) > 1:
             for obj in data:
-                for val, key in zip(meta, meta_keys):
+                for val, key in zip(_meta, meta_keys):
                     if level + 1 == len(val):
                         seen_meta[key] = _pull_field(obj, val[-1])
 
@@ -296,7 +296,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
 
                 # For repeating the metadata later
                 lengths.append(len(recs))
-                for val, key in zip(meta, meta_keys):
+                for val, key in zip(_meta, meta_keys):
                     if level + 1 > len(val):
                         meta_val = seen_meta[key]
                     else:
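
The widened annotation documents that meta accepts a single key, a flat list of keys, or a mix of keys and nested-key paths (lists of str). A small usage sketch, not part of the commit, using the public pd.json_normalize wrapper:

import pandas as pd

data = [
    {
        "state": "FL",
        "info": {"governor": "Rick Scott"},
        "counties": [{"name": "Dade", "population": 12345}],
    }
]

# meta mixes a plain key ("state") with a nested path (["info", "governor"]).
result = pd.json_normalize(
    data, record_path="counties", meta=["state", ["info", "governor"]]
)
print(result)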

pandas/tests/dtypes/test_dtypes.py

Lines changed: 4 additions & 0 deletions
@@ -685,6 +685,10 @@ def test_caching(self):
         tm.round_trip_pickle(dtype)
         assert len(IntervalDtype._cache) == 0
 
+    def test_not_string(self):
+        # GH30568: though IntervalDtype has object kind, it cannot be string
+        assert not is_string_dtype(IntervalDtype())
+
 
 class TestCategoricalDtypeParametrized:
     @pytest.mark.parametrize(

pandas/tests/extension/base/dtype.py

Lines changed: 1 addition & 2 deletions
@@ -16,8 +16,7 @@ def test_name(self, dtype):
 
     def test_kind(self, dtype):
         valid = set("biufcmMOSUV")
-        if dtype.kind is not None:
-            assert dtype.kind in valid
+        assert dtype.kind in valid
 
     def test_construct_from_string_own_name(self, dtype):
         result = dtype.construct_from_string(dtype.name)

pandas/tests/groupby/test_transform.py

Lines changed: 7 additions & 15 deletions
@@ -877,27 +877,19 @@ def test_pad_stable_sorting(fill_method):
         ),
     ],
 )
-@pytest.mark.parametrize(
-    "periods,fill_method,limit",
-    [
-        (1, "ffill", None),
-        (1, "ffill", 1),
-        (1, "bfill", None),
-        (1, "bfill", 1),
-        (-1, "ffill", None),
-        (-1, "ffill", 1),
-        (-1, "bfill", None),
-        (-1, "bfill", 1),
-    ],
-)
+@pytest.mark.parametrize("periods", [1, -1])
+@pytest.mark.parametrize("fill_method", ["ffill", "bfill", None])
+@pytest.mark.parametrize("limit", [None, 1])
 def test_pct_change(test_series, freq, periods, fill_method, limit):
-    # GH 21200, 21621
+    # GH 21200, 21621, 30463
     vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4]
     keys = ["a", "b"]
     key_v = np.repeat(keys, len(vals))
     df = DataFrame({"key": key_v, "vals": vals * 2})
 
-    df_g = getattr(df.groupby("key"), fill_method)(limit=limit)
+    df_g = df
+    if fill_method is not None:
+        df_g = getattr(df.groupby("key"), fill_method)(limit=limit)
     grp = df_g.groupby(df.key)
 
     expected = grp["vals"].obj / grp["vals"].shift(periods) - 1

pandas/tests/util/test_hashing.py

Lines changed: 21 additions & 0 deletions
@@ -353,3 +353,24 @@ def test_hash_collisions():
 
     result = hash_array(np.asarray(hashes, dtype=object), "utf8")
     tm.assert_numpy_array_equal(result, np.concatenate([expected1, expected2], axis=0))
+
+
+def test_hash_with_tuple():
+    # GH#28969 array containing a tuple raises on call to arr.astype(str)
+    # apparently a numpy bug github.com/numpy/numpy/issues/9441
+
+    df = pd.DataFrame({"data": [tuple("1"), tuple("2")]})
+    result = hash_pandas_object(df)
+    expected = pd.Series([10345501319357378243, 8331063931016360761], dtype=np.uint64)
+    tm.assert_series_equal(result, expected)
+
+    df2 = pd.DataFrame({"data": [tuple([1]), tuple([2])]})
+    result = hash_pandas_object(df2)
+    expected = pd.Series([9408946347443669104, 3278256261030523334], dtype=np.uint64)
+    tm.assert_series_equal(result, expected)
+
+    # require that the elements of such tuples are themselves hashable
+
+    df3 = pd.DataFrame({"data": [tuple([1, []]), tuple([2, {}])]})
+    with pytest.raises(TypeError, match="unhashable type: 'list'"):
+        hash_pandas_object(df3)

scripts/validate_docstrings.py

Lines changed: 1 addition & 1 deletion
@@ -286,7 +286,7 @@ def _load_obj(name):
             continue
 
     if "obj" not in locals():
-        raise ImportError("No module can be imported " 'from "{}"'.format(name))
+        raise ImportError(f'No module can be imported from "{name}"')
 
     for part in func_parts:
         obj = getattr(obj, part)
