Skip to content

COMPAT: np 1.18 wants explicit dtype=object #30035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Closed
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,12 +331,14 @@ def __new__(

# extension dtype
elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
data = np.asarray(data)
if not (dtype is None or is_object_dtype(dtype)):
# coerce to the provided dtype
ea_cls = dtype.construct_array_type()
data = ea_cls._from_sequence(data, dtype=dtype, copy=False)

else:
data = np.asarray(data, dtype=object)

# coerce to the object dtype
data = data.astype(object)
return Index(data, dtype=object, copy=copy, name=name, **kwargs)
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1764,6 +1764,9 @@ def get_values(self, dtype=None):
return values

def to_dense(self):
    """
    Return the values of this block as a dense 1-D ndarray.

    Returns
    -------
    numpy.ndarray
        For object-dtype blocks, an object-dtype ndarray of length
        ``len(self.values)``; otherwise whatever ``np.asarray`` produces.
    """
    if self.dtype.kind == "O":
        # See https://github.com/numpy/numpy/issues/15041
        # Allocate the 1-D object array explicitly and fill it by slice
        # assignment.  np.asarray(..., dtype=object) would let NumPy's
        # shape inference broadcast sequence-valued elements (e.g. when
        # values == [[1, 2], [3, 4]]) into a 2-D array, whereas this
        # block is documented as being 1-D only.
        out = np.empty(len(self.values), dtype=object)
        out[:] = self.values
        return out
    return np.asarray(self.values)
Comment on lines +1767 to 1770
Copy link
Contributor

@eric-wieser eric-wieser Dec 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is documented as being 1d only, this should be:

x = np.empty(len(self.values), dtype=self.dtype)
x[:] = self.values
return x

which will work correctly even if values == [[1, 2], [3, 4]]


def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
Expand Down
32 changes: 19 additions & 13 deletions pandas/core/ops/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Functions for defining unary operations.
"""
from typing import Any, Callable, Union
import warnings

import numpy as np

Expand Down Expand Up @@ -132,19 +133,24 @@ def dispatch_to_extension_op(
# The op calls will raise TypeError if the op is not defined
# on the ExtensionArray

try:
res_values = op(left, right)
except NullFrequencyError:
# DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
# on add/sub of integers (or int-like). We re-raise as a TypeError.
if keep_null_freq:
# TODO: remove keep_null_freq after Timestamp+int deprecation
# GH#22535 is enforced
raise
raise TypeError(
"incompatible type for a datetime/timedelta "
"operation [{name}]".format(name=op.__name__)
)
with warnings.catch_warnings():
# See https://github.com/numpy/numpy/issues/15041
warnings.filterwarnings("ignore", ".*with automatic object dtype.*")

try:
res_values = op(left, right)
except NullFrequencyError:
# DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
# on add/sub of integers (or int-like). We re-raise as a TypeError.
if keep_null_freq:
# TODO: remove keep_null_freq after Timestamp+int deprecation
# GH#22535 is enforced
raise
raise TypeError(
"incompatible type for a datetime/timedelta "
"operation [{name}]".format(name=op.__name__)
)

return res_values


Expand Down
14 changes: 12 additions & 2 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,20 @@ def cat_core(list_of_columns: List, sep: str):
"""
if sep == "":
# no need to interleave sep if it is empty
return np.sum(list_of_columns, axis=0)
with warnings.catch_warnings():
# See https://github.com/numpy/numpy/issues/15041
warnings.filterwarnings("ignore", ".*with automatic object dtype.*")
out = np.sum(list_of_columns, axis=0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can these be replaced with a sep.join(...) with a list-comprehension?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe? At the moment I'm just trying to get a handle on the scope of the problem.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will revert this for 1.18. Probably also on master (but probably only temporarily, to pacify downstream test suites for a bit!). The interesting thing would be if there are cases where it cannot be fixed easily. I.e. adding a warning filter should not be necessary except for specific tests. (I am sure you are aware, but the filter context manager is not a good solution in this code, since it is not thread safe at all.)

return out

list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
list_with_sep[::2] = list_of_columns
return np.sum(list_with_sep, axis=0)

with warnings.catch_warnings():
# See https://github.com/numpy/numpy/issues/15041
warnings.filterwarnings("ignore", ".*with automatic object dtype.*")
out = np.sum(list_with_sep, axis=0)
return out


def cat_safe(list_of_columns: List, sep: str):
Expand Down
5 changes: 5 additions & 0 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,11 @@ def _format_strings(self) -> List[str]:
if is_categorical_dtype(values.dtype):
# Categorical is special for now, so that we can preserve tzinfo
array = values._internal_get_values()
elif values.dtype.kind == "O":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prefer to use is_object_dtype

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't check whether that works on JSONArray, just that the values.dtype == object check doesn't.

# numpy>=1.18 wants object dtype passed explicitly
# Note: dtype.kind check works for json extension tests, while
# dtype == object check does not.
array = np.asarray(values, dtype=object)
else:
array = np.asarray(values)

Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def data_missing(allow_in_pandas, dtype):
if dtype.numpy_dtype == "object":
if _np_version_under1p16:
raise pytest.skip("Skipping for NumPy <1.16")
return PandasArray(np.array([np.nan, (1,)]))
return PandasArray(np.array([np.nan, (1,)], dtype=object))
return PandasArray(np.array([np.nan, 1.0]))


Expand All @@ -78,7 +78,7 @@ def data_for_sorting(allow_in_pandas, dtype):
if dtype.numpy_dtype == "object":
# Use an empty tuple for first element, then remove,
# to disable np.array's shape inference.
return PandasArray(np.array([(), (2,), (3,), (1,)])[1:])
return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger does the comment above about shape inference have any bearing on the inference here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using object dtype is still correct, and I think the comment is still correct.

If we want to get rid of the empty tuple / shape stuff, we could probably allocate the array with the right shape and set the values later

In [6]: a = np.empty((3,), dtype=object)

In [7]: a[:] = (2,), (3,), (1,)

return PandasArray(np.array([1, 2, 0]))


Expand All @@ -90,7 +90,7 @@ def data_missing_for_sorting(allow_in_pandas, dtype):
A < B and NA missing.
"""
if dtype.numpy_dtype == "object":
return PandasArray(np.array([(1,), np.nan, (0,)]))
return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object))
return PandasArray(np.array([1, np.nan, 0]))


Expand All @@ -106,7 +106,9 @@ def data_for_grouping(allow_in_pandas, dtype):
a, b, c = (1,), (2,), (3,)
else:
a, b, c = np.arange(3)
return PandasArray(np.array([b, b, np.nan, np.nan, a, a, b, c]))
return PandasArray(
np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype)
)


@pytest.fixture
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/json/test_ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,8 +761,9 @@ def test_array_list(self):
["a", "b"],
{"key": "val"},
]
arr = np.array(arr_list)
tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr)
arr = np.array(arr_list, dtype=object)
result = np.array(ujson.decode(ujson.encode(arr)), dtype=object)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you create an expected here

tm.assert_numpy_array_equal(result, arr)

def test_array_float(self):
dtype = np.float32
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ def test_append_index(self):
(1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"),
(1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"),
]
+ expected_tuples
+ expected_tuples,
dtype=object,
),
None,
)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2853,7 +2853,8 @@ def test_partition_index(self):
result = values.str.partition("_", expand=False)
exp = Index(
np.array(
[("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None]
[("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None],
dtype=object,
)
)
tm.assert_index_equal(result, exp)
Expand All @@ -2862,7 +2863,8 @@ def test_partition_index(self):
result = values.str.rpartition("_", expand=False)
exp = Index(
np.array(
[("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None]
[("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None],
dtype=object,
)
)
tm.assert_index_equal(result, exp)
Expand Down