
COMPAT: np 1.18 wants explicit dtype=object #30035


Closed · wants to merge 11 commits
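For context: NumPy's deprecation of "automatic object dtype" (numpy/numpy#15041) means that constructing an array from a ragged sequence without an explicit dtype now warns. A minimal sketch of the behavior this PR accommodates (the warning text referenced below is the dev-branch wording matched by the filters in this diff):

    import numpy as np

    # Ragged input: a scalar next to a tuple. Without an explicit dtype,
    # np.asarray([np.nan, (1,)]) emitted a DeprecationWarning on the NumPy
    # dev builds tracked in numpy/numpy#15041 ("... with automatic object
    # dtype ..."); passing dtype=object opts in explicitly and stays quiet.
    arr = np.asarray([np.nan, (1,)], dtype=object)
    print(arr.dtype, arr.shape)  # object (2,)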
7 changes: 6 additions & 1 deletion pandas/core/arrays/numpy_.py
@@ -1,5 +1,6 @@
 import numbers
 from typing import Union
+import warnings

 import numpy as np
 from numpy.lib.mixins import NDArrayOperatorsMixin
@@ -158,7 +159,11 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         if isinstance(dtype, PandasDtype):
             dtype = dtype._dtype

-        result = np.asarray(scalars, dtype=dtype)
+        with warnings.catch_warnings():
+            # See https://github.com/numpy/numpy/issues/15041
+            warnings.filterwarnings("ignore", ".*with automatic object dtype.*")
+            result = np.asarray(scalars, dtype=dtype)
Contributor

This should be fixed at the caller, not here.

Member Author

Yeah, pretty much every usage where we're just suppressing the warnings is a kludge.

Contributor

My thought for a way forward, with the new sentinel in numpy/numpy#15119, is that you would leave this unchanged. When needed, you would add the sentinel as the dtype in the call to _from_sequence (or even in whatever is calling _from_sequence). During the deprecation period, try to rework the use cases that justify the inefficient ragged array, and get back to NumPy with any use case for such a construct so we can stop or rethink the deprecation.
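A rough sketch of that caller-side approach (the sentinel proposed in numpy/numpy#15119 is represented here by plain dtype=object, and the function below is a simplified stand-in, not the real _from_sequence):

    import numpy as np

    def from_sequence(scalars, dtype=None):
        # No warning suppression here; the dtype decision is the caller's.
        return np.asarray(scalars, dtype=dtype)

    # A caller that knows its input may be ragged opts in explicitly
    # instead of relying on automatic object-dtype inference.
    arr = from_sequence([np.nan, (1,)], dtype=object)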


         if copy and result is scalars:
             result = result.copy()
         return cls(result)
9 changes: 8 additions & 1 deletion pandas/core/common.py
@@ -10,6 +10,7 @@
 from functools import partial
 import inspect
 from typing import Any, Iterable, Union
+import warnings

 import numpy as np

@@ -224,7 +225,13 @@ def asarray_tuplesafe(values, dtype=None):
     if isinstance(values, list) and dtype in [np.object_, object]:
         return construct_1d_object_array_from_listlike(values)

-    result = np.asarray(values, dtype=dtype)
+    if dtype is None:
+        with warnings.catch_warnings():
+            # See https://github.com/numpy/numpy/issues/15041
+            warnings.filterwarnings("ignore", ".*with automatic object dtype.*")
+            result = np.asarray(values)
+    else:
+        result = np.asarray(values, dtype=dtype)

     if issubclass(result.dtype.type, str):
         result = np.asarray(values, dtype=object)
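For reference, the construct_1d_object_array_from_listlike fast path taken above sidesteps NumPy's shape inference by allocating the result first and assigning into it; it is roughly:

    import numpy as np

    def construct_1d_object_array_from_listlike(values):
        # Allocate a 1-d object array up front, then fill it: assigning
        # into an existing object array never goes through the deprecated
        # automatic-object-dtype inference.
        result = np.empty(len(values), dtype=object)
        result[:] = values
        return result

    construct_1d_object_array_from_listlike([(1, 2), (3,)])  # 1-d, length 2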
7 changes: 6 additions & 1 deletion pandas/core/dtypes/cast.py
@@ -1,6 +1,7 @@
""" routings for casting """

from datetime import datetime, timedelta
import warnings

import numpy as np

@@ -1046,7 +1047,11 @@ def maybe_infer_to_datetimelike(value, convert_dates: bool = False):

     if not is_list_like(v):
         v = [v]

-    v = np.array(v, copy=False)
+    with warnings.catch_warnings():
+        # See https://github.com/numpy/numpy/issues/15041
+        warnings.filterwarnings("ignore", ".*with automatic object dtype.*")
+        v = np.array(v, copy=False)

     # we only care about object dtypes
     if not is_object_dtype(v):
13 changes: 12 additions & 1 deletion pandas/core/dtypes/concat.py
@@ -1,6 +1,7 @@
"""
Utility functions related to concat
"""
import warnings

import numpy as np

@@ -134,6 +135,7 @@ def is_nonempty(x) -> bool:
         # coerce to object
         to_concat = [x.astype("object") for x in to_concat]

+    to_concat = [_safe_array(x) for x in to_concat]
     return np.concatenate(to_concat, axis=axis)


Expand Down Expand Up @@ -172,7 +174,7 @@ def concat_categorical(to_concat, axis: int = 0):
     to_concat = [
         x._internal_get_values()
         if is_categorical_dtype(x.dtype)
-        else np.asarray(x).ravel()
+        else _safe_array(x).ravel()
         if not is_datetime64tz_dtype(x)
         else np.asarray(x.astype(object))
         for x in to_concat
@@ -183,6 +185,15 @@
     return result


+def _safe_array(x):
Contributor

So I actually like this as a real function; can you move it to pandas.compat.numpy and use it everywhere?

Contributor

Can you do this one?

+    # FIXME: kludge
+    with warnings.catch_warnings():
+        # See https://github.com/numpy/numpy/issues/15041
+        warnings.filterwarnings("ignore", ".*with automatic object dtype.*")
+        arr = np.asarray(x)
+    return arr
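A sketch of what hoisting this into pandas.compat.numpy could look like (the np_asarray_safe name and placement are hypothetical; in this PR the helper stays local to concat.py):

    # hypothetical addition to pandas/compat/numpy/__init__.py
    import warnings

    import numpy as np

    def np_asarray_safe(x, dtype=None):
        # Shared wrapper around np.asarray that suppresses the
        # automatic-object-dtype deprecation (numpy/numpy#15041).
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", ".*with automatic object dtype.*")
            return np.asarray(x, dtype=dtype)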


 def union_categoricals(
     to_union, sort_categories: bool = False, ignore_order: bool = False
 ):
3 changes: 3 additions & 0 deletions pandas/core/internals/blocks.py
@@ -1764,6 +1764,9 @@ def get_values(self, dtype=None):
         return values

     def to_dense(self):
+        if self.dtype.kind == "O":
+            # See https://github.com/numpy/numpy/issues/15041
+            return np.asarray(self.values, dtype=object)
         return np.asarray(self.values)
Comment on lines +1767 to 1770
Contributor @eric-wieser commented Dec 4, 2019

Since this is documented as being 1d only, this should be:

    x = np.empty(len(self.values), dtype=self.dtype)
    x[:] = self.values
    return x

which will work correctly even if values == [[1, 2], [3, 4]]
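One caveat worth noting (an editorial aside, not from the thread): slice assignment converts the right-hand side through np.asarray first, so a uniform nested list is still shape-inferred and can fail to broadcast into a 1-d target; assigning element-wise avoids inference entirely:

    import numpy as np

    values = [[1, 2], [3, 4]]
    print(np.asarray(values, dtype=object).shape)  # (2, 2): shape inferred

    x = np.empty(len(values), dtype=object)
    for i, row in enumerate(values):
        x[i] = row       # one list per element, no shape inference
    print(x.shape)       # (2,)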


     def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
5 changes: 5 additions & 0 deletions pandas/io/formats/format.py
@@ -1485,6 +1485,11 @@ def _format_strings(self) -> List[str]:
         if is_categorical_dtype(values.dtype):
             # Categorical is special for now, so that we can preserve tzinfo
             array = values._internal_get_values()
+        elif values.dtype.kind == "O":
Contributor

Prefer to use is_object_dtype.

Member Author

I didn't check whether that works on JSONArray, just that values.dtype == object doesn't.
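The distinction is easy to reproduce with a minimal extension dtype (a stand-in sketch assuming pandas' default ExtensionDtype behavior, not the actual JSONArray test code):

    from pandas.api.extensions import ExtensionDtype

    class JSONDtype(ExtensionDtype):
        # Minimal stand-in: ExtensionDtype reports kind == "O" by default,
        # while comparing unequal to the builtin object type.
        name = "json"
        type = dict

    dtype = JSONDtype()
    print(dtype.kind == "O")  # True
    print(dtype == object)    # False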

+            # numpy>=1.18 wants object dtype passed explicitly
+            # Note: dtype.kind check works for json extension tests, while
+            # dtype == object check does not.
+            array = np.asarray(values, dtype=object)
         else:
             array = np.asarray(values)

6 changes: 5 additions & 1 deletion pandas/tests/extension/base/interface.py
@@ -32,7 +32,11 @@ def test_memory_usage(self, data):
         assert result == s.nbytes

     def test_array_interface(self, data):
-        result = np.array(data)
+        if hasattr(data, "dtype") and data.dtype.kind == "O":
+            # e.g. JSONArray
+            result = np.array(data, dtype=object)
+        else:
+            result = np.array(data)
         assert result[0] == data[0]

         result = np.array(data, dtype=object)
10 changes: 6 additions & 4 deletions pandas/tests/extension/test_numpy.py
@@ -51,7 +51,7 @@ def data_missing(allow_in_pandas, dtype):
     if dtype.numpy_dtype == "object":
         if _np_version_under1p16:
             raise pytest.skip("Skipping for NumPy <1.16")
-        return PandasArray(np.array([np.nan, (1,)]))
+        return PandasArray(np.array([np.nan, (1,)], dtype=object))
     return PandasArray(np.array([np.nan, 1.0]))


@@ -78,7 +78,7 @@ def data_for_sorting(allow_in_pandas, dtype):
     if dtype.numpy_dtype == "object":
         # Use an empty tuple for first element, then remove,
         # to disable np.array's shape inference.
-        return PandasArray(np.array([(), (2,), (3,), (1,)])[1:])
+        return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:])
Member Author

@TomAugspurger does the comment above about shape inference have any bearing on the inference here?

Contributor

I think using object dtype is still correct, and I think the comment is still correct.

If we want to get rid of the empty tuple / shape stuff, we could probably allocate the array with the right shape and set the values later:

    In [6]: a = np.empty((3,), dtype=object)

    In [7]: a[:] = (2,), (3,), (1,)
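A conservative variant of the same idea (slice-assigning a sequence of tuples goes through np.asarray and may be interpreted as a 2-d (3, 1) array, so an element-wise loop sidesteps the question entirely):

    import numpy as np

    data = [(2,), (3,), (1,)]
    a = np.empty(len(data), dtype=object)
    for i, item in enumerate(data):
        a[i] = item  # each tuple stays a single array element
    # a is the 1-d object array of tuples the fixture needs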

     return PandasArray(np.array([1, 2, 0]))


@@ -90,7 +90,7 @@ def data_missing_for_sorting(allow_in_pandas, dtype):
     A < B and NA missing.
     """
     if dtype.numpy_dtype == "object":
-        return PandasArray(np.array([(1,), np.nan, (0,)]))
+        return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object))
     return PandasArray(np.array([1, np.nan, 0]))


@@ -106,7 +106,9 @@ def data_for_grouping(allow_in_pandas, dtype):
         a, b, c = (1,), (2,), (3,)
     else:
         a, b, c = np.arange(3)
-    return PandasArray(np.array([b, b, np.nan, np.nan, a, a, b, c]))
+    return PandasArray(
+        np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype)
+    )


@pytest.fixture