-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
COMPAT: np 1.18 wants explicit dtype=object #30035
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
bd1b261
af573eb
fa5df88
1c6c7ff
8f10b6a
4be57a6
77c592f
1ba69b1
5ebff2e
52ae9df
3584dab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
""" | ||
Utility functions related to concat | ||
""" | ||
import warnings | ||
|
||
import numpy as np | ||
|
||
|
@@ -134,6 +135,7 @@ def is_nonempty(x) -> bool: | |
# coerce to object | ||
to_concat = [x.astype("object") for x in to_concat] | ||
|
||
to_concat = [_safe_array(x) for x in to_concat] | ||
return np.concatenate(to_concat, axis=axis) | ||
|
||
|
||
|
@@ -172,7 +174,7 @@ def concat_categorical(to_concat, axis: int = 0): | |
to_concat = [ | ||
x._internal_get_values() | ||
if is_categorical_dtype(x.dtype) | ||
else np.asarray(x).ravel() | ||
else _safe_array(x).ravel() | ||
if not is_datetime64tz_dtype(x) | ||
else np.asarray(x.astype(object)) | ||
for x in to_concat | ||
|
@@ -183,6 +185,15 @@ def concat_categorical(to_concat, axis: int = 0): | |
return result | ||
|
||
|
||
def _safe_array(x): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So I actually like this as a real function; can you move it to pandas.compat.numpy and use it everywhere? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you do this one |
||
# FIXME: kludge | ||
with warnings.catch_warnings(): | ||
# See https://github.com/numpy/numpy/issues/15041 | ||
warnings.filterwarnings("ignore", ".*with automatic object dtype.*") | ||
arr = np.asarray(x) | ||
return arr | ||
|
||
|
||
def union_categoricals( | ||
to_union, sort_categories: bool = False, ignore_order: bool = False | ||
): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1764,6 +1764,9 @@ def get_values(self, dtype=None): | |
return values | ||
|
||
def to_dense(self): | ||
if self.dtype.kind == "O": | ||
# See https://github.com/numpy/numpy/issues/15041 | ||
return np.asarray(self.values, dtype=object) | ||
return np.asarray(self.values) | ||
Comment on lines
+1767
to
1770
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since this is documented as being 1-D only, this should be:
which will work correctly even if |
||
|
||
def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1485,6 +1485,11 @@ def _format_strings(self) -> List[str]: | |
if is_categorical_dtype(values.dtype): | ||
# Categorical is special for now, so that we can preserve tzinfo | ||
array = values._internal_get_values() | ||
elif values.dtype.kind == "O": | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Prefer to use is_object_dtype. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't check whether that works on JSONArray, just that |
||
# numpy>=1.18 wants object dtype passed explicitly | ||
# Note: dtype.kind check works for json extension tests, while | ||
# dtype == object check does not. | ||
array = np.asarray(values, dtype=object) | ||
else: | ||
array = np.asarray(values) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,7 +51,7 @@ def data_missing(allow_in_pandas, dtype): | |
if dtype.numpy_dtype == "object": | ||
if _np_version_under1p16: | ||
raise pytest.skip("Skipping for NumPy <1.16") | ||
return PandasArray(np.array([np.nan, (1,)])) | ||
return PandasArray(np.array([np.nan, (1,)], dtype=object)) | ||
return PandasArray(np.array([np.nan, 1.0])) | ||
|
||
|
||
|
@@ -78,7 +78,7 @@ def data_for_sorting(allow_in_pandas, dtype): | |
if dtype.numpy_dtype == "object": | ||
# Use an empty tuple for first element, then remove, | ||
# to disable np.array's shape inference. | ||
return PandasArray(np.array([(), (2,), (3,), (1,)])[1:]) | ||
return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @TomAugspurger does the comment above about shape inference have any bearing on the inference here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think using If we want to get rid of the empty tuple / shape stuff, we could probably allocate the array with the right shape and set the values later:
In [6]: a = np.empty((3,), dtype=object)
In [7]: a[:] = (2,), (3,), (1,) |
||
return PandasArray(np.array([1, 2, 0])) | ||
|
||
|
||
|
@@ -90,7 +90,7 @@ def data_missing_for_sorting(allow_in_pandas, dtype): | |
A < B and NA missing. | ||
""" | ||
if dtype.numpy_dtype == "object": | ||
return PandasArray(np.array([(1,), np.nan, (0,)])) | ||
return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object)) | ||
return PandasArray(np.array([1, np.nan, 0])) | ||
|
||
|
||
|
@@ -106,7 +106,9 @@ def data_for_grouping(allow_in_pandas, dtype): | |
a, b, c = (1,), (2,), (3,) | ||
else: | ||
a, b, c = np.arange(3) | ||
return PandasArray(np.array([b, b, np.nan, np.nan, a, a, b, c])) | ||
return PandasArray( | ||
np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype) | ||
) | ||
|
||
|
||
@pytest.fixture | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be fixed at the caller, not here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yah, pretty much every usage where we're just suppressing the warnings was a kludge
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My thought for a way forward with the new sentinel in numpy/numpy#15119 is that you would leave this unchanged. When needed, you would add the sentinel as the dtype in the call to `_from_sequence` (or even to whoever is calling `_from_sequence`). During the deprecation period, try to rework the use case that justifies the inefficient ragged array, and get back to NumPy with the use case for such a construct so we can stop or rethink the deprecation.