Skip to content

REF: avoid _internal_get_values in json get_values #32754

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,19 @@ static PyObject *get_values(PyObject *obj) {

PRINTMARK();

if (PyObject_HasAttrString(obj, "_internal_get_values")) {
if (PyObject_TypeCheck(obj, cls_index) || PyObject_TypeCheck(obj, cls_series)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we'll see from the test cases, but is this really required? AFAICT, on master `_internal_get_values` and `values` return the same result for a dt64tz array:

>>> dts = pd.date_range(start="2020-01-01", freq="D", periods=3, tz="US/Pacific")
>>> dts._internal_get_values()
array(['2020-01-01T08:00:00.000000000', '2020-01-02T08:00:00.000000000',
       '2020-01-03T08:00:00.000000000'], dtype='datetime64[ns]')
>>> dts.values
array(['2020-01-01T08:00:00.000000000', '2020-01-02T08:00:00.000000000',
       '2020-01-03T08:00:00.000000000'], dtype='datetime64[ns]')

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's Categorical[dt64tz] and Sparse that make the extra `__array__` call necessary.

// The special cases to worry about are dt64tz and category[dt64tz].
// In both cases we want the UTC-localized datetime64 ndarray,
// without going through an object array of Timestamps.
PRINTMARK();
values = PyObject_CallMethod(obj, "_internal_get_values", NULL);
values = PyObject_GetAttrString(obj, "values");

if (values == NULL) {
// Clear so we can subsequently try another method
PyErr_Clear();
} else if (PyObject_HasAttrString(values, "__array__")) {
// We may have gotten a Categorical or Sparse array so call np.array
values = PyObject_CallMethod(values, "__array__", NULL);
} else if (!PyArray_CheckExact(values)) {
// Didn't get a numpy array, so keep trying
PRINTMARK();
Expand Down
20 changes: 0 additions & 20 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5382,26 +5382,6 @@ def _values(self) -> np.ndarray:
"""internal implementation"""
return self.values

def _internal_get_values(self) -> np.ndarray:
"""
Return an ndarray after converting sparse values to dense.

This is the same as ``.values`` for non-sparse data. For sparse
data contained in a `SparseArray`, the data are first
converted to a dense representation.

Returns
-------
numpy.ndarray
Numpy representation of DataFrame.

See Also
--------
values : Numpy representation of DataFrame.
SparseArray : Container for sparse data.
"""
return self.values

@property
def dtypes(self):
"""
Expand Down
44 changes: 0 additions & 44 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3870,50 +3870,6 @@ def _values(self) -> Union[ExtensionArray, np.ndarray]:
"""
return self._data

def _internal_get_values(self) -> np.ndarray:
"""
Return `Index` data as a `numpy.ndarray`.

Returns
-------
numpy.ndarray
A one-dimensional numpy array of the `Index` values.

See Also
--------
Index.values : The attribute that _internal_get_values wraps.

Examples
--------
Getting the `Index` values of a `DataFrame`:

>>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
... index=['a', 'b', 'c'], columns=['A', 'B', 'C'])
>>> df
A B C
a 1 2 3
b 4 5 6
c 7 8 9
>>> df.index._internal_get_values()
array(['a', 'b', 'c'], dtype=object)

Standalone `Index` values:

>>> idx = pd.Index(['1', '2', '3'])
>>> idx._internal_get_values()
array(['1', '2', '3'], dtype=object)

`MultiIndex` arrays also have only one dimension:

>>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], ['a', 'b', 'c']],
... names=('number', 'letter'))
>>> midx._internal_get_values()
array([(1, 'a'), (2, 'b'), (3, 'c')], dtype=object)
>>> midx._internal_get_values().ndim
1
"""
return self.values

def _get_engine_target(self) -> np.ndarray:
"""
Get the ndarray that we can pass to the IndexEngine constructor.
Expand Down
18 changes: 0 additions & 18 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,6 @@ def get_block_values_for_json(self) -> np.ndarray:
# TODO(2DEA): reshape will be unnecessary with 2D EAs
return np.asarray(self.values).reshape(self.shape)

def to_dense(self):
return self.values.view()

@property
def fill_value(self):
return np.nan
Expand Down Expand Up @@ -1820,9 +1817,6 @@ def get_values(self, dtype=None):
def array_values(self) -> ExtensionArray:
return self.values

def to_dense(self):
return np.asarray(self.values)

def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
"""override to use ExtensionArray astype for the conversion"""
values = self.values
Expand Down Expand Up @@ -2378,12 +2372,6 @@ def get_values(self, dtype=None):
values = values.reshape(1, -1)
return values

def to_dense(self):
# we request M8[ns] dtype here, even though it discards tzinfo,
# as lots of code (e.g. anything using values_from_object)
# expects that behavior.
return np.asarray(self.values, dtype=_NS_DTYPE)

def _slice(self, slicer):
""" return a slice of my values """
if isinstance(slicer, tuple):
Expand Down Expand Up @@ -2911,12 +2899,6 @@ def __init__(self, values, placement, ndim=None):
def _holder(self):
return Categorical

def to_dense(self):
# Categorical.get_values returns a DatetimeIndex for datetime
# categories, so we can't simply use `np.asarray(self.values)` like
# other types.
return self.values._internal_get_values()

def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs):
""" convert to our native types format, slicing if desired """
values = self.values
Expand Down
12 changes: 0 additions & 12 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,18 +528,6 @@ def _values(self):
def array(self) -> ExtensionArray:
return self._data._block.array_values()

def _internal_get_values(self):
"""
Same as values (but handles sparseness conversions); is a view.

Returns
-------
numpy.ndarray
Data of the Series.
"""
blk = self._data._block
return np.array(blk.to_dense(), copy=False)

# ops
def ravel(self, order="C"):
"""
Expand Down