From adc41984b75e6f927f88696bae884b1cd27c346c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Mar 2020 09:12:31 -0700 Subject: [PATCH] REF: avoid _internal_get_values in json --- pandas/_libs/src/ujson/python/objToJSON.c | 10 ++++-- pandas/core/generic.py | 20 ----------- pandas/core/indexes/base.py | 44 ----------------------- pandas/core/internals/blocks.py | 18 ---------- pandas/core/series.py | 12 ------- 5 files changed, 8 insertions(+), 96 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index e2deffb46c4e6..db1feccb4d0c6 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -222,13 +222,19 @@ static PyObject *get_values(PyObject *obj) { PRINTMARK(); - if (PyObject_HasAttrString(obj, "_internal_get_values")) { + if (PyObject_TypeCheck(obj, cls_index) || PyObject_TypeCheck(obj, cls_series)) { + // The special cases to worry about are dt64tz and category[dt64tz]. + // In both cases we want the UTC-localized datetime64 ndarray, + // without going through an object array of Timestamps. 
PRINTMARK(); - values = PyObject_CallMethod(obj, "_internal_get_values", NULL); + values = PyObject_GetAttrString(obj, "values"); if (values == NULL) { // Clear so we can subsequently try another method PyErr_Clear(); + } else if (PyObject_HasAttrString(values, "__array__")) { + // We may have gotten a Categorical or Sparse array so call np.array + values = PyObject_CallMethod(values, "__array__", NULL); } else if (!PyArray_CheckExact(values)) { // Didn't get a numpy array, so keep trying PRINTMARK(); diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d56311331d4d..31313a766222f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5382,26 +5382,6 @@ def _values(self) -> np.ndarray: """internal implementation""" return self.values - def _internal_get_values(self) -> np.ndarray: - """ - Return an ndarray after converting sparse values to dense. - - This is the same as ``.values`` for non-sparse data. For sparse - data contained in a `SparseArray`, the data are first - converted to a dense representation. - - Returns - ------- - numpy.ndarray - Numpy representation of DataFrame. - - See Also - -------- - values : Numpy representation of DataFrame. - SparseArray : Container for sparse data. - """ - return self.values - @property def dtypes(self): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 31966489403f4..943a82900379c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3870,50 +3870,6 @@ def _values(self) -> Union[ExtensionArray, np.ndarray]: """ return self._data - def _internal_get_values(self) -> np.ndarray: - """ - Return `Index` data as an `numpy.ndarray`. - - Returns - ------- - numpy.ndarray - A one-dimensional numpy array of the `Index` values. - - See Also - -------- - Index.values : The attribute that _internal_get_values wraps. - - Examples - -------- - Getting the `Index` values of a `DataFrame`: - - >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], - ... 
index=['a', 'b', 'c'], columns=['A', 'B', 'C']) - >>> df - A B C - a 1 2 3 - b 4 5 6 - c 7 8 9 - >>> df.index._internal_get_values() - array(['a', 'b', 'c'], dtype=object) - - Standalone `Index` values: - - >>> idx = pd.Index(['1', '2', '3']) - >>> idx._internal_get_values() - array(['1', '2', '3'], dtype=object) - - `MultiIndex` arrays also have only one dimension: - - >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], ['a', 'b', 'c']], - ... names=('number', 'letter')) - >>> midx._internal_get_values() - array([(1, 'a'), (2, 'b'), (3, 'c')], dtype=object) - >>> midx._internal_get_values().ndim - 1 - """ - return self.values - def _get_engine_target(self) -> np.ndarray: """ Get the ndarray that we can pass to the IndexEngine constructor. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1a92a9486e9e4..18022fe462da0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -237,9 +237,6 @@ def get_block_values_for_json(self) -> np.ndarray: # TODO(2DEA): reshape will be unnecessary with 2D EAs return np.asarray(self.values).reshape(self.shape) - def to_dense(self): - return self.values.view() - @property def fill_value(self): return np.nan @@ -1827,9 +1824,6 @@ def get_values(self, dtype=None): def array_values(self) -> ExtensionArray: return self.values - def to_dense(self): - return np.asarray(self.values) - def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): """override to use ExtensionArray astype for the conversion""" values = self.values @@ -2385,12 +2379,6 @@ def get_values(self, dtype=None): values = values.reshape(1, -1) return values - def to_dense(self): - # we request M8[ns] dtype here, even though it discards tzinfo, - # as lots of code (e.g. anything using values_from_object) - # expects that behavior. 
- return np.asarray(self.values, dtype=_NS_DTYPE) - def _slice(self, slicer): """ return a slice of my values """ if isinstance(slicer, tuple): @@ -2918,12 +2906,6 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return Categorical - def to_dense(self): - # Categorical.get_values returns a DatetimeIndex for datetime - # categories, so we can't simply use `np.asarray(self.values)` like - # other types. - return self.values._internal_get_values() - def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ values = self.values diff --git a/pandas/core/series.py b/pandas/core/series.py index e120695cc83e8..8f7acfdfc714f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -528,18 +528,6 @@ def _values(self): def array(self) -> ExtensionArray: return self._data._block.array_values() - def _internal_get_values(self): - """ - Same as values (but handles sparseness conversions); is a view. - - Returns - ------- - numpy.ndarray - Data of the Series. - """ - blk = self._data._block - return np.array(blk.to_dense(), copy=False) - # ops def ravel(self, order="C"): """