From d05d866c5afbe8cab02a6b4f7123648490e6a7c9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 May 2020 12:50:41 -0700 Subject: [PATCH 1/3] CLN: .values->._values --- pandas/core/apply.py | 6 +++--- pandas/core/arrays/interval.py | 3 +-- pandas/core/dtypes/cast.py | 4 ++-- pandas/core/frame.py | 8 +++++--- pandas/core/groupby/groupby.py | 2 +- pandas/core/internals/construction.py | 2 +- pandas/core/internals/managers.py | 2 +- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a013434491589..232768fd1d989 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -130,8 +130,8 @@ def index(self) -> "Index": return self.obj.index @cache_readonly - def values(self): - return self.obj.values + def values(self) -> np.ndarray: + return self.obj._values @cache_readonly def dtypes(self) -> "Series": @@ -237,7 +237,7 @@ def apply_raw(self): return self.obj._constructor_sliced(result, index=self.agg_axis) def apply_broadcast(self, target: "DataFrame") -> "DataFrame": - result_values = np.empty_like(target.values) + result_values = np.empty_like(target._values) # axis which we want to compare compliance result_compare = target.shape[0] diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 66faca29670cb..45c1a88ec68b2 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -27,7 +27,6 @@ from pandas.core.dtypes.dtypes import IntervalDtype from pandas.core.dtypes.generic import ( ABCDatetimeIndex, - ABCExtensionArray, ABCIndexClass, ABCIntervalIndex, ABCPeriodIndex, @@ -762,7 +761,7 @@ def size(self) -> int: # Avoid materializing self.values return self.left.size - def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray: + def shift(self, periods: int = 1, fill_value: object = None) -> "IntervalArray": if not len(self) or periods == 0: return self.copy() diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9865a7d28542d..c84532487863d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -103,9 +103,9 @@ def is_nested_object(obj) -> bool: This may not be necessarily be performant. """ - if isinstance(obj, ABCSeries) and is_object_dtype(obj): + if isinstance(obj, ABCSeries) and is_object_dtype(obj.dtype): - if any(isinstance(v, ABCSeries) for v in obj.values): + if any(isinstance(v, ABCSeries) for v in obj._values): return True return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1203cd9fbd1b3..29bed36814b37 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5673,8 +5673,10 @@ def _arith_op(left, right): new_data = ops.dispatch_to_series(self, other, _arith_op) else: with np.errstate(all="ignore"): - res_values = _arith_op(self.values, other.values) - new_data = dispatch_fill_zeros(func, self.values, other.values, res_values) + res_values = _arith_op(self._values, other._values) + new_data = dispatch_fill_zeros( + func, self._values, other._values, res_values + ) return new_data @@ -7347,7 +7349,7 @@ def applymap(self, func) -> "DataFrame": def infer(x): if x.empty: return lib.map_infer(x, func) - return lib.map_infer(x.astype(object).values, func) + return lib.map_infer(x.astype(object)._values, func) return self.apply(infer) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 81c3fd7ad9e89..ec6a6d033499c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1921,7 +1921,7 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) sizes, result = grb.size(), grb.nth(n) - mask = (sizes < max_len).values + mask = (sizes < max_len)._values # set the results which don't meet the criteria if len(result) and mask.any(): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index ce3f07d06d6a2..a695a15a475f7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -344,7 +344,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]): val = com.dict_compat(val) else: val = dict(val) - val = lib.fast_multiget(val, oindex.values, default=np.nan) + val = lib.fast_multiget(val, oindex._values, default=np.nan) val = sanitize_array( val, index, dtype=dtype, copy=False, raise_cast_failure=False ) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3b88edabe9eb0..6506881aa284f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -475,7 +475,7 @@ def get_axe(block, qs, axes): b.mgr_locs = sb.mgr_locs else: - new_axes[axis] = Index(np.concatenate([ax.values for ax in axes])) + new_axes[axis] = Index(np.concatenate([ax._values for ax in axes])) if transposed: new_axes = new_axes[::-1] From 0c9877997eb4bb8599ecd7ed142049058b79aac6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 May 2020 15:50:31 -0700 Subject: [PATCH 2/3] CLN: .values->._values --- pandas/core/frame.py | 48 ++++++++++++++--------------- pandas/core/generic.py | 2 +- pandas/core/indexes/base.py | 8 +++-- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/multi.py | 4 +-- pandas/plotting/_matplotlib/core.py | 2 +- 6 files changed, 34 insertions(+), 32 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 29bed36814b37..e1ee6f8094000 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -986,7 +986,7 @@ def iterrows(self) -> Iterable[Tuple[Label, Series]]: """ columns = self.columns klass = self._constructor_sliced - for k, v in zip(self.index, self.values): + for k, v in zip(self.index, self._values): s = klass(v, index=columns, name=k) yield k, s @@ -1162,11 +1162,11 @@ def dot(self, other): left = self.reindex(columns=common, copy=False) right = other.reindex(index=common, copy=False) - lvals = left.values + lvals = left._values rvals = right.values else: left = self - lvals = self.values + lvals = self._values rvals = np.asarray(other) if lvals.shape[1] != rvals.shape[0]: raise ValueError( @@ -1336,7 +1336,7 @@ def to_numpy(self, dtype=None, copy: bool = False) -> np.ndarray: array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) """ - result = np.array(self.values, dtype=dtype, copy=copy) + result = np.array(self._values, dtype=dtype, copy=copy) return result def to_dict(self, orient="dict", into=dict): @@ -1840,7 +1840,7 @@ def to_records( if index: if isinstance(self.index, ABCMultiIndex): # array of tuples to numpy cols. copy copy copy - ix_vals = list(map(np.array, zip(*self.index.values))) + ix_vals = list(map(np.array, zip(*self.index._values))) else: ix_vals = [self.index.values] @@ -2653,7 +2653,7 @@ def transpose(self, *args, copy: bool = False) -> "DataFrame": # We have EAs with the same dtype. We can preserve that dtype in transpose. dtype = dtypes[0] arr_type = dtype.construct_array_type() - values = self.values + values = self._values new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] result = self._constructor( @@ -2661,7 +2661,7 @@ def transpose(self, *args, copy: bool = False) -> "DataFrame": ) else: - new_values = self.values.T + new_values = self._values.T if copy: new_values = new_values.copy() result = self._constructor( @@ -2820,7 +2820,7 @@ def _getitem_multilevel(self, key): result = self.reindex(columns=new_columns) result.columns = result_columns else: - new_values = self.values[:, loc] + new_values = self._values[:, loc] result = self._constructor( new_values, index=self.index, columns=result_columns ) @@ -2943,7 +2943,7 @@ def _setitem_frame(self, key, value): raise ValueError("Array conditional must be same shape as self") key = self._constructor(key, **self._construct_axes_dict()) - if key.values.size and not is_bool_dtype(key.values): + if key.size and not is_bool_dtype(key._values): raise TypeError( "Must pass DataFrame or 2-d ndarray with boolean values only" ) @@ -3693,7 +3693,7 @@ def lookup(self, row_labels, col_labels) -> np.ndarray: thresh = 1000 if not self._is_mixed_type or n > thresh: - values = self.values + values = self._values ridx = self.index.get_indexer(row_labels) cidx = self.columns.get_indexer(col_labels) if (ridx == -1).any(): @@ -3782,7 +3782,7 @@ def _reindex_multi(self, axes, copy, fill_value) -> "DataFrame": if row_indexer is not None and col_indexer is not None: indexer = row_indexer, col_indexer new_values = algorithms.take_2d_multi( - self.values, indexer, fill_value=fill_value + self._values, indexer, fill_value=fill_value ) return self._constructor(new_values, index=new_index, columns=new_columns) else: @@ -8092,7 +8092,7 @@ def c(x): return nanops.nancorr(x[0], x[1], method=method) correl = Series( - map(c, zip(left.values.T, right.values.T)), index=left.columns + map(c, zip(left._values.T, right._values.T)), index=left.columns ) else: @@ -8235,12 +8235,12 @@ def _count_level(self, level, axis=0, numeric_only=False): # Mask NaNs: Mask rows or columns where the index level is NaN, and all # values in the DataFrame that are NaN if frame._is_mixed_type: - # Since we have mixed types, calling notna(frame.values) might + # Since we have mixed types, calling notna(frame._values) might # upcast everything to object - values_mask = notna(frame).values + values_mask = notna(frame)._values else: # But use the speedup when we have homogeneous dtypes - values_mask = notna(frame.values) + values_mask = notna(frame._values) index_mask = notna(count_axis.get_level_values(level=level)) if axis == 1: @@ -8341,11 +8341,11 @@ def blk_func(values): out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype) out.index = df.columns if axis == 0 and is_object_dtype(out.dtype): - out[:] = coerce_to_dtypes(out.values, df.dtypes) + out[:] = coerce_to_dtypes(out._values, df.dtypes) return out if not self._is_homogeneous_type: - # try to avoid self.values call + # try to avoid self._values call if filter_type is None and axis == 0 and len(self) > 0: # operate column-wise @@ -8373,7 +8373,7 @@ def blk_func(values): if numeric_only is None: data = self - values = data.values + values = data._values try: result = f(values) @@ -8385,7 +8385,7 @@ def blk_func(values): data = _get_data(axis_matters=False) labels = data._get_agg_axis(axis) - values = data.values + values = data._values with np.errstate(all="ignore"): result = f(values) @@ -8394,10 +8394,10 @@ def blk_func(values): data = _get_data(axis_matters=True) labels = data._get_agg_axis(axis) - values = data.values + values = data._values else: data = self - values = data.values + values = data._values result = f(values) if filter_type == "bool" and is_object_dtype(values) and axis is None: @@ -8521,7 +8521,7 @@ def idxmin(self, axis=0, skipna=True) -> Series: dtype: object """ axis = self._get_axis_number(axis) - indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) + indices = nanops.nanargmin(self._values, axis=axis, skipna=skipna) # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame @@ -8594,7 +8594,7 @@ def idxmax(self, axis=0, skipna=True) -> Series: dtype: object """ axis = self._get_axis_number(axis) - indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) + indices = nanops.nanargmax(self._values, axis=axis, skipna=skipna) # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame @@ -8948,7 +8948,7 @@ def isin(self, values) -> "DataFrame": f"you passed a '{type(values).__name__}'" ) return DataFrame( - algorithms.isin(self.values.ravel(), values).reshape(self.shape), + algorithms.isin(self._values.ravel(), values).reshape(self.shape), self.index, self.columns, ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b550857252466..13877a1bee223 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4978,7 +4978,7 @@ def sample( else: raise ValueError("Invalid weights: weights sum to zero") - weights = weights.values + weights = weights._values # If no frac or n, default to n=1. if n is None and frac is None: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cf17ce9db6b1a..8a4a07ce099d5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -640,13 +640,15 @@ def astype(self, dtype, copy=True): elif is_categorical_dtype(dtype): from pandas.core.indexes.category import CategoricalIndex - return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) + return CategoricalIndex( + self._values, name=self.name, dtype=dtype, copy=copy + ) elif is_extension_array_dtype(dtype): return Index(np.asarray(self), name=self.name, dtype=dtype, copy=copy) try: - casted = self.values.astype(dtype, copy=copy) + casted = self._values.astype(dtype, copy=copy) except (TypeError, ValueError) as err: raise TypeError( f"Cannot cast {type(self).__name__} to dtype {dtype}" @@ -907,7 +909,7 @@ def format(self, name: bool = False, formatter=None, **kwargs): return self._format_with_header(header, **kwargs) def _format_with_header(self, header, na_rep="NaN", **kwargs): - values = self.values + values = self._values from pandas.io.formats.format import format_array diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5a89c45a3e425..c7df0c6e6266e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -439,7 +439,7 @@ def to_series(self, keep_tz=lib.no_default, index=None, name=None): # preserve the tz & copy values = self.copy(deep=True) else: - values = self.values.copy() + values = self._values.view("M8[ns]").copy() return Series(values, index=index, name=name) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f1e1ebcaca1c4..d1b0cdec5721c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1464,7 +1464,7 @@ def is_monotonic_increasing(self) -> bool: # reversed() because lexsort() wants the most significant key last. values = [ - self._get_level_values(i).values for i in reversed(range(len(self.levels))) + self._get_level_values(i)._values for i in reversed(range(len(self.levels))) ] try: sort_order = np.lexsort(values) @@ -2455,7 +2455,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): "tolerance not implemented yet for MultiIndex" ) indexer = self._engine.get_indexer( - values=self.values, target=target, method=method, limit=limit + values=self._values, target=target, method=method, limit=limit ) elif method == "nearest": raise NotImplementedError( diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 19a75eb151782..a049ac99f0e08 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -247,7 +247,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None): yield col, values.values @property - def nseries(self): + def nseries(self) -> int: if self.data.ndim == 1: return 1 else: From 9406c1311cac3de08fa7de32be30d5b062000ceb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 18 May 2020 09:41:01 -0700 Subject: [PATCH 3/3] revert DataFrame.values --- pandas/core/apply.py | 6 ++--- pandas/core/frame.py | 52 +++++++++++++++++++++----------------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 232768fd1d989..a013434491589 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -130,8 +130,8 @@ def index(self) -> "Index": return self.obj.index @cache_readonly - def values(self) -> np.ndarray: - return self.obj._values + def values(self): + return self.obj.values @cache_readonly def dtypes(self) -> "Series": @@ -237,7 +237,7 @@ def apply_raw(self): return self.obj._constructor_sliced(result, index=self.agg_axis) def apply_broadcast(self, target: "DataFrame") -> "DataFrame": - result_values = np.empty_like(target._values) + result_values = np.empty_like(target.values) # axis which we want to compare compliance result_compare = target.shape[0] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8e5215c62c8b1..4f5cbddc4eb05 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -982,7 +982,7 @@ def iterrows(self) -> Iterable[Tuple[Label, Series]]: """ columns = self.columns klass = self._constructor_sliced - for k, v in zip(self.index, self._values): + for k, v in zip(self.index, self.values): s = klass(v, index=columns, name=k) yield k, s @@ -1158,11 +1158,11 @@ def dot(self, other): left = self.reindex(columns=common, copy=False) right = other.reindex(index=common, copy=False) - lvals = left._values - rvals = right.values + lvals = left.values + rvals = right._values else: left = self - lvals = self._values + lvals = self.values rvals = np.asarray(other) if lvals.shape[1] != rvals.shape[0]: raise ValueError( @@ -2719,7 +2719,7 @@ def transpose(self, *args, copy: bool = False) -> "DataFrame": # We have EAs with the same dtype. We can preserve that dtype in transpose. dtype = dtypes[0] arr_type = dtype.construct_array_type() - values = self._values + values = self.values new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] result = self._constructor( @@ -2727,7 +2727,7 @@ def transpose(self, *args, copy: bool = False) -> "DataFrame": ) else: - new_values = self._values.T + new_values = self.values.T if copy: new_values = new_values.copy() result = self._constructor( @@ -2886,7 +2886,7 @@ def _getitem_multilevel(self, key): result = self.reindex(columns=new_columns) result.columns = result_columns else: - new_values = self._values[:, loc] + new_values = self.values[:, loc] result = self._constructor( new_values, index=self.index, columns=result_columns ) @@ -3009,7 +3009,7 @@ def _setitem_frame(self, key, value): raise ValueError("Array conditional must be same shape as self") key = self._constructor(key, **self._construct_axes_dict()) - if key.size and not is_bool_dtype(key._values): + if key.size and not is_bool_dtype(key.values): raise TypeError( "Must pass DataFrame or 2-d ndarray with boolean values only" ) @@ -3759,7 +3759,7 @@ def lookup(self, row_labels, col_labels) -> np.ndarray: thresh = 1000 if not self._is_mixed_type or n > thresh: - values = self._values + values = self.values ridx = self.index.get_indexer(row_labels) cidx = self.columns.get_indexer(col_labels) if (ridx == -1).any(): @@ -3848,7 +3848,7 @@ def _reindex_multi(self, axes, copy, fill_value) -> "DataFrame": if row_indexer is not None and col_indexer is not None: indexer = row_indexer, col_indexer new_values = algorithms.take_2d_multi( - self._values, indexer, fill_value=fill_value + self.values, indexer, fill_value=fill_value ) return self._constructor(new_values, index=new_index, columns=new_columns) else: @@ -5737,10 +5737,8 @@ def _arith_op(left, right): new_data = ops.dispatch_to_series(self, other, _arith_op) else: with np.errstate(all="ignore"): - res_values = _arith_op(self._values, other._values) - new_data = dispatch_fill_zeros( - func, self._values, other._values, res_values - ) + res_values = _arith_op(self.values, other.values) + new_data = dispatch_fill_zeros(func, self.values, other.values, res_values) return new_data @@ -8195,7 +8193,7 @@ def c(x): return nanops.nancorr(x[0], x[1], method=method) correl = self._constructor_sliced( - map(c, zip(left._values.T, right._values.T)), index=left.columns + map(c, zip(left.values.T, right.values.T)), index=left.columns ) else: @@ -8340,12 +8338,12 @@ def _count_level(self, level, axis=0, numeric_only=False): # Mask NaNs: Mask rows or columns where the index level is NaN, and all # values in the DataFrame that are NaN if frame._is_mixed_type: - # Since we have mixed types, calling notna(frame._values) might + # Since we have mixed types, calling notna(frame.values) might # upcast everything to object - values_mask = notna(frame)._values + values_mask = notna(frame).values else: # But use the speedup when we have homogeneous dtypes - values_mask = notna(frame._values) + values_mask = notna(frame.values) index_mask = notna(count_axis.get_level_values(level=level)) if axis == 1: @@ -8446,11 +8444,11 @@ def blk_func(values): out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype) out.index = df.columns if axis == 0 and is_object_dtype(out.dtype): - out[:] = coerce_to_dtypes(out._values, df.dtypes) + out[:] = coerce_to_dtypes(out.values, df.dtypes) return out if not self._is_homogeneous_type: - # try to avoid self._values call + # try to avoid self.values call if filter_type is None and axis == 0 and len(self) > 0: # operate column-wise @@ -8478,7 +8476,7 @@ def blk_func(values): if numeric_only is None: data = self - values = data._values + values = data.values try: result = f(values) @@ -8490,7 +8488,7 @@ def blk_func(values): data = _get_data(axis_matters=False) labels = data._get_agg_axis(axis) - values = data._values + values = data.values with np.errstate(all="ignore"): result = f(values) @@ -8499,10 +8497,10 @@ def blk_func(values): data = _get_data(axis_matters=True) labels = data._get_agg_axis(axis) - values = data._values + values = data.values else: data = self - values = data._values + values = data.values result = f(values) if filter_type == "bool" and is_object_dtype(values) and axis is None: @@ -8626,7 +8624,7 @@ def idxmin(self, axis=0, skipna=True) -> Series: dtype: object """ axis = self._get_axis_number(axis) - indices = nanops.nanargmin(self._values, axis=axis, skipna=skipna) + indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame @@ -8699,7 +8697,7 @@ def idxmax(self, axis=0, skipna=True) -> Series: dtype: object """ axis = self._get_axis_number(axis) - indices = nanops.nanargmax(self._values, axis=axis, skipna=skipna) + indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame @@ -9053,7 +9051,7 @@ def isin(self, values) -> "DataFrame": f"you passed a '{type(values).__name__}'" ) return self._constructor( - algorithms.isin(self._values.ravel(), values).reshape(self.shape), + algorithms.isin(self.values.ravel(), values).reshape(self.shape), self.index, self.columns, )