Skip to content

CLN: assorted cleanups #33614

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ from pandas.core.dtypes.common import (
is_bool_dtype, is_object_dtype,
is_datetime64_dtype,
pandas_dtype, is_extension_array_dtype)
from pandas.core.arrays import Categorical
from pandas.core.dtypes.concat import union_categoricals
import pandas.io.common as icom

from pandas.compat import _import_lzma, _get_lzma_file
from pandas.errors import (ParserError, DtypeWarning,
Expand Down Expand Up @@ -1149,7 +1147,8 @@ cdef class TextReader:

# Method accepts list of strings, not encoded ones.
true_values = [x.decode() for x in self.true_values]
cat = Categorical._from_inferred_categories(
array_type = dtype.construct_array_type()
cat = array_type._from_inferred_categories(
cats, codes, dtype, true_values=true_values)
return cat, na_count

Expand Down
8 changes: 1 addition & 7 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
is_bool_dtype,
is_categorical_dtype,
is_complex_dtype,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_datetime64_ns_dtype,
is_extension_array_dtype,
Expand Down Expand Up @@ -122,12 +121,7 @@ def _ensure_data(values, dtype=None):
return ensure_object(values), "object"

# datetimelike
if (
needs_i8_conversion(values)
or is_period_dtype(dtype)
or is_datetime64_any_dtype(dtype)
or is_timedelta64_dtype(dtype)
):
if needs_i8_conversion(values) or needs_i8_conversion(dtype):
if is_period_dtype(values) or is_period_dtype(dtype):
from pandas import PeriodIndex

Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,6 @@ def astype(self, dtype, copy=True):
# 1. PeriodArray.astype handles period -> period
# 2. DatetimeArray.astype handles conversion between tz.
# 3. DatetimeArray.astype handles datetime -> period
from pandas import Categorical

dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
Expand Down Expand Up @@ -667,7 +665,8 @@ def astype(self, dtype, copy=True):
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
raise TypeError(msg)
elif is_categorical_dtype(dtype):
return Categorical(self, dtype=dtype)
arr_cls = dtype.construct_array_type()
return arr_cls(self, dtype=dtype)
else:
return np.asarray(self, dtype=dtype)

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from pandas.errors import PerformanceWarning

from pandas.core.dtypes.common import (
_INT64_DTYPE,
DT64NS_DTYPE,
INT64_DTYPE,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_any_dtype,
Expand Down Expand Up @@ -404,7 +404,7 @@ def _generate_range(
start = start.tz_localize(None)
if end is not None:
end = end.tz_localize(None)
# TODO: consider re-implementing _cached_range; GH#17914

values, _tz = generate_regular_range(start, end, periods, freq)
index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz))

Expand Down Expand Up @@ -1963,7 +1963,7 @@ def sequence_to_dt64ns(
if tz:
tz = timezones.maybe_get_tz(tz)

if data.dtype != _INT64_DTYPE:
if data.dtype != INT64_DTYPE:
data = data.astype(np.int64, copy=False)
result = data.view(DT64NS_DTYPE)

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,8 +1143,7 @@ def _map_values(self, mapper, na_action=None):
raise NotImplementedError
map_f = lambda values, f: values.map(f)
else:
values = self.astype(object)
values = getattr(values, "values", values)
values = self.astype(object)._values
if na_action == "ignore":

def map_f(values, f):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.common import (
_INT64_DTYPE,
_POSSIBLY_CAST_DTYPES,
DT64NS_DTYPE,
INT64_DTYPE,
TD64NS_DTYPE,
ensure_int8,
ensure_int16,
Expand Down Expand Up @@ -954,7 +954,7 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False):
raise ValueError("Cannot convert NaT values to integer")
return arr.view(dtype)

if dtype not in [_INT64_DTYPE, TD64NS_DTYPE]:
if dtype not in [INT64_DTYPE, TD64NS_DTYPE]:

# allow frequency conversions
# we return a float here!
Expand Down
5 changes: 1 addition & 4 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,14 @@

DT64NS_DTYPE = conversion.DT64NS_DTYPE
TD64NS_DTYPE = conversion.TD64NS_DTYPE
_INT64_DTYPE = np.dtype(np.int64)
INT64_DTYPE = np.dtype(np.int64)

# oh the troubles to reduce import time
_is_scipy_sparse = None

ensure_float64 = algos.ensure_float64
ensure_float32 = algos.ensure_float32

_ensure_datetime64ns = conversion.ensure_datetime64ns
_ensure_timedelta64ns = conversion.ensure_timedelta64ns


def ensure_float(arr):
"""
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3548,8 +3548,6 @@ class animal locomotion
result._set_is_copy(self, copy=not result._is_view)
return result

_xs: Callable = xs

def __getitem__(self, item):
raise AbstractMethodError(self)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,7 @@ def _getitem_tuple(self, tup: Tuple):

def _get_label(self, label, axis: int):
# GH#5667 this will fail if the label is not present in the axis.
return self.obj._xs(label, axis=axis)
return self.obj.xs(label, axis=axis)

def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
# we have an axis0 multi-index, handle or raise
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,8 +585,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
newb = self.copy() if copy else self
return newb

# TODO(extension)
# should we make this attribute?
# TODO(EA2D): special case not needed with 2D EAs
if isinstance(values, np.ndarray):
values = values.reshape(self.shape)

Expand Down Expand Up @@ -1554,13 +1553,15 @@ def __init__(self, values, placement, ndim=None):

@property
def shape(self):
# TODO(EA2D): override unnecessary with 2D EAs
if self.ndim == 1:
return ((len(self.values)),)
return (len(self.mgr_locs), len(self.values))

def iget(self, col):

if self.ndim == 2 and isinstance(col, tuple):
# TODO(EA2D): unnecessary with 2D EAs
col, loc = col
if not com.is_null_slice(col) and col != 0:
raise IndexError(f"{self} only contains one item")
Expand Down Expand Up @@ -1669,6 +1670,7 @@ def setitem(self, indexer, value):
be a compatible shape.
"""
if isinstance(indexer, tuple):
# TODO(EA2D): not needed with 2D EAs
# we are always 1-D
indexer = indexer[0]

Expand All @@ -1678,6 +1680,7 @@ def setitem(self, indexer, value):

def get_values(self, dtype=None):
# ExtensionArrays must be iterable, so this works.
# TODO(EA2D): reshape not needed with 2D EAs
return np.asarray(self.values).reshape(self.shape)

def array_values(self) -> ExtensionArray:
Expand All @@ -1691,6 +1694,7 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
values = np.asarray(values.astype(object))
values[mask] = na_rep

# TODO(EA2D): reshape not needed with 2D EAs
# we are expected to return a 2-d ndarray
return values.reshape(1, len(values))

Expand All @@ -1703,6 +1707,7 @@ def take_nd(
if fill_value is lib.no_default:
fill_value = None

# TODO(EA2D): special case not needed with 2D EAs
# axis doesn't matter; we are really a single-dim object
# but are passed the axis depending on the calling routine
# if it's REALLY axis 0, then this will be a reindex and not a take
Expand Down Expand Up @@ -2229,6 +2234,7 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]:
by apply.
"""
if axis == 0:
# TODO(EA2D): special case not needed with 2D EAs
# Cannot currently calculate diff across multiple blocks since this
# function is invoked via apply
raise NotImplementedError
Expand Down Expand Up @@ -2280,7 +2286,7 @@ def quantile(self, qs, interpolation="linear", axis=0):
blk = self.make_block(naive)
res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis)

# ravel is kludge for 2D block with 1D values, assumes column-like
# TODO(EA2D): ravel is kludge for 2D block with 1D values, assumes column-like
aware = self._holder(res_blk.values.ravel(), dtype=self.dtype)
return self.make_block_same_class(aware, ndim=res_blk.ndim)

Expand Down Expand Up @@ -2693,6 +2699,7 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None):
if isinstance(values, ABCPandasArray):
values = values.to_numpy()
if ndim and ndim > 1:
# TODO(EA2D): special case not needed with 2D EAs
values = np.atleast_2d(values)

if isinstance(dtype, PandasDtype):
Expand Down Expand Up @@ -2759,6 +2766,7 @@ def _safe_reshape(arr, new_shape):
if isinstance(arr, ABCSeries):
arr = arr._values
if not isinstance(arr, ABCExtensionArray):
# TODO(EA2D): special case not needed with 2D EAs
arr = arr.reshape(new_shape)
return arr

Expand Down
1 change: 1 addition & 0 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
):
if self.block is None:
array = empty_dtype.construct_array_type()
# TODO(EA2D): special case unneeded with 2D EAs
return array(
np.full(self.shape[1], fill_value.value), dtype=empty_dtype
)
Expand Down