Skip to content

CLN: remove CategoricalIndex._create_from_codes #36342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1376,7 +1376,7 @@ def value_counts(self, dropna=True):
count = np.bincount(np.where(mask, code, ncat))
ix = np.append(ix, -1)

ix = self._constructor(ix, dtype=self.dtype, fastpath=True)
ix = self._from_backing_data(ix)

return Series(count, index=CategoricalIndex(ix), dtype="int64")

Expand Down Expand Up @@ -1546,9 +1546,8 @@ def sort_values(
if inplace:
self._codes = self._codes[sorted_idx]
else:
return self._constructor(
values=self._codes[sorted_idx], dtype=self.dtype, fastpath=True
)
codes = self._codes[sorted_idx]
return self._from_backing_data(codes)

def _values_for_rank(self):
"""
Expand Down Expand Up @@ -1583,7 +1582,7 @@ def _values_for_rank(self):
def view(self, dtype=None):
if dtype is not None:
raise NotImplementedError(dtype)
return self._constructor(values=self._codes, dtype=self.dtype, fastpath=True)
return self._from_backing_data(self._ndarray)

def to_dense(self):
"""
Expand Down Expand Up @@ -1691,7 +1690,7 @@ def fillna(self, value=None, method=None, limit=None):
f"or Series, but you passed a {type(value).__name__}"
)

return self._constructor(codes, dtype=self.dtype, fastpath=True)
return self._from_backing_data(codes)

# ------------------------------------------------------------------
# NDArrayBackedExtensionArray compat
Expand Down Expand Up @@ -2098,7 +2097,7 @@ def mode(self, dropna=True):
good = self._codes != -1
codes = self._codes[good]
codes = sorted(htable.mode_int64(ensure_int64(codes), dropna))
return self._constructor(values=codes, dtype=self.dtype, fastpath=True)
return self._from_backing_data(codes)

# ------------------------------------------------------------------
# ExtensionArray Interface
Expand Down
39 changes: 9 additions & 30 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,29 +211,6 @@ def __new__(

return cls._simple_new(data, name=name)

def _create_from_codes(self, codes, dtype=None, name=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than deleting this, could it be something like:

def _create_from_codes(self, codes, name=None):
    name = name if name else self.name
    cat = self._data._from_backing_data(codes)
    return type(self)._simple_new(cat, name=name)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It only saves one line, and I'd rather have fewer constructor methods.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we have the same code duplicated 4 times? (Except that name should be lib._no_default here.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. The ongoing process is to share more of this with the other ndarray-backed-EA indexes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh. So instead of the parameter being named `codes`, it would be more generic? Wouldn't this give more opportunities for de-duplication, so that all numpy-backed indexes could have a `_create_from_codes` equivalent called, say, `_from_backing_data`, to be consistent with the underlying array?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm hopeful, but we're not there yet. There are a lot of special cases in the DTI/TDI code because of .freq.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the purpose is to avoid special-casing things as much as possible; here the constructor is special-cased.

"""
*this is an internal non-public method*

create the correct categorical from codes

Parameters
----------
codes : new codes
dtype: CategoricalDtype, defaults to existing
name : optional name attribute, defaults to existing

Returns
-------
CategoricalIndex
"""
if dtype is None:
dtype = self.dtype
if name is None:
name = self.name
cat = Categorical.from_codes(codes, dtype=dtype)
return CategoricalIndex(cat, name=name)

@classmethod
def _simple_new(cls, values: Categorical, name: Label = None):
assert isinstance(values, Categorical), type(values)
Expand Down Expand Up @@ -495,7 +472,8 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):

codes = new_target.codes.copy()
codes[indexer == -1] = cats[missing]
new_target = self._create_from_codes(codes)
cat = self._data._from_backing_data(codes)
new_target = type(self)._simple_new(cat, name=self.name)

# we always want to return an Index type here
# to be consistent with .reindex for other index types (e.g. they don't
Expand Down Expand Up @@ -695,7 +673,9 @@ def delete(self, loc):
-------
new_index : Index
"""
return self._create_from_codes(np.delete(self.codes, loc))
codes = np.delete(self.codes, loc)
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=self.name)

def insert(self, loc: int, item):
"""
Expand All @@ -720,15 +700,14 @@ def insert(self, loc: int, item):

codes = self.codes
codes = np.concatenate((codes[:loc], [code], codes[loc:]))
return self._create_from_codes(codes)
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=self.name)

def _concat(self, to_concat, name):
# if calling index is category, don't check dtype of others
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
result = self._create_from_codes(codes, name=name)
# if name is None, _create_from_codes sets self.name
result.name = name
return result
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=name)

def _delegate_method(self, name: str, *args, **kwargs):
""" method delegation to the ._values """
Expand Down