-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Add set_index to Series #22225
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: Add set_index to Series #22225
Changes from 16 commits
3e968a5
26723ac
d9d9f45
8f5ed33
6cf3ac1
4358d52
b89af70
f17eb7e
5e49884
bb8df65
e4d3698
f8fa20a
ac4cadb
2428e09
31b5990
aaa8644
81cfd82
7f35d2c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,7 +29,8 @@ | |
is_extension_array_dtype, is_integer, is_list_like, is_number, | ||
is_numeric_dtype, is_object_dtype, is_period_arraylike, is_re_compilable, | ||
is_scalar, is_timedelta64_dtype, pandas_dtype) | ||
from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries | ||
from pandas.core.dtypes.generic import ( | ||
ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCPanel, ABCSeries) | ||
from pandas.core.dtypes.inference import is_hashable | ||
from pandas.core.dtypes.missing import isna, notna | ||
|
||
|
@@ -39,7 +40,8 @@ | |
from pandas.core.base import PandasObject, SelectionMixin | ||
import pandas.core.common as com | ||
from pandas.core.index import ( | ||
Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index) | ||
Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index, | ||
ensure_index_from_sequences) | ||
from pandas.core.indexes.datetimes import DatetimeIndex | ||
from pandas.core.indexes.period import Period, PeriodIndex | ||
import pandas.core.indexing as indexing | ||
|
@@ -629,6 +631,103 @@ def _set_axis(self, axis, labels): | |
self._data.set_axis(axis, labels) | ||
self._clear_item_cache() | ||
|
||
def set_index(self, keys, drop=True, append=False, inplace=False,
              verify_integrity=False):
    """
    Set the index (row labels) using one or more given arrays (or labels).

    Parameters
    ----------
    %(params)s
    append : bool, default False
        Whether to append columns to existing index.
    inplace : bool, default False
        Modify the %(klass)s in place (do not create a new object).
    verify_integrity : bool, default False
        Check the new index for duplicates. Otherwise defer the check until
        necessary. Setting to False will improve the performance of this
        method.

    Returns
    -------
    %(klass)s
        The reindexed %(klass)s. Will be None if inplace is True.

    See Also
    --------
    %(other_klass)s.set_index : Method adapted for %(other_klass)s.
    %(klass)s.reset_index : Opposite of set_index.
    %(klass)s.reindex : Change to new indices or expand indices.
    %(klass)s.reindex_like : Change to same indices as other %(klass)s.

    Examples
    --------
    %(examples)s
    """
    # NOTE: this docstring is a template; the placeholders are filled in
    # by the subclass implementations, so it must not contain any other
    # percent signs.

    # parameter keys is checked in Series.set_index / DataFrame.set_index,
    # will always be passed as a list of list-likes!

    inplace = validate_bool_kwarg(inplace, 'inplace')
    # work on the caller's object only when explicitly requested
    obj = self if inplace else self.copy()

    arrays = []
    names = []
    if append:
        # keep the existing index level(s) in front of the new ones
        names = list(self.index.names)
        if isinstance(self.index, ABCMultiIndex):
            arrays.extend(self.index._get_level_values(i)
                          for i in range(self.index.nlevels))
        else:
            arrays.append(self.index)

    # labels that were consumed as keys and must be deleted if drop=True
    to_remove = []
    for col in keys:
        if isinstance(col, ABCMultiIndex):
            # a MultiIndex contributes one array (and name) per level
            for n in range(col.nlevels):
                arrays.append(col._get_level_values(n))
            names.extend(col.names)
        elif isinstance(col, (ABCIndexClass, ABCSeries)):
            # if Index then not MultiIndex (treated above)
            arrays.append(col)
            names.append(col.name)
        elif isinstance(col, (list, np.ndarray)):
            arrays.append(col)
            names.append(None)
        elif (is_list_like(col)
                and not (isinstance(col, tuple) and col in self)):
            # all other list-likes (but avoid valid column keys)
            col = list(col)  # ensure iterator do not get read twice etc.
            arrays.append(col)
            names.append(None)
        # from here, col can only be a column label
        else:
            arrays.append(obj[col]._values)
            names.append(col)
            if drop:
                to_remove.append(col)

    index = ensure_index_from_sequences(arrays, names)

    # checked before anything is deleted from or assigned to obj
    if verify_integrity and not index.is_unique:
        duplicates = list(index[index.duplicated()])
        raise ValueError('Index has duplicate keys: {dup}'.format(
            dup=duplicates))

    # use set to handle duplicate column names gracefully in case of drop
    for c in set(to_remove):
        del obj[c]

    # clear up memory usage
    index._cleanup()

    obj.index = index

    # with inplace=True the method falls through and returns None
    if not inplace:
        return obj
|
||
def transpose(self, *args, **kwargs): | ||
""" | ||
Permute the dimensions of the %(klass)s | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1171,6 +1171,71 @@ def _set_value(self, label, value, takeable=False): | |
return self | ||
_set_value.__doc__ = set_value.__doc__ | ||
|
||
@Substitution(
    klass='Series', other_klass='DataFrame',
    params=dedent("""\
        arrays : array or list of arrays
            Either a Series, Index, MultiIndex, list, np.ndarray or a list
            containing only Series, Index, MultiIndex, list, np.ndarray."""),
    examples=dedent("""\
        >>> s = pd.Series(range(10, 13))
        >>> s
        0    10
        1    11
        2    12
        dtype: int64

        Set the index to become `['a', 'b', 'c']`:

        >>> s.set_index(['a', 'b', 'c'])
        a    10
        b    11
        c    12
        dtype: int64

        Create a MultiIndex by appending to the existing index:

        >>> s.set_index(['a', 'b', 'c'], append=True)
        0  a    10
        1  b    11
        2  c    12
        dtype: int64

        Create a MultiIndex by passing a list of arrays:

        >>> t = (s ** 2).set_index([['a', 'b', 'c'], ['I', 'II', 'III']])
        >>> t
        a  I      100
        b  II     121
        c  III    144
        dtype: int64

        Apply index from another object (of the same length!):

        >>> s.set_index(t.index)
        a  I      10
        b  II     11
        c  III    12
        dtype: int64""")
)
@Appender(generic.NDFrame.set_index.__doc__)
def set_index(self, arrays, append=False, inplace=False,
              verify_integrity=False):
    # No docstring here on purpose: it is generated by the decorators above
    # from the shared NDFrame.set_index template.

    # NDFrame.set_index expects a list of list-likes. Lists of scalars
    # must be wrapped in another list to avoid being interpreted as keys.
    # Only an actual ``list`` is treated as a container of arrays; any other
    # input (first condition) is taken as one single array and wrapped too.
    if not isinstance(arrays, list) or all(is_scalar(x) for x in arrays):
        arrays = [arrays]

    # every entry must now be a one-dimensional list-like; sets are rejected
    # via allow_sets=False, and objects without ``ndim`` count as 1-D
    if any(not is_list_like(x, allow_sets=False)
           or getattr(x, 'ndim', 1) > 1 for x in arrays):
        raise TypeError('The parameter "arrays" may only contain '
                        'one-dimensional list-likes')

    # drop=False: nothing is looked up by label here, so there is nothing
    # to remove from the Series
    return super(Series, self).set_index(keys=arrays, drop=False,
                                         append=append, inplace=inplace,
                                         verify_integrity=verify_integrity)
|
||
def reset_index(self, level=None, drop=False, name=None, inplace=False): | ||
""" | ||
Generate a new DataFrame or Series with the index reset. | ||
|
Uh oh!
There was an error while loading. Please reload this page.