-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Support dicts with default values in series.map #16002
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
e8f9d27
96d12a6
961ea46
d73cee8
2a2bab7
4f3dc6b
24e1478
11f5769
ddb0480
1f56c81
79cfd11
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -449,10 +449,8 @@ Other Enhancements | |
- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here <io.feather>`. | ||
- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) | ||
- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) | ||
|
||
|
||
- ``Series.map()`` now respects default values of dictionary subclasses with a ``__missing__`` method, such as ``collections.Counter`` (:issue:`15999`, :issue:`16002`) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Only list the issue (and not the PR). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is an API change (and not an enhancement); ignore how I labelled the issue. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I moved the note to "Other API Changes". Is this okay, or should I make a larger entry under:
|
||
- ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) | ||
|
||
- ``DataFrame`` has gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`). | ||
- ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`). | ||
- ``DataFrame.groupby()`` has gained a ``.nunique()`` method to count the distinct values for all columns within each group (:issue:`14336`, :issue:`15197`). | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2094,16 +2094,14 @@ def map(self, arg, na_action=None): | |
|
||
>>> s = pd.Series([1, 2, 3, np.nan]) | ||
|
||
>>> s2 = s.map(lambda x: 'this is a string {}'.format(x), | ||
na_action=None) | ||
>>> s2 = s.map('this is a string {}'.format, na_action=None) | ||
0 this is a string 1.0 | ||
1 this is a string 2.0 | ||
2 this is a string 3.0 | ||
3 this is a string nan | ||
dtype: object | ||
|
||
>>> s3 = s.map(lambda x: 'this is a string {}'.format(x), | ||
na_action='ignore') | ||
>>> s3 = s.map('this is a string {}'.format, na_action='ignore') | ||
0 this is a string 1.0 | ||
1 this is a string 2.0 | ||
2 this is a string 3.0 | ||
|
@@ -2132,13 +2130,23 @@ def map_f(values, f): | |
else: | ||
map_f = lib.map_infer | ||
|
||
if isinstance(arg, (dict, Series)): | ||
if isinstance(arg, dict): | ||
if isinstance(arg, dict): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a one-line comment here on what you are doing? |
||
if hasattr(arg, '__missing__'): | ||
# If a dictionary subclass defines a default value method, | ||
# convert arg to a lookup function (https://git.io/vS7LK). | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can just add |
||
dict_with_default = arg | ||
arg = lambda x: dict_with_default[x] | ||
else: | ||
# Dictionary does not have a default. Thus it's safe to | ||
# convert to an indexed series for efficiency. | ||
arg = self._constructor(arg, index=arg.keys()) | ||
|
||
if isinstance(arg, Series): | ||
# arg is a Series | ||
indexer = arg.index.get_indexer(values) | ||
new_values = algorithms.take_1d(arg._values, indexer) | ||
else: | ||
# arg is a function | ||
new_values = map_f(values, arg) | ||
|
||
return self._constructor(new_values, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# coding=utf-8 | ||
# pylint: disable-msg=E1101,W0612 | ||
|
||
from collections import OrderedDict | ||
from collections import Counter, defaultdict, OrderedDict | ||
import numpy as np | ||
import pandas as pd | ||
|
||
|
@@ -411,6 +411,42 @@ def test_map_dict_with_tuple_keys(self): | |
tm.assert_series_equal(df['labels'], df['expected_labels'], | ||
check_names=False) | ||
|
||
def test_map_counter(self): | ||
s = Series(['a', 'b', 'c'], index=[1, 2, 3]) | ||
counter = Counter() | ||
counter['b'] = 5 | ||
counter['c'] += 1 | ||
result = s.map(counter) | ||
expected = Series([0, 5, 1], index=[1, 2, 3]) | ||
assert_series_equal(result, expected) | ||
|
||
def test_map_defaultdict(self): | ||
s = Series([1, 2, 3], index=['a', 'b', 'c']) | ||
default_dict = defaultdict(lambda: 'blank') | ||
default_dict[1] = 'stuff' | ||
result = s.map(default_dict) | ||
expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c']) | ||
assert_series_equal(result, expected) | ||
|
||
def test_map_dict_subclass_with_missing(self): | ||
class DictWithMissing(dict): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add the issue number here as a comment. |
||
def __missing__(self, key): | ||
return 'missing' | ||
s = Series([1, 2, 3]) | ||
dictionary = DictWithMissing({3: 'three'}) | ||
result = s.map(dictionary) | ||
expected = Series(['missing', 'missing', 'three']) | ||
assert_series_equal(result, expected) | ||
|
||
def test_map_dict_subclass_without_missing(self): | ||
class DictWithoutMissing(dict): | ||
pass | ||
s = Series([1, 2, 3]) | ||
dictionary = DictWithoutMissing({3: 'three'}) | ||
result = s.map(dictionary) | ||
expected = Series([np.nan, np.nan, 'three']) | ||
assert_series_equal(result, expected) | ||
|
||
def test_map_box(self): | ||
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] | ||
s = pd.Series(vals) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jreback let me know if you want me to revert the deletion of these blank lines.