Skip to content

Commit 69e7fe7

Browse files
committed
Updates
1. Reversed order of take keywords 2. Added to extensions API 3. Removed default implementation
1 parent 05d8844 commit 69e7fe7

File tree

12 files changed

+147
-104
lines changed

12 files changed

+147
-104
lines changed

pandas/api/extensions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22
from pandas.core.accessor import (register_dataframe_accessor, # noqa
33
register_index_accessor,
44
register_series_accessor)
5+
from pandas.core.algorithms import take # noqa
56
from pandas.core.arrays.base import ExtensionArray # noqa
67
from pandas.core.dtypes.dtypes import ExtensionDtype # noqa

pandas/core/algorithms.py

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,24 +1448,67 @@ def func(arr, indexer, out, fill_value=np.nan):
14481448
return func
14491449

14501450

1451-
def take(arr, indexer, fill_value=None, allow_fill=None):
1451+
def take(arr, indexer, allow_fill=False, fill_value=None):
1452+
"""Take elements from an array.
1453+
1454+
Parameters
1455+
----------
1456+
arr : ndarray or ExtensionArray
1457+
indexer : sequence of integers
1458+
Indices to be taken. See Notes for how negative indices
1459+
are handled.
1460+
allow_fill : bool, default False
1461+
How to handle negative values in `indexer`.
1462+
1463+
For False values (the default), negative values in `indexer`
1464+
indicate slices from the right.
1465+
1466+
For True values, indices where `indexer` is ``-1`` indicate
1467+
missing values. These values are set to `fill_value`. Any other
1468+
negative value should raise a ``ValueError``.
1469+
fill_value : any, optional
1470+
Fill value to use for NA-indices when `allow_fill` is True.
1471+
This may be ``None``, in which case the default NA value for
1472+
the type, ``self.dtype.na_value``, is used.
1473+
1474+
Returns
1475+
-------
1476+
ndarray or ExtensionArray
1477+
Same type as the input.
1478+
1479+
Raises
1480+
------
1481+
IndexError
1482+
When the indexer is out of bounds for the array.
1483+
ValueError
1484+
When the indexer contains negative values other than ``-1``
1485+
and `allow_fill` is True.
1486+
1487+
See Also
1488+
--------
1489+
numpy.take
1490+
"""
14521491
indexer = np.asarray(indexer)
14531492

1454-
if allow_fill is None:
1455-
# NumPy style
1456-
result = arr.take(indexer)
1457-
else:
1493+
if allow_fill:
1494+
# Pandas style, -1 means NA
14581495
# bounds checking
14591496
if (indexer < -1).any():
14601497
raise ValueError("Invalid value in 'indexer'. All values "
14611498
"must be non-negative or -1. When "
14621499
"'fill_value' is specified.")
1500+
if (indexer > len(arr)).any():
1501+
# TODO: reuse with logic elsewhere.
1502+
raise IndexError
14631503

14641504
# # take on empty array not handled as desired by numpy
14651505
# # in case of -1 (all missing take)
14661506
# if not len(arr) and mask.all():
14671507
# return arr._from_sequence([fill_value] * len(indexer))
14681508
result = take_1d(arr, indexer, fill_value=fill_value)
1509+
else:
1510+
# NumPy style
1511+
result = arr.take(indexer)
14691512
return result
14701513

14711514

pandas/core/arrays/base.py

Lines changed: 58 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ class ExtensionArray(object):
5353
* unique
5454
* factorize / _values_for_factorize
5555
* argsort / _values_for_argsort
56-
* take / _values_for_take
5756
5857
This class does not inherit from 'abc.ABCMeta' for performance reasons.
5958
Methods and properties required by the interface raise
@@ -277,22 +276,23 @@ def isna(self):
277276
"""
278277
raise AbstractMethodError(self)
279278

280-
def _values_for_argsort(self):
281-
# type: () -> ndarray
282-
"""Return values for sorting.
279+
def _values_for_factorize(self):
280+
# type: () -> Tuple[ndarray, Any]
281+
"""Return an array and missing value suitable for factorization.
283282
284283
Returns
285284
-------
286-
ndarray
287-
The transformed values should maintain the ordering between values
288-
within the array.
289-
290-
See Also
291-
--------
292-
ExtensionArray.argsort
285+
values : ndarray
286+
An array suitable for factorization. This should maintain order
287+
and be a supported dtype (Float64, Int64, UInt64, String, Object).
288+
By default, the extension array is cast to object dtype.
289+
na_value : object
290+
The value in `values` to consider missing. This will be treated
291+
as NA in the factorization routines, so it will be coded as
292+
`na_sentinel` and not included in `uniques`. By default,
293+
``np.nan`` is used.
293294
"""
294-
# Note: this is used in `ExtensionArray.argsort`.
295-
return np.array(self)
295+
return self.astype(object), np.nan
296296

297297
def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
298298
"""
@@ -393,24 +393,6 @@ def unique(self):
393393
uniques = unique(self.astype(object))
394394
return self._from_sequence(uniques)
395395

396-
def _values_for_factorize(self):
397-
# type: () -> Tuple[ndarray, Any]
398-
"""Return an array and missing value suitable for factorization.
399-
400-
Returns
401-
-------
402-
values : ndarray
403-
An array suitable for factoraization. This should maintain order
404-
and be a supported dtype (Float64, Int64, UInt64, String, Object).
405-
By default, the extension array is cast to object dtype.
406-
na_value : object
407-
The value in `values` to consider missing. This will be treated
408-
as NA in the factorization routines, so it will be coded as
409-
`na_sentinal` and not included in `uniques`. By default,
410-
``np.nan`` is used.
411-
"""
412-
return self.astype(object), np.nan
413-
414396
def factorize(self, na_sentinel=-1):
415397
# type: (int) -> Tuple[ndarray, ExtensionArray]
416398
"""Encode the extension array as an enumerated type.
@@ -463,40 +445,45 @@ def factorize(self, na_sentinel=-1):
463445
# ------------------------------------------------------------------------
464446
# Indexing methods
465447
# ------------------------------------------------------------------------
466-
def _values_for_take(self):
467-
"""Values to use for `take`.
468-
469-
Coerces to object dtype by default.
448+
def _values_for_argsort(self):
449+
# type: () -> ndarray
450+
"""Return values for sorting.
470451
471452
Returns
472453
-------
473-
array-like
474-
Must satisify NumPy's `take` semantics.
454+
ndarray
455+
The transformed values should maintain the ordering between values
456+
within the array.
457+
458+
See Also
459+
--------
460+
ExtensionArray.argsort
475461
"""
476-
return self.astype(object)
462+
# Note: this is used in `ExtensionArray.argsort`.
463+
return np.array(self)
477464

478-
def take(self, indexer, fill_value=None, allow_fill=None):
479-
# type: (Sequence[int], Optional[Any], bool) -> ExtensionArray
465+
def take(self, indexer, allow_fill=False, fill_value=None):
466+
# type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
480467
"""Take elements from an array.
481468
482469
Parameters
483470
----------
484471
indexer : sequence of integers
485472
Indices to be taken. See Notes for how negative indices
486473
are handled.
474+
allow_fill : bool, default False
475+
How to handle negative values in `indexer`.
476+
477+
For False values (the default), negative values in `indexer`
478+
indicate slices from the right.
479+
480+
For True values, indices where `indexer` is ``-1`` indicate
481+
missing values. These values are set to `fill_value`. Any other
482+
negative value should raise a ``ValueError``.
487483
fill_value : any, optional
488484
Fill value to use for NA-indices when `allow_fill` is True.
489485
This may be ``None``, in which case the default NA value for
490486
the type, ``self.dtype.na_value``, is used.
491-
allow_fill : bool, optional
492-
How to handle negative values in `indexer`.
493-
494-
For False values (the default), NumPy's behavior is used. Negative
495-
values in `indexer` mean slices from the right.
496-
497-
For True values, Pandas behavior is used. Indicies where `indexer`
498-
is ``-1`` are set to `fill_value`. Any other negative value should
499-
raise a ``ValueError``.
500487
501488
Returns
502489
-------
@@ -514,21 +501,34 @@ def take(self, indexer, fill_value=None, allow_fill=None):
514501
-----
515502
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
516503
``iloc``, when the indexer is a sequence of values. Additionally,
517-
it's called by :meth:`Series.reindex` with a `fill_value`.
504+
it's called by :meth:`Series.reindex`, or any other method
505+
that causes realignment, with a `fill_value`.
518506
519507
See Also
520508
--------
521509
numpy.take
522-
"""
523-
from pandas.core.algorithms import take
510+
pandas.api.extensions.take
511+
512+
Examples
513+
--------
514+
Here's an example implementation, which relies on casting the
515+
extension array to object dtype. This uses the helper method
516+
:func:`pandas.api.extensions.take`.
524517
525-
data = self._values_for_take()
526-
if allow_fill and fill_value is None:
527-
fill_value = self.dtype.na_value
518+
.. code-block:: python
528519
529-
result = take(data, indexer, fill_value=fill_value,
530-
allow_fill=allow_fill)
531-
return self._from_sequence(result)
520+
def take(self, indexer, allow_fill=False, fill_value=None):
521+
from pandas.core.algorithms import take
522+
523+
data = self.astype(object)
524+
if allow_fill and fill_value is None:
525+
fill_value = self.dtype.na_value
526+
527+
result = take(data, indexer, fill_value=fill_value,
528+
allow_fill=allow_fill)
529+
return self._from_sequence(result)
530+
"""
531+
raise AbstractMethodError(self)
532532

533533
def copy(self, deep=False):
534534
# type: (bool) -> ExtensionArray

pandas/core/dtypes/base.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class _DtypeOpsMixin(object):
1818

1919
# na_value is the default NA value to use for this type. This is used in
2020
# e.g. ExtensionArray.take.
21-
na_value = np.nan # TODO: change to _na_value
21+
na_value = np.nan
2222

2323
def __eq__(self, other):
2424
"""Check whether 'other' is equal to self.
@@ -105,6 +105,9 @@ class ExtensionDtype(_DtypeOpsMixin):
105105
* name
106106
* construct_from_string
107107
108+
The `na_value` class attribute can be used to set the default NA value
109+
for this type. :attr:`numpy.nan` is used by default.
110+
108111
This class does not inherit from 'abc.ABCMeta' for performance reasons.
109112
Methods and properties required by the interface raise
110113
``pandas.errors.AbstractMethodError`` and no ``register`` method is

pandas/core/dtypes/cast.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,7 @@ def changeit():
256256

257257
def maybe_promote(dtype, fill_value=np.nan):
258258
# if we passed an array here, determine the fill value by dtype
259-
if is_extension_array_dtype(dtype):
260-
# XXX: verify this change
261-
fill_value = dtype.na_value
262-
263-
elif isinstance(fill_value, np.ndarray):
259+
if isinstance(fill_value, np.ndarray):
264260
if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
265261
fill_value = iNaT
266262
else:
@@ -297,6 +293,8 @@ def maybe_promote(dtype, fill_value=np.nan):
297293
elif is_datetimetz(dtype):
298294
if isna(fill_value):
299295
fill_value = iNaT
296+
elif is_extension_array_dtype(dtype) and isna(fill_value):
297+
fill_value = dtype.na_value
300298
elif is_float(fill_value):
301299
if issubclass(dtype.type, np.bool_):
302300
dtype = np.object_

pandas/core/internals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5445,7 +5445,7 @@ def is_uniform_join_units(join_units):
54455445

54465446
def is_uniform_reindex(join_units):
54475447
return (
5448-
# TODO: should this be ju.block.can_hold_na?
5448+
# TODO: should this be ju.block._can_hold_na?
54495449
all(ju.block and ju.block.is_extension for ju in join_units) and
54505450
len(set(ju.block.dtype.name for ju in join_units)) == 1
54515451
)

pandas/tests/extension/base/getitem.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,11 @@ def test_take_negative(self, data):
149149

150150
def test_take_non_na_fill_value(self, data_missing):
151151
fill_value = data_missing[1] # valid
152-
result = data_missing.take([-1, 1], fill_value=fill_value)
153-
expected = data_missing.take([1, 1])
152+
na = data_missing[0]
153+
154+
array = data_missing._from_sequence([na, fill_value, na])
155+
result = array.take([-1, 1], fill_value=fill_value, allow_fill=True)
156+
expected = array.take([1, 1])
154157
self.assert_extension_array_equal(result, expected)
155158

156159
def test_take_pandas_style_negative_raises(self, data, na_value):

pandas/tests/extension/base/reshaping.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,11 @@ def test_merge(self, data, na_value):
124124
'key': [0, 1, 2]})
125125
df2 = pd.DataFrame({'int2': [1, 2, 3, 4], 'key': [0, 0, 1, 3]})
126126

127-
# res = pd.merge(df1, df2)
128-
# exp = pd.DataFrame(
129-
# {'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1],
130-
# 'ext': data._from_sequence([data[0], data[0], data[1]])})
131-
# self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
127+
res = pd.merge(df1, df2)
128+
exp = pd.DataFrame(
129+
{'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1],
130+
'ext': data._from_sequence([data[0], data[0], data[1]])})
131+
self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
132132

133133
res = pd.merge(df1, df2, how='outer')
134134
exp = pd.DataFrame(

pandas/tests/extension/decimal/array.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,17 @@ def __getitem__(self, item):
5353
else:
5454
return type(self)(self._data[item])
5555

56+
def take(self, indexer, allow_fill=False, fill_value=None):
57+
from pandas.api.extensions import take
58+
59+
data = self._data
60+
if allow_fill and fill_value is None:
61+
fill_value = self.dtype.na_value
62+
63+
result = take(data, indexer, fill_value=fill_value,
64+
allow_fill=allow_fill)
65+
return self._from_sequence(result)
66+
5667
def copy(self, deep=False):
5768
if deep:
5869
return type(self)(self._data.copy())
@@ -81,9 +92,6 @@ def nbytes(self):
8192
def isna(self):
8293
return np.array([x.is_nan() for x in self._data])
8394

84-
def _values_for_take(self):
85-
return self.data
86-
8795
@property
8896
def _na_value(self):
8997
return decimal.Decimal('NaN')

pandas/tests/extension/decimal/test_decimal.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -108,26 +108,7 @@ class TestReshaping(BaseDecimal, base.BaseReshapingTests):
108108

109109

110110
class TestGetitem(BaseDecimal, base.BaseGetitemTests):
111-
112-
def test_take_basic(self):
113-
ea = DecimalArray([decimal.Decimal('1'),
114-
decimal.Decimal('2'),
115-
decimal.Decimal('3')])
116-
result = ea.take([1, 2, -1])
117-
expected = DecimalArray([decimal.Decimal('2'),
118-
decimal.Decimal('3'),
119-
decimal.Decimal('3')])
120-
self.assert_extension_array_equal(result, expected)
121-
122-
result = ea.take([1, 2, -1], fill_value=ea.dtype.na_value,
123-
allow_fill=True)
124-
expected = DecimalArray([decimal.Decimal('2'),
125-
decimal.Decimal('3'),
126-
decimal.Decimal('NaN')])
127-
self.assert_extension_array_equal(result, expected)
128-
129-
result = pd.Series(ea).reindex([1, 2, -1]).values
130-
self.assert_extension_array_equal(result, expected)
111+
pass
131112

132113

133114
class TestMissing(BaseDecimal, base.BaseMissingTests):

0 commit comments

Comments
 (0)