|
5 | 5 | This is an experimental API and subject to breaking changes
|
6 | 6 | without warning.
|
7 | 7 | """
|
| 8 | +import textwrap |
| 9 | + |
8 | 10 | import numpy as np
|
9 | 11 |
|
10 | 12 | from pandas.errors import AbstractMethodError
|
11 |
| -from pandas.compat import _default_fill_value |
12 | 13 | from pandas.compat.numpy import function as nv
|
| 14 | +from pandas.util._decorators import Appender, Substitution |
13 | 15 |
|
14 | 16 | _not_implemented_message = "{} does not implement {}."
|
| 17 | +_default_fill_value = object() |
| 18 | + |
| 19 | + |
| 20 | +_take_docstring = textwrap.dedent("""\ |
| 21 | +Take elements from an array. |
| 22 | +
|
| 23 | +Parameters |
| 24 | +---------- |
| 25 | +%(arr)s\ |
| 26 | +indexer : sequence of integers |
| 27 | + Indices to be taken. See Notes for how negative indicies |
| 28 | + are handled. |
| 29 | +fill_value : any, optional |
| 30 | + Fill value to use for NA-indicies. This has a few behaviors. |
| 31 | +
|
| 32 | + * fill_value is not specified : triggers NumPy's semantics |
| 33 | + where negative values in `indexer` mean slices from the end. |
| 34 | + * fill_value is NA : Fill positions where `indexer` is ``-1`` |
| 35 | + with ``self.dtype.na_value``. Anything considered NA by |
| 36 | + :func:`pandas.isna` will result in ``self.dtype.na_value`` |
| 37 | + being used to fill. |
| 38 | + * fill_value is not NA : Fill positions where `indexer` is ``-1`` |
| 39 | + with `fill_value`. |
| 40 | +
|
| 41 | +Returns |
| 42 | +------- |
| 43 | +ExtensionArray |
| 44 | +
|
| 45 | +Raises |
| 46 | +------ |
| 47 | +IndexError |
| 48 | + When the indexer is out of bounds for the array. |
| 49 | +ValueError |
| 50 | + When the indexer contains negative values other than ``-1`` |
| 51 | + and `fill_value` is specified. |
| 52 | +
|
| 53 | +Notes |
| 54 | +----- |
| 55 | +The meaning of negative values in `indexer` depends on the |
| 56 | +`fill_value` argument. By default, we follow the behavior |
| 57 | +:meth:`numpy.take` of where negative indices indicate slices |
| 58 | +from the end. |
| 59 | +
|
| 60 | +When `fill_value` is specified, we follow pandas semantics of ``-1`` |
| 61 | +indicating a missing value. In this case, positions where `indexer` |
| 62 | +is ``-1`` will be filled with `fill_value` or the default NA value |
| 63 | +for this type. |
| 64 | +
|
| 65 | +ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, |
| 66 | +``iloc``, when the indexer is a sequence of values. Additionally, |
| 67 | +it's called by :meth:`Series.reindex` with a `fill_value`. |
| 68 | +
|
| 69 | +See Also |
| 70 | +-------- |
| 71 | +numpy.take""") |
15 | 72 |
|
16 | 73 |
|
17 | 74 | class ExtensionArray(object):
|
@@ -476,60 +533,10 @@ def _values_for_take(self):
|
476 | 533 | """
|
477 | 534 | return self.astype(object)
|
478 | 535 |
|
| 536 | + @Substitution(arr='') |
| 537 | + @Appender(_take_docstring) |
479 | 538 | def take(self, indexer, fill_value=_default_fill_value):
|
480 | 539 | # type: (Sequence[int], Optional[Any]) -> ExtensionArray
|
481 |
| - """Take elements from an array. |
482 |
| -
|
483 |
| - Parameters |
484 |
| - ---------- |
485 |
| - indexer : sequence of integers |
486 |
| - Indices to be taken. See Notes for how negative indicies |
487 |
| - are handled. |
488 |
| - fill_value : any, optional |
489 |
| - Fill value to use for NA-indicies. This has a few behaviors. |
490 |
| -
|
491 |
| - * fill_value is not specified : triggers NumPy's semantics |
492 |
| - where negative values in `indexer` mean slices from the end. |
493 |
| - * fill_value is NA : Fill positions where `indexer` is ``-1`` |
494 |
| - with ``self.dtype.na_value``. Anything considered NA by |
495 |
| - :func:`pandas.isna` will result in ``self.dtype.na_value`` |
496 |
| - being used to fill. |
497 |
| - * fill_value is not NA : Fill positions where `indexer` is ``-1`` |
498 |
| - with `fill_value`. |
499 |
| -
|
500 |
| - Returns |
501 |
| - ------- |
502 |
| - ExtensionArray |
503 |
| -
|
504 |
| - Raises |
505 |
| - ------ |
506 |
| - IndexError |
507 |
| - When the indexer is out of bounds for the array. |
508 |
| - ValueError |
509 |
| - When the indexer contains negative values other than ``-1`` |
510 |
| - and `fill_value` is specified. |
511 |
| -
|
512 |
| - Notes |
513 |
| - ----- |
514 |
| - The meaning of negative values in `indexer` depends on the |
515 |
| - `fill_value` argument. By default, we follow the behavior |
516 |
| - :meth:`numpy.take` of where negative indices indicate slices |
517 |
| - from the end. |
518 |
| -
|
519 |
| - When `fill_value` is specified, we follow pandas semantics of ``-1`` |
520 |
| - indicating a missing value. In this case, positions where `indexer` |
521 |
| - is ``-1`` will be filled with `fill_value` or the default NA value |
522 |
| - for this type. |
523 |
| -
|
524 |
| - ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, |
525 |
| - ``iloc``, when the indexer is a sequence of values. Additionally, |
526 |
| - it's called by :meth:`Series.reindex` with a `fill_value`. |
527 |
| -
|
528 |
| - See Also |
529 |
| - -------- |
530 |
| - numpy.take |
531 |
| - """ |
532 |
| - from pandas.core.algorithms import take_ea |
533 | 540 | from pandas.core.missing import isna
|
534 | 541 |
|
535 | 542 | if isna(fill_value):
|
@@ -601,3 +608,31 @@ def _ndarray_values(self):
|
601 | 608 | used for interacting with our indexers.
|
602 | 609 | """
|
603 | 610 | return np.array(self)
|
| 611 | + |
| 612 | + |
@Substitution(arr=textwrap.dedent("""\
arr : array-like
    Must satisfy NumPy's indexing semantics, including `take`
    and boolean masking.
"""))
@Appender(_take_docstring)
def take_ea(arr, indexer, fill_value=_default_fill_value):
    # The docstring is supplied by the decorators above; do not add one here,
    # since Appender appends the shared template to any existing __doc__.
    indexer = np.asarray(indexer)
    if indexer.size == 0:
        # An empty sequence is converted to a float64 array, which ``take``
        # refuses to use as an index. Coerce only the empty case so that
        # non-integer indexers with actual values are still rejected.
        indexer = indexer.astype(np.intp)

    if fill_value is _default_fill_value:
        # NumPy style: no fill value given, negative values count from the end.
        return arr.take(indexer)

    # pandas style: -1 marks a missing position, anything lower is invalid.
    mask = indexer == -1
    if (indexer < -1).any():
        raise ValueError("Invalid value in 'indexer'. All values "
                         "must be non-negative or -1. When "
                         "'fill_value' is specified.")

    # take on empty array not handled as desired by numpy
    # in case of -1 (all missing take)
    # NOTE(review): this branch relies on ``arr`` exposing ``_from_sequence``;
    # a plain ndarray does not have it -- confirm what callers pass here.
    if not len(arr) and mask.all():
        return arr._from_sequence([fill_value] * len(indexer))

    # -1 entries temporarily pick up the last element and are then
    # overwritten with the fill value through the mask.
    result = arr.take(indexer)
    result[mask] = fill_value
    return result
0 commit comments