Description
Pandas version checks
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- I have confirmed this bug exists on the master branch of pandas.
Reproducible Example
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
x, y = np.random.randn(2, 100)
x = pd.Series(x, dtype="Float32")
z = pd.Series(range(5))
x[:, None] # fails with ValueError
z[:, None] # warns but promised to be IndexError
Issue Description
Per the discussion around #35527 / https://github.com/matplotlib/matplotlib/issues/18158 and related links, there have been a bunch of issues about multi-dimensional indexing into Series and objects that have inconsistent `.ndim` and `.shape`.
In a departure from numpy, it was my understanding that the implicit broadcasting to higher dimensions was going to be dropped by pandas at some point in the future (although it seems to still warn for builtin types). However, for the new missing-data types, trying to do this 2D slicing raises a `ValueError`, which was reported as a bug to Matplotlib (matplotlib/matplotlib#22125):
In [15]: x[:, None]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/series.py, in Series._get_values(self, indexer)
1025 try:
-> 1026 new_mgr = self._mgr.getitem_mgr(indexer)
1027 return self._constructor(new_mgr).__finalize__(self)
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/internals/managers.py, in SingleBlockManager.getitem_mgr(self, indexer)
1635 blk = self._block
-> 1636 array = blk._slice(indexer)
1637 if array.ndim > 1:
1638 # This will be caught by Series._get_values
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/internals/blocks.py, in ExtensionBlock._slice(self, slicer)
1553 raise AssertionError(
1554 "invalid slicing for a 1-ndim ExtensionArray", slicer
1555 )
-> 1557 return self.values[slicer]
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/arrays/masked.py, in BaseMaskedArray.__getitem__(self, item)
144 item = check_array_indexer(self, item)
--> 146 return type(self)(self._data[item], self._mask[item])
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/arrays/floating.py, in FloatingArray.__init__(self, values, mask, copy)
251 raise TypeError(
252 "values should be floating numpy array. Use "
253 "the 'pd.array' function instead"
254 )
--> 255 super().__init__(values, mask, copy=copy)
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/arrays/masked.py, in BaseMaskedArray.__init__(self, values, mask, copy)
122 if values.ndim != 1:
--> 123 raise ValueError("values must be a 1D array")
124 if mask.ndim != 1:
ValueError: values must be a 1D array
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Input In [15], in <module>
----> 1 x[:, None]
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/series.py, in Series.__getitem__(self, key)
963 key = np.asarray(key, dtype=bool)
964 return self._get_values(key)
--> 966 return self._get_with(key)
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/series.py, in Series._get_with(self, key)
976 raise TypeError(
977 "Indexing a Series with DataFrame is not "
978 "supported, use the appropriate DataFrame column"
979 )
980 elif isinstance(key, tuple):
--> 981 return self._get_values_tuple(key)
983 elif not is_list_like(key):
984 # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684
985 return self.loc[key]
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/series.py, in Series._get_values_tuple(self, key)
1008 def _get_values_tuple(self, key):
1009 # mpl hackaround
1010 if com.any_none(*key):
-> 1011 result = self._get_values(key)
1012 deprecate_ndim_indexing(result, stacklevel=5)
1013 return result
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/series.py, in Series._get_values(self, indexer)
1027 return self._constructor(new_mgr).__finalize__(self)
1028 except ValueError:
1029 # mpl compat if we look up e.g. ser[:, np.newaxis];
1030 # see tests.series.timeseries.test_mpl_compat_hack
1031 # the asarray is needed to avoid returning a 2D DatetimeArray
-> 1032 return np.asarray(self._values[indexer])
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/arrays/masked.py, in BaseMaskedArray.__getitem__(self, item)
142 return self._data[item]
144 item = check_array_indexer(self, item)
--> 146 return type(self)(self._data[item], self._mask[item])
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/arrays/floating.py, in FloatingArray.__init__(self, values, mask, copy)
250 if not (isinstance(values, np.ndarray) and values.dtype.kind == "f"):
251 raise TypeError(
252 "values should be floating numpy array. Use "
253 "the 'pd.array' function instead"
254 )
--> 255 super().__init__(values, mask, copy=copy)
File ~/.virtualenvs/sys310/lib/python3.10/site-packages/pandas/core/arrays/masked.py, in BaseMaskedArray.__init__(self, values, mask, copy)
118 raise TypeError(
119 "mask should be boolean numpy array. Use "
120 "the 'pd.array' function instead"
121 )
122 if values.ndim != 1:
--> 123 raise ValueError("values must be a 1D array")
124 if mask.ndim != 1:
125 raise ValueError("mask must be a 1D array")
ValueError: values must be a 1D array
Given this comment in `Series._get_values_tuple` (pandas/core/series.py):
1008 def _get_values_tuple(self, key):
1009 # mpl hackaround
I think we need to revisit this and make sure we are not both grumbling about the other and putting in dueling workarounds ;)
Expected Behavior
Behavior of the container to be independent of the contained type.