Closed
Description
Code Sample, a copy-pastable example if possible
pd.Series(['E'], dtype=FletcherDType(pa.string())
Problem description
This leads to the following traceback but should actually create a Series object of a string-based type.
def test_constructor_sanitation():
> data = pd.Series(["E"], index=["F"], dtype=ArrowStringDtype())
pandas/tests/extension/arrow/test_string.py:11:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/series.py:310: in __init__
data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
data = ['E'], index = Index(['F'], dtype='object'), dtype = <pandas.tests.extension.arrow.arrays.ArrowStringDtype object at 0x120913cf8>, copy = False, raise_cast_failure = True
def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False):
"""
Sanitize input data to an ndarray, copy if specified, coerce to the
dtype if specified.
"""
if dtype is not None:
dtype = pandas_dtype(dtype)
if isinstance(data, ma.MaskedArray):
mask = ma.getmaskarray(data)
if mask.any():
data, fill_value = maybe_upcast(data, copy=True)
data.soften_mask() # set hardmask False if it was True
data[mask] = fill_value
else:
data = data.copy()
# extract ndarray or ExtensionArray, ensure we have no PandasArray
data = extract_array(data, extract_numpy=True)
# GH#846
if isinstance(data, np.ndarray):
if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
# possibility of nan -> garbage
try:
subarr = _try_cast(data, dtype, copy, True)
except ValueError:
if copy:
subarr = data.copy()
else:
subarr = np.array(data, copy=False)
else:
# we will try to copy be-definition here
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
elif isinstance(data, ABCExtensionArray):
# it is already ensured above this is not a PandasArray
subarr = data
if dtype is not None:
subarr = subarr.astype(dtype, copy=copy)
elif copy:
subarr = subarr.copy()
return subarr
elif isinstance(data, (list, tuple)) and len(data) > 0:
if dtype is not None:
try:
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
except Exception:
if raise_cast_failure: # pragma: no cover
raise
subarr = np.array(data, dtype=object, copy=copy)
subarr = lib.maybe_convert_objects(subarr)
else:
subarr = maybe_convert_platform(data)
subarr = maybe_cast_to_datetime(subarr, dtype)
elif isinstance(data, range):
# GH#16804
arr = np.arange(data.start, data.stop, data.step, dtype="int64")
subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
else:
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
# scalar like, GH
if getattr(subarr, "ndim", 0) == 0:
if isinstance(data, list): # pragma: no cover
subarr = np.array(data, dtype=object)
elif index is not None:
value = data
# figure out the dtype from the value (upcast if necessary)
if dtype is None:
dtype, value = infer_dtype_from_scalar(value)
else:
# need to possibly convert the value here
value = maybe_cast_to_datetime(value, dtype)
subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)
else:
return subarr.item()
# the result that we want
elif subarr.ndim == 1:
if index is not None:
# a 1-element ndarray
if len(subarr) != len(index) and len(subarr) == 1:
subarr = construct_1d_arraylike_from_scalar(
subarr[0], len(index), subarr.dtype
)
elif subarr.ndim > 1:
if isinstance(data, np.ndarray):
raise Exception("Data must be 1-dimensional")
else:
subarr = com.asarray_tuplesafe(data, dtype=dtype)
# This is to prevent mixed-type Series getting all casted to
# NumPy string type, e.g. NaN --> '-1#IND'.
if issubclass(subarr.dtype.type, str):
import ipdb; ipdb.set_trace()
# GH#16605
# If not empty convert the data to dtype
# GH#19853: If data is a scalar, subarr has already the result
> if not lib.is_scalar(data):
E TypeError: data type not understood
pandas/core/construction.py:478: TypeError
Problem exists in 0.25 and master, I'm working on a fix and will submit a PR later.
Metadata
Metadata
Assignees
Labels
No labels