Skip to content

API: tighten DTA/TDA _from_sequence signature #37179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
eab1030
EA: tighten TimedeltaArray._from_sequence signature
jbrockmendel Sep 29, 2020
137fdf1
Merge branch 'master' of https://github.com/pandas-dev/pandas into st…
jbrockmendel Oct 1, 2020
e54ed75
EA: Tighten signature on DatetimeArray._from_sequence
jbrockmendel Sep 29, 2020
3d2a7ab
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 3, 2020
96a8475
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 5, 2020
d49d819
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 8, 2020
f693ed4
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 9, 2020
058338a
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 16, 2020
77e7c21
API: restrict DTA/TDA _from_sequence
jbrockmendel Oct 16, 2020
7e97d03
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 23, 2020
f2a2aaf
test
jbrockmendel Oct 24, 2020
00d19e3
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 31, 2020
bc532be
lint fixup
jbrockmendel Oct 31, 2020
2e612c4
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 4, 2020
b8db5c1
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 4, 2020
cad1104
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 4, 2020
8f1b25d
Use _from_sequence_of_strings where appropriate
jbrockmendel Nov 5, 2020
fb41950
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 5, 2020
9f72ad8
workaround for i8 case
jbrockmendel Nov 5, 2020
54c87da
mypy fixup
jbrockmendel Nov 5, 2020
d87533b
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 9, 2020
158f2b2
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 11, 2020
b071b5f
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 19, 2020
bffacb1
from_sequence -> from_sequence_strict
jbrockmendel Nov 19, 2020
0153a51
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 20, 2020
5acede8
mypy fixup
jbrockmendel Nov 20, 2020
3b25043
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Nov 20, 2020
8c87760
revert test edits
jbrockmendel Nov 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1527,6 +1527,14 @@ class TimelikeOps(DatetimeLikeArrayMixin):
Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
"""

@classmethod
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
    """
    Build an array from a sequence of strings.

    This is a thin pass-through to ``cls._from_sequence``; ``dtype`` and
    ``copy`` are forwarded unchanged as keyword arguments.
    """
    # Note: for now at least, no validation happens here; the caller is
    #  responsible for checking that strings are being passed.
    forwarded = {"dtype": dtype, "copy": copy}
    return cls._from_sequence(strings, **forwarded)

# --------------------------------------------------------------

def _round(self, freq, mode, ambiguous, nonexistent):
# round the local times
if is_datetime64tz_dtype(self.dtype):
Expand Down Expand Up @@ -1595,6 +1603,24 @@ def _with_freq(self, freq):
# Shared Constructor Helpers


def ensure_arraylike(scalars, copy: bool) -> Tuple[Any, bool]:
    """
    Coerce a dtype-less sequence of scalars to an ndarray.

    Parameters
    ----------
    scalars : object
        Input to normalize. Anything exposing a ``dtype`` attribute is
        returned untouched.
    copy : bool
        Caller's copy flag. Returned unchanged for input that already has a
        ``dtype``; reset to False whenever a new ndarray is built here.

    Returns
    -------
    tuple
        ``(scalars, copy)`` after normalization.
    """
    if hasattr(scalars, "dtype"):
        # Already array-like: nothing to convert, keep the caller's flag.
        return scalars, copy

    # Zero-dimensional input (np.ndim == 0, e.g. a generator) is expanded
    # into a list before being handed to np.asarray.
    values = scalars if np.ndim(scalars) != 0 else list(scalars)
    values = np.asarray(values)

    if not len(values):
        # Without casting, we would have float64 and so would reject later
        #  in from_sequence
        values = values.astype(object)

    return values, False


def validate_periods(periods):
"""
If a `periods` argument is passed to the Datetime/Timedelta Array/Index
Expand Down
40 changes: 39 additions & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCPandasArray, ABCSeries
from pandas.core.dtypes.generic import (
ABCIndexClass,
ABCMultiIndex,
ABCPandasArray,
ABCSeries,
)
from pandas.core.dtypes.missing import isna

from pandas.core.algorithms import checked_add_with_arr
Expand Down Expand Up @@ -300,6 +305,39 @@ def _simple_new(
result._dtype = dtype
return result

@classmethod
def _from_sequence_strict(cls, scalars, *, dtype=None, copy: bool = False):
    """
    Validate the input dtype, then defer to ``_from_sequence_not_strict``.

    Accepted inputs are: dtypes of kind "M"; object dtype whose inferred
    type is one of "datetime64", "date", "datetime" or "empty"; string
    dtypes; and categoricals whose categories have a kind-"M" dtype.

    Parameters
    ----------
    scalars : sequence or array-like
        Normalized through ``dtl.ensure_arraylike`` before validation.
    dtype : optional
        Forwarded unchanged to ``_from_sequence_not_strict``.
    copy : bool, default False
        Forwarded after ``dtl.ensure_arraylike`` has had a chance to
        adjust it.

    Raises
    ------
    TypeError
        If ``scalars`` is an object-dtype MultiIndex, if object-dtype
        values infer to an unsupported type, or if the dtype is not one of
        the accepted kinds.
    """
    # GH#37179 eventually _from_sequence should be strict
    scalars, copy = dtl.ensure_arraylike(scalars, copy)
    src_dtype = scalars.dtype

    if src_dtype.kind != "M":
        if src_dtype == object:
            if isinstance(scalars, ABCMultiIndex):
                # NOTE(review): a reviewer asked whether this branch is
                #  tested.
                raise TypeError("Cannot create a DatetimeArray from MultiIndex")

            inferred = lib.infer_dtype(scalars)
            if inferred not in ("datetime64", "date", "datetime", "empty"):
                msg = f"{inferred} scalars cannot be converted to datetime64[ns]"
                raise TypeError(msg)
        elif is_string_dtype(src_dtype):
            # TODO: should go through from_sequence_of_strings?
            pass
        elif not (
            is_categorical_dtype(src_dtype)
            and scalars.categories.dtype.kind == "M"
        ):
            # TODO: Could also use Categorical[object]
            #  with inferred_type as above?
            # NOTE(review): a reviewer asked for a test that hits this
            #  error path.
            msg = f"dtype {scalars.dtype} cannot be converted to datetime64[ns]"
            raise TypeError(msg)

    return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)

@classmethod
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
Expand Down
23 changes: 23 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,29 @@ def _simple_new(
result._dtype = TD64NS_DTYPE
return result

@classmethod
def _from_sequence_strict(
    cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False
) -> "TimedeltaArray":
    """
    Validate the input dtype, then defer to ``_from_sequence``.

    Only timedelta-like input is accepted: dtypes of kind "m", or object
    dtype whose inferred type is "timedelta64", "timedelta" or "empty".

    Parameters
    ----------
    data : sequence or array-like
        Normalized through ``dtl.ensure_arraylike`` before validation.
    dtype : optional, default TD64NS_DTYPE
        Checked with ``_validate_td64_dtype`` unless it is None. It is not
        forwarded; ``_from_sequence`` is called with its own default.
    copy : bool, default False
        Forwarded after ``dtl.ensure_arraylike`` has had a chance to
        adjust it.

    Raises
    ------
    ValueError
        If ``data`` has object dtype and its values do not infer to a
        timedelta type.
    TypeError
        If ``data`` has any other non-timedelta dtype.
    """
    # GH#37179 eventually we want _from_sequence to be strict

    # Compare against None explicitly: truthiness is not a reliable "was a
    # dtype passed" check, because NumPy dtypes define __len__ as their
    # field count (0 for plain dtypes) and so may evaluate as falsy, which
    # would silently skip validation.
    if dtype is not None:
        _validate_td64_dtype(dtype)

    data, copy = dtl.ensure_arraylike(data, copy)

    if data.dtype.kind == "m":
        pass
    elif data.dtype == object:
        inferred = lib.infer_dtype(data)
        if inferred not in ("timedelta64", "timedelta", "empty"):
            # The message still contains the bare inferred type, so
            # callers matching on it keep working.
            msg = f"{inferred} scalars cannot be converted to timedelta64[ns]"
            raise ValueError(msg)
    else:
        # NOTE(review): a reviewer asked for a test that hits this error
        #  path.
        msg = f"dtype {data.dtype} cannot be converted to timedelta64[ns]"
        raise TypeError(msg)

    return cls._from_sequence(data=data, copy=copy)

@classmethod
def _from_sequence(
cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,10 @@ def astype_nansafe(
"""
# dispatch on extension dtype if needed
if is_extension_array_dtype(dtype):
return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
cls = dtype.construct_array_type()
if lib.infer_dtype(arr) == "string":
return cls._from_sequence_of_strings(arr, dtype=dtype, copy=copy)
return cls._from_sequence(arr, dtype=dtype, copy=copy)

if not isinstance(dtype, np.dtype):
dtype = pandas_dtype(dtype)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@


class TestDatetimeArrayConstructor:
def test_from_sequence_strict_invalid_type(self):
    """
    Strict construction rejects a MultiIndex and its underlying values.
    """
    multi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])

    # Passing the MultiIndex itself.
    mi_msg = "Cannot create a DatetimeArray"
    with pytest.raises(TypeError, match=mi_msg):
        DatetimeArray._from_sequence_strict(multi)

    # GH#37179: passing the values backing the MultiIndex.
    values_msg = "mixed scalars cannot be converted to datetime64"
    with pytest.raises(TypeError, match=values_msg):
        DatetimeArray._from_sequence_strict(multi._values)

def test_from_sequence_invalid_type(self):
mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ def test_copy(self):
assert arr._data is not data
assert arr._data.base is not data

def test_from_sequence_strict_invalid_dtypes(self):
    """
    Strict construction rejects float data, both as float64 and as object.
    """
    # GH#37179
    float_values = np.arange(5, dtype=np.float64)
    # object-dtype array of floats
    object_values = float_values.astype(object)

    with pytest.raises(TypeError, match="float64"):
        TimedeltaArray._from_sequence_strict(float_values)

    with pytest.raises(ValueError, match="floating"):
        TimedeltaArray._from_sequence_strict(object_values)


class TestTimedeltaArray:
# TODO: de-duplicate with test_npsum below
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ def astype(self, dtype, copy=True):
return self
elif isinstance(dtype, StringDtype):
value = self.astype(str) # numpy doesn't like nested dicts
return dtype.construct_array_type()._from_sequence(value, copy=False)
cls = dtype.construct_array_type()
return cls._from_sequence_of_strings(value, copy=False)

return np.array([dict(x) for x in self], dtype=dtype, copy=copy)

Expand Down