Skip to content

BUG: TypeError in merge with timedelta64 column #13802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,10 @@ Bug Fixes
- Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`)
- Bug in ``.value_counts`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`)
- Bug in ``DatetimeIndex`` may raise ``OutOfBoundsDatetime`` if input ``np.datetime64`` has other unit than ``ns`` (:issue:`9114`)
- Bug in ``isnull`` and ``notnull`` raises ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`)
- Bug in ``.merge`` may raise ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`)



- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
- Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`)
Expand Down
255 changes: 161 additions & 94 deletions pandas/tests/types/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime
from pandas.util import testing as tm

import pandas as pd
from pandas.core import config as cf
from pandas.compat import u
from pandas.tslib import iNaT
Expand Down Expand Up @@ -45,100 +46,6 @@ def test_notnull():
assert (isinstance(isnull(s), Series))


def test_isnull():
    """Exercise ``isnull`` on scalars, Series, frames, panels and 4D panels."""
    # scalars: None and NaN are null; ordinary floats and infinities are not
    for null_scalar in (None, np.NaN):
        assert isnull(null_scalar)
    for valid_scalar in (1., np.inf, -np.inf):
        assert not isnull(valid_scalar)

    # series: the result must itself be a Series
    all_series = [tm.makeFloatSeries(), tm.makeStringSeries(),
                  tm.makeObjectSeries(), tm.makeTimeSeries(),
                  tm.makePeriodSeries()]
    assert all(isinstance(isnull(ser), Series) for ser in all_series)

    # frame: must match applying isnull through ``apply``
    frames = [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
              tm.makeMixedDataFrame()]
    for frame in frames:
        tm.assert_frame_equal(isnull(frame), frame.apply(isnull))

    # panel
    panels = [tm.makePanel(), tm.makePeriodPanel(),
              tm.add_nans(tm.makePanel())]
    for panel in panels:
        tm.assert_panel_equal(isnull(panel), panel.apply(isnull))

    # panel 4d: the whole section runs under the FutureWarning check
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        panels_4d = [tm.makePanel4D(),
                     tm.add_nans_panel4d(tm.makePanel4D())]
        for panel_4d in panels_4d:
            tm.assert_panel4d_equal(isnull(panel_4d),
                                    panel_4d.apply(isnull))


def test_isnull_lists():
    """``isnull`` on nested lists and on lists of strings finds no nulls."""
    nested_cases = [
        ([[False]], np.array([[False]])),
        ([[1], [2]], np.array([[False], [False]])),
    ]
    for values, expected in nested_cases:
        assert np.array_equal(isnull(values), expected)

    # list of strings / unicode
    for strings in (['foo', 'bar'], [u('foo'), u('bar')]):
        assert not isnull(strings).any()


def test_isnull_nat():
    """``NaT`` is null both in a plain list and in an object ndarray."""
    all_null = np.array([True])
    for values in ([NaT], np.array([NaT], dtype=object)):
        assert np.array_equal(isnull(values), all_null)


def test_isnull_numpy_nat():
    """Every flavour of NaT in a mixed array is flagged as null."""
    nat_values = [NaT, np.datetime64('NaT'), np.timedelta64('NaT'),
                  np.datetime64('NaT', 's')]
    mask = isnull(np.array(nat_values))
    tm.assert_numpy_array_equal(mask, np.array([True] * 4))


def test_isnull_datetime():
    """Check null detection on datetimes, a DatetimeIndex and its periods."""
    assert not isnull(datetime.now())
    assert notnull(datetime.now())

    dates = date_range('1/1/1990', periods=20)
    assert notnull(dates).all()

    # overwrite the first entry with iNaT, then rebuild the index
    raw_values = np.asarray(dates)
    raw_values[0] = iNaT
    dates = DatetimeIndex(raw_values)
    date_mask = isnull(dates)
    assert date_mask[0]
    assert not date_mask[1:].any()

    # GH 9129
    periods = dates.to_period(freq='M')
    period_mask = isnull(periods)
    assert period_mask[0]
    assert not period_mask[1:].any()

    assert not isnull(periods[1:]).any()


class TestIsNull(tm.TestCase):

def test_0d_array(self):
Expand All @@ -150,6 +57,166 @@ def test_0d_array(self):
self.assertFalse(isnull(np.array(0.0, dtype=object)))
self.assertFalse(isnull(np.array(0, dtype=object)))

def test_isnull(self):
    """Check ``isnull`` on scalars, Series, frames, panels and 4D panels."""
    self.assertFalse(isnull(1.))
    self.assertTrue(isnull(None))
    self.assertTrue(isnull(np.NaN))
    # The float NaN has to go through isnull: NaN is truthy on its own, so
    # asserting on the bare value could never fail.
    self.assertTrue(isnull(float('nan')))
    self.assertFalse(isnull(np.inf))
    self.assertFalse(isnull(-np.inf))

    # series
    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
              tm.makeObjectSeries(), tm.makeTimeSeries(),
              tm.makePeriodSeries()]:
        self.assertIsInstance(isnull(s), Series)

    # frame
    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
               tm.makeMixedDataFrame()]:
        result = isnull(df)
        expected = df.apply(isnull)
        tm.assert_frame_equal(result, expected)

    # panel
    for p in [tm.makePanel(), tm.makePeriodPanel(),
              tm.add_nans(tm.makePanel())]:
        result = isnull(p)
        expected = p.apply(isnull)
        tm.assert_panel_equal(result, expected)

    # panel 4d
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
            result = isnull(p)
            expected = p.apply(isnull)
            tm.assert_panel4d_equal(result, expected)

def test_isnull_lists(self):
    """``isnull`` on nested lists and string lists yields all-False arrays."""
    cases = [
        ([[False]], np.array([[False]])),
        ([[1], [2]], np.array([[False], [False]])),
        # list of strings / unicode
        (['foo', 'bar'], np.array([False, False])),
        ([u('foo'), u('bar')], np.array([False, False])),
    ]
    for values, expected in cases:
        tm.assert_numpy_array_equal(isnull(values), expected)

def test_isnull_nat(self):
    """``NaT`` is null both in a plain list and in an object ndarray."""
    for values in ([NaT], np.array([NaT], dtype=object)):
        tm.assert_numpy_array_equal(isnull(values), np.array([True]))

def test_isnull_numpy_nat(self):
    """Every flavour of NaT in a mixed array is flagged as null."""
    nat_values = [NaT, np.datetime64('NaT'), np.timedelta64('NaT'),
                  np.datetime64('NaT', 's')]
    mask = isnull(np.array(nat_values))
    tm.assert_numpy_array_equal(mask, np.array([True] * 4))

def test_isnull_datetime(self):
    """Check null detection on datetimes, DatetimeIndex and PeriodIndex."""
    self.assertFalse(isnull(datetime.now()))
    self.assertTrue(notnull(datetime.now()))

    idx = date_range('1/1/1990', periods=20)
    exp = np.ones(len(idx), dtype=bool)
    tm.assert_numpy_array_equal(notnull(idx), exp)

    # overwrite the first entry with iNaT so exactly one value is null
    idx = np.asarray(idx)
    idx[0] = iNaT
    idx = DatetimeIndex(idx)
    mask = isnull(idx)
    self.assertTrue(mask[0])
    exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool)
    tm.assert_numpy_array_equal(mask, exp)

    # GH 9129
    pidx = idx.to_period(freq='M')
    mask = isnull(pidx)
    self.assertTrue(mask[0])
    # size the expectation from the object actually being tested
    exp = np.array([True] + [False] * (len(pidx) - 1), dtype=bool)
    tm.assert_numpy_array_equal(mask, exp)

    mask = isnull(pidx[1:])
    exp = np.zeros(len(mask), dtype=bool)
    tm.assert_numpy_array_equal(mask, exp)

def test_datetime_other_units(self):
    """``isnull``/``notnull`` handle datetime64 data in every listed unit."""
    idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02'])
    array_mask = np.array([False, True, False])
    series_mask = pd.Series([False, True, False])

    # the index itself and its underlying ndarray
    for obj in (idx, idx.values):
        tm.assert_numpy_array_equal(isnull(obj), array_mask)
        tm.assert_numpy_array_equal(notnull(obj), ~array_mask)

    for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                  'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                  'datetime64[ns]']:
        converted = idx.values.astype(dtype)

        tm.assert_numpy_array_equal(isnull(converted), array_mask)
        tm.assert_numpy_array_equal(notnull(converted), ~array_mask)

        # same values wrapped in a Series, default and object dtype
        for ser in (pd.Series(converted),
                    pd.Series(converted, dtype=object)):
            tm.assert_series_equal(isnull(ser), series_mask)
            tm.assert_series_equal(notnull(ser), ~series_mask)

def test_timedelta_other_units(self):
    """``isnull``/``notnull`` handle timedelta64 data in every listed unit."""
    idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days'])
    array_mask = np.array([False, True, False])
    series_mask = pd.Series([False, True, False])

    # the index itself and its underlying ndarray
    for obj in (idx, idx.values):
        tm.assert_numpy_array_equal(isnull(obj), array_mask)
        tm.assert_numpy_array_equal(notnull(obj), ~array_mask)

    for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
                  'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
                  'timedelta64[ns]']:
        converted = idx.values.astype(dtype)

        tm.assert_numpy_array_equal(isnull(converted), array_mask)
        tm.assert_numpy_array_equal(notnull(converted), ~array_mask)

        # same values wrapped in a Series, default and object dtype
        for ser in (pd.Series(converted),
                    pd.Series(converted, dtype=object)):
            tm.assert_series_equal(isnull(ser), series_mask)
            tm.assert_series_equal(notnull(ser), ~series_mask)

def test_period(self):
    """``isnull``/``notnull`` on a PeriodIndex and on Series built from it."""
    idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M')

    array_mask = np.array([False, True, False])
    tm.assert_numpy_array_equal(isnull(idx), array_mask)
    tm.assert_numpy_array_equal(notnull(idx), ~array_mask)

    # Series with default dtype, then forced to object dtype
    series_mask = pd.Series([False, True, False])
    for ser in (pd.Series(idx), pd.Series(idx, dtype=object)):
        tm.assert_series_equal(isnull(ser), series_mask)
        tm.assert_series_equal(notnull(ser), ~series_mask)


def test_array_equivalent():
assert array_equivalent(np.array([np.nan, np.nan]),
Expand Down
41 changes: 41 additions & 0 deletions pandas/tools/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,47 @@ def test_join_append_timedeltas(self):
'0r': Series([td, NaT], index=list('AB'))})
assert_frame_equal(result, expected)

def test_other_datetime_unit(self):
    """Merge an all-null datetime column created from each datetime unit."""
    # GH 13389
    left = pd.DataFrame({'entity_id': [101, 102]})
    nulls = pd.Series([None, None], index=[101, 102], name='days')

    for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                  'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                  'datetime64[ns]']:

        right = nulls.astype(dtype).to_frame('days')
        # coerces to datetime64[ns], thus should not be affected
        self.assertEqual(right['days'].dtype, 'datetime64[ns]')

        merged = left.merge(right, left_on='entity_id', right_index=True)

        nat_days = np.array(['nat', 'nat'], dtype='datetime64[ns]')
        expected = pd.DataFrame({'entity_id': [101, 102],
                                 'days': nat_days},
                                columns=['entity_id', 'days'])
        tm.assert_frame_equal(merged, expected)

def test_other_timedelta_unit(self):
    """Merge an all-null timedelta column in each timedelta unit."""
    # GH 13389
    left = pd.DataFrame({'entity_id': [101, 102]})
    nulls = pd.Series([None, None], index=[101, 102], name='days')

    for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
                  'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
                  'timedelta64[ns]']:

        right = nulls.astype(dtype).to_frame('days')
        # the column is expected to keep the requested unit
        self.assertEqual(right['days'].dtype, dtype)

        merged = left.merge(right, left_on='entity_id', right_index=True)

        nat_days = np.array(['nat', 'nat'], dtype=dtype)
        expected = pd.DataFrame({'entity_id': [101, 102],
                                 'days': nat_days},
                                columns=['entity_id', 'days'])
        tm.assert_frame_equal(merged, expected)

def test_overlapping_columns_error_message(self):
df = DataFrame({'key': [1, 2, 3],
'v1': [4, 5, 6],
Expand Down
18 changes: 18 additions & 0 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1663,6 +1663,24 @@ def test_constructor_datetime64arr(self):

self.assertRaises(ValueError, PeriodIndex, vals, freq='D')

def test_view(self):
    """``view('i8')`` and ``asi8`` expose the same int64 values."""
    # (input values, freq, expected int64 values)
    cases = [
        ([], 'M', []),
        (['2011-01', pd.NaT], 'M', [492, -9223372036854775808]),
        (['2011-01-01', pd.NaT], 'D', [14975, -9223372036854775808]),
    ]
    for values, freq, ordinals in cases:
        expected = np.array(ordinals, dtype=np.int64)
        index = pd.PeriodIndex(values, freq=freq)
        tm.assert_numpy_array_equal(index.view('i8'), expected)
        tm.assert_numpy_array_equal(index.asi8, expected)

def test_constructor_empty(self):
idx = pd.PeriodIndex([], freq='M')
tm.assertIsInstance(idx, PeriodIndex)
Expand Down
3 changes: 2 additions & 1 deletion pandas/types/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ def is_object(x):

def needs_i8_conversion(arr_or_dtype):
return (is_datetime_or_timedelta_dtype(arr_or_dtype) or
is_datetime64tz_dtype(arr_or_dtype))
is_datetime64tz_dtype(arr_or_dtype) or
isinstance(arr_or_dtype, ABCPeriodIndex))


def is_numeric_dtype(arr_or_dtype):
Expand Down
2 changes: 1 addition & 1 deletion pandas/types/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def _isnull_ndarraylike(obj):
vec = lib.isnullobj(values.ravel())
result[...] = vec.reshape(shape)

elif is_datetimelike(obj):
elif needs_i8_conversion(obj):
# this is the NaT pattern
result = values.view('i8') == iNaT
else:
Expand Down