diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 375bbd79fd29b..06625e09d70a1 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -768,6 +768,10 @@ Bug Fixes - Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`) - Bug in ``.value_counts`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`) - Bug in ``DatetimeIndex`` may raise ``OutOfBoundsDatetime`` if input ``np.datetime64`` has other unit than ``ns`` (:issue:`9114`) +- Bug in ``isnull`` and ``notnull`` raise ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`) +- Bug in ``.merge`` may raise ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`) + + - Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) - Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`) diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py index b0e1eb72bd791..fa2bd535bb8d5 100644 --- a/pandas/tests/types/test_missing.py +++ b/pandas/tests/types/test_missing.py @@ -5,6 +5,7 @@ from datetime import datetime from pandas.util import testing as tm +import pandas as pd from pandas.core import config as cf from pandas.compat import u from pandas.tslib import iNaT @@ -45,100 +46,6 @@ def test_notnull(): assert (isinstance(isnull(s), Series)) -def test_isnull(): - assert not isnull(1.) 
- assert isnull(None) - assert isnull(np.NaN) - assert not isnull(np.inf) - assert not isnull(-np.inf) - - # series - for s in [tm.makeFloatSeries(), tm.makeStringSeries(), - tm.makeObjectSeries(), tm.makeTimeSeries(), - tm.makePeriodSeries()]: - assert (isinstance(isnull(s), Series)) - - # frame - for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), - tm.makeMixedDataFrame()]: - result = isnull(df) - expected = df.apply(isnull) - tm.assert_frame_equal(result, expected) - - # panel - for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel()) - ]: - result = isnull(p) - expected = p.apply(isnull) - tm.assert_panel_equal(result, expected) - - # panel 4d - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: - result = isnull(p) - expected = p.apply(isnull) - tm.assert_panel4d_equal(result, expected) - - -def test_isnull_lists(): - result = isnull([[False]]) - exp = np.array([[False]]) - assert (np.array_equal(result, exp)) - - result = isnull([[1], [2]]) - exp = np.array([[False], [False]]) - assert (np.array_equal(result, exp)) - - # list of strings / unicode - result = isnull(['foo', 'bar']) - assert (not result.any()) - - result = isnull([u('foo'), u('bar')]) - assert (not result.any()) - - -def test_isnull_nat(): - result = isnull([NaT]) - exp = np.array([True]) - assert (np.array_equal(result, exp)) - - result = isnull(np.array([NaT], dtype=object)) - exp = np.array([True]) - assert (np.array_equal(result, exp)) - - -def test_isnull_numpy_nat(): - arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), - np.datetime64('NaT', 's')]) - result = isnull(arr) - expected = np.array([True] * 4) - tm.assert_numpy_array_equal(result, expected) - - -def test_isnull_datetime(): - assert (not isnull(datetime.now())) - assert notnull(datetime.now()) - - idx = date_range('1/1/1990', periods=20) - assert (notnull(idx).all()) - - idx = np.asarray(idx) - idx[0] 
= iNaT - idx = DatetimeIndex(idx) - mask = isnull(idx) - assert (mask[0]) - assert (not mask[1:].any()) - - # GH 9129 - pidx = idx.to_period(freq='M') - mask = isnull(pidx) - assert (mask[0]) - assert (not mask[1:].any()) - - mask = isnull(pidx[1:]) - assert (not mask.any()) - - class TestIsNull(tm.TestCase): def test_0d_array(self): @@ -150,6 +57,166 @@ def test_0d_array(self): self.assertFalse(isnull(np.array(0.0, dtype=object))) self.assertFalse(isnull(np.array(0, dtype=object))) + def test_isnull(self): + self.assertFalse(isnull(1.)) + self.assertTrue(isnull(None)) + self.assertTrue(isnull(np.NaN)) + self.assertTrue(isnull(float('nan'))) + self.assertFalse(isnull(np.inf)) + self.assertFalse(isnull(-np.inf)) + + # series + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries(), tm.makeTimeSeries(), + tm.makePeriodSeries()]: + self.assertIsInstance(isnull(s), Series) + + # frame + for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), + tm.makeMixedDataFrame()]: + result = isnull(df) + expected = df.apply(isnull) + tm.assert_frame_equal(result, expected) + + # panel + for p in [tm.makePanel(), tm.makePeriodPanel(), + tm.add_nans(tm.makePanel())]: + result = isnull(p) + expected = p.apply(isnull) + tm.assert_panel_equal(result, expected) + + # panel 4d + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: + result = isnull(p) + expected = p.apply(isnull) + tm.assert_panel4d_equal(result, expected) + + def test_isnull_lists(self): + result = isnull([[False]]) + exp = np.array([[False]]) + tm.assert_numpy_array_equal(result, exp) + + result = isnull([[1], [2]]) + exp = np.array([[False], [False]]) + tm.assert_numpy_array_equal(result, exp) + + # list of strings / unicode + result = isnull(['foo', 'bar']) + exp = np.array([False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = isnull([u('foo'), u('bar')]) + exp = np.array([False, False]) + 
tm.assert_numpy_array_equal(result, exp) + + def test_isnull_nat(self): + result = isnull([NaT]) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + result = isnull(np.array([NaT], dtype=object)) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + def test_isnull_numpy_nat(self): + arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), + np.datetime64('NaT', 's')]) + result = isnull(arr) + expected = np.array([True] * 4) + tm.assert_numpy_array_equal(result, expected) + + def test_isnull_datetime(self): + self.assertFalse(isnull(datetime.now())) + self.assertTrue(notnull(datetime.now())) + + idx = date_range('1/1/1990', periods=20) + exp = np.ones(len(idx), dtype=bool) + tm.assert_numpy_array_equal(notnull(idx), exp) + + idx = np.asarray(idx) + idx[0] = iNaT + idx = DatetimeIndex(idx) + mask = isnull(idx) + self.assertTrue(mask[0]) + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + self.assert_numpy_array_equal(mask, exp) + + # GH 9129 + pidx = idx.to_period(freq='M') + mask = isnull(pidx) + self.assertTrue(mask[0]) + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + self.assert_numpy_array_equal(mask, exp) + + mask = isnull(pidx[1:]) + exp = np.zeros(len(mask), dtype=bool) + self.assert_numpy_array_equal(mask, exp) + + def test_datetime_other_units(self): + idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02']) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isnull(idx), exp) + tm.assert_numpy_array_equal(notnull(idx), ~exp) + tm.assert_numpy_array_equal(isnull(idx.values), exp) + tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + + for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]', + 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', + 'datetime64[ns]']: + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isnull(values), exp) + tm.assert_numpy_array_equal(notnull(values), ~exp) + + 
exp = pd.Series([False, True, False]) + s = pd.Series(values) + tm.assert_series_equal(isnull(s), exp) + tm.assert_series_equal(notnull(s), ~exp) + s = pd.Series(values, dtype=object) + tm.assert_series_equal(isnull(s), exp) + tm.assert_series_equal(notnull(s), ~exp) + + def test_timedelta_other_units(self): + idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days']) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isnull(idx), exp) + tm.assert_numpy_array_equal(notnull(idx), ~exp) + tm.assert_numpy_array_equal(isnull(idx.values), exp) + tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + + for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]', + 'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]', + 'timedelta64[ns]']: + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isnull(values), exp) + tm.assert_numpy_array_equal(notnull(values), ~exp) + + exp = pd.Series([False, True, False]) + s = pd.Series(values) + tm.assert_series_equal(isnull(s), exp) + tm.assert_series_equal(notnull(s), ~exp) + s = pd.Series(values, dtype=object) + tm.assert_series_equal(isnull(s), exp) + tm.assert_series_equal(notnull(s), ~exp) + + def test_period(self): + idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M') + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isnull(idx), exp) + tm.assert_numpy_array_equal(notnull(idx), ~exp) + + exp = pd.Series([False, True, False]) + s = pd.Series(idx) + tm.assert_series_equal(isnull(s), exp) + tm.assert_series_equal(notnull(s), ~exp) + s = pd.Series(idx, dtype=object) + tm.assert_series_equal(isnull(s), exp) + tm.assert_series_equal(notnull(s), ~exp) + def test_array_equivalent(): assert array_equivalent(np.array([np.nan, np.nan]), diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 396b095fabbd6..6e36100ddd0b4 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ 
-473,6 +473,47 @@ def test_join_append_timedeltas(self): '0r': Series([td, NaT], index=list('AB'))}) assert_frame_equal(result, expected) + def test_other_datetime_unit(self): + # GH 13389 + df1 = pd.DataFrame({'entity_id': [101, 102]}) + s = pd.Series([None, None], index=[101, 102], name='days') + + for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]', + 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', + 'datetime64[ns]']: + + df2 = s.astype(dtype).to_frame('days') + # coerces to datetime64[ns], thus should not be affected + self.assertEqual(df2['days'].dtype, 'datetime64[ns]') + + result = df1.merge(df2, left_on='entity_id', right_index=True) + + exp = pd.DataFrame({'entity_id': [101, 102], + 'days': np.array(['nat', 'nat'], + dtype='datetime64[ns]')}, + columns=['entity_id', 'days']) + tm.assert_frame_equal(result, exp) + + def test_other_timedelta_unit(self): + # GH 13389 + df1 = pd.DataFrame({'entity_id': [101, 102]}) + s = pd.Series([None, None], index=[101, 102], name='days') + + for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]', + 'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]', + 'timedelta64[ns]']: + + df2 = s.astype(dtype).to_frame('days') + self.assertEqual(df2['days'].dtype, dtype) + + result = df1.merge(df2, left_on='entity_id', right_index=True) + + exp = pd.DataFrame({'entity_id': [101, 102], + 'days': np.array(['nat', 'nat'], + dtype=dtype)}, + columns=['entity_id', 'days']) + tm.assert_frame_equal(result, exp) + def test_overlapping_columns_error_message(self): df = DataFrame({'key': [1, 2, 3], 'v1': [4, 5, 6], diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 88ab239790aa1..c3d0ee28540e1 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1663,6 +1663,24 @@ def test_constructor_datetime64arr(self): self.assertRaises(ValueError, PeriodIndex, vals, freq='D') + def test_view(self): + idx = pd.PeriodIndex([], freq='M') + + exp 
= np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') + + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + tm.assert_numpy_array_equal(idx.view('i8'), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + def test_constructor_empty(self): idx = pd.PeriodIndex([], freq='M') tm.assertIsInstance(idx, PeriodIndex) diff --git a/pandas/types/common.py b/pandas/types/common.py index 9d0ccaac843ef..bffff0357f329 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -230,7 +230,8 @@ def is_object(x): def needs_i8_conversion(arr_or_dtype): return (is_datetime_or_timedelta_dtype(arr_or_dtype) or - is_datetime64tz_dtype(arr_or_dtype)) + is_datetime64tz_dtype(arr_or_dtype) or + isinstance(arr_or_dtype, ABCPeriodIndex)) def is_numeric_dtype(arr_or_dtype): diff --git a/pandas/types/missing.py b/pandas/types/missing.py index 8b4193d02beb7..a4af127e0c381 100644 --- a/pandas/types/missing.py +++ b/pandas/types/missing.py @@ -140,7 +140,7 @@ def _isnull_ndarraylike(obj): vec = lib.isnullobj(values.ravel()) result[...] = vec.reshape(shape) - elif is_datetimelike(obj): + elif needs_i8_conversion(obj): # this is the NaT pattern result = values.view('i8') == iNaT else: