diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index fd1eb23643c2b..0bc4a7df6a55b 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -28,6 +28,53 @@ _check_mixed_int) +class TestDataFrameArithmetic(object): + + @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') + def test_frame_sub_datetime64_not_ns(self): + df = pd.DataFrame(date_range('20130101', periods=3)) + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + res = df - dt64 + expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1), + pd.Timedelta(days=2)]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_radd_str_invalid(self, dtype, data): + df = DataFrame(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + df + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_int(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([2, 3, 4], dtype=dtype) + result = 1 + df + assert_frame_equal(result, expected) + result = df + 1 + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_nan(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + result = np.nan + df + assert_frame_equal(result, expected) + result = df + np.nan + assert_frame_equal(result, expected) + + def test_frame_radd_str(self): + df = pd.DataFrame(['x', np.nan, 'x']) + assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) + + class TestDataFrameOperators(TestData): def test_operators(self): diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 441e811706487..56ff092dd0a27 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -3,7 +3,7 @@ import pytest -from datetime import datetime +from datetime import datetime, timedelta import sys import string @@ -29,6 +29,18 @@ class TestSeriesDtypes(TestData): + def test_dt64_series_astype_object(self): + dt64ser = Series(date_range('20130101', periods=3)) + result = dt64ser.astype(object) + assert isinstance(result.iloc[0], datetime) + assert result.dtype == np.object_ + + def test_td64_series_astype_object(self): + tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + result = tdser.astype(object) + assert isinstance(result.iloc[0], timedelta) + assert result.dtype == np.object_ + @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) def test_astype(self, dtype): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index ed9307d50521f..1797dbcc15872 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -183,18 +183,18 @@ def test_comparison_tuples(self): assert_series_equal(result, expected) def test_comparison_operators_with_nas(self): - s = Series(bdate_range('1/1/2000', periods=10), dtype=object) - s[::2] = np.nan + ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) + ser[::2] = np.nan # test that comparisons work ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: - val = s[5] + val = ser[5] f = getattr(operator, op) - result = f(s, val) + result = f(ser, val) - expected = f(s.dropna(), val).reindex(s.index) + expected = f(ser.dropna(), val).reindex(ser.index) if op == 'ne': expected = expected.fillna(True).astype(bool) @@ -211,28 +211,28 @@ def test_comparison_operators_with_nas(self): # boolean &, |, ^ should work with object arrays and propagate NAs ops = ['and_', 'or_', 'xor'] - mask = s.isna() + mask = ser.isna() for bool_op in ops: - f = getattr(operator, bool_op) + func = getattr(operator, bool_op) - filled = s.fillna(s[0]) + filled = ser.fillna(ser[0]) - result = f(s < s[9], s > s[3]) + result = func(ser < ser[9], ser > ser[3]) - expected = f(filled < filled[9], filled > filled[3]) + expected = func(filled < filled[9], filled > filled[3]) expected[mask] = False assert_series_equal(result, expected) def test_comparison_object_numeric_nas(self): - s = Series(np.random.randn(10), dtype=object) - shifted = s.shift(2) + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: - f = getattr(operator, op) + func = getattr(operator, op) - result = f(s, shifted) - expected = f(s.astype(float), shifted.astype(float)) + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) assert_series_equal(result, expected) def test_comparison_invalid(self): @@ -277,98 +277,94 @@ def f(): tm.assert_series_equal(cat == "d", Series([False, False, False])) tm.assert_series_equal(cat != "d", Series([True, True, True])) - def test_more_na_comparisons(self): - for dtype in [None, object]: - left = Series(['a', np.nan, 'c'], dtype=dtype) - right = Series(['a', np.nan, 'd'], dtype=dtype) - - result = left == right - expected = Series([True, False, False]) - assert_series_equal(result, expected) - - result = left != right - expected = Series([False, True, True]) - assert_series_equal(result, expected) - - result = left == np.nan - expected = Series([False, False, False]) - assert_series_equal(result, expected) - - result = left != np.nan - expected = Series([True, True, True]) - assert_series_equal(result, expected) + @pytest.mark.parametrize('dtype', [None, object]) + def test_more_na_comparisons(self, dtype): + left = Series(['a', np.nan, 'c'], dtype=dtype) + right = Series(['a', np.nan, 'd'], dtype=dtype) - def test_nat_comparisons(self): - data = [([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], - [pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]), + result = left == right + expected = Series([True, False, False]) + assert_series_equal(result, expected) - ([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')], - [pd.NaT, pd.NaT, pd.Timedelta('3 days')]), + result = left != right + expected = Series([False, True, True]) + assert_series_equal(result, expected) - ([pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')], - [pd.NaT, pd.NaT, pd.Period('2011-03', freq='M')])] + result = left == np.nan + expected = Series([False, False, False]) + assert_series_equal(result, expected) - # add lhs / rhs switched data - data = data + [(r, l) for l, r in data] + result = left != np.nan + expected = Series([True, True, True]) + assert_series_equal(result, expected) - for l, r in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) + @pytest.mark.parametrize('pair', [ + ([pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [NaT, NaT, pd.Timestamp('2011-01-03')]), - # Series, Index - for right in [Series(r, dtype=dtype), Index(r, dtype=dtype)]: - expected = Series([False, False, True]) - assert_series_equal(left == right, expected) + ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [NaT, NaT, pd.Timedelta('3 days')]), - expected = Series([True, True, False]) - assert_series_equal(left != right, expected) + ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], + [NaT, NaT, pd.Period('2011-03', freq='M')])]) + @pytest.mark.parametrize('reverse', [True, False]) + @pytest.mark.parametrize('box', [Series, Index]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons(self, dtype, box, reverse, pair): + l, r = pair + if reverse: + # add lhs / rhs switched data + l, r = r, l - expected = Series([False, False, False]) - assert_series_equal(left < right, expected) + left = Series(l, dtype=dtype) + right = box(r, dtype=dtype) + # Series, Index - expected = Series([False, False, False]) - assert_series_equal(left > right, expected) + expected = Series([False, False, True]) + assert_series_equal(left == right, expected) - expected = Series([False, False, True]) - assert_series_equal(left >= right, expected) + expected = Series([True, True, False]) + assert_series_equal(left != right, expected) - expected = Series([False, False, True]) - assert_series_equal(left <= right, expected) + expected = Series([False, False, False]) + assert_series_equal(left < right, expected) - def test_nat_comparisons_scalar(self): - data = [[pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], + expected = Series([False, False, False]) + assert_series_equal(left > right, expected) - [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')], + expected = Series([False, False, True]) + assert_series_equal(left >= right, expected) - [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')]] + expected = Series([False, False, True]) + assert_series_equal(left <= right, expected) - for l in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) + @pytest.mark.parametrize('data', [ + [pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')] + ]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons_scalar(self, dtype, data): + left = Series(data, dtype=dtype) - expected = Series([False, False, False]) - assert_series_equal(left == pd.NaT, expected) - assert_series_equal(pd.NaT == left, expected) + expected = Series([False, False, False]) + assert_series_equal(left == pd.NaT, expected) + assert_series_equal(pd.NaT == left, expected) - expected = Series([True, True, True]) - assert_series_equal(left != pd.NaT, expected) - assert_series_equal(pd.NaT != left, expected) + expected = Series([True, True, True]) + assert_series_equal(left != pd.NaT, expected) + assert_series_equal(pd.NaT != left, expected) - expected = Series([False, False, False]) - assert_series_equal(left < pd.NaT, expected) - assert_series_equal(pd.NaT > left, expected) - assert_series_equal(left <= pd.NaT, expected) - assert_series_equal(pd.NaT >= left, expected) + expected = Series([False, False, False]) + assert_series_equal(left < pd.NaT, expected) + assert_series_equal(pd.NaT > left, expected) + assert_series_equal(left <= pd.NaT, expected) + assert_series_equal(pd.NaT >= left, expected) - assert_series_equal(left > pd.NaT, expected) - assert_series_equal(pd.NaT < left, expected) - assert_series_equal(left >= pd.NaT, expected) - assert_series_equal(pd.NaT <= left, expected) + assert_series_equal(left > pd.NaT, expected) + assert_series_equal(pd.NaT < left, expected) + assert_series_equal(left >= pd.NaT, expected) + assert_series_equal(pd.NaT <= left, expected) def test_comparison_different_length(self): a = Series(['a', 'b', 'c']) @@ -559,27 +555,27 @@ def test_comp_ops_df_compat(self): s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') - for l, r in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: + for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: msg = "Can only compare identically-labeled Series objects" with tm.assert_raises_regex(ValueError, msg): - l == r + left == right with tm.assert_raises_regex(ValueError, msg): - l != r + left != right with tm.assert_raises_regex(ValueError, msg): - l < r + left < right msg = "Can only compare identically-labeled DataFrame objects" with tm.assert_raises_regex(ValueError, msg): - l.to_frame() == r.to_frame() + left.to_frame() == right.to_frame() with tm.assert_raises_regex(ValueError, msg): - l.to_frame() != r.to_frame() + left.to_frame() != right.to_frame() with tm.assert_raises_regex(ValueError, msg): - l.to_frame() < r.to_frame() + left.to_frame() < right.to_frame() class TestSeriesArithmetic(object): @@ -1231,16 +1227,6 @@ def test_sub_datetime64_not_ns(self, box, assert_func): res = dt64 - obj assert_func(res, -expected) - @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') - def test_frame_sub_datetime64_not_ns(self): - df = pd.DataFrame(date_range('20130101', periods=3)) - dt64 = np.datetime64('2013-01-01') - assert dt64.dtype == 'datetime64[D]' - res = df - dt64 - expected = pd.DataFrame([Timedelta(days=0), Timedelta(days=1), - Timedelta(days=2)]) - tm.assert_frame_equal(res, expected) - def test_operators_datetimelike(self): def run_ops(ops, get_ser, test_ser): @@ -1381,7 +1367,7 @@ def test_sub_datetime_compat(self): assert_series_equal(s - dt, exp) assert_series_equal(s - Timestamp(dt), exp) - def test_datetime_series_with_timedelta(self): + def test_dt64_series_with_timedelta(self): # scalar timedeltas/np.timedelta64 objects # operate with np.timedelta64 correctly s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1400,25 +1386,51 @@ def test_datetime_series_with_timedelta(self): assert_series_equal(result, expected) assert_series_equal(result2, expected) - def test_datetime_series_with_DateOffset(self): + def test_dt64_series_add_tick_DateOffset(self): # GH 4532 # operate with pd.offsets - s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) - - result = s + pd.offsets.Second(5) - result2 = pd.offsets.Second(5) + s + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) expected = Series([Timestamp('20130101 9:01:05'), Timestamp('20130101 9:02:05')]) + + result = ser + pd.offsets.Second(5) assert_series_equal(result, expected) + + result2 = pd.offsets.Second(5) + ser assert_series_equal(result2, expected) - result = s - pd.offsets.Second(5) - result2 = -pd.offsets.Second(5) + s + def test_dt64_series_sub_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) expected = Series([Timestamp('20130101 9:00:55'), Timestamp('20130101 9:01:55')]) + + result = ser - pd.offsets.Second(5) assert_series_equal(result, expected) + + result2 = -pd.offsets.Second(5) + ser assert_series_equal(result2, expected) + with pytest.raises(TypeError): + pd.offsets.Second(5) - ser + + @pytest.mark.parametrize('cls_name', ['Day', 'Hour', 'Minute', 'Second', + 'Milli', 'Micro', 'Nano']) + def test_dt64_series_with_tick_DateOffset_smoke(self, cls_name): + # GH 4532 + # smoke tests for valid DateOffsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser + + def test_dt64_series_add_mixed_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + result = s + pd.offsets.Milli(5) result2 = pd.offsets.Milli(5) + s expected = Series([Timestamp('20130101 9:01:00.005'), @@ -1431,14 +1443,7 @@ def test_datetime_series_with_DateOffset(self): Timestamp('20130101 9:07:00.005')]) assert_series_equal(result, expected) - # valid DateOffsets - for do in ['Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli', - 'Nano']: - op = getattr(pd.offsets, do) - s + op(5) - op(5) + s - - def test_dt64_sub_NaT(self): + def test_dt64_series_sub_NaT(self): # GH#18808 dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) ser = pd.Series(dti) @@ -1497,7 +1502,7 @@ def test_dt64_mul_div_numeric_invalid(self, one, dt64_series): with pytest.raises(TypeError): one / dt64_series - def test_dt64series_arith_overflow(self): + def test_dt64_series_arith_overflow(self): # GH#12534, fixed by #19024 dt = pd.Timestamp('1700-01-31') td = pd.Timedelta('20000 Days') @@ -1670,16 +1675,6 @@ def test_timedelta64_conversions(self, m, unit): result = np.timedelta64(m, unit) / s1 assert_series_equal(result, expected) - # astype - s = Series(date_range('20130101', periods=3)) - result = s.astype(object) - assert isinstance(result.iloc[0], datetime) - assert result.dtype == np.object_ - - result = s1.astype(object) - assert isinstance(result.iloc[0], timedelta) - assert result.dtype == np.object_ - @pytest.mark.parametrize('op', [operator.add, operator.sub]) def test_timedelta64_equal_timedelta_supported_ops(self, op): ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), @@ -1705,13 +1700,7 @@ def timedelta64(*args): lhs = op(ser, nptd) rhs = op(ser, pytd) - try: - assert_series_equal(lhs, rhs) - except: - raise AssertionError( - "invalid comparison [op->{0},d->{1},h->{2},m->{3}," - "s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, - us, lhs, rhs)) + assert_series_equal(lhs, rhs) def test_ops_nat_mixed_datetime64_timedelta64(self): # GH 11349 @@ -2091,25 +2080,39 @@ def test_series_radd_str(self): assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) @pytest.mark.parametrize('dtype', [None, object]) - def test_series_radd_more(self, dtype): - res = 1 + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([2, 3, 4], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + 1 - assert_series_equal(res, exp) - - res = np.nan + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + np.nan - assert_series_equal(res, exp) - - s = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days')], dtype=dtype) - exp = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), - pd.Timedelta('6 days')]) - assert_series_equal(pd.Timedelta('3 days') + s, exp) - assert_series_equal(s + pd.Timedelta('3 days'), exp) + def test_series_with_dtype_radd_timedelta(self, dtype): + ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], dtype=dtype) + expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), + pd.Timedelta('6 days')]) + + result = pd.Timedelta('3 days') + ser + assert_series_equal(result, expected) + + result = ser + pd.Timedelta('3 days') + assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_int(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([2, 3, 4], dtype=dtype) + + result = 1 + ser + assert_series_equal(result, expected) + + result = ser + 1 + assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_nan(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + + result = np.nan + ser + assert_series_equal(result, expected) + + result = ser + np.nan + assert_series_equal(result, expected) @pytest.mark.parametrize('data', [ [1, 2, 3], @@ -2122,36 +2125,6 @@ def test_series_radd_str_invalid(self, dtype, data): with pytest.raises(TypeError): 'foo_' + ser - @pytest.mark.parametrize('data', [ - [1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], - ['x', 'y', 1]]) - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_radd_str_invalid(self, dtype, data): - df = DataFrame(data, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + df - - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_radd_more(self, dtype): - res = 1 + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([2, 3, 4], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + 1 - assert_frame_equal(res, exp) - - res = np.nan + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + np.nan - assert_frame_equal(res, exp) - - def test_frame_radd_str(self): - df = pd.DataFrame(['x', np.nan, 'x']) - assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) - assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) - def test_operators_frame(self): # rpow does not work with DataFrame df = DataFrame({'A': self.ts}) @@ -2245,24 +2218,23 @@ def test_operators_na_handling(self): assert_series_equal(result, expected) def test_datetime64_with_index(self): - # arithmetic integer ops with an index - s = Series(np.random.randn(5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, expected) # GH 4629 # arithmetic datetime64 ops with an index - s = Series(date_range('20130101', periods=5), - index=date_range('20130101', periods=5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(date_range('20130101', periods=5), + index=date_range('20130101', periods=5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, expected) with pytest.raises(TypeError): # GH#18850 - result = s - s.index.to_period() + result = ser - ser.index.to_period() df = DataFrame(np.random.randn(5, 2), index=date_range('20130101', periods=5))