From 31f2db90f6918b77b15689b87329f856d5b1cf19 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 10:53:51 -0800 Subject: [PATCH 01/10] parametrize and break up tests, fix 1-letter names --- pandas/tests/series/test_operators.py | 210 ++++++++++++++------------ 1 file changed, 114 insertions(+), 96 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index dda2918bf7615..1acaf1a99d43c 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -178,18 +178,18 @@ def test_comparison_tuples(self): assert_series_equal(result, expected) def test_comparison_operators_with_nas(self): - s = Series(bdate_range('1/1/2000', periods=10), dtype=object) - s[::2] = np.nan + ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) + ser[::2] = np.nan # test that comparisons work ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: - val = s[5] + val = ser[5] f = getattr(operator, op) - result = f(s, val) + result = f(ser, val) - expected = f(s.dropna(), val).reindex(s.index) + expected = f(ser.dropna(), val).reindex(ser.index) if op == 'ne': expected = expected.fillna(True).astype(bool) @@ -206,28 +206,28 @@ def test_comparison_operators_with_nas(self): # boolean &, |, ^ should work with object arrays and propagate NAs ops = ['and_', 'or_', 'xor'] - mask = s.isna() + mask = ser.isna() for bool_op in ops: - f = getattr(operator, bool_op) + func = getattr(operator, bool_op) - filled = s.fillna(s[0]) + filled = ser.fillna(ser[0]) - result = f(s < s[9], s > s[3]) + result = func(ser < ser[9], ser > ser[3]) - expected = f(filled < filled[9], filled > filled[3]) + expected = func(filled < filled[9], filled > filled[3]) expected[mask] = False assert_series_equal(result, expected) def test_comparison_object_numeric_nas(self): - s = Series(np.random.randn(10), dtype=object) - shifted = s.shift(2) + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] for op in ops: - f = getattr(operator, op) + func = getattr(operator, op) - result = f(s, shifted) - expected = f(s.astype(float), shifted.astype(float)) + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) assert_series_equal(result, expected) def test_comparison_invalid(self): @@ -272,26 +272,26 @@ def f(): tm.assert_series_equal(cat == "d", Series([False, False, False])) tm.assert_series_equal(cat != "d", Series([True, True, True])) - def test_more_na_comparisons(self): - for dtype in [None, object]: - left = Series(['a', np.nan, 'c'], dtype=dtype) - right = Series(['a', np.nan, 'd'], dtype=dtype) + @pytest.mark.parametrize('dtype', [None, object]) + def test_more_na_comparisons(self, dtype): + left = Series(['a', np.nan, 'c'], dtype=dtype) + right = Series(['a', np.nan, 'd'], dtype=dtype) - result = left == right - expected = Series([True, False, False]) - assert_series_equal(result, expected) + result = left == right + expected = Series([True, False, False]) + assert_series_equal(result, expected) - result = left != right - expected = Series([False, True, True]) - assert_series_equal(result, expected) + result = left != right + expected = Series([False, True, True]) + assert_series_equal(result, expected) - result = left == np.nan - expected = Series([False, False, False]) - assert_series_equal(result, expected) + result = left == np.nan + expected = Series([False, False, False]) + assert_series_equal(result, expected) - result = left != np.nan - expected = Series([True, True, True]) - assert_series_equal(result, expected) + result = left != np.nan + expected = Series([True, True, True]) + assert_series_equal(result, expected) def test_nat_comparisons(self): data = [([pd.Timestamp('2011-01-01'), pd.NaT, @@ -333,7 +333,8 @@ def test_nat_comparisons(self): expected = Series([False, False, True]) assert_series_equal(left <= right, expected) - def test_nat_comparisons_scalar(self): + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons_scalar(self, dtype): data = [[pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')], @@ -343,27 +344,26 @@ def test_nat_comparisons_scalar(self): pd.Period('2011-03', freq='M')]] for l in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) + left = Series(l, dtype=dtype) - expected = Series([False, False, False]) - assert_series_equal(left == pd.NaT, expected) - assert_series_equal(pd.NaT == left, expected) + expected = Series([False, False, False]) + assert_series_equal(left == pd.NaT, expected) + assert_series_equal(pd.NaT == left, expected) - expected = Series([True, True, True]) - assert_series_equal(left != pd.NaT, expected) - assert_series_equal(pd.NaT != left, expected) + expected = Series([True, True, True]) + assert_series_equal(left != pd.NaT, expected) + assert_series_equal(pd.NaT != left, expected) - expected = Series([False, False, False]) - assert_series_equal(left < pd.NaT, expected) - assert_series_equal(pd.NaT > left, expected) - assert_series_equal(left <= pd.NaT, expected) - assert_series_equal(pd.NaT >= left, expected) + expected = Series([False, False, False]) + assert_series_equal(left < pd.NaT, expected) + assert_series_equal(pd.NaT > left, expected) + assert_series_equal(left <= pd.NaT, expected) + assert_series_equal(pd.NaT >= left, expected) - assert_series_equal(left > pd.NaT, expected) - assert_series_equal(pd.NaT < left, expected) - assert_series_equal(left >= pd.NaT, expected) - assert_series_equal(pd.NaT <= left, expected) + assert_series_equal(left > pd.NaT, expected) + assert_series_equal(pd.NaT < left, expected) + assert_series_equal(left >= pd.NaT, expected) + assert_series_equal(pd.NaT <= left, expected) def test_comparison_different_length(self): a = Series(['a', 'b', 'c']) @@ -1997,25 +1997,39 @@ def test_series_radd_str(self): assert_series_equal(ser + 'a', pd.Series(['xa', np.nan, 'xa'])) @pytest.mark.parametrize('dtype', [None, object]) - def test_series_radd_more(self, dtype): - res = 1 + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([2, 3, 4], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + 1 - assert_series_equal(res, exp) - - res = np.nan + pd.Series([1, 2, 3], dtype=dtype) - exp = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) - assert_series_equal(res, exp) - res = pd.Series([1, 2, 3], dtype=dtype) + np.nan - assert_series_equal(res, exp) - - s = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), - pd.Timedelta('3 days')], dtype=dtype) - exp = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), - pd.Timedelta('6 days')]) - assert_series_equal(pd.Timedelta('3 days') + s, exp) - assert_series_equal(s + pd.Timedelta('3 days'), exp) + def test_series_with_dtype_radd_timedelta(self, dtype): + ser = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], dtype=dtype) + expected = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), + pd.Timedelta('6 days')]) + + result = pd.Timedelta('3 days') + ser + assert_series_equal(result, expected) + + result = ser + pd.Timedelta('3 days') + assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_int(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([2, 3, 4], dtype=dtype) + + result = 1 + ser + assert_series_equal(result, expected) + + result = ser + 1 + assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_series_with_dtype_radd_nan(self, dtype): + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + + result = np.nan + ser + assert_series_equal(result, expected) + + result = ser + np.nan + assert_series_equal(result, expected) @pytest.mark.parametrize('data', [ [1, 2, 3], @@ -2039,25 +2053,6 @@ def test_frame_radd_str_invalid(self, dtype, data): with pytest.raises(TypeError): 'foo_' + df - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_radd_more(self, dtype): - res = 1 + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([2, 3, 4], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + 1 - assert_frame_equal(res, exp) - - res = np.nan + pd.DataFrame([1, 2, 3], dtype=dtype) - exp = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - assert_frame_equal(res, exp) - res = pd.DataFrame([1, 2, 3], dtype=dtype) + np.nan - assert_frame_equal(res, exp) - - def test_frame_radd_str(self): - df = pd.DataFrame(['x', np.nan, 'x']) - assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) - assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) - def test_operators_frame(self): # rpow does not work with DataFrame df = DataFrame({'A': self.ts}) @@ -2151,24 +2146,23 @@ def test_operators_na_handling(self): assert_series_equal(result, expected) def test_datetime64_with_index(self): - # arithmetic integer ops with an index - s = Series(np.random.randn(5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, expected) # GH 4629 # arithmetic datetime64 ops with an index - s = Series(date_range('20130101', periods=5), - index=date_range('20130101', periods=5)) - expected = s - s.index.to_series() - result = s - s.index + ser = Series(date_range('20130101', periods=5), + index=date_range('20130101', periods=5)) + expected = ser - ser.index.to_series() + result = ser - ser.index assert_series_equal(result, expected) with pytest.raises(TypeError): # GH#18850 - result = s - s.index.to_period() + result = ser - ser.index.to_period() df = DataFrame(np.random.randn(5, 2), index=date_range('20130101', periods=5)) @@ -2240,3 +2234,27 @@ def test_idxminmax_with_inf(self): assert np.isnan(s.idxmin(skipna=False)) assert s.idxmax() == 0 np.isnan(s.idxmax(skipna=False)) + + +class TestDataFrameOperators(object): + # TODO: This may belong in a frame-specific test file + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_radd_more(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + res = 1 + df + exp = pd.DataFrame([2, 3, 4], dtype=dtype) + assert_frame_equal(res, exp) + res = df + 1 + assert_frame_equal(res, exp) + + res = np.nan + df + exp = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + assert_frame_equal(res, exp) + res = df + np.nan + assert_frame_equal(res, exp) + + def test_frame_radd_str(self): + df = pd.DataFrame(['x', np.nan, 'x']) + assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) From bbdf85f17522605a9475e5072785e76bf25d4cfd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 11:00:53 -0800 Subject: [PATCH 02/10] fix flake8 complaints about 1-letter variables --- pandas/tests/series/test_operators.py | 40 ++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 1acaf1a99d43c..4629ae2513f21 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -554,27 +554,27 @@ def test_comp_ops_df_compat(self): s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x') s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x') - for l, r in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: + for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: msg = "Can only compare identically-labeled Series objects" with tm.assert_raises_regex(ValueError, msg): - l == r + left == right with tm.assert_raises_regex(ValueError, msg): - l != r + left != right with tm.assert_raises_regex(ValueError, msg): - l < r + left < right msg = "Can only compare identically-labeled DataFrame objects" with tm.assert_raises_regex(ValueError, msg): - l.to_frame() == r.to_frame() + left.to_frame() == right.to_frame() with tm.assert_raises_regex(ValueError, msg): - l.to_frame() != r.to_frame() + left.to_frame() != right.to_frame() with tm.assert_raises_regex(ValueError, msg): - l.to_frame() < r.to_frame() + left.to_frame() < right.to_frame() class TestSeriesArithmetic(object): @@ -1549,6 +1549,18 @@ def test_invalid_ops(self): pytest.raises(Exception, self.objSeries.__sub__, np.array(1, dtype=np.int64)) + def test_dt64series_astype_object(self): + dt64ser = Series(date_range('20130101', periods=3)) + result = dt64ser.astype(object) + assert isinstance(result.iloc[0], datetime) + assert result.dtype == np.object_ + + def test_td64series_astype_object(self): + tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + result = tdser.astype(object) + assert isinstance(result.iloc[0], timedelta) + assert result.dtype == np.object_ + def test_timedelta64_conversions(self): startdate = Series(date_range('2013-01-01', '2013-01-03')) enddate = Series(date_range('2013-03-01', '2013-03-03')) @@ -1576,16 +1588,6 @@ def test_timedelta64_conversions(self): result = np.timedelta64(m, unit) / s1 assert_series_equal(result, expected) - # astype - s = Series(date_range('20130101', periods=3)) - result = s.astype(object) - assert isinstance(result.iloc[0], datetime) - assert result.dtype == np.object_ - - result = s1.astype(object) - assert isinstance(result.iloc[0], timedelta) - assert result.dtype == np.object_ - @pytest.mark.parametrize('op', [operator.add, operator.sub]) def test_timedelta64_equal_timedelta_supported_ops(self, op): ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), @@ -1613,7 +1615,7 @@ def timedelta64(*args): try: assert_series_equal(lhs, rhs) - except: + except Exception: raise AssertionError( "invalid comparison [op->{0},d->{1},h->{2},m->{3}," "s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, @@ -2013,7 +2015,7 @@ def test_series_with_dtype_radd_timedelta(self, dtype): def test_series_with_dtype_radd_int(self, dtype): ser = pd.Series([1, 2, 3], dtype=dtype) expected = pd.Series([2, 3, 4], dtype=dtype) - + result = 1 + ser assert_series_equal(result, expected) From 872962e14580c27df404c96c00ba57e19696fca4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 11:08:07 -0800 Subject: [PATCH 03/10] parametrize a nested-for-loop test --- pandas/tests/series/test_operators.py | 60 +++++++++++++-------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 4629ae2513f21..5abb9d6a1c716 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -293,45 +293,45 @@ def test_more_na_comparisons(self, dtype): expected = Series([True, True, True]) assert_series_equal(result, expected) - def test_nat_comparisons(self): - data = [([pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], - [pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]), + @pytest.mark.parametrize('pair', [ + ([pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [NaT, NaT, pd.Timestamp('2011-01-03')]), - ([pd.Timedelta('1 days'), pd.NaT, - pd.Timedelta('3 days')], - [pd.NaT, pd.NaT, pd.Timedelta('3 days')]), + ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [NaT, NaT, pd.Timedelta('3 days')]), - ([pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')], - [pd.NaT, pd.NaT, pd.Period('2011-03', freq='M')])] - - # add lhs / rhs switched data - data = data + [(r, l) for l, r in data] + ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], + [NaT, NaT, pd.Period('2011-03', freq='M')])]) + @pytest.mark.parametrize('reverse', [True, False]) + @pytest.mark.parametrize('box', [Series, Index]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_nat_comparisons(self, dtype, box, reverse, pair): + l, r = pair + if reverse: + # add lhs / rhs switched data + l, r = r, l - for l, r in data: - for dtype in [None, object]: - left = Series(l, dtype=dtype) + left = Series(l, dtype=dtype) + right = box(r, dtype=dtype) + # Series, Index - # Series, Index - for right in [Series(r, dtype=dtype), Index(r, dtype=dtype)]: - expected = Series([False, False, True]) - assert_series_equal(left == right, expected) + expected = Series([False, False, True]) + assert_series_equal(left == right, expected) - expected = Series([True, True, False]) - assert_series_equal(left != right, expected) + expected = Series([True, True, False]) + assert_series_equal(left != right, expected) - expected = Series([False, False, False]) - assert_series_equal(left < right, expected) + expected = Series([False, False, False]) + assert_series_equal(left < right, expected) - expected = Series([False, False, False]) - assert_series_equal(left > right, expected) + expected = Series([False, False, False]) + assert_series_equal(left > right, expected) - expected = Series([False, False, True]) - assert_series_equal(left >= right, expected) + expected = Series([False, False, True]) + assert_series_equal(left >= right, expected) - expected = Series([False, False, True]) - assert_series_equal(left <= right, expected) + expected = Series([False, False, True]) + assert_series_equal(left <= right, expected) @pytest.mark.parametrize('dtype', [None, object]) def test_nat_comparisons_scalar(self, dtype): From 1da7b0dcca6dae463e72389cdd512969db1196d9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 11:20:35 -0800 Subject: [PATCH 04/10] break up dateoffset tests --- pandas/tests/series/test_operators.py | 157 ++++++++++++++------------ 1 file changed, 87 insertions(+), 70 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 5abb9d6a1c716..b839480dad449 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1131,16 +1131,6 @@ def test_sub_datetime64_not_ns(self, box, assert_func): res = dt64 - obj assert_func(res, -expected) - @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') - def test_frame_sub_datetime64_not_ns(self): - df = pd.DataFrame(date_range('20130101', periods=3)) - dt64 = np.datetime64('2013-01-01') - assert dt64.dtype == 'datetime64[D]' - res = df - dt64 - expected = pd.DataFrame([Timedelta(days=0), Timedelta(days=1), - Timedelta(days=2)]) - tm.assert_frame_equal(res, expected) - def test_operators_datetimelike(self): def run_ops(ops, get_ser, test_ser): @@ -1281,7 +1271,7 @@ def test_sub_datetime_compat(self): assert_series_equal(s - dt, exp) assert_series_equal(s - Timestamp(dt), exp) - def test_datetime_series_with_timedelta(self): + def test_dt64series_with_timedelta(self): # scalar timedeltas/np.timedelta64 objects # operate with np.timedelta64 correctly s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1300,25 +1290,52 @@ def test_datetime_series_with_timedelta(self): assert_series_equal(result, expected) assert_series_equal(result2, expected) - def test_datetime_series_with_DateOffset(self): + def test_dt64series_add_tick_DateOffset(self): # GH 4532 # operate with pd.offsets - s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) - - result = s + pd.offsets.Second(5) - result2 = pd.offsets.Second(5) + s + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) expected = Series([Timestamp('20130101 9:01:05'), Timestamp('20130101 9:02:05')]) + + result = ser + pd.offsets.Second(5) assert_series_equal(result, expected) + + result2 = pd.offsets.Second(5) + ser assert_series_equal(result2, expected) - result = s - pd.offsets.Second(5) - result2 = -pd.offsets.Second(5) + s + def test_dt64series_sub_tick_DateOffset(self): + # GH 4532 + # operate with pd.offsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) expected = Series([Timestamp('20130101 9:00:55'), Timestamp('20130101 9:01:55')]) + + result = ser - pd.offsets.Second(5) assert_series_equal(result, expected) + + result2 = -pd.offsets.Second(5) + ser assert_series_equal(result2, expected) + with pytest.raises(TypeError): + pd.offsets.Second(5) - ser + + def test_dt64series_with_DateOffset_smoke(self): + # GH 4532 + # smoke tests for valid DateOffsets + ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + + # valid DateOffsets + for cls_name in ['Day', 'Hour', 'Minute', 'Second', + 'Milli', 'Micro', 'Nano']: + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser + + def test_dt64series_with_DateOffset(self): + # GH 4532 + # operate with pd.offsets + s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) + result = s + pd.offsets.Milli(5) result2 = pd.offsets.Milli(5) + s expected = Series([Timestamp('20130101 9:01:00.005'), @@ -1331,14 +1348,7 @@ def test_datetime_series_with_DateOffset(self): Timestamp('20130101 9:07:00.005')]) assert_series_equal(result, expected) - # valid DateOffsets - for do in ['Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli', - 'Nano']: - op = getattr(pd.offsets, do) - s + op(5) - op(5) + s - - def test_dt64_sub_NaT(self): + def test_dt64series_sub_NaT(self): # GH#18808 dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) ser = pd.Series(dti) @@ -1561,32 +1571,26 @@ def test_td64series_astype_object(self): assert isinstance(result.iloc[0], timedelta) assert result.dtype == np.object_ - def test_timedelta64_conversions(self): - startdate = Series(date_range('2013-01-01', '2013-01-03')) - enddate = Series(date_range('2013-03-01', '2013-03-03')) - - s1 = enddate - startdate - s1[2] = np.nan + @pytest.mark.parametrize('unit', ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) + def test_timedelta64_conversions(self, unit): + s1 = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') for m in [1, 3, 10]: - for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']: + # op + expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) + result = s1 / np.timedelta64(m, unit) + assert_series_equal(result, expected) - # op - expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) - result = s1 / np.timedelta64(m, unit) + if m == 1 and unit != 'ns': + # astype + result = s1.astype("timedelta64[{0}]".format(unit)) assert_series_equal(result, expected) - if m == 1 and unit != 'ns': - - # astype - result = s1.astype("timedelta64[{0}]".format(unit)) - assert_series_equal(result, expected) - - # reverse op - expected = s1.apply( - lambda x: Timedelta(np.timedelta64(m, unit)) / x) - result = np.timedelta64(m, unit) / s1 - assert_series_equal(result, expected) + # reverse op + expected = s1.apply( + lambda x: Timedelta(np.timedelta64(m, unit)) / x) + result = np.timedelta64(m, unit) / s1 + assert_series_equal(result, expected) @pytest.mark.parametrize('op', [operator.add, operator.sub]) def test_timedelta64_equal_timedelta_supported_ops(self, op): @@ -2044,17 +2048,6 @@ def test_series_radd_str_invalid(self, dtype, data): with pytest.raises(TypeError): 'foo_' + ser - @pytest.mark.parametrize('data', [ - [1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], - ['x', 'y', 1]]) - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_radd_str_invalid(self, dtype, data): - df = DataFrame(data, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + df - def test_operators_frame(self): # rpow does not work with DataFrame df = DataFrame({'A': self.ts}) @@ -2241,20 +2234,44 @@ def test_idxminmax_with_inf(self): class TestDataFrameOperators(object): # TODO: This may belong in a frame-specific test file + @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') + def test_frame_sub_datetime64_not_ns(self): + df = pd.DataFrame(date_range('20130101', periods=3)) + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + res = df - dt64 + expected = pd.DataFrame([Timedelta(days=0), Timedelta(days=1), + Timedelta(days=2)]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_radd_str_invalid(self, dtype, data): + df = DataFrame(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + df + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_int(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([2, 3, 4], dtype=dtype) + result = 1 + df + assert_frame_equal(result, expected) + result = df + 1 + assert_frame_equal(result, expected) + @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_radd_more(self, dtype): + def test_frame_with_dtype_radd_nan(self, dtype): df = pd.DataFrame([1, 2, 3], dtype=dtype) - res = 1 + df - exp = pd.DataFrame([2, 3, 4], dtype=dtype) - assert_frame_equal(res, exp) - res = df + 1 - assert_frame_equal(res, exp) - - res = np.nan + df - exp = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - assert_frame_equal(res, exp) - res = df + np.nan - assert_frame_equal(res, exp) + expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + result = np.nan + df + assert_frame_equal(result, expected) + result = df + np.nan + assert_frame_equal(result, expected) def test_frame_radd_str(self): df = pd.DataFrame(['x', np.nan, 'x']) From 313d715d8a395aeb76333935a2eed54cbc1676e2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 17:30:24 -0800 Subject: [PATCH 05/10] rename test --- pandas/tests/series/test_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index b839480dad449..57ed0f805e3ee 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1331,7 +1331,7 @@ def test_dt64series_with_DateOffset_smoke(self): ser + offset_cls(5) offset_cls(5) + ser - def test_dt64series_with_DateOffset(self): + def test_dt64series_add_mixed_tick_DateOffset(self): # GH 4532 # operate with pd.offsets s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) From 052d116e40baa134121bf9ddc49fc9c60e1f4d28 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 17:33:31 -0800 Subject: [PATCH 06/10] fix abbrevs, parametrize data --- pandas/tests/series/test_operators.py | 66 +++++++++++++-------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 57ed0f805e3ee..a4dc24c257477 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -333,37 +333,33 @@ def test_nat_comparisons(self, dtype, box, reverse, pair): expected = Series([False, False, True]) assert_series_equal(left <= right, expected) + @pytest.mark.parametrize('data', [ + [pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')], + [pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], + [pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')] + ]) @pytest.mark.parametrize('dtype', [None, object]) - def test_nat_comparisons_scalar(self, dtype): - data = [[pd.Timestamp('2011-01-01'), pd.NaT, - pd.Timestamp('2011-01-03')], - - [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')], - - [pd.Period('2011-01', freq='M'), pd.NaT, - pd.Period('2011-03', freq='M')]] - - for l in data: - left = Series(l, dtype=dtype) + def test_nat_comparisons_scalar(self, dtype, data): + left = Series(data, dtype=dtype) - expected = Series([False, False, False]) - assert_series_equal(left == pd.NaT, expected) - assert_series_equal(pd.NaT == left, expected) + expected = Series([False, False, False]) + assert_series_equal(left == pd.NaT, expected) + assert_series_equal(pd.NaT == left, expected) - expected = Series([True, True, True]) - assert_series_equal(left != pd.NaT, expected) - assert_series_equal(pd.NaT != left, expected) + expected = Series([True, True, True]) + assert_series_equal(left != pd.NaT, expected) + assert_series_equal(pd.NaT != left, expected) - expected = Series([False, False, False]) - assert_series_equal(left < pd.NaT, expected) - assert_series_equal(pd.NaT > left, expected) - assert_series_equal(left <= pd.NaT, expected) - assert_series_equal(pd.NaT >= left, expected) + expected = Series([False, False, False]) + assert_series_equal(left < pd.NaT, expected) + assert_series_equal(pd.NaT > left, expected) + assert_series_equal(left <= pd.NaT, expected) + assert_series_equal(pd.NaT >= left, expected) - assert_series_equal(left > pd.NaT, expected) - assert_series_equal(pd.NaT < left, expected) - assert_series_equal(left >= pd.NaT, expected) - assert_series_equal(pd.NaT <= left, expected) + assert_series_equal(left > pd.NaT, expected) + assert_series_equal(pd.NaT < left, expected) + assert_series_equal(left >= pd.NaT, expected) + assert_series_equal(pd.NaT <= left, expected) def test_comparison_different_length(self): a = Series(['a', 'b', 'c']) @@ -1271,7 +1267,7 @@ def test_sub_datetime_compat(self): assert_series_equal(s - dt, exp) assert_series_equal(s - Timestamp(dt), exp) - def test_dt64series_with_timedelta(self): + def test_dt64_series_with_timedelta(self): # scalar timedeltas/np.timedelta64 objects # operate with np.timedelta64 correctly s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1290,7 +1286,7 @@ def test_dt64series_with_timedelta(self): assert_series_equal(result, expected) assert_series_equal(result2, expected) - def test_dt64series_add_tick_DateOffset(self): + def test_dt64_series_add_tick_DateOffset(self): # GH 4532 # operate with pd.offsets ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1303,7 +1299,7 @@ def test_dt64series_add_tick_DateOffset(self): result2 = pd.offsets.Second(5) + ser assert_series_equal(result2, expected) - def test_dt64series_sub_tick_DateOffset(self): + def test_dt64_series_sub_tick_DateOffset(self): # GH 4532 # operate with pd.offsets ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1319,7 +1315,7 @@ def test_dt64series_sub_tick_DateOffset(self): with pytest.raises(TypeError): pd.offsets.Second(5) - ser - def test_dt64series_with_DateOffset_smoke(self): + def test_dt64_series_with_DateOffset_smoke(self): # GH 4532 # smoke tests for valid DateOffsets ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1331,7 +1327,7 @@ def test_dt64series_with_DateOffset_smoke(self): ser + offset_cls(5) offset_cls(5) + ser - def test_dt64series_add_mixed_tick_DateOffset(self): + def test_dt64_series_add_mixed_tick_DateOffset(self): # GH 4532 # operate with pd.offsets s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) @@ -1348,7 +1344,7 @@ def test_dt64series_add_mixed_tick_DateOffset(self): Timestamp('20130101 9:07:00.005')]) assert_series_equal(result, expected) - def test_dt64series_sub_NaT(self): + def test_dt64_series_sub_NaT(self): # GH#18808 dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')]) ser = pd.Series(dti) @@ -1407,7 +1403,7 @@ def test_dt64_mul_div_numeric_invalid(self, one, dt64_series): with pytest.raises(TypeError): one / dt64_series - def test_dt64series_arith_overflow(self): + def test_dt64_series_arith_overflow(self): # GH#12534, fixed by #19024 dt = pd.Timestamp('1700-01-31') td = pd.Timedelta('20000 Days') @@ -1559,13 +1555,13 @@ def test_invalid_ops(self): pytest.raises(Exception, self.objSeries.__sub__, np.array(1, dtype=np.int64)) - def test_dt64series_astype_object(self): + def test_dt64_series_astype_object(self): dt64ser = Series(date_range('20130101', periods=3)) result = dt64ser.astype(object) assert isinstance(result.iloc[0], datetime) assert result.dtype == np.object_ - def test_td64series_astype_object(self): + def test_td64_series_astype_object(self): tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') result = tdser.astype(object) assert isinstance(result.iloc[0], timedelta) From 872ff5f33b7bfb6c3d4962665d8b5363566788d6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 17:34:49 -0800 Subject: [PATCH 07/10] move tests to frames --- pandas/tests/frame/test_operators.py | 47 ++++++++++++++++++++++++++ pandas/tests/series/test_operators.py | 48 --------------------------- 2 files changed, 47 insertions(+), 48 deletions(-) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index fd1eb23643c2b..0bc4a7df6a55b 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -28,6 +28,53 @@ _check_mixed_int) +class TestDataFrameArithmetic(object): + + @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') + def test_frame_sub_datetime64_not_ns(self): + df = pd.DataFrame(date_range('20130101', periods=3)) + dt64 = np.datetime64('2013-01-01') + assert dt64.dtype == 'datetime64[D]' + res = df - dt64 + expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1), + pd.Timedelta(days=2)]) + tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize('data', [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], + ['x', 'y', 1]]) + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_radd_str_invalid(self, dtype, data): + df = DataFrame(data, dtype=dtype) + with pytest.raises(TypeError): + 'foo_' + df + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_int(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([2, 3, 4], dtype=dtype) + result = 1 + df + assert_frame_equal(result, expected) + result = df + 1 + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, object]) + def test_frame_with_dtype_radd_nan(self, dtype): + df = pd.DataFrame([1, 2, 3], dtype=dtype) + expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + result = np.nan + df + assert_frame_equal(result, expected) + result = df + np.nan + assert_frame_equal(result, expected) + + def test_frame_radd_str(self): + df = pd.DataFrame(['x', np.nan, 'x']) + assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) + + class TestDataFrameOperators(TestData): def test_operators(self): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index a4dc24c257477..950a364bd3f3c 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -2225,51 +2225,3 @@ def test_idxminmax_with_inf(self): assert np.isnan(s.idxmin(skipna=False)) assert s.idxmax() == 0 np.isnan(s.idxmax(skipna=False)) - - -class TestDataFrameOperators(object): - # TODO: This may belong in a frame-specific test file - - @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano') - def test_frame_sub_datetime64_not_ns(self): - df = pd.DataFrame(date_range('20130101', periods=3)) - dt64 = np.datetime64('2013-01-01') - assert dt64.dtype == 'datetime64[D]' - res = df - dt64 - expected = pd.DataFrame([Timedelta(days=0), Timedelta(days=1), - Timedelta(days=2)]) - tm.assert_frame_equal(res, expected) - - @pytest.mark.parametrize('data', [ - [1, 2, 3], - [1.1, 2.2, 3.3], - [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT], - ['x', 'y', 1]]) - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_radd_str_invalid(self, dtype, data): - df = DataFrame(data, dtype=dtype) - with pytest.raises(TypeError): - 'foo_' + df - - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_with_dtype_radd_int(self, dtype): - df = pd.DataFrame([1, 2, 3], dtype=dtype) - expected = pd.DataFrame([2, 3, 4], dtype=dtype) - result = 1 + df - assert_frame_equal(result, expected) - result = df + 1 - assert_frame_equal(result, expected) - - @pytest.mark.parametrize('dtype', [None, object]) - def test_frame_with_dtype_radd_nan(self, dtype): - df = pd.DataFrame([1, 2, 3], dtype=dtype) - expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) - result = np.nan + df - assert_frame_equal(result, expected) - result = df + np.nan - assert_frame_equal(result, expected) - - def test_frame_radd_str(self): - df = pd.DataFrame(['x', np.nan, 'x']) - assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) - assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) From 2484f46bab2d62f13eb0a61306424c362586a8f3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 17:38:12 -0800 Subject: [PATCH 08/10] move astype tests to test_dtypes --- pandas/tests/series/test_dtypes.py | 14 +++++++++++++- pandas/tests/series/test_operators.py | 12 ------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 441e811706487..56ff092dd0a27 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -3,7 +3,7 @@ import pytest -from datetime import datetime +from datetime import datetime, timedelta import sys import string @@ -29,6 +29,18 @@ class TestSeriesDtypes(TestData): + def test_dt64_series_astype_object(self): + dt64ser = Series(date_range('20130101', periods=3)) + result = dt64ser.astype(object) + assert isinstance(result.iloc[0], datetime) + assert result.dtype == np.object_ + + def test_td64_series_astype_object(self): + tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + result = tdser.astype(object) + assert isinstance(result.iloc[0], timedelta) + assert result.dtype == np.object_ + @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) def test_astype(self, dtype): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 950a364bd3f3c..68395fe9621cf 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1555,18 +1555,6 @@ def test_invalid_ops(self): pytest.raises(Exception, self.objSeries.__sub__, np.array(1, dtype=np.int64)) - def test_dt64_series_astype_object(self): - dt64ser = Series(date_range('20130101', periods=3)) - result = dt64ser.astype(object) - assert isinstance(result.iloc[0], datetime) - assert result.dtype == np.object_ - - def test_td64_series_astype_object(self): - tdser = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') - result = tdser.astype(object) - assert isinstance(result.iloc[0], timedelta) - assert result.dtype == np.object_ - @pytest.mark.parametrize('unit', ['D', 'h', 'm', 's', 'ms', 'us', 'ns']) def test_timedelta64_conversions(self, unit): s1 = Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') From 3359e9b1d3a8a3e33bd95cb8f21e8215472cdd81 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 17:40:03 -0800 Subject: [PATCH 09/10] parametrize tick classes --- pandas/tests/series/test_operators.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 68395fe9621cf..b17739ca16c69 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1315,17 +1315,16 @@ def test_dt64_series_sub_tick_DateOffset(self): with pytest.raises(TypeError): pd.offsets.Second(5) - ser - def test_dt64_series_with_DateOffset_smoke(self): + @pytest.mark.parametrize('cls_name', ['Day', 'Hour', 'Minute', 'Second', + 'Milli', 'Micro', 'Nano']) + def test_dt64_series_with_tick_DateOffset_smoke(self, cls_name): # GH 4532 # smoke tests for valid DateOffsets ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')]) - # valid DateOffsets - for cls_name in ['Day', 'Hour', 'Minute', 'Second', - 'Milli', 'Micro', 'Nano']: - offset_cls = getattr(pd.offsets, cls_name) - ser + offset_cls(5) - offset_cls(5) + ser + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser def test_dt64_series_add_mixed_tick_DateOffset(self): # GH 4532 From 9f3fa1b44ecee8258713b8951ab4cd2cea438d4a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Jan 2018 17:07:55 -0800 Subject: [PATCH 10/10] requested removal of catch --- pandas/tests/series/test_operators.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index b17739ca16c69..a58e6c8328684 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1600,13 +1600,7 @@ def timedelta64(*args): lhs = op(ser, nptd) rhs = op(ser, pytd) - try: - assert_series_equal(lhs, rhs) - except Exception: - raise AssertionError( - "invalid comparison [op->{0},d->{1},h->{2},m->{3}," - "s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, - us, lhs, rhs)) + assert_series_equal(lhs, rhs) def test_ops_nat_mixed_datetime64_timedelta64(self): # GH 11349