From fd20dd19131f15511d93a9b53ffacdb958a71cca Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Apr 2020 19:20:46 -0500 Subject: [PATCH 1/3] CLN: Split dtype inference tests --- pandas/tests/dtypes/test_inference.py | 164 +++++++++++--------------- 1 file changed, 72 insertions(+), 92 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 3d58df258e8e9..ecfe69e3f7dd4 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -782,108 +782,88 @@ def test_datetime(self): index = Index(dates) assert index.inferred_type == "datetime64" - def test_infer_dtype_datetime(self): - - arr = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - + def test_infer_dtype_datetime64(self): arr = np.array( [np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object ) assert lib.infer_dtype(arr, skipna=True) == "datetime64" - arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - arr = np.array([n, np.datetime64("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime64" - - arr = np.array([n, datetime(2011, 1, 1)]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - arr = np.array([n, pd.Timestamp("2011-01-02"), n]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - arr = np.array([n, np.datetime64("2011-01-02"), n]) - assert lib.infer_dtype(arr, skipna=True) == "datetime64" - - arr = np.array([n, datetime(2011, 1, 1), n]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_datetime64_with_na(self, na_value): + arr = np.array([na_value, np.datetime64("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" - # different type of nat - arr = np.array( - [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object - ) - assert lib.infer_dtype(arr, skipna=False) == "mixed" + arr = np.array([na_value, np.datetime64("2011-01-02"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" - arr = np.array( - [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object - ) + @pytest.mark.parametrize( + "arr", + [ + np.array( + [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object + ), + np.array( + [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object + ), + np.array([np.datetime64("2011-01-01"), pd.Timestamp("2011-01-02")]), + np.array([pd.Timestamp("2011-01-02"), np.datetime64("2011-01-01")]), + np.array([np.nan, pd.Timestamp("2011-01-02"), 1.1]), + np.array([np.nan, "2011-01-01", pd.Timestamp("2011-01-02")]), + np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object), + np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object), + ], + ) + def test_infer_datetimelike_dtype_mixed(self, arr): assert lib.infer_dtype(arr, skipna=False) == "mixed" - # mixed datetime - arr = np.array([datetime(2011, 1, 1), pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "datetime" - - # should be datetime? - arr = np.array([np.datetime64("2011-01-01"), pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" - - arr = np.array([pd.Timestamp("2011-01-02"), np.datetime64("2011-01-01")]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" - + def test_infer_dtype_mixed_integer(self): arr = np.array([np.nan, pd.Timestamp("2011-01-02"), 1]) assert lib.infer_dtype(arr, skipna=True) == "mixed-integer" - arr = np.array([np.nan, pd.Timestamp("2011-01-02"), 1.1]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" + @pytest.mark.parametrize( + "arr", + [ + np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]), + np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]), + np.array([datetime(2011, 1, 1), pd.Timestamp("2011-01-02")]), + ], + ) + def test_infer_dtype_datetime(self, arr): + assert lib.infer_dtype(arr, skipna=True) == "datetime" - arr = np.array([np.nan, "2011-01-01", pd.Timestamp("2011-01-02")]) - assert lib.infer_dtype(arr, skipna=True) == "mixed" + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "time_stamp", [pd.Timestamp("2011-01-01"), datetime(2011, 1, 1)] + ) + def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): + arr = np.array([na_value, time_stamp]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" - def test_infer_dtype_timedelta(self): + arr = np.array([na_value, time_stamp, na_value]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" - arr = np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]) + @pytest.mark.parametrize( + "arr", + [ + np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]), + np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object), + np.array([timedelta(1), timedelta(2)]), + ], + ) + def test_infer_dtype_timedelta(self, arr): assert lib.infer_dtype(arr, skipna=True) == "timedelta" - arr = np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object) + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + @pytest.mark.parametrize( + "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1),] + ) + def test_infer_dtype_timedelta_with_na(self, na_value, delta): + arr = np.array([na_value, delta]) assert lib.infer_dtype(arr, skipna=True) == "timedelta" - arr = np.array([timedelta(1), timedelta(2)]) + arr = np.array([na_value, delta, na_value]) assert lib.infer_dtype(arr, skipna=True) == "timedelta" - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, Timedelta("1 days")]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, np.timedelta64(1, "D")]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, timedelta(1)]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, pd.Timedelta("1 days"), n]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, np.timedelta64(1, "D"), n]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - arr = np.array([n, timedelta(1), n]) - assert lib.infer_dtype(arr, skipna=True) == "timedelta" - - # different type of nat - arr = np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object) - assert lib.infer_dtype(arr, skipna=False) == "mixed" - - arr = np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object) - assert lib.infer_dtype(arr, skipna=False) == "mixed" - def test_infer_dtype_period(self): # GH 13664 arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="D")]) @@ -892,25 +872,25 @@ def test_infer_dtype_period(self): arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="M")]) assert lib.infer_dtype(arr, skipna=True) == "period" - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Period("2011-01", freq="D")]) - assert lib.infer_dtype(arr, skipna=True) == "period" - - arr = np.array([n, pd.Period("2011-01", freq="D"), n]) - assert lib.infer_dtype(arr, skipna=True) == "period" - - # different type of nat + def test_infer_dtype_period_mixed(self): arr = np.array( - [np.datetime64("nat"), pd.Period("2011-01", freq="M")], dtype=object + [pd.Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object ) assert lib.infer_dtype(arr, skipna=False) == "mixed" arr = np.array( - [pd.Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object + [np.datetime64("nat"), pd.Period("2011-01", freq="M")], dtype=object ) assert lib.infer_dtype(arr, skipna=False) == "mixed" + @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) + def test_infer_dtype_period_with_na(self, na_value): + arr = np.array([na_value, pd.Period("2011-01", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + arr = np.array([na_value, pd.Period("2011-01", freq="D"), na_value]) + assert lib.infer_dtype(arr, skipna=True) == "period" + @pytest.mark.parametrize( "data", [ From 2d4916083016eb70398363091405c3decd2eb09a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Apr 2020 19:29:41 -0500 Subject: [PATCH 2/3] Lint --- pandas/tests/dtypes/test_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ecfe69e3f7dd4..954b2e23ce70d 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -855,7 +855,7 @@ def test_infer_dtype_timedelta(self, arr): @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) @pytest.mark.parametrize( - "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1),] + "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)] ) def test_infer_dtype_timedelta_with_na(self, na_value, delta): arr = np.array([na_value, delta]) From 0e6eaeeb7175dc8c5fc581e96d3ce6415f7829b0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 20 Apr 2020 21:26:59 -0500 Subject: [PATCH 3/3] Include comments --- pandas/tests/dtypes/test_inference.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 954b2e23ce70d..8c0580b7cf047 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -790,6 +790,7 @@ def test_infer_dtype_datetime64(self): @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) def test_infer_dtype_datetime64_with_na(self, na_value): + # starts with nan arr = np.array([na_value, np.datetime64("2011-01-02")]) assert lib.infer_dtype(arr, skipna=True) == "datetime64" @@ -836,6 +837,7 @@ def test_infer_dtype_datetime(self, arr): "time_stamp", [pd.Timestamp("2011-01-01"), datetime(2011, 1, 1)] ) def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): + # starts with nan arr = np.array([na_value, time_stamp]) assert lib.infer_dtype(arr, skipna=True) == "datetime" @@ -858,6 +860,7 @@ def test_infer_dtype_timedelta(self, arr): "delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)] ) def test_infer_dtype_timedelta_with_na(self, na_value, delta): + # starts with nan arr = np.array([na_value, delta]) assert lib.infer_dtype(arr, skipna=True) == "timedelta" @@ -885,6 +888,7 @@ def test_infer_dtype_period_mixed(self): @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) def test_infer_dtype_period_with_na(self, na_value): + # starts with nan arr = np.array([na_value, pd.Period("2011-01", freq="D")]) assert lib.infer_dtype(arr, skipna=True) == "period"