From 716a5592e38db8a2369abb50119f20bfdf15c3e7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 4 Dec 2021 12:34:37 -0800 Subject: [PATCH] TST: Parameterize test_algos --- pandas/tests/test_algos.py | 142 +++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 77 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 779d6e6b6bb0f..1345a66673d1c 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -779,7 +779,8 @@ def test_different_nans(self): expected = np.array([np.nan]) tm.assert_numpy_array_equal(result, expected) - def test_first_nan_kept(self): + @pytest.mark.parametrize("el_type", [np.float64, object]) + def test_first_nan_kept(self, el_type): # GH 22295 # create different nans from bit-patterns: bits_for_nan1 = 0xFFF8000000000001 @@ -788,13 +789,12 @@ def test_first_nan_kept(self): NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0] assert NAN1 != NAN1 assert NAN2 != NAN2 - for el_type in [np.float64, object]: - a = np.array([NAN1, NAN2], dtype=el_type) - result = pd.unique(a) - assert result.size == 1 - # use bit patterns to identify which nan was kept: - result_nan_bits = struct.unpack("=Q", struct.pack("d", result[0]))[0] - assert result_nan_bits == bits_for_nan1 + a = np.array([NAN1, NAN2], dtype=el_type) + result = pd.unique(a) + assert result.size == 1 + # use bit patterns to identify which nan was kept: + result_nan_bits = struct.unpack("=Q", struct.pack("d", result[0]))[0] + assert result_nan_bits == bits_for_nan1 def test_do_not_mangle_na_values(self, unique_nulls_fixture, unique_nulls_fixture2): # GH 22295 @@ -1261,21 +1261,20 @@ def test_dropna(self): expected = Series([3, 2, 1], index=[5.0, 10.3, np.nan]) tm.assert_series_equal(result, expected) - def test_value_counts_normalized(self): + @pytest.mark.parametrize("dtype", (np.float64, object, "M8[ns]")) + def test_value_counts_normalized(self, dtype): # GH12558 s = Series([1] * 2 + [2] * 3 + [np.nan] * 5) - dtypes = (np.float64, object, "M8[ns]") - for t in dtypes: - s_typed = s.astype(t) - result = s_typed.value_counts(normalize=True, dropna=False) - expected = Series( - [0.5, 0.3, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=t) - ) - tm.assert_series_equal(result, expected) + s_typed = s.astype(dtype) + result = s_typed.value_counts(normalize=True, dropna=False) + expected = Series( + [0.5, 0.3, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=dtype) + ) + tm.assert_series_equal(result, expected) - result = s_typed.value_counts(normalize=True, dropna=True) - expected = Series([0.6, 0.4], index=Series([2.0, 1.0], dtype=t)) - tm.assert_series_equal(result, expected) + result = s_typed.value_counts(normalize=True, dropna=True) + expected = Series([0.6, 0.4], index=Series([2.0, 1.0], dtype=dtype)) + tm.assert_series_equal(result, expected) def test_value_counts_uint64(self): arr = np.array([2 ** 63], dtype=np.uint64) @@ -1479,13 +1478,10 @@ def test_datetime_likes(self): res_false = s.duplicated(keep=False) tm.assert_series_equal(res_false, Series(exp_false)) - def test_unique_index(self): - cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] - for case in cases: - assert case.is_unique is True - tm.assert_numpy_array_equal( - case.duplicated(), np.array([False, False, False]) - ) + @pytest.mark.parametrize("case", [Index([1, 2, 3]), pd.RangeIndex(0, 3)]) + def test_unique_index(self, case): + assert case.is_unique is True + tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False])) @pytest.mark.parametrize( "arr, uniques", @@ -1744,20 +1740,25 @@ def test_unique_label_indices(): class TestRank: @td.skip_if_no_scipy - def test_scipy_compat(self): + @pytest.mark.parametrize( + "arr", + [ + [np.nan, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 3, np.nan], + [4.0, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 4.0, np.nan], + ], + ) + def test_scipy_compat(self, arr): from scipy.stats import rankdata - def _check(arr): - mask = ~np.isfinite(arr) - arr = arr.copy() - result = libalgos.rank_1d(arr) - arr[mask] = np.inf - exp = rankdata(arr) - exp[mask] = np.nan - tm.assert_almost_equal(result, exp) + arr = np.array(arr) - _check(np.array([np.nan, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 3, np.nan])) - _check(np.array([4.0, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 4.0, np.nan])) + mask = ~np.isfinite(arr) + arr = arr.copy() + result = libalgos.rank_1d(arr) + arr[mask] = np.inf + exp = rankdata(arr) + exp[mask] = np.nan + tm.assert_almost_equal(result, exp) @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"]) def test_basic(self, writable, dtype): @@ -1769,12 +1770,12 @@ def test_basic(self, writable, dtype): result = algos.rank(ser) tm.assert_numpy_array_equal(result, exp) - def test_uint64_overflow(self): + @pytest.mark.parametrize("dtype", [np.float64, np.uint64]) + def test_uint64_overflow(self, dtype): exp = np.array([1, 2], dtype=np.float64) - for dtype in [np.float64, np.uint64]: - s = Series([1, 2 ** 63], dtype=dtype) - tm.assert_numpy_array_equal(algos.rank(s), exp) + s = Series([1, 2 ** 63], dtype=dtype) + tm.assert_numpy_array_equal(algos.rank(s), exp) def test_too_many_ndims(self): arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]) @@ -1819,21 +1820,6 @@ def test_pad_backfill_object_segfault(): class TestTseriesUtil: - def test_combineFunc(self): - pass - - def test_reindex(self): - pass - - def test_isna(self): - pass - - def test_groupby(self): - pass - - def test_groupby_withnull(self): - pass - def test_backfill(self): old = Index([1, 5, 10]) new = Index(list(range(12))) @@ -2274,7 +2260,8 @@ def test_no_mode(self): exp = Series([], dtype=np.float64, index=Index([], dtype=int)) tm.assert_series_equal(algos.mode([]), exp) - def test_mode_single(self): + @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"]) + def test_mode_single(self, dt): # GH 15714 exp_single = [1] data_single = [1] @@ -2282,36 +2269,36 @@ def test_mode_single(self): exp_multi = [1] data_multi = [1, 1] - for dt in np.typecodes["AllInteger"] + np.typecodes["Float"]: - s = Series(data_single, dtype=dt) - exp = Series(exp_single, dtype=dt) - tm.assert_series_equal(algos.mode(s), exp) + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) - s = Series(data_multi, dtype=dt) - exp = Series(exp_multi, dtype=dt) - tm.assert_series_equal(algos.mode(s), exp) + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + def test_mode_obj_int(self): exp = Series([1], dtype=int) tm.assert_series_equal(algos.mode([1]), exp) exp = Series(["a", "b", "c"], dtype=object) tm.assert_series_equal(algos.mode(["a", "b", "c"]), exp) - def test_number_mode(self): + @pytest.mark.parametrize("dt", np.typecodes["AllInteger"] + np.typecodes["Float"]) + def test_number_mode(self, dt): exp_single = [1] data_single = [1] * 5 + [2] * 3 exp_multi = [1, 3] data_multi = [1] * 5 + [2] * 3 + [3] * 5 - for dt in np.typecodes["AllInteger"] + np.typecodes["Float"]: - s = Series(data_single, dtype=dt) - exp = Series(exp_single, dtype=dt) - tm.assert_series_equal(algos.mode(s), exp) + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) - s = Series(data_multi, dtype=dt) - exp = Series(exp_multi, dtype=dt) - tm.assert_series_equal(algos.mode(s), exp) + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) def test_strobj_mode(self): exp = ["b"] @@ -2321,13 +2308,14 @@ def test_strobj_mode(self): exp = Series(exp, dtype="c") tm.assert_series_equal(algos.mode(s), exp) + @pytest.mark.parametrize("dt", [str, object]) + def test_strobj_multi_char(self, dt): exp = ["bar"] data = ["foo"] * 2 + ["bar"] * 3 - for dt in [str, object]: - s = Series(data, dtype=dt) - exp = Series(exp, dtype=dt) - tm.assert_series_equal(algos.mode(s), exp) + s = Series(data, dtype=dt) + exp = Series(exp, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) def test_datelike_mode(self): exp = Series(["1900-05-03", "2011-01-03", "2013-01-02"], dtype="M8[ns]")