From 36c62cdcedc1323f05794b0f34bd7cbb6482e075 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:05:36 -0700 Subject: [PATCH 01/22] TST: Use numpy random generator with ruff NPY002 --- pandas/tests/apply/test_frame_apply.py | 16 +- pandas/tests/apply/test_invalid_arg.py | 8 +- pandas/tests/apply/test_series_apply.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 35 ++- pandas/tests/arithmetic/test_object.py | 2 +- pandas/tests/arrays/categorical/test_api.py | 4 +- .../tests/arrays/categorical/test_astype.py | 4 +- .../arrays/categorical/test_constructors.py | 2 +- .../tests/arrays/categorical/test_indexing.py | 10 +- .../tests/arrays/categorical/test_missing.py | 2 +- .../arrays/categorical/test_operators.py | 4 +- pandas/tests/arrays/interval/test_interval.py | 2 +- .../tests/arrays/sparse/test_arithmetics.py | 4 +- pandas/tests/arrays/test_datetimelike.py | 2 +- pandas/tests/computation/test_eval.py | 171 ++++++----- .../copy_view/test_core_functionalities.py | 2 +- pandas/tests/dtypes/test_generic.py | 2 +- pandas/tests/extension/decimal/array.py | 3 +- .../tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/json/array.py | 6 +- pandas/tests/extension/list/array.py | 4 +- pandas/tests/extension/test_categorical.py | 2 +- pandas/tests/extension/test_interval.py | 4 +- pandas/tests/extension/test_sparse.py | 4 +- pandas/tests/extension/test_string.py | 4 +- pandas/tests/frame/conftest.py | 6 +- .../frame/constructors/test_from_records.py | 26 +- pandas/tests/frame/indexing/test_delitem.py | 4 +- pandas/tests/frame/indexing/test_getitem.py | 8 +- pandas/tests/frame/indexing/test_indexing.py | 50 ++-- pandas/tests/frame/indexing/test_insert.py | 10 +- pandas/tests/frame/indexing/test_mask.py | 8 +- pandas/tests/frame/indexing/test_set_value.py | 4 +- pandas/tests/frame/indexing/test_setitem.py | 38 ++- pandas/tests/frame/indexing/test_where.py | 16 +- 
pandas/tests/frame/indexing/test_xs.py | 18 +- pandas/tests/frame/methods/test_align.py | 4 +- pandas/tests/frame/methods/test_asfreq.py | 8 +- pandas/tests/frame/methods/test_asof.py | 2 +- pandas/tests/frame/methods/test_astype.py | 2 +- pandas/tests/frame/methods/test_at_time.py | 12 +- .../tests/frame/methods/test_between_time.py | 18 +- pandas/tests/frame/methods/test_clip.py | 18 +- pandas/tests/frame/methods/test_copy.py | 6 +- pandas/tests/frame/methods/test_cov_corr.py | 40 ++- pandas/tests/frame/methods/test_describe.py | 2 +- pandas/tests/frame/methods/test_diff.py | 6 +- pandas/tests/frame/methods/test_dot.py | 14 +- pandas/tests/frame/methods/test_drop.py | 14 +- pandas/tests/frame/methods/test_dropna.py | 12 +- pandas/tests/frame/methods/test_duplicated.py | 2 +- pandas/tests/frame/methods/test_equals.py | 6 +- pandas/tests/frame/methods/test_fillna.py | 18 +- pandas/tests/frame/methods/test_filter.py | 2 +- .../frame/methods/test_first_valid_index.py | 2 +- pandas/tests/frame/methods/test_head_tail.py | 2 +- .../tests/frame/methods/test_interpolate.py | 2 +- pandas/tests/frame/methods/test_map.py | 6 +- pandas/tests/frame/methods/test_matmul.py | 20 +- pandas/tests/frame/methods/test_nlargest.py | 4 +- pandas/tests/frame/methods/test_pop.py | 2 +- pandas/tests/frame/methods/test_quantile.py | 10 +- pandas/tests/frame/methods/test_rank.py | 10 +- pandas/tests/frame/methods/test_reindex.py | 48 +-- pandas/tests/frame/methods/test_rename.py | 2 +- pandas/tests/frame/methods/test_replace.py | 4 +- .../tests/frame/methods/test_reset_index.py | 22 +- pandas/tests/frame/methods/test_round.py | 4 +- pandas/tests/frame/methods/test_sample.py | 51 ++-- .../tests/frame/methods/test_select_dtypes.py | 4 +- pandas/tests/frame/methods/test_set_index.py | 16 +- pandas/tests/frame/methods/test_shift.py | 24 +- pandas/tests/frame/methods/test_sort_index.py | 29 +- .../tests/frame/methods/test_sort_values.py | 18 +- pandas/tests/frame/methods/test_swapaxes.py | 6 +- 
pandas/tests/frame/methods/test_to_csv.py | 40 ++- pandas/tests/frame/methods/test_to_dict.py | 2 +- pandas/tests/frame/methods/test_to_numpy.py | 2 +- pandas/tests/frame/methods/test_to_period.py | 10 +- pandas/tests/frame/methods/test_to_records.py | 10 +- .../tests/frame/methods/test_to_timestamp.py | 6 +- pandas/tests/frame/methods/test_truncate.py | 14 +- pandas/tests/frame/methods/test_values.py | 2 +- pandas/tests/frame/test_api.py | 4 +- pandas/tests/frame/test_arithmetic.py | 86 +++--- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/frame/test_constructors.py | 55 ++-- pandas/tests/frame/test_iteration.py | 3 +- pandas/tests/frame/test_nonunique_indexes.py | 12 +- pandas/tests/frame/test_npfuncs.py | 2 +- pandas/tests/frame/test_query_eval.py | 148 ++++++---- pandas/tests/frame/test_reductions.py | 18 +- pandas/tests/frame/test_repr_info.py | 16 +- pandas/tests/frame/test_stack_unstack.py | 38 +-- pandas/tests/frame/test_subclass.py | 7 +- pandas/tests/generic/test_frame.py | 21 +- pandas/tests/generic/test_generic.py | 2 +- pandas/tests/generic/test_series.py | 2 +- .../tests/groupby/aggregate/test_aggregate.py | 4 +- pandas/tests/groupby/aggregate/test_cython.py | 24 +- pandas/tests/groupby/aggregate/test_numba.py | 8 +- pandas/tests/groupby/aggregate/test_other.py | 24 +- pandas/tests/groupby/conftest.py | 14 +- pandas/tests/groupby/test_apply.py | 16 +- pandas/tests/groupby/test_apply_mutate.py | 2 +- pandas/tests/groupby/test_categorical.py | 22 +- pandas/tests/groupby/test_counting.py | 29 +- pandas/tests/groupby/test_filters.py | 13 +- pandas/tests/groupby/test_function.py | 20 +- pandas/tests/groupby/test_groupby.py | 71 +++-- pandas/tests/groupby/test_groupby_dropna.py | 10 +- pandas/tests/groupby/test_grouping.py | 45 ++- pandas/tests/groupby/test_indexing.py | 7 +- pandas/tests/groupby/test_libgroupby.py | 12 +- pandas/tests/groupby/test_nth.py | 2 +- pandas/tests/groupby/test_nunique.py | 6 +- pandas/tests/groupby/test_pipe.py | 
2 +- pandas/tests/groupby/test_quantile.py | 6 +- pandas/tests/groupby/test_rank.py | 6 +- pandas/tests/groupby/test_size.py | 2 +- pandas/tests/groupby/test_skew.py | 6 +- pandas/tests/groupby/test_timegrouper.py | 4 +- pandas/tests/groupby/test_value_counts.py | 7 +- pandas/tests/groupby/transform/test_numba.py | 8 +- .../tests/groupby/transform/test_transform.py | 37 ++- .../indexes/categorical/test_category.py | 2 +- .../indexes/categorical/test_indexing.py | 4 +- .../tests/indexes/datetimes/test_datetime.py | 5 +- .../tests/indexes/datetimes/test_indexing.py | 2 +- pandas/tests/indexes/datetimes/test_join.py | 4 +- .../indexes/datetimes/test_partial_slicing.py | 14 +- pandas/tests/indexes/datetimes/test_setops.py | 4 +- pandas/tests/indexes/multi/test_duplicates.py | 4 +- pandas/tests/indexes/multi/test_get_set.py | 4 +- pandas/tests/indexes/multi/test_sorting.py | 12 +- pandas/tests/indexes/period/test_indexing.py | 6 +- pandas/tests/indexes/period/test_join.py | 2 +- .../indexes/period/test_partial_slicing.py | 12 +- pandas/tests/indexes/period/test_setops.py | 2 +- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/indexes/test_common.py | 2 +- pandas/tests/indexes/test_subclass.py | 4 +- .../tests/indexes/timedeltas/test_indexing.py | 2 +- pandas/tests/indexes/timedeltas/test_join.py | 2 +- pandas/tests/indexing/conftest.py | 44 ++- .../multiindex/test_chaining_and_caching.py | 2 +- .../indexing/multiindex/test_datetime.py | 2 +- .../tests/indexing/multiindex/test_getitem.py | 4 +- pandas/tests/indexing/multiindex/test_iloc.py | 6 +- .../indexing/multiindex/test_indexing_slow.py | 22 +- pandas/tests/indexing/multiindex/test_loc.py | 32 +- .../indexing/multiindex/test_multiindex.py | 4 +- .../tests/indexing/multiindex/test_partial.py | 8 +- .../tests/indexing/multiindex/test_setitem.py | 29 +- .../tests/indexing/multiindex/test_sorted.py | 2 +- pandas/tests/indexing/test_at.py | 2 +- pandas/tests/indexing/test_categorical.py | 8 +- 
.../indexing/test_chaining_and_caching.py | 6 +- pandas/tests/indexing/test_floats.py | 8 +- pandas/tests/indexing/test_iat.py | 2 +- pandas/tests/indexing/test_iloc.py | 59 ++-- pandas/tests/indexing/test_indexing.py | 25 +- pandas/tests/indexing/test_loc.py | 61 ++-- pandas/tests/indexing/test_partial.py | 4 +- pandas/tests/indexing/test_scalar.py | 8 +- pandas/tests/interchange/test_impl.py | 17 +- pandas/tests/internals/test_internals.py | 20 +- pandas/tests/io/excel/test_style.py | 10 +- pandas/tests/io/excel/test_writers.py | 38 ++- pandas/tests/io/formats/style/test_style.py | 8 +- pandas/tests/io/formats/test_format.py | 49 ++-- pandas/tests/io/formats/test_info.py | 28 +- pandas/tests/io/formats/test_printing.py | 2 +- pandas/tests/io/formats/test_series_info.py | 10 +- pandas/tests/io/formats/test_to_html.py | 20 +- pandas/tests/io/formats/test_to_string.py | 2 +- .../tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/json/test_pandas.py | 6 +- pandas/tests/io/json/test_ujson.py | 2 +- .../io/parser/dtypes/test_dtypes_basic.py | 2 +- pandas/tests/io/parser/test_c_parser_only.py | 4 +- pandas/tests/io/parser/test_index_col.py | 8 +- pandas/tests/io/parser/test_multi_thread.py | 10 +- pandas/tests/io/parser/test_network.py | 2 +- pandas/tests/io/pytables/test_append.py | 44 +-- pandas/tests/io/pytables/test_complex.py | 13 +- pandas/tests/io/pytables/test_errors.py | 22 +- pandas/tests/io/pytables/test_put.py | 5 +- pandas/tests/io/pytables/test_read.py | 22 +- pandas/tests/io/pytables/test_round_trip.py | 20 +- pandas/tests/io/pytables/test_select.py | 34 ++- pandas/tests/io/pytables/test_store.py | 34 ++- pandas/tests/io/pytables/test_time_series.py | 8 +- pandas/tests/io/pytables/test_timezones.py | 10 +- pandas/tests/io/sas/test_byteswap.py | 4 +- pandas/tests/io/test_clipboard.py | 4 +- pandas/tests/io/test_html.py | 4 +- pandas/tests/io/test_parquet.py | 18 +- pandas/tests/io/test_pickle.py | 2 +- pandas/tests/io/test_sql.py | 2 +- 
pandas/tests/io/test_stata.py | 10 +- pandas/tests/libs/test_hashtable.py | 6 +- pandas/tests/plotting/conftest.py | 14 +- pandas/tests/plotting/frame/test_frame.py | 273 +++++++++++------- .../tests/plotting/frame/test_frame_color.py | 114 ++++---- .../tests/plotting/frame/test_frame_legend.py | 50 ++-- .../plotting/frame/test_frame_subplots.py | 72 +++-- .../tests/plotting/frame/test_hist_box_by.py | 7 +- pandas/tests/plotting/test_boxplot_method.py | 110 ++++--- pandas/tests/plotting/test_datetimelike.py | 193 ++++++++----- pandas/tests/plotting/test_groupby.py | 44 ++- pandas/tests/plotting/test_hist_method.py | 96 +++--- pandas/tests/plotting/test_misc.py | 33 ++- pandas/tests/plotting/test_series.py | 82 +++--- pandas/tests/reductions/test_reductions.py | 12 +- pandas/tests/resample/conftest.py | 4 +- pandas/tests/resample/test_datetime_index.py | 76 ++--- pandas/tests/resample/test_period_index.py | 27 +- pandas/tests/resample/test_resample_api.py | 38 ++- .../tests/resample/test_resampler_grouper.py | 8 +- pandas/tests/resample/test_time_grouper.py | 16 +- pandas/tests/resample/test_timedelta.py | 4 +- pandas/tests/reshape/concat/test_append.py | 6 +- pandas/tests/reshape/concat/test_concat.py | 52 ++-- pandas/tests/reshape/concat/test_empty.py | 2 +- pandas/tests/reshape/concat/test_index.py | 15 +- pandas/tests/reshape/concat/test_invalid.py | 2 +- pandas/tests/reshape/concat/test_series.py | 18 +- pandas/tests/reshape/merge/test_join.py | 106 ++++--- pandas/tests/reshape/merge/test_merge.py | 53 ++-- pandas/tests/reshape/merge/test_multi.py | 36 ++- pandas/tests/reshape/test_crosstab.py | 38 +-- pandas/tests/reshape/test_cut.py | 8 +- pandas/tests/reshape/test_melt.py | 12 +- pandas/tests/reshape/test_pivot.py | 26 +- pandas/tests/reshape/test_qcut.py | 8 +- .../tests/scalar/timestamp/test_timestamp.py | 1 - .../series/accessors/test_dt_accessor.py | 7 +- pandas/tests/series/indexing/test_datetime.py | 22 +- pandas/tests/series/indexing/test_get.py | 5 
+- pandas/tests/series/indexing/test_getitem.py | 21 +- pandas/tests/series/indexing/test_indexing.py | 6 +- pandas/tests/series/indexing/test_mask.py | 4 +- pandas/tests/series/indexing/test_setitem.py | 8 +- pandas/tests/series/indexing/test_where.py | 6 +- pandas/tests/series/methods/test_align.py | 4 +- pandas/tests/series/methods/test_argsort.py | 2 +- pandas/tests/series/methods/test_asof.py | 8 +- pandas/tests/series/methods/test_astype.py | 8 +- pandas/tests/series/methods/test_autocorr.py | 2 +- .../series/methods/test_combine_first.py | 2 +- pandas/tests/series/methods/test_cov_corr.py | 8 +- pandas/tests/series/methods/test_fillna.py | 8 +- .../tests/series/methods/test_interpolate.py | 10 +- .../tests/series/methods/test_is_monotonic.py | 2 +- pandas/tests/series/methods/test_is_unique.py | 2 +- pandas/tests/series/methods/test_matmul.py | 6 +- pandas/tests/series/methods/test_nlargest.py | 4 +- pandas/tests/series/methods/test_nunique.py | 2 +- pandas/tests/series/methods/test_quantile.py | 2 +- pandas/tests/series/methods/test_rank.py | 6 +- pandas/tests/series/methods/test_reindex.py | 2 +- pandas/tests/series/methods/test_repeat.py | 2 +- pandas/tests/series/methods/test_replace.py | 14 +- .../tests/series/methods/test_reset_index.py | 4 +- .../tests/series/methods/test_sort_index.py | 6 +- .../tests/series/methods/test_sort_values.py | 2 +- pandas/tests/series/methods/test_unstack.py | 2 +- pandas/tests/series/test_api.py | 8 +- pandas/tests/series/test_arithmetic.py | 26 +- pandas/tests/series/test_constructors.py | 10 +- pandas/tests/series/test_missing.py | 2 +- pandas/tests/series/test_npfuncs.py | 2 +- pandas/tests/series/test_repr.py | 14 +- pandas/tests/series/test_ufunc.py | 8 +- pandas/tests/test_algos.py | 14 +- pandas/tests/test_common.py | 24 +- pandas/tests/test_expressions.py | 36 ++- pandas/tests/test_multilevel.py | 14 +- pandas/tests/test_nanops.py | 38 ++- pandas/tests/test_sorting.py | 32 +- pandas/tests/test_take.py | 17 +- 
pandas/tests/tslibs/test_ccalendar.py | 2 +- pandas/tests/util/test_make_objects.py | 2 +- pandas/tests/window/conftest.py | 4 +- pandas/tests/window/test_api.py | 2 +- pandas/tests/window/test_apply.py | 11 +- pandas/tests/window/test_ewm.py | 18 +- pandas/tests/window/test_expanding.py | 24 +- pandas/tests/window/test_pairwise.py | 10 +- pandas/tests/window/test_rolling.py | 18 +- pandas/tests/window/test_rolling_functions.py | 12 +- pandas/tests/window/test_rolling_quantile.py | 6 +- pandas/tests/window/test_rolling_skew_kurt.py | 10 +- pandas/tests/window/test_timeseries_window.py | 3 +- pandas/tests/window/test_win_type.py | 6 +- pyproject.toml | 4 +- 297 files changed, 2928 insertions(+), 2093 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 43f903f99d0d7..10ccf12442522 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -376,7 +376,7 @@ def test_apply_reduce_to_dict(): def test_apply_differently_indexed(): - df = DataFrame(np.random.randn(20, 10)) + df = DataFrame(np.random.default_rng(2).randn(20, 10)) result = df.apply(Series.describe, axis=0) expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) @@ -463,9 +463,9 @@ def test_apply_convert_objects(): "shiny", "shiny", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) @@ -659,7 +659,7 @@ def test_apply_category_equalness(val): def test_infer_row_shape(): # GH 17437 # if row shape is changing, infer it - df = DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.default_rng(2).rand(10, 2)) result = df.apply(np.fft.fft, axis=0).shape assert result == (10, 2) @@ -816,7 +816,7 @@ def test_with_listlike_columns(): # GH 17348 df = DataFrame( { - "a": Series(np.random.randn(4)), + "a": 
Series(np.random.default_rng(2).randn(4)), "b": ["a", "list", "of", "words"], "ts": date_range("2016-10-01", periods=4, freq="H"), } @@ -862,7 +862,7 @@ def test_infer_output_shape_columns(): def test_infer_output_shape_listlike_columns(): # GH 16353 - df = DataFrame(np.random.randn(6, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(6, 3), columns=["A", "B", "C"]) result = df.apply(lambda x: [1, 2, 3], axis=1) expected = Series([[1, 2, 3] for t in df.itertuples()]) @@ -911,7 +911,7 @@ def fun(x): def test_consistent_coerce_for_shapes(lst): # we want column names to NOT be propagated # just because the shape matches the input shape - df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["A", "B", "C"]) result = df.apply(lambda x: lst, axis=1) expected = Series([lst for t in df.itertuples()]) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index e0d52f094515b..2f7e0d56af2fa 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -93,7 +93,7 @@ def test_series_nested_renamer(renamer): def test_apply_dict_depr(): tsdf = DataFrame( - np.random.randn(10, 3), + np.random.default_rng(2).randn(10, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=10), ) @@ -190,9 +190,9 @@ def test_apply_modify_traceback(): "shiny", "shiny", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 79954eeed8e95..b8892e114b80d 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -76,7 +76,7 @@ def f(x): @pytest.mark.parametrize("convert_dtype", [True, False]) def 
test_apply_convert_dtype_deprecated(convert_dtype): - ser = Series(np.random.randn(10)) + ser = Series(np.random.default_rng(2).randn(10)) def func(x): return x if x > 0 else np.nan diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 455cae084b7c6..3c79cc773047c 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -99,8 +99,8 @@ def test_df_numeric_cmp_dt64_raises(self, box_with_array, fixed_now_ts): def test_compare_invalid(self): # GH#8058 # ops testing - a = Series(np.random.randn(5), name=0) - b = Series(np.random.randn(5)) + a = Series(np.random.default_rng(2).randn(5), name=0) + b = Series(np.random.default_rng(2).randn(5)) b.name = pd.Timestamp("2000-01-01") tm.assert_series_equal(a / b, 1 / (b / a)) @@ -109,7 +109,7 @@ def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch): box = box_with_array xbox = box if box is not Index else np.ndarray - obj = Series(np.random.randn(51)) + obj = Series(np.random.default_rng(2).randn(51)) obj = tm.box_expected(obj, box, transpose=False) with monkeypatch.context() as m: m.setattr(expr, "_MIN_ELEMENTS", 50) @@ -459,7 +459,7 @@ def test_ser_divmod_inf(self): def test_rdiv_zero_compat(self): # GH#8674 zero_array = np.array([0] * 5) - data = np.random.randn(5) + data = np.random.default_rng(2).randn(5) expected = Series([0.0] * 5) result = zero_array / Series(data) @@ -535,7 +535,7 @@ def test_df_div_zero_int(self): def test_df_div_zero_series_does_not_commute(self): # integer div, but deal with the 0's (GH#9144) - df = pd.DataFrame(np.random.randn(10, 5)) + df = pd.DataFrame(np.random.default_rng(2).randn(10, 5)) ser = df[0] res = ser / df res2 = df / ser @@ -602,7 +602,7 @@ def test_df_mod_zero_int(self): def test_df_mod_zero_series_does_not_commute(self): # GH#3590, modulo as ints # not commutative with series - df = pd.DataFrame(np.random.randn(10, 5)) + df = 
pd.DataFrame(np.random.default_rng(2).randn(10, 5)) ser = df[0] res = ser % df res2 = df % ser @@ -770,7 +770,7 @@ def test_divmod_series(self, numeric_idx): @pytest.mark.parametrize("other", [np.nan, 7, -23, 2.718, -3.14, np.inf]) def test_ops_np_scalar(self, other): - vals = np.random.randn(5, 3) + vals = np.random.default_rng(2).randn(5, 3) f = lambda x: pd.DataFrame( x, index=list("ABCDE"), columns=["jim", "joe", "jolie"] ) @@ -912,7 +912,7 @@ def test_series_frame_radd_bug(self, fixed_now_ts): # TODO: This came from series.test.test_operators, needs cleanup def test_datetime64_with_index(self): # arithmetic integer ops with an index - ser = Series(np.random.randn(5)) + ser = Series(np.random.default_rng(2).randn(5)) expected = ser - ser.index.to_series() result = ser - ser.index tm.assert_series_equal(result, expected) @@ -933,7 +933,8 @@ def test_datetime64_with_index(self): result = ser - ser.index.to_period() df = pd.DataFrame( - np.random.randn(5, 2), index=pd.date_range("20130101", periods=5) + np.random.default_rng(2).randn(5, 2), + index=pd.date_range("20130101", periods=5), ) df["date"] = pd.Timestamp("20130102") df["expected"] = df["date"] - df.index.to_series() @@ -944,7 +945,7 @@ def test_datetime64_with_index(self): def test_frame_operators(self, float_frame): frame = float_frame - garbage = np.random.random(4) + garbage = np.random.default_rng(2).random(4) colSeries = Series(garbage, index=np.array(frame.columns)) idSum = frame + frame @@ -1177,7 +1178,9 @@ def test_numarr_with_dtype_add_int(self, dtype, box_with_array): ) def test_operators_reverse_object(self, op): # GH#56 - arr = Series(np.random.randn(10), index=np.arange(10), dtype=object) + arr = Series( + np.random.default_rng(2).randn(10), index=np.arange(10), dtype=object + ) result = op(1.0, arr) expected = op(1.0, arr.astype(float)) @@ -1284,13 +1287,13 @@ def test_arithmetic_with_frame_or_series(self, op): # check that we return NotImplemented when operating with Series # or DataFrame 
index = RangeIndex(5) - other = Series(np.random.randn(5)) + other = Series(np.random.default_rng(2).randn(5)) expected = op(Series(index), other) result = op(index, other) tm.assert_series_equal(result, expected) - other = pd.DataFrame(np.random.randn(2, 5)) + other = pd.DataFrame(np.random.default_rng(2).randn(2, 5)) expected = op(pd.DataFrame([index, index]), other) result = op(index, other) tm.assert_frame_equal(result, expected) @@ -1409,7 +1412,9 @@ def test_dataframe_div_silenced(): columns=list("ABCD"), ) pdf2 = pd.DataFrame( - np.random.randn(10, 4), index=list("abcdefghjk"), columns=list("ABCX") + np.random.default_rng(2).randn(10, 4), + index=list("abcdefghjk"), + columns=list("ABCX"), ) with tm.assert_produces_warning(None): pdf1.div(pdf2, fill_value=0) @@ -1444,7 +1449,7 @@ def test_integer_array_add_list_like( def test_sub_multiindex_swapped_levels(): # GH 9952 df = pd.DataFrame( - {"a": np.random.randn(6)}, + {"a": np.random.default_rng(2).randn(6)}, index=pd.MultiIndex.from_product( [["a", "b"], [0, 1, 2]], names=["levA", "levB"] ), diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index cacd580658149..6e2ca746663bc 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -22,7 +22,7 @@ class TestObjectComparisons: def test_comparison_object_numeric_nas(self, comparison_op): - ser = Series(np.random.randn(10), dtype=object) + ser = Series(np.random.default_rng(2).randn(10), dtype=object) shifted = ser.shift(2) func = comparison_op diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 7d96890b6c669..b4215b4a6fe21 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -376,8 +376,8 @@ def test_remove_unused_categories(self): assert out.tolist() == val alpha = list("abcdefghijklmnopqrstuvwxyz") - val = np.random.choice(alpha[::2], 10000).astype("object") - 
val[np.random.choice(len(val), 100)] = np.nan + val = np.random.default_rng(2).choice(alpha[::2], 10000).astype("object") + val[np.random.default_rng(2).choice(len(val), 100)] = np.nan cat = Categorical(values=val, categories=alpha) out = cat.remove_unused_categories() diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index 205e3950fa1ce..ffca1807dac52 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -16,7 +16,7 @@ class TestAstype: def test_astype_str_int_categories_to_nullable_int(self): # GH#39616 dtype = CategoricalDtype([str(i) for i in range(5)]) - codes = np.random.randint(5, size=20) + codes = np.random.default_rng(2).randint(5, size=20) arr = Categorical.from_codes(codes, dtype=dtype) res = arr.astype("Int64") @@ -26,7 +26,7 @@ def test_astype_str_int_categories_to_nullable_int(self): def test_astype_str_int_categories_to_nullable_float(self): # GH#39616 dtype = CategoricalDtype([str(i / 2) for i in range(5)]) - codes = np.random.randint(5, size=20) + codes = np.random.default_rng(2).randint(5, size=20) arr = Categorical.from_codes(codes, dtype=dtype) res = arr.astype("Float64") diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 5eb7f37a4ae34..1f2c2fd709eb7 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -513,7 +513,7 @@ def test_construction_with_null(self, klass, nulls_fixture): def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype, validate): # GH#39649 cats = pd.array(range(5), dtype=any_numeric_ea_dtype) - codes = np.random.randint(5, size=3) + codes = np.random.default_rng(2).randint(5, size=3) dtype = CategoricalDtype(cats) arr = Categorical.from_codes(codes, dtype=dtype, validate=validate) assert arr.categories.dtype == cats.dtype diff --git 
a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 635b331c14ac9..4046eca5d2416 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -105,9 +105,9 @@ def test_setitem_tuple(self): def test_setitem_listlike(self): # GH#9469 # properly coerce the input indexers - np.random.seed(1) + cat = Categorical( - np.random.randint(0, 5, size=150000).astype(np.int8) + np.random.default_rng(2).randint(0, 5, size=150000).astype(np.int8) ).add_categories([-1000]) indexer = np.array([100000]).astype(np.int64) cat[indexer] = -1000 @@ -131,8 +131,10 @@ def test_getitem_slice(self): def test_getitem_listlike(self): # GH 9469 # properly coerce the input indexers - np.random.seed(1) - c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)) + + c = Categorical( + np.random.default_rng(2).randint(0, 5, size=150000).astype(np.int8) + ) result = c.codes[np.array([100000]).astype(np.int64)] expected = c[np.array([100000]).astype(np.int64)].codes tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 892795b89c1f5..19ac81102f7f2 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -28,7 +28,7 @@ def test_na_flags_int_categories(self): # #1457 categories = list(range(10)) - labels = np.random.randint(0, 10, 20) + labels = np.random.default_rng(2).randint(0, 10, 20) labels[::5] = -1 cat = Categorical(labels, categories) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index de88960280102..cac0c4e61e82d 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -45,7 +45,7 @@ def test_comparisons(self, factor): n = len(factor) - other = 
factor[np.random.permutation(n)] + other = factor[np.random.default_rng(2).permutation(n)] result = factor == other expected = np.asarray(factor) == np.asarray(other) tm.assert_numpy_array_equal(result, expected) @@ -345,7 +345,7 @@ def test_compare_unordered_different_order(self): assert not a.equals(b) def test_numeric_like_ops(self): - df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + df = DataFrame({"value": np.random.default_rng(2).randint(0, 10000, 100)}) labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index b97eb32a60838..8d4022112f2a4 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -215,7 +215,7 @@ def test_min_max(self, left_right_dtypes, index_or_series_or_array): MAX = arr[-1] indexer = np.arange(len(arr)) - np.random.shuffle(indexer) + np.random.default_rng(2).shuffle(indexer) arr = arr.take(indexer) arr_na = arr.insert(2, np.nan) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 6c11979506b58..b336a2658a79b 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -475,8 +475,8 @@ def test_mismatched_length_cmp_op(cons): @pytest.mark.parametrize("fill_value", [np.nan, 3]) def test_binary_operators(op, fill_value): op = getattr(operator, op) - data1 = np.random.randn(20) - data2 = np.random.randn(20) + data1 = np.random.default_rng(2).randn(20) + data2 = np.random.default_rng(2).randn(20) data1[::2] = fill_value data2[::3] = fill_value diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 9e402af931199..ad8c5334115c5 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -152,7 +152,7 @@ def 
test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered): def test_take(self): data = np.arange(100, dtype="i8") * 24 * 3600 * 10**9 - np.random.shuffle(data) + np.random.default_rng(2).shuffle(data) if self.array_cls is PeriodArray: arr = PeriodArray(data, dtype="period[D]") diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 35960c707d3bd..2697c6d0107ea 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -3,7 +3,6 @@ from functools import reduce from itertools import product import operator -import random import warnings import numpy as np @@ -99,15 +98,15 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"], ) def lhs(request): - nan_df1 = DataFrame(np.random.rand(10, 5)) + nan_df1 = DataFrame(np.random.default_rng(2).rand(10, 5)) nan_df1[nan_df1 > 0.5] = np.nan opts = ( - DataFrame(np.random.randn(10, 5)), - Series(np.random.randn(5)), + DataFrame(np.random.default_rng(2).randn(10, 5)), + Series(np.random.default_rng(2).randn(5)), Series([1, 2, np.nan, np.nan, 5]), nan_df1, - np.random.randn(), + np.random.default_rng(2).randn(), ) return opts[request.param] @@ -367,7 +366,7 @@ def test_frame_invert(self, engine, parser): # ~ ## # frame # float always raises - lhs = DataFrame(np.random.randn(5, 2)) + lhs = DataFrame(np.random.default_rng(2).randn(5, 2)) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert_dd'" with pytest.raises(NotImplementedError, match=msg): @@ -378,7 +377,7 @@ def test_frame_invert(self, engine, parser): pd.eval(expr, engine=engine, parser=parser) # int raises on numexpr - lhs = DataFrame(np.random.randint(5, size=(5, 2))) + lhs = DataFrame(np.random.default_rng(2).randint(5, size=(5, 2))) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert" with pytest.raises(NotImplementedError, match=msg): @@ -389,13 +388,15 @@ def 
test_frame_invert(self, engine, parser): tm.assert_frame_equal(expect, result) # bool always works - lhs = DataFrame(np.random.rand(5, 2) > 0.5) + lhs = DataFrame(np.random.default_rng(2).rand(5, 2) > 0.5) expect = ~lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_frame_equal(expect, result) # object raises - lhs = DataFrame({"b": ["a", 1, 2.0], "c": np.random.rand(3) > 0.5}) + lhs = DataFrame( + {"b": ["a", 1, 2.0], "c": np.random.default_rng(2).rand(3) > 0.5} + ) if engine == "numexpr": with pytest.raises(ValueError, match="unknown type object"): pd.eval(expr, engine=engine, parser=parser) @@ -410,7 +411,7 @@ def test_series_invert(self, engine, parser): # series # float raises - lhs = Series(np.random.randn(5)) + lhs = Series(np.random.default_rng(2).randn(5)) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert_dd'" with pytest.raises(NotImplementedError, match=msg): @@ -421,7 +422,7 @@ def test_series_invert(self, engine, parser): pd.eval(expr, engine=engine, parser=parser) # int raises on numexpr - lhs = Series(np.random.randint(5, size=5)) + lhs = Series(np.random.default_rng(2).randint(5, size=5)) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert" with pytest.raises(NotImplementedError, match=msg): @@ -432,7 +433,7 @@ def test_series_invert(self, engine, parser): tm.assert_series_equal(expect, result) # bool - lhs = Series(np.random.rand(5) > 0.5) + lhs = Series(np.random.default_rng(2).rand(5) > 0.5) expect = ~lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_series_equal(expect, result) @@ -455,19 +456,19 @@ def test_frame_negate(self, engine, parser): expr = "-lhs" # float - lhs = DataFrame(np.random.randn(5, 2)) + lhs = DataFrame(np.random.default_rng(2).randn(5, 2)) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_frame_equal(expect, result) # int - lhs = DataFrame(np.random.randint(5, size=(5, 2))) + lhs = 
DataFrame(np.random.default_rng(2).randint(5, size=(5, 2))) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_frame_equal(expect, result) # bool doesn't work with numexpr but works elsewhere - lhs = DataFrame(np.random.rand(5, 2) > 0.5) + lhs = DataFrame(np.random.default_rng(2).rand(5, 2) > 0.5) if engine == "numexpr": msg = "couldn't find matching opcode for 'neg_bb'" with pytest.raises(NotImplementedError, match=msg): @@ -481,19 +482,19 @@ def test_series_negate(self, engine, parser): expr = "-lhs" # float - lhs = Series(np.random.randn(5)) + lhs = Series(np.random.default_rng(2).randn(5)) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_series_equal(expect, result) # int - lhs = Series(np.random.randint(5, size=5)) + lhs = Series(np.random.default_rng(2).randint(5, size=5)) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_series_equal(expect, result) # bool doesn't work with numexpr but works elsewhere - lhs = Series(np.random.rand(5) > 0.5) + lhs = Series(np.random.default_rng(2).rand(5) > 0.5) if engine == "numexpr": msg = "couldn't find matching opcode for 'neg_bb'" with pytest.raises(NotImplementedError, match=msg): @@ -507,11 +508,11 @@ def test_series_negate(self, engine, parser): "lhs", [ # Float - DataFrame(np.random.randn(5, 2)), + DataFrame(np.random.default_rng(2).randn(5, 2)), # Int - DataFrame(np.random.randint(5, size=(5, 2))), + DataFrame(np.random.default_rng(2).randint(5, size=(5, 2))), # bool doesn't work with numexpr but works elsewhere - DataFrame(np.random.rand(5, 2) > 0.5), + DataFrame(np.random.default_rng(2).rand(5, 2) > 0.5), ], ) def test_frame_pos(self, lhs, engine, parser): @@ -525,11 +526,11 @@ def test_frame_pos(self, lhs, engine, parser): "lhs", [ # Float - Series(np.random.randn(5)), + Series(np.random.default_rng(2).randn(5)), # Int - Series(np.random.randint(5, size=5)), + Series(np.random.default_rng(2).randint(5, size=5)), # bool doesn't work 
with numexpr but works elsewhere - Series(np.random.rand(5) > 0.5), + Series(np.random.default_rng(2).rand(5) > 0.5), ], ) def test_series_pos(self, lhs, engine, parser): @@ -622,8 +623,8 @@ def test_unary_in_function(self): ), ) def test_disallow_scalar_bool_ops(self, ex, engine, parser): - x, a, b = np.random.randn(3), 1, 2 # noqa: F841 - df = DataFrame(np.random.randn(3, 2)) # noqa: F841 + x, a, b = np.random.default_rng(2).randn(3), 1, 2 # noqa: F841 + df = DataFrame(np.random.default_rng(2).randn(3, 2)) # noqa: F841 msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" with pytest.raises(NotImplementedError, match=msg): @@ -716,7 +717,7 @@ def test_and_logic_string_match(self): assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}") -f = lambda *args, **kwargs: np.random.randn() +f = lambda *args, **kwargs: np.random.default_rng(2).randn() # ------------------------------------- @@ -793,7 +794,11 @@ def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type): res = pd.eval("df < 2", engine=engine, parser=parser) tm.assert_frame_equal(res, df < 2) - df3 = DataFrame(np.random.randn(*df.shape), index=df.index, columns=df.columns) + df3 = DataFrame( + np.random.default_rng(2).randn(*df.shape), + index=df.index, + columns=df.columns, + ) res = pd.eval("df < df3", engine=engine, parser=parser) tm.assert_frame_equal(res, df < df3) @@ -833,7 +838,7 @@ def test_basic_frame_series_alignment( 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type ) index = getattr(df, index_name) - s = Series(np.random.randn(5), index[:5]) + s = Series(np.random.default_rng(2).randn(5), index[:5]) if should_warn(df.index, s.index): with tm.assert_produces_warning(RuntimeWarning): @@ -873,7 +878,7 @@ def test_basic_series_frame_alignment( 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type ) index = getattr(df, index_name) - s = Series(np.random.randn(5), index[:5]) + s = Series(np.random.default_rng(2).randn(5), 
index[:5]) if should_warn(s.index, df.index): with tm.assert_produces_warning(RuntimeWarning): res = pd.eval("s + df", engine=engine, parser=parser) @@ -900,7 +905,7 @@ def test_series_frame_commutativity( 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type ) index = getattr(df, index_name) - s = Series(np.random.randn(5), index[:5]) + s = Series(np.random.default_rng(2).randn(5), index[:5]) lhs = f"s {op} df" rhs = f"df {op} s" @@ -929,8 +934,8 @@ def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): with warnings.catch_warnings(record=True): warnings.simplefilter("always", RuntimeWarning) - index_name = random.choice(["index", "columns"]) - obj_name = random.choice(["df", "df2"]) + index_name = np.random.default_rng(2).choice(["index", "columns"]) + obj_name = np.random.default_rng(2).choice(["df", "df2"]) df = tm.makeCustomDataframe( m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1 @@ -939,7 +944,7 @@ def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 ) index = getattr(locals().get(obj_name), index_name) - ser = Series(np.random.randn(n), index[:n]) + ser = Series(np.random.default_rng(2).randn(n), index[:n]) if r2 == "dt" or c2 == "dt": if engine == "numexpr": @@ -966,8 +971,8 @@ def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): tm.assert_frame_equal(res, expected) def test_performance_warning_for_poor_alignment(self, engine, parser): - df = DataFrame(np.random.randn(1000, 10)) - s = Series(np.random.randn(10000)) + df = DataFrame(np.random.default_rng(2).randn(1000, 10)) + s = Series(np.random.default_rng(2).randn(10000)) if engine == "numexpr": seen = PerformanceWarning else: @@ -976,17 +981,17 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): with tm.assert_produces_warning(seen): pd.eval("df + s", engine=engine, parser=parser) - s = Series(np.random.randn(1000)) + s = 
Series(np.random.default_rng(2).randn(1000)) with tm.assert_produces_warning(False): pd.eval("df + s", engine=engine, parser=parser) - df = DataFrame(np.random.randn(10, 10000)) - s = Series(np.random.randn(10000)) + df = DataFrame(np.random.default_rng(2).randn(10, 10000)) + s = Series(np.random.default_rng(2).randn(10000)) with tm.assert_produces_warning(False): pd.eval("df + s", engine=engine, parser=parser) - df = DataFrame(np.random.randn(10, 10)) - s = Series(np.random.randn(10000)) + df = DataFrame(np.random.default_rng(2).randn(10, 10)) + s = Series(np.random.default_rng(2).randn(10000)) is_python_engine = engine == "python" @@ -1087,8 +1092,8 @@ def test_bool_ops_with_constants(self, rhs, lhs, op): assert res == exp def test_4d_ndarray_fails(self): - x = np.random.randn(3, 4, 5, 6) - y = Series(np.random.randn(10)) + x = np.random.default_rng(2).randn(3, 4, 5, 6) + y = Series(np.random.default_rng(2).randn(10)) msg = "N-dimensional objects, where N > 2, are not supported with eval" with pytest.raises(NotImplementedError, match=msg): self.eval("x + y", local_dict={"x": x, "y": y}) @@ -1098,23 +1103,23 @@ def test_constant(self): assert x == 1 def test_single_variable(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) df2 = self.eval("df", local_dict={"df": df}) tm.assert_frame_equal(df, df2) def test_failing_subscript_with_name_error(self): - df = DataFrame(np.random.randn(5, 3)) # noqa: F841 + df = DataFrame(np.random.default_rng(2).randn(5, 3)) # noqa: F841 with pytest.raises(NameError, match="name 'x' is not defined"): self.eval("df[x > 2] > 2") def test_lhs_expression_subscript(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) result = self.eval("(df + 1)[df > 2]", local_dict={"df": df}) expected = (df + 1)[df > 2] tm.assert_frame_equal(result, expected) def test_attr_expression(self): - df = DataFrame(np.random.randn(5, 3), columns=list("abc")) + df 
= DataFrame(np.random.default_rng(2).randn(5, 3), columns=list("abc")) expr1 = "df.a < df.b" expec1 = df.a < df.b expr2 = "df.a + df.b + df.c" @@ -1127,34 +1132,34 @@ def test_attr_expression(self): tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df})) def test_assignment_fails(self): - df = DataFrame(np.random.randn(5, 3), columns=list("abc")) - df2 = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=list("abc")) + df2 = DataFrame(np.random.default_rng(2).randn(5, 3)) expr1 = "df = df2" msg = "cannot assign without a target object" with pytest.raises(ValueError, match=msg): self.eval(expr1, local_dict={"df": df, "df2": df2}) def test_assignment_column_multiple_raise(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) # multiple assignees with pytest.raises(SyntaxError, match="invalid syntax"): df.eval("d c = a + b") def test_assignment_column_invalid_assign(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) # invalid assignees msg = "left hand side of an assignment must be a single name" with pytest.raises(SyntaxError, match=msg): df.eval("d,c = a + b") def test_assignment_column_invalid_assign_function_call(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) msg = "cannot assign to function call" with pytest.raises(SyntaxError, match=msg): df.eval('Timestamp("20131001") = a + b') def test_assignment_single_assign_existing(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) # single assignment - existing variable expected = df.copy() expected["a"] = expected["a"] + expected["b"] @@ -1162,7 +1167,7 @@ def test_assignment_single_assign_existing(self): 
tm.assert_frame_equal(df, expected) def test_assignment_single_assign_new(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) # single assignment - new variable expected = df.copy() expected["c"] = expected["a"] + expected["b"] @@ -1170,7 +1175,7 @@ def test_assignment_single_assign_new(self): tm.assert_frame_equal(df, expected) def test_assignment_single_assign_local_overlap(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) df = df.copy() a = 1 # noqa: F841 df.eval("a = 1 + b", inplace=True) @@ -1180,7 +1185,7 @@ def test_assignment_single_assign_local_overlap(self): tm.assert_frame_equal(df, expected) def test_assignment_single_assign_name(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) a = 1 # noqa: F841 old_a = df.a.copy() @@ -1190,7 +1195,7 @@ def test_assignment_single_assign_name(self): assert result.name is None def test_assignment_multiple_raises(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) # multiple assignment df.eval("c = a + b", inplace=True) msg = "can only assign a single expression" @@ -1198,7 +1203,7 @@ def test_assignment_multiple_raises(self): df.eval("c = a = b") def test_assignment_explicit(self): - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) # explicit targets self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True) expected = df.copy() @@ -1217,7 +1222,7 @@ def test_column_in(self): @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.") def test_assignment_not_inplace(self): # see gh-9297 - df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + df = 
DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) actual = df.eval("c = a + b", inplace=False) assert actual is not None @@ -1422,7 +1427,7 @@ def test_nested_period_index_subscript_expression(self): tm.assert_frame_equal(r, e) def test_date_boolean(self, engine, parser): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) res = self.eval( "df.dates1 < 20130101", @@ -1496,7 +1501,7 @@ def test_check_many_exprs(self, engine, parser): ], ) def test_fails_and_or_not(self, expr, engine, parser): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) if parser == "python": msg = "'BoolOp' nodes are not implemented" if "not" in expr: @@ -1520,7 +1525,7 @@ def test_fails_and_or_not(self, expr, engine, parser): @pytest.mark.parametrize("char", ["|", "&"]) def test_fails_ampersand_pipe(self, char, engine, parser): - df = DataFrame(np.random.randn(5, 3)) # noqa: F841 + df = DataFrame(np.random.default_rng(2).randn(5, 3)) # noqa: F841 ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)" if parser == "python": msg = "cannot evaluate scalar only bool ops" @@ -1541,7 +1546,7 @@ def eval(self, *args, **kwargs): ) @pytest.mark.parametrize("fn", _unary_math_ops) def test_unary_functions(self, fn): - df = DataFrame({"a": np.random.randn(10)}) + df = DataFrame({"a": np.random.default_rng(2).randn(10)}) a = df.a expr = f"{fn}(a)" @@ -1552,7 +1557,12 @@ def test_unary_functions(self, fn): @pytest.mark.parametrize("fn", _binary_math_ops) def test_binary_functions(self, fn): - df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df = DataFrame( + { + "a": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).randn(10), + } + ) a = df.a b = df.b @@ -1563,7 +1573,12 @@ def test_binary_functions(self, fn): tm.assert_almost_equal(got, expect, check_names=False) def test_df_use_case(self, engine, parser): - df = 
DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df = DataFrame( + { + "a": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).randn(10), + } + ) df.eval( "e = arctan2(sin(a), b)", engine=engine, @@ -1575,7 +1590,12 @@ def test_df_use_case(self, engine, parser): tm.assert_series_equal(got, expect, check_names=False) def test_df_arithmetic_subexpression(self, engine, parser): - df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df = DataFrame( + { + "a": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).randn(10), + } + ) df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True) got = df.e expect = np.sin(df.a + df.b) @@ -1597,7 +1617,7 @@ def test_result_types(self, dtype, expect_dtype, engine, parser): # Did not test complex64 because DataFrame is converting it to # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952 - df = DataFrame({"a": np.random.randn(10).astype(dtype)}) + df = DataFrame({"a": np.random.default_rng(2).randn(10).astype(dtype)}) assert df.a.dtype == dtype df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True) got = df.b @@ -1607,21 +1627,21 @@ def test_result_types(self, dtype, expect_dtype, engine, parser): tm.assert_series_equal(got, expect, check_names=False) def test_undefined_func(self, engine, parser): - df = DataFrame({"a": np.random.randn(10)}) + df = DataFrame({"a": np.random.default_rng(2).randn(10)}) msg = '"mysin" is not a supported function' with pytest.raises(ValueError, match=msg): df.eval("mysin(a)", engine=engine, parser=parser) def test_keyword_arg(self, engine, parser): - df = DataFrame({"a": np.random.randn(10)}) + df = DataFrame({"a": np.random.default_rng(2).randn(10)}) msg = 'Function "sin" does not support keyword arguments' with pytest.raises(TypeError, match=msg): df.eval("sin(x=a)", engine=engine, parser=parser) -_var_s = np.random.randn(10) +_var_s = np.random.default_rng(2).randn(10) class TestScope: @@ 
-1780,7 +1800,10 @@ def test_more_than_one_expression_raises(engine, parser): @pytest.mark.parametrize("lhs", (int, float)) @pytest.mark.parametrize("rhs", (int, float)) def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): - gen = {int: lambda: np.random.randint(10), float: np.random.randn} + gen = { + int: lambda: np.random.default_rng(2).randint(10), + float: np.random.default_rng(2).randn, + } mid = gen[lhs]() # noqa: F841 lhs = gen[lhs]() @@ -1823,7 +1846,7 @@ def test_inf(engine, parser): @pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"]) def test_query_token(engine, column): # See: https://github.com/pandas-dev/pandas/pull/42826 - df = DataFrame(np.random.randn(5, 2), columns=[column, "b"]) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=[column, "b"]) expected = df[df[column] > 5] query_string = f"`{column}` > 5" result = df.query(query_string, engine=engine) @@ -1860,7 +1883,9 @@ def test_eval_no_support_column_name(request, column): ) ) - df = DataFrame(np.random.randint(0, 100, size=(10, 2)), columns=[column, "col1"]) + df = DataFrame( + np.random.default_rng(2).randint(0, 100, size=(10, 2)), columns=[column, "col1"] + ) expected = df[df[column] > 6] result = df.query(f"{column}>6") diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index 25af197552335..3a0a1ea3be5a0 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -75,7 +75,7 @@ def func(): def test_delete(using_copy_on_write): - df = DataFrame(np.random.randn(4, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["a", "b", "c"]) del df["b"] if using_copy_on_write: # TODO: This should not have references, delete makes a shallow copy diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 9a5bd5b1d047b..5b9442dc857ed 100644 --- 
a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -19,7 +19,7 @@ class TestABCClasses: categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index) - sparse_array = pd.arrays.SparseArray(np.random.randn(10)) + sparse_array = pd.arrays.SparseArray(np.random.default_rng(2).randn(10)) datetime_array = pd.core.arrays.DatetimeArray(datetime_index) timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 393c01488c234..3d2c1990bb7f8 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -2,7 +2,6 @@ import decimal import numbers -import random import sys from typing import TYPE_CHECKING @@ -282,7 +281,7 @@ def to_decimal(values, context=None): def make_data(): - return [decimal.Decimal(random.random()) for _ in range(100)] + return [decimal.Decimal(val) for val in np.random.default_rng(2).random(100)] DecimalArray._add_arithmetic_ops() diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index afd04817f05c7..3cbf897e7b128 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -267,7 +267,7 @@ def test_compare_scalar(self, data, comparison_op): def test_compare_array(self, data, comparison_op): s = pd.Series(data) - alter = np.random.choice([-1, 0, 1], len(data)) + alter = np.random.default_rng(2).choice([-1, 0, 1], len(data)) # Randomly double, halve or keep same value other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] self._compare_other(s, data, comparison_op, other) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 8495ffbbbe70d..0a5ccd662bc1f 100644 --- 
a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -19,7 +19,6 @@ ) import itertools import numbers -import random import string import sys from typing import ( @@ -236,11 +235,12 @@ def _values_for_argsort(self): def make_data(): # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer + rng = np.random.default_rng(2) return [ UserDict( [ - (random.choice(string.ascii_letters), random.randint(0, 100)) - for _ in range(random.randint(0, 10)) + (rng.choice(string.ascii_letters), rng.randint(0, 100)) + for _ in range(rng.randint(0, 10)) ] ) for _ in range(100) diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 68ffaed2b98f2..5324b10455eaa 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -6,7 +6,6 @@ from __future__ import annotations import numbers -import random import string from typing import TYPE_CHECKING @@ -126,9 +125,10 @@ def _concat_same_type(cls, to_concat): def make_data(): # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer + rng = np.random.default_rng(2) data = np.empty(100, dtype=object) data[:] = [ - [random.choice(string.ascii_letters) for _ in range(random.randint(0, 10))] + [rng.choice(string.ascii_letters) for _ in range(rng.randint(0, 10))] for _ in range(100) ] return data diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 91ca358ca0709..1e17bd6bf8b69 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -31,7 +31,7 @@ def make_data(): while True: - values = np.random.choice(list(string.ascii_letters), size=100) + values = np.random.default_rng(2).choice(list(string.ascii_letters), size=100) # ensure we meet the requirements # 1. first two not null # 2. 
first and second are different diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 0f916cea9d518..a51c25abf1560 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -28,8 +28,8 @@ def make_data(): N = 100 - left_array = np.random.uniform(size=N).cumsum() - right_array = left_array + np.random.uniform(size=N) + left_array = np.random.default_rng(2).uniform(size=N).cumsum() + right_array = left_array + np.random.default_rng(2).uniform(size=N) return [Interval(left, right) for left, right in zip(left_array, right_array)] diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index d8bab71b76df4..5d77ef67b440a 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -28,9 +28,9 @@ def make_data(fill_value): if np.isnan(fill_value): - data = np.random.uniform(size=100) + data = np.random.default_rng(2).uniform(size=100) else: - data = np.random.randint(1, 100, size=100) + data = np.random.default_rng(2).randint(1, 100, size=100) if data[0] == data[1]: data[0] += 1 diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index eb166691d3314..645789365d70f 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -55,9 +55,9 @@ def dtype(string_storage): @pytest.fixture def data(dtype, chunked): - strings = np.random.choice(list(string.ascii_letters), size=100) + strings = np.random.default_rng(2).choice(list(string.ascii_letters), size=100) while strings[0] == strings[1]: - strings = np.random.choice(list(string.ascii_letters), size=100) + strings = np.random.default_rng(2).choice(list(string.ascii_letters), size=100) arr = dtype.construct_array_type()._from_sequence(strings) return split_array(arr) if chunked else arr diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 
97cf75acbd629..86ddd8edfb210 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -253,9 +253,9 @@ def frame_of_index_cols(): "A": ["foo", "foo", "foo", "bar", "bar"], "B": ["one", "two", "three", "one", "two"], "C": ["a", "b", "c", "d", "e"], - "D": np.random.randn(5), - "E": np.random.randn(5), - ("tuple", "as", "label"): np.random.randn(5), + "D": np.random.default_rng(2).randn(5), + "E": np.random.default_rng(2).randn(5), + ("tuple", "as", "label"): np.random.default_rng(2).randn(5), } ) return df diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index bd8f1ae5adf5b..1a7142d59d839 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -59,13 +59,13 @@ def test_from_records_with_datetimes(self): def test_from_records_sequencelike(self): df = DataFrame( { - "A": np.array(np.random.randn(6), dtype=np.float64), - "A1": np.array(np.random.randn(6), dtype=np.float64), + "A": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), + "A1": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), "B": np.array(np.arange(6), dtype=np.int64), "C": ["foo"] * 6, "D": np.array([True, False] * 3, dtype=bool), - "E": np.array(np.random.randn(6), dtype=np.float32), - "E1": np.array(np.random.randn(6), dtype=np.float32), + "E": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), + "E1": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), "F": np.array(np.arange(6), dtype=np.int32), } ) @@ -140,13 +140,13 @@ def test_from_records_dictlike(self): # test the dict methods df = DataFrame( { - "A": np.array(np.random.randn(6), dtype=np.float64), - "A1": np.array(np.random.randn(6), dtype=np.float64), + "A": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), + "A1": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), "B": np.array(np.arange(6), 
dtype=np.int64), "C": ["foo"] * 6, "D": np.array([True, False] * 3, dtype=bool), - "E": np.array(np.random.randn(6), dtype=np.float32), - "E1": np.array(np.random.randn(6), dtype=np.float32), + "E": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), + "E1": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), "F": np.array(np.arange(6), dtype=np.int32), } ) @@ -175,15 +175,15 @@ def test_from_records_dictlike(self): tm.assert_frame_equal(r, df) def test_from_records_with_index_data(self): - df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["A", "B", "C"]) - data = np.random.randn(10) + data = np.random.default_rng(2).randn(10) with tm.assert_produces_warning(FutureWarning): df1 = DataFrame.from_records(df, index=data) tm.assert_index_equal(df1.index, Index(data)) def test_from_records_bad_index_column(self): - df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["A", "B", "C"]) # should pass with tm.assert_produces_warning(FutureWarning): @@ -390,8 +390,8 @@ def test_from_records_set_index_name(self): def create_dict(order_id): return { "order_id": order_id, - "quantity": np.random.randint(1, 10), - "price": np.random.randint(1, 10), + "quantity": np.random.default_rng(2).randint(1, 10), + "price": np.random.default_rng(2).randint(1, 10), } documents = [create_dict(i) for i in range(10)] diff --git a/pandas/tests/frame/indexing/test_delitem.py b/pandas/tests/frame/indexing/test_delitem.py index fa10c9ef7b85a..0f76576c2a526 100644 --- a/pandas/tests/frame/indexing/test_delitem.py +++ b/pandas/tests/frame/indexing/test_delitem.py @@ -16,7 +16,7 @@ def test_delitem(self, float_frame): def test_delitem_multiindex(self): midx = MultiIndex.from_product([["A", "B"], [1, 2]]) - df = DataFrame(np.random.randn(4, 4), columns=midx) + df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=midx) 
assert len(df.columns) == 4 assert ("A",) in df.columns assert "A" in df.columns @@ -55,6 +55,6 @@ def test_delitem_col_still_multiindex(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(3, 4), columns=index) + df = DataFrame(np.random.default_rng(2).randn(3, 4), columns=index) del df[("a", "", "")] assert isinstance(df.columns, MultiIndex) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 7880916f66812..31f1696b0e3c6 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -37,7 +37,7 @@ def test_getitem_unused_level_raises(self): def test_getitem_periodindex(self): rng = period_range("1/1/2000", periods=5) - df = DataFrame(np.random.randn(10, 5), columns=rng) + df = DataFrame(np.random.default_rng(2).randn(10, 5), columns=rng) ts = df[rng[0]] tm.assert_series_equal(ts, df.iloc[:, 0]) @@ -93,7 +93,7 @@ def test_getitem_list_missing_key(self): def test_getitem_list_duplicates(self): # GH#1943 - df = DataFrame(np.random.randn(4, 4), columns=list("AABC")) + df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=list("AABC")) df.columns.name = "foo" result = df[["B", "C"]] @@ -129,7 +129,7 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): else: # MultiIndex columns frame = DataFrame( - np.random.randn(8, 3), + np.random.default_rng(2).randn(8, 3), columns=Index( [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")], name=("sth", "sth2"), @@ -425,7 +425,7 @@ def test_getitem_slice_float64(self, frame_or_series): start, end = values[[5, 15]] - data = np.random.randn(20, 3) + data = np.random.default_rng(2).randn(20, 3) if frame_or_series is not DataFrame: data = data[:, 0] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9005798d66d17..9e88b09e3b244 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ 
b/pandas/tests/frame/indexing/test_indexing.py @@ -68,9 +68,9 @@ def test_getitem_numeric_should_not_fallback_to_positional(self, any_numeric_dty def test_getitem2(self, float_frame): df = float_frame.copy() - df["$10"] = np.random.randn(len(df)) + df["$10"] = np.random.default_rng(2).randn(len(df)) - ad = np.random.randn(len(df)) + ad = np.random.default_rng(2).randn(len(df)) df["@awesome_domain"] = ad with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")): @@ -224,7 +224,7 @@ def test_getitem_boolean_list(self, lst): tm.assert_frame_equal(result, expected) def test_getitem_boolean_iadd(self): - arr = np.random.randn(5, 5) + arr = np.random.default_rng(2).randn(5, 5) df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"]) @@ -245,7 +245,9 @@ def test_boolean_index_empty_corner(self): def test_getitem_ix_mixed_integer(self): df = DataFrame( - np.random.randn(4, 3), index=[1, 10, "C", "E"], columns=[1, 2, 3] + np.random.default_rng(2).randn(4, 3), + index=[1, 10, "C", "E"], + columns=[1, 2, 3], ) result = df.iloc[:-1] @@ -304,7 +306,7 @@ def test_setitem(self, float_frame, using_copy_on_write): tm.assert_series_equal(series, float_frame["col6"], check_names=False) # set ndarray - arr = np.random.randn(len(float_frame)) + arr = np.random.default_rng(2).randn(len(float_frame)) float_frame["col9"] = arr assert (float_frame["col9"] == arr).all() @@ -469,7 +471,7 @@ def test_setitem_corner(self, float_frame): def test_setitem_corner2(self): data = { "title": ["foobar", "bar", "foobar"] + ["foobar"] * 17, - "cruft": np.random.random(20), + "cruft": np.random.default_rng(2).random(20), } df = DataFrame(data) @@ -522,7 +524,7 @@ def test_loc_setitem_boolean_mask_allfalse(self): tm.assert_frame_equal(result, df) def test_getitem_fancy_slice_integers_step(self): - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) # this is OK df.iloc[:8:2] @@ -530,7 +532,7 @@ def test_getitem_fancy_slice_integers_step(self): assert 
isna(df.iloc[:8:2]).values.all() def test_getitem_setitem_integer_slice_keyerrors(self): - df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 5), index=range(0, 20, 2)) # this is OK cp = df.copy() @@ -600,13 +602,13 @@ def test_getitem_setitem_non_ix_labels(self): tm.assert_frame_equal(result2, expected) def test_ix_multi_take(self): - df = DataFrame(np.random.randn(3, 2)) + df = DataFrame(np.random.default_rng(2).randn(3, 2)) rs = df.loc[df.index == 0, :] xp = df.reindex([0]) tm.assert_frame_equal(rs, xp) # GH#1321 - df = DataFrame(np.random.randn(3, 2)) + df = DataFrame(np.random.default_rng(2).randn(3, 2)) rs = df.loc[df.index == 0, df.columns == 1] xp = df.reindex(index=[0], columns=[1]) tm.assert_frame_equal(rs, xp) @@ -632,7 +634,7 @@ def test_setitem_fancy_scalar(self, float_frame): f[col] for idx in f.index[::5]: i = f.index.get_loc(idx) - val = np.random.randn() + val = np.random.default_rng(2).randn() expected.iloc[i, j] = val ix[idx, col] = val @@ -706,7 +708,7 @@ def test_getitem_setitem_boolean_misaligned(self, float_frame): tm.assert_frame_equal(cp, expected) def test_getitem_setitem_boolean_multi(self): - df = DataFrame(np.random.randn(3, 2)) + df = DataFrame(np.random.default_rng(2).randn(3, 2)) # get k1 = np.array([True, False, True]) @@ -722,7 +724,7 @@ def test_getitem_setitem_boolean_multi(self): def test_getitem_setitem_float_labels(self, using_array_manager): index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(np.random.randn(5, 5), index=index) + df = DataFrame(np.random.default_rng(2).randn(5, 5), index=index) result = df.loc[1.5:4] expected = df.reindex([1.5, 2, 3, 4]) @@ -750,7 +752,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) - df = DataFrame(np.random.randn(5, 5), index=index) + df = DataFrame(np.random.default_rng(2).randn(5, 5), index=index) # positional slicing only via iloc! 
msg = ( @@ -809,7 +811,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): def test_setitem_single_column_mixed_datetime(self): df = DataFrame( - np.random.randn(5, 3), + np.random.default_rng(2).randn(5, 3), index=["a", "b", "c", "d", "e"], columns=["foo", "bar", "baz"], ) @@ -939,7 +941,10 @@ def test_setitem_frame_align(self, float_frame): def test_getitem_setitem_ix_duplicates(self): # #1201 - df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + df = DataFrame( + np.random.default_rng(2).randn(5, 3), + index=["foo", "foo", "bar", "baz", "bar"], + ) result = df.loc["foo"] expected = df[:2] @@ -955,7 +960,10 @@ def test_getitem_setitem_ix_duplicates(self): def test_getitem_ix_boolean_duplicates_multiple(self): # #1201 - df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + df = DataFrame( + np.random.default_rng(2).randn(5, 3), + index=["foo", "foo", "bar", "baz", "bar"], + ) result = df.loc[["bar"]] exp = df.iloc[[2, 4]] @@ -1001,7 +1009,7 @@ def test_single_element_ix_dont_upcast(self, float_frame): tm.assert_series_equal(result, expected) def test_iloc_row(self): - df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 4), index=range(0, 20, 2)) result = df.iloc[1] exp = df.loc[2] @@ -1022,7 +1030,7 @@ def test_iloc_row(self): tm.assert_frame_equal(result, expected) def test_iloc_row_slice_view(self, using_copy_on_write, request): - df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 4), index=range(0, 20, 2)) original = df.copy() # verify slice is view @@ -1042,7 +1050,7 @@ def test_iloc_row_slice_view(self, using_copy_on_write, request): tm.assert_series_equal(df[2], exp_col) def test_iloc_col(self): - df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + df = DataFrame(np.random.default_rng(2).randn(4, 10), columns=range(0, 20, 2)) result = 
df.iloc[:, 1] exp = df.loc[:, 2] @@ -1063,7 +1071,7 @@ def test_iloc_col(self): tm.assert_frame_equal(result, expected) def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write): - df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + df = DataFrame(np.random.default_rng(2).randn(4, 10), columns=range(0, 20, 2)) original = df.copy() subset = df.iloc[:, slice(4, 8)] diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 13e43abe0dd7f..11bd11840655f 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -18,7 +18,9 @@ class TestDataFrameInsert: def test_insert(self): df = DataFrame( - np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + np.random.default_rng(2).randn(5, 3), + index=np.arange(5), + columns=["c", "b", "a"], ) df.insert(0, "foo", df["a"]) @@ -72,7 +74,7 @@ def test_insert_with_columns_dups(self): tm.assert_frame_equal(df, exp) def test_insert_item_cache(self, using_array_manager, using_copy_on_write): - df = DataFrame(np.random.randn(4, 3)) + df = DataFrame(np.random.default_rng(2).randn(4, 3)) ser = df[0] if using_array_manager: @@ -97,7 +99,9 @@ def test_insert_item_cache(self, using_array_manager, using_copy_on_write): def test_insert_EA_no_warning(self): # PerformanceWarning about fragmented frame should not be raised when # using EAs (https://github.com/pandas-dev/pandas/issues/44098) - df = DataFrame(np.random.randint(0, 100, size=(3, 100)), dtype="Int64") + df = DataFrame( + np.random.default_rng(2).randint(0, 100, size=(3, 100)), dtype="Int64" + ) with tm.assert_produces_warning(None): df["a"] = np.array([1, 2, 3]) diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index ed0bf256d1ee7..8e8749ffe4dbc 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -18,14 +18,14 @@ class TestDataFrameMask: def 
test_mask(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) cond = df > 0 rs = df.where(cond, np.nan) tm.assert_frame_equal(rs, df.mask(df <= 0)) tm.assert_frame_equal(rs, df.mask(~cond)) - other = DataFrame(np.random.randn(5, 3)) + other = DataFrame(np.random.default_rng(2).randn(5, 3)) rs = df.where(cond, other) tm.assert_frame_equal(rs, df.mask(df <= 0, other)) tm.assert_frame_equal(rs, df.mask(~cond, other)) @@ -40,7 +40,7 @@ def test_mask2(self): def test_mask_inplace(self): # GH#8801 - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) cond = df > 0 rdf = df.copy() @@ -85,7 +85,7 @@ def test_mask_callable(self): def test_mask_dtype_bool_conversion(self): # GH#3733 - df = DataFrame(data=np.random.randn(100, 50)) + df = DataFrame(data=np.random.default_rng(2).randn(100, 50)) df = df.where(df > 0) # create nans bools = df > 0 mask = isna(df) diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index 8d7a5cbcc08e0..098acac2d0097 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -41,7 +41,9 @@ def test_set_value_resize(self, float_frame): assert res.loc["foobar", "baz"] == "sam" def test_set_value_with_index_dtype_change(self): - df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC")) + df_orig = DataFrame( + np.random.default_rng(2).randn(3, 3), index=range(3), columns=list("ABC") + ) # this is actually ambiguous as the 2 is interpreted as a positional # so column is not created diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 6060f6b5e278a..ebe7515691ecb 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -58,13 +58,13 @@ class mystring(str): ) def test_setitem_dtype(self, dtype, float_frame): # Use randint since 
casting negative floats to uints is undefined - arr = np.random.randint(1, 10, len(float_frame)) + arr = np.random.default_rng(2).randint(1, 10, len(float_frame)) float_frame[dtype] = np.array(arr, dtype=dtype) assert float_frame[dtype].dtype.name == dtype def test_setitem_list_not_dataframe(self, float_frame): - data = np.random.randn(len(float_frame), 2) + data = np.random.default_rng(2).randn(len(float_frame), 2) float_frame[["A", "B"]] = data tm.assert_almost_equal(float_frame[["A", "B"]].values, data) @@ -84,7 +84,9 @@ def test_setitem_error_msmgs(self): df["newcol"] = ser # GH 4107, more descriptive error message - df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) + df = DataFrame( + np.random.default_rng(2).randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"] + ) msg = "Cannot set a DataFrame with multiple columns to the single column gr" with pytest.raises(ValueError, match=msg): @@ -95,7 +97,7 @@ def test_setitem_benchmark(self): N = 10 K = 5 df = DataFrame(index=range(N)) - new_col = np.random.randn(N) + new_col = np.random.default_rng(2).randn(N) for i in range(K): df[i] = new_col expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N)) @@ -103,7 +105,9 @@ def test_setitem_benchmark(self): def test_setitem_different_dtype(self): df = DataFrame( - np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + np.random.default_rng(2).randn(5, 3), + index=np.arange(5), + columns=["c", "b", "a"], ) df.insert(0, "foo", df["a"]) df.insert(2, "bar", df["c"]) @@ -352,7 +356,7 @@ def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): def test_setitem_periodindex(self): rng = period_range("1/1/2000", periods=5, name="index") - df = DataFrame(np.random.randn(5, 3), index=rng) + df = DataFrame(np.random.default_rng(2).randn(5, 3), index=rng) df["Index"] = rng rs = Index(df["Index"]) @@ -398,7 +402,7 @@ def test_setitem_period_d_dtype(self): def test_setitem_bool_with_numeric_index(self, dtype): # 
GH#36319 cols = Index([1, 2, 3], dtype=dtype) - df = DataFrame(np.random.randn(3, 3), columns=cols) + df = DataFrame(np.random.default_rng(2).randn(3, 3), columns=cols) df[False] = ["a", "b", "c"] @@ -567,20 +571,20 @@ def test_setitem_multi_index(self): cols = MultiIndex.from_product(it) index = date_range("20141006", periods=20) - vals = np.random.randint(1, 1000, (len(index), len(cols))) + vals = np.random.default_rng(2).randint(1, 1000, (len(index), len(cols))) df = DataFrame(vals, columns=cols, index=index) i, j = df.index.values.copy(), it[-1][:] - np.random.shuffle(i) + np.random.default_rng(2).shuffle(i) df["jim"] = df["jolie"].loc[i, ::-1] tm.assert_frame_equal(df["jim"], df["jolie"]) - np.random.shuffle(j) + np.random.default_rng(2).shuffle(j) df[("joe", "first")] = df[("jolie", "last")].loc[i, j] tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")]) - np.random.shuffle(j) + np.random.default_rng(2).shuffle(j) df[("joe", "last")] = df[("jolie", "first")].loc[i, j] tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) @@ -772,7 +776,7 @@ def expected(self, idx): def test_setitem_dt64series(self, idx, expected): # convert to utc - df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df = DataFrame(np.random.default_rng(2).randn(2, 1), columns=["A"]) df["B"] = idx df["B"] = idx.to_series(index=[0, 1]).dt.tz_convert(None) @@ -782,7 +786,7 @@ def test_setitem_dt64series(self, idx, expected): def test_setitem_datetimeindex(self, idx, expected): # setting a DataFrame column with a tzaware DTI retains the dtype - df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df = DataFrame(np.random.default_rng(2).randn(2, 1), columns=["A"]) # assign to frame df["B"] = idx @@ -791,7 +795,7 @@ def test_setitem_datetimeindex(self, idx, expected): def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): # setting a DataFrame column with a tzaware DTI retains the dtype - df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df = 
DataFrame(np.random.default_rng(2).randn(2, 1), columns=["A"]) # object array of datetimes with a tz df["B"] = idx.to_pydatetime() @@ -838,7 +842,11 @@ def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write): def test_setitem_with_expansion_categorical_dtype(self): # assignment df = DataFrame( - {"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")} + { + "value": np.array( + np.random.default_rng(2).randint(0, 10000, 100), dtype="int32" + ) + } ) labels = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 562f2fbe55c25..906921231d806 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -24,7 +24,7 @@ @pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"]) def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame): if request.param == "default": - return DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + return DataFrame(np.random.default_rng(2).randn(5, 3), columns=["A", "B", "C"]) if request.param == "float_string": return float_string_frame if request.param == "mixed_float": @@ -145,7 +145,7 @@ def _check_align(df, cond, other, check_dtypes=True): def test_where_invalid(self): # invalid conditions - df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=["A", "B", "C"]) cond = df > 0 err1 = (df + 1).values[0:2, :] @@ -367,7 +367,7 @@ def test_where_datetime(self): { "A": date_range("20130102", periods=5), "B": date_range("20130104", periods=5), - "C": np.random.randn(5), + "C": np.random.default_rng(2).randn(5), } ) @@ -430,7 +430,7 @@ def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): def test_where_align(self): def create(): - df = DataFrame(np.random.randn(10, 3)) + df = DataFrame(np.random.default_rng(2).randn(10, 3)) 
df.iloc[3:5, 0] = np.nan df.iloc[4:6, 1] = np.nan df.iloc[5:8, 2] = np.nan @@ -470,7 +470,7 @@ def test_where_complex(self): def test_where_axis(self): # GH 9736 - df = DataFrame(np.random.randn(2, 2)) + df = DataFrame(np.random.default_rng(2).randn(2, 2)) mask = DataFrame([[False, False], [False, False]]) ser = Series([0, 1]) @@ -526,8 +526,10 @@ def test_where_axis_multiple_dtypes(self): # Multiple dtypes (=> multiple Blocks) df = pd.concat( [ - DataFrame(np.random.randn(10, 2)), - DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype="int64"), + DataFrame(np.random.default_rng(2).randn(10, 2)), + DataFrame( + np.random.default_rng(2).randint(0, 10, size=(10, 2)), dtype="int64" + ), ], ignore_index=True, axis=1, diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 7e0623f7beaa9..7d66a037d68e3 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -94,7 +94,9 @@ def test_xs_corner(self): tm.assert_series_equal(result, expected) def test_xs_duplicates(self): - df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"]) + df = DataFrame( + np.random.default_rng(2).randn(5, 2), index=["b", "b", "c", "b", "a"] + ) cross = df.xs("c") exp = df.iloc[2] @@ -150,7 +152,9 @@ def test_xs_doc_example(self): tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=["first", "second"]) - df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + df = DataFrame( + np.random.default_rng(2).randn(3, 8), index=["A", "B", "C"], columns=index + ) result = df.xs(("one", "bar"), level=("second", "first"), axis=1) @@ -162,7 +166,9 @@ def test_xs_integer_key(self): dates = range(20111201, 20111205) ids = list("abcde") index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) - df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"]) + df = DataFrame( + np.random.default_rng(2).randn(len(index), 3), index, ["X", "Y", "Z"] + ) 
result = df.xs(20111201, level="date") expected = df.loc[20111201, :] @@ -176,7 +182,7 @@ def test_xs_level(self, multiindex_dataframe_random_data): tm.assert_frame_equal(result, expected) def test_xs_level_eq_2(self): - arr = np.random.randn(3, 5) + arr = np.random.default_rng(2).randn(3, 5) index = MultiIndex( levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], @@ -266,7 +272,7 @@ def test_xs_missing_values_in_index(self): ) def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index): # see GH#2903 - arr = np.random.randn(4, 4) + arr = np.random.default_rng(2).randn(4, 4) index = MultiIndex( levels=[["a", "b"], ["bar", "foo", "hello", "world"]], codes=[[0, 0, 1, 1], [0, 1, 2, 3]], @@ -337,7 +343,7 @@ def test_xs_IndexSlice_argument_not_implemented(self, frame_or_series): codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) - obj = DataFrame(np.random.randn(6, 4), index=index) + obj = DataFrame(np.random.default_rng(2).randn(6, 4), index=index) if frame_or_series is Series: obj = obj[0] diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index e56d542972e63..7f8953cdfa528 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -25,8 +25,8 @@ def test_align_asfreq_method_raises(self): def test_frame_align_aware(self): idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") - df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) - df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) + df1 = DataFrame(np.random.default_rng(2).randn(len(idx1), 3), idx1) + df2 = DataFrame(np.random.default_rng(2).randn(len(idx2), 3), idx2) new1, new2 = df1.align(df2) assert df1.index.tz == new1.index.tz assert df2.index.tz == new2.index.tz diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 
a36f0d11fcfb4..c644e2b7a9c99 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -71,7 +71,7 @@ def test_asfreq_datetimeindex_empty(self, frame_or_series): def test_tz_aware_asfreq_smoke(self, tz, frame_or_series): dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) - obj = frame_or_series(np.random.randn(len(dr)), index=dr) + obj = frame_or_series(np.random.default_rng(2).randn(len(dr)), index=dr) # it works! obj.asfreq("T") @@ -80,7 +80,7 @@ def test_asfreq_normalize(self, frame_or_series): rng = date_range("1/1/2000 09:30", periods=20) norm = date_range("1/1/2000", periods=20) - vals = np.random.randn(20, 3) + vals = np.random.default_rng(2).randn(20, 3) obj = DataFrame(vals, index=rng) expected = DataFrame(vals, index=norm) @@ -103,7 +103,7 @@ def test_asfreq_keep_index_name(self, frame_or_series): def test_asfreq_ts(self, frame_or_series): index = period_range(freq="A", start="1/1/2001", end="12/31/2010") - obj = DataFrame(np.random.randn(len(index), 3), index=index) + obj = DataFrame(np.random.default_rng(2).randn(len(index), 3), index=index) obj = tm.get_obj(obj, frame_or_series) result = obj.asfreq("D", how="end") @@ -185,7 +185,7 @@ def test_asfreq_fillvalue(self): def test_asfreq_with_date_object_index(self, frame_or_series): rng = date_range("1/1/2000", periods=20) - ts = frame_or_series(np.random.randn(20), index=rng) + ts = frame_or_series(np.random.default_rng(2).randn(20), index=rng) ts2 = ts.copy() ts2.index = [x.date() for x in ts2.index] diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 09cb3fbe1bacb..b277679510c97 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -179,7 +179,7 @@ def test_is_copy(self, date_range_frame): def test_asof_periodindex_mismatched_freq(self): N = 50 rng = period_range("1/1/1990", periods=N, freq="H") - df = DataFrame(np.random.randn(N), index=rng) + df = 
DataFrame(np.random.default_rng(2).randn(N), index=rng) # Mismatched freq msg = "Input has different freq" diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 5113630966201..ca1834defa006 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -264,7 +264,7 @@ def test_astype_duplicate_col(self): def test_astype_duplicate_col_series_arg(self): # GH#44417 - vals = np.random.randn(3, 4) + vals = np.random.default_rng(2).randn(3, 4) df = DataFrame(vals, columns=["A", "B", "C", "A"]) dtypes = df.dtypes dtypes.iloc[0] = str diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index 8537c32c24e3a..de168a149ff1c 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -19,7 +19,7 @@ def test_localized_at_time(self, tzstr, frame_or_series): tz = timezones.maybe_get_tz(tzstr) rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = frame_or_series(np.random.randn(len(rng)), index=rng) + ts = frame_or_series(np.random.default_rng(2).randn(len(rng)), index=rng) ts_local = ts.tz_localize(tzstr) @@ -30,7 +30,7 @@ def test_localized_at_time(self, tzstr, frame_or_series): def test_at_time(self, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) rs = ts.at_time(rng[1]) assert (rs.index.hour == rng[1].hour).all() @@ -44,7 +44,7 @@ def test_at_time(self, frame_or_series): def test_at_time_midnight(self, frame_or_series): # midnight, everything rng = date_range("1/1/2000", "1/31/2000") - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), 3), index=rng) ts = tm.get_obj(ts, frame_or_series) result = ts.at_time(time(0, 0)) @@ -53,7 +53,7 
@@ def test_at_time_midnight(self, frame_or_series): def test_at_time_nonexistent(self, frame_or_series): # time doesn't exist rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = DataFrame(np.random.randn(len(rng)), rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng)), rng) ts = tm.get_obj(ts, frame_or_series) rs = ts.at_time("16:00") assert len(rs) == 0 @@ -93,7 +93,7 @@ def test_at_time_raises(self, frame_or_series): def test_at_time_axis(self, axis): # issue 8839 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), len(rng))) ts.index, ts.columns = rng, rng indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] @@ -112,7 +112,7 @@ def test_at_time_axis(self, axis): def test_at_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") - df = DataFrame(np.random.randn(len(index), 5), index=index) + df = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) akey = time(12, 0, 0) ainds = [24, 72, 120, 168] diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index 4573e83c8eecc..b6097210cee9a 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -22,7 +22,7 @@ class TestBetweenTime: def test_between_time_formats(self, frame_or_series): # GH#11818 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) strings = [ @@ -45,7 +45,7 @@ def test_localized_between_time(self, tzstr, frame_or_series): tz = timezones.maybe_get_tz(tzstr) rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), 
index=rng) if frame_or_series is DataFrame: ts = ts.to_frame() @@ -69,7 +69,7 @@ def test_between_time_types(self, frame_or_series): def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) stime = time(0, 0) @@ -103,7 +103,7 @@ def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): # across midnight rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) stime = time(22, 0) etime = time(9, 0) @@ -140,7 +140,7 @@ def test_between_time_raises(self, frame_or_series): def test_between_time_axis(self, frame_or_series): # GH#8839 rng = date_range("1/1/2000", periods=100, freq="10min") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) if frame_or_series is DataFrame: ts = ts.to_frame() @@ -156,7 +156,7 @@ def test_between_time_axis(self, frame_or_series): def test_between_time_axis_aliases(self, axis): # GH#8839 rng = date_range("1/1/2000", periods=100, freq="10min") - ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), len(rng))) stime, etime = ("08:00:00", "09:00:00") exp_len = 7 @@ -174,7 +174,7 @@ def test_between_time_axis_raises(self, axis): # issue 8839 rng = date_range("1/1/2000", periods=100, freq="10min") mask = np.arange(0, len(rng)) - rand_data = np.random.randn(len(rng), len(rng)) + rand_data = np.random.default_rng(2).randn(len(rng), len(rng)) ts = DataFrame(rand_data, index=rng, columns=rng) stime, etime = ("08:00:00", "09:00:00") @@ -193,7 +193,7 @@ def test_between_time_axis_raises(self, axis): def 
test_between_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") - df = DataFrame(np.random.randn(len(index), 5), index=index) + df = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) bkey = slice(time(13, 0, 0), time(14, 0, 0)) binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] @@ -207,7 +207,7 @@ def test_between_time_datetimeindex(self): def test_between_time_incorrect_arg_inclusive(self): # GH40245 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) stime = time(0, 0) etime = time(1, 0) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index da13711d607c5..d6b9ceab852d2 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -30,7 +30,7 @@ def test_inplace_clip(self, float_frame): def test_dataframe_clip(self): # GH#2747 - df = DataFrame(np.random.randn(1000, 2)) + df = DataFrame(np.random.default_rng(2).randn(1000, 2)) for lb, ub in [(-1, 1), (1, -1)]: clipped_df = df.clip(lb, ub) @@ -60,8 +60,8 @@ def test_clip_mixed_numeric(self): def test_clip_against_series(self, inplace): # GH#6966 - df = DataFrame(np.random.randn(1000, 2)) - lb = Series(np.random.randn(1000)) + df = DataFrame(np.random.default_rng(2).randn(1000, 2)) + lb = Series(np.random.default_rng(2).randn(1000)) ub = lb + 1 original = df.copy() @@ -107,8 +107,8 @@ def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): @pytest.mark.parametrize("axis", [0, 1, None]) def test_clip_against_frame(self, axis): - df = DataFrame(np.random.randn(1000, 2)) - lb = DataFrame(np.random.randn(1000, 2)) + df = DataFrame(np.random.default_rng(2).randn(1000, 2)) + lb = DataFrame(np.random.default_rng(2).randn(1000, 2)) ub = lb + 1 clipped_df = df.clip(lb, ub, axis=axis) @@ -123,8 +123,12 @@ def 
test_clip_against_frame(self, axis): def test_clip_against_unordered_columns(self): # GH#20911 - df1 = DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"]) - df2 = DataFrame(np.random.randn(1000, 4), columns=["D", "A", "B", "C"]) + df1 = DataFrame( + np.random.default_rng(2).randn(1000, 4), columns=["A", "B", "C", "D"] + ) + df2 = DataFrame( + np.random.default_rng(2).randn(1000, 4), columns=["D", "A", "B", "C"] + ) df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"]) result_upper = df1.clip(lower=0, upper=df2) expected_upper = df1.clip(lower=0, upper=df2[df1.columns]) diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py index 1e685fcce9f05..42c873081cc4f 100644 --- a/pandas/tests/frame/methods/test_copy.py +++ b/pandas/tests/frame/methods/test_copy.py @@ -51,13 +51,13 @@ def test_copy_consolidates(self): # GH#42477 df = DataFrame( { - "a": np.random.randint(0, 100, size=55), - "b": np.random.randint(0, 100, size=55), + "a": np.random.default_rng(2).randint(0, 100, size=55), + "b": np.random.default_rng(2).randint(0, 100, size=55), } ) for i in range(0, 10): - df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55) + df.loc[:, f"n_{i}"] = np.random.default_rng(2).randint(0, 100, size=55) assert len(df._mgr.blocks) == 11 result = df.copy() diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 003bb1ec16f85..b311a7914cb26 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -66,8 +66,8 @@ def test_cov(self, float_frame, float_string_frame): @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) def test_cov_ddof(self, test_ddof): # GH#34611 - np_array1 = np.random.rand(10) - np_array2 = np.random.rand(10) + np_array1 = np.random.default_rng(2).rand(10) + np_array2 = np.random.default_rng(2).rand(10) df = DataFrame({0: np_array1, 1: np_array2}) result = df.cov(ddof=test_ddof) expected_np = 
np.cov(np_array1, np_array2, ddof=test_ddof) @@ -171,14 +171,16 @@ def test_corr_int_and_boolean(self, meth): @pytest.mark.parametrize("method", ["cov", "corr"]) def test_corr_cov_independent_index_column(self, method): # GH#14617 - df = DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).randn(4 * 10).reshape(10, 4), columns=list("abcd") + ) result = getattr(df, method)() assert result.index is not result.columns assert result.index.equals(result.columns) def test_corr_invalid_method(self): # GH#22298 - df = DataFrame(np.random.normal(size=(10, 2))) + df = DataFrame(np.random.default_rng(2).normal(size=(10, 2))) msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " with pytest.raises(ValueError, match=msg): df.corr(method="____") @@ -286,7 +288,7 @@ def test_corrwith(self, datetime_frame, dtype): datetime_frame = datetime_frame.astype(dtype) a = datetime_frame - noise = Series(np.random.randn(len(a)), index=a.index) + noise = Series(np.random.default_rng(2).randn(len(a)), index=a.index) b = datetime_frame.add(noise, axis=0) @@ -310,8 +312,12 @@ def test_corrwith(self, datetime_frame, dtype): # non time-series data index = ["a", "b", "c", "d", "e"] columns = ["one", "two", "three", "four"] - df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns) - df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns) + df1 = DataFrame( + np.random.default_rng(2).randn(5, 4), index=index, columns=columns + ) + df2 = DataFrame( + np.random.default_rng(2).randn(4, 4), index=index[:4], columns=columns + ) correls = df1.corrwith(df2, axis=1) for row in index[:4]: tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) @@ -371,16 +377,24 @@ def test_corrwith_mixed_dtypes(self, numeric_only): df.corrwith(s, numeric_only=numeric_only) def test_corrwith_index_intersection(self): - df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) - df2 = 
DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + df1 = DataFrame( + np.random.default_rng(2).random(size=(10, 2)), columns=["a", "b"] + ) + df2 = DataFrame( + np.random.default_rng(2).random(size=(10, 3)), columns=["a", "b", "c"] + ) result = df1.corrwith(df2, drop=True).index.sort_values() expected = df1.columns.intersection(df2.columns).sort_values() tm.assert_index_equal(result, expected) def test_corrwith_index_union(self): - df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) - df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + df1 = DataFrame( + np.random.default_rng(2).random(size=(10, 2)), columns=["a", "b"] + ) + df2 = DataFrame( + np.random.default_rng(2).random(size=(10, 3)), columns=["a", "b", "c"] + ) result = df1.corrwith(df2, drop=False).index.sort_values() expected = df1.columns.union(df2.columns).sort_values() @@ -406,7 +420,7 @@ def test_corr_numerical_instabilities(self): @td.skip_if_no_scipy def test_corrwith_spearman(self): # GH#21925 - df = DataFrame(np.random.random(size=(100, 3))) + df = DataFrame(np.random.default_rng(2).random(size=(100, 3))) result = df.corrwith(df**2, method="spearman") expected = Series(np.ones(len(result))) tm.assert_series_equal(result, expected) @@ -414,7 +428,7 @@ def test_corrwith_spearman(self): @td.skip_if_no_scipy def test_corrwith_kendall(self): # GH#21925 - df = DataFrame(np.random.random(size=(100, 3))) + df = DataFrame(np.random.default_rng(2).random(size=(100, 3))) result = df.corrwith(df**2, method="kendall") expected = Series(np.ones(len(result))) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index fbe6ff356499f..c3173646ae0d7 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -92,7 +92,7 @@ def test_describe_bool_frame(self): tm.assert_frame_equal(result, expected) def 
test_describe_categorical(self): - df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + df = DataFrame({"value": np.random.default_rng(2).randint(0, 10000, 100)}) labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index a9454d73d5429..8dc8977a63400 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -13,7 +13,7 @@ class TestDataFrameDiff: def test_diff_requires_integer(self): - df = DataFrame(np.random.randn(2, 2)) + df = DataFrame(np.random.default_rng(2).randn(2, 2)) with pytest.raises(ValueError, match="periods must be an integer"): df.diff(1.5) @@ -156,7 +156,7 @@ def test_diff_timedelta(self): tm.assert_frame_equal(res, exp) def test_diff_mixed_dtype(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) df["A"] = np.array([1, 2, 3, 4, 5], dtype=object) result = df.diff() @@ -285,7 +285,7 @@ def test_diff_integer_na(self, axis, expected): def test_diff_readonly(self): # https://github.com/pandas-dev/pandas/issues/35559 - arr = np.random.randn(5, 2) + arr = np.random.default_rng(2).randn(5, 2) arr.flags.writeable = False df = DataFrame(arr) result = df.diff() diff --git a/pandas/tests/frame/methods/test_dot.py b/pandas/tests/frame/methods/test_dot.py index 555e5f0e26eaf..d1fd4516b69e0 100644 --- a/pandas/tests/frame/methods/test_dot.py +++ b/pandas/tests/frame/methods/test_dot.py @@ -83,12 +83,14 @@ def test_dot_misaligned(self, obj, other): class TestSeriesDot(DotSharedTests): @pytest.fixture def obj(self): - return Series(np.random.randn(4), index=["p", "q", "r", "s"]) + return Series(np.random.default_rng(2).randn(4), index=["p", "q", "r", "s"]) @pytest.fixture def other(self): return DataFrame( - np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"] + np.random.default_rng(2).randn(3, 4), + 
index=["1", "2", "3"], + columns=["p", "q", "r", "s"], ).T @pytest.fixture @@ -107,13 +109,17 @@ class TestDataFrameDot(DotSharedTests): @pytest.fixture def obj(self): return DataFrame( - np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + np.random.default_rng(2).randn(3, 4), + index=["a", "b", "c"], + columns=["p", "q", "r", "s"], ) @pytest.fixture def other(self): return DataFrame( - np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["1", "2"] + np.random.default_rng(2).randn(4, 2), + index=["p", "q", "r", "s"], + columns=["1", "2"], ) @pytest.fixture diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 79fd48de91ed5..c64b1f0e731d1 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -55,7 +55,7 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys(): # define dataframe with unique datetime index df = DataFrame( - np.random.randn(5, 3), + np.random.default_rng(2).randn(5, 3), columns=["a", "b", "c"], index=pd.date_range("2012", freq="H", periods=5), ) @@ -159,7 +159,7 @@ def test_drop(self): # inplace cache issue # GH#5628 - df = DataFrame(np.random.randn(10, 3), columns=list("abc")) + df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=list("abc")) expected = df[~(df.b > 0)] return_value = df.drop(labels=df[df.b > 0].index, inplace=True) assert return_value is None @@ -303,7 +303,7 @@ def test_mixed_depth_drop(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).randn(4, 6), columns=index) result = df.drop("a", axis=1) expected = df.drop([("a", "", "")], axis=1) @@ -438,7 +438,7 @@ def test_drop_preserve_names(self): [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] ) - df = DataFrame(np.random.randn(6, 3), index=index) + df = DataFrame(np.random.default_rng(2).randn(6, 3), 
index=index) result = df.drop([(0, 2)]) assert result.index.names == ("one", "two") @@ -497,9 +497,9 @@ def test_drop_with_duplicate_columns2(self): # drop buggy GH#6240 df = DataFrame( { - "A": np.random.randn(5), - "B": np.random.randn(5), - "C": np.random.randn(5), + "A": np.random.default_rng(2).randn(5), + "B": np.random.default_rng(2).randn(5), + "C": np.random.default_rng(2).randn(5), "D": ["a", "b", "c", "d", "e"], } ) diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index d6e54559da7e3..991ea5646e006 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -15,7 +15,7 @@ class TestDataFrameMissingData: def test_dropEmptyRows(self, float_frame): N = len(float_frame.index) - mat = np.random.randn(N) + mat = np.random.default_rng(2).randn(N) mat[:5] = np.nan frame = DataFrame({"foo": mat}, index=float_frame.index) @@ -39,7 +39,7 @@ def test_dropEmptyRows(self, float_frame): def test_dropIncompleteRows(self, float_frame): N = len(float_frame.index) - mat = np.random.randn(N) + mat = np.random.default_rng(2).randn(N) mat[:5] = np.nan frame = DataFrame({"foo": mat}, index=float_frame.index) @@ -65,7 +65,7 @@ def test_dropIncompleteRows(self, float_frame): assert return_value is None def test_dropna(self): - df = DataFrame(np.random.randn(6, 4)) + df = DataFrame(np.random.default_rng(2).randn(6, 4)) df.iloc[:2, 2] = np.nan dropped = df.dropna(axis=1) @@ -211,9 +211,9 @@ def test_dropna_categorical_interval_index(self): def test_dropna_with_duplicate_columns(self): df = DataFrame( { - "A": np.random.randn(5), - "B": np.random.randn(5), - "C": np.random.randn(5), + "A": np.random.default_rng(2).randn(5), + "B": np.random.default_rng(2).randn(5), + "C": np.random.default_rng(2).randn(5), "D": ["a", "b", "c", "d", "e"], } ) diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py index 5b07572ca9ad1..6b407ced25796 100644 
--- a/pandas/tests/frame/methods/test_duplicated.py +++ b/pandas/tests/frame/methods/test_duplicated.py @@ -26,7 +26,7 @@ def test_duplicated_implemented_no_recursion(): # gh-21524 # Ensure duplicated isn't implemented using recursion that # can fail on wide frames - df = DataFrame(np.random.randint(0, 1000, (10, 1000))) + df = DataFrame(np.random.default_rng(2).randint(0, 1000, (10, 1000))) rec_limit = sys.getrecursionlimit() try: sys.setrecursionlimit(100) diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py index beec3e965d542..4028a26dfdc65 100644 --- a/pandas/tests/frame/methods/test_equals.py +++ b/pandas/tests/frame/methods/test_equals.py @@ -30,8 +30,10 @@ def test_equals_different_blocks(self, using_array_manager): def test_equals(self): # Add object dtype column with nans - index = np.random.random(10) - df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) + index = np.random.default_rng(2).random(10) + df1 = DataFrame( + np.random.default_rng(2).random(10), index=index, columns=["floats"] + ) df1["text"] = "the sky is so blue. 
we could use more chocolate.".split() df1["start"] = date_range("2000-1-1", periods=10, freq="T") df1["end"] = date_range("2000-1-1", periods=10, freq="D") diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index a5f6f58e66392..8fd0f5f35cd45 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -135,7 +135,7 @@ def test_fillna_different_dtype(self): def test_fillna_limit_and_value(self): # limit and value - df = DataFrame(np.random.randn(10, 3)) + df = DataFrame(np.random.default_rng(2).randn(10, 3)) df.iloc[2:7, 0] = np.nan df.iloc[3:5, 2] = np.nan @@ -442,7 +442,7 @@ def test_bfill(self, datetime_frame): def test_frame_pad_backfill_limit(self): index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) + df = DataFrame(np.random.default_rng(2).randn(10, 4), index=index) result = df[:2].reindex(index, method="pad", limit=5) @@ -461,7 +461,7 @@ def test_frame_pad_backfill_limit(self): def test_frame_fillna_limit(self): index = np.arange(10) - df = DataFrame(np.random.randn(10, 4), index=index) + df = DataFrame(np.random.default_rng(2).randn(10, 4), index=index) result = df[:2].reindex(index) msg = "DataFrame.fillna with 'method' is deprecated" @@ -485,14 +485,14 @@ def test_frame_fillna_limit(self): def test_fillna_skip_certain_blocks(self): # don't try to fill boolean, int blocks - df = DataFrame(np.random.randn(10, 4).astype(int)) + df = DataFrame(np.random.default_rng(2).randn(10, 4).astype(int)) # it works! 
df.fillna(np.nan) @pytest.mark.parametrize("type", [int, float]) def test_fillna_positive_limit(self, type): - df = DataFrame(np.random.randn(10, 4)).astype(type) + df = DataFrame(np.random.default_rng(2).randn(10, 4)).astype(type) msg = "Limit must be greater than 0" with pytest.raises(ValueError, match=msg): @@ -500,14 +500,14 @@ def test_fillna_positive_limit(self, type): @pytest.mark.parametrize("type", [int, float]) def test_fillna_integer_limit(self, type): - df = DataFrame(np.random.randn(10, 4)).astype(type) + df = DataFrame(np.random.default_rng(2).randn(10, 4)).astype(type) msg = "Limit must be an integer" with pytest.raises(ValueError, match=msg): df.fillna(0, limit=0.5) def test_fillna_inplace(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) df.loc[:4, 1] = np.nan df.loc[-4:, 3] = np.nan @@ -595,7 +595,7 @@ def test_fillna_dataframe(self): tm.assert_frame_equal(result, expected) def test_fillna_columns(self): - arr = np.random.randn(10, 10) + arr = np.random.default_rng(2).randn(10, 10) arr[:, ::2] = np.nan df = DataFrame(arr) @@ -635,7 +635,7 @@ def test_fillna_invalid_value(self, float_frame): def test_fillna_col_reordering(self): cols = ["COL." 
+ str(i) for i in range(5, 0, -1)] - data = np.random.rand(20, 5) + data = np.random.default_rng(2).rand(20, 5) df = DataFrame(index=range(20), columns=cols, data=data) msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/frame/methods/test_filter.py b/pandas/tests/frame/methods/test_filter.py index af77db4058b43..1a2fbf8a65a55 100644 --- a/pandas/tests/frame/methods/test_filter.py +++ b/pandas/tests/frame/methods/test_filter.py @@ -133,7 +133,7 @@ def test_filter_corner(self): def test_filter_regex_non_string(self): # GH#5798 trying to filter on non-string columns should drop, # not raise - df = DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) + df = DataFrame(np.random.default_rng(2).random((3, 2)), columns=["STRING", 123]) result = df.filter(regex="STRING") expected = df[["STRING"]] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_first_valid_index.py b/pandas/tests/frame/methods/test_first_valid_index.py index 6009851bab643..9273955f22146 100644 --- a/pandas/tests/frame/methods/test_first_valid_index.py +++ b/pandas/tests/frame/methods/test_first_valid_index.py @@ -48,7 +48,7 @@ def test_first_last_valid_frame(self, data, idx, expected_first, expected_last): def test_first_last_valid(self, index_func): N = 30 index = index_func(N) - mat = np.random.randn(N) + mat = np.random.default_rng(2).randn(N) mat[:5] = np.nan mat[-5:] = np.nan diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py index 99cb7840c3eb6..28b8cede58bba 100644 --- a/pandas/tests/frame/methods/test_head_tail.py +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -9,7 +9,7 @@ def test_head_tail_generic(index, frame_or_series): ndim = 2 if frame_or_series is DataFrame else 1 shape = (len(index),) * ndim - vals = np.random.randn(*shape) + vals = np.random.default_rng(2).randn(*shape) obj = frame_or_series(vals, 
index=index) tm.assert_equal(obj.head(), obj.iloc[:5]) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index fcdf7db35446e..7d9f5d9e0d3bb 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -430,7 +430,7 @@ def test_interp_time_inplace_axis(self): # GH 9687 periods = 5 idx = date_range(start="2014-01-01", periods=periods) - data = np.random.rand(periods, periods) + data = np.random.default_rng(2).rand(periods, periods) data[data < 0.5] = np.nan expected = DataFrame(index=idx, columns=idx, data=data) diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py index 7ef51ebc4b6a3..7225e80f50b01 100644 --- a/pandas/tests/frame/methods/test_map.py +++ b/pandas/tests/frame/methods/test_map.py @@ -56,7 +56,7 @@ def func(x): def test_map_str(): # GH 2786 - df = DataFrame(np.random.random((3, 4))) + df = DataFrame(np.random.default_rng(2).random((3, 4))) df2 = df.copy() cols = ["a", "a", "a", "a"] df.columns = cols @@ -73,7 +73,7 @@ def test_map_str(): ) def test_map_datetimelike(col, val): # datetime/timedelta - df = DataFrame(np.random.random((3, 4))) + df = DataFrame(np.random.default_rng(2).random((3, 4))) df[col] = val result = df.map(str) assert result.loc[0, col] == str(df.loc[0, col]) @@ -106,7 +106,7 @@ def test_map_na_ignore(float_frame): # GH 23803 strlen_frame = float_frame.map(lambda x: len(str(x))) float_frame_with_na = float_frame.copy() - mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool) + mask = np.random.default_rng(2).randint(0, 2, size=float_frame.shape, dtype=bool) float_frame_with_na[mask] = pd.NA strlen_frame_na_ignore = float_frame_with_na.map( lambda x: len(str(x)), na_action="ignore" diff --git a/pandas/tests/frame/methods/test_matmul.py b/pandas/tests/frame/methods/test_matmul.py index 702ab3916d77a..ee11e4545db45 100644 --- a/pandas/tests/frame/methods/test_matmul.py +++ 
b/pandas/tests/frame/methods/test_matmul.py @@ -15,10 +15,14 @@ class TestMatMul: def test_matmul(self): # matmul test is for GH#10259 a = DataFrame( - np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + np.random.default_rng(2).randn(3, 4), + index=["a", "b", "c"], + columns=["p", "q", "r", "s"], ) b = DataFrame( - np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["one", "two"] + np.random.default_rng(2).randn(4, 2), + index=["p", "q", "r", "s"], + columns=["one", "two"], ) # DataFrame @ DataFrame @@ -65,8 +69,12 @@ def test_matmul(self): tm.assert_frame_equal(result, expected) # unaligned - df = DataFrame(np.random.randn(3, 4), index=[1, 2, 3], columns=range(4)) - df2 = DataFrame(np.random.randn(5, 3), index=range(5), columns=[1, 2, 3]) + df = DataFrame( + np.random.default_rng(2).randn(3, 4), index=[1, 2, 3], columns=range(4) + ) + df2 = DataFrame( + np.random.default_rng(2).randn(5, 3), index=range(5), columns=[1, 2, 3] + ) with pytest.raises(ValueError, match="aligned"): operator.matmul(df, df2) @@ -74,8 +82,8 @@ def test_matmul(self): def test_matmul_message_shapes(self): # GH#21581 exception message should reflect original shapes, # not transposed shapes - a = np.random.rand(10, 4) - b = np.random.rand(5, 3) + a = np.random.default_rng(2).rand(10, 4) + b = np.random.default_rng(2).rand(5, 3) df = DataFrame(b) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 17dea51263222..0bdf9a0e5c007 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -24,9 +24,9 @@ def df_duplicates(): def df_strings(): return pd.DataFrame( { - "a": np.random.permutation(10), + "a": np.random.default_rng(2).permutation(10), "b": list(ascii_lowercase[:10]), - "c": np.random.permutation(10).astype("float64"), + "c": np.random.default_rng(2).permutation(10).astype("float64"), } ) diff --git a/pandas/tests/frame/methods/test_pop.py 
b/pandas/tests/frame/methods/test_pop.py index a4f99b8287188..e1067f22baacc 100644 --- a/pandas/tests/frame/methods/test_pop.py +++ b/pandas/tests/frame/methods/test_pop.py @@ -54,7 +54,7 @@ def test_mixed_depth_pop(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).randn(4, 6), columns=index) df1 = df.copy() df2 = df.copy() diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 0d9f4bd77d137..f5dd10b7eab93 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -773,7 +773,7 @@ def test_quantile_item_cache( ): # previous behavior incorrect retained an invalid _item_cache entry interpolation, method = interp_method - df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["A", "B", "C"]) df["D"] = df["A"] * 2 ser = df["A"] if not using_array_manager: @@ -844,7 +844,7 @@ def compute_quantile(self, obj, qs): def test_quantile_ea(self, request, obj, index): # result should be invariant to shuffling indexer = np.arange(len(index), dtype=np.intp) - np.random.shuffle(indexer) + np.random.default_rng(2).shuffle(indexer) obj = obj.iloc[indexer] qs = [0.5, 0, 1] @@ -869,7 +869,7 @@ def test_quantile_ea_with_na(self, obj, index): # result should be invariant to shuffling indexer = np.arange(len(index), dtype=np.intp) - np.random.shuffle(indexer) + np.random.default_rng(2).shuffle(indexer) obj = obj.iloc[indexer] qs = [0.5, 0, 1] @@ -889,7 +889,7 @@ def test_quantile_ea_all_na(self, request, obj, index): # result should be invariant to shuffling indexer = np.arange(len(index), dtype=np.intp) - np.random.shuffle(indexer) + np.random.default_rng(2).shuffle(indexer) obj = obj.iloc[indexer] qs = [0.5, 0, 1] @@ -905,7 +905,7 @@ def test_quantile_ea_scalar(self, request, obj, index): # 
result should be invariant to shuffling indexer = np.arange(len(index), dtype=np.intp) - np.random.shuffle(indexer) + np.random.default_rng(2).shuffle(indexer) obj = obj.iloc[indexer] qs = 0.5 diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 8b0251b2a9928..4ba946bf95663 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -64,7 +64,7 @@ def test_rank(self, float_frame): tm.assert_almost_equal(ranks1.values, exp1) # integers - df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) + df = DataFrame(np.random.default_rng(2).randint(0, 5, size=40).reshape((10, 4))) result = df.rank() exp = df.astype(float).rank() @@ -126,7 +126,7 @@ def test_rank2(self): def test_rank_does_not_mutate(self): # GH#18521 # Check rank does not mutate DataFrame - df = DataFrame(np.random.randn(10, 3), dtype="float64") + df = DataFrame(np.random.default_rng(2).randn(10, 3), dtype="float64") expected = df.copy() df.rank() result = df @@ -229,7 +229,7 @@ def test_rank_methods_frame(self): import scipy.stats # noqa: F401 from scipy.stats import rankdata - xs = np.random.randint(0, 21, (100, 26)) + xs = np.random.default_rng(2).randint(0, 21, (100, 26)) xs = (xs - 10.0) / 10.0 cols = [chr(ord("z") - i) for i in range(xs.shape[1])] @@ -407,12 +407,12 @@ def test_rank_inf_and_nan(self, contents, dtype, frame_or_series): exp_order = np.array(range(len(values)), dtype="float64") + 1.0 if dtype in dtype_na_map: na_value = dtype_na_map[dtype] - nan_indices = np.random.choice(range(len(values)), 5) + nan_indices = np.random.default_rng(2).choice(range(len(values)), 5) values = np.insert(values, nan_indices, na_value) exp_order = np.insert(exp_order, nan_indices, np.nan) # Shuffle the testing array and expected results in the same way - random_order = np.random.permutation(len(values)) + random_order = np.random.default_rng(2).permutation(len(values)) obj = 
frame_or_series(values[random_order]) expected = frame_or_series(exp_order[random_order], dtype="float64") result = obj.rank() diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 63e2eb790a4ea..3c4cb3adde9e8 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -34,7 +34,7 @@ class TestReindexSetIndex: def test_dti_set_index_reindex_datetimeindex(self): # GH#6631 - df = DataFrame(np.random.random(6)) + df = DataFrame(np.random.default_rng(2).random(6)) idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") @@ -48,7 +48,9 @@ def test_dti_set_index_reindex_freq_with_tz(self): index = date_range( datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" ) - df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) + df = DataFrame( + np.random.default_rng(2).randn(24, 1), columns=["a"], index=index + ) new_index = date_range( datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" ) @@ -159,9 +161,9 @@ def test_reindex_tzaware_fill_value(self): def test_reindex_copies(self): # based on asv time_reindex_axis1 N = 10 - df = DataFrame(np.random.randn(N * 10, N)) + df = DataFrame(np.random.default_rng(2).randn(N * 10, N)) cols = np.arange(N) - np.random.shuffle(cols) + np.random.default_rng(2).shuffle(cols) result = df.reindex(columns=cols, copy=True) assert not np.shares_memory(result[0]._values, df[0]._values) @@ -174,9 +176,9 @@ def test_reindex_copies_ea(self, using_copy_on_write): # https://github.com/pandas-dev/pandas/pull/51197 # also ensure to honor copy keyword for ExtensionDtypes N = 10 - df = DataFrame(np.random.randn(N * 10, N), dtype="Float64") + df = DataFrame(np.random.default_rng(2).randn(N * 10, N), dtype="Float64") cols = np.arange(N) - np.random.shuffle(cols) + np.random.default_rng(2).shuffle(cols) result = 
df.reindex(columns=cols, copy=True) if using_copy_on_write: @@ -364,7 +366,7 @@ def test_reindex_nearest_tz_empty_frame(self): def test_reindex_frame_add_nat(self): rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") - df = DataFrame({"A": np.random.randn(len(rng)), "B": rng}) + df = DataFrame({"A": np.random.default_rng(2).randn(len(rng)), "B": rng}) result = df.reindex(range(15)) assert np.issubdtype(result["B"].dtype, np.dtype("M8[ns]")) @@ -427,7 +429,7 @@ def test_reindex_level_verify_first_level(self, idx, check_index_type): "jim": list("B" * 4 + "A" * 2 + "C" * 3), "joe": list("abcdeabcd")[::-1], "jolie": [10, 20, 30] * 3, - "joline": np.random.randint(0, 1000, 9), + "joline": np.random.default_rng(2).randint(0, 1000, 9), } ) icol = ["jim", "joe", "jolie"] @@ -480,11 +482,11 @@ def test_reindex_level_verify_first_level_repeats(self, idx): # out to needing unique groups of same size as joe "jolie": np.concatenate( [ - np.random.choice(1000, x, replace=False) + np.random.default_rng(2).choice(1000, x, replace=False) for x in [2, 3, 3, 2, 3, 2, 3, 2] ] ), - "joline": np.random.randn(20).round(3) * 10, + "joline": np.random.default_rng(2).randn(20).round(3) * 10, } ) icol = ["jim", "joe", "jolie"] @@ -529,11 +531,11 @@ def test_reindex_level_verify_repeats(self, idx, indexer): # out to needing unique groups of same size as joe "jolie": np.concatenate( [ - np.random.choice(1000, x, replace=False) + np.random.default_rng(2).choice(1000, x, replace=False) for x in [2, 3, 3, 2, 3, 2, 3, 2] ] ), - "joline": np.random.randn(20).round(3) * 10, + "joline": np.random.default_rng(2).randn(20).round(3) * 10, } ) icol = ["jim", "joe", "jolie"] @@ -560,7 +562,7 @@ def test_reindex_level_verify(self, idx, indexer, check_index_type): "jim": list("B" * 4 + "A" * 2 + "C" * 3), "joe": list("abcdeabcd")[::-1], "jolie": [10, 20, 30] * 3, - "joline": np.random.randint(0, 1000, 9), + "joline": np.random.default_rng(2).randint(0, 1000, 9), } ) icol = ["jim", "joe", "jolie"] 
@@ -570,7 +572,7 @@ def test_reindex_level_verify(self, idx, indexer, check_index_type): def test_non_monotonic_reindex_methods(self): dr = date_range("2013-08-01", periods=6, freq="B") - data = np.random.randn(6, 1) + data = np.random.default_rng(2).randn(6, 1) df = DataFrame(data, index=dr, columns=list("A")) df_rev = DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) # index is not monotonic increasing or decreasing @@ -692,7 +694,7 @@ def test_reindex_nan(self): tm.assert_frame_equal(left, right) def test_reindex_name_remains(self): - s = Series(np.random.rand(10)) + s = Series(np.random.default_rng(2).rand(10)) df = DataFrame(s, index=np.arange(len(s))) i = Series(np.arange(10), name="iname") @@ -702,7 +704,7 @@ def test_reindex_name_remains(self): df = df.reindex(Index(np.arange(10), name="tmpname")) assert df.index.name == "tmpname" - s = Series(np.random.rand(10)) + s = Series(np.random.default_rng(2).rand(10)) df = DataFrame(s.T, index=np.arange(len(s))) i = Series(np.arange(10), name="iname") df = df.reindex(columns=i) @@ -798,7 +800,7 @@ def test_reindex_axes(self): assert index_freq == seq_freq def test_reindex_fill_value(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) # axis=0 result = df.reindex(list(range(15))) @@ -851,7 +853,7 @@ def test_reindex_single_column_ea_index_and_columns(self, any_numeric_ea_dtype): def test_reindex_dups(self): # GH4746, reindex on duplicate index error messages - arr = np.random.randn(10) + arr = np.random.default_rng(2).randn(10) df = DataFrame(arr, index=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]) # set index is ok @@ -1020,28 +1022,30 @@ def test_reindex_with_nans(self): tm.assert_frame_equal(result, expected) def test_reindex_multi(self): - df = DataFrame(np.random.randn(3, 3)) + df = DataFrame(np.random.default_rng(2).randn(3, 3)) result = df.reindex(index=range(4), columns=range(4)) expected = df.reindex(list(range(4))).reindex(columns=range(4)) 
tm.assert_frame_equal(result, expected) - df = DataFrame(np.random.randint(0, 10, (3, 3))) + df = DataFrame(np.random.default_rng(2).randint(0, 10, (3, 3))) result = df.reindex(index=range(4), columns=range(4)) expected = df.reindex(list(range(4))).reindex(columns=range(4)) tm.assert_frame_equal(result, expected) - df = DataFrame(np.random.randint(0, 10, (3, 3))) + df = DataFrame(np.random.default_rng(2).randint(0, 10, (3, 3))) result = df.reindex(index=range(2), columns=range(2)) expected = df.reindex(range(2)).reindex(columns=range(2)) tm.assert_frame_equal(result, expected) - df = DataFrame(np.random.randn(5, 3) + 1j, columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).randn(5, 3) + 1j, columns=["a", "b", "c"] + ) result = df.reindex(index=[0, 1], columns=["a", "b"]) expected = df.reindex([0, 1]).reindex(columns=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 6d8af97a5d210..315e2150637d8 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -87,7 +87,7 @@ def test_rename(self, float_frame): def test_rename_chainmap(self, args, kwargs): # see gh-23859 colAData = range(1, 11) - colBdata = np.random.randn(10) + colBdata = np.random.default_rng(2).randn(10) df = DataFrame({"A": colAData, "B": colBdata}) result = df.rename(*args, **kwargs) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 1846ac24e9cc5..7773fa3d9dd26 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -983,12 +983,12 @@ def test_replace_bool_with_string(self): tm.assert_frame_equal(result, expected) def test_replace_pure_bool_with_string_no_op(self): - df = DataFrame(np.random.rand(2, 2) > 0.5) + df = DataFrame(np.random.default_rng(2).rand(2, 2) > 0.5) result = df.replace("asdf", "fdsa") tm.assert_frame_equal(df, result) def 
test_replace_bool_with_bool(self): - df = DataFrame(np.random.rand(2, 2) > 0.5) + df = DataFrame(np.random.default_rng(2).rand(2, 2) > 0.5) result = df.replace(False, True) expected = DataFrame(np.ones((2, 2), dtype=bool)) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index a2ab02c5a6038..24f2b43a61230 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -94,7 +94,7 @@ def test_reset_index_tz(self, tz_aware_fixture): @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_frame_reset_index_tzaware_index(self, tz): dr = date_range("2012-06-02", periods=10, tz=tz) - df = DataFrame(np.random.randn(len(dr)), dr) + df = DataFrame(np.random.default_rng(2).randn(len(dr)), dr) roundtripped = df.reset_index().set_index("index") xp = df.index.tz rs = roundtripped.index.tz @@ -250,7 +250,7 @@ def test_reset_index_right_dtype(self): assert reset["time"].dtype == np.float64 def test_reset_index_multiindex_col(self): - vals = np.random.randn(3, 3).astype(object) + vals = np.random.default_rng(2).randn(3, 3).astype(object) idx = ["x", "y", "z"] full = np.hstack(([[x] for x in idx], vals)) df = DataFrame( @@ -309,13 +309,21 @@ def test_reset_index_multiindex_nan(self): # GH#6322, testing reset_index on MultiIndexes # when we have a nan or all nan df = DataFrame( - {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} + { + "A": ["a", "b", "c"], + "B": [0, 1, np.nan], + "C": np.random.default_rng(2).rand(3), + } ) rs = df.set_index(["A", "B"]).reset_index() tm.assert_frame_equal(rs, df) df = DataFrame( - {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} + { + "A": [np.nan, "b", "c"], + "B": [0, 1, 2], + "C": np.random.default_rng(2).rand(3), + } ) rs = df.set_index(["A", "B"]).reset_index() tm.assert_frame_equal(rs, df) @@ -328,7 +336,7 @@ def 
test_reset_index_multiindex_nan(self): { "A": ["a", "b", "c"], "B": [np.nan, np.nan, np.nan], - "C": np.random.rand(3), + "C": np.random.default_rng(2).rand(3), } ) rs = df.set_index(["A", "B"]).reset_index() @@ -593,7 +601,9 @@ def test_reset_index_period(self): def test_reset_index_delevel_infer_dtype(self): tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) - df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) + df = DataFrame( + np.random.default_rng(2).randn(8, 3), columns=["A", "B", "C"], index=index + ) deleveled = df.reset_index() assert is_integer_dtype(deleveled["prm1"]) assert is_float_dtype(deleveled["prm2"]) diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index 5579df41c1912..45d51ed52fe7c 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -170,7 +170,7 @@ def test_round_with_duplicate_columns(self): # GH#11611 df = DataFrame( - np.random.random([3, 3]), + np.random.default_rng(2).random([3, 3]), columns=["A", "B", "C"], index=["first", "second", "third"], ) @@ -196,7 +196,7 @@ def test_round_builtin(self): def test_round_nonunique_categorical(self): # See GH#21809 idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) - df = DataFrame(np.random.rand(6, 3), columns=list("abc")) + df = DataFrame(np.random.default_rng(2).rand(6, 3), columns=list("abc")) expected = df.round(3) expected.index = idx diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 69e799b8ff189..02cef473a281d 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -14,9 +14,9 @@ class TestSample: @pytest.fixture def obj(self, frame_or_series): if frame_or_series is Series: - arr = np.random.randn(10) + arr = np.random.default_rng(2).randn(10) else: - arr = np.random.randn(10, 10) + arr 
= np.random.default_rng(2).randn(10, 10) return frame_or_series(arr, dtype=None) @pytest.mark.parametrize("test", list(range(10))) def test_sample(self, test, obj): # Check for stability when receives seed or random state -- run 10 # times. - seed = np.random.randint(0, 100) + seed = np.random.default_rng(2).randint(0, 100) tm.assert_equal( obj.sample(n=4, random_state=seed), obj.sample(n=4, random_state=seed) ) @@ -37,25 +37,36 @@ def test_sample(self, test, obj): ) tm.assert_equal( - obj.sample(n=4, random_state=np.random.RandomState(test)), - obj.sample(n=4, random_state=np.random.RandomState(test)), + obj.sample(n=4, random_state=np.random.RandomState(test)), + obj.sample(n=4, random_state=np.random.RandomState(test)), ) tm.assert_equal( - obj.sample(frac=0.7, random_state=np.random.RandomState(test)), - obj.sample(frac=0.7, random_state=np.random.RandomState(test)), + obj.sample( + frac=0.7, random_state=np.random.RandomState(test) + ), + obj.sample( + frac=0.7, random_state=np.random.RandomState(test) + ), ) tm.assert_equal( - obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), - obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), + obj.sample( + frac=2, + replace=True, + random_state=np.random.RandomState(test), + ), + obj.sample( + frac=2, + replace=True, + random_state=np.random.RandomState(test), + ), ) os1, os2 = [], [] for _ in range(2): - np.random.seed(test) - os1.append(obj.sample(n=4)) - os2.append(obj.sample(frac=0.7)) + os1.append(obj.sample(n=4, random_state=test)) + os2.append(obj.sample(frac=0.7, random_state=test)) tm.assert_equal(*os1) tm.assert_equal(*os2) @@ -158,8 +169,8 @@ def test_sample_none_weights(self, obj): "func_str,arg", [ ("np.array", [2, 3, 1, 0]), - ("np.random.MT19937", 3), - ("np.random.PCG64", 11), + ("np.random.MT19937", 3), + 
("np.random.default_rng(2).PCG64", 11), ], ) def test_sample_random_state(self, func_str, arg, frame_or_series): @@ -173,7 +184,7 @@ def test_sample_random_state(self, func_str, arg, frame_or_series): def test_sample_generator(self, frame_or_series): # GH#38100 obj = frame_or_series(np.arange(100)) - rng = np.random.default_rng() + rng = np.random.default_rng(2).default_rng() # Consecutive calls should advance the seed result1 = obj.sample(n=50, random_state=rng) @@ -182,8 +193,12 @@ def test_sample_generator(self, frame_or_series): # Matching generator initialization must give same result # Consecutive calls should advance the seed - result1 = obj.sample(n=50, random_state=np.random.default_rng(11)) - result2 = obj.sample(n=50, random_state=np.random.default_rng(11)) + result1 = obj.sample( + n=50, random_state=np.random.default_rng(2).default_rng(11) + ) + result2 = obj.sample( + n=50, random_state=np.random.default_rng(2).default_rng(11) + ) tm.assert_equal(result1, result2) def test_sample_upsampling_without_replacement(self, frame_or_series): @@ -329,7 +344,7 @@ def test_sample_aligns_weights_with_frame(self): def test_sample_is_copy(self): # GH#27357, GH#30784: ensure the result of sample is an actual copy and # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) df2 = df.sample(3) with tm.assert_produces_warning(None): diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 2e9c75fe25652..3bfb1af423bdd 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -380,7 +380,9 @@ def test_select_dtypes_bad_arg_raises(self): def test_select_dtypes_typecodes(self): # GH 11990 - df = tm.makeCustomDataframe(30, 3, data_gen_f=lambda x, y: np.random.random()) + df = 
tm.makeCustomDataframe( + 30, 3, data_gen_f=lambda x, y: np.random.default_rng(2).random() + ) expected = df FLOAT_TYPES = list(np.typecodes["AllFloat"]) tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 303eed0b813f4..30f91f62cebb0 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -67,7 +67,7 @@ def test_set_index_empty_dataframe(self): def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) - df = DataFrame(np.random.randn(3, 3), columns=columns) + df = DataFrame(np.random.default_rng(2).randn(3, 3), columns=columns) result = df.set_index(df.columns[0]) @@ -89,7 +89,7 @@ def test_set_index_cast_datetimeindex(self): df = DataFrame( { "A": [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], - "B": np.random.randn(1000), + "B": np.random.default_rng(2).randn(1000), } ) @@ -372,12 +372,12 @@ def test_construction_with_categorical_index(self): ci.name = "B" # with Categorical - df = DataFrame({"A": np.random.randn(10), "B": ci.values}) + df = DataFrame({"A": np.random.default_rng(2).randn(10), "B": ci.values}) idf = df.set_index("B") tm.assert_index_equal(idf.index, ci) # from a CategoricalIndex - df = DataFrame({"A": np.random.randn(10), "B": ci}) + df = DataFrame({"A": np.random.default_rng(2).randn(10), "B": ci}) idf = df.set_index("B") tm.assert_index_equal(idf.index, ci) @@ -435,7 +435,7 @@ def test_set_index_datetime(self): tm.assert_index_equal(df.index.levels[1], expected) assert df.index.names == ["label", "datetime"] - df = DataFrame(np.random.random(6)) + df = DataFrame(np.random.default_rng(2).random(6)) idx1 = DatetimeIndex( [ "2011-07-19 07:00:00", @@ -484,7 +484,7 @@ def test_set_index_datetime(self): def test_set_index_period(self): # GH#6631 - df = DataFrame(np.random.random(6)) + df = 
DataFrame(np.random.default_rng(2).random(6)) idx1 = period_range("2011-01-01", periods=3, freq="M") idx1 = idx1.append(idx1) idx2 = period_range("2013-01-01 09:00", periods=2, freq="H") @@ -569,7 +569,7 @@ def test_set_index_raise_on_len( # GH 24984 df = frame_of_index_cols # has length 5 - values = np.random.randint(0, 10, (length,)) + values = np.random.default_rng(2).randint(0, 10, (length,)) msg = "Length mismatch: Expected 5 rows, received array of length.*" @@ -688,7 +688,7 @@ def __str__(self) -> str: def test_set_index_periodindex(self): # GH#6631 - df = DataFrame(np.random.random(6)) + df = DataFrame(np.random.default_rng(2).random(6)) idx1 = period_range("2011/01/01", periods=6, freq="M") idx2 = period_range("2013", periods=6, freq="A") diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index ebbb7ca13646f..4fa732adf7968 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -20,7 +20,7 @@ class TestDataFrameShift: def test_shift_axis1_with_valid_fill_value_one_array(self): # Case with axis=1 that does not go through the "len(arrays)>1" path # in DataFrame.shift - data = np.random.randn(5, 3) + data = np.random.default_rng(2).randn(5, 3) df = DataFrame(data) res = df.shift(axis=1, periods=1, fill_value=12345) expected = df.T.shift(periods=1, fill_value=12345).T @@ -35,7 +35,8 @@ def test_shift_axis1_with_valid_fill_value_one_array(self): def test_shift_disallow_freq_and_fill_value(self, frame_or_series): # Can't pass both! 
obj = frame_or_series( - np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H") + np.random.default_rng(2).randn(5), + index=date_range("1/1/2000", periods=5, freq="H"), ) msg = "Cannot pass both 'freq' and 'fill_value' to (Series|DataFrame).shift" @@ -70,7 +71,8 @@ def test_shift_non_writable_array(self, input_data, output_data, frame_or_series def test_shift_mismatched_freq(self, frame_or_series): ts = frame_or_series( - np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H") + np.random.default_rng(2).randn(5), + index=date_range("1/1/2000", periods=5, freq="H"), ) result = ts.shift(1, freq="5T") @@ -268,7 +270,7 @@ def test_shift_with_periodindex(self, frame_or_series): def test_shift_other_axis(self): # shift other axis # GH#6371 - df = DataFrame(np.random.rand(10, 5)) + df = DataFrame(np.random.default_rng(2).rand(10, 5)) expected = pd.concat( [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, @@ -279,7 +281,7 @@ def test_shift_other_axis(self): def test_shift_named_axis(self): # shift named axis - df = DataFrame(np.random.rand(10, 5)) + df = DataFrame(np.random.default_rng(2).rand(10, 5)) expected = pd.concat( [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, @@ -397,7 +399,7 @@ def test_shift_duplicate_columns(self): # GH#9092; verify that position-based shifting works # in the presence of duplicate columns column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] - data = np.random.randn(20, 5) + data = np.random.default_rng(2).randn(20, 5) shifted = [] for columns in column_lists: @@ -417,8 +419,8 @@ def test_shift_duplicate_columns(self): def test_shift_axis1_multiple_blocks(self, using_array_manager): # GH#35488 - df1 = DataFrame(np.random.randint(1000, size=(5, 3))) - df2 = DataFrame(np.random.randint(1000, size=(5, 2))) + df1 = DataFrame(np.random.default_rng(2).randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.default_rng(2).randint(1000, 
size=(5, 2))) df3 = pd.concat([df1, df2], axis=1) if not using_array_manager: assert len(df3._mgr.blocks) == 2 @@ -461,8 +463,8 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager): @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support def test_shift_axis1_multiple_blocks_with_int_fill(self): # GH#42719 - df1 = DataFrame(np.random.randint(1000, size=(5, 3))) - df2 = DataFrame(np.random.randint(1000, size=(5, 2))) + df1 = DataFrame(np.random.default_rng(2).randint(1000, size=(5, 3))) + df2 = DataFrame(np.random.default_rng(2).randint(1000, size=(5, 2))) df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) result = df3.shift(2, axis=1, fill_value=np.int_(0)) assert len(df3._mgr.blocks) == 2 @@ -648,7 +650,7 @@ def test_shift_axis1_categorical_columns(self): def test_shift_axis1_many_periods(self): # GH#44978 periods > len(columns) - df = DataFrame(np.random.rand(5, 3)) + df = DataFrame(np.random.default_rng(2).rand(5, 3)) shifted = df.shift(6, axis=1, fill_value=None) expected = df * np.nan diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 38977470f455c..d9d19ce2992ab 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -56,7 +56,7 @@ def test_sort_index_non_existent_label_multiindex(self): def test_sort_index_reorder_on_ops(self): # GH#15687 df = DataFrame( - np.random.randn(8, 2), + np.random.default_rng(2).randn(8, 2), index=MultiIndex.from_product( [["a", "b"], ["big", "small"], ["red", "blu"]], names=["letter", "size", "color"], @@ -217,7 +217,9 @@ def test_sort_index_multi_index(self): def test_sort_index_inplace(self): frame = DataFrame( - np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + np.random.default_rng(2).randn(4, 4), + index=[1, 2, 3, 4], + columns=["A", "B", "C", "D"], ) # axis=0 @@ -256,11 +258,11 @@ def test_sort_index_different_sortorder(self): A = 
np.arange(20).repeat(5) B = np.tile(np.arange(5), 20) - indexer = np.random.permutation(100) + indexer = np.random.default_rng(2).permutation(100) A = A.take(indexer) B = B.take(indexer) - df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + df = DataFrame({"A": A, "B": B, "C": np.random.default_rng(2).randn(100)}) ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) expected = df.take(ex_indexer) @@ -374,9 +376,11 @@ def test_sort_index_multiindex(self, level): def test_sort_index_intervalindex(self): # this is a de-facto sort via unstack # confirming that we sort in the order of the bins - y = Series(np.random.randn(100)) - x1 = Series(np.sign(np.random.randn(100))) - x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) + y = Series(np.random.default_rng(2).randn(100)) + x1 = Series(np.sign(np.random.default_rng(2).randn(100))) + x2 = pd.cut( + Series(np.random.default_rng(2).randn(100)), bins=[-3, -0.5, 0, 0.5, 3] + ) model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"]) result = model.groupby(["X1", "X2"], observed=True).mean().unstack() @@ -610,7 +614,9 @@ def test_sort_index_level2(self, multiindex_dataframe_random_data): def test_sort_index_level_large_cardinality(self): # GH#2684 (int64) index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = DataFrame(np.random.randn(4000).astype("int64"), index=index) + df = DataFrame( + np.random.default_rng(2).randn(4000).astype("int64"), index=index + ) # it works! result = df.sort_index(level=0) @@ -618,7 +624,9 @@ def test_sort_index_level_large_cardinality(self): # GH#2684 (int32) index = MultiIndex.from_arrays([np.arange(4000)] * 3) - df = DataFrame(np.random.randn(4000).astype("int32"), index=index) + df = DataFrame( + np.random.default_rng(2).randn(4000).astype("int32"), index=index + ) # it works! 
result = df.sort_index(level=0) @@ -677,8 +685,7 @@ def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): ], ) def test_sort_index_multilevel_repr_8017(self, gen, extra): - np.random.seed(0) - data = np.random.randn(3, 4) + data = np.random.default_rng(2).randn(3, 4) columns = MultiIndex.from_tuples([("red", i) for i in gen]) df = DataFrame(data, index=list("def"), columns=columns) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 4c41632040dbe..734d933b878c5 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -1,5 +1,3 @@ -import random - import numpy as np import pytest @@ -97,7 +95,9 @@ def test_sort_values_by_empty_list(self): def test_sort_values_inplace(self): frame = DataFrame( - np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + np.random.default_rng(2).randn(4, 4), + index=[1, 2, 3, 4], + columns=["A", "B", "C", "D"], ) sorted_df = frame.copy() @@ -129,9 +129,9 @@ def test_sort_values_inplace(self): def test_sort_values_multicolumn(self): A = np.arange(5).repeat(20) B = np.tile(np.arange(5), 20) - random.shuffle(A) - random.shuffle(B) - frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + np.random.default_rng(2).shuffle(A) + np.random.default_rng(2).shuffle(B) + frame = DataFrame({"A": A, "B": B, "C": np.random.default_rng(2).randn(100)}) result = frame.sort_values(by=["A", "B"]) indexer = np.lexsort((frame["B"], frame["A"])) @@ -598,7 +598,7 @@ def test_sort_values_nat_na_position_default(self): def test_sort_values_item_cache(self, using_array_manager, using_copy_on_write): # previous behavior incorrect retained an invalid _item_cache entry - df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["A", "B", "C"]) df["D"] = df["A"] * 2 ser = df["A"] if not using_array_manager: @@ -642,7 +642,9 @@ def 
test_sort_values_no_op_reset_index(self): class TestDataFrameSortKey: # test key sorting (issue 27237) def test_sort_values_inplace_key(self, sort_by_key): frame = DataFrame( - np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + np.random.default_rng(2).randn(4, 4), + index=[1, 2, 3, 4], + columns=["A", "B", "C", "D"], ) sorted_df = frame.copy() diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py index f2667fc973cf4..68061d38de84d 100644 --- a/pandas/tests/frame/methods/test_swapaxes.py +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -7,20 +7,20 @@ class TestSwapAxes: def test_swapaxes(self): - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) def test_swapaxes_noop(self): - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): tm.assert_frame_equal(df, df.swapaxes(0, 0)) def test_swapaxes_invalid_axis(self): - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): msg = "No axis named 2 for object type DataFrame" diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index ee9c4f05991a0..06145b87677aa 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -74,14 +74,20 @@ def test_to_csv_from_csv2(self, float_frame): with tm.ensure_clean("__tmp_to_csv_from_csv2__") as path: # duplicate index df = DataFrame( - np.random.randn(3, 3), index=["a", "a", "b"], columns=["x", "y", "z"] 
+ np.random.default_rng(2).randn(3, 3), + index=["a", "a", "b"], + columns=["x", "y", "z"], ) df.to_csv(path) result = self.read_csv(path) tm.assert_frame_equal(result, df) midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) - df = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), + index=midx, + columns=["x", "y", "z"], + ) df.to_csv(path) result = self.read_csv(path, index_col=[0, 1, 2], parse_dates=False) @@ -102,8 +108,8 @@ def test_to_csv_from_csv2(self, float_frame): def test_to_csv_from_csv3(self): with tm.ensure_clean("__tmp_to_csv_from_csv3__") as path: - df1 = DataFrame(np.random.randn(3, 1)) - df2 = DataFrame(np.random.randn(3, 1)) + df1 = DataFrame(np.random.default_rng(2).randn(3, 1)) + df2 = DataFrame(np.random.default_rng(2).randn(3, 1)) df1.to_csv(path) df2.to_csv(path, mode="a", header=False) @@ -201,9 +207,9 @@ def make_dtnat_arr(n, nnat=None): nnat = int(n * 0.1) # 10% s = list(date_range("2000", freq="5min", periods=n)) if nnat: - for i in np.random.randint(0, len(s), nnat): + for i in np.random.default_rng(2).randint(0, len(s), nnat): s[i] = NaT - i = np.random.randint(100) + i = np.random.default_rng(2).randint(100) s[-i] = NaT s[i] = NaT return s @@ -412,7 +418,7 @@ def test_to_csv_params(self, nrows, df_params, func_params, ncols): def test_to_csv_from_csv_w_some_infs(self, float_frame): # test roundtrip with inf, -inf, nan, as full columns and mix float_frame["G"] = np.nan - f = lambda x: [np.inf, np.nan][np.random.rand() < 0.5] + f = lambda x: [np.inf, np.nan][np.random.default_rng(2).rand() < 0.5] float_frame["H"] = float_frame.index.map(f) with tm.ensure_clean() as path: @@ -530,7 +536,7 @@ def _make_frame(names=None): if names is True: names = ["first", "second"] return DataFrame( - np.random.randint(0, 10, size=(3, 3)), + np.random.default_rng(2).randint(0, 10, size=(3, 3)), columns=MultiIndex.from_tuples( [("bah", "foo"), ("bah", "bar"), 
("ban", "baz")], names=names ), @@ -620,7 +626,7 @@ def test_to_csv_interval_index(self): tm.assert_frame_equal(result, expected) def test_to_csv_float32_nanrep(self): - df = DataFrame(np.random.randn(1, 4).astype(np.float32)) + df = DataFrame(np.random.default_rng(2).randn(1, 4).astype(np.float32)) df[1] = np.nan with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: @@ -644,10 +650,12 @@ def create_cols(name): return [f"{name}{i:03d}" for i in range(5)] df_float = DataFrame( - np.random.randn(100, 5), dtype="float64", columns=create_cols("float") + np.random.default_rng(2).randn(100, 5), + dtype="float64", + columns=create_cols("float"), ) df_int = DataFrame( - np.random.randn(100, 5).astype("int64"), + np.random.default_rng(2).randn(100, 5).astype("int64"), dtype="int64", columns=create_cols("int"), ) @@ -690,7 +698,7 @@ def create_cols(name): def test_to_csv_dups_cols(self): df = DataFrame( - np.random.randn(1000, 30), + np.random.default_rng(2).randn(1000, 30), columns=list(range(15)) + list(range(15)), dtype="float64", ) @@ -701,8 +709,8 @@ def test_to_csv_dups_cols(self): result.columns = df.columns tm.assert_frame_equal(result, df) - df_float = DataFrame(np.random.randn(1000, 3), dtype="float64") - df_int = DataFrame(np.random.randn(1000, 3)).astype("int64") + df_float = DataFrame(np.random.default_rng(2).randn(1000, 3), dtype="float64") + df_int = DataFrame(np.random.default_rng(2).randn(1000, 3)).astype("int64") df_bool = DataFrame(True, index=df_float.index, columns=range(3)) df_object = DataFrame("foo", index=df_float.index, columns=range(3)) df_dt = DataFrame( @@ -755,7 +763,9 @@ def test_to_csv_chunking(self, chunksize): def test_to_csv_wide_frame_formatting(self, monkeypatch): # Issue #8621 chunksize = 100 - df = DataFrame(np.random.randn(1, chunksize + 10), columns=None, index=None) + df = DataFrame( + np.random.default_rng(2).randn(1, chunksize + 10), columns=None, index=None + ) with tm.ensure_clean() as filename: with 
monkeypatch.context() as m: m.setattr("pandas.io.formats.csvs._DEFAULT_CHUNKSIZE_CELLS", chunksize) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index e64b212a8513c..d159e2e384845 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -149,7 +149,7 @@ def test_to_dict(self, mapping): @pytest.mark.parametrize("mapping", [list, defaultdict, []]) def test_to_dict_errors(self, mapping): # GH#16122 - df = DataFrame(np.random.randn(3, 3)) + df = DataFrame(np.random.default_rng(2).randn(3, 3)) msg = "|".join( [ "unsupported type: ", diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index ae0eafb0bf348..037e50e3a8452 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -24,7 +24,7 @@ def test_to_numpy_dtype(self): @td.skip_array_manager_invalid_test def test_to_numpy_copy(self, using_copy_on_write): - arr = np.random.randn(4, 3) + arr = np.random.default_rng(2).randn(4, 3) df = DataFrame(arr) if using_copy_on_write: assert df.values.base is not arr diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py index cd1b4b61ec033..d00a8fd7418ee 100644 --- a/pandas/tests/frame/methods/test_to_period.py +++ b/pandas/tests/frame/methods/test_to_period.py @@ -18,7 +18,9 @@ def test_to_period(self, frame_or_series): dr = date_range("1/1/2000", "1/1/2001", freq="D") obj = DataFrame( - np.random.randn(len(dr), K), index=dr, columns=["A", "B", "C", "D", "E"] + np.random.default_rng(2).randn(len(dr), K), + index=dr, + columns=["A", "B", "C", "D", "E"], ) obj["mix"] = "a" obj = tm.get_obj(obj, frame_or_series) @@ -39,7 +41,7 @@ def test_to_period_without_freq(self, frame_or_series): ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" ) - obj = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) + obj = 
DataFrame(np.random.default_rng(2).randn(4, 4), index=idx, columns=idx) obj = tm.get_obj(obj, frame_or_series) expected = obj.copy() expected.index = exp_idx @@ -52,7 +54,7 @@ def test_to_period_without_freq(self, frame_or_series): def test_to_period_columns(self): dr = date_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df = DataFrame(np.random.default_rng(2).randn(len(dr), 5), index=dr) df["mix"] = "a" df = df.T @@ -66,7 +68,7 @@ def test_to_period_columns(self): def test_to_period_invalid_axis(self): dr = date_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df = DataFrame(np.random.default_rng(2).randn(len(dr), 5), index=dr) df["mix"] = "a" msg = "No axis named 2 for object type DataFrame" diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index f2eea452764a6..79a693e4b58d8 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -19,7 +19,9 @@ class TestDataFrameToRecords: def test_to_records_timeseries(self): index = date_range("1/1/2000", periods=10) - df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).randn(10, 3), index=index, columns=["a", "b", "c"] + ) result = df.to_records() assert result["index"].dtype == "M8[ns]" @@ -74,16 +76,16 @@ def test_to_records_with_Mapping_type(self): all(x in frame for x in ["Type", "Subject", "From"]) def test_to_records_floats(self): - df = DataFrame(np.random.rand(10, 10)) + df = DataFrame(np.random.default_rng(2).rand(10, 10)) df.to_records() def test_to_records_index_name(self): - df = DataFrame(np.random.randn(3, 3)) + df = DataFrame(np.random.default_rng(2).randn(3, 3)) df.index.name = "X" rs = df.to_records() assert "X" in rs.dtype.fields - df = DataFrame(np.random.randn(3, 3)) + df = DataFrame(np.random.default_rng(2).randn(3, 3)) rs = df.to_records() 
assert "index" in rs.dtype.fields diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index fea070a3c0b38..44d8676a63966 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -29,7 +29,7 @@ def test_to_timestamp(self, frame_or_series): K = 5 index = period_range(freq="A", start="1/1/2001", end="12/1/2009") obj = DataFrame( - np.random.randn(len(index), K), + np.random.default_rng(2).randn(len(index), K), index=index, columns=["A", "B", "C", "D", "E"], ) @@ -73,7 +73,7 @@ def test_to_timestamp_columns(self): K = 5 index = period_range(freq="A", start="1/1/2001", end="12/1/2009") df = DataFrame( - np.random.randn(len(index), K), + np.random.default_rng(2).randn(len(index), K), index=index, columns=["A", "B", "C", "D", "E"], ) @@ -123,7 +123,7 @@ def test_to_timestamp_columns(self): def test_to_timestamp_invalid_axis(self): index = period_range(freq="A", start="1/1/2001", end="12/1/2009") - obj = DataFrame(np.random.randn(len(index), 5), index=index) + obj = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) # invalid axis with pytest.raises(ValueError, match="axis"): diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index 149fcfb35f44d..cbc1597c4158f 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -79,7 +79,11 @@ def test_truncate_nonsortedindex(self, frame_or_series): def test_sort_values_nonsortedindex(self): rng = date_range("2011-01-01", "2012-01-01", freq="W") ts = DataFrame( - {"A": np.random.randn(len(rng)), "B": np.random.randn(len(rng))}, index=rng + { + "A": np.random.default_rng(2).randn(len(rng)), + "B": np.random.default_rng(2).randn(len(rng)), + }, + index=rng, ) decreasing = ts.sort_values("A", ascending=False) @@ -93,10 +97,10 @@ def test_truncate_nonsortedindex_axis1(self): df = DataFrame( { - 3: 
np.random.randn(5), - 20: np.random.randn(5), - 2: np.random.randn(5), - 0: np.random.randn(5), + 3: np.random.default_rng(2).randn(5), + 20: np.random.default_rng(2).randn(5), + 2: np.random.default_rng(2).randn(5), + 0: np.random.default_rng(2).randn(5), }, columns=[3, 20, 2, 0], ) diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index 8c2253820db31..e9a90a1cd8b3b 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -72,7 +72,7 @@ def test_values_casts_datetimelike_to_object(self, constructor): expected = series.astype("object") - df = DataFrame({"a": series, "b": np.random.randn(len(series))}) + df = DataFrame({"a": series, "b": np.random.default_rng(2).randn(len(series))}) result = df.values.squeeze() assert (result[:, 0] == expected.values).all() diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 7cf1c56d9342e..176adaae5d1a5 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -126,8 +126,8 @@ def test_column_name_contains_unicode_surrogate(self): assert df.columns[0] == colname def test_new_empty_index(self): - df1 = DataFrame(np.random.randn(0, 3)) - df2 = DataFrame(np.random.randn(0, 3)) + df1 = DataFrame(np.random.default_rng(2).randn(0, 3)) + df2 = DataFrame(np.random.default_rng(2).randn(0, 3)) df1.index.name = "foo" assert df2.index.name is None diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 52b60a0b83025..ad099233873c9 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -89,7 +89,7 @@ def test_comparison_with_categorical_dtype(self): def test_frame_in_list(self): # GH#12689 this should raise at the DataFrame level, not blocks - df = DataFrame(np.random.randn(6, 4), columns=list("ABCD")) + df = DataFrame(np.random.default_rng(2).randn(6, 4), columns=list("ABCD")) msg = "The truth value of a 
DataFrame is ambiguous" with pytest.raises(ValueError, match=msg): df in [None] @@ -99,21 +99,21 @@ def test_frame_in_list(self): [ [ { - "a": np.random.randint(10, size=10), + "a": np.random.default_rng(2).randint(10, size=10), "b": pd.date_range("20010101", periods=10), }, { - "a": np.random.randint(10, size=10), - "b": np.random.randint(10, size=10), + "a": np.random.default_rng(2).randint(10, size=10), + "b": np.random.default_rng(2).randint(10, size=10), }, ], [ { - "a": np.random.randint(10, size=10), - "b": np.random.randint(10, size=10), + "a": np.random.default_rng(2).randint(10, size=10), + "b": np.random.default_rng(2).randint(10, size=10), }, { - "a": np.random.randint(10, size=10), + "a": np.random.default_rng(2).randint(10, size=10), "b": pd.date_range("20010101", periods=10), }, ], @@ -123,13 +123,13 @@ def test_frame_in_list(self): "b": pd.date_range("20010101", periods=10), }, { - "a": np.random.randint(10, size=10), - "b": np.random.randint(10, size=10), + "a": np.random.default_rng(2).randint(10, size=10), + "b": np.random.default_rng(2).randint(10, size=10), }, ], [ { - "a": np.random.randint(10, size=10), + "a": np.random.default_rng(2).randint(10, size=10), "b": pd.date_range("20010101", periods=10), }, { @@ -199,12 +199,12 @@ def test_timestamp_compare(self, left, right): { "dates1": pd.date_range("20010101", periods=10), "dates2": pd.date_range("20010102", periods=10), - "intcol": np.random.randint(1000000000, size=10), - "floatcol": np.random.randn(10), + "intcol": np.random.default_rng(2).randint(1000000000, size=10), + "floatcol": np.random.default_rng(2).randn(10), "stringcol": list(tm.rands(10)), } ) - df.loc[np.random.rand(len(df)) > 0.5, "dates2"] = pd.NaT + df.loc[np.random.default_rng(2).rand(len(df)) > 0.5, "dates2"] = pd.NaT left_f = getattr(operator, left) right_f = getattr(operator, right) @@ -265,7 +265,11 @@ def test_df_boolean_comparison_error(self): tm.assert_frame_equal(result, expected) def 
test_df_float_none_comparison(self): - df = DataFrame(np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).randn(8, 3), + index=range(8), + columns=["A", "B", "C"], + ) result = df.__eq__(None) assert not result.any().any() @@ -285,8 +289,8 @@ class TestFrameFlexComparisons: # TODO: test_bool_flex_frame needs a better name @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"]) def test_bool_flex_frame(self, op): - data = np.random.randn(5, 3) - other_data = np.random.randn(5, 3) + data = np.random.default_rng(2).randn(5, 3) + other_data = np.random.default_rng(2).randn(5, 3) df = DataFrame(data) other = DataFrame(other_data) ndim_5 = np.ones(df.shape + (1, 3)) @@ -317,10 +321,10 @@ def test_bool_flex_frame(self, op): def test_bool_flex_series(self, box): # Series # list/tuple - data = np.random.randn(5, 3) + data = np.random.default_rng(2).randn(5, 3) df = DataFrame(data) - idx_ser = box(np.random.randn(5)) - col_ser = box(np.random.randn(3)) + idx_ser = box(np.random.default_rng(2).randn(5)) + col_ser = box(np.random.default_rng(2).randn(3)) idx_eq = df.eq(idx_ser, axis=0) col_eq = df.eq(col_ser) @@ -353,11 +357,11 @@ def test_bool_flex_series(self, box): tm.assert_frame_equal(idx_ge, -idx_lt) tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) - idx_ser = Series(np.random.randn(5)) - col_ser = Series(np.random.randn(3)) + idx_ser = Series(np.random.default_rng(2).randn(5)) + col_ser = Series(np.random.default_rng(2).randn(3)) def test_bool_flex_frame_na(self): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) # NA df.loc[0, 0] = np.nan rs = df.eq(df) @@ -701,7 +705,7 @@ def test_sub_alignment_with_duplicate_index(self): @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"]) def test_arithmetic_with_duplicate_columns(self, op): # operations - df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) + df = DataFrame({"A": 
np.arange(10), "B": np.random.default_rng(2).rand(10)}) expected = getattr(df, op)(df) expected.columns = ["A", "A"] df.columns = ["A", "A"] @@ -1009,7 +1013,7 @@ def test_arith_non_pandas_object(self): added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val2, axis="index"), added) - val3 = np.random.rand(*df.shape) + val3 = np.random.default_rng(2).rand(*df.shape) added = DataFrame(df.values + val3, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val3), added) @@ -1162,7 +1166,7 @@ def test_arithmetic_midx_cols_different_dtypes_different_order(self): def test_frame_with_zero_len_series_corner_cases(): # GH#28600 # easy all-float case - df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "B"]) + df = DataFrame(np.random.default_rng(2).randn(6).reshape(3, 2), columns=["A", "B"]) ser = Series(dtype=np.float64) result = df + ser @@ -1210,7 +1214,9 @@ def test_frame_single_columns_object_sum_axis_1(): class TestFrameArithmeticUnsorted: def test_frame_add_tz_mismatch_converts_to_utc(self): rng = pd.date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") - df = DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"]) + df = DataFrame( + np.random.default_rng(2).randn(len(rng)), index=rng, columns=["a"] + ) df_moscow = df.tz_convert("Europe/Moscow") result = df + df_moscow @@ -1221,7 +1227,7 @@ def test_frame_add_tz_mismatch_converts_to_utc(self): def test_align_frame(self): rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") - ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + ts = DataFrame(np.random.default_rng(2).randn(len(rng), 3), index=rng) result = ts + ts[::2] expected = ts + ts @@ -1229,7 +1235,7 @@ def test_align_frame(self): tm.assert_frame_equal(result, expected) half = ts[::2] - result = ts + half.take(np.random.permutation(len(half))) + result = ts + half.take(np.random.default_rng(2).permutation(len(half))) tm.assert_frame_equal(result, expected) 
@pytest.mark.parametrize( @@ -1694,7 +1700,9 @@ def test_inplace_ops_identity(self): # GH 5104 # make sure that we are actually changing the object s_orig = Series([1, 2, 3]) - df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5)) + df_orig = DataFrame( + np.random.default_rng(2).randint(0, 5, size=10).reshape(-1, 5) + ) # no dtype change s = s_orig.copy() @@ -1729,7 +1737,7 @@ def test_inplace_ops_identity(self): assert df._mgr is df2._mgr # mixed dtype - arr = np.random.randint(0, 10, size=5) + arr = np.random.default_rng(2).randint(0, 10, size=5) df_orig = DataFrame({"A": arr.copy(), "B": "foo"}) df = df_orig.copy() df2 = df @@ -1797,7 +1805,9 @@ def test_inplace_ops_identity2(self, op): def test_alignment_non_pandas(self, val): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] - df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), index=index, columns=columns + ) align = DataFrame._align_for_op @@ -1813,7 +1823,9 @@ def test_alignment_non_pandas(self, val): def test_alignment_non_pandas_length_mismatch(self, val): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] - df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), index=index, columns=columns + ) align = DataFrame._align_for_op # length mismatch @@ -1827,7 +1839,9 @@ def test_alignment_non_pandas_length_mismatch(self, val): def test_alignment_non_pandas_index_columns(self): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] - df = DataFrame(np.random.randn(3, 3), index=index, columns=columns) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), index=index, columns=columns + ) align = DataFrame._align_for_op val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) @@ -1912,7 +1926,9 @@ def test_pow_nan_with_zero(): def test_dataframe_series_extension_dtypes(): # https://github.com/pandas-dev/pandas/issues/34311 - df = DataFrame(np.random.randint(0, 100, 
(10, 3)), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).randint(0, 100, (10, 3)), columns=["a", "b", "c"] + ) ser = Series([1, 2, 3], index=["a", "b", "c"]) expected = df.to_numpy("int64") + ser.to_numpy("int64").reshape(-1, 3) @@ -1927,7 +1943,7 @@ def test_dataframe_series_extension_dtypes(): def test_dataframe_blockwise_slicelike(): # GH#34367 - arr = np.random.randint(0, 1000, (100, 10)) + arr = np.random.default_rng(2).randint(0, 1000, (100, 10)) df1 = DataFrame(arr) # Explicit cast to float to avoid implicit cast when setting nan df2 = df1.copy().astype({1: "float", 3: "float", 7: "float"}) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 00df0530fe70f..f02270ea7d45e 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -335,7 +335,7 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write): # this is chained, but ok with option_context("chained_assignment", None): Y = DataFrame( - np.random.random((4, 4)), + np.random.default_rng(2).random((4, 4)), index=("a", "b", "c", "d"), columns=("e", "f", "g", "h"), ) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8a4d1624fcb30..2b8d824e0aa2d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -13,7 +13,6 @@ timedelta, ) import functools -import random import re import warnings @@ -365,10 +364,15 @@ def test_constructor_list_of_2d_raises(self): def test_constructor_mixed_dtypes(self, typ, ad): if typ == "int": dtypes = MIXED_INT_DTYPES - arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes] + arrays = [ + np.array(np.random.default_rng(2).rand(10), dtype=d) for d in dtypes + ] elif typ == "float": dtypes = MIXED_FLOAT_DTYPES - arrays = [np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes] + arrays = [ + np.array(np.random.default_rng(2).randint(10, 
size=10), dtype=d) + for d in dtypes + ] for d, a in zip(dtypes, arrays): assert a.dtype == d @@ -382,8 +386,8 @@ def test_constructor_mixed_dtypes(self, typ, ad): def test_constructor_complex_dtypes(self): # GH10952 - a = np.random.rand(10).astype(np.complex64) - b = np.random.rand(10).astype(np.complex128) + a = np.random.default_rng(2).rand(10).astype(np.complex64) + b = np.random.default_rng(2).rand(10).astype(np.complex128) df = DataFrame({"a": a, "b": b}) assert a.dtype == df.a.dtype @@ -485,7 +489,7 @@ def test_constructor_numpy_uints(self, values): def test_constructor_ordereddict(self): nitems = 100 nums = list(range(nitems)) - random.shuffle(nums) + np.random.default_rng(2).shuffle(nums) expected = [f"A{i:d}" for i in nums] df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) assert expected == list(df.columns) @@ -702,11 +706,15 @@ def test_constructor_error_msgs(self): # wrong size axis labels msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" with pytest.raises(ValueError, match=msg): - DataFrame(np.random.rand(2, 3), columns=["A", "B", "C"], index=[1]) + DataFrame( + np.random.default_rng(2).rand(2, 3), columns=["A", "B", "C"], index=[1] + ) msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" with pytest.raises(ValueError, match=msg): - DataFrame(np.random.rand(2, 3), columns=["A", "B"], index=[1, 2]) + DataFrame( + np.random.default_rng(2).rand(2, 3), columns=["A", "B"], index=[1, 2] + ) # gh-26429 msg = "2 columns passed, passed data had 10 columns" @@ -769,7 +777,7 @@ def test_constructor_dict_cast2(self): # can't cast to float test_data = { "A": dict(zip(range(20), tm.makeStringIndex(20))), - "B": dict(zip(range(15), np.random.randn(15))), + "B": dict(zip(range(15), np.random.default_rng(2).randn(15))), } with pytest.raises(ValueError, match="could not convert string"): DataFrame(test_data, dtype=float) @@ -945,7 +953,7 @@ def test_constructor_extension_scalar_data(self, data, dtype): def 
test_nested_dict_frame_constructor(self): rng = pd.period_range("1/1/2000", periods=5) - df = DataFrame(np.random.randn(10, 5), columns=rng) + df = DataFrame(np.random.default_rng(2).randn(10, 5), columns=rng) data = {} for col in df.columns: @@ -1188,7 +1196,7 @@ def test_constructor_scalar_inference(self): assert df["object"].dtype == np.object_ def test_constructor_arrays_and_scalars(self): - df = DataFrame({"a": np.random.randn(10), "b": True}) + df = DataFrame({"a": np.random.default_rng(2).randn(10), "b": True}) exp = DataFrame({"a": df["a"].values, "b": [True] * 10}) tm.assert_frame_equal(df, exp) @@ -1210,11 +1218,11 @@ def test_constructor_empty_dataframe(self): def test_constructor_more(self, float_frame): # used to be in test_matrix.py - arr = np.random.randn(10) + arr = np.random.default_rng(2).randn(10) dm = DataFrame(arr, columns=["A"], index=np.arange(10)) assert dm.values.ndim == 2 - arr = np.random.randn(0) + arr = np.random.default_rng(2).randn(0) dm = DataFrame(arr) assert dm.values.ndim == 2 assert dm.values.ndim == 2 @@ -1491,7 +1499,10 @@ class CustomDict(dict): tm.assert_frame_equal(result, result_custom) def test_constructor_ragged(self): - data = {"A": np.random.randn(10), "B": np.random.randn(8)} + data = { + "A": np.random.default_rng(2).randn(10), + "B": np.random.default_rng(2).randn(8), + } with pytest.raises(ValueError, match="All arrays must be of the same length"): DataFrame(data) @@ -1624,7 +1635,7 @@ def test_constructor_Series_named(self): tm.assert_index_equal(df.index, a.index) # ndarray like - arr = np.random.randn(10) + arr = np.random.default_rng(2).randn(10) s = Series(arr, name="x") df = DataFrame(s) expected = DataFrame({"x": s}) @@ -2436,20 +2447,20 @@ def test_frame_ctor_datetime64_column(self): rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") dates = np.asarray(rng) - df = DataFrame({"A": np.random.randn(len(rng)), "B": dates}) + df = DataFrame({"A": np.random.default_rng(2).randn(len(rng)), "B": 
dates}) assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) def test_dataframe_constructor_infer_multiindex(self): index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]] multi = DataFrame( - np.random.randn(4, 4), + np.random.default_rng(2).randn(4, 4), index=[np.array(x) for x in index_lists], ) assert isinstance(multi.index, MultiIndex) assert not isinstance(multi.columns, MultiIndex) - multi = DataFrame(np.random.randn(4, 4), columns=index_lists) + multi = DataFrame(np.random.default_rng(2).randn(4, 4), columns=index_lists) assert isinstance(multi.columns, MultiIndex) @pytest.mark.parametrize( @@ -2668,10 +2679,10 @@ def test_construct_with_strings_and_none(self): class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M") - s1 = Series(np.random.randn(len(rng1)), rng1) + s1 = Series(np.random.default_rng(2).randn(len(rng1)), rng1) rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M") - s2 = Series(np.random.randn(len(rng2)), rng2) + s2 = Series(np.random.default_rng(2).randn(len(rng2)), rng2) df = DataFrame({"s1": s1, "s2": s2}) exp = pd.period_range("1/1/1980", "1/1/2012", freq="M") @@ -2745,7 +2756,7 @@ def test_floating_values_integer_dtype(self): # GH#40110 make DataFrame behavior with arraylike floating data and # inty dtype match Series behavior - arr = np.random.randn(10, 5) + arr = np.random.default_rng(2).randn(10, 5) # GH#49599 in 2.0 we raise instead of either # a) silently ignoring dtype and returningfloat (the old Series behavior) or @@ -2977,7 +2988,7 @@ def test_construction_from_ndarray_datetimelike(self): assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays) def test_construction_from_ndarray_with_eadtype_mismatched_columns(self): - arr = np.random.randn(10, 2) + arr = np.random.default_rng(2).randn(10, 2) dtype = pd.array([2.0]).dtype msg = r"len\(arrays\) must match len\(columns\)" with 
pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/frame/test_iteration.py b/pandas/tests/frame/test_iteration.py index 6d4849d60084f..a0cae2ffa4e8b 100644 --- a/pandas/tests/frame/test_iteration.py +++ b/pandas/tests/frame/test_iteration.py @@ -92,7 +92,8 @@ def test_itertuples(self, float_frame): tm.assert_series_equal(ser, expected) df = DataFrame( - {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"] + {"floats": np.random.default_rng(2).randn(5), "ints": range(5)}, + columns=["floats", "ints"], ) for tup in df.itertuples(index=False): diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index bd708408f4246..0ca53b17177c4 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -21,7 +21,7 @@ class TestDataFrameNonuniqueIndexes: def test_setattr_columns_vs_construct_with_columns(self): # assignment # GH 3687 - arr = np.random.randn(3, 2) + arr = np.random.default_rng(2).randn(3, 2) idx = list(range(2)) df = DataFrame(arr, columns=["A", "A"]) df.columns = idx @@ -164,7 +164,7 @@ def test_dup_across_dtypes(self): def test_column_dups_indexes(self): # check column dups with index equal and not equal to df's index df = DataFrame( - np.random.randn(5, 3), + np.random.default_rng(2).randn(5, 3), index=["a", "b", "c", "d", "e"], columns=["A", "B", "A"], ) @@ -182,13 +182,13 @@ def test_changing_dtypes_with_duplicate_columns(self): # multiple assignments that change dtypes # the location indexer is a slice # GH 6120 - df = DataFrame(np.random.randn(5, 2), columns=["that", "that"]) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=["that", "that"]) expected = DataFrame(1.0, index=range(5), columns=["that", "that"]) df["that"] = 1.0 check(df, expected) - df = DataFrame(np.random.rand(5, 2), columns=["that", "that"]) + df = DataFrame(np.random.default_rng(2).rand(5, 2), columns=["that", "that"]) expected = 
DataFrame(1, index=range(5), columns=["that", "that"]) df["that"] = 1 @@ -295,8 +295,8 @@ def test_multi_dtype2(self): def test_dups_across_blocks(self, using_array_manager): # dups across blocks - df_float = DataFrame(np.random.randn(10, 3), dtype="float64") - df_int = DataFrame(np.random.randn(10, 3).astype("int64")) + df_float = DataFrame(np.random.default_rng(2).randn(10, 3), dtype="float64") + df_int = DataFrame(np.random.default_rng(2).randn(10, 3).astype("int64")) df_bool = DataFrame(True, index=df_float.index, columns=df_float.columns) df_object = DataFrame("foo", index=df_float.index, columns=df_float.columns) df_dt = DataFrame( diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index b734dafb6c31b..083421fe72b90 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -31,7 +31,7 @@ def test_sum_deprecated_axis_behavior(self): # GH#52042 deprecated behavior of df.sum(axis=None), which gets # called when we do np.sum(df) - arr = np.random.randn(4, 3) + arr = np.random.default_rng(2).randn(4, 3) df = DataFrame(arr) msg = "The behavior of DataFrame.sum with axis=None is deprecated" diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index ae199d5c373d5..4f2a139767f5a 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -132,7 +132,7 @@ def test_ops(self, op_str, op, rop, n): def test_dataframe_sub_numexpr_path(self): # GH7192: Note we need a large number of rows to ensure this # goes through the numexpr path - df = DataFrame({"A": np.random.randn(25000)}) + df = DataFrame({"A": np.random.default_rng(2).randn(25000)}) df.iloc[0:5] = np.nan expected = 1 - np.isnan(df.iloc[0:25]) result = (1 - np.isnan(df)).iloc[0:25] @@ -159,7 +159,7 @@ def test_query_empty_string(self): def test_eval_resolvers_as_list(self): # GH 14095 - df = DataFrame(np.random.randn(10, 2), columns=list("ab")) + df = 
DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("ab")) dict1 = {"a": 1} dict2 = {"b": 2} assert df.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"] @@ -167,7 +167,7 @@ def test_eval_resolvers_as_list(self): def test_eval_resolvers_combined(self): # GH 34966 - df = DataFrame(np.random.randn(10, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("ab")) dict1 = {"c": 2} # Both input and default index/column resolvers should be usable @@ -187,10 +187,10 @@ def test_eval_object_dtype_binop(self): class TestDataFrameQueryWithMultiIndex: def test_query_with_named_multiindex(self, parser, engine): skip_if_no_pandas_parser(parser) - a = np.random.choice(["red", "green"], size=10) - b = np.random.choice(["eggs", "ham"], size=10) + a = np.random.default_rng(2).choice(["red", "green"], size=10) + b = np.random.default_rng(2).choice(["eggs", "ham"], size=10) index = MultiIndex.from_arrays([a, b], names=["color", "food"]) - df = DataFrame(np.random.randn(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).randn(10, 2), index=index) ind = Series( df.index.get_level_values("color").values, index=index, name="color" ) @@ -237,10 +237,10 @@ def test_query_with_named_multiindex(self, parser, engine): def test_query_with_unnamed_multiindex(self, parser, engine): skip_if_no_pandas_parser(parser) - a = np.random.choice(["red", "green"], size=10) - b = np.random.choice(["eggs", "ham"], size=10) + a = np.random.default_rng(2).choice(["red", "green"], size=10) + b = np.random.default_rng(2).choice(["eggs", "ham"], size=10) index = MultiIndex.from_arrays([a, b]) - df = DataFrame(np.random.randn(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).randn(10, 2), index=index) ind = Series(df.index.get_level_values(0).values, index=index) res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) @@ -325,11 +325,11 @@ def test_query_with_unnamed_multiindex(self, parser, engine): def 
test_query_with_partially_named_multiindex(self, parser, engine): skip_if_no_pandas_parser(parser) - a = np.random.choice(["red", "green"], size=10) + a = np.random.default_rng(2).choice(["red", "green"], size=10) b = np.arange(10) index = MultiIndex.from_arrays([a, b]) index.names = [None, "rating"] - df = DataFrame(np.random.randn(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).randn(10, 2), index=index) res = df.query("rating == 1", parser=parser, engine=engine) ind = Series( df.index.get_level_values("rating").values, index=index, name="rating" @@ -395,7 +395,7 @@ def parser(self): def test_date_query_with_attribute_access(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -406,7 +406,7 @@ def test_date_query_with_attribute_access(self, engine, parser): tm.assert_frame_equal(res, expec) def test_date_query_no_attribute_access(self, engine, parser): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -416,19 +416,19 @@ def test_date_query_no_attribute_access(self, engine, parser): def test_date_query_with_NaT(self, engine, parser): n = 10 - df = DataFrame(np.random.randn(n, 3)) + df = DataFrame(np.random.default_rng(2).randn(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) - df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT - df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT + df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates3"] = pd.NaT 
res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] tm.assert_frame_equal(res, expec) def test_date_index_query(self, engine, parser): n = 10 - df = DataFrame(np.random.randn(n, 3)) + df = DataFrame(np.random.default_rng(2).randn(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) return_value = df.set_index("dates1", inplace=True, drop=True) @@ -440,7 +440,7 @@ def test_date_index_query(self, engine, parser): def test_date_index_query_with_NaT(self, engine, parser): n = 10 # Cast to object to avoid implicit cast when setting entry to pd.NaT below - df = DataFrame(np.random.randn(n, 3)).astype({0: object}) + df = DataFrame(np.random.default_rng(2).randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT @@ -456,7 +456,7 @@ def test_date_index_query_with_NaT_duplicates(self, engine, parser): d["dates1"] = date_range("1/1/2012", periods=n) d["dates3"] = date_range("1/1/2014", periods=n) df = DataFrame(d) - df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT return_value = df.set_index("dates1", inplace=True, drop=True) assert return_value is None res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) @@ -489,7 +489,7 @@ def test_query_syntax_error(self, engine, parser): def test_query_scope(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.randn(20, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).randn(20, 2), columns=list("ab")) a, b = 1, 2 # noqa: F841 res = df.query("a > b", engine=engine, parser=parser) @@ -512,7 +512,9 @@ def test_query_scope(self, engine, parser): def test_query_doesnt_pickup_local(self, engine, parser): n = m = 10 - df = DataFrame(np.random.randint(m, size=(n, 3)), 
columns=list("abc")) + df = DataFrame( + np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + ) # we don't pick up the local 'sin' with pytest.raises(UndefinedVariableError, match="name 'sin' is not defined"): @@ -520,7 +522,9 @@ def test_query_doesnt_pickup_local(self, engine, parser): def test_query_builtin(self, engine, parser): n = m = 10 - df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + df = DataFrame( + np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + ) df.index.name = "sin" msg = "Variables in expression.+" @@ -528,7 +532,7 @@ def test_query_builtin(self, engine, parser): df.query("sin > 5", engine=engine, parser=parser) def test_query(self, engine, parser): - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) tm.assert_frame_equal( df.query("a < b", engine=engine, parser=parser), df[df.a < df.b] @@ -540,7 +544,7 @@ def test_query(self, engine, parser): def test_query_index_with_name(self, engine, parser): df = DataFrame( - np.random.randint(10, size=(10, 3)), + np.random.default_rng(2).randint(10, size=(10, 3)), index=Index(range(10), name="blob"), columns=["a", "b", "c"], ) @@ -555,7 +559,7 @@ def test_query_index_with_name(self, engine, parser): def test_query_index_without_name(self, engine, parser): df = DataFrame( - np.random.randint(10, size=(10, 3)), + np.random.default_rng(2).randint(10, size=(10, 3)), index=range(10), columns=["a", "b", "c"], ) @@ -573,8 +577,8 @@ def test_query_index_without_name(self, engine, parser): def test_nested_scope(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.randn(5, 3)) - df2 = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df2 = DataFrame(np.random.default_rng(2).randn(5, 3)) expected = df[(df > 0) & (df2 > 0)] result = df.query("(@df > 0) & (@df2 > 0)", engine=engine, 
parser=parser) @@ -594,7 +598,7 @@ def test_nested_scope(self, engine, parser): tm.assert_frame_equal(result, expected) def test_nested_raises_on_local_self_reference(self, engine, parser): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) # can't reference ourself b/c we're a local so @ is necessary with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): @@ -603,7 +607,9 @@ def test_nested_raises_on_local_self_reference(self, engine, parser): def test_local_syntax(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.randn(100, 10), columns=list("abcdefghij")) + df = DataFrame( + np.random.default_rng(2).randn(100, 10), columns=list("abcdefghij") + ) b = 1 expect = df[df.a < b] result = df.query("a < @b", engine=engine, parser=parser) @@ -616,7 +622,7 @@ def test_local_syntax(self, engine, parser): def test_chained_cmp_and_in(self, engine, parser): skip_if_no_pandas_parser(parser) cols = list("abc") - df = DataFrame(np.random.randn(100, len(cols)), columns=cols) + df = DataFrame(np.random.default_rng(2).randn(100, len(cols)), columns=cols) res = df.query( "a < b < c and a not in b not in c", engine=engine, parser=parser ) @@ -626,15 +632,15 @@ def test_chained_cmp_and_in(self, engine, parser): def test_local_variable_with_in(self, engine, parser): skip_if_no_pandas_parser(parser) - a = Series(np.random.randint(3, size=15), name="a") - b = Series(np.random.randint(10, size=15), name="b") + a = Series(np.random.default_rng(2).randint(3, size=15), name="a") + b = Series(np.random.default_rng(2).randint(10, size=15), name="b") df = DataFrame({"a": a, "b": b}) expected = df.loc[(df.b - 1).isin(a)] result = df.query("b - 1 in a", engine=engine, parser=parser) tm.assert_frame_equal(expected, result) - b = Series(np.random.randint(10, size=15), name="b") + b = Series(np.random.default_rng(2).randint(10, size=15), name="b") expected = df.loc[(b - 1).isin(a)] result = 
df.query("@b - 1 in a", engine=engine, parser=parser) tm.assert_frame_equal(expected, result) @@ -651,7 +657,7 @@ def test_query_undefined_local(self): engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.rand(10, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=list("ab")) with pytest.raises( UndefinedVariableError, match="local variable 'c' is not defined" ): @@ -663,18 +669,18 @@ def test_index_resolvers_come_after_columns_with_the_same_name( n = 1 # noqa: F841 a = np.r_[20:101:20] - df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + df = DataFrame({"index": a, "b": np.random.default_rng(2).randn(a.size)}) df.index.name = "index" result = df.query("index > 5", engine=engine, parser=parser) expected = df[df["index"] > 5] tm.assert_frame_equal(result, expected) - df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + df = DataFrame({"index": a, "b": np.random.default_rng(2).randn(a.size)}) result = df.query("ilevel_0 > 5", engine=engine, parser=parser) expected = df.loc[df.index[df.index > 5]] tm.assert_frame_equal(result, expected) - df = DataFrame({"a": a, "b": np.random.randn(a.size)}) + df = DataFrame({"a": a, "b": np.random.default_rng(2).randn(a.size)}) df.index.name = "a" result = df.query("a > 5", engine=engine, parser=parser) expected = df[df.a > 5] @@ -687,7 +693,12 @@ def test_index_resolvers_come_after_columns_with_the_same_name( @pytest.mark.parametrize("op, f", [["==", operator.eq], ["!=", operator.ne]]) def test_inf(self, op, f, engine, parser): n = 10 - df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)}) + df = DataFrame( + { + "a": np.random.default_rng(2).rand(n), + "b": np.random.default_rng(2).rand(n), + } + ) df.loc[::2, 0] = np.inf q = f"a {op} inf" expected = df[f(df.a, np.inf)] @@ -712,15 +723,24 @@ def test_check_tz_aware_index_query(self, tz_aware_fixture): def test_method_calls_in_query(self, engine, parser): # 
https://github.com/pandas-dev/pandas/issues/22435 n = 10 - df = DataFrame({"a": 2 * np.random.rand(n), "b": np.random.rand(n)}) + df = DataFrame( + { + "a": 2 * np.random.default_rng(2).rand(n), + "b": np.random.default_rng(2).rand(n), + } + ) expected = df[df["a"].astype("int") == 0] result = df.query("a.astype('int') == 0", engine=engine, parser=parser) tm.assert_frame_equal(result, expected) df = DataFrame( { - "a": np.where(np.random.rand(n) < 0.5, np.nan, np.random.randn(n)), - "b": np.random.randn(n), + "a": np.where( + np.random.default_rng(2).rand(n) < 0.5, + np.nan, + np.random.default_rng(2).randn(n), + ), + "b": np.random.default_rng(2).randn(n), } ) expected = df[df["a"].notnull()] @@ -739,7 +759,7 @@ def parser(self): return "python" def test_date_query_no_attribute_access(self, engine, parser): - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -751,12 +771,12 @@ def test_date_query_no_attribute_access(self, engine, parser): def test_date_query_with_NaT(self, engine, parser): n = 10 - df = DataFrame(np.random.randn(n, 3)) + df = DataFrame(np.random.default_rng(2).randn(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) - df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT - df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT + df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates3"] = pd.NaT res = df.query( "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser ) @@ -765,7 +785,7 @@ def test_date_query_with_NaT(self, engine, parser): def test_date_index_query(self, engine, parser): n = 10 - df = DataFrame(np.random.randn(n, 3)) + df = 
DataFrame(np.random.default_rng(2).randn(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) return_value = df.set_index("dates1", inplace=True, drop=True) @@ -779,7 +799,7 @@ def test_date_index_query(self, engine, parser): def test_date_index_query_with_NaT(self, engine, parser): n = 10 # Cast to object to avoid implicit cast when setting entry to pd.NaT below - df = DataFrame(np.random.randn(n, 3)).astype({0: object}) + df = DataFrame(np.random.default_rng(2).randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT @@ -793,10 +813,10 @@ def test_date_index_query_with_NaT(self, engine, parser): def test_date_index_query_with_NaT_duplicates(self, engine, parser): n = 10 - df = DataFrame(np.random.randn(n, 3)) + df = DataFrame(np.random.default_rng(2).randn(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) - df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT return_value = df.set_index("dates1", inplace=True, drop=True) assert return_value is None msg = r"'BoolOp' nodes are not implemented" @@ -809,8 +829,8 @@ def test_nested_scope(self, engine, parser): result = pd.eval("x + 1", engine=engine, parser=parser) assert result == 2 - df = DataFrame(np.random.randn(5, 3)) - df2 = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df2 = DataFrame(np.random.default_rng(2).randn(5, 3)) # don't have the pandas parser msg = r"The '@' prefix is only supported by the pandas parser" @@ -858,7 +878,9 @@ def parser(self): def test_query_builtin(self, engine, parser): n = m = 10 - df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + df = DataFrame( + np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + ) df.index.name = "sin" expected = 
df[df.index > 5] @@ -877,7 +899,9 @@ def parser(self): def test_query_builtin(self, engine, parser): n = m = 10 - df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + df = DataFrame( + np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + ) df.index.name = "sin" expected = df[df.index > 5] @@ -887,7 +911,7 @@ def test_query_builtin(self, engine, parser): class TestDataFrameQueryStrings: def test_str_query_method(self, parser, engine): - df = DataFrame(np.random.randn(10, 1), columns=["b"]) + df = DataFrame(np.random.default_rng(2).randn(10, 1), columns=["b"]) df["strings"] = Series(list("aabbccddee")) expect = df[df.strings == "a"] @@ -928,7 +952,7 @@ def test_str_query_method(self, parser, engine): tm.assert_frame_equal(res, df[~df.strings.isin(["a"])]) def test_str_list_query_method(self, parser, engine): - df = DataFrame(np.random.randn(10, 1), columns=["b"]) + df = DataFrame(np.random.default_rng(2).randn(10, 1), columns=["b"]) df["strings"] = Series(list("aabbccddee")) expect = df[df.strings.isin(["a", "b"])] @@ -967,8 +991,8 @@ def test_query_with_string_columns(self, parser, engine): { "a": list("aaaabbbbcccc"), "b": list("aabbccddeeff"), - "c": np.random.randint(5, size=12), - "d": np.random.randint(9, size=12), + "c": np.random.default_rng(2).randint(5, size=12), + "d": np.random.default_rng(2).randint(9, size=12), } ) if parser == "pandas": @@ -993,8 +1017,8 @@ def test_object_array_eq_ne(self, parser, engine): { "a": list("aaaabbbbcccc"), "b": list("aabbccddeeff"), - "c": np.random.randint(5, size=12), - "d": np.random.randint(9, size=12), + "c": np.random.default_rng(2).randint(5, size=12), + "d": np.random.default_rng(2).randint(9, size=12), } ) res = df.query("a == b", parser=parser, engine=engine) @@ -1041,7 +1065,7 @@ def test_query_with_nested_special_character(self, parser, engine): ], ) def test_query_lex_compare_strings(self, parser, engine, op, func): - a = Series(np.random.choice(list("abcde"), 20)) + a = 
Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) @@ -1051,7 +1075,7 @@ def test_query_lex_compare_strings(self, parser, engine, op, func): def test_query_single_element_booleans(self, parser, engine): columns = "bid", "bidsize", "ask", "asksize" - data = np.random.randint(2, size=(1, len(columns))).astype(bool) + data = np.random.default_rng(2).randint(2, size=(1, len(columns))).astype(bool) df = DataFrame(data, columns=columns) res = df.query("bid & ask", engine=engine, parser=parser) expected = df[df.bid & df.ask] @@ -1074,7 +1098,7 @@ def test_query_string_scalar_variable(self, parser, engine): class TestDataFrameEvalWithFrame: @pytest.fixture def frame(self): - return DataFrame(np.random.randn(10, 3), columns=list("abc")) + return DataFrame(np.random.default_rng(2).randn(10, 3), columns=list("abc")) def test_simple_expr(self, frame, parser, engine): res = frame.eval("a + b", engine=engine, parser=parser) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index b4a4324593d22..f0c6d97ac7627 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -453,7 +453,7 @@ def test_var_std(self, datetime_frame): expected = datetime_frame.apply(lambda x: x.var(ddof=4)) tm.assert_almost_equal(result, expected) - arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + arr = np.repeat(np.random.default_rng(2).random((1, 1000)), 1000, 0) result = nanops.nanvar(arr, axis=0) assert not (result < 0).any() @@ -464,13 +464,17 @@ def test_var_std(self, datetime_frame): @pytest.mark.parametrize("meth", ["sem", "var", "std"]) def test_numeric_only_flag(self, meth): # GH 9201 - df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + df1 = DataFrame( + np.random.default_rng(2).randn(5, 3), columns=["foo", "bar", "baz"] + ) # Cast to object to avoid implicit cast when setting entry to "100" below df1 = df1.astype({"foo": 
object}) # set one entry to a number in str format df1.loc[0, "foo"] = "100" - df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + df2 = DataFrame( + np.random.default_rng(2).randn(5, 3), columns=["foo", "bar", "baz"] + ) # Cast to object to avoid implicit cast when setting entry to "a" below df2 = df2.astype({"foo": object}) # set one entry to a non-number str @@ -497,7 +501,7 @@ def test_sem(self, datetime_frame): expected = datetime_frame.apply(lambda x: x.std(ddof=4) / np.sqrt(len(x))) tm.assert_almost_equal(result, expected) - arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + arr = np.repeat(np.random.default_rng(2).random((1, 1000)), 1000, 0) result = nanops.nansem(arr, axis=0) assert not (result < 0).any() @@ -932,7 +936,7 @@ def test_mean_datetimelike_numeric_only_false(self): def test_mean_extensionarray_numeric_only_true(self): # https://github.com/pandas-dev/pandas/issues/33256 - arr = np.random.randint(1000, size=(10, 5)) + arr = np.random.default_rng(2).randint(1000, size=(10, 5)) df = DataFrame(arr, dtype="Int64") result = df.mean(numeric_only=True) expected = DataFrame(arr).mean() @@ -1120,7 +1124,7 @@ def test_idxmax_dt64_multicolumn_axis1(self): def test_any_all_mixed_float(self, opname, axis, bool_only, float_string_frame): # make sure op works on mixed-type frame mixed = float_string_frame - mixed["_bool_"] = np.random.randn(len(mixed)) > 0.5 + mixed["_bool_"] = np.random.default_rng(2).randn(len(mixed)) > 0.5 getattr(mixed, opname)(axis=axis, bool_only=bool_only) @@ -1751,7 +1755,7 @@ def test_prod_sum_min_count_mixed_object(): def test_reduction_axis_none_returns_scalar(method, numeric_only): # GH#21597 As of 2.0, axis=None reduces over all axes. 
- df = DataFrame(np.random.randn(4, 4)) + df = DataFrame(np.random.default_rng(2).randn(4, 4)) result = getattr(df, method)(axis=None, numeric_only=numeric_only) np_arr = df.to_numpy() diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 42e018c781111..4c48d4ac3e762 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -31,7 +31,10 @@ def test_repr_bytes_61_lines(self): lets = list("ACDEFGHIJKLMNOP") slen = 50 nseqs = 1000 - words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)] + words = [ + [np.random.default_rng(2).choice(lets) for x in range(slen)] + for _ in range(nseqs) + ] df = DataFrame(words).astype("U1") assert (df.dtypes == object).all() @@ -43,7 +46,7 @@ def test_repr_bytes_61_lines(self): def test_repr_unicode_level_names(self, frame_or_series): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) - obj = DataFrame(np.random.randn(2, 4), index=index) + obj = DataFrame(np.random.default_rng(2).randn(2, 4), index=index) obj = tm.get_obj(obj, frame_or_series) repr(obj) @@ -153,7 +156,8 @@ def test_repr_mixed(self, float_string_frame): def test_repr_mixed_big(self): # big mixed biggie = DataFrame( - {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, index=range(200) + {"A": np.random.default_rng(2).randn(200), "B": tm.makeStringIndex(200)}, + index=range(200), ) biggie.loc[:20, "A"] = np.nan biggie.loc[:20, "B"] = np.nan @@ -256,7 +260,9 @@ def test_str_to_bytes_raises(self): bytes(df) def test_very_wide_info_repr(self): - df = DataFrame(np.random.randn(10, 20), columns=tm.rands_array(10, 20)) + df = DataFrame( + np.random.default_rng(2).randn(10, 20), columns=tm.rands_array(10, 20) + ) repr(df) def test_repr_column_name_unicode_truncation_bug(self): @@ -335,7 +341,7 @@ def test_frame_datetime64_pre1900_repr(self): def test_frame_to_string_with_periodindex(self): index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") 
- frame = DataFrame(np.random.randn(3, 4), index=index) + frame = DataFrame(np.random.default_rng(2).randn(3, 4), index=index) # it works! frame.to_string() diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index ffdcb06ee2847..290bde4242282 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -347,7 +347,7 @@ def test_unstack_preserve_dtypes(self): "state": ["IL", "MI", "NC"], "index": ["a", "b", "c"], "some_categories": Series(["a", "b", "c"]).astype("category"), - "A": np.random.rand(3), + "A": np.random.default_rng(2).rand(3), "B": 1, "C": "foo", "D": pd.Timestamp("20010102"), @@ -384,7 +384,7 @@ def unstack_and_compare(df, column_name): def test_stack_ints(self): columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3))) - df = DataFrame(np.random.randn(30, 27), columns=columns) + df = DataFrame(np.random.default_rng(2).randn(30, 27), columns=columns) tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) tm.assert_frame_equal( @@ -409,7 +409,7 @@ def test_stack_mixed_levels(self): ], names=["exp", "animal", "hair_length"], ) - df = DataFrame(np.random.randn(4, 4), columns=columns) + df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=columns) animal_hair_stacked = df.stack(level=["animal", "hair_length"]) exp_hair_stacked = df.stack(level=["exp", "hair_length"]) @@ -453,7 +453,7 @@ def test_stack_int_level_names(self): ], names=["exp", "animal", "hair_length"], ) - df = DataFrame(np.random.randn(4, 4), columns=columns) + df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=columns) exp_animal_stacked = df.stack(level=["exp", "animal"]) animal_hair_stacked = df.stack(level=["animal", "hair_length"]) @@ -973,7 +973,7 @@ def test_unstack_nan_index5(self): "1st": [1, 2, 1, 2, 1, 2], "2nd": date_range("2014-02-01", periods=6, freq="D"), "jim": 100 + np.arange(6), - "joe": (np.random.randn(6) * 10).round(2), 
+ "joe": (np.random.default_rng(2).randn(6) * 10).round(2), } ) @@ -1384,10 +1384,10 @@ def test_unstack_non_slice_like_blocks(using_array_manager): mi = MultiIndex.from_product([range(5), ["A", "B", "C"]]) df = DataFrame( { - 0: np.random.randn(15), - 1: np.random.randn(15).astype(np.int64), - 2: np.random.randn(15), - 3: np.random.randn(15), + 0: np.random.default_rng(2).randn(15), + 1: np.random.default_rng(2).randn(15).astype(np.int64), + 2: np.random.default_rng(2).randn(15), + 3: np.random.default_rng(2).randn(15), }, index=mi, ) @@ -1504,7 +1504,7 @@ def test_unstack_multiple_no_empty_columns(self): [(0, "foo", 0), (0, "bar", 0), (1, "baz", 1), (1, "qux", 1)] ) - s = Series(np.random.randn(4), index=index) + s = Series(np.random.default_rng(2).randn(4), index=index) unstacked = s.unstack([1, 2]) expected = unstacked.dropna(axis=1, how="all") @@ -1891,7 +1891,7 @@ def test_stack_multiple_bug(self): id_col = ([1] * 3) + ([2] * 3) name = (["a"] * 3) + (["b"] * 3) date = pd.to_datetime(["2013-01-03", "2013-01-04", "2013-01-05"] * 2) - var1 = np.random.randint(0, 100, 6) + var1 = np.random.default_rng(2).randint(0, 100, 6) df = DataFrame({"ID": id_col, "NAME": name, "DATE": date, "VAR1": var1}) multi = df.set_index(["DATE", "ID"]) @@ -1940,12 +1940,12 @@ def test_unstack_sparse_keyspace(self): df = DataFrame( { - "A": np.random.randint(100, size=NUM_ROWS), - "B": np.random.randint(300, size=NUM_ROWS), - "C": np.random.randint(-7, 7, size=NUM_ROWS), - "D": np.random.randint(-19, 19, size=NUM_ROWS), - "E": np.random.randint(3000, size=NUM_ROWS), - "F": np.random.randn(NUM_ROWS), + "A": np.random.default_rng(2).randint(100, size=NUM_ROWS), + "B": np.random.default_rng(2).randint(300, size=NUM_ROWS), + "C": np.random.default_rng(2).randint(-7, 7, size=NUM_ROWS), + "D": np.random.default_rng(2).randint(-19, 19, size=NUM_ROWS), + "E": np.random.default_rng(2).randint(3000, size=NUM_ROWS), + "F": np.random.default_rng(2).randn(NUM_ROWS), } ) @@ -1961,7 +1961,7 @@ def 
test_unstack_unobserved_keys(self): index = MultiIndex(levels, codes) - df = DataFrame(np.random.randn(4, 2), index=index) + df = DataFrame(np.random.default_rng(2).randn(4, 2), index=index) result = df.unstack() assert len(result.columns) == 4 @@ -1983,7 +1983,7 @@ def __init__(self, *args, **kwargs) -> None: with monkeypatch.context() as m: m.setattr(reshape_lib, "_Unstacker", MockUnstacker) df = DataFrame( - np.random.randn(2**16, 2), + np.random.default_rng(2).randn(2**16, 2), index=[np.arange(2**16), np.arange(2**16)], ) msg = "The following operation may generate" diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 3d1e9d26c1ea6..f49f1165094b1 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -497,8 +497,7 @@ def test_subclassed_melt(self): def test_subclassed_wide_to_long(self): # GH 9762 - np.random.seed(123) - x = np.random.randn(3) + x = np.random.default_rng(2).randn(3) df = tm.SubclassedDataFrame( { "A1970": {0: "a", 1: "b", 2: "c"}, @@ -661,10 +660,10 @@ def test_corrwith(self): index = ["a", "b", "c", "d", "e"] columns = ["one", "two", "three", "four"] df1 = tm.SubclassedDataFrame( - np.random.randn(5, 4), index=index, columns=columns + np.random.default_rng(2).randn(5, 4), index=index, columns=columns ) df2 = tm.SubclassedDataFrame( - np.random.randn(4, 4), index=index[:4], columns=columns + np.random.default_rng(2).randn(4, 4), index=index[:4], columns=columns ) correls = df1.corrwith(df2, axis=1, drop=True, method="kendall") diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 79f055909fdea..46064afb12b4e 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -74,8 +74,8 @@ def test_metadata_propagation_indiv_groupby(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": 
np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) result = df.groupby("A").sum() @@ -84,7 +84,7 @@ def test_metadata_propagation_indiv_groupby(self): def test_metadata_propagation_indiv_resample(self): # resample df = DataFrame( - np.random.randn(1000, 2), + np.random.default_rng(2).randn(1000, 2), index=date_range("20130101", periods=1000, freq="s"), ) result = df.resample("1T") @@ -114,9 +114,12 @@ def finalize(self, other, method=None, **kwargs): m.setattr(DataFrame, "_metadata", ["filename"]) m.setattr(DataFrame, "__finalize__", finalize) - np.random.seed(10) - df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["a", "b"]) - df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["c", "d"]) + df1 = DataFrame( + np.random.default_rng(2).randint(0, 4, (3, 2)), columns=["a", "b"] + ) + df2 = DataFrame( + np.random.default_rng(2).randint(0, 4, (3, 2)), columns=["c", "d"] + ) DataFrame._metadata = ["filename"] df1.filename = "fname1.csv" df2.filename = "fname2.csv" @@ -126,7 +129,9 @@ def finalize(self, other, method=None, **kwargs): # concat # GH#6927 - df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list("ab")) + df1 = DataFrame( + np.random.default_rng(2).randint(0, 4, (3, 2)), columns=list("ab") + ) df1.filename = "foo" result = pd.concat([df1, df1]) @@ -183,7 +188,7 @@ def test_validate_bool_args(self, value): def test_unexpected_keyword(self): # GH8597 - df = DataFrame(np.random.randn(5, 2), columns=["jim", "joe"]) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=["jim", "joe"]) ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) ts = df["joe"].copy() ts[2] = np.nan diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 6226f97c73f92..55e5fc8076fed 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -46,7 +46,7 @@ def construct(box, shape, value=None, dtype=None, **kwargs): arr = np.repeat(arr, 
new_shape).reshape(shape) else: - arr = np.random.randn(*shape) + arr = np.random.default_rng(2).randn(*shape) return box(arr, dtype=dtype, **kwargs) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index ee0a7fb77f336..f384ced88389e 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -107,7 +107,7 @@ def test_nonbool_single_element_raise(self, data): def test_metadata_propagation_indiv_resample(self): # resample ts = Series( - np.random.rand(1000), + np.random.default_rng(2).rand(1000), index=date_range("20130101", periods=1000, freq="s"), name="foo", ) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2875e1ae80501..6ae3049e2b4d3 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -359,7 +359,7 @@ def test_agg_multiple_functions_maintain_order(df): def test_agg_multiple_functions_same_name(): # GH 30880 df = DataFrame( - np.random.randn(1000, 3), + np.random.default_rng(2).randn(1000, 3), index=pd.date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], ) @@ -381,7 +381,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): # GH 30880 # ohlc expands dimensions, so different test to the above is required. 
df = DataFrame( - np.random.randn(1000, 3), + np.random.default_rng(2).randn(1000, 3), index=pd.date_range("1/1/2012", freq="S", periods=1000, name="dti"), columns=Index(["A", "B", "C"], name="alpha"), ) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 873e3e73c7cf5..6011593f442c8 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -48,7 +48,7 @@ def test_cythonized_aggers(op_name): data = { "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan], "B": ["A", "B"] * 6, - "C": np.random.randn(12), + "C": np.random.default_rng(2).randn(12), } df = DataFrame(data) df.loc[2:10:2, "C"] = np.nan @@ -80,8 +80,8 @@ def test_cythonized_aggers(op_name): def test_cython_agg_boolean(): frame = DataFrame( { - "a": np.random.randint(0, 5, 50), - "b": np.random.randint(0, 2, 50).astype("bool"), + "a": np.random.default_rng(2).randint(0, 5, 50), + "b": np.random.default_rng(2).randint(0, 2, 50).astype("bool"), } ) result = frame.groupby("a")["b"].mean() @@ -94,13 +94,17 @@ def test_cython_agg_boolean(): def test_cython_agg_nothing_to_agg(): - frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) + frame = DataFrame( + {"a": np.random.default_rng(2).randint(0, 5, 50), "b": ["foo", "bar"] * 25} + ) msg = "Cannot use numeric_only=True with SeriesGroupBy.mean and non-numeric dtypes" with pytest.raises(TypeError, match=msg): frame.groupby("a")["b"].mean(numeric_only=True) - frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) + frame = DataFrame( + {"a": np.random.default_rng(2).randint(0, 5, 50), "b": ["foo", "bar"] * 25} + ) result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) expected = DataFrame( @@ -112,7 +116,7 @@ def test_cython_agg_nothing_to_agg(): def test_cython_agg_nothing_to_agg_with_dates(): frame = DataFrame( { - "a": np.random.randint(0, 5, 50), + "a": 
np.random.default_rng(2).randint(0, 5, 50), "b": ["foo", "bar"] * 25, "dates": pd.date_range("now", periods=50, freq="T"), } @@ -143,8 +147,8 @@ def test_cython_agg_return_dict(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) @@ -185,8 +189,8 @@ def test_cython_fail_agg(): ], ) def test__cython_agg_general(op, targop): - df = DataFrame(np.random.randn(1000)) - labels = np.random.randint(0, 50, size=1000).astype(float) + df = DataFrame(np.random.default_rng(2).randn(1000)) + labels = np.random.default_rng(2).randint(0, 50, size=1000).astype(float) result = df.groupby(labels)._cython_agg_general(op, alt=None, numeric_only=True) warn = FutureWarning if targop in com._cython_table else None diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index 2514e988e4e80..ba2fb04eae62a 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -354,8 +354,8 @@ def test_multilabel_numba_vs_cython(numba_supported_reductions): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) gb = df.groupby(["A", "B"]) @@ -374,8 +374,8 @@ def test_multilabel_udf_numba_vs_cython(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) gb = df.groupby(["A", "B"]) diff --git a/pandas/tests/groupby/aggregate/test_other.py 
b/pandas/tests/groupby/aggregate/test_other.py index 8772e3cfb45f4..34051ae3c996a 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -30,8 +30,8 @@ def test_agg_partial_failure_raises(): df = DataFrame( { - "data1": np.random.randn(5), - "data2": np.random.randn(5), + "data1": np.random.default_rng(2).randn(5), + "data2": np.random.default_rng(2).randn(5), "key1": ["a", "a", "b", "b", "a"], "key2": ["one", "two", "one", "two", "one"], } @@ -87,14 +87,14 @@ def test_agg_datetimes_mixed(): def test_agg_period_index(): prng = period_range("2012-1-1", freq="M", periods=3) - df = DataFrame(np.random.randn(3, 2), index=prng) + df = DataFrame(np.random.default_rng(2).randn(3, 2), index=prng) rs = df.groupby(level=0).sum() assert isinstance(rs.index, PeriodIndex) # GH 3579 index = period_range(start="1999-01", periods=5, freq="M") - s1 = Series(np.random.rand(len(index)), index=index) - s2 = Series(np.random.rand(len(index)), index=index) + s1 = Series(np.random.default_rng(2).rand(len(index)), index=index) + s2 = Series(np.random.default_rng(2).rand(len(index)), index=index) df = DataFrame.from_dict({"s1": s1, "s2": s2}) grouped = df.groupby(df.index.month) list(grouped) @@ -175,7 +175,7 @@ def test_aggregate_api_consistency(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": np.random.randn(8) + 1.0, + "C": np.random.default_rng(2).randn(8) + 1.0, "D": np.arange(8), } ) @@ -239,7 +239,7 @@ def test_agg_compat(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": np.random.randn(8) + 1.0, + "C": np.random.default_rng(2).randn(8) + 1.0, "D": np.arange(8), } ) @@ -260,7 +260,7 @@ def test_agg_nested_dicts(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": 
np.random.randn(8) + 1.0, + "C": np.random.default_rng(2).randn(8) + 1.0, "D": np.arange(8), } ) @@ -284,7 +284,7 @@ def test_agg_nested_dicts(): def test_agg_item_by_item_raise_typeerror(): - df = DataFrame(np.random.randint(10, size=(20, 10))) + df = DataFrame(np.random.default_rng(2).randint(10, size=(20, 10))) def raiseException(df): pprint_thing("----------------------------------------") @@ -346,9 +346,9 @@ def test_series_agg_multi_pure_python(): "shiny", "shiny", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index b1b1d455d5027..67e1aabda05bb 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -45,8 +45,8 @@ def df(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) @@ -72,8 +72,8 @@ def df_mixed_floats(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.array(np.random.randn(8), dtype="float32"), + "C": np.random.default_rng(2).randn(8), + "D": np.array(np.random.default_rng(2).randn(8), dtype="float32"), } ) @@ -121,9 +121,9 @@ def three_group(): "shiny", "shiny", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 832192d8a33e6..7df60a06b6eda 100644 --- 
a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -250,7 +250,7 @@ def test_apply_with_mixed_dtype(): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 df = DataFrame( { - "foo1": np.random.randn(6), + "foo1": np.random.default_rng(2).randn(6), "foo2": ["one", "two", "two", "three", "one", "two"], } ) @@ -347,7 +347,7 @@ def f(piece): ) dr = bdate_range("1/1/2000", periods=100) - ts = Series(np.random.randn(100), index=dr) + ts = Series(np.random.default_rng(2).randn(100), index=dr) grouped = ts.groupby(lambda x: x.month, group_keys=False) result = grouped.apply(f) @@ -401,9 +401,9 @@ def trans2(group): df = DataFrame( { - "A": np.random.randint(0, 5, 1000), - "B": np.random.randint(0, 5, 1000), - "C": np.random.randn(1000), + "A": np.random.default_rng(2).randint(0, 5, 1000), + "B": np.random.default_rng(2).randint(0, 5, 1000), + "C": np.random.default_rng(2).randn(1000), } ) @@ -586,11 +586,11 @@ def test_apply_corner_cases(): # #535, can't use sliding iterator N = 1000 - labels = np.random.randint(0, 100, size=N) + labels = np.random.default_rng(2).randint(0, 100, size=N) df = DataFrame( { "key": labels, - "value1": np.random.randn(N), + "value1": np.random.default_rng(2).randn(N), "value2": ["foo", "bar", "baz", "qux"] * (N // 4), } ) @@ -1123,7 +1123,7 @@ def test_apply_by_cols_equals_apply_by_rows_transposed(): # by_rows operation would work fine, but by_cols would throw a ValueError df = DataFrame( - np.random.random([6, 4]), + np.random.default_rng(2).random([6, 4]), columns=MultiIndex.from_product([["A", "B"], [1, 2]]), ) diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index 1df55abdc038d..a437f226b48b3 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -34,7 +34,7 @@ def test_mutate_groups(): + ["d"] * 2 + ["e"] * 2, "cat3": [f"g{x}" for x in range(1, 15)], - "val": np.random.randint(100, size=14), + "val": 
np.random.default_rng(2).randint(100, size=14), } ) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 3ab62bb7656b7..ab7e3a36d7506 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -207,11 +207,11 @@ def f(x): # more basic levels = ["foo", "bar", "baz", "qux"] - codes = np.random.randint(0, 4, size=100) + codes = np.random.default_rng(2).randint(0, 4, size=100) cats = Categorical.from_codes(codes, levels, ordered=True) - data = DataFrame(np.random.randn(100, 4)) + data = DataFrame(np.random.default_rng(2).randn(100, 4)) result = data.groupby(cats, observed=False).mean() @@ -462,9 +462,9 @@ def test_observed_perf(): # gh-14942 df = DataFrame( { - "cat": np.random.randint(0, 255, size=30000), - "int_id": np.random.randint(0, 255, size=30000), - "other_id": np.random.randint(0, 10000, size=30000), + "cat": np.random.default_rng(2).randint(0, 255, size=30000), + "int_id": np.random.default_rng(2).randint(0, 255, size=30000), + "other_id": np.random.default_rng(2).randint(0, 10000, size=30000), "foo": 0, } ) @@ -642,11 +642,11 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): def test_datetime(): # GH9049: ensure backward compatibility levels = pd.date_range("2014-01-01", periods=4) - codes = np.random.randint(0, 4, size=100) + codes = np.random.default_rng(2).randint(0, 4, size=100) cats = Categorical.from_codes(codes, levels, ordered=True) - data = DataFrame(np.random.randn(100, 4)) + data = DataFrame(np.random.default_rng(2).randn(100, 4)) result = data.groupby(cats, observed=False).mean() expected = data.groupby(np.asarray(cats), observed=False).mean() @@ -679,7 +679,7 @@ def test_datetime(): def test_categorical_index(): - s = np.random.RandomState(12345) + s = np.random.default_rng(2).RandomState(12345) levels = ["foo", "bar", "baz", "qux"] codes = s.randint(0, 4, size=20) cats = Categorical.from_codes(codes, levels, 
ordered=True) @@ -710,7 +710,7 @@ def test_describe_categorical_columns(): categories=["foo", "bar", "baz", "qux"], ordered=True, ) - df = DataFrame(np.random.randn(20, 4), columns=cats) + df = DataFrame(np.random.default_rng(2).randn(20, 4), columns=cats) result = df.groupby([1, 2, 3, 4] * 5).describe() tm.assert_index_equal(result.stack().columns, cats) @@ -920,7 +920,7 @@ def test_preserve_on_ordered_ops(func, values): def test_categorical_no_compress(): - data = Series(np.random.randn(9)) + data = Series(np.random.default_rng(2).randn(9)) codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) @@ -977,7 +977,7 @@ def test_sort(): # has a sorted x axis # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') - df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + df = DataFrame({"value": np.random.default_rng(2).randint(0, 10000, 100)}) labels = [f"{i} - {i+499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 97e4e8a852429..5ed1540c1ed1f 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -189,8 +189,7 @@ def test_ngroup_cumcount_pair(self): tm.assert_series_equal(g.cumcount(), Series(cumcounted)) def test_ngroup_respects_groupby_order(self, sort): - np.random.seed(0) - df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) + df = DataFrame({"a": np.random.default_rng(2).choice(list("abcdef"), 100)}) g = df.groupby("a", sort=sort) df["group_id"] = -1 df["group_index"] = -1 @@ -270,20 +269,21 @@ def test_count(): df = DataFrame( { - "1st": np.random.choice(list(ascii_lowercase), n), - "2nd": np.random.randint(0, 5, n), - "3rd": np.random.randn(n).round(3), - "4th": np.random.randint(-10, 10, n), - "5th": np.random.choice(dr, n), - "6th": np.random.randn(n).round(3), - "7th": np.random.randn(n).round(3), - "8th": 
np.random.choice(dr, n) - np.random.choice(dr, 1), - "9th": np.random.choice(list(ascii_lowercase), n), + "1st": np.random.default_rng(2).choice(list(ascii_lowercase), n), + "2nd": np.random.default_rng(2).randint(0, 5, n), + "3rd": np.random.default_rng(2).randn(n).round(3), + "4th": np.random.default_rng(2).randint(-10, 10, n), + "5th": np.random.default_rng(2).choice(dr, n), + "6th": np.random.default_rng(2).randn(n).round(3), + "7th": np.random.default_rng(2).randn(n).round(3), + "8th": np.random.default_rng(2).choice(dr, n) + - np.random.default_rng(2).choice(dr, 1), + "9th": np.random.default_rng(2).choice(list(ascii_lowercase), n), } ) for col in df.columns.drop(["1st", "2nd", "4th"]): - df.loc[np.random.choice(n, n // 10), col] = np.nan + df.loc[np.random.default_rng(2).choice(n, n // 10), col] = np.nan df["9th"] = df["9th"].astype("category") @@ -329,7 +329,10 @@ def test_count_cross_type(): # GH8169 # Set float64 dtype to avoid upcast when setting nan below vals = np.hstack( - (np.random.randint(0, 5, (100, 2)), np.random.randint(0, 2, (100, 2))) + ( + np.random.default_rng(2).randint(0, 5, (100, 2)), + np.random.default_rng(2).randint(0, 2, (100, 2)), + ) ).astype("float64") df = DataFrame(vals, columns=["a", "b", "c", "d"]) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index ccf307fbdf380..4d7aaea03a04d 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -191,9 +191,8 @@ def test_filter_pdna_is_false(): def test_filter_against_workaround(): - np.random.seed(0) # Series of ints - s = Series(np.random.randint(0, 100, 1000)) + s = Series(np.random.default_rng(2).randint(0, 100, 1000)) grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 @@ -203,7 +202,7 @@ def test_filter_against_workaround(): tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) # Series of floats - s = 100 * Series(np.random.random(1000)) 
+ s = 100 * Series(np.random.default_rng(2).random(1000)) grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 @@ -214,11 +213,11 @@ def test_filter_against_workaround(): # Set up DataFrame of ints, floats, strings. letters = np.array(list(ascii_lowercase)) N = 1000 - random_letters = letters.take(np.random.randint(0, 26, N)) + random_letters = letters.take(np.random.default_rng(2).randint(0, 26, N)) df = DataFrame( { - "ints": Series(np.random.randint(0, 100, N)), - "floats": N / 10 * Series(np.random.random(N)), + "ints": Series(np.random.default_rng(2).randint(0, 100, N)), + "floats": N / 10 * Series(np.random.default_rng(2).random(N)), "letters": Series(random_letters), } ) @@ -607,7 +606,7 @@ def test_filter_non_bool_raises(): def test_filter_dropna_with_empty_groups(): # GH 10780 - data = Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3)) + data = Series(np.random.default_rng(2).rand(9), index=np.repeat([1, 2, 3], 3)) grouped = data.groupby(level=0) result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False) expected_false = Series([np.nan] * 9, index=np.repeat([1, 2, 3], 3)) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index e3a5d308c4346..1789a0f4f5205 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -74,7 +74,7 @@ def test_intercept_builtin_sum(): @pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key def test_builtins_apply(keys, f): # see gh-8155 - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randint(1, 7, (10, 2)), columns=["jim", "joe"]) df["jolie"] = rs.randn(10) @@ -371,11 +371,11 @@ def test_cython_api2(): def test_cython_median(): - arr = np.random.randn(1000) + arr = np.random.default_rng(2).randn(1000) arr[::2] = np.nan df = DataFrame(arr) - labels = np.random.randint(0, 50, size=1000).astype(float) + labels = 
np.random.default_rng(2).randint(0, 50, size=1000).astype(float) labels[::17] = np.nan result = df.groupby(labels).median() @@ -384,7 +384,7 @@ def test_cython_median(): exp = df.groupby(labels).agg(np.nanmedian) tm.assert_frame_equal(result, exp) - df = DataFrame(np.random.randn(1000, 5)) + df = DataFrame(np.random.default_rng(2).randn(1000, 5)) msg = "using DataFrameGroupBy.median" with tm.assert_produces_warning(FutureWarning, match=msg): rs = df.groupby(labels).agg(np.median) @@ -393,7 +393,7 @@ def test_cython_median(): def test_median_empty_bins(observed): - df = DataFrame(np.random.randint(0, 44, 500)) + df = DataFrame(np.random.default_rng(2).randint(0, 44, 500)) grps = range(0, 55, 5) bins = pd.cut(df[0], grps) @@ -517,7 +517,7 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): def test_idxmin_idxmax_axis1(): - df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + df = DataFrame(np.random.default_rng(2).randn(10, 4), columns=["A", "B", "C", "D"]) df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] gb = df.groupby("A") @@ -548,7 +548,7 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only): msg = "GH#47723 groupby.corrwith and skew do not correctly implement axis=1" request.node.add_marker(pytest.mark.xfail(reason=msg)) - df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + df = DataFrame(np.random.default_rng(2).randn(10, 4), columns=["A", "B", "C", "D"]) df["E"] = "x" groups = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] gb = df.groupby(groups) @@ -691,8 +691,8 @@ def scipy_sem(*args, **kwargs): ], ) def test_ops_general(op, targop): - df = DataFrame(np.random.randn(1000)) - labels = np.random.randint(0, 50, size=1000).astype(float) + df = DataFrame(np.random.default_rng(2).randn(1000)) + labels = np.random.default_rng(2).randint(0, 50, size=1000).astype(float) result = getattr(df.groupby(labels), op)() warn = None if op in ("first", "last", "count", "sem") else FutureWarning @@ -739,7 +739,7 @@ def 
test_nlargest(): def test_nlargest_mi_grouper(): # see gh-21411 - npr = np.random.RandomState(123456789) + npr = np.random.default_rng(2).RandomState(123456789) dts = date_range("20180101", periods=10) iterables = [dts, ["one", "two"]] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 635416f0cb1d6..81779b2bf651d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -70,7 +70,7 @@ def test_basic_aggregations(dtype): data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) index = np.arange(9) - np.random.shuffle(index) + np.random.default_rng(2).shuffle(index) data = data.reindex(index) grouped = data.groupby(lambda x: x // 3, group_keys=False) @@ -314,7 +314,7 @@ def test_basic_regression(): # regression result = Series([1.0 * x for x in list(range(1, 10)) * 10]) - data = np.random.random(1100) * 10.0 + data = np.random.default_rng(2).random(1100) * 10.0 groupings = Series(data) grouped = result.groupby(groupings) @@ -539,8 +539,8 @@ def test_multi_func(df): # some "groups" with no data df = DataFrame( { - "v1": np.random.randn(6), - "v2": np.random.randn(6), + "v1": np.random.default_rng(2).randn(6), + "v2": np.random.default_rng(2).randn(6), "k1": np.array(["b", "b", "b", "a", "a", "a"]), "k2": np.array(["1", "1", "1", "2", "2", "2"]), }, @@ -588,9 +588,9 @@ def test_frame_multi_key_function_list(): "two", "one", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) @@ -649,9 +649,9 @@ def test_frame_multi_key_function_list_partial_failure(): "shiny", "shiny", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) @@ -773,8 +773,11 @@ def 
test_groupby_as_index_agg(df): tm.assert_frame_equal(result3, expected3) # GH7115 & GH8112 & GH8582 - df = DataFrame(np.random.randint(0, 100, (50, 3)), columns=["jim", "joe", "jolie"]) - ts = Series(np.random.randint(5, 10, 50), name="jim") + df = DataFrame( + np.random.default_rng(2).randint(0, 100, (50, 3)), + columns=["jim", "joe", "jolie"], + ) + ts = Series(np.random.default_rng(2).randint(5, 10, 50), name="jim") gr = df.groupby(ts) gr.nth(0) # invokes set_selection_from_grouper internally @@ -803,7 +806,9 @@ def test_ops_not_as_index(reduction_func): if reduction_func in ("corrwith", "nth", "ngroup"): pytest.skip(f"GH 5755: Test not applicable for {reduction_func}") - df = DataFrame(np.random.randint(0, 5, size=(100, 2)), columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randint(0, 5, size=(100, 2)), columns=["a", "b"] + ) expected = getattr(df.groupby("a"), reduction_func)() if reduction_func == "size": expected = expected.rename("size") @@ -1036,8 +1041,8 @@ def test_empty_groups_corner(mframe): "k1": np.array(["b", "b", "b", "a", "a", "a"]), "k2": np.array(["1", "1", "1", "2", "2", "2"]), "k3": ["foo", "bar"] * 3, - "v1": np.random.randn(6), - "v2": np.random.randn(6), + "v1": np.random.default_rng(2).randn(6), + "v2": np.random.default_rng(2).randn(6), } ) @@ -1205,7 +1210,7 @@ def test_groupby_with_hier_columns(): columns = MultiIndex.from_tuples( [("A", "cat"), ("B", "dog"), ("B", "cat"), ("A", "dog")] ) - df = DataFrame(np.random.randn(8, 4), index=index, columns=columns) + df = DataFrame(np.random.default_rng(2).randn(8, 4), index=index, columns=columns) result = df.groupby(level=0).mean() tm.assert_index_equal(result.columns, columns) @@ -1292,7 +1297,7 @@ def test_consistency_name(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": np.random.randn(8) + 1.0, + "C": np.random.default_rng(2).randn(8) + 1.0, "D": np.arange(8), } ) @@ -1354,8 +1359,8 @@ 
def test_cython_grouper_series_bug_noncontig(): def test_series_grouper_noncontig_index(): index = Index(tm.rands_array(10, 100)) - values = Series(np.random.randn(50), index=index[::2]) - labels = np.random.randint(0, 5, 50) + values = Series(np.random.default_rng(2).randn(50), index=index[::2]) + labels = np.random.default_rng(2).randint(0, 5, 50) # it works! grouped = values.groupby(labels) @@ -1420,7 +1425,9 @@ def test_groupby_list_infer_array_like(df): df.groupby(list(df["A"][:-1])) # pathological case of ambiguity - df = DataFrame({"foo": [0, 1], "bar": [3, 4], "val": np.random.randn(2)}) + df = DataFrame( + {"foo": [0, 1], "bar": [3, 4], "val": np.random.default_rng(2).randn(2)} + ) result = df.groupby(["foo", "bar"]).mean() expected = df.groupby([df["foo"], df["bar"]]).mean()[["val"]] @@ -1442,10 +1449,10 @@ def test_groupby_keys_same_size_as_index(): def test_groupby_one_row(): # GH 11741 msg = r"^'Z'$" - df1 = DataFrame(np.random.randn(1, 4), columns=list("ABCD")) + df1 = DataFrame(np.random.default_rng(2).randn(1, 4), columns=list("ABCD")) with pytest.raises(KeyError, match=msg): df1.groupby("Z") - df2 = DataFrame(np.random.randn(2, 4), columns=list("ABCD")) + df2 = DataFrame(np.random.default_rng(2).randn(2, 4), columns=list("ABCD")) with pytest.raises(KeyError, match=msg): df2.groupby("Z") @@ -1454,7 +1461,7 @@ def test_groupby_nat_exclude(): # GH 6992 df = DataFrame( { - "values": np.random.randn(8), + "values": np.random.default_rng(2).randn(8), "dt": [ np.nan, Timestamp("2013-01-01"), @@ -1535,7 +1542,9 @@ def test_groupby_2d_malformed(): def test_int32_overflow(): B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000))) A = np.arange(25000) - df = DataFrame({"A": A, "B": B, "C": A, "D": B, "E": np.random.randn(25000)}) + df = DataFrame( + {"A": A, "B": B, "C": A, "D": B, "E": np.random.default_rng(2).randn(25000)} + ) left = df.groupby(["A", "B", "C", "D"]).sum() right = df.groupby(["D", "C", "B", "A"]).sum() @@ -1548,7 +1557,7 @@ 
def test_groupby_sort_multi(): "a": ["foo", "bar", "baz"], "b": [3, 2, 1], "c": [0, 1, 2], - "d": np.random.randn(3), + "d": np.random.default_rng(2).randn(3), } ) @@ -1568,7 +1577,11 @@ def test_groupby_sort_multi(): tm.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) df = DataFrame( - {"a": [0, 1, 2, 0, 1, 2], "b": [0, 0, 0, 1, 1, 1], "d": np.random.randn(6)} + { + "a": [0, 1, 2, 0, 1, 2], + "b": [0, 0, 0, 1, 1, 1], + "d": np.random.default_rng(2).randn(6), + } ) grouped = df.groupby(["a", "b"])["d"] result = grouped.sum() @@ -2070,7 +2083,7 @@ def get_categorical_invalid_expected(): def test_empty_groupby_apply_nonunique_columns(): # GH#44417 - df = DataFrame(np.random.randn(0, 4)) + df = DataFrame(np.random.default_rng(2).randn(0, 4)) df[3] = df[3].astype(np.int64) df.columns = [0, 1, 2, 0] gb = df.groupby(df[1], group_keys=False) @@ -2411,7 +2424,7 @@ def test_groupby_list_level(): ) def test_groups_repr_truncates(max_seq_items, expected): # GH 1135 - df = DataFrame(np.random.randn(5, 1)) + df = DataFrame(np.random.default_rng(2).randn(5, 1)) df["a"] = df.index with pd.option_context("display.max_seq_items", max_seq_items): @@ -2534,7 +2547,7 @@ def test_groupby_numerical_stability_cumsum(): def test_groupby_cumsum_skipna_false(): # GH#46216 don't propagate np.nan above the diagonal - arr = np.random.randn(5, 5) + arr = np.random.default_rng(2).randn(5, 5) df = DataFrame(arr) for i in range(5): df.iloc[i, i] = np.nan diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 03e3086b8c847..099e7bc3890d0 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -514,7 +514,7 @@ def test_categorical_reducers( request.node.add_marker(pytest.mark.xfail(reason=msg)) # Ensure there is at least one null value by appending to the end - values = np.append(np.random.choice([1, 2, None], size=19), None) + values = 
np.append(np.random.default_rng(2).choice([1, 2, None], size=19), None) df = pd.DataFrame( {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(20)} ) @@ -594,7 +594,7 @@ def test_categorical_transformers( msg = "GH#49651 fillna may incorrectly reorders results when dropna=False" request.node.add_marker(pytest.mark.xfail(reason=msg, strict=False)) - values = np.append(np.random.choice([1, 2, None], size=19), None) + values = np.append(np.random.default_rng(2).choice([1, 2, None], size=19), None) df = pd.DataFrame( {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(20)} ) @@ -649,7 +649,7 @@ def test_categorical_transformers( @pytest.mark.parametrize("method", ["head", "tail"]) def test_categorical_head_tail(method, observed, sort, as_index): # GH#36327 - values = np.random.choice([1, 2, None], 30) + values = np.random.default_rng(2).choice([1, 2, None], 30) df = pd.DataFrame( {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))} ) @@ -674,7 +674,7 @@ def test_categorical_head_tail(method, observed, sort, as_index): def test_categorical_agg(): # GH#36327 - values = np.random.choice([1, 2, None], 30) + values = np.random.default_rng(2).choice([1, 2, None], 30) df = pd.DataFrame( {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))} ) @@ -686,7 +686,7 @@ def test_categorical_agg(): def test_categorical_transform(): # GH#36327 - values = np.random.choice([1, 2, None], 30) + values = np.random.default_rng(2).choice([1, 2, None], 30) df = pd.DataFrame( {"x": pd.Categorical(values, categories=[1, 2, 3]), "y": range(len(values))} ) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 1e9c4b446c4d0..5e889ba776eb0 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -71,9 +71,9 @@ def test_getitem_list_of_columns(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", 
"two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), - "E": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), + "E": np.random.default_rng(2).randn(8), } ) @@ -90,9 +90,9 @@ def test_getitem_numeric_column_names(self): df = DataFrame( { 0: list("abcd") * 2, - 2: np.random.randn(8), - 4: np.random.randn(8), - 6: np.random.randn(8), + 2: np.random.default_rng(2).randn(8), + 4: np.random.default_rng(2).randn(8), + 6: np.random.default_rng(2).randn(8), } ) result = df.groupby(0)[df.columns[1:3]].mean() @@ -117,9 +117,9 @@ def test_getitem_single_column(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), - "E": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), + "E": np.random.default_rng(2).randn(8), } ) @@ -134,7 +134,13 @@ def test_getitem_single_column(self): def test_indices_grouped_by_tuple_with_lambda(self): # GH 36158 df = DataFrame( - {"Tuples": ((x, y) for x in [0, 1] for y in np.random.randint(3, 5, 5))} + { + "Tuples": ( + (x, y) + for x in [0, 1] + for y in np.random.default_rng(2).randint(3, 5, 5) + ) + } ) gb = df.groupby("Tuples") @@ -178,7 +184,7 @@ def test_grouper_multilevel_freq(self): d0 = date.today() - timedelta(days=14) dates = date_range(d0, date.today()) date_index = MultiIndex.from_product([dates, dates], names=["foo", "bar"]) - df = DataFrame(np.random.randint(0, 100, 225), index=date_index) + df = DataFrame(np.random.default_rng(2).randint(0, 100, 225), index=date_index) # Check string level expected = ( @@ -434,7 +440,7 @@ def test_groupby_series_named_with_tuple(self, frame_or_series, index): def test_groupby_grouper_f_sanity_checked(self): dates = date_range("01-Jan-2013", periods=12, freq="MS") - ts = Series(np.random.randn(12), index=dates) + ts = 
Series(np.random.default_rng(2).randn(12), index=dates) # GH51979 # simple check that the passed function doesn't operates on the whole index @@ -866,7 +872,13 @@ def test_get_group_grouped_by_tuple(self): def test_get_group_grouped_by_tuple_with_lambda(self): # GH 36158 df = DataFrame( - {"Tuples": ((x, y) for x in [0, 1] for y in np.random.randint(3, 5, 5))} + { + "Tuples": ( + (x, y) + for x in [0, 1] + for y in np.random.default_rng(2).randint(3, 5, 5) + ) + } ) gb = df.groupby("Tuples") @@ -964,7 +976,12 @@ def test_multi_iter_frame(self, three_group): k1 = np.array(["b", "b", "b", "a", "a", "a"]) k2 = np.array(["1", "2", "1", "2", "1", "2"]) df = DataFrame( - {"v1": np.random.randn(6), "v2": np.random.randn(6), "k1": k1, "k2": k2}, + { + "v1": np.random.default_rng(2).randn(6), + "v2": np.random.default_rng(2).randn(6), + "k1": k1, + "k2": k2, + }, index=["one", "two", "three", "four", "five", "six"], ) diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index 1c22da68499f8..d2d2342118128 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -1,7 +1,5 @@ # Test GroupBy._positional_selector positional grouped indexing GH#42864 -import random - import numpy as np import pytest @@ -122,6 +120,7 @@ def test_doc_examples(): @pytest.fixture() def multiindex_data(): + rng = np.random.default_rng(2) ndates = 100 nitems = 20 dates = pd.date_range("20130101", periods=ndates, freq="D") @@ -129,9 +128,9 @@ def multiindex_data(): data = {} for date in dates: - nitems_for_date = nitems - random.randint(0, 12) + nitems_for_date = nitems - rng.randint(0, 12) levels = [ - (item, random.randint(0, 10000) / 100, random.randint(0, 10000) / 100) + (item, rng.randint(0, 10000) / 100, rng.randint(0, 10000) / 100) for item in items[:nitems_for_date] ] levels.sort(key=lambda x: x[1]) diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index 
92c3b68d87fad..23de180553080 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -18,7 +18,7 @@ class GroupVarTestMixin: def test_group_var_generic_1d(self): - prng = np.random.RandomState(1234) + prng = np.random.RandomState(1234) out = (np.nan * np.ones((5, 1))).astype(self.dtype) counts = np.zeros(5, dtype="int64") @@ -35,7 +35,7 @@ def test_group_var_generic_1d(self): tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_1d_flat_labels(self): - prng = np.random.RandomState(1234) + prng = np.random.RandomState(1234) out = (np.nan * np.ones((1, 1))).astype(self.dtype) counts = np.zeros(1, dtype="int64") @@ -51,7 +51,7 @@ def test_group_var_generic_1d_flat_labels(self): tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_2d_all_finite(self): - prng = np.random.RandomState(1234) + prng = np.random.RandomState(1234) out = (np.nan * np.ones((5, 2))).astype(self.dtype) counts = np.zeros(5, dtype="int64") @@ -66,7 +66,7 @@ def test_group_var_generic_2d_all_finite(self): tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_2d_some_nan(self): - prng = np.random.RandomState(1234) + prng = np.random.RandomState(1234) out = (np.nan * np.ones((5, 2))).astype(self.dtype) counts = np.zeros(5, dtype="int64") @@ -109,7 +109,7 @@ class TestGroupVarFloat64(GroupVarTestMixin): rtol = 1e-5 def test_group_var_large_inputs(self): - prng = np.random.RandomState(1234) + prng = np.random.RandomState(1234) out = np.array([[np.nan]], dtype=self.dtype) counts = np.array([0], dtype="int64") @@ -133,7 +133,7 @@ class TestGroupVarFloat32(GroupVarTestMixin): @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_group_ohlc(dtype): - obj = np.array(np.random.randn(20), dtype=dtype) + obj = np.array(np.random.default_rng(2).standard_normal(20), dtype=dtype) bins = np.array([6, 12, 20]) out = 
np.zeros((3, 4), dtype) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index f0ca42c2e2719..0834d32b8e585 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -238,7 +238,7 @@ def test_nth(): # GH 7559 # from the vbench - df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype="int64") + df = DataFrame(np.random.default_rng(2).randint(1, 10, (100, 2)), dtype="int64") s = df[1] g = df[0] expected = s.groupby(g).first() diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index f4ebd54a7a1a9..90932fa96ce3e 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -28,9 +28,9 @@ def test_series_groupby_nunique(sort, dropna, as_index, with_nan, keys): days = date_range("2015-08-23", periods=10) df = DataFrame( { - "jim": np.random.choice(list(ascii_lowercase), n), - "joe": np.random.choice(days, n), - "julie": np.random.randint(0, m, n), + "jim": np.random.default_rng(2).choice(list(ascii_lowercase), n), + "joe": np.random.default_rng(2).choice(days, n), + "julie": np.random.default_rng(2).randint(0, m, n), } ) if with_nan: diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 49ce51bedbf9f..249c316dda697 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -12,7 +12,7 @@ def test_pipe(): # Test the pipe method of DataFrameGroupBy. 
# Issue #17871 - random_state = np.random.RandomState(1234567890) + random_state = np.random.RandomState(1234567890) df = DataFrame( { diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 7bf168944a1ac..e2d6175f17f87 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -93,7 +93,7 @@ def test_quantile_array(): def test_quantile_array2(): # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 - arr = np.random.RandomState(0).randint(0, 5, size=(10, 3), dtype=np.int64) + arr = np.random.RandomState(0).randint(0, 5, size=(10, 3), dtype=np.int64) df = DataFrame(arr, columns=list("ABC")) result = df.groupby("A").quantile([0.3, 0.7]) expected = DataFrame( diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index cf28925a49d88..21355d07a606b 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -33,12 +33,12 @@ def test_rank_unordered_categorical_typeerror(): def test_rank_apply(): lev1 = tm.rands_array(10, 100) lev2 = tm.rands_array(10, 130) - lab1 = np.random.randint(0, 100, size=500) - lab2 = np.random.randint(0, 130, size=500) + lab1 = np.random.default_rng(2).integers(0, 100, size=500) + lab2 = np.random.default_rng(2).integers(0, 130, size=500) df = DataFrame( { - "value": np.random.randn(500), + "value": np.random.default_rng(2).standard_normal(500), "key1": lev1.take(lab1), "key2": lev2.take(lab2), } diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py index b96fe41c26c3e..1f1a76a2e0c2a 100644 --- a/pandas/tests/groupby/test_size.py +++ b/pandas/tests/groupby/test_size.py @@ -51,7 +51,7 @@ def test_size_axis_1(df, axis_1, by, sort, dropna): @pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) @pytest.mark.parametrize("sort", [True, False]) def test_size_sort(sort, by): - df = DataFrame(np.random.choice(20, (1000, 3)),
columns=list("ABC")) + df = DataFrame(np.random.default_rng(2).choice(20, (1000, 3)), columns=list("ABC")) left = df.groupby(by=by, sort=sort).size() right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0]) tm.assert_series_equal(left, right, check_names=False) diff --git a/pandas/tests/groupby/test_skew.py b/pandas/tests/groupby/test_skew.py index 9be156150740f..203c1cce213ee 100644 --- a/pandas/tests/groupby/test_skew.py +++ b/pandas/tests/groupby/test_skew.py @@ -12,11 +12,11 @@ def test_groupby_skew_equivalence(): ncols = 2 nan_frac = 0.05 - arr = np.random.randn(nrows, ncols) - arr[np.random.random(nrows) < nan_frac] = np.nan + arr = np.random.default_rng(2).randn(nrows, ncols) + arr[np.random.default_rng(2).random(nrows) < nan_frac] = np.nan df = pd.DataFrame(arr) - grps = np.random.randint(0, ngroups, size=nrows) + grps = np.random.default_rng(2).randint(0, ngroups, size=nrows) gb = df.groupby(grps) result = gb.skew() diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 60c35064d9aa7..917fc56e0bd19 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -732,10 +732,10 @@ def test_groupby_datetime64_32_bit(self): def test_groupby_with_timezone_selection(self): # GH 11616 # Test that column selection returns output in correct timezone. 
- np.random.seed(42) + df = DataFrame( { - "factor": np.random.randint(0, 3, size=60), + "factor": np.random.default_rng(2).randint(0, 3, size=60), "time": date_range("01/01/2000 00:00", periods=60, freq="s", tz="UTC"), } ) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 78c8b6b236b65..6da9113f18d8d 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -46,14 +46,13 @@ def tests_value_counts_index_names_category_column(): # our starting frame def seed_df(seed_nans, n, m): - np.random.seed(1234) days = date_range("2015-08-24", periods=10) frame = DataFrame( { - "1st": np.random.choice(list("abcd"), n), - "2nd": np.random.choice(days, n), - "3rd": np.random.randint(1, m + 1, n), + "1st": np.random.default_rng(2).choice(list("abcd"), n), + "2nd": np.random.default_rng(2).choice(days, n), + "3rd": np.random.default_rng(2).randint(1, m + 1, n), } ) diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index ddc3fc7e2de3a..3216e84e52bb3 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -245,8 +245,8 @@ def test_multilabel_numba_vs_cython(numba_supported_reductions): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) gb = df.groupby(["A", "B"]) @@ -261,8 +261,8 @@ def test_multilabel_udf_numba_vs_cython(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) gb = df.groupby(["A", "B"]) diff --git 
a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index cfa336907e71a..444b41f7cc36e 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -30,7 +30,7 @@ def test_transform(): data = Series(np.arange(9) // 3, index=np.arange(9)) index = np.arange(9) - np.random.shuffle(index) + np.random.default_rng(2).shuffle(index) data = data.reindex(index) grouped = data.groupby(lambda x: x // 3) @@ -59,7 +59,7 @@ def demean(arr): return arr - arr.mean(axis=0) people = DataFrame( - np.random.randn(5, 5), + np.random.default_rng(2).randn(5, 5), columns=["a", "b", "c", "d", "e"], index=["Joe", "Steve", "Wes", "Jim", "Travis"], ) @@ -83,7 +83,9 @@ def demean(arr): def test_transform_fast(): - df = DataFrame({"id": np.arange(100000) / 3, "val": np.random.randn(100000)}) + df = DataFrame( + {"id": np.arange(100000) / 3, "val": np.random.default_rng(2).randn(100000)} + ) grp = df.groupby("id")["val"] @@ -220,7 +222,10 @@ def test_transform_axis_ts(tsframe): r = len(base.index) c = len(base.columns) tso = DataFrame( - np.random.randn(r, c), index=base.index, columns=base.columns, dtype="float64" + np.random.default_rng(2).randn(r, c), + index=base.index, + columns=base.columns, + dtype="float64", ) # monotonic ts = tso @@ -650,10 +655,10 @@ def f(group): ) def test_cython_transform_series(op, args, targop): # GH 4095 - s = Series(np.random.randn(1000)) + s = Series(np.random.default_rng(2).randn(1000)) s_missing = s.copy() s_missing.iloc[2:10] = np.nan - labels = np.random.randint(0, 50, size=1000).astype(float) + labels = np.random.default_rng(2).randint(0, 50, size=1000).astype(float) # series for data in [s, s_missing]: @@ -722,7 +727,7 @@ def test_groupby_cum_skipna(op, skipna, input, exp): @pytest.fixture def frame(): - floating = Series(np.random.randn(10)) + floating = Series(np.random.default_rng(2).randn(10)) floating_missing = floating.copy() 
floating_missing.iloc[2:7] = np.nan strings = list("abcde") * 2 @@ -764,7 +769,7 @@ def frame_mi(frame): @pytest.mark.parametrize( "gb_target", [ - {"by": np.random.randint(0, 50, size=10).astype(float)}, + {"by": np.random.default_rng(2).randint(0, 50, size=10).astype(float)}, {"level": 0}, {"by": "string"}, # {"by": 'string_missing'}]: @@ -814,7 +819,7 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target): @pytest.mark.parametrize( "gb_target", [ - {"by": np.random.randint(0, 50, size=10).astype(float)}, + {"by": np.random.default_rng(2).randint(0, 50, size=10).astype(float)}, {"level": 0}, {"by": "string"}, # {"by": 'string_missing'}]: @@ -889,7 +894,9 @@ def test_transform_with_non_scalar_group(): ] ) df = DataFrame( - np.random.randint(1, 10, (4, 12)), columns=cols, index=["A", "C", "G", "T"] + np.random.default_rng(2).randint(1, 10, (4, 12)), + columns=cols, + index=["A", "C", "G", "T"], ) msg = "DataFrame.groupby with axis=1 is deprecated" @@ -1411,16 +1418,16 @@ def test_transform_cumcount(): def test_null_group_lambda_self(sort, dropna, keys): # GH 17093 size = 50 - nulls1 = np.random.choice([False, True], size) - nulls2 = np.random.choice([False, True], size) + nulls1 = np.random.default_rng(2).choice([False, True], size) + nulls2 = np.random.default_rng(2).choice([False, True], size) # Whether a group contains a null value or not nulls_grouper = nulls1 if len(keys) == 1 else nulls1 | nulls2 - a1 = np.random.randint(0, 5, size=size).astype(float) + a1 = np.random.default_rng(2).randint(0, 5, size=size).astype(float) a1[nulls1] = np.nan - a2 = np.random.randint(0, 5, size=size).astype(float) + a2 = np.random.default_rng(2).randint(0, 5, size=size).astype(float) a2[nulls2] = np.nan - values = np.random.randint(0, 5, size=a1.shape) + values = np.random.default_rng(2).randint(0, 5, size=a1.shape) df = DataFrame({"A1": a1, "A2": a2, "B": values}) expected_values = values diff --git a/pandas/tests/indexes/categorical/test_category.py 
b/pandas/tests/indexes/categorical/test_category.py index 873d06db58fab..917cc50347fba 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -204,7 +204,7 @@ def test_repr_roundtrip(self): # long format # this is not reprable - ci = CategoricalIndex(np.random.randint(0, 5, size=100)) + ci = CategoricalIndex(np.random.default_rng(2).randint(0, 5, size=100)) str(ci) def test_isin(self): diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 01077616c50db..3a1e8150b3691 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -218,15 +218,13 @@ def test_get_indexer_base(self): idx.get_indexer(idx, method="invalid") def test_get_indexer_requires_unique(self): - np.random.seed(123456789) - ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) oidx = Index(np.array(ci)) msg = "Reindexing only valid with uniquely valued Index objects" for n in [1, 2, 5, len(ci)]: - finder = oidx[np.random.randint(0, len(ci), size=n)] + finder = oidx[np.random.default_rng(2).randint(0, len(ci), size=n)] with pytest.raises(InvalidIndexError, match=msg): ci.get_indexer(finder) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index af1a94391a353..81a32eb9047bf 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -138,7 +138,10 @@ def test_misc_coverage(self): assert isinstance(list(result.values())[0][0], Timestamp) def test_groupby_function_tuple_1677(self): - df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) + df = DataFrame( + np.random.default_rng(2).rand(100), + index=date_range("1/1/2000", periods=100), + ) monthly_group = df.groupby(lambda x: (x.year, x.month)) result = monthly_group.mean() diff --git 
a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index ecdea9ea25c9d..f2d518aba1432 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -427,7 +427,7 @@ def test_get_loc_time_obj2(self): for n in ns: idx = date_range("2014-11-26", periods=n, freq="S") - ts = pd.Series(np.random.randn(n), index=idx) + ts = pd.Series(np.random.default_rng(2).randn(n), index=idx) locs = np.arange(start, n, step, dtype=np.intp) result = ts.index.get_loc(key) diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index 3739d247e9a2d..56d35e6c0c930 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -26,7 +26,7 @@ def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe( 10, 10, - data_gen_f=lambda *args, **kwargs: np.random.randn(), + data_gen_f=lambda *args, **kwargs: np.random.default_rng(2).randn(), r_idx_type="i", c_idx_type="dt", ) @@ -45,7 +45,7 @@ def test_join_with_period_index(self, join_type): df = tm.makeCustomDataframe( 10, 10, - data_gen_f=lambda *args: np.random.randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).randint(2), c_idx_type="p", r_idx_type="dt", ) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 28c9c07a9c9ef..ae872ee72d4c0 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -20,7 +20,10 @@ class TestSlicing: def test_string_index_series_name_converted(self): # GH#1644 - df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10)) + df = DataFrame( + np.random.default_rng(2).randn(10, 4), + index=date_range("1/1/2000", periods=10), + ) result = df.loc["1/3/2000"] assert result.name == df.index[2] @@ -124,7 +127,7 @@ def 
test_slice_year(self): expected = s[s.index.year == 2005] tm.assert_series_equal(result, expected) - df = DataFrame(np.random.rand(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).rand(len(dti), 5), index=dti) result = df.loc["2005"] expected = df[df.index.year == 2005] tm.assert_frame_equal(result, expected) @@ -155,7 +158,7 @@ def test_slice_quarter(self): s = Series(np.arange(len(dti)), index=dti) assert len(s["2001Q1"]) == 90 - df = DataFrame(np.random.rand(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).rand(len(dti), 5), index=dti) assert len(df.loc["1Q01"]) == 90 def test_slice_month(self): @@ -163,7 +166,7 @@ def test_slice_month(self): s = Series(np.arange(len(dti)), index=dti) assert len(s["2005-11"]) == 30 - df = DataFrame(np.random.rand(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).rand(len(dti), 5), index=dti) assert len(df.loc["2005-11"]) == 30 tm.assert_series_equal(s["2005-11"], s["11-2005"]) @@ -358,7 +361,8 @@ def test_partial_slicing_with_multiindex_series(self): # GH 4294 # partial slice on a series mi ser = DataFrame( - np.random.rand(1000, 1000), index=date_range("2000-1-1", periods=1000) + np.random.default_rng(2).rand(1000, 1000), + index=date_range("2000-1-1", periods=1000), ).stack() s2 = ser[:-1].copy() diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 124eb29b7789e..5149bd68e10b7 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -172,10 +172,10 @@ def test_union_freq_infer(self): def test_union_dataframe_index(self): rng1 = date_range("1/1/1999", "1/1/2012", freq="MS") - s1 = Series(np.random.randn(len(rng1)), rng1) + s1 = Series(np.random.default_rng(2).randn(len(rng1)), rng1) rng2 = date_range("1/1/1980", "12/1/2001", freq="MS") - s2 = Series(np.random.randn(len(rng2)), rng2) + s2 = Series(np.random.default_rng(2).randn(len(rng2)), rng2) df = 
DataFrame({"s1": s1, "s2": s2}) exp = date_range("1/1/1980", "1/1/2012", freq="MS") diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 93f3e51d878c0..ee1edaa27f804 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -239,7 +239,7 @@ def test_duplicated_hashtable_impl(keep, monkeypatch): # GH 9125 n, k = 6, 10 levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] - codes = [np.random.choice(n, k * n) for _ in levels] + codes = [np.random.default_rng(2).choice(n, k * n) for _ in levels] with monkeypatch.context() as m: m.setattr(libindex, "_SIZE_CUTOFF", 50) mi = MultiIndex(levels=levels, codes=codes) @@ -266,7 +266,7 @@ def test_duplicated_with_nan_multi_shape(n, m): codes = product(range(-1, n), range(-1, m)) mi = MultiIndex( levels=[list("abcde")[:n], list("WXYZ")[:m]], - codes=np.random.permutation(list(codes)).T, + codes=np.random.default_rng(2).permutation(list(codes)).T, ) assert len(mi) == (n + 1) * (m + 1) assert not mi.has_duplicates diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 8f5bba7debf2a..9100ba100afc1 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -322,7 +322,9 @@ def test_set_value_keeps_names(): lev2 = ["1", "2", "3"] * 2 idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"]) df = pd.DataFrame( - np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=idx + np.random.default_rng(2).randn(6, 4), + columns=["one", "two", "three", "four"], + index=idx, ) df = df.sort_index() assert df._is_copy is None diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 3f1aaebc0f47c..9d39e8f0dcd09 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -1,5 +1,3 @@ -import random - 
import numpy as np import pytest @@ -23,7 +21,7 @@ def test_sortlevel(idx): tuples = list(idx) - random.shuffle(tuples) + np.random.default_rng(2).shuffle(tuples) index = MultiIndex.from_tuples(tuples) @@ -140,7 +138,11 @@ def test_unsortedindex(): def test_unsortedindex_doc_examples(): # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex dfm = DataFrame( - {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)} + { + "jim": [0, 0, 1, 1], + "joe": ["x", "x", "z", "y"], + "jolie": np.random.default_rng(2).rand(4), + } ) dfm = dfm.set_index(["jim", "joe"]) @@ -239,7 +241,7 @@ def test_remove_unused_levels_large(first_type, second_type): # because tests should be deterministic (and this test in particular # checks that levels are removed, which is not the case for every # random input): - rng = np.random.RandomState(4) # seed is arbitrary value that works + rng = np.random.default_rng(4) # seed is arbitrary value that works size = 1 << 16 df = DataFrame( diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index b5b595d5cc8b5..fe241198d0703 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -110,7 +110,7 @@ def test_getitem_index(self): def test_getitem_partial(self): rng = period_range("2007-01", periods=50, freq="M") - ts = Series(np.random.randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), rng) with pytest.raises(KeyError, match=r"^'2006'$"): ts["2006"] @@ -193,7 +193,7 @@ def test_getitem_seconds(self): with pytest.raises(IndexError, match="only integers, slices"): idx[val] - ser = Series(np.random.rand(len(idx)), index=idx) + ser = Series(np.random.default_rng(2).rand(len(idx)), index=idx) tm.assert_series_equal(ser["2013/01/01 10:00"], ser[3600:3660]) tm.assert_series_equal(ser["2013/01/01 9H"], ser[:3600]) for d in ["2013/01/01", "2013/01", "2013"]: @@ 
-225,7 +225,7 @@ def test_getitem_day(self, idx_range): with pytest.raises(IndexError, match="only integers, slices"): idx[val] - ser = Series(np.random.rand(len(idx)), index=idx) + ser = Series(np.random.default_rng(2).rand(len(idx)), index=idx) tm.assert_series_equal(ser["2013/01"], ser[0:31]) tm.assert_series_equal(ser["2013/02"], ser[31:59]) tm.assert_series_equal(ser["2014"], ser[365:]) diff --git a/pandas/tests/indexes/period/test_join.py b/pandas/tests/indexes/period/test_join.py index 27cba8676d22b..e70a6922a3d3c 100644 --- a/pandas/tests/indexes/period/test_join.py +++ b/pandas/tests/indexes/period/test_join.py @@ -38,7 +38,7 @@ def test_join_does_not_recur(self): df = tm.makeCustomDataframe( 3, 2, - data_gen_f=lambda *args: np.random.randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).randint(2), c_idx_type="p", r_idx_type="dt", ) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index bf81d2cde98e7..aa2231d1b8f2b 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -15,7 +15,7 @@ class TestPeriodIndex: def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") - ts = Series(np.random.randn(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).randn(len(idx)), index=idx) original = ts.copy() result = ts["2007"] @@ -29,7 +29,7 @@ def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") - ts = Series(np.random.randn(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).randn(len(idx)), index=idx) result = ts["2007"] expected = ts[idx == "2007"] @@ -37,13 +37,13 @@ def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): def test_getitem_periodindex_quarter_string(self): 
pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") - ser = Series(np.random.rand(len(pi)), index=pi).cumsum() + ser = Series(np.random.default_rng(2).rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! assert ser["05Q4"] == ser.iloc[2] def test_pindex_slice_index(self): pi = period_range(start="1/1/10", end="12/31/12", freq="M") - s = Series(np.random.rand(len(pi)), index=pi) + s = Series(np.random.default_rng(2).rand(len(pi)), index=pi) res = s["2010"] exp = s[0:12] tm.assert_series_equal(res, exp) @@ -69,7 +69,7 @@ def test_range_slice_day(self, make_range): with pytest.raises(TypeError, match=msg): idx[v:] - s = Series(np.random.rand(len(idx)), index=idx) + s = Series(np.random.default_rng(2).rand(len(idx)), index=idx) tm.assert_series_equal(s["2013/01/02":], s[1:]) tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5]) @@ -99,7 +99,7 @@ def test_range_slice_seconds(self, make_range): with pytest.raises(TypeError, match=msg): idx[v:] - s = Series(np.random.rand(len(idx)), index=idx) + s = Series(np.random.default_rng(2).rand(len(idx)), index=idx) tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660]) tm.assert_series_equal(s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960]) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 22182416c79fd..b138f6cb0fd81 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -10,7 +10,7 @@ def _permute(obj): - return obj.take(np.random.permutation(len(obj))) + return obj.take(np.random.default_rng(2).permutation(len(obj))) class TestPeriodIndex: diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 3d3c27bc8f6c7..251b152d9344f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -150,7 +150,7 @@ def test_constructor_from_frame_series_freq(self): dts = ["1-1-1990", "2-1-1990", "3-1-1990", 
"4-1-1990", "5-1-1990"] expected = DatetimeIndex(dts, freq="MS") - df = DataFrame(np.random.rand(5, 3)) + df = DataFrame(np.random.default_rng(2).rand(5, 3)) df["date"] = dts result = DatetimeIndex(df["date"], freq="MS") @@ -1036,7 +1036,7 @@ def test_indexing_doesnt_change_class(self): assert index[[0, 1]].identical(Index([1, 2], dtype=np.object_)) def test_outer_join_sort(self): - left_index = Index(np.random.permutation(15)) + left_index = Index(np.random.default_rng(2).permutation(15)) right_index = tm.makeDateIndex(10) with tm.assert_produces_warning(RuntimeWarning): diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index b73bd7c78f009..ff2051c77a5e8 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -315,7 +315,7 @@ def test_drop_duplicates(self, index_flat, keep): # make duplicated index n = len(unique_idx) - duplicated_selection = np.random.choice(n, int(n * 1.5)) + duplicated_selection = np.random.default_rng(2).choice(n, int(n * 1.5)) idx = holder(unique_idx.values[duplicated_selection]) # Series.duplicated is tested separately diff --git a/pandas/tests/indexes/test_subclass.py b/pandas/tests/indexes/test_subclass.py index 2ddf3baabbec0..721ce0bc30a95 100644 --- a/pandas/tests/indexes/test_subclass.py +++ b/pandas/tests/indexes/test_subclass.py @@ -32,7 +32,9 @@ def test_insert_fallback_to_base_index(): tm.assert_index_equal(result, expected) df = DataFrame( - np.random.randn(2, 3), columns=idx, index=Index([1, 2], name="string") + np.random.default_rng(2).randn(2, 3), + columns=idx, + index=Index([1, 2], name="string"), ) result = df.reset_index() tm.assert_index_equal(result.columns, expected) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 2d2711520d44f..31cc8e18f58ce 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -285,7 +285,7 @@ 
def tdi(self, monotonic): tdi = tdi[::-1] elif monotonic is None: taker = np.arange(10, dtype=np.intp) - np.random.shuffle(taker) + np.random.default_rng(2).shuffle(taker) tdi = tdi.take(taker) return tdi diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py index 2d8795b45f276..1acae47ef3859 100644 --- a/pandas/tests/indexes/timedeltas/test_join.py +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -28,7 +28,7 @@ def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe( 10, 10, - data_gen_f=lambda *args, **kwargs: np.random.randn(), + data_gen_f=lambda *args, **kwargs: np.random.default_rng(2).randn(), r_idx_type="i", c_idx_type="td", ) diff --git a/pandas/tests/indexing/conftest.py b/pandas/tests/indexing/conftest.py index ec817649ec5ea..075bb43a207f7 100644 --- a/pandas/tests/indexing/conftest.py +++ b/pandas/tests/indexing/conftest.py @@ -12,25 +12,30 @@ @pytest.fixture def series_ints(): - return Series(np.random.rand(4), index=np.arange(0, 8, 2)) + return Series(np.random.default_rng(2).rand(4), index=np.arange(0, 8, 2)) @pytest.fixture def frame_ints(): return DataFrame( - np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) + np.random.default_rng(2).randn(4, 4), + index=np.arange(0, 8, 2), + columns=np.arange(0, 12, 3), ) @pytest.fixture def series_uints(): - return Series(np.random.rand(4), index=Index(np.arange(0, 8, 2, dtype=np.uint64))) + return Series( + np.random.default_rng(2).rand(4), + index=Index(np.arange(0, 8, 2, dtype=np.uint64)), + ) @pytest.fixture def frame_uints(): return DataFrame( - np.random.randn(4, 4), + np.random.default_rng(2).randn(4, 4), index=Index(range(0, 8, 2), dtype=np.uint64), columns=Index(range(0, 12, 3), dtype=np.uint64), ) @@ -38,33 +43,41 @@ def frame_uints(): @pytest.fixture def series_labels(): - return Series(np.random.randn(4), index=list("abcd")) + return Series(np.random.default_rng(2).randn(4), index=list("abcd")) 
@pytest.fixture def frame_labels(): - return DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) + return DataFrame( + np.random.default_rng(2).randn(4, 4), index=list("abcd"), columns=list("ABCD") + ) @pytest.fixture def series_ts(): - return Series(np.random.randn(4), index=date_range("20130101", periods=4)) + return Series( + np.random.default_rng(2).randn(4), index=date_range("20130101", periods=4) + ) @pytest.fixture def frame_ts(): - return DataFrame(np.random.randn(4, 4), index=date_range("20130101", periods=4)) + return DataFrame( + np.random.default_rng(2).randn(4, 4), index=date_range("20130101", periods=4) + ) @pytest.fixture def series_floats(): - return Series(np.random.rand(4), index=Index(range(0, 8, 2), dtype=np.float64)) + return Series( + np.random.default_rng(2).rand(4), index=Index(range(0, 8, 2), dtype=np.float64) + ) @pytest.fixture def frame_floats(): return DataFrame( - np.random.randn(4, 4), + np.random.default_rng(2).randn(4, 4), index=Index(range(0, 8, 2), dtype=np.float64), columns=Index(range(0, 12, 3), dtype=np.float64), ) @@ -72,12 +85,12 @@ def frame_floats(): @pytest.fixture def series_mixed(): - return Series(np.random.randn(4), index=[2, 4, "null", 8]) + return Series(np.random.default_rng(2).randn(4), index=[2, 4, "null", 8]) @pytest.fixture def frame_mixed(): - return DataFrame(np.random.randn(4, 4), index=[2, 4, "null", 8]) + return DataFrame(np.random.default_rng(2).randn(4, 4), index=[2, 4, "null", 8]) @pytest.fixture @@ -93,7 +106,7 @@ def series_empty(): @pytest.fixture def frame_multi(): return DataFrame( - np.random.randn(4, 4), + np.random.default_rng(2).randn(4, 4), index=MultiIndex.from_product([[1, 2], [3, 4]]), columns=MultiIndex.from_product([[5, 6], [7, 8]]), ) @@ -101,4 +114,7 @@ def frame_multi(): @pytest.fixture def series_multi(): - return Series(np.random.rand(4), index=MultiIndex.from_product([[1, 2], [3, 4]])) + return Series( + np.random.default_rng(2).rand(4), + 
index=MultiIndex.from_product([[1, 2], [3, 4]]), + ) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index fdbbdbdd45169..6ff086f5c3c59 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -42,7 +42,7 @@ def test_detect_chained_assignment(using_copy_on_write): def test_cache_updating(using_copy_on_write): # 5216 # make sure that we don't try to set a dead cache - a = np.random.rand(10, 3) + a = np.random.default_rng(2).rand(10, 3) df = DataFrame(a, columns=["x", "y", "z"]) df_original = df.copy() tuples = [(i, j) for i in range(5) for j in range(2)] diff --git a/pandas/tests/indexing/multiindex/test_datetime.py b/pandas/tests/indexing/multiindex/test_datetime.py index a49cb0bc2c43e..7876a65648a7f 100644 --- a/pandas/tests/indexing/multiindex/test_datetime.py +++ b/pandas/tests/indexing/multiindex/test_datetime.py @@ -19,7 +19,7 @@ def test_multiindex_period_datetime(): idx1 = Index(["a", "a", "a", "b", "b"]) idx2 = period_range("2012-01", periods=len(idx1), freq="M") - s = Series(np.random.randn(len(idx1)), [idx1, idx2]) + s = Series(np.random.default_rng(2).randn(len(idx1)), [idx1, idx2]) # try Period as index expected = s.iloc[0] diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index f8ce59527d4fc..cae149248f7be 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -45,7 +45,7 @@ def test_series_getitem_duplicates_multiindex(level0_value): codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=["tag", "day"], ) - arr = np.random.randn(len(index), 1) + arr = np.random.default_rng(2).randn(len(index), 1) df = DataFrame(arr, index=index, columns=["val"]) # confirm indexing on missing value raises KeyError @@ -187,7 +187,7 @@ def 
test_frame_mixed_depth_get(): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).randn(4, 6), columns=index) result = df["a"] expected = df["a", "", ""].rename("a") diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index db91d5ad88252..c387ad1f4c444 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -17,7 +17,7 @@ def simple_multiindex_dataframe(): random data by default. """ - data = np.random.randn(3, 3) + data = np.random.default_rng(2).randn(3, 3) return DataFrame( data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] ) @@ -67,7 +67,7 @@ def test_iloc_getitem_multiple_items(): # GH 5528 tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]]) index = MultiIndex.from_tuples(tup) - df = DataFrame(np.random.randn(4, 4), index=index) + df = DataFrame(np.random.default_rng(2).randn(4, 4), index=index) result = df.iloc[[2, 3]] expected = df.xs("b", drop_level=False) tm.assert_frame_equal(result, expected) @@ -75,7 +75,7 @@ def test_iloc_getitem_multiple_items(): def test_iloc_getitem_labels(): # this is basically regular indexing - arr = np.random.randn(4, 3) + arr = np.random.default_rng(2).randn(4, 3) df = DataFrame( arr, columns=[["i", "i", "j"], ["A", "A", "B"]], diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index 507ef63344a26..94e3fbd9b6946 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -27,11 +27,13 @@ def cols(): @pytest.fixture def vals(n): vals = [ - np.random.randint(0, 10, n), - np.random.choice(list("abcdefghij"), n), - np.random.choice(pd.date_range("20141009", periods=10).tolist(), n), - np.random.choice(list("ZYXWVUTSRQ"), n), - 
np.random.randn(n), + np.random.default_rng(2).randint(0, 10, n), + np.random.default_rng(2).choice(list("abcdefghij"), n), + np.random.default_rng(2).choice( + pd.date_range("20141009", periods=10).tolist(), n + ), + np.random.default_rng(2).choice(list("ZYXWVUTSRQ"), n), + np.random.default_rng(2).randn(n), ] vals = list(map(tuple, zip(*vals))) return vals @@ -41,10 +43,12 @@ def vals(n): def keys(n, m, vals): # bunch of keys for testing keys = [ - np.random.randint(0, 11, m), - np.random.choice(list("abcdefghijk"), m), - np.random.choice(pd.date_range("20141009", periods=11).tolist(), m), - np.random.choice(list("ZYXWVUTSRQP"), m), + np.random.default_rng(2).randint(0, 11, m), + np.random.default_rng(2).choice(list("abcdefghijk"), m), + np.random.default_rng(2).choice( + pd.date_range("20141009", periods=11).tolist(), m + ), + np.random.default_rng(2).choice(list("ZYXWVUTSRQP"), m), ] keys = list(map(tuple, zip(*keys))) keys += [t[:-1] for t in vals[:: n // m]] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 3bf8c2eaa7e94..b8333da5121be 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -29,7 +29,7 @@ def frame_random_data_integer_multi_index(): levels = [[0, 1], [0, 1, 2]] codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, codes=codes) - return DataFrame(np.random.randn(6, 2), index=index) + return DataFrame(np.random.default_rng(2).randn(6, 2), index=index) class TestMultiIndexLoc: @@ -76,7 +76,7 @@ def test_loc_getitem_general(self, any_real_numpy_dtype): def test_loc_getitem_multiindex_missing_label_raises(self): # GH#21593 df = DataFrame( - np.random.randn(3, 3), + np.random.default_rng(2).randn(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -155,7 +155,7 @@ def test_loc_getitem_array(self): def test_loc_multiindex_labels(self): df = DataFrame( - np.random.randn(3, 3), + 
np.random.default_rng(2).randn(3, 3), columns=[["i", "i", "j"], ["A", "A", "B"]], index=[["i", "i", "j"], ["X", "X", "Y"]], ) @@ -182,7 +182,7 @@ def test_loc_multiindex_labels(self): def test_loc_multiindex_ints(self): df = DataFrame( - np.random.randn(3, 3), + np.random.default_rng(2).randn(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -192,7 +192,7 @@ def test_loc_multiindex_ints(self): def test_loc_multiindex_missing_label_raises(self): df = DataFrame( - np.random.randn(3, 3), + np.random.default_rng(2).randn(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -204,7 +204,7 @@ def test_loc_multiindex_missing_label_raises(self): def test_loc_multiindex_list_missing_label(self, key, pos): # GH 27148 - lists with missing labels _do_ raise df = DataFrame( - np.random.randn(3, 3), + np.random.default_rng(2).randn(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -233,7 +233,7 @@ def test_loc_multiindex_indexer_none(self): attribute_values = ["Value" + str(i) for i in range(5)] index = MultiIndex.from_product([attributes, attribute_values]) - df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 + df = 0.1 * np.random.default_rng(2).randn(10, 1 * 5) + 0.5 df = DataFrame(df, columns=index) result = df[attributes] tm.assert_frame_equal(result, df) @@ -279,7 +279,9 @@ def test_loc_multiindex_incomplete(self): def test_get_loc_single_level(self, single_level_multiindex): single_level = single_level_multiindex - s = Series(np.random.randn(len(single_level)), index=single_level) + s = Series( + np.random.default_rng(2).randn(len(single_level)), index=single_level + ) for k in single_level.values: s[k] @@ -288,13 +290,13 @@ def test_loc_getitem_int_slice(self): # loc should treat integer slices like label slices index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]]) - df = DataFrame(np.random.randn(6, 6), index, index) + df = DataFrame(np.random.default_rng(2).randn(6, 6), index, index) result = 
df.loc[6:8, :] expected = df tm.assert_frame_equal(result, expected) index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]]) - df = DataFrame(np.random.randn(6, 6), index, index) + df = DataFrame(np.random.default_rng(2).randn(6, 6), index, index) result = df.loc[20:30, :] expected = df.iloc[2:] tm.assert_frame_equal(result, expected) @@ -472,7 +474,9 @@ def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer): # GH 8737 # empty indexer multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"])) - df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) + df = DataFrame( + np.random.default_rng(2).randn(5, 6), index=range(5), columns=multi_index + ) df = df.sort_index(level=0, axis=1) expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) @@ -499,8 +503,8 @@ def test_loc_getitem_tuple_plus_slice(): { "a": np.arange(10), "b": np.arange(10), - "c": np.random.randn(10), - "d": np.random.randn(10), + "c": np.random.default_rng(2).randn(10), + "d": np.random.default_rng(2).randn(10), } ).set_index(["a", "b"]) expected = df.loc[0, 0] @@ -745,7 +749,7 @@ def test_missing_key_combination(self): ], names=["one", "two", "three"], ) - df = DataFrame(np.random.rand(4, 3), index=mi) + df = DataFrame(np.random.default_rng(2).rand(4, 3), index=mi) msg = r"\('b', '1', slice\(None, None, None\)\)" with pytest.raises(KeyError, match=msg): df.loc[("b", "1", slice(None)), :] diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 955c4acfd4c97..2bab1c2b77b1c 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -22,7 +22,7 @@ def test_multiindex_perf_warn(self): { "jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], - "jolie": np.random.rand(4), + "jolie": np.random.default_rng(2).rand(4), } ).set_index(["jim", "joe"]) @@ -117,7 +117,7 @@ def 
test_multiindex_with_datatime_level_preserves_freq(self): idx = Index(range(2), name="A") dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B") mi = MultiIndex.from_product([idx, dti]) - df = DataFrame(np.random.randn(14, 2), index=mi) + df = DataFrame(np.random.default_rng(2).randn(14, 2), index=mi) result = df.loc[0].index tm.assert_index_equal(result, dti) assert result.freq == dti.freq diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index d34daaf640305..e8c5aa789f714 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -65,7 +65,9 @@ def test_xs_partial( [0, 1, 0, 1, 0, 1, 0, 1], ], ) - df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).randn(8, 4), index=index, columns=list("abcd") + ) result = df.xs(("foo", "one")) expected = df.loc["foo", "one"] @@ -101,7 +103,7 @@ def test_getitem_partial_column_select(self): codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], levels=[["a", "b"], ["x", "y"], ["p", "q"]], ) - df = DataFrame(np.random.rand(3, 2), index=idx) + df = DataFrame(np.random.default_rng(2).rand(3, 2), index=idx) result = df.loc[("a", "y"), :] expected = df.loc[("a", "y")] @@ -250,7 +252,7 @@ def test_loc_getitem_partial_both_axis(): iterables = [["a", "b"], [2, 1]] columns = MultiIndex.from_product(iterables, names=["col1", "col2"]) rows = MultiIndex.from_product(iterables, names=["row1", "row2"]) - df = DataFrame(np.random.randn(4, 4), index=rows, columns=columns) + df = DataFrame(np.random.default_rng(2).randn(4, 4), index=rows, columns=columns) expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1) result = df.loc["a", "b"] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index e6f44359a1a62..e7c07afbcf42a 100644 --- 
a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -87,7 +87,9 @@ def test_setitem_multiindex3(self): [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] ) - df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols) + df = DataFrame( + np.random.default_rng(2).random((12, 4)), index=idx, columns=cols + ) subidx = MultiIndex.from_tuples( [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] @@ -96,7 +98,9 @@ def test_setitem_multiindex3(self): [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] ) - vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) + vals = DataFrame( + np.random.default_rng(2).random((2, 2)), index=subidx, columns=subcols + ) self.check( target=df, indexers=(subidx, subcols), @@ -104,7 +108,9 @@ def test_setitem_multiindex3(self): compare_fn=tm.assert_frame_equal, ) # set all columns - vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) + vals = DataFrame( + np.random.default_rng(2).random((2, 4)), index=subidx, columns=cols + ) self.check( target=df, indexers=(subidx, slice(None, None, None)), @@ -134,7 +140,7 @@ def test_multiindex_setitem(self): ] df_orig = DataFrame( - np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"] + np.random.default_rng(2).randn(6, 3), index=arrays, columns=["A", "B", "C"] ).sort_index() expected = df_orig.loc[["bar"]] * 2 @@ -183,7 +189,7 @@ def test_multiindex_assignment(self): # mixed dtype df = DataFrame( - np.random.randint(5, 10, size=9).reshape(3, 3), + np.random.default_rng(2).randint(5, 10, size=9).reshape(3, 3), columns=list("abc"), index=[[4, 4, 8], [8, 10, 12]], ) @@ -199,7 +205,7 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): arr = np.array([0.0, 1.0]) df = DataFrame( - np.random.randint(5, 10, size=9).reshape(3, 3), + np.random.default_rng(2).randint(5, 10, size=9).reshape(3, 3), columns=list("abc"), index=[[4, 4, 8], 
[8, 10, 12]], dtype=np.int64, @@ -248,7 +254,7 @@ def test_groupby_example(self): index_cols = col_names[:5] df = DataFrame( - np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), + np.random.default_rng(2).randint(5, size=(NUM_ROWS, NUM_COLS)), dtype=np.int64, columns=col_names, ) @@ -328,7 +334,8 @@ def test_frame_getitem_setitem_multislice(self): def test_frame_setitem_multi_column(self): df = DataFrame( - np.random.randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]] + np.random.default_rng(2).randn(10, 4), + columns=[["a", "a", "b", "b"], [0, 1, 0, 1]], ) cp = df.copy() @@ -380,7 +387,9 @@ def test_loc_getitem_setitem_slice_integers(self, frame_or_series): ) obj = DataFrame( - np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"] + np.random.default_rng(2).randn(len(index), 4), + index=index, + columns=["a", "b", "c", "d"], ) obj = tm.get_obj(obj, frame_or_series) @@ -463,7 +472,7 @@ def test_setitem_new_column_mixed_depth(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).randn(4, 6), columns=index) result = df.copy() expected = df.copy() diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py index ffea7fefe2d8d..9b310c32920dc 100644 --- a/pandas/tests/indexing/multiindex/test_sorted.py +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -141,7 +141,7 @@ def test_series_getitem_not_sorted(self): ] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) - s = Series(np.random.randn(8), index=index) + s = Series(np.random.default_rng(2).randn(8), index=index) arrays = [np.array(x) for x in zip(*index.values)] diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 01315647c464b..dc48bf261746b 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -142,7 +142,7 @@ def 
test_at_with_duplicate_axes_requires_scalar_lookup(self): # GH#33041 check that falling back to loc doesn't allow non-scalar # args to slip in - arr = np.random.randn(6).reshape(3, 2) + arr = np.random.default_rng(2).randn(6).reshape(3, 2) df = DataFrame(arr, columns=["A", "A"]) msg = "Invalid call for scalar access" diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 05d6eadc5aed0..8f27af641014f 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -402,7 +402,9 @@ def test_loc_getitem_listlike_unused_category_raises_keyerror(self): def test_ix_categorical_index(self): # GH 12531 - df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ")) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), index=list("ABC"), columns=list("XYZ") + ) cdf = df.copy() cdf.index = CategoricalIndex(df.index) cdf.columns = CategoricalIndex(df.columns) @@ -423,7 +425,9 @@ def test_ix_categorical_index(self): def test_ix_categorical_index_non_unique(self): # non-unique - df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX")) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), index=list("ABA"), columns=list("XYX") + ) cdf = df.copy() cdf.index = CategoricalIndex(df.index) cdf.columns = CategoricalIndex(df.columns) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index d2224988b70fc..91b77d8ecc6af 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -24,7 +24,7 @@ def random_text(nobs=100): # Construct a DataFrame where each row is a random slice from 'letters' - idxs = np.random.randint(len(letters), size=(nobs, 2)) + idxs = np.random.default_rng(2).randint(len(letters), size=(nobs, 2)) idxs.sort(axis=1) strings = [letters[x[0] : x[1]] for x in idxs] @@ -400,7 +400,7 @@ def 
test_detect_chained_assignment_is_copy(self): @pytest.mark.arm_slow def test_detect_chained_assignment_sorting(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) ser = df.iloc[:, 0].sort_values() tm.assert_series_equal(ser, df.iloc[:, 0].sort_values()) @@ -445,7 +445,7 @@ def test_detect_chained_assignment_changing_dtype( df = DataFrame( { "A": date_range("20130101", periods=5), - "B": np.random.randn(5), + "B": np.random.default_rng(2).randn(5), "C": np.arange(5, dtype="int64"), "D": ["a", "b", "c", "d", "e"], } diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 0bcc2aa75d78a..807df5e08862b 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -15,7 +15,9 @@ def gen_obj(klass, index): obj = Series(np.arange(len(index)), index=index) else: obj = DataFrame( - np.random.randn(len(index), len(index)), index=index, columns=index + np.random.default_rng(2).randn(len(index), len(index)), + index=index, + columns=index, ) return obj @@ -351,7 +353,7 @@ def test_slice_integer_frame_getitem(self, index_func): # similar to above, but on the getitem dim (of a DataFrame) index = index_func(5) - s = DataFrame(np.random.randn(5, 2), index=index) + s = DataFrame(np.random.default_rng(2).randn(5, 2), index=index) # getitem for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: @@ -406,7 +408,7 @@ def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func): # similar to above, but on the getitem dim (of a DataFrame) index = index_func(5) - s = DataFrame(np.random.randn(5, 2), index=index) + s = DataFrame(np.random.default_rng(2).randn(5, 2), index=index) # setitem sc = s.copy() diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index 40d3e0c3e9430..083a6e83649b1 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -24,7 +24,7 @@ def 
test_iat_duplicate_columns(): def test_iat_getitem_series_with_period_index(): # GH#4390, iat incorrectly indexing index = period_range("1/1/2001", periods=10) - ser = Series(np.random.randn(10), index=index) + ser = Series(np.random.default_rng(2).randn(10), index=index) expected = ser[index[0]] result = ser.iat[0] assert expected == result diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 3dec187a54439..4539ff57844c2 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -152,7 +152,9 @@ def test_is_scalar_access(self): def test_iloc_exceeds_bounds(self): # GH6296 # iloc should allow indexers that exceed the bounds - df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).random_sample((20, 5)), columns=list("ABCDE") + ) # lists of positions should raise IndexError! msg = "positional indexers are out-of-bounds" @@ -238,7 +240,7 @@ def check(result, expected): result.dtypes tm.assert_frame_equal(result, expected) - dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) + dfl = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("AB")) check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[])) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) @@ -262,7 +264,9 @@ def check(result, expected): def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): # GH 25753 df = DataFrame( - np.random.randn(len(index), len(columns)), index=index, columns=columns + np.random.default_rng(2).randn(len(index), len(columns)), + index=index, + columns=columns, ) msg = ".iloc requires numeric indexers, got" with pytest.raises(IndexError, match=msg): @@ -401,9 +405,12 @@ def test_iloc_getitem_slice(self): tm.assert_frame_equal(result, expected) def test_iloc_getitem_slice_dups(self): - df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) + df1 = DataFrame( + 
np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"] + ) df2 = DataFrame( - np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + np.random.default_rng(2).randint(0, 10, size=20).reshape(10, 2), + columns=["A", "C"], ) # axis=1 @@ -427,7 +434,9 @@ def test_iloc_getitem_slice_dups(self): def test_iloc_setitem(self): df = DataFrame( - np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) + np.random.default_rng(2).randn(4, 4), + index=np.arange(0, 8, 2), + columns=np.arange(0, 12, 3), ) df.iloc[1, 1] = 1 @@ -543,7 +552,9 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( # is redundant with another test comparing iloc against loc def test_iloc_getitem_frame(self): df = DataFrame( - np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2) + np.random.default_rng(2).randn(10, 4), + index=range(0, 20, 2), + columns=range(0, 8, 2), ) result = df.iloc[2] @@ -591,7 +602,9 @@ def test_iloc_getitem_frame(self): def test_iloc_getitem_labelled_frame(self): # try with labelled frame df = DataFrame( - np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + np.random.default_rng(2).randn(10, 4), + index=list("abcdefghij"), + columns=list("ABCD"), ) result = df.iloc[1, 1] @@ -625,7 +638,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): # multi axis slicing issue with single block # surfaced in GH 6059 - arr = np.random.randn(6, 4) + arr = np.random.default_rng(2).randn(6, 4) index = date_range("20130101", periods=6) columns = list("ABCD") df = DataFrame(arr, index=index, columns=columns) @@ -650,7 +663,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): tm.assert_frame_equal(result, expected) # related - arr = np.random.randn(6, 4) + arr = np.random.default_rng(2).randn(6, 4) index = list(range(0, 12, 2)) columns = list(range(0, 8, 2)) df = DataFrame(arr, index=index, columns=columns) @@ -665,7 +678,9 @@ def test_iloc_getitem_doc_issue(self, 
using_array_manager): def test_iloc_setitem_series(self): df = DataFrame( - np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + np.random.default_rng(2).randn(10, 4), + index=list("abcdefghij"), + columns=list("ABCD"), ) df.iloc[1, 1] = 1 @@ -677,7 +692,7 @@ def test_iloc_setitem_series(self): result = df.iloc[:, 2:3] tm.assert_frame_equal(result, expected) - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.default_rng(2).randn(10), index=range(0, 20, 2)) s.iloc[1] = 1 result = s.iloc[1] @@ -962,8 +977,10 @@ def test_setitem_mix_of_nan_and_interval(self, not_na, nulls_fixture): def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): idx = Index([]) - obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx) - nd3 = np.random.randint(5, size=(2, 2, 2)) + obj = DataFrame( + np.random.default_rng(2).randn(len(idx), len(idx)), index=idx, columns=idx + ) + nd3 = np.random.default_rng(2).randint(5, size=(2, 2, 2)) msg = f"Cannot set values with ndim > {obj.ndim}" with pytest.raises(ValueError, match=msg): @@ -1040,7 +1057,9 @@ def test_iloc_setitem_dictionary_value(self): def test_iloc_getitem_float_duplicates(self): df = DataFrame( - np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") + np.random.default_rng(2).randn(3, 3), + index=[0.1, 0.2, 0.2], + columns=list("abc"), ) expect = df.iloc[1:] tm.assert_frame_equal(df.loc[0.2], expect) @@ -1056,7 +1075,9 @@ def test_iloc_getitem_float_duplicates(self): tm.assert_series_equal(df.loc[0.2, "a"], expect) df = DataFrame( - np.random.randn(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc") + np.random.default_rng(2).randn(4, 3), + index=[1, 0.2, 0.2, 1], + columns=list("abc"), ) expect = df.iloc[1:-1] tm.assert_frame_equal(df.loc[0.2], expect) @@ -1106,7 +1127,9 @@ def view(self): tm.assert_frame_equal(result, df) def test_iloc_getitem_with_duplicates(self): - df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) 
+ df = DataFrame( + np.random.default_rng(2).rand(3, 3), columns=list("ABC"), index=list("aab") + ) result = df.iloc[0] assert isinstance(result, Series) @@ -1390,7 +1413,7 @@ def test_frame_iloc_setitem_callable(self): class TestILocSeries: def test_iloc(self, using_copy_on_write): - ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) ser_original = ser.copy() for i in range(len(ser)): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 21036598f46df..321772e73deff 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -81,7 +81,7 @@ def test_getitem_ndarray_3d( # GH 25567 obj = gen_obj(frame_or_series, index) idxr = indexer_sli(obj) - nd3 = np.random.randint(5, size=(2, 2, 2)) + nd3 = np.random.default_rng(2).randint(5, size=(2, 2, 2)) msgs = [] if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]: @@ -125,7 +125,7 @@ def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): # GH 25567 obj = gen_obj(frame_or_series, index) idxr = indexer_sli(obj) - nd3 = np.random.randint(5, size=(2, 2, 2)) + nd3 = np.random.default_rng(2).randint(5, size=(2, 2, 2)) if indexer_sli is tm.iloc: err = ValueError @@ -279,7 +279,7 @@ def test_dups_fancy_indexing_not_in_order(self): def test_dups_fancy_indexing_only_missing_label(self): # List containing only missing label - dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) + dfnu = DataFrame(np.random.default_rng(2).randn(5, 3), index=list("AABCD")) with pytest.raises( KeyError, match=re.escape( @@ -304,7 +304,9 @@ def test_dups_fancy_indexing_non_unique(self): def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values - df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]) + df = DataFrame( + np.random.default_rng(2).randn(5, 5), columns=["A", "B", "B", "B", "A"] + ) with 
pytest.raises(KeyError, match="not in index"): df.loc[:, ["A", "B", "C"]] @@ -312,7 +314,9 @@ def test_dups_fancy_indexing2(self): def test_dups_fancy_indexing3(self): # GH 6504, multi-axis indexing df = DataFrame( - np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"] + np.random.default_rng(2).randn(9, 2), + index=[1, 1, 1, 2, 2, 2, 3, 3, 3], + columns=["a", "b"], ) expected = df.iloc[0:6] @@ -351,7 +355,9 @@ def test_indexing_mixed_frame_bug(self): def test_multitype_list_index_access(self): # GH 10610 - df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) + df = DataFrame( + np.random.default_rng(2).random((10, 5)), columns=["a"] + [20, 21, 22, 23] + ) with pytest.raises(KeyError, match=re.escape("'[26, -8] not in index'")): df[[22, 26, -8]] @@ -634,7 +640,12 @@ def test_index_type_coercion(self, indexer): class TestMisc: def test_float_index_to_mixed(self): - df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) + df = DataFrame( + { + 0.0: np.random.default_rng(2).rand(10), + 1.0: np.random.default_rng(2).rand(10), + } + ) df["a"] = 10 expected = DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4017a0e3a2f80..5530780409a52 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -305,7 +305,8 @@ def test_loc_getitem_dups(self): # GH 5678 # repeated getitems on a dup index returning a ndarray df = DataFrame( - np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)] + np.random.default_rng(2).random_sample((20, 5)), + index=["ABCDE"[x % 5] for x in range(20)], ) expected = df.loc["A", 0] result = df.loc[:, 0].loc["A"] @@ -432,7 +433,9 @@ def test_loc_getitem_int_slice(self): def test_loc_to_fail(self): # GH3449 df = DataFrame( - np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] + np.random.default_rng(2).random((3, 3)), + index=["a", 
"b", "c"], + columns=["e", "f", "g"], ) msg = ( @@ -517,7 +520,7 @@ def test_loc_index(self): # a boolean index should index like a boolean numpy array df = DataFrame( - np.random.random(size=(5, 10)), + np.random.default_rng(2).random(size=(5, 10)), index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], ) @@ -535,7 +538,7 @@ def test_loc_index(self): def test_loc_general(self): df = DataFrame( - np.random.rand(4, 4), + np.random.default_rng(2).rand(4, 4), columns=["A", "B", "C", "D"], index=["A", "B", "C", "D"], ) @@ -778,7 +781,11 @@ def test_loc_setitem_empty_frame(self): tm.assert_frame_equal(df, expected) def test_loc_setitem_frame(self): - df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) + df = DataFrame( + np.random.default_rng(2).randn(4, 4), + index=list("abcd"), + columns=list("ABCD"), + ) result = df.iloc[0, 0] @@ -1020,7 +1027,7 @@ def test_loc_non_unique_memory_error(self, length, l2): df = pd.concat( [ DataFrame( - np.random.randn(length, len(columns)), + np.random.default_rng(2).randn(length, len(columns)), index=np.arange(length), columns=columns, ), @@ -1091,7 +1098,7 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): assert (sliced_df["a"] == 4).all() # These should not return copies - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) if using_copy_on_write: assert df[0] is not df.loc[:, 0] else: @@ -1240,8 +1247,8 @@ def test_loc_reverse_assignment(self): def test_loc_setitem_str_to_small_float_conversion_type(self): # GH#20388 - np.random.seed(13) - col_data = [str(np.random.random() * 1e-12) for _ in range(5)] + + col_data = [str(np.random.default_rng(2).random() * 1e-12) for _ in range(5)] result = DataFrame(col_data, columns=["A"]) expected = DataFrame(col_data, columns=["A"], dtype=object) tm.assert_frame_equal(result, expected) @@ -1262,7 +1269,7 @@ def test_loc_getitem_time_object(self, frame_or_series): rng = date_range("1/1/2000", 
"1/5/2000", freq="5min") mask = (rng.hour == 9) & (rng.minute == 30) - obj = DataFrame(np.random.randn(len(rng), 3), index=rng) + obj = DataFrame(np.random.default_rng(2).randn(len(rng), 3), index=rng) obj = tm.get_obj(obj, frame_or_series) result = obj.loc[time(9, 30)] @@ -1357,7 +1364,7 @@ def test_loc_getitem_iterable(self, float_frame, key_type): def test_loc_getitem_timedelta_0seconds(self): # GH#10583 - df = DataFrame(np.random.normal(size=(10, 4))) + df = DataFrame(np.random.default_rng(2).normal(size=(10, 4))) df.index = timedelta_range(start="0s", periods=10, freq="s") expected = df.loc[Timedelta("0s") :, :] result = df.loc["0s":, :] @@ -1460,7 +1467,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): def test_loc_setitem_time_key(self, using_array_manager): index = date_range("2012-01-01", "2012-01-05", freq="30min") - df = DataFrame(np.random.randn(len(index), 5), index=index) + df = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) akey = time(12, 0, 0) bkey = slice(time(13, 0, 0), time(14, 0, 0)) ainds = [24, 72, 120, 168] @@ -1528,7 +1535,7 @@ def test_loc_setitem_td64_non_nano(self): tm.assert_series_equal(ser, expected) def test_loc_setitem_2d_to_1d_raises(self): - data = np.random.randn(2, 2) + data = np.random.default_rng(2).randn(2, 2) # float64 dtype to avoid upcast when trying to set float data ser = Series(range(2), dtype="float64") @@ -1595,7 +1602,7 @@ def test_loc_getitem_index_namedtuple(self): def test_loc_setitem_single_column_mixed(self): df = DataFrame( - np.random.randn(5, 3), + np.random.default_rng(2).randn(5, 3), index=["a", "b", "c", "d", "e"], columns=["foo", "bar", "baz"], ) @@ -1607,7 +1614,7 @@ def test_loc_setitem_single_column_mixed(self): def test_loc_setitem_cast2(self): # GH#7704 # dtype conversion on setting - df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) + df = DataFrame(np.random.default_rng(2).rand(30, 3), columns=tuple("ABC")) df["event"] = np.nan df.loc[10, 
"event"] = "foo" result = df.dtypes @@ -2403,7 +2410,7 @@ def test_loc_getitem_label_slice_period_timedelta(self, index): def test_loc_getitem_slice_floats_inexact(self): index = [52195.504153, 52196.303147, 52198.369883] - df = DataFrame(np.random.rand(3, 2), index=index) + df = DataFrame(np.random.default_rng(2).rand(3, 2), index=index) s1 = df.loc[52195.1:52196.5] assert len(s1) == 2 @@ -2416,7 +2423,9 @@ def test_loc_getitem_slice_floats_inexact(self): def test_loc_getitem_float_slice_floatindex(self, float_numpy_dtype): dtype = float_numpy_dtype - ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=dtype)) + ser = Series( + np.random.default_rng(2).rand(10), index=np.arange(10, 20, dtype=dtype) + ) assert len(ser.loc[12.0:]) == 8 assert len(ser.loc[12.5:]) == 7 @@ -2737,7 +2746,9 @@ def test_loc_named_index(self): ) def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns): # gh-14836 - df = DataFrame(np.random.rand(3, 3), columns=columns, index=list("ABC")) + df = DataFrame( + np.random.default_rng(2).rand(3, 3), columns=columns, index=list("ABC") + ) expected = df.iloc[:, expected_columns] result = df.loc[["A", "B", "C"], column_key] @@ -2746,7 +2757,7 @@ def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_col def test_loc_setitem_float_intindex(): # GH 8720 - rand_data = np.random.randn(8, 4) + rand_data = np.random.default_rng(2).randn(8, 4) result = DataFrame(rand_data) result.loc[:, 0.5] = np.nan expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) @@ -2804,7 +2815,7 @@ def test_loc_with_positional_slice_raises(): def test_loc_slice_disallows_positional(): # GH#16121, GH#24612, GH#31810 dti = date_range("2016-01-01", periods=3) - df = DataFrame(np.random.random((3, 2)), index=dti) + df = DataFrame(np.random.default_rng(2).random((3, 2)), index=dti) ser = df[0] @@ -2833,7 +2844,7 @@ def test_loc_datetimelike_mismatched_dtypes(): # GH#32650 dont mix and match 
datetime/timedelta/period dtypes df = DataFrame( - np.random.randn(5, 3), + np.random.default_rng(2).randn(5, 3), columns=["a", "b", "c"], index=date_range("2012", freq="H", periods=5), ) @@ -2854,7 +2865,7 @@ def test_loc_datetimelike_mismatched_dtypes(): def test_loc_with_period_index_indexer(): # GH#4125 idx = pd.period_range("2002-01", "2003-12", freq="M") - df = DataFrame(np.random.randn(24, 10), index=idx) + df = DataFrame(np.random.default_rng(2).randn(24, 10), index=idx) tm.assert_frame_equal(df, df.loc[idx]) tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df, df.loc[list(idx)]) @@ -2864,7 +2875,7 @@ def test_loc_with_period_index_indexer(): def test_loc_setitem_multiindex_timestamp(): # GH#13831 - vals = np.random.randn(8, 6) + vals = np.random.default_rng(2).randn(8, 6) idx = date_range("1/1/2000", periods=8) cols = ["A", "B", "C", "D", "E", "F"] exp = DataFrame(vals, index=idx, columns=cols) @@ -3039,7 +3050,7 @@ def test_loc_getitem_not_monotonic(self, datetime_series): ts2.loc[d1:d2] = 0 def test_loc_getitem_setitem_integer_slice_keyerrors(self): - ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) # this is OK cp = ser.copy() @@ -3103,7 +3114,7 @@ def test_basic_setitem_with_labels(self, datetime_series): def test_loc_setitem_listlike_of_ints(self): # integer indexes, be careful - ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) inds = [0, 4, 6] arr_inds = np.array([0, 4, 6]) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 5d1d4ba6f638a..3506efe9f09d9 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -332,7 +332,9 @@ def test_partial_setting2(self): # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame( - np.random.randn(8, 4), 
index=dates, columns=["A", "B", "C", "D"] + np.random.default_rng(2).randn(8, 4), + index=dates, + columns=["A", "B", "C", "D"], ) expected = pd.concat( diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 1bc7953014d89..fc0dd43a3c744 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -75,7 +75,11 @@ def test_float_index_at_iat(self): def test_at_iat_coercion(self): # as timestamp is not a tuple! dates = date_range("1/1/2000", periods=8) - df = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]) + df = DataFrame( + np.random.default_rng(2).randn(8, 4), + index=dates, + columns=["A", "B", "C", "D"], + ) s = df["A"] result = s.at[dates[5]] @@ -132,7 +136,7 @@ def test_imethods_with_dups(self): def test_frame_at_with_duplicate_axes(self): # GH#33041 - arr = np.random.randn(6).reshape(3, 2) + arr = np.random.default_rng(2).randn(6).reshape(3, 2) df = DataFrame(arr, columns=["A", "A"]) result = df.at[0, "A"] diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 5fce4f162d71f..2e5ee9f894eae 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -1,5 +1,4 @@ from datetime import datetime -import random import numpy as np import pytest @@ -129,14 +128,14 @@ def test_bitmasks_pyarrow(offset, length, expected_values): @pytest.mark.parametrize( "data", [ - lambda: random.randint(-100, 100), - lambda: random.randint(1, 100), - lambda: random.random(), - lambda: random.choice([True, False]), + lambda: np.random.default_rng(2).randint(-100, 100), + lambda: np.random.default_rng(2).randint(1, 100), + lambda: np.random.default_rng(2).random(), + lambda: np.random.default_rng(2).choice([True, False]), lambda: datetime( - year=random.randint(1900, 2100), - month=random.randint(1, 12), - day=random.randint(1, 20), + year=np.random.default_rng(2).randint(1900, 2100), + 
month=np.random.default_rng(2).randint(1, 12), + day=np.random.default_rng(2).randint(1, 20), ), ], ) @@ -177,7 +176,7 @@ def test_missing_from_masked(): df2 = df.__dataframe__() - rng = np.random.RandomState(42) + rng = np.random.default_rng(2).RandomState(42) dict_null = {col: rng.randint(low=0, high=len(df)) for col in df.columns} for col, num_nulls in dict_null.items(): null_idx = df.index[ diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 47e7092743b00..a6e624f91b241 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -350,7 +350,7 @@ def test_delete_datetimelike(self): def test_split(self): # GH#37799 - values = np.random.randn(3, 4) + values = np.random.default_rng(2).randn(3, 4) blk = new_block(values, placement=BlockPlacement([3, 1, 6]), ndim=2) result = blk._split() @@ -424,7 +424,7 @@ def test_categorical_block_pickle(self): def test_iget(self): cols = Index(list("abc")) - values = np.random.rand(3, 3) + values = np.random.default_rng(2).rand(3, 3) block = new_block( values=values.copy(), placement=BlockPlacement(np.arange(3, dtype=np.intp)), @@ -462,11 +462,13 @@ def test_set_change_dtype(self, mgr): idx = mgr2.items.get_loc("baz") assert mgr2.iget(idx).dtype == np.object_ - mgr2.insert(len(mgr2.items), "quux", np.random.randn(N).astype(int)) + mgr2.insert( + len(mgr2.items), "quux", np.random.default_rng(2).randn(N).astype(int) + ) idx = mgr2.items.get_loc("quux") assert mgr2.iget(idx).dtype == np.int_ - mgr2.iset(mgr2.items.get_loc("quux"), np.random.randn(N)) + mgr2.iset(mgr2.items.get_loc("quux"), np.random.default_rng(2).randn(N)) assert mgr2.iget(idx).dtype == np.float_ def test_copy(self, mgr): @@ -700,11 +702,11 @@ def test_interleave_dtype(self, mgr_string, dtype): assert mgr.as_array().dtype == "object" def test_consolidate_ordering_issues(self, mgr): - mgr.iset(mgr.items.get_loc("f"), np.random.randn(N)) - mgr.iset(mgr.items.get_loc("d"), 
np.random.randn(N)) - mgr.iset(mgr.items.get_loc("b"), np.random.randn(N)) - mgr.iset(mgr.items.get_loc("g"), np.random.randn(N)) - mgr.iset(mgr.items.get_loc("h"), np.random.randn(N)) + mgr.iset(mgr.items.get_loc("f"), np.random.default_rng(2).randn(N)) + mgr.iset(mgr.items.get_loc("d"), np.random.default_rng(2).randn(N)) + mgr.iset(mgr.items.get_loc("b"), np.random.default_rng(2).randn(N)) + mgr.iset(mgr.items.get_loc("g"), np.random.default_rng(2).randn(N)) + mgr.iset(mgr.items.get_loc("h"), np.random.default_rng(2).randn(N)) # we have datetime/tz blocks in mgr cons = mgr.consolidate() diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index 710f1f272cd7f..c4576ad57a8ff 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -38,7 +38,7 @@ def assert_equal_cell_styles(cell1, cell2): def test_styler_to_excel_unstyled(engine): # compare DataFrame.to_excel and Styler.to_excel when no styles applied pytest.importorskip(engine) - df = DataFrame(np.random.randn(2, 2)) + df = DataFrame(np.random.default_rng(2).randn(2, 2)) with tm.ensure_clean(".xlsx") as path: with ExcelWriter(path, engine=engine) as writer: df.to_excel(writer, sheet_name="dataframe") @@ -130,7 +130,7 @@ def test_styler_to_excel_unstyled(engine): @pytest.mark.parametrize("css, attrs, expected", shared_style_params) def test_styler_to_excel_basic(engine, css, attrs, expected): pytest.importorskip(engine) - df = DataFrame(np.random.randn(1, 1)) + df = DataFrame(np.random.default_rng(2).randn(1, 1)) styler = df.style.map(lambda x: css) with tm.ensure_clean(".xlsx") as path: @@ -161,7 +161,7 @@ def test_styler_to_excel_basic(engine, css, attrs, expected): @pytest.mark.parametrize("css, attrs, expected", shared_style_params) def test_styler_to_excel_basic_indexes(engine, css, attrs, expected): pytest.importorskip(engine) - df = DataFrame(np.random.randn(1, 1)) + df = DataFrame(np.random.default_rng(2).randn(1, 1)) styler = df.style 
styler.map_index(lambda x: css, axis=0) @@ -230,7 +230,7 @@ def test_styler_to_excel_border_style(engine, border_style): expected = border_style pytest.importorskip(engine) - df = DataFrame(np.random.randn(1, 1)) + df = DataFrame(np.random.default_rng(2).randn(1, 1)) styler = df.style.map(lambda x: css) with tm.ensure_clean(".xlsx") as path: @@ -260,7 +260,7 @@ def test_styler_custom_converter(): def custom_converter(css): return {"font": {"color": {"rgb": "111222"}}} - df = DataFrame(np.random.randn(1, 1)) + df = DataFrame(np.random.default_rng(2).randn(1, 1)) styler = df.style.map(lambda x: "color: #888999") with tm.ensure_clean(".xlsx") as path: with ExcelWriter(path, engine="openpyxl") as writer: diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 0560e12a00bf5..36a16a4270e20 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -357,7 +357,7 @@ def test_excel_sheet_size(self, path): col_df.to_excel(path) def test_excel_sheet_by_name_raise(self, path): - gt = DataFrame(np.random.randn(10, 2)) + gt = DataFrame(np.random.default_rng(2).randn(10, 2)) gt.to_excel(path) with ExcelFile(path) as xl: @@ -465,7 +465,9 @@ def test_basics_with_nan(self, frame, path): def test_int_types(self, np_type, path): # Test np.int values read come back as int # (rather than float which is Excel's format). - df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) + df = DataFrame( + np.random.default_rng(2).randint(-10, 10, size=(10, 2)), dtype=np_type + ) df.to_excel(path, "test1") with ExcelFile(path) as reader: @@ -480,7 +482,7 @@ def test_int_types(self, np_type, path): @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) def test_float_types(self, np_type, path): # Test np.float values read come back as float. 
- df = DataFrame(np.random.random_sample(10), dtype=np_type) + df = DataFrame(np.random.default_rng(2).random_sample(10), dtype=np_type) df.to_excel(path, "test1") with ExcelFile(path) as reader: @@ -565,7 +567,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): frame.to_excel(path, "test1", index=False) # test index_label - df = DataFrame(np.random.randn(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).randn(10, 2)) >= 0 df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) with ExcelFile(path) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( @@ -574,7 +576,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.randn(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).randn(10, 2)) >= 0 df.to_excel( path, "test1", @@ -588,7 +590,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.randn(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).randn(10, 2)) >= 0 df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) with ExcelFile(path) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( @@ -613,7 +615,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): tm.assert_frame_equal(df, recons) def test_excel_roundtrip_indexname(self, merge_cells, path): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) df.index.name = "foo" df.to_excel(path, merge_cells=merge_cells) @@ -689,7 +691,9 @@ def test_to_excel_interval_no_labels(self, path): # see gh-19242 # # Test writing Interval without labels. 
- df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + df = DataFrame( + np.random.default_rng(2).randint(-10, 10, size=(20, 1)), dtype=np.int64 + ) expected = df.copy() df["new"] = pd.cut(df[0], 10) @@ -704,7 +708,9 @@ def test_to_excel_interval_labels(self, path): # see gh-19242 # # Test writing Interval with labels. - df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + df = DataFrame( + np.random.default_rng(2).randint(-10, 10, size=(20, 1)), dtype=np.int64 + ) expected = df.copy() intervals = pd.cut( df[0], 10, labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] @@ -722,7 +728,9 @@ def test_to_excel_timedelta(self, path): # # Test writing timedelta to xls. df = DataFrame( - np.random.randint(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64 + np.random.default_rng(2).randint(-10, 10, size=(20, 1)), + columns=["A"], + dtype=np.int64, ) expected = df.copy() @@ -761,7 +769,13 @@ def test_to_excel_multiindex(self, merge_cells, frame, path): # GH13511 def test_to_excel_multiindex_nan_label(self, merge_cells, path): - df = DataFrame({"A": [None, 2, 3], "B": [10, 20, 30], "C": np.random.sample(3)}) + df = DataFrame( + { + "A": [None, 2, 3], + "B": [10, 20, 30], + "C": np.random.default_rng(2).sample(3), + } + ) df = df.set_index(["A", "B"]) df.to_excel(path, merge_cells=merge_cells) @@ -1106,7 +1120,7 @@ def test_datetimes(self, path): def test_bytes_io(self, engine): # see gh-7074 with BytesIO() as bio: - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) # Pass engine explicitly, as there is no file path to infer from. 
with ExcelWriter(bio, engine=engine) as writer: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 661c204da90e1..42b3290c57c1c 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -87,15 +87,13 @@ def blank_value(): @pytest.fixture def df(): - np.random.seed(24) - df = DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).randn(2)}) return df @pytest.fixture def styler(df): - np.random.seed(24) - df = DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).randn(2)}) return Styler(df) @@ -733,7 +731,7 @@ def test_map_subset_multiindex(self, slice_): idx = MultiIndex.from_product([["a", "b"], [1, 2]]) col = MultiIndex.from_product([["x", "y"], ["A", "B"]]) - df = DataFrame(np.random.rand(4, 4), columns=col, index=idx) + df = DataFrame(np.random.default_rng(2).rand(4, 4), columns=col, index=idx) with ctx: df.style.map(lambda x: "color: red;", subset=slice_).to_html() diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 92ced33ab338a..1ca535cc65cc1 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -212,9 +212,11 @@ def test_repr_truncation(self): with option_context("display.max_colwidth", max_len): df = DataFrame( { - "A": np.random.randn(10), + "A": np.random.default_rng(2).randn(10), "B": [ - tm.rands(np.random.randint(max_len - 1, max_len + 1)) + tm.rands( + np.random.default_rng(2).randint(max_len - 1, max_len + 1) + ) for i in range(10) ], } @@ -325,7 +327,7 @@ def test_repr_should_return_str(self): def test_repr_no_backslash(self): with option_context("mode.sim_interactive", True): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) assert "\\" not in repr(df) def test_expand_frame_repr(self): @@ 
-405,7 +407,7 @@ def test_repr_truncates_terminal_size(self, monkeypatch): def test_repr_truncates_terminal_size_full(self, monkeypatch): # GH 22984 ensure entire window is filled terminal_size = (80, 24) - df = DataFrame(np.random.rand(1, 7)) + df = DataFrame(np.random.default_rng(2).rand(1, 7)) monkeypatch.setattr( "pandas.io.formats.format.get_terminal_size", lambda: terminal_size @@ -574,7 +576,7 @@ def test_to_string_repr_unicode(self): repr(df) idx = Index(["abc", "\u03c3a", "aegdvg"]) - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) rs = repr(ser).split("\n") line_len = len(rs[0]) for line in rs[1:]: @@ -943,7 +945,7 @@ def test_to_string_buffer_all_unicode(self): buf.getvalue() def test_to_string_with_col_space(self): - df = DataFrame(np.random.random(size=(1, 3))) + df = DataFrame(np.random.default_rng(2).random(size=(1, 3))) c10 = len(df.to_string(col_space=10).split("\n")[1]) c20 = len(df.to_string(col_space=20).split("\n")[1]) c30 = len(df.to_string(col_space=30).split("\n")[1]) @@ -957,7 +959,9 @@ def test_to_string_with_col_space(self): assert len(with_header_row1) == len(no_header) def test_to_string_with_column_specific_col_space_raises(self): - df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] + ) msg = ( "Col_space length\\(\\d+\\) should match " @@ -974,7 +978,9 @@ def test_to_string_with_column_specific_col_space_raises(self): df.to_string(col_space={"a": "foo", "b": 23, "d": 34}) def test_to_string_with_column_specific_col_space(self): - df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] + ) result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) # 3 separating space + each col_space for (id, a, b, c) @@ -1187,7 +1193,8 @@ def test_wide_repr(self): def 
test_wide_repr_wide_columns(self): with option_context("mode.sim_interactive", True, "display.max_columns", 20): df = DataFrame( - np.random.randn(5, 3), columns=["a" * 90, "b" * 90, "c" * 90] + np.random.default_rng(2).randn(5, 3), + columns=["a" * 90, "b" * 90, "c" * 90], ) rep_str = repr(df) @@ -1274,7 +1281,7 @@ def test_wide_repr_wide_long_columns(self): def test_long_series(self): n = 1000 s = Series( - np.random.randint(-50, 50, n), + np.random.default_rng(2).randint(-50, 50, n), index=[f"s{x:04d}" for x in range(n)], dtype="int64", ) @@ -1363,7 +1370,7 @@ def test_index_with_nan(self): def test_to_string(self): # big mixed biggie = DataFrame( - {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, + {"A": np.random.default_rng(2).randn(200), "B": tm.makeStringIndex(200)}, ) biggie.loc[:20, "A"] = np.nan @@ -1964,7 +1971,9 @@ def test_repr_html_long_multiindex(self): tuples = list(itertools.product(np.arange(max_L1), ["foo", "bar"])) idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) - df = DataFrame(np.random.randn(max_L1 * 2, 2), index=idx, columns=["A", "B"]) + df = DataFrame( + np.random.default_rng(2).randn(max_L1 * 2, 2), index=idx, columns=["A", "B"] + ) with option_context("display.max_rows", 60, "display.max_columns", 20): reg_repr = df._repr_html_() assert "..." not in reg_repr @@ -1972,7 +1981,9 @@ def test_repr_html_long_multiindex(self): tuples = list(itertools.product(np.arange(max_L1 + 1), ["foo", "bar"])) idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.randn((max_L1 + 1) * 2, 2), index=idx, columns=["A", "B"] + np.random.default_rng(2).randn((max_L1 + 1) * 2, 2), + index=idx, + columns=["A", "B"], ) long_repr = df._repr_html_() assert "..." 
in long_repr @@ -2016,7 +2027,7 @@ def test_info_repr(self): def test_info_repr_max_cols(self): # GH #6939 - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) with option_context( "display.large_repr", "info", @@ -2199,7 +2210,7 @@ def test_max_rows_fitted(self, length, min_rows, max_rows, expected): https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options """ formatter = fmt.DataFrameFormatter( - DataFrame(np.random.rand(length, 3)), + DataFrame(np.random.default_rng(2).rand(length, 3)), max_rows=max_rows, min_rows=min_rows, ) @@ -2269,7 +2280,9 @@ def test_to_string(self): def test_freq_name_separation(self): s = Series( - np.random.randn(10), index=date_range("1/1/2000", periods=10), name=0 + np.random.default_rng(2).randn(10), + index=date_range("1/1/2000", periods=10), + name=0, ) result = repr(s) @@ -2718,7 +2731,7 @@ def test_max_multi_index_display(self): ] tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=["first", "second"]) - s = Series(np.random.randn(8), index=index) + s = Series(np.random.default_rng(2).randn(8), index=index) with option_context("display.max_rows", 10): assert len(str(s).split("\n")) == 10 @@ -2732,7 +2745,7 @@ def test_max_multi_index_display(self): assert len(str(s).split("\n")) == 10 # index - s = Series(np.random.randn(8), None) + s = Series(np.random.default_rng(2).randn(8), None) with option_context("display.max_rows", 10): assert len(str(s).split("\n")) == 9 diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index cd906ca27fa79..a809793109d52 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -26,7 +26,9 @@ @pytest.fixture def duplicate_columns_frame(): """Dataframe with duplicate column names.""" - return DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"]) + return DataFrame( + np.random.default_rng(2).randn(1500, 4), columns=["a", "a", 
"b", "b"] + ) def test_info_empty(): @@ -46,9 +48,11 @@ def test_info_empty(): def test_info_categorical_column_smoke_test(): n = 2500 - df = DataFrame({"int64": np.random.randint(100, size=n)}) + df = DataFrame({"int64": np.random.default_rng(2).randint(100, size=n)}) df["category"] = Series( - np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n)) + np.array(list("abcdefghij")).take( + np.random.default_rng(2).randint(0, 10, size=n) + ) ).astype("category") df.isna() buf = StringIO() @@ -87,7 +91,7 @@ def test_info_smoke_test(fixture_func_name, request): ], ) def test_info_default_verbose_selection(num_columns, max_info_columns, verbose): - frame = DataFrame(np.random.randn(5, num_columns)) + frame = DataFrame(np.random.default_rng(2).randn(5, num_columns)) with option_context("display.max_info_columns", max_info_columns): io_default = StringIO() frame.info(buf=io_default) @@ -104,7 +108,7 @@ def test_info_verbose_check_header_separator_body(): buf = StringIO() size = 1001 start = 5 - frame = DataFrame(np.random.randn(3, size)) + frame = DataFrame(np.random.default_rng(2).randn(3, size)) frame.info(verbose=True, buf=buf) res = buf.getvalue() @@ -166,7 +170,7 @@ def test_info_verbose_with_counts_spacing( size, header_exp, separator_exp, first_line_exp, last_line_exp ): """Test header column, spacer, first line and last line in verbose mode.""" - frame = DataFrame(np.random.randn(3, size)) + frame = DataFrame(np.random.default_rng(2).randn(3, size)) with StringIO() as buf: frame.info(verbose=True, show_counts=True, buf=buf) all_lines = buf.getvalue().splitlines() @@ -204,7 +208,7 @@ def test_info_memory(): def test_info_wide(): io = StringIO() - df = DataFrame(np.random.randn(5, 101)) + df = DataFrame(np.random.default_rng(2).randn(5, 101)) df.info(buf=io) io = StringIO() @@ -243,7 +247,7 @@ def test_info_shows_column_dtypes(): data = {} n = 10 for i, dtype in enumerate(dtypes): - data[i] = np.random.randint(2, size=n).astype(dtype) + data[i] = 
np.random.default_rng(2).randint(2, size=n).astype(dtype) df = DataFrame(data) buf = StringIO() df.info(buf=buf) @@ -259,7 +263,7 @@ def test_info_shows_column_dtypes(): def test_info_max_cols(): - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) for len_, verbose in [(5, None), (5, False), (12, True)]: # For verbose always ^ setting ^ summarize ^ full output with option_context("max_info_columns", 4): @@ -306,7 +310,7 @@ def test_info_memory_usage(): data = {} n = 10 for i, dtype in enumerate(dtypes): - data[i] = np.random.randint(2, size=n).astype(dtype) + data[i] = np.random.default_rng(2).randint(2, size=n).astype(dtype) df = DataFrame(data) buf = StringIO() @@ -337,7 +341,7 @@ def test_info_memory_usage(): data = {} n = 100 for i, dtype in enumerate(dtypes): - data[i] = np.random.randint(2, size=n).astype(dtype) + data[i] = np.random.default_rng(2).randint(2, size=n).astype(dtype) df = DataFrame(data) df.columns = dtypes @@ -452,7 +456,7 @@ def memory_usage(f): [list(uppercase), date_range("20160101", periods=N)], names=["id", "date"], ) - df = DataFrame({"value": np.random.randn(N * M)}, index=index) + df = DataFrame({"value": np.random.default_rng(2).randn(N * M)}, index=index) unstacked = df.unstack("id") assert df.values.nbytes == unstacked.values.nbytes diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index 555efd8d90ade..d24f9bcbc89d9 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -154,7 +154,7 @@ def test_publishes_not_implemented(self, ip): # column MultiIndex # GH 15996 midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) - df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx) + df = pd.DataFrame(np.random.default_rng(2).randn(5, len(midx)), columns=midx) opt = pd.option_context("display.html.table_schema", True) diff --git a/pandas/tests/io/formats/test_series_info.py 
b/pandas/tests/io/formats/test_series_info.py index 761dd07dbef51..e6dd61cb72d91 100644 --- a/pandas/tests/io/formats/test_series_info.py +++ b/pandas/tests/io/formats/test_series_info.py @@ -17,7 +17,9 @@ def test_info_categorical_column_just_works(): n = 2500 - data = np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n)) + data = np.array(list("abcdefghij")).take( + np.random.default_rng(2).randint(0, 10, size=n) + ) s = Series(data).astype("category") s.isna() buf = StringIO() @@ -90,7 +92,7 @@ def test_info_memory(): def test_info_wide(): - s = Series(np.random.randn(101)) + s = Series(np.random.default_rng(2).randn(101)) msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info" with pytest.raises(ValueError, match=msg): s.info(max_cols=1) @@ -108,7 +110,7 @@ def test_info_shows_dtypes(): ] n = 10 for dtype in dtypes: - s = Series(np.random.randint(2, size=n).astype(dtype)) + s = Series(np.random.default_rng(2).randint(2, size=n).astype(dtype)) buf = StringIO() s.info(buf=buf) res = buf.getvalue() @@ -168,7 +170,7 @@ def test_info_memory_usage_bug_on_multiindex(): [list(uppercase), date_range("20160101", periods=N)], names=["id", "date"], ) - s = Series(np.random.randn(N * M), index=index) + s = Series(np.random.default_rng(2).randn(N * M), index=index) unstacked = s.unstack("id") assert s.values.nbytes == unstacked.values.nbytes diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 9c128756339ab..507831a97b039 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -54,7 +54,7 @@ def biggie_df_fixture(request): """Fixture for a big mixed Dataframe and an empty Dataframe""" if request.param == "mixed": df = DataFrame( - {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, + {"A": np.random.default_rng(2).randn(200), "B": tm.makeStringIndex(200)}, index=np.arange(200), ) df.loc[:20, "A"] = np.nan @@ -72,7 +72,7 @@ def 
justify(request): @pytest.mark.parametrize("col_space", [30, 50]) def test_to_html_with_col_space(col_space): - df = DataFrame(np.random.random(size=(1, 3))) + df = DataFrame(np.random.default_rng(2).random(size=(1, 3))) # check that col_space affects HTML generation # and be very brittle about it. result = df.to_html(col_space=col_space) @@ -84,7 +84,9 @@ def test_to_html_with_col_space(col_space): def test_to_html_with_column_specific_col_space_raises(): - df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] + ) msg = ( "Col_space length\\(\\d+\\) should match " @@ -102,7 +104,9 @@ def test_to_html_with_column_specific_col_space_raises(): def test_to_html_with_column_specific_col_space(): - df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).random(size=(3, 3)), columns=["a", "b", "c"] + ) result = df.to_html(col_space={"a": "2em", "b": 23}) hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] @@ -280,8 +284,8 @@ def test_to_html_regression_GH6098(): { "clé1": ["a", "a", "b", "b", "a"], "clé2": ["1er", "2ème", "1er", "2ème", "1er"], - "données1": np.random.randn(5), - "données2": np.random.randn(5), + "données1": np.random.default_rng(2).randn(5), + "données2": np.random.default_rng(2).randn(5), } ) @@ -394,7 +398,7 @@ def test_to_html_filename(biggie_df_fixture, tmpdir): def test_to_html_with_no_bold(): - df = DataFrame({"x": np.random.randn(5)}) + df = DataFrame({"x": np.random.default_rng(2).randn(5)}) html = df.to_html(bold_rows=False) result = html[html.find("")] assert "\s]", x)] diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 31ba018a178ca..f28e3ee672537 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -18,7 +18,7 @@ def test_repr_embedded_ndarray(): arr = 
np.empty(10, dtype=[("err", object)]) for i in range(len(arr)): - arr["err"][i] = np.random.randn(i) + arr["err"][i] = np.random.default_rng(2).randn(i) df = DataFrame(arr) repr(df["err"]) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 25b0e4a9f1de9..e26e5e5a21ac7 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -651,7 +651,7 @@ def test_overlapping_names(self, case): def test_mi_falsey_name(self): # GH 16203 df = DataFrame( - np.random.randn(4, 4), + np.random.default_rng(2).randn(4, 4), index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]), ) result = [x["name"] for x in build_table_schema(df)["fields"]] diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 11909bf56f05c..626d024928a4a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -976,7 +976,7 @@ def test_weird_nested_json(self): read_json(StringIO(s)) def test_doc_example(self): - dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB")) + dfj2 = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("AB")) dfj2["date"] = Timestamp("20130101") dfj2["ints"] = range(5) dfj2["bools"] = True @@ -1171,14 +1171,14 @@ def test_datetime_tz(self): def test_sparse(self): # GH4377 df.to_json segfaults with non-ndarray blocks - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) df.loc[:8] = np.nan sdf = df.astype("Sparse") expected = df.to_json() assert expected == sdf.to_json() - s = Series(np.random.randn(10)) + s = Series(np.random.default_rng(2).randn(10)) s.loc[:8] = np.nan ss = s.astype("Sparse") diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 0df6b1eef72c0..7fe12ecca9e61 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -940,7 +940,7 @@ def 
test_datetime_index(self): decoded = DatetimeIndex(np.array(ujson.decode(encoded))) tm.assert_index_equal(rng, decoded) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) decoded = Series(ujson.decode(ujson.encode(ts, date_unit=date_unit))) idx_values = decoded.index.values.astype(np.int64) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 915cc9a9a1f95..f8d8a88a31cb4 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -31,7 +31,7 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig): parser = all_parsers df = DataFrame( - np.random.rand(5, 2).round(4), + np.random.default_rng(2).rand(5, 2).round(4), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"], ) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index ceee9f13e07f8..d244a06d72566 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -130,7 +130,9 @@ def test_dtype_and_names_error(c_parser_only): def test_unsupported_dtype(c_parser_only, match, kwargs): parser = c_parser_only df = DataFrame( - np.random.rand(5, 2), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"] + np.random.default_rng(2).rand(5, 2), + columns=list("AB"), + index=["1A", "1B", "1C", "1D", "1E"], ) with tm.ensure_clean("__unsupported_dtype__.csv") as path: diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 4b0567d6265ad..a38997e115ea3 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -196,7 +196,9 @@ def test_no_multi_index_level_names_empty(all_parsers): # GH 10984 parser = all_parsers midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) - expected = DataFrame(np.random.randn(3, 
3), index=midx, columns=["x", "y", "z"]) + expected = DataFrame( + np.random.default_rng(2).randn(3, 3), index=midx, columns=["x", "y", "z"] + ) with tm.ensure_clean() as path: expected.to_csv(path) result = parser.read_csv(path, index_col=[0, 1, 2]) @@ -233,7 +235,9 @@ def test_index_col_large_csv(all_parsers, monkeypatch): parser = all_parsers ARR_LEN = 100 - df = DataFrame({"a": range(ARR_LEN + 1), "b": np.random.randn(ARR_LEN + 1)}) + df = DataFrame( + {"a": range(ARR_LEN + 1), "b": np.random.default_rng(2).randn(ARR_LEN + 1)} + ) with tm.ensure_clean() as path: df.to_csv(path, index=False) diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index c3520a92f11b3..cf897107be4fa 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -125,11 +125,11 @@ def test_multi_thread_path_multipart_read_csv(all_parsers): file_name = "__thread_pool_reader__.csv" df = DataFrame( { - "a": np.random.rand(num_rows), - "b": np.random.rand(num_rows), - "c": np.random.rand(num_rows), - "d": np.random.rand(num_rows), - "e": np.random.rand(num_rows), + "a": np.random.default_rng(2).rand(num_rows), + "b": np.random.default_rng(2).rand(num_rows), + "c": np.random.default_rng(2).rand(num_rows), + "d": np.random.default_rng(2).rand(num_rows), + "e": np.random.default_rng(2).rand(num_rows), "foo": ["foo"] * num_rows, "bar": ["bar"] * num_rows, "baz": ["baz"] * num_rows, diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index ea89d9cf42824..a8a82c8e8fcb0 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -296,7 +296,7 @@ def test_read_csv_chunked_download(self, s3_public_bucket, caplog, s3so): # 8 MB, S3FS uses 5MB chunks import s3fs - df = DataFrame(np.random.randn(100000, 4), columns=list("abcd")) + df = DataFrame(np.random.default_rng(2).randn(100000, 4), columns=list("abcd")) str_buf = 
StringIO() df.to_csv(str_buf) diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index b31a520924d5f..9f2e586b43df3 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -66,13 +66,16 @@ def test_append(setup_path): uint_data = DataFrame( { "u08": Series( - np.random.randint(0, high=255, size=5), dtype=np.uint8 + np.random.default_rng(2).randint(0, high=255, size=5), + dtype=np.uint8, ), "u16": Series( - np.random.randint(0, high=65535, size=5), dtype=np.uint16 + np.random.default_rng(2).randint(0, high=65535, size=5), + dtype=np.uint16, ), "u32": Series( - np.random.randint(0, high=2**30, size=5), dtype=np.uint32 + np.random.default_rng(2).randint(0, high=2**30, size=5), + dtype=np.uint32, ), "u64": Series( [2**58, 2**59, 2**60, 2**61, 2**62], @@ -126,7 +129,7 @@ def test_append_series(setup_path): tm.assert_series_equal(result, expected, check_index_type=True) # multi-index - mi = DataFrame(np.random.randn(5, 1), columns=["A"]) + mi = DataFrame(np.random.default_rng(2).randn(5, 1), columns=["A"]) mi["B"] = np.arange(len(mi)) mi["C"] = "foo" mi.loc[3:5, "C"] = "bar" @@ -141,9 +144,9 @@ def test_append_some_nans(setup_path): with ensure_clean_store(setup_path) as store: df = DataFrame( { - "A": Series(np.random.randn(20)).astype("int32"), - "A1": np.random.randn(20), - "A2": np.random.randn(20), + "A": Series(np.random.default_rng(2).randn(20)).astype("int32"), + "A1": np.random.default_rng(2).randn(20), + "A2": np.random.default_rng(2).randn(20), "B": "foo", "C": "bar", "D": Timestamp("2001-01-01").as_unit("ns"), @@ -186,7 +189,10 @@ def test_append_some_nans(setup_path): def test_append_all_nans(setup_path): with ensure_clean_store(setup_path) as store: df = DataFrame( - {"A1": np.random.randn(20), "A2": np.random.randn(20)}, + { + "A1": np.random.default_rng(2).randn(20), + "A2": np.random.default_rng(2).randn(20), + }, index=np.arange(20), ) df.loc[0:15, :] = np.nan @@ 
-219,8 +225,8 @@ def test_append_all_nans(setup_path): # nan some entire rows (string are still written!) df = DataFrame( { - "A1": np.random.randn(20), - "A2": np.random.randn(20), + "A1": np.random.default_rng(2).randn(20), + "A2": np.random.default_rng(2).randn(20), "B": "foo", "C": "bar", }, @@ -243,8 +249,8 @@ def test_append_all_nans(setup_path): # written!) df = DataFrame( { - "A1": np.random.randn(20), - "A2": np.random.randn(20), + "A1": np.random.default_rng(2).randn(20), + "A2": np.random.default_rng(2).randn(20), "B": "foo", "C": "bar", "D": Timestamp("2001-01-01").as_unit("ns"), @@ -299,7 +305,7 @@ def test_append_with_different_block_ordering(setup_path): # GH 4096; using same frames, but different block orderings with ensure_clean_store(setup_path) as store: for i in range(10): - df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) df["index"] = range(10) df["index"] += i * 10 df["int64"] = Series([1] * len(df), dtype="int64") @@ -319,7 +325,9 @@ def test_append_with_different_block_ordering(setup_path): # test a different ordering but with more fields (like invalid # combinations) with ensure_clean_store(setup_path) as store: - df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64") + df = DataFrame( + np.random.default_rng(2).randn(10, 2), columns=list("AB"), dtype="float64" + ) df["int64"] = Series([1] * len(df), dtype="int64") df["int16"] = Series([1] * len(df), dtype="int16") store.append("df", df) @@ -592,9 +600,11 @@ def check_col(key, name, size): with ensure_clean_store(setup_path) as store: # doc example part 2 - np.random.seed(1234) + index = date_range("1/1/2000", periods=8) - df_dc = DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"]) + df_dc = DataFrame( + np.random.default_rng(2).randn(8, 3), index=index, columns=["A", "B", "C"] + ) df_dc["string"] = "foo" df_dc.loc[df_dc.index[4:6], "string"] = np.nan 
df_dc.loc[df_dc.index[7:9], "string"] = "bar" @@ -672,7 +682,7 @@ def test_append_misc_empty_frame(setup_path): store.select("df") # repeated append of 0/non-zero frames - df = DataFrame(np.random.rand(10, 3), columns=list("ABC")) + df = DataFrame(np.random.default_rng(2).rand(10, 3), columns=list("ABC")) store.append("df", df) tm.assert_frame_equal(store.select("df"), df) store.append("df", df_empty) diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py index 051221d165060..cbb40bbf831e1 100644 --- a/pandas/tests/io/pytables/test_complex.py +++ b/pandas/tests/io/pytables/test_complex.py @@ -16,7 +16,7 @@ def test_complex_fixed(tmp_path, setup_path): df = DataFrame( - np.random.rand(4, 5).astype(np.complex64), + np.random.default_rng(2).rand(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) @@ -27,7 +27,7 @@ def test_complex_fixed(tmp_path, setup_path): tm.assert_frame_equal(df, reread) df = DataFrame( - np.random.rand(4, 5).astype(np.complex128), + np.random.default_rng(2).rand(4, 5).astype(np.complex128), index=list("abcd"), columns=list("ABCDE"), ) @@ -39,7 +39,7 @@ def test_complex_fixed(tmp_path, setup_path): def test_complex_table(tmp_path, setup_path): df = DataFrame( - np.random.rand(4, 5).astype(np.complex64), + np.random.default_rng(2).rand(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) @@ -50,7 +50,7 @@ def test_complex_table(tmp_path, setup_path): tm.assert_frame_equal(df, reread) df = DataFrame( - np.random.rand(4, 5).astype(np.complex128), + np.random.default_rng(2).rand(4, 5).astype(np.complex128), index=list("abcd"), columns=list("ABCDE"), ) @@ -189,7 +189,10 @@ def test_complex_series_error(tmp_path, setup_path): def test_complex_append(setup_path): df = DataFrame( - {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)} + { + "a": np.random.default_rng(2).randn(100).astype(np.complex128), + "b": np.random.default_rng(2).randn(100), + } 
) with ensure_clean_store(setup_path) as store: diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 295cce970889c..cdb3c9ad63f19 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -115,7 +115,7 @@ def test_invalid_terms(tmp_path, setup_path): # from the docs path = tmp_path / setup_path dfq = DataFrame( - np.random.randn(10, 4), + np.random.default_rng(2).randn(10, 4), columns=list("ABCD"), index=date_range("20130101", periods=10), ) @@ -128,7 +128,7 @@ def test_invalid_terms(tmp_path, setup_path): # catch the invalid reference path = tmp_path / setup_path dfq = DataFrame( - np.random.randn(10, 4), + np.random.default_rng(2).randn(10, 4), columns=list("ABCD"), index=date_range("20130101", periods=10), ) @@ -146,11 +146,11 @@ def test_invalid_terms(tmp_path, setup_path): def test_append_with_diff_col_name_types_raises_value_error(setup_path): - df = DataFrame(np.random.randn(10, 1)) - df2 = DataFrame({"a": np.random.randn(10)}) - df3 = DataFrame({(1, 2): np.random.randn(10)}) - df4 = DataFrame({("1", 2): np.random.randn(10)}) - df5 = DataFrame({("1", 2, object): np.random.randn(10)}) + df = DataFrame(np.random.default_rng(2).randn(10, 1)) + df2 = DataFrame({"a": np.random.default_rng(2).randn(10)}) + df3 = DataFrame({(1, 2): np.random.default_rng(2).randn(10)}) + df4 = DataFrame({("1", 2): np.random.default_rng(2).randn(10)}) + df5 = DataFrame({("1", 2, object): np.random.default_rng(2).randn(10)}) with ensure_clean_store(setup_path) as store: name = f"df_{tm.rands(10)}" @@ -165,7 +165,9 @@ def test_append_with_diff_col_name_types_raises_value_error(setup_path): def test_invalid_complib(setup_path): - df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) with tm.ensure_clean(setup_path) as path: msg = r"complib only supports \[.*\] 
compression." with pytest.raises(ValueError, match=msg): @@ -201,7 +203,9 @@ def test_unsuppored_hdf_file_error(datapath): def test_read_hdf_errors(setup_path, tmp_path): - df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) path = tmp_path / setup_path msg = r"File [\S]* does not exist" diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 910f83e0b997c..c327d1992b4d4 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -222,7 +222,7 @@ def test_store_index_types(setup_path, format, index): # test storing various index types with ensure_clean_store(setup_path) as store: - df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) df.index = index(len(df)) _maybe_remove(store, "df") @@ -352,7 +352,8 @@ def test_store_periodindex(tmp_path, setup_path, format): # GH 7796 # test of PeriodIndex in HDFStore df = DataFrame( - np.random.randn(5, 1), index=pd.period_range("20220101", freq="M", periods=5) + np.random.default_rng(2).randn(5, 1), + index=pd.period_range("20220101", freq="M", periods=5), ) path = tmp_path / setup_path diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index 61dabf15653f0..c7965cc91f9ef 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -142,7 +142,7 @@ def test_read_column(setup_path): tm.assert_almost_equal(result.values, df3["string"].values[-2:2]) # GH 10392 - make sure column name is preserved - df4 = DataFrame({"A": np.random.randn(10), "B": "foo"}) + df4 = DataFrame({"A": np.random.default_rng(2).randn(10), "B": "foo"}) store.append("df4", df4, data_columns=True) expected = df4["B"] result = store.select_column("df4", "B") @@ -213,7 +213,9 @@ def 
test_legacy_table_read_py2(datapath): def test_read_hdf_open_store(tmp_path, setup_path): # GH10330 # No check for non-string path_or-buf, and no test of open store - df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -230,7 +232,9 @@ def test_read_hdf_index_not_view(tmp_path, setup_path): # GH 37441 # Ensure that the index of the DataFrame is not a view # into the original recarray that pytables reads in - df = DataFrame(np.random.rand(4, 5), index=[0, 1, 2, 3], columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).rand(4, 5), index=[0, 1, 2, 3], columns=list("ABCDE") + ) path = tmp_path / setup_path df.to_hdf(path, "df", mode="w", format="table") @@ -241,7 +245,9 @@ def test_read_hdf_index_not_view(tmp_path, setup_path): def test_read_hdf_iterator(tmp_path, setup_path): - df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -257,7 +263,9 @@ def test_read_hdf_iterator(tmp_path, setup_path): def test_read_nokey(tmp_path, setup_path): # GH10443 - df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) # Categorical dtype not supported for "fixed" format. So no need # to test with that dtype in the dataframe here. 
@@ -302,7 +310,7 @@ def test_read_nokey_empty(tmp_path, setup_path): def test_read_from_pathlib_path(tmp_path, setup_path): # GH11773 expected = DataFrame( - np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) filename = tmp_path / setup_path path_obj = Path(filename) @@ -319,7 +327,7 @@ def test_read_from_py_localpath(tmp_path, setup_path): from py.path import local as LocalPath expected = DataFrame( - np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") ) filename = tmp_path / setup_path path_obj = LocalPath(filename) diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 42f020a8f3708..fb4f40b702871 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -183,7 +183,7 @@ def test_get(setup_path): def test_put_integer(setup_path): # non-date, non-string index - df = DataFrame(np.random.randn(50, 100)) + df = DataFrame(np.random.default_rng(2).randn(50, 100)) _check_roundtrip(df, tm.assert_frame_equal, setup_path) @@ -219,7 +219,7 @@ def test_table_values_dtypes_roundtrip(setup_path): # check with mixed dtypes df1 = DataFrame( { - c: Series(np.random.randint(5), dtype=c) + c: Series(np.random.default_rng(2).randint(5), dtype=c) for c in ["float32", "float64", "int32", "int64", "int16", "int8"] } ) @@ -271,8 +271,8 @@ def test_series(setup_path): def test_float_index(setup_path): # GH #454 - index = np.random.randn(10) - s = Series(np.random.randn(10), index=index) + index = np.random.default_rng(2).randn(10) + s = Series(np.random.default_rng(2).randn(10), index=index) _check_roundtrip(s, tm.assert_series_equal, path=setup_path) @@ -280,7 +280,7 @@ def test_tuple_index(setup_path): # GH #492 col = np.arange(10) idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)] - data = 
np.random.randn(30).reshape((3, 10)) + data = np.random.default_rng(2).randn(30).reshape((3, 10)) DF = DataFrame(data, index=idx, columns=col) with catch_warnings(record=True): @@ -291,7 +291,7 @@ def test_tuple_index(setup_path): @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") def test_index_types(setup_path): with catch_warnings(record=True): - values = np.random.randn(2) + values = np.random.default_rng(2).randn(2) func = lambda lhs, rhs: tm.assert_series_equal(lhs, rhs, check_index_type=True) @@ -341,7 +341,7 @@ def test_index_types(setup_path): def test_timeseries_preepoch(setup_path, request): dr = bdate_range("1/1/1940", "1/1/1960") - ts = Series(np.random.randn(len(dr)), index=dr) + ts = Series(np.random.default_rng(2).randn(len(dr)), index=dr) try: _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) except OverflowError: @@ -376,7 +376,7 @@ def test_frame(compression, setup_path): with ensure_clean_store(setup_path) as store: # not consolidated - df["foo"] = np.random.randn(len(df)) + df["foo"] = np.random.default_rng(2).randn(len(df)) store["df"] = df recons = store["df"] assert recons._mgr.is_consolidated() @@ -407,7 +407,7 @@ def test_empty_series(dtype, setup_path): def test_can_serialize_dates(setup_path): rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) @@ -502,7 +502,7 @@ def test_unicode_index(setup_path): # PerformanceWarning with catch_warnings(record=True): simplefilter("ignore", pd.errors.PerformanceWarning) - s = Series(np.random.randn(len(unicode_values)), unicode_values) + s = Series(np.random.default_rng(2).randn(len(unicode_values)), unicode_values) _check_roundtrip(s, tm.assert_series_equal, path=setup_path) diff --git a/pandas/tests/io/pytables/test_select.py 
b/pandas/tests/io/pytables/test_select.py index f14a3ad7c5e10..0858302bfcdba 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -40,7 +40,9 @@ def test_select_columns_in_where(setup_path): ) # With a DataFrame - df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).randn(10, 3), index=index, columns=["A", "B", "C"] + ) with ensure_clean_store(setup_path) as store: store.put("df", df, format="table") @@ -51,7 +53,7 @@ def test_select_columns_in_where(setup_path): tm.assert_frame_equal(store.select("df", where="columns=['A']"), expected) # With a Series - s = Series(np.random.randn(10), index=index, name="A") + s = Series(np.random.default_rng(2).randn(10), index=index, name="A") with ensure_clean_store(setup_path) as store: store.put("s", s, format="table") tm.assert_series_equal(store.select("s", where="columns=['A']"), s) @@ -59,7 +61,7 @@ def test_select_columns_in_where(setup_path): def test_select_with_dups(setup_path): # single dtypes - df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) + df = DataFrame(np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"]) df.index = date_range("20130101 9:30", periods=10, freq="T") with ensure_clean_store(setup_path) as store: @@ -80,9 +82,12 @@ def test_select_with_dups(setup_path): # dups across dtypes df = concat( [ - DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), DataFrame( - np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"] + ), + DataFrame( + np.random.default_rng(2).randint(0, 10, size=20).reshape(10, 2), + columns=["A", "C"], ), ], axis=1, @@ -163,7 +168,7 @@ def test_select_dtypes(setup_path): df = DataFrame( { "ts": bdate_range("2012-01-01", periods=300), - "A": np.random.randn(300), + "A": np.random.default_rng(2).randn(300), } ) _maybe_remove(store, "df") @@ 
-174,7 +179,7 @@ def test_select_dtypes(setup_path): tm.assert_frame_equal(expected, result) # bool columns (GH #2849) - df = DataFrame(np.random.randn(5, 2), columns=["A", "B"]) + df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=["A", "B"]) df["object"] = "foo" df.loc[4:5, "object"] = "bar" df["boolv"] = df["A"] > 0 @@ -192,7 +197,12 @@ def test_select_dtypes(setup_path): tm.assert_frame_equal(expected, result) # integer index - df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)}) + df = DataFrame( + { + "A": np.random.default_rng(2).rand(20), + "B": np.random.default_rng(2).rand(20), + } + ) _maybe_remove(store, "df_int") store.append("df_int", df) result = store.select("df_int", "index<10 and columns=['A']") @@ -202,8 +212,8 @@ def test_select_dtypes(setup_path): # float index df = DataFrame( { - "A": np.random.rand(20), - "B": np.random.rand(20), + "A": np.random.default_rng(2).rand(20), + "B": np.random.default_rng(2).rand(20), "index": np.arange(20, dtype="f8"), } ) @@ -267,7 +277,7 @@ def test_select_with_many_inputs(setup_path): df = DataFrame( { "ts": bdate_range("2012-01-01", periods=300), - "A": np.random.randn(300), + "A": np.random.default_rng(2).randn(300), "B": range(300), "users": ["a"] * 50 + ["b"] * 50 @@ -647,7 +657,7 @@ def test_frame_select_complex2(tmp_path): selection = read_hdf(pp, "df", where="A=[2,3]") hist = DataFrame( - np.random.randn(25, 1), + np.random.default_rng(2).randn(25, 1), columns=["data"], index=MultiIndex.from_tuples( [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"] diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 17b7eef011a0a..2e2271be3f668 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -499,7 +499,10 @@ def test_remove(setup_path): def test_same_name_scoping(setup_path): with ensure_clean_store(setup_path) as store: - df = DataFrame(np.random.randn(20, 2), 
index=date_range("20130101", periods=20)) + df = DataFrame( + np.random.default_rng(2).randn(20, 2), + index=date_range("20130101", periods=20), + ) store.put("df", df, format="table") expected = df[df.index > Timestamp("20130105")] @@ -621,7 +624,8 @@ def test_coordinates(setup_path): # pass array/mask as the coordinates with ensure_clean_store(setup_path) as store: df = DataFrame( - np.random.randn(1000, 2), index=date_range("20000101", periods=1000) + np.random.default_rng(2).randn(1000, 2), + index=date_range("20000101", periods=1000), ) store.append("df", df) c = store.select_column("df", "index") @@ -660,7 +664,7 @@ def test_coordinates(setup_path): tm.assert_frame_equal(result, expected) # list - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) store.append("df2", df) result = store.select("df2", where=[0, 3, 5]) expected = df.iloc[[0, 3, 5]] @@ -682,7 +686,12 @@ def test_coordinates(setup_path): def test_start_stop_table(setup_path): with ensure_clean_store(setup_path) as store: # table - df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)}) + df = DataFrame( + { + "A": np.random.default_rng(2).rand(20), + "B": np.random.default_rng(2).rand(20), + } + ) store.append("df", df) result = store.select("df", "columns=['A']", start=0, stop=5) @@ -715,7 +724,10 @@ def test_start_stop_fixed(setup_path): with ensure_clean_store(setup_path) as store: # fixed, GH 8287 df = DataFrame( - {"A": np.random.rand(20), "B": np.random.rand(20)}, + { + "A": np.random.default_rng(2).rand(20), + "B": np.random.default_rng(2).rand(20), + }, index=date_range("20130101", periods=20), ) store.put("df", df) @@ -751,7 +763,7 @@ def test_start_stop_fixed(setup_path): def test_select_filter_corner(setup_path): - df = DataFrame(np.random.randn(50, 100)) + df = DataFrame(np.random.default_rng(2).randn(50, 100)) df.index = [f"{c:3d}" for c in df.index] df.columns = [f"{c:3d}" for c in df.columns] @@ -879,7 +891,7 @@ def 
test_duplicate_column_name(tmp_path, setup_path): def test_preserve_timedeltaindex_type(setup_path): # GH9635 - df = DataFrame(np.random.normal(size=(10, 5))) + df = DataFrame(np.random.default_rng(2).normal(size=(10, 5))) df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example") with ensure_clean_store(setup_path) as store: @@ -890,7 +902,9 @@ def test_preserve_timedeltaindex_type(setup_path): def test_columns_multiindex_modified(tmp_path, setup_path): # BUG: 7212 - df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df = DataFrame( + np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -927,7 +941,7 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path): ] for index in types_should_fail: - df = DataFrame(np.random.randn(10, 2), columns=index(2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=index(2)) path = tmp_path / setup_path with catch_warnings(record=True): msg = "cannot have non-object label DataIndexableCol" @@ -935,7 +949,7 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path): df.to_hdf(path, "df", format="table", data_columns=True) for index in types_should_run: - df = DataFrame(np.random.randn(10, 2), columns=index(2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=index(2)) path = tmp_path / setup_path with catch_warnings(record=True): df.to_hdf(path, "df", format="table", data_columns=True) diff --git a/pandas/tests/io/pytables/test_time_series.py b/pandas/tests/io/pytables/test_time_series.py index 262f25e77b69c..08ce04606982a 100644 --- a/pandas/tests/io/pytables/test_time_series.py +++ b/pandas/tests/io/pytables/test_time_series.py @@ -24,7 +24,7 @@ def test_store_datetime_fractional_secs(setup_path): def test_tseries_indices_series(setup_path): with ensure_clean_store(setup_path) as store: idx = tm.makeDateIndex(10) - ser = 
Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) store["a"] = ser result = store["a"] @@ -33,7 +33,7 @@ def test_tseries_indices_series(setup_path): tm.assert_class_equal(result.index, ser.index, obj="series index") idx = tm.makePeriodIndex(10) - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) store["a"] = ser result = store["a"] @@ -45,7 +45,7 @@ def test_tseries_indices_series(setup_path): def test_tseries_indices_frame(setup_path): with ensure_clean_store(setup_path) as store: idx = tm.makeDateIndex(10) - df = DataFrame(np.random.randn(len(idx), 3), index=idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) store["a"] = df result = store["a"] @@ -54,7 +54,7 @@ def test_tseries_indices_frame(setup_path): tm.assert_class_equal(result.index, df.index, obj="dataframe index") idx = tm.makePeriodIndex(10) - df = DataFrame(np.random.randn(len(idx), 3), idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), idx) store["a"] = df result = store["a"] diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index e6c0c918a73cc..217b1f7fc1375 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -180,7 +180,7 @@ def test_tseries_select_index_column(setup_path): # check that no tz still works rng = date_range("1/1/2000", "1/30/2000") - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) with ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -189,7 +189,7 @@ def test_tseries_select_index_column(setup_path): # check utc rng = date_range("1/1/2000", "1/30/2000", tz="UTC") - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) with 
ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -198,7 +198,7 @@ # double check non-utc rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) with ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -211,7 +211,7 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): # index rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") rng = rng._with_freq(None) # freq doesn't round-trip - df = DataFrame(np.random.randn(len(rng), 4), index=rng) + df = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) store["df"] = df result = store["df"] tm.assert_frame_equal(result, df) @@ -262,7 +262,7 @@ def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): def test_fixed_offset_tz(setup_path): rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") - frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) with ensure_clean_store(setup_path) as store: store["frame"] = frame diff --git a/pandas/tests/io/sas/test_byteswap.py b/pandas/tests/io/sas/test_byteswap.py index 347e8778e44eb..5787dcecec320 100644 --- a/pandas/tests/io/sas/test_byteswap.py +++ b/pandas/tests/io/sas/test_byteswap.py @@ -39,7 +39,9 @@ def test_float_byteswap(read_offset, number, float_type, should_byteswap): def _test(number, number_type, read_offset, should_byteswap): number = number_type(number) - data = np.random.default_rng().integers(0, 256, size=20, dtype="uint8") + data = ( + np.random.default_rng(2).integers(0, 256, size=20, dtype="uint8") + ) data[read_offset : read_offset + number.itemsize] = number[None].view("uint8") swap_func = { np.float32: read_float_with_byteswap, diff --git
a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index ff81d0125144e..6ec89d822c60a 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -78,7 +78,7 @@ def df(request): return tm.makeCustomDataframe( max_rows + 1, 3, - data_gen_f=lambda *args: np.random.randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).randint(2), c_idx_type="s", r_idx_type="i", c_idx_names=[None], @@ -119,7 +119,7 @@ def df(request): return tm.makeCustomDataframe( 5, 3, - data_gen_f=lambda *args: np.random.randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).randint(2), c_idx_type="s", r_idx_type="i", c_idx_names=[None], diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index e884d9bea26fe..74616421184dd 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -157,7 +157,7 @@ def test_to_html_compat(self): tm.makeCustomDataframe( 4, 3, - data_gen_f=lambda *args: np.random.rand(), + data_gen_f=lambda *args: np.random.default_rng(2).rand(), c_idx_names=False, r_idx_names=False, ) @@ -1278,7 +1278,7 @@ def test_fallback_success(self, datapath): def test_to_html_timestamp(self): rng = date_range("2000-01-01", periods=10) - df = DataFrame(np.random.randn(10, 4), index=rng) + df = DataFrame(np.random.default_rng(2).randn(10, 4), index=rng) result = df.to_html() assert "2000-01-01" in result diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 35bf75d3928f8..02852708ef1fc 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -465,7 +465,9 @@ def test_write_multiindex(self, pa): def test_multiindex_with_columns(self, pa): engine = pa dates = pd.date_range("01-Jan-2018", "01-Dec-2018", freq="MS") - df = pd.DataFrame(np.random.randn(2 * len(dates), 3), columns=list("ABC")) + df = pd.DataFrame( + np.random.default_rng(2).randn(2 * len(dates), 3), columns=list("ABC") + ) index1 = pd.MultiIndex.from_product( 
[["Level1", "Level2"], dates], names=["level", "date"] ) @@ -513,7 +515,7 @@ def test_write_ignoring_index(self, engine): def test_write_column_multiindex(self, engine): # Not able to write column multi-indexes with non-string column names. mi_columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) - df = pd.DataFrame(np.random.randn(4, 3), columns=mi_columns) + df = pd.DataFrame(np.random.default_rng(2).randn(4, 3), columns=mi_columns) if engine == "fastparquet": self.check_error_on_write( @@ -530,7 +532,7 @@ def test_write_column_multiindex_nonstring(self, engine): ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], [1, 2, 1, 2, 1, 2, 1, 2], ] - df = pd.DataFrame(np.random.randn(8, 8), columns=arrays) + df = pd.DataFrame(np.random.default_rng(2).randn(8, 8), columns=arrays) df.columns.names = ["Level1", "Level2"] if engine == "fastparquet": self.check_error_on_write(df, engine, ValueError, "Column name") @@ -547,7 +549,7 @@ def test_write_column_multiindex_string(self, pa): ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] - df = pd.DataFrame(np.random.randn(8, 8), columns=arrays) + df = pd.DataFrame(np.random.default_rng(2).randn(8, 8), columns=arrays) df.columns.names = ["ColLevel1", "ColLevel2"] check_round_trip(df, engine) @@ -559,7 +561,7 @@ def test_write_column_index_string(self, pa): # Write column indexes with string column names arrays = ["bar", "baz", "foo", "qux"] - df = pd.DataFrame(np.random.randn(8, 4), columns=arrays) + df = pd.DataFrame(np.random.default_rng(2).randn(8, 4), columns=arrays) df.columns.name = "StringCol" check_round_trip(df, engine) @@ -569,7 +571,7 @@ def test_write_column_index_nonstring(self, engine): # Write column indexes with string column names arrays = [1, 2, 3, 4] - df = pd.DataFrame(np.random.randn(8, 4), columns=arrays) + df = pd.DataFrame(np.random.default_rng(2).randn(8, 4), columns=arrays) df.columns.name = "NonStringCol" if engine 
== "fastparquet": self.check_error_on_write( @@ -983,7 +985,9 @@ def test_filter_row_groups(self, pa): def test_read_parquet_manager(self, pa, using_array_manager): # ensure that read_parquet honors the pandas.options.mode.data_manager option - df = pd.DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"]) + df = pd.DataFrame( + np.random.default_rng(2).randn(10, 3), columns=["A", "B", "C"] + ) with tm.ensure_clean() as path: df.to_parquet(path, pa) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 71ff029ed2201..bb7a2263b321b 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -527,7 +527,7 @@ def _test_roundtrip(frame): def test_pickle_timeseries_periodindex(): # GH#2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") - ts = Series(np.random.randn(len(prng)), prng) + ts = Series(np.random.default_rng(2).randn(len(prng)), prng) new_ts = tm.round_trip_pickle(ts) assert new_ts.index.freq == "M" diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 066a68cf9e7c7..e912fe525fdc3 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1543,7 +1543,7 @@ def test_get_schema_keys(self, test_frame1): assert constraint_sentence in create_sql def test_chunksize_read(self): - df = DataFrame(np.random.randn(22, 5), columns=list("abcde")) + df = DataFrame(np.random.default_rng(2).randn(22, 5), columns=list("abcde")) df.to_sql("test_chunksize", self.conn, index=False) # reading the query in one time diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c4035ea867962..0f38711108175 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -372,13 +372,13 @@ def test_read_write_dta10(self, version): def test_stata_doc_examples(self): with tm.ensure_clean() as path: - df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) 
df.to_stata(path) def test_write_preserves_original(self): # 9795 - np.random.seed(423) - df = DataFrame(np.random.randn(5, 4), columns=list("abcd")) + + df = DataFrame(np.random.default_rng(2).randn(5, 4), columns=list("abcd")) df.loc[2, "a":"c"] = np.nan df_copy = df.copy() with tm.ensure_clean() as path: @@ -1978,7 +1978,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten if use_dict: compression_arg = {"method": compression} - df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) df.index.name = "index" with tm.ensure_clean(file_name) as path: df.to_stata(path, version=version, compression=compression_arg) @@ -2016,7 +2016,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten def test_compression_dict(method, file_ext): file_name = f"test.{file_ext}" archive_name = "test.dta" - df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) df.index.name = "index" with tm.ensure_clean(file_name) as path: compression = {"method": method, "archive_name": archive_name} diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index 41ea557924b25..32888ddd7e58b 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -333,7 +333,7 @@ def test_vector_resize( ): # Test for memory errors after internal vector # reallocations (GH 7157) - # Changed from using np.random.rand to range + # Changed from using np.random.rand to range # which could cause flaky CI failures when safely_resizes=False vals = np.array(range(1000), dtype=dtype) @@ -660,14 +660,14 @@ def test_unique_label_indices_intp(writable): def test_unique_label_indices(): - a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp) + a = np.random.default_rng(2).randint(1, 1 << 10, 1 << 15).astype(np.intp) left =
ht.unique_label_indices(a) right = np.unique(a, return_index=True)[1] tm.assert_numpy_array_equal(left, right, check_dtype=False) - a[np.random.choice(len(a), 10)] = -1 + a[np.random.default_rng(2).choice(len(a), 10)] = -1 left = ht.unique_label_indices(a) right = np.unique(a, return_index=True)[1][1:] tm.assert_numpy_array_equal(left, right, check_dtype=False) diff --git a/pandas/tests/plotting/conftest.py b/pandas/tests/plotting/conftest.py index ae98877dec2ce..01847608257c8 100644 --- a/pandas/tests/plotting/conftest.py +++ b/pandas/tests/plotting/conftest.py @@ -32,19 +32,19 @@ def mpl_cleanup(): @pytest.fixture def hist_df(): n = 50 - np_random = np.random.RandomState(42) - gender = np_random.choice(["Male", "Female"], size=n) - classroom = np_random.choice(["A", "B", "C"], size=n) + rng = np.random.default_rng(42) + gender = rng.choice(["Male", "Female"], size=n) + classroom = rng.choice(["A", "B", "C"], size=n) hist_df = DataFrame( { "gender": gender, "classroom": classroom, - "height": np.random.normal(66, 4, size=n), - "weight": np.random.normal(161, 32, size=n), - "category": np.random.randint(4, size=n), + "height": rng.normal(66, 4, size=n), + "weight": rng.normal(161, 32, size=n), + "category": rng.integers(4, size=n), "datetime": to_datetime( - np.random.randint( + rng.integers( 812419200000000000, 819331200000000000, size=n, diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index f3b3fb43edf36..e61d31532fb49 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -96,7 +96,9 @@ def test_plot_invalid_arg(self): @pytest.mark.slow def test_plot_tick_props(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) ax = _check_plot_works(df.plot, use_index=True) _check_ticks_props(ax, xrot=0) @@ -112,12 +114,16 @@ def
test_plot_tick_props(self): ], ) def test_plot_other_args(self, kwargs): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) _check_plot_works(df.plot, **kwargs) @pytest.mark.slow def test_plot_visible_ax(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) # We have to redo it here because _check_plot_works does two plots, # once without an ax kwarg and once with an ax kwarg and the new sharex # behaviour does not remove the visibility of the latter axis (as ax is @@ -138,13 +144,17 @@ def test_plot_visible_ax(self): @pytest.mark.slow def test_plot_title(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) _check_plot_works(df.plot, title="blah") @pytest.mark.slow def test_plot_multiindex(self): tuples = zip(string.ascii_letters[:10], range(10)) - df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + ) ax = _check_plot_works(df.plot, use_index=True) _check_ticks_props(ax, xrot=0) @@ -167,7 +177,11 @@ def test_plot_multiindex_unicode(self): columns = MultiIndex.from_tuples( [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"] ) - df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) + df = DataFrame( + np.random.default_rng(2).randint(0, 10, (8, 2)), + columns=columns, + index=index, + ) _check_plot_works(df.plot, title="\u03A3") @pytest.mark.slow @@ -175,7 +189,7 @@ def test_plot_multiindex_unicode(self): def test_plot_single_column_bar(self, layout): # GH 6951 # Test with single column - df = DataFrame({"x": np.random.rand(10)}) + 
df = DataFrame({"x": np.random.default_rng(2).rand(10)}) axes = _check_plot_works(df.plot.bar, subplots=True, layout=layout) _check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -183,7 +197,7 @@ def test_plot_single_column_bar(self, layout): def test_plot_passed_ax(self): # When ax is supplied and required number of axes is 1, # passed ax should be used: - df = DataFrame({"x": np.random.rand(10)}) + df = DataFrame({"x": np.random.default_rng(2).rand(10)}) _, ax = mpl.pyplot.subplots() axes = df.plot.bar(subplots=True, ax=ax) assert len(axes) == 1 @@ -250,13 +264,13 @@ def test_nonnumeric_exclude(self): assert len(ax.get_lines()) == 1 # B was plotted def test_implicit_label(self): - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) ax = df.plot(x="a", y="b") _check_text_labels(ax.xaxis.get_label(), "a") def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) + df = DataFrame(np.random.default_rng(2).randn(2, 2), columns=["a", "b"]) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -362,7 +376,7 @@ def test_period_compat(self): # GH 9012 # period-array conversions df = DataFrame( - np.random.rand(21, 2), + np.random.default_rng(2).rand(21, 2), index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)), columns=["a", "b"], ) @@ -410,7 +424,7 @@ def test_unsorted_index_lims_x_y(self): def test_negative_log(self): df = -DataFrame( - np.random.rand(6, 4), + np.random.default_rng(2).rand(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -430,8 +444,9 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): @pytest.mark.parametrize("kind", ["line", "area"]) @pytest.mark.parametrize("mult", [1, -1]) def test_line_area_stacked(self, kind, mult): - np_random = np.random.RandomState(42) - df = mult * DataFrame(np_random.rand(6, 4), 
columns=["w", "x", "y", "z"]) + df = mult * DataFrame( + np.random.default_rng(2).rand(6, 4), columns=["w", "x", "y", "z"] + ) ax1 = _check_plot_works(df.plot, kind=kind, stacked=False) ax2 = _check_plot_works(df.plot, kind=kind, stacked=True) @@ -439,14 +454,13 @@ def test_line_area_stacked(self, kind, mult): @pytest.mark.parametrize("kind", ["line", "area"]) def test_line_area_stacked_sep_df(self, kind): - np_random = np.random.RandomState(42) # each column has either positive or negative value sep_df = DataFrame( { - "w": np_random.rand(6), - "x": np_random.rand(6), - "y": -np_random.rand(6), - "z": -np_random.rand(6), + "w": np.random.default_rng(2).rand(6), + "x": np.random.default_rng(2).rand(6), + "y": -np.random.default_rng(2).rand(6), + "z": -np.random.default_rng(2).rand(6), } ) ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False) @@ -455,9 +469,8 @@ def test_line_area_stacked_sep_df(self, kind): self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:]) def test_line_area_stacked_mixed(self): - np_random = np.random.RandomState(42) mixed_df = DataFrame( - np_random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -473,8 +486,9 @@ def test_line_area_stacked_mixed(self): @pytest.mark.parametrize("kind", ["line", "area"]) def test_line_area_stacked_positive_idx(self, kind): - np_random = np.random.RandomState(42) - df = DataFrame(np_random.rand(6, 4), columns=["w", "x", "y", "z"]) + df = DataFrame( + np.random.default_rng(2).rand(6, 4), columns=["w", "x", "y", "z"] + ) # Use an index with strictly positive values, preventing # matplotlib from warning about ignoring xlim df2 = df.set_index(df.index + 1) @@ -541,7 +555,7 @@ def test_line_area_nan_df_stacked_area(self, idx, kwargs): @pytest.mark.parametrize("kwargs", [{}, {"secondary_y": True}]) def test_line_lim(self, kwargs): - df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) + df = 
DataFrame(np.random.default_rng(2).rand(6, 3), columns=["x", "y", "z"]) ax = df.plot(**kwargs) xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -549,7 +563,7 @@ def test_line_lim(self, kwargs): assert xmax >= lines[0].get_data()[0][-1] def test_line_lim_subplots(self): - df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.default_rng(2).rand(6, 3), columns=["x", "y", "z"]) axes = df.plot(secondary_y=True, subplots=True) _check_axes_shape(axes, axes_num=3, layout=(3, 1)) for ax in axes: @@ -567,7 +581,9 @@ def test_line_lim_subplots(self): ) @pytest.mark.parametrize("stacked", [True, False]) def test_area_lim(self, stacked): - df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) + df = DataFrame( + np.random.default_rng(2).rand(6, 4), columns=["x", "y", "z", "four"] + ) neg_df = -df @@ -585,7 +601,7 @@ def test_area_lim(self, stacked): def test_area_sharey_dont_overwrite(self): # GH37942 - df = DataFrame(np.random.rand(4, 2), columns=["x", "y"]) + df = DataFrame(np.random.default_rng(2).rand(4, 2), columns=["x", "y"]) fig, (ax1, ax2) = mpl.pyplot.subplots(1, 2, sharey=True) df.plot(ax=ax1, kind="area") @@ -596,14 +612,14 @@ def test_area_sharey_dont_overwrite(self): @pytest.mark.parametrize("stacked", [True, False]) def test_bar_linewidth(self, stacked): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.bar(stacked=stacked, linewidth=2) for r in ax.patches: assert r.get_linewidth() == 2 def test_bar_linewidth_subplots(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # subplots axes = df.plot.bar(linewidth=2, subplots=True) _check_axes_shape(axes, axes_num=5, layout=(5, 1)) @@ -616,7 +632,7 @@ def test_bar_linewidth_subplots(self): ) @pytest.mark.parametrize("stacked", [True, False]) def test_bar_barwidth(self, meth, dim, stacked): - df = DataFrame(np.random.randn(5, 5)) + df = 
DataFrame(np.random.default_rng(2).randn(5, 5)) width = 0.9 @@ -631,7 +647,7 @@ def test_bar_barwidth(self, meth, dim, stacked): "meth, dim", [("bar", "get_width"), ("barh", "get_height")] ) def test_barh_barwidth_subplots(self, meth, dim): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) width = 0.9 @@ -641,7 +657,7 @@ def test_barh_barwidth_subplots(self, meth, dim): assert getattr(r, dim)() == width def test_bar_bottom_left_bottom(self): - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(np.random.default_rng(2).rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -651,7 +667,7 @@ def test_bar_bottom_left_bottom(self): assert result == [-1, -2, -3, -4, -5] def test_bar_bottom_left_left(self): - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(np.random.default_rng(2).rand(5, 5)) ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) result = [p.get_x() for p in ax.patches] assert result == [1] * 25 @@ -661,7 +677,7 @@ def test_bar_bottom_left_left(self): assert result == [1, 2, 3, 4, 5] def test_bar_bottom_left_subplots(self): - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(np.random.default_rng(2).rand(5, 5)) axes = df.plot.bar(subplots=True, bottom=-1) for ax in axes: result = [p.get_y() for p in ax.patches] @@ -694,7 +710,7 @@ def test_bar_nan_stacked(self): def test_bar_categorical(self, idx): # GH 13019 df = DataFrame( - np.random.randn(6, 5), + np.random.default_rng(2).randn(6, 5), index=idx(list("ABCDEF")), columns=idx(list("abcde")), ) @@ -716,7 +732,7 @@ def test_bar_categorical(self, idx): @pytest.mark.parametrize("x, y", [("x", "y"), (1, 2)]) def test_plot_scatter(self, x, y): df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -725,7 +741,7 @@ def test_plot_scatter(self, x, y): def 
test_plot_scatter_error(self): df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -738,7 +754,7 @@ def test_plot_scatter_error(self): def test_plot_scatter_shape(self): df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -748,7 +764,7 @@ def test_plot_scatter_shape(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = DataFrame(np.random.randn(10), columns=["a"]) + df = DataFrame(np.random.default_rng(2).randn(10), columns=["a"]) df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a (real )?number, not 'datetime.time'" @@ -759,7 +775,7 @@ def test_raise_error_on_datetime_time_data(self): def test_scatterplot_datetime_data(self, x, y): # GH 30391 dates = date_range(start=date(2019, 1, 1), periods=12, freq="W") - vals = np.random.normal(0, 1, len(dates)) + vals = np.random.default_rng(2).normal(0, 1, len(dates)) df = DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x=x, y=y) @@ -813,7 +829,7 @@ def test_plot_scatter_with_categorical_data(self, x, y): @pytest.mark.parametrize("x, y, c", [("x", "y", "z"), (0, 1, 2)]) def test_plot_scatter_with_c(self, x, y, c): df = DataFrame( - np.random.randint(low=0, high=100, size=(6, 4)), + np.random.default_rng(2).randint(low=0, high=100, size=(6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -826,7 +842,7 @@ def test_plot_scatter_with_c(self, x, y, c): def test_plot_scatter_with_c_props(self): df = DataFrame( - np.random.randint(low=0, high=100, size=(6, 4)), + np.random.default_rng(2).randint(low=0, high=100, size=(6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -868,21 +884,27 @@ def 
test_plot_scatter_with_c_array(self): def test_plot_scatter_with_s(self): # this refers to GH 32904 - df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).random((10, 3)) * 100, columns=["a", "b", "c"] + ) ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) def test_plot_scatter_with_norm(self): # added while fixing GH 45809 - df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).random((10, 3)) * 100, columns=["a", "b", "c"] + ) norm = mpl.colors.LogNorm() ax = df.plot.scatter(x="a", y="b", c="c", norm=norm) assert ax.collections[0].norm is norm def test_plot_scatter_without_norm(self): # added while fixing GH 45809 - df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).random((10, 3)) * 100, columns=["a", "b", "c"] + ) ax = df.plot.scatter(x="a", y="b", c="c") plot_norm = ax.collections[0].norm color_min_max = (df.c.min(), df.c.max()) @@ -902,7 +924,7 @@ def test_plot_scatter_without_norm(self): ) def test_plot_bar(self, kwargs): df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -912,7 +934,7 @@ def test_plot_bar(self, kwargs): @pytest.mark.slow def test_plot_bar_int_col(self): df = DataFrame( - np.random.randn(10, 15), + np.random.default_rng(2).randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15), ) @@ -1007,7 +1029,7 @@ def test_boxplot_vertical_positions(self, hist_df): def test_boxplot_return_type_invalid(self): df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -1018,7 +1040,7 @@ def test_boxplot_return_type_invalid(self): 
@pytest.mark.parametrize("return_type", ["dict", "axes", "both"]) def test_boxplot_return_type_invalid_type(self, return_type): df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -1027,7 +1049,7 @@ def test_boxplot_return_type_invalid_type(self, return_type): @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(np.random.default_rng(2).randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] _check_legend_labels(ax, labels=expected) @@ -1035,13 +1057,13 @@ def test_kde_df(self): @td.skip_if_no_scipy def test_kde_df_rot(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) ax = df.plot(kind="kde", rot=20, fontsize=5) _check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) @td.skip_if_no_scipy def test_kde_df_subplots(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) axes = _check_plot_works( df.plot, default_axes=True, @@ -1052,18 +1074,18 @@ def test_kde_df_subplots(self): @td.skip_if_no_scipy def test_kde_df_logy(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) axes = df.plot(kind="kde", logy=True, subplots=True) _check_ax_scales(axes, yaxis="log") @td.skip_if_no_scipy def test_kde_missing_vals(self): - df = DataFrame(np.random.uniform(size=(100, 4))) + df = DataFrame(np.random.default_rng(2).uniform(size=(100, 4))) df.loc[0, 0] = np.nan _check_plot_works(df.plot, kind="kde") def test_hist_df(self): - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(np.random.default_rng(2).randn(100, 4)) ax = _check_plot_works(df.plot.hist) expected = [pprint_thing(c) for c in df.columns] @@ -1079,14 +1101,14 @@ def test_hist_df(self): _check_ax_scales(axes, yaxis="log") def test_hist_df_series(self): 
- series = Series(np.random.rand(10)) + series = Series(np.random.default_rng(2).rand(10)) axes = series.plot.hist(rot=40) _check_ticks_props(axes, xrot=40, yrot=0) def test_hist_df_series_cumulative_density(self): from matplotlib.patches import Rectangle - series = Series(np.random.rand(10)) + series = Series(np.random.default_rng(2).rand(10)) ax = series.plot.hist(cumulative=True, bins=4, density=True) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -1095,14 +1117,14 @@ def test_hist_df_series_cumulative_density(self): def test_hist_df_series_cumulative(self): from matplotlib.patches import Rectangle - series = Series(np.random.rand(10)) + series = Series(np.random.default_rng(2).rand(10)) ax = series.plot.hist(cumulative=True, bins=4) rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] tm.assert_almost_equal(rects[-2].get_height(), 10.0) def test_hist_df_orientation(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) # if horizontal, yticklabels are rotated axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal") _check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8) @@ -1112,8 +1134,8 @@ def test_hist_df_orientation(self): ) def test_hist_weights(self, weights): # GH 33173 - np.random.seed(0) - df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + + df = DataFrame(dict(zip(["A", "B"], np.random.default_rng(2).randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1279,7 +1301,7 @@ def test_hist_df_coord(self, data): ) def test_plot_int_columns(self): - df = DataFrame(np.random.randn(100, 4)).cumsum() + df = DataFrame(np.random.default_rng(2).randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.parametrize( @@ -1297,7 +1319,7 @@ def test_style_by_column(self, markers): fig = plt.gcf() fig.clf() fig.add_subplot(111) 
- df = DataFrame(np.random.randn(10, 3)) + df = DataFrame(np.random.default_rng(2).randn(10, 3)) ax = df.plot(style=markers) for idx, line in enumerate(ax.get_lines()[: len(markers)]): assert line.get_marker() == markers[idx] @@ -1321,7 +1343,7 @@ def test_line_label_none(self): ) def test_specified_props_kwd_plot_box(self, props, expected): # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) + df = DataFrame({k: np.random.default_rng(2).random(100) for k in "ABC"}) kwd = {props: {"color": "C1"}} result = df.plot.box(return_type="dict", **kwd) @@ -1364,14 +1386,16 @@ def test_all_invalid_plot_data(self, kind): "kind", list(plotting.PlotAccessor._common_kinds) + ["area"] ) def test_partially_invalid_plot_data_numeric(self, kind): - df = DataFrame(np.random.RandomState(42).randn(10, 2), dtype=object) - df[np.random.rand(df.shape[0]) > 0.5] = "a" + df = DataFrame( + np.random.default_rng(2).randn(10, 2), dtype=object + ) + df[np.random.default_rng(2).rand(df.shape[0]) > 0.5] = "a" msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) msg = "invalid_plot_kind is not a valid plot kind" with pytest.raises(ValueError, match=msg): df.plot(kind="invalid_plot_kind") @@ -1429,9 +1453,9 @@ def test_xy_args_integer(self, x, y, colnames): def test_hexbin_basic(self): df = DataFrame( { - "A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + np.random.uniform(size=20), + "A": np.random.default_rng(2).uniform(size=20), + "B": np.random.default_rng(2).uniform(size=20), + "C": np.arange(20) + np.random.default_rng(2).uniform(size=20), } ) @@ -1442,9 +1466,9 @@ def test_hexbin_basic(self): def test_hexbin_basic_subplots(self): df = DataFrame( { - "A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + 
np.random.uniform(size=20), + "A": np.random.default_rng(2).uniform(size=20), + "B": np.random.default_rng(2).uniform(size=20), + "C": np.arange(20) + np.random.default_rng(2).uniform(size=20), } ) # GH 6951 @@ -1459,9 +1483,9 @@ def test_hexbin_basic_subplots(self): def test_hexbin_with_c(self, reduce_C): df = DataFrame( { - "A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + np.random.uniform(size=20), + "A": np.random.default_rng(2).uniform(size=20), + "B": np.random.default_rng(2).uniform(size=20), + "C": np.arange(20) + np.random.default_rng(2).uniform(size=20), } ) @@ -1479,9 +1503,9 @@ def test_hexbin_with_c(self, reduce_C): def test_hexbin_cmap(self, kwargs, expected): df = DataFrame( { - "A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + np.random.uniform(size=20), + "A": np.random.default_rng(2).uniform(size=20), + "B": np.random.default_rng(2).uniform(size=20), + "C": np.arange(20) + np.random.default_rng(2).uniform(size=20), } ) ax = df.plot.hexbin(x="A", y="B", **kwargs) @@ -1489,7 +1513,7 @@ def test_hexbin_cmap(self, kwargs, expected): def test_pie_df_err(self): df = DataFrame( - np.random.rand(5, 3), + np.random.default_rng(2).rand(5, 3), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1500,7 +1524,7 @@ def test_pie_df_err(self): @pytest.mark.parametrize("y", ["Y", 2]) def test_pie_df(self, y): df = DataFrame( - np.random.rand(5, 3), + np.random.default_rng(2).rand(5, 3), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1509,7 +1533,7 @@ def test_pie_df(self, y): def test_pie_df_subplots(self): df = DataFrame( - np.random.rand(5, 3), + np.random.default_rng(2).rand(5, 3), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1526,7 +1550,7 @@ def test_pie_df_subplots(self): def test_pie_df_labels_colors(self): df = DataFrame( - np.random.rand(5, 3), + np.random.default_rng(2).rand(5, 3), columns=["X", "Y", "Z"], index=["a", "b", 
"c", "d", "e"], ) @@ -1546,7 +1570,7 @@ def test_pie_df_labels_colors(self): _check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): - df = DataFrame(np.random.rand(4, 4)) + df = DataFrame(np.random.default_rng(2).rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan _, axes = mpl.pyplot.subplots(ncols=4) @@ -1630,7 +1654,7 @@ def test_errorbar_plot_external_valueerror(self): d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} df = DataFrame(d) with tm.external_error_raised(ValueError): - df.plot(yerr=np.random.randn(11)) + df.plot(yerr=np.random.default_rng(2).randn(11)) @pytest.mark.slow def test_errorbar_plot_external_typeerror(self): @@ -1700,8 +1724,8 @@ def test_errorbar_plot_iterator(self): def test_errorbar_with_integer_column_names(self): # test with integer column names - df = DataFrame(np.abs(np.random.randn(10, 2))) - df_err = DataFrame(np.abs(np.random.randn(10, 2))) + df = DataFrame(np.abs(np.random.default_rng(2).randn(10, 2))) + df_err = DataFrame(np.abs(np.random.default_rng(2).randn(10, 2))) ax = _check_plot_works(df.plot, yerr=df_err) _check_has_errorbars(ax, xerr=0, yerr=2) ax = _check_plot_works(df.plot, y=0, yerr=1) @@ -1710,15 +1734,19 @@ def test_errorbar_with_integer_column_names(self): @pytest.mark.slow @pytest.mark.parametrize("kind", ["line", "bar"]) def test_errorbar_with_partial_columns_kind(self, kind): - df = DataFrame(np.abs(np.random.randn(10, 3))) - df_err = DataFrame(np.abs(np.random.randn(10, 2)), columns=[0, 2]) + df = DataFrame(np.abs(np.random.default_rng(2).randn(10, 3))) + df_err = DataFrame( + np.abs(np.random.default_rng(2).randn(10, 2)), columns=[0, 2] + ) ax = _check_plot_works(df.plot, yerr=df_err, kind=kind) _check_has_errorbars(ax, xerr=0, yerr=2) @pytest.mark.slow def test_errorbar_with_partial_columns_dti(self): - df = DataFrame(np.abs(np.random.randn(10, 3))) - df_err = DataFrame(np.abs(np.random.randn(10, 2)), columns=[0, 2]) + df = DataFrame(np.abs(np.random.default_rng(2).randn(10, 3))) + 
df_err = DataFrame( + np.abs(np.random.default_rng(2).randn(10, 2)), columns=[0, 2] + ) ix = date_range("1/1/2000", periods=10, freq="M") df.set_index(ix, inplace=True) df_err.set_index(ix, inplace=True) @@ -1769,8 +1797,7 @@ def test_errorbar_timeseries(self, kind): _check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): - np.random.seed(0) - err = np.random.rand(3, 2, 5) + err = np.random.default_rng(2).rand(3, 2, 5) # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]... df = DataFrame(np.arange(15).reshape(3, 5)).T @@ -1788,7 +1815,9 @@ def test_errorbar_asymmetrical(self): df.plot(yerr=err.T) def test_table(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) _check_plot_works(df.plot, table=True) _check_plot_works(df.plot, table=df) @@ -1801,10 +1830,14 @@ def test_table(self): def test_errorbar_scatter(self): df = DataFrame( - np.abs(np.random.randn(5, 2)), index=range(5), columns=["x", "y"] + np.abs(np.random.default_rng(2).randn(5, 2)), + index=range(5), + columns=["x", "y"], ) df_err = DataFrame( - np.abs(np.random.randn(5, 2)) / 5, index=range(5), columns=["x", "y"] + np.abs(np.random.default_rng(2).randn(5, 2)) / 5, + index=range(5), + columns=["x", "y"], ) ax = _check_plot_works(df.plot.scatter, x="x", y="y") @@ -1831,7 +1864,8 @@ def _check_errorbar_color(containers, expected, has_err="has_xerr"): # GH 8081 df = DataFrame( - np.abs(np.random.randn(10, 5)), columns=["a", "b", "c", "d", "e"] + np.abs(np.random.default_rng(2).randn(10, 5)), + columns=["a", "b", "c", "d", "e"], ) ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red") _check_has_errorbars(ax, xerr=1, yerr=1) @@ -1986,9 +2020,9 @@ def test_memory_leak(self, kind): if kind in ["hexbin", "scatter", "pie"]: df = DataFrame( { - "A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + 
np.random.uniform(size=20), + "A": np.random.default_rng(2).uniform(size=20), + "B": np.random.default_rng(2).uniform(size=20), + "C": np.arange(20) + np.random.default_rng(2).uniform(size=20), } ) args = {"x": "A", "y": "B"} @@ -2012,9 +2046,13 @@ def test_df_gridspec_patterns_vert_horiz(self): from matplotlib import gridspec import matplotlib.pyplot as plt - ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) + ts = Series( + np.random.default_rng(2).randn(10), index=date_range("1/1/2000", periods=10) + ) - df = DataFrame(np.random.randn(10, 2), index=ts.index, columns=list("AB")) + df = DataFrame( + np.random.default_rng(2).randn(10, 2), index=ts.index, columns=list("AB") + ) def _get_vertical_grid(): gs = gridspec.GridSpec(3, 1) @@ -2088,7 +2126,9 @@ def test_df_gridspec_patterns_boxed(self): from matplotlib import gridspec import matplotlib.pyplot as plt - ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) + ts = Series( + np.random.default_rng(2).randn(10), index=date_range("1/1/2000", periods=10) + ) # boxed def _get_boxed_grid(): @@ -2101,7 +2141,9 @@ def _get_boxed_grid(): return ax1, ax2, ax3, ax4 axes = _get_boxed_grid() - df = DataFrame(np.random.randn(10, 4), index=ts.index, columns=list("ABCD")) + df = DataFrame( + np.random.default_rng(2).randn(10, 4), index=ts.index, columns=list("ABCD") + ) axes = df.plot(subplots=True, ax=axes) for ax in axes: assert len(ax.lines) == 1 @@ -2142,12 +2184,17 @@ def test_plain_axes(self): # a plain Axes object (GH11556) fig, ax = mpl.pyplot.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.default_rng(2).rand(10)).plot(ax=ax) def test_plain_axes_df(self): # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) - df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) + df = DataFrame( + { + "a": 
np.random.default_rng(2).randn(8), + "b": np.random.default_rng(2).randn(8), + } + ) fig = mpl.pyplot.figure() ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") @@ -2159,22 +2206,22 @@ def test_plain_axes_make_axes_locatable(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=cax) + Series(np.random.default_rng(2).rand(10)).plot(ax=ax) + Series(np.random.default_rng(2).rand(10)).plot(ax=cax) def test_plain_axes_make_inset_axes(self): fig, ax = mpl.pyplot.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=iax) + Series(np.random.default_rng(2).rand(10)).plot(ax=ax) + Series(np.random.default_rng(2).rand(10)).plot(ax=iax) @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - DataFrame(np.random.randn(15, 2), columns=list("AB")) + DataFrame(np.random.default_rng(2).randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2209,7 +2256,9 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = DataFrame( + np.random.default_rng(2).randn(4, 2), columns=["A", "B"], index=index + ) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 3d041fdbb5de6..8bd1dce2a885c 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -37,7 
+37,7 @@ class TestDataFrameColor: ) def test_mpl2_color_cycle_str(self, color): # GH 15516 - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) _check_plot_works(df.plot, color=color) def test_color_single_series_list(self): @@ -52,7 +52,7 @@ def test_rgb_tuple_color(self, color): _check_plot_works(df.plot, x="x", y="y", color=color) def test_color_empty_string(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) with pytest.raises(ValueError, match="Invalid color argument:"): df.plot(color="") @@ -86,7 +86,7 @@ def test_color_and_style_arguments(self): ) def test_color_and_marker(self, color, expected): # GH 21003 - df = DataFrame(np.random.random((7, 4))) + df = DataFrame(np.random.default_rng(2).random((7, 4))) ax = df.plot(color=color, style="d--") # check colors result = [i.get_color() for i in ax.lines] @@ -98,31 +98,31 @@ def test_color_and_marker(self, color, expected): def test_bar_colors(self): default_colors = _unpack_cycler(plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.bar() _check_colors(ax.patches[::5], facecolors=default_colors[:5]) def test_bar_colors_custom(self): custom_colors = "rgcby" - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.bar(color=custom_colors) _check_colors(ax.patches[::5], facecolors=custom_colors) @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_bar_colors_cmap(self, colormap): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.bar(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] _check_colors(ax.patches[::5], facecolors=rgba_colors) def test_bar_colors_single_col(self): - df = DataFrame(np.random.randn(5, 5)) + df = 
DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") _check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) def test_bar_colors_green(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot(kind="bar", color="green") _check_colors(ax.patches[::5], facecolors=["green"] * 5) @@ -146,7 +146,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # addressing issue #10611, to ensure colobar does not # interfere with x-axis label and ticklabels with # ipython inline backend. - random_array = np.random.random((10, 3)) + random_array = np.random.default_rng(2).random((10, 3)) df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") @@ -168,7 +168,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): # addressing issue #10678, to ensure colobar does not # interfere with x-axis label and ticklabels with # ipython inline backend. 
- random_array = np.random.random((10, 3)) + random_array = np.random.default_rng(2).random((10, 3)) df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) @@ -177,7 +177,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): assert ax.xaxis.get_label().get_visible() def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): - random_array = np.random.random((10, 3)) + random_array = np.random.default_rng(2).random((10, 3)) df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) @@ -244,7 +244,7 @@ def test_scatter_colorbar_different_cmap(self): def test_line_colors(self): custom_colors = "rgcby" - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -259,40 +259,40 @@ def test_line_colors(self): @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_line_colors_cmap(self, colormap): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=rgba_colors) def test_line_colors_single_col(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') ax = df.loc[:, [0]].plot(color="DodgerBlue") _check_colors(ax.lines, linecolors=["DodgerBlue"]) def test_line_colors_single_color(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot(color="red") _check_colors(ax.get_lines(), linecolors=["red"] * 5) def test_line_colors_hex(self): # GH 10299 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) custom_colors 
= ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] ax = df.plot(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.default_rng(2).rand(10, 2)).plot(color=colors) assert len(colors) == 3 def test_line_colors_and_styles_subplots(self): # GH 9894 default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -300,7 +300,7 @@ def test_line_colors_and_styles_subplots(self): @pytest.mark.parametrize("color", ["k", "green"]) def test_line_colors_and_styles_subplots_single_color_str(self, color): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) axes = df.plot(subplots=True, color=color) for ax in axes: _check_colors(ax.get_lines(), linecolors=[color]) @@ -308,14 +308,14 @@ def test_line_colors_and_styles_subplots_single_color_str(self, color): @pytest.mark.parametrize("color", ["rgcby", list("rgcby")]) def test_line_colors_and_styles_subplots_custom_colors(self, color): # GH 9894 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) axes = df.plot(color=color, subplots=True) for ax, c in zip(axes, list(color)): _check_colors(ax.get_lines(), linecolors=[c]) def test_line_colors_and_styles_subplots_colormap_hex(self): # GH 9894 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # GH 10299 custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] axes = df.plot(color=custom_colors, subplots=True) @@ -325,7 +325,7 @@ def test_line_colors_and_styles_subplots_colormap_hex(self): @pytest.mark.parametrize("cmap", ["jet", cm.jet]) def test_line_colors_and_styles_subplots_colormap_subplot(self, cmap): # GH 9894 - 
df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] axes = df.plot(colormap=cmap, subplots=True) for ax, c in zip(axes, rgba_colors): @@ -333,7 +333,7 @@ def test_line_colors_and_styles_subplots_colormap_subplot(self, cmap): def test_line_colors_and_styles_subplots_single_col(self): # GH 9894 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) @@ -341,7 +341,7 @@ def test_line_colors_and_styles_subplots_single_col(self): def test_line_colors_and_styles_subplots_single_char(self): # GH 9894 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # single character style axes = df.plot(style="r", subplots=True) for ax in axes: @@ -349,7 +349,7 @@ def test_line_colors_and_styles_subplots_single_char(self): def test_line_colors_and_styles_subplots_list_styles(self): # GH 9894 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # list of styles styles = list("rgcby") axes = df.plot(style=styles, subplots=True) @@ -360,7 +360,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(np.random.default_rng(2).rand(5, 5)) ax = df.plot.area(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -377,7 +377,7 @@ def test_area_colors_poly(self): from matplotlib import cm from matplotlib.collections import PolyCollection - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(np.random.default_rng(2).rand(5, 5)) ax = df.plot.area(colormap="jet") jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=jet_colors) @@ -393,7 
+393,7 @@ def test_area_colors_stacked_false(self): from matplotlib import cm from matplotlib.collections import PolyCollection - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(np.random.default_rng(2).rand(5, 5)) jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] # When stacked=False, alpha is set to 0.5 ax = df.plot.area(colormap=cm.jet, stacked=False) @@ -411,37 +411,37 @@ def test_area_colors_stacked_false(self): def test_hist_colors(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.hist() _check_colors(ax.patches[::10], facecolors=default_colors[:5]) def test_hist_colors_single_custom(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) custom_colors = "rgcby" ax = df.plot.hist(color=custom_colors) _check_colors(ax.patches[::10], facecolors=custom_colors) @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_hist_colors_cmap(self, colormap): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.hist(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] _check_colors(ax.patches[::10], facecolors=rgba_colors) def test_hist_colors_single_col(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") _check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) def test_hist_colors_single_color(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot(kind="hist", color="green") _check_colors(ax.patches[::10], facecolors=["green"] * 5) @td.skip_if_no_scipy def test_kde_colors(self): custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(np.random.default_rng(2).rand(5, 5)) ax = df.plot.kde(color=custom_colors) 
_check_colors(ax.get_lines(), linecolors=custom_colors) @@ -449,7 +449,7 @@ def test_kde_colors(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_kde_colors_cmap(self, colormap): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.kde(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=rgba_colors) @@ -458,7 +458,7 @@ def test_kde_colors_cmap(self, colormap): def test_kde_colors_and_styles_subplots(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -467,14 +467,14 @@ def test_kde_colors_and_styles_subplots(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["k", "red"]) def test_kde_colors_and_styles_subplots_single_col_str(self, colormap): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) axes = df.plot(kind="kde", color=colormap, subplots=True) for ax in axes: _check_colors(ax.get_lines(), linecolors=[colormap]) @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_custom_color(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) custom_colors = "rgcby" axes = df.plot(kind="kde", color=custom_colors, subplots=True) for ax, c in zip(axes, list(custom_colors)): @@ -483,7 +483,7 @@ def test_kde_colors_and_styles_subplots_custom_color(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_kde_colors_and_styles_subplots_cmap(self, colormap): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] axes = df.plot(kind="kde", colormap=colormap, subplots=True) for ax, c 
in zip(axes, rgba_colors): @@ -491,7 +491,7 @@ def test_kde_colors_and_styles_subplots_cmap(self, colormap): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_single_col(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) @@ -499,7 +499,7 @@ def test_kde_colors_and_styles_subplots_single_col(self): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_single_char(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # list of styles # single character style axes = df.plot(kind="kde", style="r", subplots=True) @@ -508,7 +508,7 @@ def test_kde_colors_and_styles_subplots_single_char(self): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_list(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # list of styles styles = list("rgcby") axes = df.plot(kind="kde", style=styles, subplots=True) @@ -518,7 +518,7 @@ def test_kde_colors_and_styles_subplots_list(self): def test_boxplot_colors(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors_box( bp, @@ -529,7 +529,7 @@ def test_boxplot_colors(self): ) def test_boxplot_colors_dict_colors(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) dict_colors = { "boxes": "#572923", "whiskers": "#982042", @@ -548,7 +548,7 @@ def test_boxplot_colors_dict_colors(self): def test_boxplot_colors_default_color(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # partial colors dict_colors = 
{"whiskers": "c", "medians": "m"} bp = df.plot.box(color=dict_colors, return_type="dict") @@ -556,7 +556,7 @@ def test_boxplot_colors_default_color(self): @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_boxplot_colors_cmap(self, colormap): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) bp = df.plot.box(colormap=colormap, return_type="dict") jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] _check_colors_box( @@ -564,19 +564,19 @@ def test_boxplot_colors_cmap(self, colormap): ) def test_boxplot_colors_single(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # string color is applied to all artists except fliers bp = df.plot.box(color="DodgerBlue", return_type="dict") _check_colors_box(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") def test_boxplot_colors_tuple(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # tuple is also applied to all artists except fliers bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") _check_colors_box(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") def test_boxplot_colors_invalid(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) msg = re.escape( "color dict contains invalid key 'xxxx'. 
The key must be either " "['boxes', 'whiskers', 'medians', 'caps']" @@ -591,7 +591,7 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(np.random.default_rng(2).randn(5, 3)) ax = df.plot() expected = _unpack_cycler(plt.rcParams)[:3] @@ -600,9 +600,9 @@ def test_default_color_cycle(self): def test_no_color_bar(self): df = DataFrame( { - "A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + np.random.uniform(size=20), + "A": np.random.default_rng(2).uniform(size=20), + "B": np.random.default_rng(2).uniform(size=20), + "C": np.arange(20) + np.random.default_rng(2).uniform(size=20), } ) ax = df.plot.hexbin(x="A", y="B", colorbar=None) @@ -611,9 +611,9 @@ def test_no_color_bar(self): def test_mixing_cmap_and_colormap_raises(self): df = DataFrame( { - "A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + np.random.uniform(size=20), + "A": np.random.default_rng(2).uniform(size=20), + "B": np.random.default_rng(2).uniform(size=20), + "C": np.arange(20) + np.random.default_rng(2).uniform(size=20), } ) msg = "Only specify one of `cmap` and `colormap`" @@ -648,7 +648,7 @@ def test_colors_of_columns_with_same_name(self): assert legend.get_color() == line.get_color() def test_invalid_colormap(self): - df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) + df = DataFrame(np.random.default_rng(2).randn(3, 2), columns=["A", "B"]) msg = "(is not a valid value)|(is not a known colormap)" with pytest.raises((ValueError, KeyError), match=msg): df.plot(colormap="invalid_colormap") diff --git a/pandas/tests/plotting/frame/test_frame_legend.py b/pandas/tests/plotting/frame/test_frame_legend.py index 13c31a6a202d7..83855fe4d8106 100644 --- a/pandas/tests/plotting/frame/test_frame_legend.py +++ b/pandas/tests/plotting/frame/test_frame_legend.py @@ -62,10 +62,10 @@ def 
test_legend_false(self): @td.skip_if_no_scipy @pytest.mark.parametrize("kind", ["line", "bar", "barh", "kde", "area", "hist"]) def test_df_legend_labels(self, kind): - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["j", "k", "l"]) ax = df.plot(kind=kind, legend=True) _check_legend_labels(ax, labels=df.columns) @@ -82,9 +82,9 @@ def test_df_legend_labels(self, kind): @td.skip_if_no_scipy def test_df_legend_labels_secondary_y(self): - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) + df = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["g", "h", "i"]) # Secondary Y ax = df.plot(legend=True, secondary_y="b") _check_legend_labels(ax, labels=["a", "b (right)", "c"]) @@ -97,9 +97,15 @@ def test_df_legend_labels_secondary_y(self): def test_df_legend_labels_time_series(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), columns=["a", "b", "c"], index=ind + ) + df2 = 
DataFrame( + np.random.default_rng(2).randn(3, 3), columns=["d", "e", "f"], index=ind + ) + df3 = DataFrame( + np.random.default_rng(2).randn(3, 3), columns=["g", "h", "i"], index=ind + ) ax = df.plot(legend=True, secondary_y="b") _check_legend_labels(ax, labels=["a", "b (right)", "c"]) ax = df2.plot(legend=False, ax=ax) @@ -111,9 +117,15 @@ def test_df_legend_labels_time_series(self): def test_df_legend_labels_time_series_scatter(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), columns=["a", "b", "c"], index=ind + ) + df2 = DataFrame( + np.random.default_rng(2).randn(3, 3), columns=["d", "e", "f"], index=ind + ) + df3 = DataFrame( + np.random.default_rng(2).randn(3, 3), columns=["g", "h", "i"], index=ind + ) # scatter ax = df.plot.scatter(x="a", y="b", label="data1") _check_legend_labels(ax, labels=["data1"]) @@ -125,7 +137,9 @@ def test_df_legend_labels_time_series_scatter(self): @td.skip_if_no_scipy def test_df_legend_labels_time_series_no_mutate(self): ind = date_range("1/1/2014", periods=3) - df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) + df = DataFrame( + np.random.default_rng(2).randn(3, 3), columns=["a", "b", "c"], index=ind + ) # ensure label args pass through and # index name does not mutate # column names don't mutate @@ -164,7 +178,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def test_legend_name(self): multi = DataFrame( - np.random.randn(4, 4), + np.random.default_rng(2).randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -173,7 +187,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() _check_text_labels(leg_title, 
"group,individual") - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() _check_text_labels(leg_title, "group,individual") @@ -199,13 +213,13 @@ def test_legend_name(self): ], ) def test_no_legend(self, kind): - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["a", "b", "c"]) ax = df.plot(kind=kind, legend=False) _check_legend_labels(ax, visible=False) def test_missing_markers_legend(self): # 14958 - df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.default_rng(2).randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 69a68c3d0568a..ac24fda59d64b 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -35,7 +35,9 @@ class TestDataFramePlotsSubplots: @pytest.mark.slow @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots(self, kind): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) _check_axes_shape(axes, axes_num=3, layout=(3, 1)) @@ -62,7 +64,9 @@ def test_subplots(self, kind): @pytest.mark.slow @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots_no_share_x(self, kind): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) axes = 
df.plot(kind=kind, subplots=True, sharex=False) for ax in axes: _check_visible(ax.xaxis) @@ -74,7 +78,9 @@ def test_subplots_no_share_x(self, kind): @pytest.mark.slow @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots_no_legend(self, kind): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) axes = df.plot(kind=kind, subplots=True, legend=False) for ax in axes: assert ax.get_legend() is None @@ -82,7 +88,7 @@ def test_subplots_no_legend(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries(self, kind): idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).rand(10, 3), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=True) _check_axes_shape(axes, axes_num=3, layout=(3, 1)) @@ -105,7 +111,7 @@ def test_subplots_timeseries(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries_rot(self, kind): idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).rand(10, 3), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) for ax in axes: _check_visible(ax.xaxis) @@ -221,7 +227,9 @@ def test_subplots_timeseries_y_axis_not_supported(self): ) def test_subplots_layout_multi_column(self, layout, exp_layout): # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) axes = df.plot(subplots=True, layout=layout) _check_axes_shape(axes, axes_num=3, layout=exp_layout) @@ -229,7 +237,9 @@ def test_subplots_layout_multi_column(self, layout, exp_layout): def 
test_subplots_layout_multi_column_error(self): # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) msg = "Layout of 1x1 must be larger than required size 3" with pytest.raises(ValueError, match=msg): @@ -250,7 +260,9 @@ def test_subplots_layout_single_column( self, kwargs, expected_axes_num, expected_layout, expected_shape ): # GH 6667 - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 1), index=list(string.ascii_letters[:10]) + ) axes = df.plot(subplots=True, **kwargs) _check_axes_shape( axes, @@ -264,13 +276,15 @@ def test_subplots_layout_single_column( def test_subplots_warnings(self, idx): # GH 9464 with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(5, 4), index=idx) + df = DataFrame(np.random.default_rng(2).randn(5, 4), index=idx) df.plot(subplots=True, layout=(3, 2)) def test_subplots_multiple_axes(self): # GH 5353, 6970, GH 7069 fig, axes = mpl.pyplot.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) _check_axes_shape(returned, axes_num=3, layout=(1, 3)) @@ -285,7 +299,9 @@ def test_subplots_multiple_axes(self): def test_subplots_multiple_axes_error(self): # GH 5353, 6970, GH 7069 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + ) msg = "The number of passed axes must be 3, the same as the output plot" _, axes = mpl.pyplot.subplots(2, 3) @@ -308,7 +324,9 @@ def test_subplots_multiple_axes_2_dim(self, layout, exp_layout): # (show warning is tested in # 
TestDataFrameGroupByPlots.test_grouped_box_multiple_axes _, axes = mpl.pyplot.subplots(2, 2) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 4), index=list(string.ascii_letters[:10]) + ) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) @@ -322,7 +340,9 @@ def test_subplots_multiple_axes_single_col(self): # GH 5353, 6970, GH 7069 # single column _, axes = mpl.pyplot.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + df = DataFrame( + np.random.default_rng(2).rand(10, 1), index=list(string.ascii_letters[:10]) + ) axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) _check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -333,7 +353,7 @@ def test_subplots_ts_share_axes(self): _, axes = mpl.pyplot.subplots(3, 3, sharex=True, sharey=True) mpl.pyplot.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) df = DataFrame( - np.random.randn(10, 9), + np.random.default_rng(2).randn(10, 9), index=date_range(start="2014-07-01", freq="M", periods=10), ) for i, ax in enumerate(axes.ravel()): @@ -372,7 +392,7 @@ def test_subplots_sharex_axes_existing_axes(self): def test_subplots_dup_columns(self): # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).rand(5, 5), columns=list("aaaaa")) axes = df.plot(subplots=True) for ax in axes: _check_legend_labels(ax, labels=["a"]) @@ -380,7 +400,7 @@ def test_subplots_dup_columns(self): def test_subplots_dup_columns_secondary_y(self): # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).rand(5, 5), columns=list("aaaaa")) axes = df.plot(subplots=True, secondary_y="a") for ax in axes: # (right) is only attached when subplots=False @@ -389,7 +409,7 @@ def test_subplots_dup_columns_secondary_y(self): def 
test_subplots_dup_columns_secondary_y_no_subplot(self): # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).rand(5, 5), columns=list("aaaaa")) ax = df.plot(secondary_y="a") _check_legend_labels(ax, labels=["a (right)"] * 5) assert len(ax.lines) == 0 @@ -449,7 +469,7 @@ def test_boxplot_subplots_return_type(self, hist_df, rt): def test_df_subplots_patterns_minorticks(self): # GH 10657 df = DataFrame( - np.random.randn(10, 2), + np.random.default_rng(2).randn(10, 2), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -469,7 +489,7 @@ def test_df_subplots_patterns_minorticks(self): def test_df_subplots_patterns_minorticks_1st_ax_hidden(self): # GH 10657 df = DataFrame( - np.random.randn(10, 2), + np.random.default_rng(2).randn(10, 2), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -488,7 +508,7 @@ def test_df_subplots_patterns_minorticks_1st_ax_hidden(self): def test_df_subplots_patterns_minorticks_not_shared(self): # GH 10657 df = DataFrame( - np.random.randn(10, 2), + np.random.default_rng(2).randn(10, 2), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -504,7 +524,7 @@ def test_df_subplots_patterns_minorticks_not_shared(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 - df = DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.default_rng(2).rand(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -520,7 +540,7 @@ def test_subplots_sharex_false(self): def test_subplots_constrained_layout(self): # GH 25261 idx = date_range(start="now", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).rand(10, 3), index=idx) kwargs = {} if hasattr(mpl.pyplot.Figure, "get_constrained_layout"): kwargs["constrained_layout"] = True @@ -608,7 +628,7 @@ def test_bar_align_multiple_columns(self, kwargs): ], ) def 
test_bar_align_single_column(self, kwargs): - df = DataFrame(np.random.randn(5)) + df = DataFrame(np.random.default_rng(2).randn(5)) self._check_bar_alignment(df, **kwargs) @pytest.mark.parametrize( @@ -623,13 +643,13 @@ def test_bar_align_single_column(self, kwargs): ], ) def test_bar_barwidth_position(self, kwargs): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) self._check_bar_alignment(df, width=0.9, position=0.2, **kwargs) @pytest.mark.parametrize("w", [1, 1.0]) def test_bar_barwidth_position_int(self, w): # GH 12979 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) ax = df.plot.bar(stacked=True, width=w) ticks = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) @@ -650,7 +670,7 @@ def test_bar_barwidth_position_int(self, w): ) def test_bar_barwidth_position_int_width_1(self, kind, kwargs): # GH 12979 - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) self._check_bar_alignment(df, kind=kind, width=1, **kwargs) def _check_bar_alignment( diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index c8b71c04001e5..a2e0b87bf41e9 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -17,10 +17,9 @@ @pytest.fixture def hist_df(): - np.random.seed(0) - df = DataFrame(np.random.randn(30, 2), columns=["A", "B"]) - df["C"] = np.random.choice(["a", "b", "c"], 30) - df["D"] = np.random.choice(["a", "b", "c"], 30) + df = DataFrame(np.random.default_rng(2).randn(30, 2), columns=["A", "B"]) + df["C"] = np.random.default_rng(2).choice(["a", "b", "c"], 30) + df["D"] = np.random.default_rng(2).choice(["a", "b", "c"], 30) return df diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 2ade2bf6de51a..53ee98e2146da 100644 --- 
a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -43,9 +43,9 @@ def test_stacked_boxplot_set_axis(self): n = 80 df = DataFrame( { - "Clinical": np.random.choice([0, 1, 2, 3], n), - "Confirmed": np.random.choice([0, 1, 2, 3], n), - "Discarded": np.random.choice([0, 1, 2, 3], n), + "Clinical": np.random.default_rng(2).choice([0, 1, 2, 3], n), + "Confirmed": np.random.default_rng(2).choice([0, 1, 2, 3], n), + "Discarded": np.random.default_rng(2).choice([0, 1, 2, 3], n), }, index=np.arange(0, n), ) @@ -73,7 +73,7 @@ def test_stacked_boxplot_set_axis(self): ) def test_boxplot_legacy1(self, kwargs, warn): df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -85,18 +85,18 @@ def test_boxplot_legacy1(self, kwargs, warn): _check_plot_works(df.boxplot, **kwargs) def test_boxplot_legacy1_series(self): - ser = Series(np.random.randn(6)) + ser = Series(np.random.default_rng(2).randn(6)) _check_plot_works(plotting._core.boxplot, data=ser, return_type="dict") def test_boxplot_legacy2(self): - df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) with tm.assert_produces_warning(UserWarning, check_stacklevel=False): _check_plot_works(df.boxplot, by="X") def test_boxplot_legacy2_with_ax(self): - df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # When ax is supplied and required number of axes is 1, @@ -107,7 +107,7 @@ def test_boxplot_legacy2_with_ax(self): assert ax_axes is axes def test_boxplot_legacy2_with_ax_return_type(self): - df = 
DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) fig, ax = mpl.pyplot.subplots() @@ -116,7 +116,7 @@ def test_boxplot_legacy2_with_ax_return_type(self): assert ax_axes is axes["A"] def test_boxplot_legacy2_with_multi_col(self): - df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # Multiple columns with an ax argument should use same figure @@ -128,7 +128,7 @@ def test_boxplot_legacy2_with_multi_col(self): assert axes["Col1"].get_figure() is fig def test_boxplot_legacy2_by_none(self): - df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # When by is None, check that all relevant lines are present in the @@ -147,7 +147,7 @@ def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -163,7 +163,7 @@ def test_boxplot_return_type_legacy_return_type(self, return_type): # API change in https://github.com/pandas-dev/pandas/pull/7096 df = DataFrame( - np.random.randn(6, 4), + np.random.default_rng(2).randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -173,7 +173,7 @@ def test_boxplot_return_type_legacy_return_type(self, return_type): def test_boxplot_axis_limits(self, hist_df): df = hist_df.copy() - df["age"] = np.random.randint(1, 20, df.shape[0]) + 
df["age"] = np.random.default_rng(2).randint(1, 20, df.shape[0]) # One full row height_ax, weight_ax = df.boxplot(["height", "weight"], by="category") _check_ax_limits(df["height"], height_ax) @@ -182,7 +182,7 @@ def test_boxplot_axis_limits(self, hist_df): def test_boxplot_axis_limits_two_rows(self, hist_df): df = hist_df.copy() - df["age"] = np.random.randint(1, 20, df.shape[0]) + df["age"] = np.random.default_rng(2).randint(1, 20, df.shape[0]) # Two rows, one partial p = df.boxplot(["height", "weight", "age"], by="category") height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0] @@ -196,12 +196,14 @@ def test_boxplot_axis_limits_two_rows(self, hist_df): assert dummy_ax._sharey is None def test_boxplot_empty_column(self): - df = DataFrame(np.random.randn(20, 4)) + df = DataFrame(np.random.default_rng(2).randn(20, 4)) df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type="axes") def test_figsize(self): - df = DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"]) + df = DataFrame( + np.random.default_rng(2).rand(10, 5), columns=["A", "B", "C", "D", "E"] + ) result = df.boxplot(return_type="axes", figsize=(12, 8)) assert result.figure.bbox_inches.width == 12 assert result.figure.bbox_inches.height == 8 @@ -215,8 +217,8 @@ def test_boxplot_numeric_data(self): df = DataFrame( { "a": date_range("2012-01-01", periods=100), - "b": np.random.randn(100), - "c": np.random.randn(100) + 2, + "b": np.random.default_rng(2).randn(100), + "c": np.random.default_rng(2).randn(100) + 2, "d": date_range("2012-01-01", periods=100).astype(str), "e": date_range("2012-01-01", periods=100, tz="UTC"), "f": timedelta_range("1 days", periods=100), @@ -238,7 +240,7 @@ def test_boxplot_numeric_data(self): ) def test_color_kwd(self, colors_kwd, expected): # GH: 26214 - df = DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.default_rng(2).rand(10, 2)) result = df.boxplot(color=colors_kwd, return_type="dict") for k, v in expected.items(): assert 
result[k][0].get_color() == v @@ -268,7 +270,7 @@ def test_color_kwd(self, colors_kwd, expected): ) def test_colors_in_theme(self, scheme, expected): # GH: 40769 - df = DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.default_rng(2).rand(10, 2)) import matplotlib.pyplot as plt plt.style.use(scheme) @@ -282,7 +284,7 @@ def test_colors_in_theme(self, scheme, expected): ) def test_color_kwd_errors(self, dict_colors, msg): # GH: 26214 - df = DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.default_rng(2).rand(10, 2)) with pytest.raises(ValueError, match=msg): df.boxplot(color=dict_colors, return_type="dict") @@ -297,7 +299,7 @@ def test_color_kwd_errors(self, dict_colors, msg): ) def test_specified_props_kwd(self, props, expected): # GH 30346 - df = DataFrame({k: np.random.random(10) for k in "ABC"}) + df = DataFrame({k: np.random.default_rng(2).random(10) for k in "ABC"}) kwd = {props: {"color": "C1"}} result = df.boxplot(return_type="dict", **kwd) @@ -307,9 +309,9 @@ def test_specified_props_kwd(self, props, expected): def test_plot_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.randn(10), - "b": np.random.randn(10), - "group": np.random.choice(["group1", "group2"], 10), + "a": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).randn(10), + "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) xlabel, ylabel = "x", "y" @@ -321,9 +323,9 @@ def test_plot_xlabel_ylabel(self, vert): def test_boxplot_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.randn(10), - "b": np.random.randn(10), - "group": np.random.choice(["group1", "group2"], 10), + "a": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).randn(10), + "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) xlabel, ylabel = "x", "y" @@ -335,9 +337,9 @@ def test_boxplot_xlabel_ylabel(self, vert): def test_boxplot_group_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.randn(10), - "b": 
np.random.randn(10), - "group": np.random.choice(["group1", "group2"], 10), + "a": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).randn(10), + "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) xlabel, ylabel = "x", "y" @@ -351,9 +353,9 @@ def test_boxplot_group_xlabel_ylabel(self, vert): def test_boxplot_group_no_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.randn(10), - "b": np.random.randn(10), - "group": np.random.choice(["group1", "group2"], 10), + "a": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).randn(10), + "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) ax = df.boxplot(by="group", vert=vert) @@ -378,7 +380,9 @@ def test_boxplot_legacy1_return_type(self, hist_df): @pytest.mark.slow def test_boxplot_legacy2(self): tuples = zip(string.ascii_letters[:10], range(10)) - df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + ) grouped = df.groupby(level=1) with tm.assert_produces_warning(UserWarning, check_stacklevel=False): axes = _check_plot_works(grouped.boxplot, return_type="axes") @@ -387,7 +391,9 @@ def test_boxplot_legacy2(self): @pytest.mark.slow def test_boxplot_legacy2_return_type(self): tuples = zip(string.ascii_letters[:10], range(10)) - df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + df = DataFrame( + np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + ) grouped = df.groupby(level=1) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") _check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -398,7 +404,9 @@ def test_boxplot_legacy2_return_type(self): ) def test_boxplot_legacy3(self, subplots, warn, axes_num, layout): tuples = zip(string.ascii_letters[:10], range(10)) - df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + df = 
DataFrame( + np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + ) msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): grouped = df.unstack(level=1).groupby(level=0, axis=1) @@ -410,9 +418,11 @@ def test_boxplot_legacy3(self, subplots, warn, axes_num, layout): def test_grouped_plot_fignums(self): n = 10 - weight = Series(np.random.normal(166, 20, size=n)) - height = Series(np.random.normal(60, 10, size=n)) - gender = np.random.RandomState(42).choice(["male", "female"], size=n) + weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) + height = Series(np.random.default_rng(2).normal(60, 10, size=n)) + gender = ( + np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) + ) df = DataFrame({"height": height, "weight": weight, "gender": gender}) gb = df.groupby("gender") @@ -427,9 +437,11 @@ def test_grouped_plot_fignums(self): def test_grouped_plot_fignums_excluded_col(self): n = 10 - weight = Series(np.random.normal(166, 20, size=n)) - height = Series(np.random.normal(60, 10, size=n)) - gender = np.random.RandomState(42).choice(["male", "female"], size=n) + weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) + height = Series(np.random.default_rng(2).normal(60, 10, size=n)) + gender = ( + np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) + ) df = DataFrame({"height": height, "weight": weight, "gender": gender}) # now works with GH 5610 as gender is excluded df.groupby("gender").hist() @@ -469,7 +481,7 @@ def test_grouped_box_return_type_arg(self, hist_df, return_type): @pytest.mark.parametrize("return_type", ["dict", "axes", "both"]) def test_grouped_box_return_type_arg_duplcate_cats(self, return_type): columns2 = "X B C D A".split() - df2 = DataFrame(np.random.randn(6, 5), columns=columns2) + df2 = DataFrame(np.random.default_rng(2).randn(6, 5), columns=columns2) categories2 = "A B".split() df2["category"] = 
categories2 * 3 @@ -678,9 +690,9 @@ def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): # GH 16748 df = DataFrame( { - "cat": np.random.choice(list("abcde"), 100), - "v": np.random.rand(100), - "v1": np.random.rand(100), + "cat": np.random.default_rng(2).choice(list("abcde"), 100), + "v": np.random.default_rng(2).rand(100), + "v1": np.random.default_rng(2).rand(100), } ) grouped = df.groupby("cat") @@ -708,7 +720,9 @@ def test_boxplot_multiindex_column(self): ] tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=["first", "second"]) - df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + df = DataFrame( + np.random.default_rng(2).randn(3, 8), index=["A", "B", "C"], columns=index + ) col = [("bar", "one"), ("bar", "two")] axes = _check_plot_works(df.boxplot, column=col, return_type="axes") diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index af8bcd943765e..ff13feeb413cf 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -60,7 +60,7 @@ def test_ts_plot_with_tz(self, tz_aware_fixture): def test_fontsize_set_correctly(self): # For issue #8765 - df = DataFrame(np.random.randn(10, 9), index=range(10)) + df = DataFrame(np.random.default_rng(2).randn(10, 9), index=range(10)) _, ax = mpl.pyplot.subplots() df.plot(fontsize=2, ax=ax) for label in ax.get_xticklabels() + ax.get_yticklabels(): @@ -71,19 +71,19 @@ def test_frame_inferred(self): idx = date_range("1/1/1987", freq="MS", periods=100) idx = DatetimeIndex(idx.values, freq=None) - df = DataFrame(np.random.randn(len(idx), 3), index=idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) _check_plot_works(df.plot) # axes freq idx = idx[0:40].union(idx[45:99]) - df2 = DataFrame(np.random.randn(len(idx), 3), index=idx) + df2 = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) _check_plot_works(df2.plot) def 
test_frame_inferred_n_gt_1(self): # N > 1 idx = date_range("2008-1-1 00:15:00", freq="15T", periods=10) idx = DatetimeIndex(idx.values, freq=None) - df = DataFrame(np.random.randn(len(idx), 3), index=idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) _check_plot_works(df.plot) def test_is_error_nozeroindex(self): @@ -112,7 +112,7 @@ def test_nonnumeric_exclude_error(self): @pytest.mark.parametrize("freq", ["S", "T", "H", "D", "W", "M", "Q", "A"]) def test_tsplot_period(self, freq): idx = period_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) _, ax = mpl.pyplot.subplots() _check_plot_works(ser.plot, ax=ax) @@ -121,7 +121,7 @@ def test_tsplot_period(self, freq): ) def test_tsplot_datetime(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) _, ax = mpl.pyplot.subplots() _check_plot_works(ser.plot, ax=ax) @@ -150,7 +150,7 @@ def test_both_style_and_color(self): def test_high_freq(self, freq): _, ax = mpl.pyplot.subplots() rng = date_range("1/1/2012", periods=100, freq=freq) - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _check_plot_works(ser.plot, ax=ax) def test_get_datevalue(self): @@ -182,7 +182,7 @@ def check_format_of_first_point(ax, expected_string): @pytest.mark.parametrize("freq", ["S", "T", "H", "D", "W", "M", "Q", "A"]) def test_line_plot_period_series(self, freq): idx = period_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) _check_plot_works(ser.plot, ser.index.freq) @pytest.mark.parametrize( @@ -192,7 +192,7 @@ def test_line_plot_period_mlt_series(self, frqncy): # test period index line plot for series with multiples (`mlt`) of the # 
frequency (`frqncy`) rule code. tests resolution of issue #14763 idx = period_range("12/31/1999", freq=frqncy, periods=100) - s = Series(np.random.randn(len(idx)), idx) + s = Series(np.random.default_rng(2).randn(len(idx)), idx) _check_plot_works(s.plot, s.index.freq.rule_code) @pytest.mark.parametrize( @@ -200,13 +200,17 @@ def test_line_plot_period_mlt_series(self, frqncy): ) def test_line_plot_datetime_series(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) _check_plot_works(ser.plot, ser.index.freq.rule_code) @pytest.mark.parametrize("freq", ["S", "T", "H", "D", "W", "M", "Q", "A"]) def test_line_plot_period_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) - df = DataFrame(np.random.randn(len(idx), 3), index=idx, columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).randn(len(idx), 3), + index=idx, + columns=["A", "B", "C"], + ) _check_plot_works(df.plot, df.index.freq) @pytest.mark.parametrize( @@ -217,7 +221,11 @@ def test_line_plot_period_mlt_frame(self, frqncy): # of the frequency (`frqncy`) rule code. 
tests resolution of issue # #14763 idx = period_range("12/31/1999", freq=frqncy, periods=100) - df = DataFrame(np.random.randn(len(idx), 3), index=idx, columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).randn(len(idx), 3), + index=idx, + columns=["A", "B", "C"], + ) freq = df.index.asfreq(df.index.freq.rule_code).freq _check_plot_works(df.plot, freq) @@ -226,7 +234,11 @@ def test_line_plot_period_mlt_frame(self, frqncy): ) def test_line_plot_datetime_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) - df = DataFrame(np.random.randn(len(idx), 3), index=idx, columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).randn(len(idx), 3), + index=idx, + columns=["A", "B", "C"], + ) freq = df.index.to_period(df.index.freq.rule_code).freq _check_plot_works(df.plot, freq) @@ -235,7 +247,7 @@ def test_line_plot_datetime_frame(self, freq): ) def test_line_plot_inferred_freq(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) ser = Series(ser.values, Index(np.asarray(ser.index))) _check_plot_works(ser.plot, ser.index.inferred_freq) @@ -256,12 +268,12 @@ def test_plot_offset_freq(self): def test_plot_offset_freq_business(self): dr = date_range("2023-01-01", freq="BQS", periods=10) - ser = Series(np.random.randn(len(dr)), index=dr) + ser = Series(np.random.default_rng(2).randn(len(dr)), index=dr) _check_plot_works(ser.plot) def test_plot_multiple_inferred_freq(self): dr = Index([datetime(2000, 1, 1), datetime(2000, 1, 6), datetime(2000, 1, 11)]) - ser = Series(np.random.randn(len(dr)), index=dr) + ser = Series(np.random.default_rng(2).randn(len(dr)), index=dr) _check_plot_works(ser.plot) @pytest.mark.xfail(reason="Api changed in 3.6.0") @@ -269,7 +281,7 @@ def test_uhf(self): import pandas.plotting._matplotlib.converter as conv idx = date_range("2012-6-22 21:59:51.960928", freq="L", periods=500) - df = 
DataFrame(np.random.randn(len(idx), 2), index=idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), index=idx) _, ax = mpl.pyplot.subplots() df.plot(ax=ax) @@ -285,7 +297,7 @@ def test_uhf(self): def test_irreg_hf(self): idx = date_range("2012-6-22 21:59:51", freq="S", periods=10) - df = DataFrame(np.random.randn(len(idx), 2), index=idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), index=idx) irreg = df.iloc[[0, 1, 3, 4]] _, ax = mpl.pyplot.subplots() @@ -297,7 +309,7 @@ def test_irreg_hf(self): def test_irreg_hf_object(self): idx = date_range("2012-6-22 21:59:51", freq="S", periods=10) - df2 = DataFrame(np.random.randn(len(idx), 2), index=idx) + df2 = DataFrame(np.random.default_rng(2).randn(len(idx), 2), index=idx) _, ax = mpl.pyplot.subplots() df2.index = df2.index.astype(object) df2.plot(ax=ax) @@ -412,7 +424,7 @@ def test_finder_daily(self): rs2 = [] for n in day_lst: rng = bdate_range("1999-1-1", periods=n) - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -434,7 +446,7 @@ def test_finder_quarterly(self): rs2 = [] for n in yrs: rng = period_range("1987Q2", periods=int(n * 4), freq="Q") - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -456,7 +468,7 @@ def test_finder_monthly(self): rs2 = [] for n in yrs: rng = period_range("1987Q2", periods=int(n * 12), freq="M") - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -472,7 +484,7 @@ def test_finder_monthly(self): def test_finder_monthly_long(self): rng = period_range("1988Q1", periods=24 * 12, freq="M") - ser = Series(np.random.randn(len(rng)), rng) + ser = 
Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -486,7 +498,7 @@ def test_finder_annual(self): rs = [] for nyears in [5, 10, 19, 49, 99, 199, 599, 1001]: rng = period_range("1987", periods=nyears, freq="A") - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -499,7 +511,7 @@ def test_finder_annual(self): def test_finder_minutely(self): nminutes = 50 * 24 * 60 rng = date_range("1/1/1999", freq="Min", periods=nminutes) - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -511,7 +523,7 @@ def test_finder_minutely(self): def test_finder_hourly(self): nhours = 23 rng = date_range("1/1/1999", freq="H", periods=nhours) - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -559,7 +571,7 @@ def test_gaps_irregular(self): def test_gaps_non_ts(self): # non-ts idx = [0, 1, 2, 5, 7, 9, 12, 15, 20] - ser = Series(np.random.randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx) ser.iloc[2:5] = np.nan _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) @@ -580,7 +592,7 @@ def test_gap_upsample(self): low.plot(ax=ax) idxh = date_range(low.index[0], low.index[-1], freq="12h") - s = Series(np.random.randn(len(idxh)), idxh) + s = Series(np.random.default_rng(2).randn(len(idxh)), idxh) s.plot(secondary_y=True) lines = ax.get_lines() assert len(lines) == 1 @@ -595,7 +607,7 @@ def test_gap_upsample(self): assert mask[5:25, 1].all() def test_secondary_y(self): - ser = Series(np.random.randn(10)) + ser = Series(np.random.default_rng(2).randn(10)) fig, _ = mpl.pyplot.subplots() ax = ser.plot(secondary_y=True) 
assert hasattr(ax, "left_ax") @@ -609,16 +621,16 @@ def test_secondary_y(self): mpl.pyplot.close(fig) def test_secondary_y_yaxis(self): - Series(np.random.randn(10)) - ser2 = Series(np.random.randn(10)) + Series(np.random.default_rng(2).randn(10)) + ser2 = Series(np.random.default_rng(2).randn(10)) _, ax2 = mpl.pyplot.subplots() ser2.plot(ax=ax2) assert ax2.get_yaxis().get_ticks_position() == "left" mpl.pyplot.close(ax2.get_figure()) def test_secondary_both(self): - ser = Series(np.random.randn(10)) - ser2 = Series(np.random.randn(10)) + ser = Series(np.random.default_rng(2).randn(10)) + ser2 = Series(np.random.default_rng(2).randn(10)) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) assert ax.get_yaxis().get_visible() @@ -629,7 +641,7 @@ def test_secondary_both(self): def test_secondary_y_ts(self): idx = date_range("1/1/2000", periods=10) - ser = Series(np.random.randn(10), idx) + ser = Series(np.random.default_rng(2).randn(10), idx) fig, _ = mpl.pyplot.subplots() ax = ser.plot(secondary_y=True) assert hasattr(ax, "left_ax") @@ -644,7 +656,7 @@ def test_secondary_y_ts(self): def test_secondary_y_ts_yaxis(self): idx = date_range("1/1/2000", periods=10) - ser2 = Series(np.random.randn(10), idx) + ser2 = Series(np.random.default_rng(2).randn(10), idx) _, ax2 = mpl.pyplot.subplots() ser2.plot(ax=ax2) assert ax2.get_yaxis().get_ticks_position() == "left" @@ -652,13 +664,13 @@ def test_secondary_y_ts_yaxis(self): def test_secondary_y_ts_visible(self): idx = date_range("1/1/2000", periods=10) - ser2 = Series(np.random.randn(10), idx) + ser2 = Series(np.random.default_rng(2).randn(10), idx) ax = ser2.plot() assert ax.get_yaxis().get_visible() @td.skip_if_no_scipy def test_secondary_kde(self): - ser = Series(np.random.randn(10)) + ser = Series(np.random.default_rng(2).randn(10)) fig, ax = mpl.pyplot.subplots() ax = ser.plot(secondary_y=True, kind="density", ax=ax) assert hasattr(ax, "left_ax") @@ -667,21 +679,21 @@ def test_secondary_kde(self): assert 
axes[1].get_yaxis().get_ticks_position() == "right" def test_secondary_bar(self): - ser = Series(np.random.randn(10)) + ser = Series(np.random.default_rng(2).randn(10)) fig, ax = mpl.pyplot.subplots() ser.plot(secondary_y=True, kind="bar", ax=ax) axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == "right" def test_secondary_frame(self): - df = DataFrame(np.random.randn(5, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=["a", "b", "c"]) axes = df.plot(secondary_y=["a", "c"], subplots=True) assert axes[0].get_yaxis().get_ticks_position() == "right" assert axes[1].get_yaxis().get_ticks_position() == "left" assert axes[2].get_yaxis().get_ticks_position() == "right" def test_secondary_bar_frame(self): - df = DataFrame(np.random.randn(5, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=["a", "b", "c"]) axes = df.plot(kind="bar", secondary_y=["a", "c"], subplots=True) assert axes[0].get_yaxis().get_ticks_position() == "right" assert axes[1].get_yaxis().get_ticks_position() == "left" @@ -756,8 +768,8 @@ def test_mixed_freq_irregular_first_df(self): def test_mixed_freq_hf_first(self): idxh = date_range("1/1/1999", periods=365, freq="D") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() high.plot(ax=ax) low.plot(ax=ax) @@ -766,7 +778,7 @@ def test_mixed_freq_hf_first(self): def test_mixed_freq_alignment(self): ts_ind = date_range("2012-01-01 13:00", "2012-01-02", freq="H") - ts_data = np.random.randn(12) + ts_data = np.random.default_rng(2).randn(12) ts = Series(ts_data, index=ts_ind) ts2 = ts.asfreq("T").interpolate() @@ -780,8 +792,8 @@ def test_mixed_freq_alignment(self): def test_mixed_freq_lf_first(self): 
idxh = date_range("1/1/1999", periods=365, freq="D") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() low.plot(legend=True, ax=ax) high.plot(legend=True, ax=ax) @@ -794,8 +806,8 @@ def test_mixed_freq_lf_first(self): def test_mixed_freq_lf_first_hourly(self): idxh = date_range("1/1/1999", periods=240, freq="T") idxl = date_range("1/1/1999", periods=4, freq="H") - high = Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() low.plot(ax=ax) high.plot(ax=ax) @@ -806,7 +818,7 @@ def test_mixed_freq_irreg_period(self): ts = tm.makeTimeSeries() irreg = ts.iloc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]] rng = period_range("1/3/2000", periods=30, freq="B") - ps = Series(np.random.randn(len(rng)), rng) + ps = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() irreg.plot(ax=ax) ps.plot(ax=ax) @@ -863,8 +875,8 @@ def test_nat_handling(self): def test_to_weekly_resampling(self): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() high.plot(ax=ax) low.plot(ax=ax) @@ -874,8 +886,8 @@ def test_to_weekly_resampling(self): def test_from_weekly_resampling(self): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") - high = 
Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() low.plot(ax=ax) high.plot(ax=ax) @@ -897,8 +909,12 @@ def test_from_weekly_resampling(self): def test_from_resampling_area_line_mixed(self, kind1, kind2): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") - high = DataFrame(np.random.rand(len(idxh), 3), index=idxh, columns=[0, 1, 2]) - low = DataFrame(np.random.rand(len(idxl), 3), index=idxl, columns=[0, 1, 2]) + high = DataFrame( + np.random.default_rng(2).rand(len(idxh), 3), index=idxh, columns=[0, 1, 2] + ) + low = DataFrame( + np.random.default_rng(2).rand(len(idxl), 3), index=idxl, columns=[0, 1, 2] + ) _, ax = mpl.pyplot.subplots() low.plot(kind=kind1, stacked=True, ax=ax) @@ -945,8 +961,12 @@ def test_from_resampling_area_line_mixed(self, kind1, kind2): def test_from_resampling_area_line_mixed_high_to_low(self, kind1, kind2): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") - high = DataFrame(np.random.rand(len(idxh), 3), index=idxh, columns=[0, 1, 2]) - low = DataFrame(np.random.rand(len(idxl), 3), index=idxl, columns=[0, 1, 2]) + high = DataFrame( + np.random.default_rng(2).rand(len(idxh), 3), index=idxh, columns=[0, 1, 2] + ) + low = DataFrame( + np.random.default_rng(2).rand(len(idxl), 3), index=idxl, columns=[0, 1, 2] + ) _, ax = mpl.pyplot.subplots() high.plot(kind=kind1, stacked=True, ax=ax) low.plot(kind=kind2, stacked=True, ax=ax) @@ -991,8 +1011,8 @@ def test_mixed_freq_second_millisecond(self): # GH 7772, GH 7760 idxh = date_range("2014-07-01 09:00", freq="S", periods=50) idxl = date_range("2014-07-01 09:00", freq="100L", periods=500) - high = Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = 
Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) # high to low _, ax = mpl.pyplot.subplots() high.plot(ax=ax) @@ -1005,8 +1025,8 @@ def test_mixed_freq_second_millisecond_low_to_high(self): # GH 7772, GH 7760 idxh = date_range("2014-07-01 09:00", freq="S", periods=50) idxl = date_range("2014-07-01 09:00", freq="100L", periods=500) - high = Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) # low to high _, ax = mpl.pyplot.subplots() low.plot(ax=ax) @@ -1018,23 +1038,29 @@ def test_mixed_freq_second_millisecond_low_to_high(self): def test_irreg_dtypes(self): # date idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] - df = DataFrame(np.random.randn(len(idx), 3), Index(idx, dtype=object)) + df = DataFrame( + np.random.default_rng(2).randn(len(idx), 3), Index(idx, dtype=object) + ) _check_plot_works(df.plot) def test_irreg_dtypes_dt64(self): # np.datetime64 idx = date_range("1/1/2000", periods=10) idx = idx[[0, 2, 5, 9]].astype(object) - df = DataFrame(np.random.randn(len(idx), 3), idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), idx) _, ax = mpl.pyplot.subplots() _check_plot_works(df.plot, ax=ax) def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) - deltas = np.random.randint(1, 20, 3).cumsum() + deltas = np.random.default_rng(2).randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) df = DataFrame( - {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + { + "a": np.random.default_rng(2).randn(len(ts)), + "b": np.random.default_rng(2).randn(len(ts)), + }, + index=ts, ) _, ax = mpl.pyplot.subplots() df.plot(ax=ax) @@ -1055,10 +1081,14 @@ def test_time(self): def test_time_change_xlim(self): t = datetime(1, 1, 1, 3, 30, 0) - deltas = 
np.random.randint(1, 20, 3).cumsum() + deltas = np.random.default_rng(2).randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) df = DataFrame( - {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + { + "a": np.random.default_rng(2).randn(len(ts)), + "b": np.random.default_rng(2).randn(len(ts)), + }, + index=ts, ) _, ax = mpl.pyplot.subplots() df.plot(ax=ax) @@ -1096,10 +1126,14 @@ def test_time_change_xlim(self): def test_time_musec(self): t = datetime(1, 1, 1, 3, 30, 0) - deltas = np.random.randint(1, 20, 3).cumsum() + deltas = np.random.default_rng(2).randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(microseconds=int(x))).time() for x in deltas]) df = DataFrame( - {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + { + "a": np.random.default_rng(2).randn(len(ts)), + "b": np.random.default_rng(2).randn(len(ts)), + }, + index=ts, ) _, ax = mpl.pyplot.subplots() ax = df.plot(ax=ax) @@ -1128,8 +1162,8 @@ def test_time_musec(self): def test_secondary_upsample(self): idxh = date_range("1/1/1999", periods=365, freq="D") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.randn(len(idxh)), idxh) - low = Series(np.random.randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() low.plot(ax=ax) ax = high.plot(secondary_y=True, ax=ax) @@ -1246,7 +1280,7 @@ def test_secondary_legend_nonts_multi_col(self): @pytest.mark.xfail(reason="Api changed in 3.6.0") def test_format_date_axis(self): rng = date_range("1/1/2012", periods=12, freq="M") - df = DataFrame(np.random.randn(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).randn(len(rng), 3), rng) _, ax = mpl.pyplot.subplots() ax = df.plot(ax=ax) xaxis = ax.get_xaxis() @@ -1374,7 +1408,7 @@ def test_format_timedelta_ticks_narrow(self): expected_labels = 
[f"00:00:00.0000000{i:0>2d}" for i in np.arange(10)] rng = timedelta_range("0", periods=10, freq="ns") - df = DataFrame(np.random.randn(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).randn(len(rng), 3), rng) _, ax = mpl.pyplot.subplots() df.plot(fontsize=2, ax=ax) mpl.pyplot.draw() @@ -1398,7 +1432,7 @@ def test_format_timedelta_ticks_wide(self): ] rng = timedelta_range("0", periods=10, freq="1 d") - df = DataFrame(np.random.randn(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).randn(len(rng), 3), rng) _, ax = mpl.pyplot.subplots() ax = df.plot(fontsize=2, ax=ax) mpl.pyplot.draw() @@ -1417,14 +1451,14 @@ def test_timedelta_plot(self): def test_timedelta_long_period(self): # test long period index = timedelta_range("1 day 2 hr 30 min 10 s", periods=10, freq="1 d") - s = Series(np.random.randn(len(index)), index) + s = Series(np.random.default_rng(2).randn(len(index)), index) _, ax = mpl.pyplot.subplots() _check_plot_works(s.plot, ax=ax) def test_timedelta_short_period(self): # test short period index = timedelta_range("1 day 2 hr 30 min 10 s", periods=10, freq="1 ns") - s = Series(np.random.randn(len(index)), index) + s = Series(np.random.default_rng(2).randn(len(index)), index) _, ax = mpl.pyplot.subplots() _check_plot_works(s.plot, ax=ax) @@ -1468,7 +1502,10 @@ def test_add_matplotlib_datetime64(self): # GH9053 - ensure that a plot with PeriodConverter still understands # datetime64 data. 
This still fails because matplotlib overrides the # ax.xaxis.converter with a DatetimeConverter - s = Series(np.random.randn(10), index=date_range("1970-01-02", periods=10)) + s = Series( + np.random.default_rng(2).randn(10), + index=date_range("1970-01-02", periods=10), + ) ax = s.plot() with tm.assert_produces_warning(DeprecationWarning): # multi-dimensional indexing @@ -1478,7 +1515,9 @@ def test_add_matplotlib_datetime64(self): def test_matplotlib_scatter_datetime64(self): # https://github.com/matplotlib/matplotlib/issues/11391 - df = DataFrame(np.random.RandomState(0).rand(10, 2), columns=["x", "y"]) + df = DataFrame( + np.random.default_rng(2).RandomState(0).rand(10, 2), columns=["x", "y"] + ) df["time"] = date_range("2018-01-01", periods=10, freq="D") _, ax = mpl.pyplot.subplots() ax.scatter(x="time", y="y", data=df) diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index bf59558d10f24..848131882f2e6 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -20,28 +20,37 @@ class TestDataFrameGroupByPlots: def test_series_groupby_plotting_nominally_works(self): n = 10 - weight = Series(np.random.normal(166, 20, size=n)) - gender = np.random.RandomState(42).choice(["male", "female"], size=n) + weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) + gender = ( + np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) + ) weight.groupby(gender).plot() def test_series_groupby_plotting_nominally_works_hist(self): n = 10 - height = Series(np.random.normal(60, 10, size=n)) - gender = np.random.RandomState(42).choice(["male", "female"], size=n) + height = Series(np.random.default_rng(2).normal(60, 10, size=n)) + gender = ( + np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) + ) height.groupby(gender).hist() def test_series_groupby_plotting_nominally_works_alpha(self): n = 10 - height = Series(np.random.normal(60, 10, size=n)) - 
gender = np.random.RandomState(42).choice(["male", "female"], size=n) + height = Series(np.random.default_rng(2).normal(60, 10, size=n)) + gender = ( + np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) + ) # Regression test for GH8733 height.groupby(gender).plot(alpha=0.5) def test_plotting_with_float_index_works(self): # GH 7025 df = DataFrame( - {"def": [1, 1, 1, 2, 2, 2, 3, 3, 3], "val": np.random.randn(9)}, + { + "def": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "val": np.random.default_rng(2).randn(9), + }, index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0], ) @@ -50,7 +59,10 @@ def test_plotting_with_float_index_works(self): def test_plotting_with_float_index_works_apply(self): # GH 7025 df = DataFrame( - {"def": [1, 1, 1, 2, 2, 2, 3, 3, 3], "val": np.random.randn(9)}, + { + "def": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "val": np.random.default_rng(2).randn(9), + }, index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0], ) df.groupby("def")["val"].apply(lambda x: x.plot()) @@ -95,7 +107,9 @@ def test_groupby_hist_frame_with_legend(self, column, expected_axes_num): expected_labels = column or [["a"], ["b"]] index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + ) g = df.groupby("c") for axes in g.hist(legend=True, column=column): @@ -107,7 +121,9 @@ def test_groupby_hist_frame_with_legend(self, column, expected_axes_num): def test_groupby_hist_frame_with_legend_raises(self, column): # GH 6279 - DataFrameGroupBy histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + ) g = df.groupby("c") with pytest.raises(ValueError, match="Cannot use both legend and label"): @@ -116,7 +132,9 @@ def 
test_groupby_hist_frame_with_legend_raises(self, column): def test_groupby_hist_series_with_legend(self): # GH 6279 - SeriesGroupBy histogram can have a legend index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + ) g = df.groupby("c") for ax in g["a"].hist(legend=True): @@ -126,7 +144,9 @@ def test_groupby_hist_series_with_legend(self): def test_groupby_hist_series_with_legend_raises(self): # GH 6279 - SeriesGroupBy histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + ) g = df.groupby("c") with pytest.raises(ValueError, match="Cannot use both legend and label"): diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 77cc2f38dc6f0..9daeb83871801 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -72,7 +72,7 @@ def test_hist_legacy_by_fig_error(self, ts): ts.hist(by=ts.index, figure=fig) def test_hist_bins_legacy(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 @@ -120,8 +120,8 @@ def test_hist_no_overlap(self): subplot, ) - x = Series(np.random.randn(2)) - y = Series(np.random.randn(2)) + x = Series(np.random.default_rng(2).randn(2)) + y = Series(np.random.default_rng(2).randn(2)) subplot(121) x.hist() subplot(122) @@ -156,7 +156,7 @@ def test_plot_fails_when_ax_differs_from_figure(self, ts): ) def test_histtype_argument(self, histtype, expected): # GH23992 Verify functioning of histtype argument - ser = Series(np.random.randint(1, 10)) + ser = Series(np.random.default_rng(2).randint(1, 10)) ax = 
ser.hist(histtype=histtype) _check_patches_all_filled(ax, filled=expected) @@ -166,7 +166,7 @@ def test_histtype_argument(self, histtype, expected): def test_hist_with_legend(self, by, expected_axes_num, expected_layout): # GH 6279 - Series histogram can have a legend index = 15 * ["1"] + 15 * ["2"] - s = Series(np.random.randn(30), index=index, name="a") + s = Series(np.random.default_rng(2).randn(30), index=index, name="a") s.index.name = "b" # Use default_axes=True when plotting method generate subplots itself @@ -178,7 +178,7 @@ def test_hist_with_legend(self, by, expected_axes_num, expected_layout): def test_hist_with_legend_raises(self, by): # GH 6279 - Series histogram with legend and label raises index = 15 * ["1"] + 15 * ["2"] - s = Series(np.random.randn(30), index=index, name="a") + s = Series(np.random.default_rng(2).randn(30), index=index, name="a") s.index.name = "b" with pytest.raises(ValueError, match="Cannot use both legend and label"): @@ -259,9 +259,9 @@ def test_hist_df_legacy(self, hist_df): @pytest.mark.slow def test_hist_df_legacy_layout(self): # make sure layout is handled - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) df[2] = to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 812419200000000000, 819331200000000000, size=10, @@ -277,15 +277,15 @@ def test_hist_df_legacy_layout(self): @pytest.mark.slow def test_hist_df_legacy_layout2(self): - df = DataFrame(np.random.randn(10, 1)) + df = DataFrame(np.random.default_rng(2).randn(10, 1)) _check_plot_works(df.hist) @pytest.mark.slow def test_hist_df_legacy_layout3(self): # make sure layout is handled - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) df[5] = to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 812419200000000000, 819331200000000000, size=10, @@ -301,9 +301,9 @@ def test_hist_df_legacy_layout3(self): "kwargs", [{"sharex": True, "sharey": True}, 
{"figsize": (8, 10)}, {"bins": 5}] ) def test_hist_df_legacy_layout_kwargs(self, kwargs): - df = DataFrame(np.random.randn(10, 5)) + df = DataFrame(np.random.default_rng(2).randn(10, 5)) df[5] = to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 812419200000000000, 819331200000000000, size=10, @@ -353,15 +353,15 @@ def test_hist_non_numerical_or_datetime_raises(self): # gh-10444, GH32590 df = DataFrame( { - "a": np.random.rand(10), - "b": np.random.randint(0, 10, 10), + "a": np.random.default_rng(2).rand(10), + "b": np.random.default_rng(2).randint(0, 10, 10), "c": to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 ) ), "d": to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 ), utc=True, @@ -389,9 +389,9 @@ def test_hist_non_numerical_or_datetime_raises(self): ), ) def test_hist_layout(self, layout_test): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) df[2] = to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 812419200000000000, 819331200000000000, size=10, @@ -403,9 +403,9 @@ def test_hist_layout(self, layout_test): _check_axes_shape(axes, axes_num=3, layout=expected) def test_hist_layout_error(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) df[2] = to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 812419200000000000, 819331200000000000, size=10, @@ -427,9 +427,9 @@ def test_hist_layout_error(self): # GH 9351 def test_tight_layout(self): - df = DataFrame(np.random.randn(100, 2)) + df = DataFrame(np.random.default_rng(2).randn(100, 2)) df[2] = to_datetime( - np.random.randint( + np.random.default_rng(2).randint( 812419200000000000, 819331200000000000, size=100, @@ -499,7 +499,9 @@ def test_hist_column_order_unchanged(self, column, expected): ) def 
test_histtype_argument(self, histtype, expected): # GH23992 Verify functioning of histtype argument - df = DataFrame(np.random.randint(1, 10, size=(100, 2)), columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randint(1, 10, size=(100, 2)), columns=["a", "b"] + ) ax = df.hist(histtype=histtype) _check_patches_all_filled(ax, filled=expected) @@ -514,7 +516,9 @@ def test_hist_with_legend(self, by, column): expected_labels = [expected_labels] * 2 index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + ) # Use default_axes=True when plotting method generate subplots itself axes = _check_plot_works( @@ -536,13 +540,15 @@ def test_hist_with_legend(self, by, column): def test_hist_with_legend_raises(self, by, column): # GH 6279 - DataFrame histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") - df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + ) with pytest.raises(ValueError, match="Cannot use both legend and label"): df.hist(legend=True, by=by, column=column, label="d") def test_hist_df_kwargs(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(np.random.default_rng(2).randn(10, 2)) _, ax = mpl.pyplot.subplots() ax = df.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 10 @@ -550,7 +556,8 @@ def test_hist_df_kwargs(self): def test_hist_df_with_nonnumerics(self): # GH 9853 df = DataFrame( - np.random.RandomState(42).randn(10, 4), columns=["A", "B", "C", "D"] + np.random.default_rng(2).RandomState(42).randn(10, 4), + columns=["A", "B", "C", "D"], ) df["E"] = ["x", "y"] * 5 _, ax = mpl.pyplot.subplots() @@ -560,7 +567,8 @@ def test_hist_df_with_nonnumerics(self): def test_hist_df_with_nonnumerics_no_bins(self): # GH 9853 df = DataFrame( - 
np.random.RandomState(42).randn(10, 4), columns=["A", "B", "C", "D"] + np.random.default_rng(2).RandomState(42).randn(10, 4), + columns=["A", "B", "C", "D"], ) df["E"] = ["x", "y"] * 5 _, ax = mpl.pyplot.subplots() @@ -569,7 +577,7 @@ def test_hist_df_with_nonnumerics_no_bins(self): def test_hist_secondary_legend(self): # GH 9610 - df = DataFrame(np.random.randn(30, 4), columns=list("abcd")) + df = DataFrame(np.random.default_rng(2).randn(30, 4), columns=list("abcd")) # primary -> secondary _, ax = mpl.pyplot.subplots() @@ -583,7 +591,7 @@ def test_hist_secondary_legend(self): def test_hist_secondary_secondary(self): # GH 9610 - df = DataFrame(np.random.randn(30, 4), columns=list("abcd")) + df = DataFrame(np.random.default_rng(2).randn(30, 4), columns=list("abcd")) # secondary -> secondary _, ax = mpl.pyplot.subplots() ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax) @@ -596,7 +604,7 @@ def test_hist_secondary_secondary(self): def test_hist_secondary_primary(self): # GH 9610 - df = DataFrame(np.random.randn(30, 4), columns=list("abcd")) + df = DataFrame(np.random.default_rng(2).randn(30, 4), columns=list("abcd")) # secondary -> primary _, ax = mpl.pyplot.subplots() ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax) @@ -643,7 +651,7 @@ class TestDataFrameGroupByPlots: def test_grouped_hist_legacy(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randn(10, 1), columns=["A"]) df["B"] = to_datetime( rs.randint( @@ -660,7 +668,7 @@ def test_grouped_hist_legacy(self): _check_axes_shape(axes, axes_num=4, layout=(2, 2)) def test_grouped_hist_legacy_axes_shape_no_col(self): - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randn(10, 1), columns=["A"]) df["B"] = to_datetime( rs.randint( @@ -676,7 +684,7 @@ def test_grouped_hist_legacy_axes_shape_no_col(self): _check_axes_shape(axes, 
axes_num=4, layout=(2, 2)) def test_grouped_hist_legacy_single_key(self): - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randn(10, 1), columns=["A"]) df["B"] = to_datetime( rs.randint( @@ -698,7 +706,7 @@ def test_grouped_hist_legacy_grouped_hist_kwargs(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randn(10, 1), columns=["A"]) df["B"] = to_datetime( rs.randint( @@ -734,7 +742,7 @@ def test_grouped_hist_legacy_grouped_hist_kwargs(self): def test_grouped_hist_legacy_grouped_hist(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randn(10, 1), columns=["A"]) df["B"] = to_datetime( rs.randint( @@ -753,7 +761,7 @@ def test_grouped_hist_legacy_grouped_hist(self): def test_grouped_hist_legacy_external_err(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randn(10, 1), columns=["A"]) df["B"] = to_datetime( rs.randint( @@ -770,7 +778,7 @@ def test_grouped_hist_legacy_external_err(self): _grouped_hist(df.A, by=df.C, foo="bar") def test_grouped_hist_legacy_figsize_err(self): - rs = np.random.RandomState(42) + rs = np.random.default_rng(2).RandomState(42) df = DataFrame(rs.randn(10, 1), columns=["A"]) df["B"] = to_datetime( rs.randint( @@ -788,9 +796,9 @@ def test_grouped_hist_legacy_figsize_err(self): def test_grouped_hist_legacy2(self): n = 10 - weight = Series(np.random.normal(166, 20, size=n)) - height = Series(np.random.normal(60, 10, size=n)) - gender_int = np.random.RandomState(42).choice([0, 1], size=n) + weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) + height = Series(np.random.default_rng(2).normal(60, 10, size=n)) + gender_int = 
np.random.default_rng(2).RandomState(42).choice([0, 1], size=n) df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int}) gb = df_int.groupby("gender") axes = gb.hist() @@ -943,6 +951,8 @@ def test_axis_share_xy(self, hist_df): ) def test_histtype_argument(self, histtype, expected): # GH23992 Verify functioning of histtype argument - df = DataFrame(np.random.randint(1, 10, size=(10, 2)), columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).randint(1, 10, size=(10, 2)), columns=["a", "b"] + ) ax = df.hist(by="a", histtype=histtype) _check_patches_all_filled(ax, filled=expected) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 53219e0d20b6d..f28834842d4af 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -106,7 +106,7 @@ def test_scatter_matrix_axis(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.RandomState(42).randn(100, 3)) + df = DataFrame(np.random.default_rng(2).RandomState(42).randn(100, 3)) # we are plotting multiples on a sub-plot with tm.assert_produces_warning(UserWarning, check_stacklevel=False): @@ -131,7 +131,7 @@ def test_scatter_matrix_axis_smaller(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.RandomState(42).randn(100, 3)) + df = DataFrame(np.random.default_rng(2).RandomState(42).randn(100, 3)) df[0] = (df[0] - 2) / 3 # we are plotting multiples on a sub-plot @@ -170,9 +170,9 @@ def test_andrews_curves_no_warning(self, iris): "iris", DataFrame( { - "A": np.random.rand(10), - "B": np.random.rand(10), - "C": np.random.rand(10), + "A": np.random.default_rng(2).rand(10), + "B": np.random.default_rng(2).rand(10), + "C": np.random.default_rng(2).rand(10), "Name": ["A"] * 10, } ), @@ -197,9 +197,9 @@ def test_andrews_curves_linecolors(self, request, df, linecolors): "iris", DataFrame( { - "A": np.random.rand(10), - "B": np.random.rand(10), - "C": 
np.random.rand(10), + "A": np.random.default_rng(2).rand(10), + "B": np.random.default_rng(2).rand(10), + "C": np.random.default_rng(2).rand(10), "Name": ["A"] * 10, } ), @@ -411,11 +411,11 @@ def test_get_standard_colors_random_seed(self): # GH17525 df = DataFrame(np.zeros((10, 10))) - # Make sure that the np.random.seed isn't reset by get_standard_colors + # Make sure that the random seed isn't reset by get_standard_colors plotting.parallel_coordinates(df, 0) - rand1 = np.random.random() + rand1 = np.random.default_rng(None).random() plotting.parallel_coordinates(df, 0) - rand2 = np.random.random() + rand2 = np.random.default_rng(None).random() assert rand1 != rand2 def test_get_standard_colors_consistency(self): @@ -467,7 +467,7 @@ def test_get_standard_colors_no_appending(self): color_after = get_standard_colors(1, color=color_before) assert len(color_after) == len(color_before) - df = DataFrame(np.random.randn(48, 4), columns=list("ABCD")) + df = DataFrame(np.random.default_rng(2).randn(48, 4), columns=list("ABCD")) color_list = cm.gnuplot(np.linspace(0, 1, 16)) p = df.A.plot.bar(figsize=(16, 7), color=color_list) @@ -481,7 +481,7 @@ def test_dictionary_color(self, kind): expected = [(0.5, 0.24, 0.6), (0.3, 0.7, 0.7)] - df1 = DataFrame(np.random.rand(2, 2), columns=data_files) + df1 = DataFrame(np.random.default_rng(2).rand(2, 2), columns=data_files) dic_color = {"b": (0.3, 0.7, 0.7), "a": (0.5, 0.24, 0.6)} ax = df1.plot(kind=kind, color=dic_color) @@ -595,7 +595,12 @@ def test_has_externally_shared_axis_invalid_compare_axis(self): def test_externally_shared_axes(self): # Example from GH33819 # Create data - df = DataFrame({"a": np.random.randn(1000), "b": np.random.randn(1000)}) + df = DataFrame( + { + "a": np.random.default_rng(2).randn(1000), + "b": np.random.default_rng(2).randn(1000), + } + ) # Create figure fig = mpl.pyplot.figure() diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 2e9283fdf30cd..6c179786d8bb4 
100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -103,7 +103,9 @@ def test_plot_series_barh(self, series): _check_plot_works(series[:10].plot.barh) def test_plot_series_bar_ax(self): - ax = _check_plot_works(Series(np.random.randn(10)).plot.bar, color="black") + ax = _check_plot_works( + Series(np.random.default_rng(2).randn(10)).plot.bar, color="black" + ) _check_colors([ax.patches[0]], facecolors=["black"]) @pytest.mark.parametrize("kwargs", [{}, {"layout": (-1, 1)}, {"layout": (1, -1)}]) @@ -325,14 +327,14 @@ def test_bar_user_colors(self): assert result == expected def test_rotation_default(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) # Default rot 0 _, ax = mpl.pyplot.subplots() axes = df.plot(ax=ax) _check_ticks_props(axes, xrot=0) def test_rotation_30(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(np.random.default_rng(2).randn(5, 5)) _, ax = mpl.pyplot.subplots() axes = df.plot(rot=30, ax=ax) _check_ticks_props(axes, xrot=30) @@ -342,7 +344,7 @@ def test_irregular_datetime(self): rng = date_range("1/1/2000", "3/1/2000") rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]] - ser = Series(np.random.randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), rng) _, ax = mpl.pyplot.subplots() ax = ser.plot(ax=ax) xp = DatetimeConverter.convert(datetime(1999, 1, 1), "", ax) @@ -366,7 +368,9 @@ def test_pie_series(self): # if sum of values is less than 1.0, pie handle them as rate and draw # semicircle. 
series = Series( - np.random.randint(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL" + np.random.default_rng(2).randint(1, 5), + index=["a", "b", "c", "d", "e"], + name="YLABEL", ) ax = _check_plot_works(series.plot.pie) _check_text_labels(ax.texts, series.index) @@ -374,14 +378,18 @@ def test_pie_series(self): def test_pie_series_no_label(self): series = Series( - np.random.randint(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL" + np.random.default_rng(2).randint(1, 5), + index=["a", "b", "c", "d", "e"], + name="YLABEL", ) ax = _check_plot_works(series.plot.pie, labels=None) _check_text_labels(ax.texts, [""] * 5) def test_pie_series_less_colors_than_elements(self): series = Series( - np.random.randint(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL" + np.random.default_rng(2).randint(1, 5), + index=["a", "b", "c", "d", "e"], + name="YLABEL", ) color_args = ["r", "g", "b"] ax = _check_plot_works(series.plot.pie, colors=color_args) @@ -391,7 +399,9 @@ def test_pie_series_less_colors_than_elements(self): def test_pie_series_labels_and_colors(self): series = Series( - np.random.randint(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL" + np.random.default_rng(2).randint(1, 5), + index=["a", "b", "c", "d", "e"], + name="YLABEL", ) # with labels and colors labels = ["A", "B", "C", "D", "E"] @@ -402,7 +412,9 @@ def test_pie_series_labels_and_colors(self): def test_pie_series_autopct_and_fontsize(self): series = Series( - np.random.randint(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL" + np.random.default_rng(2).randint(1, 5), + index=["a", "b", "c", "d", "e"], + name="YLABEL", ) color_args = ["r", "g", "b", "c", "m"] ax = _check_plot_works( @@ -436,8 +448,8 @@ def test_pie_nan(self): def test_df_series_secondary_legend(self): # GH 9779 - df = DataFrame(np.random.randn(30, 3), columns=list("abc")) - s = Series(np.random.randn(30), name="x") + df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) + s = 
Series(np.random.default_rng(2).randn(30), name="x") # primary -> secondary (without passing ax) _, ax = mpl.pyplot.subplots() @@ -451,8 +463,8 @@ def test_df_series_secondary_legend(self): def test_df_series_secondary_legend_with_axes(self): # GH 9779 - df = DataFrame(np.random.randn(30, 3), columns=list("abc")) - s = Series(np.random.randn(30), name="x") + df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) + s = Series(np.random.default_rng(2).randn(30), name="x") # primary -> secondary (with passing ax) _, ax = mpl.pyplot.subplots() ax = df.plot(ax=ax) @@ -465,8 +477,8 @@ def test_df_series_secondary_legend_with_axes(self): def test_df_series_secondary_legend_both(self): # GH 9779 - df = DataFrame(np.random.randn(30, 3), columns=list("abc")) - s = Series(np.random.randn(30), name="x") + df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) + s = Series(np.random.default_rng(2).randn(30), name="x") # secondary -> secondary (without passing ax) _, ax = mpl.pyplot.subplots() ax = df.plot(secondary_y=True, ax=ax) @@ -480,8 +492,8 @@ def test_df_series_secondary_legend_both(self): def test_df_series_secondary_legend_both_with_axis(self): # GH 9779 - df = DataFrame(np.random.randn(30, 3), columns=list("abc")) - s = Series(np.random.randn(30), name="x") + df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) + s = Series(np.random.default_rng(2).randn(30), name="x") # secondary -> secondary (with passing ax) _, ax = mpl.pyplot.subplots() ax = df.plot(secondary_y=True, ax=ax) @@ -495,8 +507,8 @@ def test_df_series_secondary_legend_both_with_axis(self): def test_df_series_secondary_legend_both_with_axis_2(self): # GH 9779 - df = DataFrame(np.random.randn(30, 3), columns=list("abc")) - s = Series(np.random.randn(30), name="x") + df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) + s = Series(np.random.default_rng(2).randn(30), name="x") # secondary -> secondary (with passing ax) _, 
ax = mpl.pyplot.subplots() ax = df.plot(secondary_y=True, mark_right=False, ax=ax) @@ -513,8 +525,8 @@ def test_df_series_secondary_legend_both_with_axis_2(self): ) def test_secondary_logy(self, input_logy, expected_scale): # GH 25545 - s1 = Series(np.random.randn(100)) - s2 = Series(np.random.randn(100)) + s1 = Series(np.random.default_rng(2).randn(100)) + s2 = Series(np.random.default_rng(2).randn(100)) # GH 24980 ax1 = s1.plot(logy=input_logy) @@ -524,7 +536,7 @@ def test_secondary_logy(self, input_logy, expected_scale): assert ax2.get_yscale() == expected_scale def test_plot_fails_with_dupe_color_and_style(self): - x = Series(np.random.randn(2)) + x = Series(np.random.default_rng(2).randn(2)) _, ax = mpl.pyplot.subplots() msg = ( "Cannot pass 'style' string with a color symbol and 'color' keyword " @@ -562,7 +574,7 @@ def test_kde_kwargs_check_axes(self, ts): @td.skip_if_no_scipy def test_kde_missing_vals(self): - s = Series(np.random.uniform(size=50)) + s = Series(np.random.default_rng(2).uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) @@ -632,14 +644,14 @@ def test_dup_datetime_index_plot(self): dr1 = date_range("1/1/2009", periods=4) dr2 = date_range("1/2/2009", periods=4) index = dr1.append(dr2) - values = np.random.randn(index.size) + values = np.random.default_rng(2).randn(index.size) s = Series(values, index=index) _check_plot_works(s.plot) def test_errorbar_asymmetrical(self): # GH9536 s = Series(np.arange(10), name="x") - err = np.random.rand(2, 10) + err = np.random.default_rng(2).rand(2, 10) ax = s.plot(yerr=err, xerr=err) @@ -652,17 +664,19 @@ def test_errorbar_asymmetrical(self): f"with the shape \\(2, {len(s)}\\)" ) with pytest.raises(ValueError, match=msg): - s.plot(yerr=np.random.rand(2, 11)) + s.plot(yerr=np.random.default_rng(2).rand(2, 11)) @pytest.mark.slow @pytest.mark.parametrize("kind", ["line", "bar"]) @pytest.mark.parametrize( "yerr", [ - Series(np.abs(np.random.randn(10))), - np.abs(np.random.randn(10)), - 
list(np.abs(np.random.randn(10))), - DataFrame(np.abs(np.random.randn(10, 2)), columns=["x", "y"]), + Series(np.abs(np.random.default_rng(2).randn(10))), + np.abs(np.random.default_rng(2).randn(10)), + list(np.abs(np.random.default_rng(2).randn(10))), + DataFrame( + np.abs(np.random.default_rng(2).randn(10, 2)), columns=["x", "y"] + ), ], ) def test_errorbar_plot(self, kind, yerr): @@ -673,7 +687,7 @@ def test_errorbar_plot(self, kind, yerr): @pytest.mark.slow def test_errorbar_plot_yerr_0(self): s = Series(np.arange(10), name="x") - s_err = np.abs(np.random.randn(10)) + s_err = np.abs(np.random.default_rng(2).randn(10)) ax = _check_plot_works(s.plot, xerr=s_err) _check_has_errorbars(ax, xerr=1, yerr=0) @@ -681,8 +695,10 @@ def test_errorbar_plot_yerr_0(self): @pytest.mark.parametrize( "yerr", [ - Series(np.abs(np.random.randn(12))), - DataFrame(np.abs(np.random.randn(12, 2)), columns=["x", "y"]), + Series(np.abs(np.random.default_rng(2).randn(12))), + DataFrame( + np.abs(np.random.default_rng(2).randn(12, 2)), columns=["x", "y"] + ), ], ) def test_errorbar_plot_ts(self, yerr): diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 83b9a83c0a6a2..f42cc9b868d63 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -40,7 +40,7 @@ def get_objs(): tm.makeStringIndex(10, name="a"), ] - arr = np.random.randn(10) + arr = np.random.default_rng(2).randn(10) series = [Series(arr, index=idx, name="a") for idx in indexes] objs = indexes + series @@ -548,7 +548,7 @@ class TestSeriesReductions: # intended long-term to be series-specific def test_sum_inf(self): - s = Series(np.random.randn(10)) + s = Series(np.random.default_rng(2).randn(10)) s2 = s.copy() s[5:8] = np.inf @@ -556,7 +556,7 @@ def test_sum_inf(self): assert np.isinf(s.sum()) - arr = np.random.randn(100, 100).astype("f4") + arr = np.random.default_rng(2).randn(100, 100).astype("f4") arr[:, 2] = np.inf msg = 
"use_inf_as_na option is deprecated" @@ -1156,7 +1156,7 @@ def test_minmax_nat_dataframe(self, nat_df): def test_min_max(self): rng = date_range("1/1/2000", "12/31/2000") - rng2 = rng.take(np.random.permutation(len(rng))) + rng2 = rng.take(np.random.default_rng(2).permutation(len(rng))) the_min = rng2.min() the_max = rng2.max() @@ -1171,7 +1171,9 @@ def test_min_max(self): def test_min_max_series(self): rng = date_range("1/1/2000", periods=10, freq="4h") lvls = ["A", "A", "A", "B", "B", "B", "C", "C", "C", "C"] - df = DataFrame({"TS": rng, "V": np.random.randn(len(rng)), "L": lvls}) + df = DataFrame( + {"TS": rng, "V": np.random.default_rng(2).randn(len(rng)), "L": lvls} + ) result = df.TS.max() exp = Timestamp(df.TS.iat[-1]) diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index 38f682c9c4f5a..7ab711abcc8e7 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -51,7 +51,7 @@ def simple_date_range_series(): def _simple_date_range_series(start, end, freq="D"): rng = date_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) + return Series(np.random.default_rng(2).randn(len(rng)), index=rng) return _simple_date_range_series @@ -64,7 +64,7 @@ def simple_period_range_series(): def _simple_period_range_series(start, end, freq="D"): rng = period_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) + return Series(np.random.default_rng(2).randn(len(rng)), index=rng) return _simple_period_range_series diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 86a3017753844..86ae77953a646 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -46,7 +46,7 @@ def _index_freq(): @pytest.fixture def _static_values(index): - return np.random.rand(len(index)) + return np.random.default_rng(2).rand(len(index)) @pytest.fixture(params=["s", "ms", 
"us", "ns"]) @@ -93,7 +93,9 @@ def test_custom_grouper(index, unit): def test_custom_grouper_df(index, unit): b = Grouper(freq=Minute(5), closed="right", label="right") dti = index.as_unit(unit) - df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64") + df = DataFrame( + np.random.default_rng(2).rand(len(dti), 10), index=dti, dtype="float64" + ) r = df.groupby(b).agg("sum") assert len(r.columns) == 10 @@ -339,7 +341,7 @@ def test_resample_basic_from_daily(unit): start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" ).as_unit(unit) - s = Series(np.random.rand(len(dti)), dti) + s = Series(np.random.default_rng(2).rand(len(dti)), dti) # to weekly result = s.resample("w-sun").last() @@ -454,7 +456,7 @@ def test_resample_upsample(unit): start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" ).as_unit(unit) - s = Series(np.random.rand(len(dti)), dti) + s = Series(np.random.default_rng(2).rand(len(dti)), dti) # to minutely, by padding result = s.resample("Min").ffill() @@ -507,7 +509,7 @@ def test_resample_extra_index_point(unit): def test_upsample_with_limit(unit): rng = date_range("1/1/2000", periods=3, freq="5t").as_unit(unit) - ts = Series(np.random.randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), rng) result = ts.resample("t").ffill(limit=2) expected = ts.reindex(result.index, method="ffill", limit=2) @@ -521,7 +523,7 @@ def test_nearest_upsample_with_limit(tz_aware_fixture, freq, rule, unit): rng = date_range("1/1/2000", periods=3, freq=freq, tz=tz_aware_fixture).as_unit( unit ) - ts = Series(np.random.randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), rng) result = ts.resample(rule).nearest(limit=2) expected = ts.reindex(result.index, method="nearest", limit=2) @@ -635,7 +637,7 @@ def test_resample_dup_index(): # GH 4812 # dup columns with resample raising df = DataFrame( - np.random.randn(4, 12), + np.random.default_rng(2).randn(4, 12), 
index=[2000, 2000, 2000, 2000], columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)], ) @@ -655,7 +657,7 @@ def test_resample_reresample(unit): dti = date_range( start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D" ).as_unit(unit) - s = Series(np.random.rand(len(dti)), dti) + s = Series(np.random.default_rng(2).rand(len(dti)), dti) bs = s.resample("B", closed="right", label="right").mean() result = bs.resample("8H").mean() assert len(result) == 22 @@ -690,7 +692,7 @@ def _ohlc(group): return [group.iloc[0], group.max(), group.min(), group.iloc[-1]] rng = date_range("1/1/2000 00:00:00", "1/1/2000 5:59:50", freq="10s").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) resampled = ts.resample("5min", closed="right", label="right").ohlc() @@ -706,7 +708,7 @@ def _ohlc(group): def test_downsample_non_unique(unit): rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) rng2 = rng.repeat(5).values - ts = Series(np.random.randn(len(rng2)), index=rng2) + ts = Series(np.random.default_rng(2).randn(len(rng2)), index=rng2) result = ts.resample("M").mean() @@ -720,7 +722,7 @@ def test_asfreq_non_unique(unit): # GH #1077 rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) rng2 = rng.repeat(2).values - ts = Series(np.random.randn(len(rng2)), index=rng2) + ts = Series(np.random.default_rng(2).randn(len(rng2)), index=rng2) msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): @@ -729,7 +731,9 @@ def test_asfreq_non_unique(unit): def test_resample_axis1(unit): rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) - df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).randn(3, len(rng)), columns=rng, index=["a", "b", "c"] + ) warning_msg = "DataFrame.resample with axis=1 is deprecated." 
with tm.assert_produces_warning(FutureWarning, match=warning_msg): @@ -746,7 +750,7 @@ def test_resample_anchored_ticks(freq, unit): # middle of a desired interval rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) ts[:2] = np.nan # so results are the same result = ts[2:].resample(freq, closed="left", label="left").mean() expected = ts.resample(freq, closed="left", label="left").mean() @@ -758,7 +762,7 @@ def test_resample_single_group(end, unit): mysum = lambda x: x.sum() rng = date_range("2000-1-1", f"2000-{end}-10", freq="D").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum)) @@ -780,7 +784,7 @@ def test_resample_offset(unit): # GH 31809 rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) resampled = ts.resample("5min", offset="2min").mean() exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min").as_unit( @@ -803,7 +807,7 @@ def test_resample_offset(unit): def test_resample_origin(kwargs, unit): # GH 31809 rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) exp_rng = date_range( "1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min" @@ -818,7 +822,7 @@ def test_resample_origin(kwargs, unit): ) def test_resample_bad_origin(origin, unit): rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) msg = ( 
"'origin' should be equal to 'epoch', 'start', 'start_day', " "'end', 'end_day' or should be a Timestamp convertible type. Got " @@ -831,7 +835,7 @@ def test_resample_bad_origin(origin, unit): @pytest.mark.parametrize("offset", ["invalid_value", "12dayys", "2000-30-30", object()]) def test_resample_bad_offset(offset, unit): rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) msg = f"'offset' should be a Timedelta convertible type. Got '{offset}' instead." with pytest.raises(ValueError, match=msg): ts.resample("5min", offset=offset) @@ -841,7 +845,7 @@ def test_resample_origin_prime_freq(unit): # GH 31809 start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00" rng = date_range(start, end, freq="7min").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) exp_rng = date_range( "2000-10-01 23:14:00", "2000-10-02 00:22:00", freq="17min" @@ -882,7 +886,7 @@ def test_resample_origin_with_tz(unit): rng = date_range( "2000-01-01 00:00:00", "2000-01-01 02:00", freq="s", tz=tz ).as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) exp_rng = date_range( "1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min", tz=tz @@ -902,7 +906,7 @@ def test_resample_origin_with_tz(unit): # if the series is not tz aware, origin should not be tz aware rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) with pytest.raises(ValueError, match=msg): ts.resample("5min", origin="12/31/1999 23:57:00+03:00").mean() @@ -911,7 +915,7 @@ def test_resample_origin_epoch_with_tz_day_vs_24h(unit): # GH 34474 start, end = "2000-10-01 23:30:00+0500", 
"2000-12-02 00:30:00+0500" rng = date_range(start, end, freq="7min").as_unit(unit) - random_values = np.random.randn(len(rng)) + random_values = np.random.default_rng(2).randn(len(rng)) ts_1 = Series(random_values, index=rng) result_1 = ts_1.resample("D", origin="epoch").mean() @@ -992,7 +996,7 @@ def _create_series(values, timestamps, freq="D"): def test_resample_daily_anchored(unit): rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) ts[:2] = np.nan # so results are the same result = ts[2:].resample("D", closed="left", label="left").mean() @@ -1004,7 +1008,7 @@ def test_resample_to_period_monthly_buglet(unit): # GH #1259 rng = date_range("1/1/2000", "12/31/2000").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) result = ts.resample("M", kind="period").mean() exp_index = period_range("Jan-2000", "Dec-2000", freq="M") @@ -1014,7 +1018,7 @@ def test_resample_to_period_monthly_buglet(unit): def test_period_with_agg(): # aggregate a period resampler with a lambda s2 = Series( - np.random.randint(0, 5, 50), + np.random.default_rng(2).randint(0, 5, 50), index=period_range("2012-01-01", freq="H", periods=50), dtype="float64", ) @@ -1081,7 +1085,7 @@ def test_resample_dtype_coercion(unit): def test_weekly_resample_buglet(unit): # #1327 rng = date_range("1/1/2000", freq="B", periods=20).as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) resampled = ts.resample("W").mean() expected = ts.resample("W-SUN").mean() @@ -1091,7 +1095,7 @@ def test_weekly_resample_buglet(unit): def test_monthly_resample_error(unit): # #1451 dates = date_range("4/16/2012 20:00", periods=5000, freq="h").as_unit(unit) - ts = Series(np.random.randn(len(dates)), index=dates) + ts = 
Series(np.random.default_rng(2).randn(len(dates)), index=dates) # it works! ts.resample("M") @@ -1179,7 +1183,7 @@ def test_resample_anchored_multiday(label, sec): index2 = date_range("2014-10-15 23:00:00", periods=2, freq="2200L") index = index1.union(index2) - s = Series(np.random.randn(5), index=index) + s = Series(np.random.default_rng(2).randn(5), index=index) # Ensure left closing works result = s.resample("2200L", label=label).mean() @@ -1190,7 +1194,7 @@ def test_corner_cases(unit): # miscellaneous test coverage rng = date_range("1/1/2000", periods=12, freq="t").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) result = ts.resample("5t", closed="right", label="left").mean() ex_index = date_range("1999-12-31 23:55", periods=4, freq="5t").as_unit(unit) @@ -1216,7 +1220,7 @@ def test_corner_cases_date(simple_date_range_series, unit): def test_anchored_lowercase_buglet(unit): dates = date_range("4/16/2012 20:00", periods=50000, freq="s").as_unit(unit) - ts = Series(np.random.randn(len(dates)), index=dates) + ts = Series(np.random.default_rng(2).randn(len(dates)), index=dates) # it works! 
ts.resample("d").mean() @@ -1225,7 +1229,7 @@ def test_upsample_apply_functions(unit): # #1596 rng = date_range("2012-06-12", periods=4, freq="h").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) result = ts.resample("20min").aggregate(["mean", "sum"]) assert isinstance(result, DataFrame) @@ -1233,9 +1237,9 @@ def test_upsample_apply_functions(unit): def test_resample_not_monotonic(unit): rng = date_range("2012-06-12", periods=200, freq="h").as_unit(unit) - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) - ts = ts.take(np.random.permutation(len(ts))) + ts = ts.take(np.random.default_rng(2).permutation(len(ts))) result = ts.resample("D").sum() exp = ts.sort_index().resample("D").sum() @@ -1452,7 +1456,7 @@ def test_resample_group_info(n, k, unit): # GH10914 # use a fixed seed to always have the same uniques - prng = np.random.RandomState(1234) + prng = np.random.default_rng(2).RandomState(1234) dr = date_range(start="2015-08-27", periods=n // 10, freq="T").as_unit(unit) ts = Series(prng.randint(0, n // k, n).astype("int64"), index=prng.choice(dr, n)) @@ -1478,7 +1482,9 @@ def test_resample_group_info(n, k, unit): def test_resample_size(unit): n = 10000 dr = date_range("2015-09-19", periods=n, freq="T").as_unit(unit) - ts = Series(np.random.randn(n), index=np.random.choice(dr, n)) + ts = Series( + np.random.default_rng(2).randn(n), index=np.random.default_rng(2).choice(dr, n) + ) left = ts.resample("7T").size() ix = date_range(start=left.index.min(), end=ts.index.max(), freq="7T").as_unit(unit) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 20b997bdca873..2d76f3ad97328 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -157,7 +157,7 @@ def test_basic_upsample(self, freq, simple_period_range_series): 
def test_upsample_with_limit(self): rng = period_range("1/1/2000", periods=5, freq="A") - ts = Series(np.random.randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), rng) result = ts.resample("M", convention="end").ffill(limit=2) expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2) @@ -408,7 +408,7 @@ def test_resample_to_quarterly_start_end(self, simple_period_range_series, how): def test_resample_fill_missing(self): rng = PeriodIndex([2000, 2005, 2007, 2009], freq="A") - s = Series(np.random.randn(4), index=rng) + s = Series(np.random.default_rng(2).randn(4), index=rng) stamps = s.to_timestamp() filled = s.resample("A").ffill() @@ -417,7 +417,7 @@ def test_resample_fill_missing(self): def test_cant_fill_missing_dups(self): rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A") - s = Series(np.random.randn(5), index=rng) + s = Series(np.random.default_rng(2).randn(5), index=rng) msg = "Reindexing only valid with uniquely valued Index objects" with pytest.raises(InvalidIndexError, match=msg): s.resample("A").ffill() @@ -426,7 +426,7 @@ def test_cant_fill_missing_dups(self): @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) def test_resample_5minute(self, freq, kind): rng = period_range("1/1/2000", "1/5/2000", freq="T") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) expected = ts.to_timestamp().resample(freq).mean() if kind != "timestamp": expected = expected.to_period(freq) @@ -458,7 +458,7 @@ def test_resample_irregular_sparse(self): def test_resample_weekly_all_na(self): rng = date_range("1/1/2000", periods=10, freq="W-WED") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) result = ts.resample("W-THU").asfreq() @@ -513,8 +513,8 @@ def test_resample_tz_localized(self): rng = date_range("1/1/2011", periods=20000, freq="H") rng = rng.tz_localize("EST") ts = 
DataFrame(index=rng) - ts["first"] = np.random.randn(len(rng)) - ts["second"] = np.cumsum(np.random.randn(len(rng))) + ts["first"] = np.random.default_rng(2).randn(len(rng)) + ts["second"] = np.cumsum(np.random.default_rng(2).randn(len(rng))) expected = DataFrame( { "first": ts.resample("A").sum()["first"], @@ -532,7 +532,7 @@ def test_resample_tz_localized(self): def test_closed_left_corner(self): # #1465 s = Series( - np.random.randn(21), + np.random.default_rng(2).randn(21), index=date_range(start="1/1/2012 9:30", freq="1min", periods=21), ) s.iloc[0] = np.nan @@ -631,7 +631,7 @@ def test_monthly_convention_span(self): ) def test_default_right_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.randn(len(idx), 2), idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), idx) resampled = df.resample(to_freq).mean() tm.assert_frame_equal( @@ -644,7 +644,7 @@ def test_default_right_closed_label(self, from_freq, to_freq): ) def test_default_left_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.randn(len(idx), 2), idx) + df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), idx) resampled = df.resample(to_freq).mean() tm.assert_frame_equal( @@ -654,7 +654,7 @@ def test_default_left_closed_label(self, from_freq, to_freq): def test_all_values_single_bin(self): # 2070 index = period_range(start="2012-01-01", end="2012-12-31", freq="M") - s = Series(np.random.randn(len(index)), index=index) + s = Series(np.random.default_rng(2).randn(len(index)), index=index) result = s.resample("A").mean() tm.assert_almost_equal(result.iloc[0], s.mean()) @@ -663,7 +663,10 @@ def test_evenly_divisible_with_no_extra_bins(self): # 4076 # when the frequency is evenly divisible, sometimes extra bins - df = DataFrame(np.random.randn(9, 3), index=date_range("2000-1-1", periods=9)) + df = DataFrame( + 
np.random.default_rng(2).randn(9, 3), + index=date_range("2000-1-1", periods=9), + ) result = df.resample("5D").mean() expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T expected.index = pd.DatetimeIndex( diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 6aa59d8b3d164..a346c9be15415 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -24,7 +24,7 @@ def dti(): @pytest.fixture def _test_series(dti): - return Series(np.random.rand(len(dti)), dti) + return Series(np.random.default_rng(2).rand(len(dti)), dti) @pytest.fixture @@ -89,7 +89,7 @@ def test_groupby_resample_on_api(): { "key": ["A", "B"] * 5, "dates": date_range("2016-01-01", periods=10), - "values": np.random.randn(10), + "values": np.random.default_rng(2).randn(10), } ) @@ -278,7 +278,9 @@ def test_transform_frame(on): # GH#47079 index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" - df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df = DataFrame( + np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + ) expected = df.groupby(pd.Grouper(freq="20min")).transform("mean") if on == "date": # Move date to being a column; result will then have a RangeIndex @@ -341,7 +343,7 @@ def test_agg_consistency(): # make sure that we are consistent across # similar aggregations with and w/o selection list df = DataFrame( - np.random.randn(1000, 3), + np.random.default_rng(2).randn(1000, 3), index=date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], ) @@ -356,7 +358,7 @@ def test_agg_consistency(): def test_agg_consistency_int_str_column_mix(): # GH#39025 df = DataFrame( - np.random.randn(1000, 2), + np.random.default_rng(2).randn(1000, 2), index=date_range("1/1/2012", freq="S", periods=1000), columns=[1, "a"], ) @@ -375,10 +377,11 @@ def test_agg_consistency_int_str_column_mix(): def 
test_agg(): # test with all three Resampler apis and TimeGrouper - np.random.seed(1234) index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" - df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df = DataFrame( + np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + ) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays( @@ -488,10 +491,11 @@ def test_agg(): def test_agg_misc(): # test with all three Resampler apis and TimeGrouper - np.random.seed(1234) index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" - df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df = DataFrame( + np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + ) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays( @@ -585,10 +589,12 @@ def test_agg_misc(): ) def test_multi_agg_axis_1_raises(func): # GH#46904 - np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" - df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index).T + df = DataFrame( + np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + ).T warning_msg = "DataFrame.resample with axis=1 is deprecated." 
with tm.assert_produces_warning(FutureWarning, match=warning_msg): res = df.resample("M", axis=1) @@ -599,10 +605,11 @@ def test_multi_agg_axis_1_raises(func): def test_agg_nested_dicts(): - np.random.seed(1234) index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" - df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df = DataFrame( + np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + ) df_col = df.reset_index() df_mult = df_col.copy() df_mult.index = pd.MultiIndex.from_arrays( @@ -1005,10 +1012,11 @@ def test_args_kwargs_depr(method, raises): def test_df_axis_param_depr(): - np.random.seed(1234) index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" - df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index).T + df = DataFrame( + np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + ).T # Deprecation error when axis=1 is explicitly passed warning_msg = "DataFrame.resample with axis=1 is deprecated." 
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index df14a5bc374c6..113dfe4c1ced1 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -142,7 +142,7 @@ def test_groupby_with_origin(): middle = "1/15/2000 00:00:00" rng = date_range(start, end, freq="1231min") # prime number - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) ts2 = ts[middle:end] # proves that grouper without a fixed origin does not work @@ -272,7 +272,9 @@ def f_1(x): def test_apply_with_mutated_index(): # GH 15169 index = date_range("1-1-2015", "12-31-15", freq="D") - df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index) + df = DataFrame( + data={"col1": np.random.default_rng(2).rand(len(index))}, index=index + ) def f(x): s = Series([1, 2], index=["a", "b"]) @@ -362,7 +364,7 @@ def test_median_duplicate_columns(): # GH 14233 df = DataFrame( - np.random.randn(20, 3), + np.random.default_rng(2).randn(20, 3), columns=list("aaa"), index=date_range("2012-01-01", periods=20, freq="s"), ) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 2cd47296d5cab..e27f9ed0c6d1d 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -17,7 +17,9 @@ @pytest.fixture def test_series(): - return Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000)) + return Series( + np.random.default_rng(2).randn(1000), index=date_range("1/1/2000", periods=1000) + ) def test_apply(test_series): @@ -95,7 +97,7 @@ def test_fails_on_no_datetime_index(func): n = 2 index = func(n) name = type(index).__name__ - df = DataFrame({"a": np.random.randn(n)}, index=index) + df = DataFrame({"a": np.random.default_rng(2).randn(n)}, index=index) msg = ( "Only valid with DatetimeIndex, TimedeltaIndex " @@ -109,7 
+111,7 @@ def test_aaa_group_order(): # GH 12840 # check TimeGrouper perform stable sorts n = 20 - data = np.random.randn(n, 4) + data = np.random.default_rng(2).randn(n, 4) df = DataFrame(data, columns=["A", "B", "C", "D"]) df["key"] = [ datetime(2013, 1, 1), @@ -130,7 +132,7 @@ def test_aaa_group_order(): def test_aggregate_normal(resample_method): """Check TimeGrouper's aggregation is identical as normal groupby.""" - data = np.random.randn(20, 4) + data = np.random.default_rng(2).randn(20, 4) normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, 3, 4, 5] * 4 @@ -156,7 +158,7 @@ def test_aggregate_normal(resample_method): def test_aggregate_nth(): """Check TimeGrouper's aggregation is identical as normal groupby.""" - data = np.random.randn(20, 4) + data = np.random.default_rng(2).randn(20, 4) normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, 3, 4, 5] * 4 @@ -208,7 +210,7 @@ def test_aggregate_with_nat(func, fill_value): # and 'nth' doesn't work yet n = 20 - data = np.random.randn(n, 4).astype("int64") + data = np.random.default_rng(2).randn(n, 4).astype("int64") normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 @@ -239,7 +241,7 @@ def test_aggregate_with_nat(func, fill_value): def test_aggregate_with_nat_size(): # GH 9925 n = 20 - data = np.random.randn(n, 4).astype("int64") + data = np.random.default_rng(2).randn(n, 4).astype("int64") normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index d87e6ca2f69b9..14708694ca1c9 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -81,7 +81,7 @@ def test_resample_timedelta_idempotency(): def test_resample_offset_with_timedeltaindex(): # GH 10530 & 31809 rng = timedelta_range(start="0s", periods=25, freq="s") - ts = 
Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) with_base = ts.resample("2s", offset="5s").mean() without_base = ts.resample("2s").mean() @@ -154,7 +154,7 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq): def test_resample_with_timedelta_yields_no_empty_groups(duplicates): # GH 10603 df = DataFrame( - np.random.normal(size=(10000, 4)), + np.random.default_rng(2).normal(size=(10000, 4)), index=timedelta_range(start="0s", periods=10000, freq="3906250n"), ) if duplicates: diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index e6faeedd09525..d0740aa558cd2 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -123,9 +123,9 @@ def test_append_sorts(self, sort): def test_append_different_columns(self, sort): df = DataFrame( { - "bools": np.random.randn(10) > 0, - "ints": np.random.randint(0, 10, 10), - "floats": np.random.randn(10), + "bools": np.random.default_rng(2).randn(10) > 0, + "ints": np.random.default_rng(2).randint(0, 10, 10), + "floats": np.random.default_rng(2).randn(10), "strings": ["foo", "bar"] * 5, } ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 8208abc23551d..89cfb8a2081c4 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -40,8 +40,8 @@ def test_append_concat(self): d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") - s1 = Series(np.random.randn(10), d1) - s2 = Series(np.random.randn(10), d2) + s1 = Series(np.random.default_rng(2).randn(10), d1) + s2 = Series(np.random.default_rng(2).randn(10), d2) s1 = s1.to_period() s2 = s2.to_period() @@ -52,8 +52,8 @@ def test_append_concat(self): assert result.index[0] == s1.index[0] def test_concat_copy(self, using_array_manager, 
using_copy_on_write): - df = DataFrame(np.random.randn(4, 3)) - df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1)) + df = DataFrame(np.random.default_rng(2).randn(4, 3)) + df2 = DataFrame(np.random.default_rng(2).randint(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: "foo"}, index=range(4)) # These are actual copies. @@ -86,7 +86,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): assert arr.base is not None # Float block was consolidated. - df4 = DataFrame(np.random.randn(4, 1)) + df4 = DataFrame(np.random.default_rng(2).randn(4, 1)) result = concat([df, df2, df3, df4], axis=1, copy=False) for arr in result._mgr.arrays: if arr.dtype.kind == "f": @@ -107,8 +107,8 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): def test_concat_with_group_keys(self): # axis=0 - df = DataFrame(np.random.randn(3, 4)) - df2 = DataFrame(np.random.randn(4, 4)) + df = DataFrame(np.random.default_rng(2).randn(3, 4)) + df2 = DataFrame(np.random.default_rng(2).randn(4, 4)) result = concat([df, df2], keys=[0, 1]) exp_index = MultiIndex.from_arrays( @@ -123,8 +123,8 @@ def test_concat_with_group_keys(self): tm.assert_frame_equal(result, expected) # axis=1 - df = DataFrame(np.random.randn(4, 3)) - df2 = DataFrame(np.random.randn(4, 4)) + df = DataFrame(np.random.default_rng(2).randn(4, 3)) + df2 = DataFrame(np.random.default_rng(2).randn(4, 4)) result = concat([df, df2], keys=[0, 1], axis=1) expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index) @@ -135,7 +135,7 @@ def test_concat_with_group_keys(self): tm.assert_frame_equal(result, expected) def test_concat_keys_specific_levels(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]] level = ["three", "two", "one", "zero"] result = concat( @@ -156,10 +156,10 @@ def test_concat_mapping(self, mapping, non_dict_mapping_subclass): constructor = dict if mapping 
== "dict" else non_dict_mapping_subclass frames = constructor( { - "foo": DataFrame(np.random.randn(4, 3)), - "bar": DataFrame(np.random.randn(4, 3)), - "baz": DataFrame(np.random.randn(4, 3)), - "qux": DataFrame(np.random.randn(4, 3)), + "foo": DataFrame(np.random.default_rng(2).randn(4, 3)), + "bar": DataFrame(np.random.default_rng(2).randn(4, 3)), + "baz": DataFrame(np.random.default_rng(2).randn(4, 3)), + "qux": DataFrame(np.random.default_rng(2).randn(4, 3)), } ) @@ -179,8 +179,8 @@ def test_concat_mapping(self, mapping, non_dict_mapping_subclass): tm.assert_frame_equal(result, expected) def test_concat_keys_and_levels(self): - df = DataFrame(np.random.randn(1, 3)) - df2 = DataFrame(np.random.randn(1, 4)) + df = DataFrame(np.random.default_rng(2).randn(1, 3)) + df2 = DataFrame(np.random.default_rng(2).randn(1, 4)) levels = [["foo", "baz"], ["one", "two"]] names = ["first", "second"] @@ -221,8 +221,8 @@ def test_concat_keys_and_levels(self): def test_concat_keys_levels_no_overlap(self): # GH #1406 - df = DataFrame(np.random.randn(1, 3), index=["a"]) - df2 = DataFrame(np.random.randn(1, 4), index=["b"]) + df = DataFrame(np.random.default_rng(2).randn(1, 3), index=["a"]) + df2 = DataFrame(np.random.default_rng(2).randn(1, 4), index=["b"]) msg = "Values not found in passed level" with pytest.raises(ValueError, match=msg): @@ -260,8 +260,8 @@ def test_crossed_dtypes_weird_corner(self): ) tm.assert_frame_equal(appended, expected) - df = DataFrame(np.random.randn(1, 3), index=["a"]) - df2 = DataFrame(np.random.randn(1, 4), index=["b"]) + df = DataFrame(np.random.default_rng(2).randn(1, 3), index=["a"]) + df2 = DataFrame(np.random.default_rng(2).randn(1, 4), index=["b"]) result = concat([df, df2], keys=["one", "two"], names=["first", "second"]) assert result.index.names == ("first", "second") @@ -360,7 +360,7 @@ def test_dtype_coercion(self): tm.assert_series_equal(result.dtypes, df.dtypes) def test_concat_single_with_key(self): - df = DataFrame(np.random.randn(10, 
4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) result = concat([df], keys=["foo"]) expected = concat([df, df], keys=["foo", "bar"]) @@ -371,7 +371,7 @@ def test_concat_no_items_raises(self): concat([]) def test_concat_exclude_none(self): - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) pieces = [df[:5], None, None, df[5:]] result = concat(pieces) @@ -499,8 +499,12 @@ def test_concat_ordered_dict(self): def test_concat_duplicate_indices_raise(self): # GH 45888: test raise for concat DataFrames with duplicate indices # https://github.com/pandas-dev/pandas/issues/36263 - df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"]) - df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"]) + df1 = DataFrame( + np.random.default_rng(2).randn(5), index=[0, 1, 2, 3, 3], columns=["a"] + ) + df2 = DataFrame( + np.random.default_rng(2).randn(5), index=[0, 1, 2, 2, 4], columns=["b"] + ) msg = "Reindexing only valid with uniquely valued Index objects" with pytest.raises(InvalidIndexError, match=msg): concat([df1, df2], axis=1) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 6ef54b907cf34..026cda27bc567 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -14,7 +14,7 @@ class TestEmptyConcat: def test_handle_empty_objects(self, sort): - df = DataFrame(np.random.randn(10, 4), columns=list("abcd")) + df = DataFrame(np.random.default_rng(2).randn(10, 4), columns=list("abcd")) dfcopy = df[:5].copy() dfcopy["foo"] = "bar" diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index ce06e74de91b9..1c1bc43df713d 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -80,12 +80,12 @@ def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out): def 
test_concat_rename_index(self): a = DataFrame( - np.random.rand(3, 3), + np.random.default_rng(2).rand(3, 3), columns=list("ABC"), index=Index(list("abc"), name="index_a"), ) b = DataFrame( - np.random.rand(3, 3), + np.random.default_rng(2).rand(3, 3), columns=list("ABC"), index=Index(list("abc"), name="index_b"), ) @@ -160,7 +160,7 @@ def test_dups_index(self): # single dtypes df = DataFrame( - np.random.randint(0, 10, size=40).reshape(10, 4), + np.random.default_rng(2).randint(0, 10, size=40).reshape(10, 4), columns=["A", "A", "C", "C"], ) @@ -175,9 +175,12 @@ def test_dups_index(self): # multi dtypes df = concat( [ - DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), DataFrame( - np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"] + ), + DataFrame( + np.random.default_rng(2).randint(0, 10, size=20).reshape(10, 2), + columns=["A", "C"], ), ], axis=1, @@ -240,7 +243,7 @@ def test_concat_multiindex_rangeindex(self): # when multi-index levels are RangeIndex objects # there is a bug in concat with objects of len 1 - df = DataFrame(np.random.randn(9, 2)) + df = DataFrame(np.random.default_rng(2).randn(9, 2)) df.index = MultiIndex( levels=[pd.RangeIndex(3), pd.RangeIndex(3)], codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)], diff --git a/pandas/tests/reshape/concat/test_invalid.py b/pandas/tests/reshape/concat/test_invalid.py index a8c6ef97d1ccc..f2317c5ab4fac 100644 --- a/pandas/tests/reshape/concat/test_invalid.py +++ b/pandas/tests/reshape/concat/test_invalid.py @@ -34,7 +34,7 @@ def test_concat_invalid_first_argument(self): def test_concat_generator_obj(self): # generator ok though - concat(DataFrame(np.random.rand(5, 5)) for _ in range(3)) + concat(DataFrame(np.random.default_rng(2).rand(5, 5)) for _ in range(3)) def test_concat_textreader_obj(self): # text reader ok diff --git a/pandas/tests/reshape/concat/test_series.py 
b/pandas/tests/reshape/concat/test_series.py index 2711b6a34c62c..9becb807ae028 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -60,8 +60,8 @@ def test_concat_series_axis1(self): def test_concat_series_axis1_preserves_series_names(self): # preserve series names, #2489 - s = Series(np.random.randn(5), name="A") - s2 = Series(np.random.randn(5), name="B") + s = Series(np.random.default_rng(2).randn(5), name="A") + s2 = Series(np.random.default_rng(2).randn(5), name="B") result = concat([s, s2], axis=1) expected = DataFrame({"A": s, "B": s2}) @@ -73,8 +73,10 @@ def test_concat_series_axis1_preserves_series_names(self): def test_concat_series_axis1_with_reindex(self, sort): # must reindex, #2603 - s = Series(np.random.randn(3), index=["c", "a", "b"], name="A") - s2 = Series(np.random.randn(4), index=["d", "a", "b", "c"], name="B") + s = Series(np.random.default_rng(2).randn(3), index=["c", "a", "b"], name="A") + s2 = Series( + np.random.default_rng(2).randn(4), index=["d", "a", "b", "c"], name="B" + ) result = concat([s, s2], axis=1, sort=sort) expected = DataFrame({"A": s, "B": s2}, index=["c", "a", "b", "d"]) if sort: @@ -100,8 +102,12 @@ def test_concat_series_axis1_names_applied(self): def test_concat_series_axis1_same_names_ignore_index(self): dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1] - s1 = Series(np.random.randn(len(dates)), index=dates, name="value") - s2 = Series(np.random.randn(len(dates)), index=dates, name="value") + s1 = Series( + np.random.default_rng(2).randn(len(dates)), index=dates, name="value" + ) + s2 = Series( + np.random.default_rng(2).randn(len(dates)), index=dates, name="value" + ) result = concat([s1, s2], axis=1, ignore_index=True) expected = Index(range(2)) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 179748f0506b5..e73b4fe0d98fc 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ 
b/pandas/tests/reshape/merge/test_join.py @@ -24,7 +24,7 @@ def get_test_data(ngroups=8, n=50): if len(arr) < n: arr = np.asarray(list(arr) + unique_groups[: n - len(arr)]) - np.random.shuffle(arr) + np.random.default_rng(2).shuffle(arr) return arr @@ -36,8 +36,8 @@ def df(self): { "key1": get_test_data(), "key2": get_test_data(), - "data1": np.random.randn(50), - "data2": np.random.randn(50), + "data1": np.random.default_rng(2).randn(50), + "data2": np.random.default_rng(2).randn(50), } ) @@ -51,7 +51,7 @@ def df2(self): { "key1": get_test_data(n=10), "key2": get_test_data(ngroups=4, n=10), - "value": np.random.randn(10), + "value": np.random.default_rng(2).randn(10), } ) @@ -152,10 +152,16 @@ def test_join_on(self, target_source): def test_join_on_fails_with_different_right_index(self): df = DataFrame( - {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)} + { + "a": np.random.default_rng(2).choice(["m", "f"], size=3), + "b": np.random.default_rng(2).randn(3), + } ) df2 = DataFrame( - {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}, + { + "a": np.random.default_rng(2).choice(["m", "f"], size=10), + "b": np.random.default_rng(2).randn(10), + }, index=tm.makeCustomIndex(10, 2), ) msg = r'len\(left_on\) must equal the number of levels in the index of "right"' @@ -164,11 +170,17 @@ def test_join_on_fails_with_different_right_index(self): def test_join_on_fails_with_different_left_index(self): df = DataFrame( - {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}, + { + "a": np.random.default_rng(2).choice(["m", "f"], size=3), + "b": np.random.default_rng(2).randn(3), + }, index=tm.makeCustomIndex(3, 2), ) df2 = DataFrame( - {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)} + { + "a": np.random.default_rng(2).choice(["m", "f"], size=10), + "b": np.random.default_rng(2).randn(10), + } ) msg = r'len\(right_on\) must equal the number of levels in the index of "left"' with 
pytest.raises(ValueError, match=msg): @@ -176,10 +188,16 @@ def test_join_on_fails_with_different_left_index(self): def test_join_on_fails_with_different_column_counts(self): df = DataFrame( - {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)} + { + "a": np.random.default_rng(2).choice(["m", "f"], size=3), + "b": np.random.default_rng(2).randn(3), + } ) df2 = DataFrame( - {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}, + { + "a": np.random.default_rng(2).choice(["m", "f"], size=10), + "b": np.random.default_rng(2).randn(10), + }, index=tm.makeCustomIndex(10, 2), ) msg = r"len\(right_on\) must equal len\(left_on\)" @@ -317,10 +335,10 @@ def test_join_empty_bug(self): def test_join_unconsolidated(self): # GH #331 - a = DataFrame(np.random.randn(30, 2), columns=["a", "b"]) - c = Series(np.random.randn(30)) + a = DataFrame(np.random.default_rng(2).randn(30, 2), columns=["a", "b"]) + c = Series(np.random.default_rng(2).randn(30)) a["c"] = c - d = DataFrame(np.random.randn(30, 1), columns=["q"]) + d = DataFrame(np.random.default_rng(2).randn(30, 1), columns=["q"]) # it works! 
a.join(d) @@ -337,8 +355,12 @@ def test_join_multiindex(self): names=["first", "second"], ) - df1 = DataFrame(data=np.random.randn(6), index=index1, columns=["var X"]) - df2 = DataFrame(data=np.random.randn(6), index=index2, columns=["var Y"]) + df1 = DataFrame( + data=np.random.default_rng(2).randn(6), index=index1, columns=["var X"] + ) + df2 = DataFrame( + data=np.random.default_rng(2).randn(6), index=index2, columns=["var Y"] + ) df1 = df1.sort_index(level=0) df2 = df2.sort_index(level=0) @@ -376,12 +398,14 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): "one", ] - data = np.random.randn(len(key1)) + data = np.random.default_rng(2).randn(len(key1)) data = DataFrame({"key1": key1, "key2": key2, "data": data}) index = lexsorted_two_level_string_multiindex to_join = DataFrame( - np.random.randn(10, 3), index=index, columns=["j_one", "j_two", "j_three"] + np.random.default_rng(2).randn(10, 3), + index=index, + columns=["j_one", "j_two", "j_three"], ) joined = data.join(to_join, on=["key1", "key2"], how="inner") @@ -435,19 +459,25 @@ def test_join_hierarchical_mixed_raises(self): merge(new_df, other_df, left_index=True, right_index=True) def test_join_float64_float32(self): - a = DataFrame(np.random.randn(10, 2), columns=["a", "b"], dtype=np.float64) - b = DataFrame(np.random.randn(10, 1), columns=["c"], dtype=np.float32) + a = DataFrame( + np.random.default_rng(2).randn(10, 2), columns=["a", "b"], dtype=np.float64 + ) + b = DataFrame( + np.random.default_rng(2).randn(10, 1), columns=["c"], dtype=np.float32 + ) joined = a.join(b) assert joined.dtypes["a"] == "float64" assert joined.dtypes["b"] == "float64" assert joined.dtypes["c"] == "float32" - a = np.random.randint(0, 5, 100).astype("int64") - b = np.random.random(100).astype("float64") - c = np.random.random(100).astype("float32") + a = np.random.default_rng(2).randint(0, 5, 100).astype("int64") + b = np.random.default_rng(2).random(100).astype("float64") + c = 
np.random.default_rng(2).random(100).astype("float32") df = DataFrame({"a": a, "b": b, "c": c}) xpdf = DataFrame({"a": a, "b": b, "c": c}) - s = DataFrame(np.random.random(5).astype("float32"), columns=["md"]) + s = DataFrame( + np.random.default_rng(2).random(5).astype("float32"), columns=["md"] + ) rs = df.merge(s, left_on="a", right_index=True) assert rs.dtypes["a"] == "int64" assert rs.dtypes["b"] == "float64" @@ -496,8 +526,8 @@ def test_join_many_non_unique_index(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), + "C": np.random.default_rng(2).randn(8), + "D": np.random.default_rng(2).randn(8), } ) s = Series( @@ -564,7 +594,10 @@ def test_join_non_unique_period_index(self): def test_mixed_type_join_with_suffix(self): # GH #916 - df = DataFrame(np.random.randn(20, 6), columns=["a", "b", "c", "d", "e", "f"]) + df = DataFrame( + np.random.default_rng(2).randn(20, 6), + columns=["a", "b", "c", "d", "e", "f"], + ) df.insert(0, "id", 0) df.insert(5, "dt", "foo") @@ -579,7 +612,7 @@ def test_mixed_type_join_with_suffix(self): mn.join(cn, rsuffix="_right") def test_join_many(self): - df = DataFrame(np.random.randn(10, 6), columns=list("abcdef")) + df = DataFrame(np.random.default_rng(2).randn(10, 6), columns=list("abcdef")) df_list = [df[["a", "b"]], df[["c", "d"]], df[["e", "f"]]] joined = df_list[0].join(df_list[1:]) @@ -607,7 +640,9 @@ def _check_diff_index(df_list, result, exp_index): df_list[0].join(df_list[1:], on="a") def test_join_many_mixed(self): - df = DataFrame(np.random.randn(8, 4), columns=["A", "B", "C", "D"]) + df = DataFrame( + np.random.default_rng(2).randn(8, 4), columns=["A", "B", "C", "D"] + ) df["key"] = ["foo", "bar"] * 4 df1 = df.loc[:, ["A", "B"]] df2 = df.loc[:, ["C", "D"]] @@ -620,9 +655,12 @@ def test_join_dups(self): # joining dups df = concat( [ - DataFrame(np.random.randn(10, 4), columns=["A", 
"A", "B", "B"]), DataFrame( - np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"] + ), + DataFrame( + np.random.default_rng(2).randint(0, 10, size=20).reshape(10, 2), + columns=["A", "C"], ), ], axis=1, @@ -634,10 +672,10 @@ def test_join_dups(self): tm.assert_frame_equal(result, expected) # GH 4975, invalid join on dups - w = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) - x = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) - y = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) - z = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + w = DataFrame(np.random.default_rng(2).randn(4, 2), columns=["x", "y"]) + x = DataFrame(np.random.default_rng(2).randn(4, 2), columns=["x", "y"]) + y = DataFrame(np.random.default_rng(2).randn(4, 2), columns=["x", "y"]) + z = DataFrame(np.random.default_rng(2).randn(4, 2), columns=["x", "y"]) dta = x.merge(y, left_index=True, right_index=True).merge( z, left_index=True, right_index=True, how="outer" diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a09e29b6eea98..990e4a655269c 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -41,7 +41,7 @@ def get_test_data(ngroups=8, n=50): if len(arr) < n: arr = np.asarray(list(arr) + unique_groups[: n - len(arr)]) - np.random.shuffle(arr) + np.random.default_rng(2).shuffle(arr) return arr @@ -113,8 +113,8 @@ def df(self): { "key1": get_test_data(), "key2": get_test_data(), - "data1": np.random.randn(50), - "data2": np.random.randn(50), + "data1": np.random.default_rng(2).randn(50), + "data2": np.random.default_rng(2).randn(50), } ) @@ -128,19 +128,24 @@ def df2(self): { "key1": get_test_data(n=10), "key2": get_test_data(ngroups=4, n=10), - "value": np.random.randn(10), + "value": np.random.default_rng(2).randn(10), } ) @pytest.fixture def left(self): return DataFrame( - {"key": ["a", 
"b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + { + "key": ["a", "b", "c", "d", "e", "e", "a"], + "v1": np.random.default_rng(2).randn(7), + } ) @pytest.fixture def right(self): - return DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) + return DataFrame( + {"v2": np.random.default_rng(2).randn(4)}, index=["d", "b", "c", "a"] + ) def test_merge_inner_join_empty(self): # GH 15328 @@ -178,9 +183,14 @@ def test_merge_index_as_on_arg(self, df, df2): def test_merge_index_singlekey_right_vs_left(self): left = DataFrame( - {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + { + "key": ["a", "b", "c", "d", "e", "e", "a"], + "v1": np.random.default_rng(2).randn(7), + } + ) + right = DataFrame( + {"v2": np.random.default_rng(2).randn(4)}, index=["d", "b", "c", "a"] ) - right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) merged1 = merge( left, right, left_on="key", right_index=True, how="left", sort=False @@ -200,9 +210,14 @@ def test_merge_index_singlekey_right_vs_left(self): def test_merge_index_singlekey_inner(self): left = DataFrame( - {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + { + "key": ["a", "b", "c", "d", "e", "e", "a"], + "v1": np.random.default_rng(2).randn(7), + } + ) + right = DataFrame( + {"v2": np.random.default_rng(2).randn(4)}, index=["d", "b", "c", "a"] ) - right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) # inner join result = merge(left, right, left_on="key", right_index=True, how="inner") @@ -598,8 +613,8 @@ def test_merge_nosort(self): # GH#2098 d = { - "var1": np.random.randint(0, 10, size=10), - "var2": np.random.randint(0, 10, size=10), + "var1": np.random.default_rng(2).randint(0, 10, size=10), + "var2": np.random.default_rng(2).randint(0, 10, size=10), "var3": [ datetime(2012, 1, 12), datetime(2011, 2, 4), @@ -616,7 +631,9 @@ def test_merge_nosort(self): df = DataFrame.from_dict(d) var3 = df.var3.unique() var3 = np.sort(var3) - 
new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)}) + new = DataFrame.from_dict( + {"var3": var3, "var8": np.random.default_rng(2).random(7)} + ) result = df.merge(new, on="var3", sort=False) exp = merge(df, new, on="var3", sort=False) @@ -1819,20 +1836,18 @@ def test_merge_empty(self, left_empty, how, exp): @pytest.fixture def left(): - np.random.seed(1234) return DataFrame( { - "X": Series(np.random.choice(["foo", "bar"], size=(10,))).astype( - CDT(["foo", "bar"]) - ), - "Y": np.random.choice(["one", "two", "three"], size=(10,)), + "X": Series( + np.random.default_rng(2).choice(["foo", "bar"], size=(10,)) + ).astype(CDT(["foo", "bar"])), + "Y": np.random.default_rng(2).choice(["one", "two", "three"], size=(10,)), } ) @pytest.fixture def right(): - np.random.seed(1234) return DataFrame( {"X": Series(["foo", "bar"]).astype(CDT(["foo", "bar"])), "Z": [1, 2]} ) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index b50035f2df6c9..f0801e3aad49c 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -22,7 +22,7 @@ def left(): key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"] key2 = ["two", "one", "three", "one", "two", "one", "two", "two", "three", "one"] - data = np.random.randn(len(key1)) + data = np.random.default_rng(2).randn(len(key1)) return DataFrame({"key1": key1, "key2": key2, "data": data}) @@ -123,11 +123,17 @@ def run_asserts(left, right, sort): tm.assert_frame_equal(out, res) lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) - left = DataFrame(np.random.choice(lc, (5000, 2)), columns=["1st", "3rd"]) + left = DataFrame( + np.random.default_rng(2).choice(lc, (5000, 2)), columns=["1st", "3rd"] + ) # Explicit cast to float to avoid implicit cast when setting nan - left.insert(1, "2nd", np.random.randint(0, 1000, len(left)).astype("float")) + left.insert( + 1, + "2nd", + np.random.default_rng(2).randint(0, 1000, 
len(left)).astype("float"), + ) - i = np.random.permutation(len(left)) + i = np.random.default_rng(2).permutation(len(left)) right = left.iloc[i].copy() left["4th"] = bind_cols(left) @@ -142,7 +148,7 @@ def run_asserts(left, right, sort): left.loc[3::43, "3rd"] = np.nan left["4th"] = bind_cols(left) - i = np.random.permutation(len(left)) + i = np.random.default_rng(2).permutation(len(left)) right = left.iloc[i, :-1] right["5th"] = -bind_cols(right) right.set_index(icols, inplace=True) @@ -191,10 +197,20 @@ def test_compress_group_combinations(self): key1 = np.tile(key1, 2) key2 = key1[::-1] - df = DataFrame({"key1": key1, "key2": key2, "value1": np.random.randn(20000)}) + df = DataFrame( + { + "key1": key1, + "key2": key2, + "value1": np.random.default_rng(2).randn(20000), + } + ) df2 = DataFrame( - {"key1": key1[::2], "key2": key2[::2], "value2": np.random.randn(10000)} + { + "key1": key1[::2], + "key2": key2[::2], + "value2": np.random.default_rng(2).randn(10000), + } ) # just to hit the label compression code path @@ -377,10 +393,10 @@ def test_left_merge_na_buglet(self): left = DataFrame( { "id": list("abcde"), - "v1": np.random.randn(5), - "v2": np.random.randn(5), + "v1": np.random.default_rng(2).randn(5), + "v2": np.random.default_rng(2).randn(5), "dummy": list("abcde"), - "v3": np.random.randn(5), + "v3": np.random.default_rng(2).randn(5), }, columns=["id", "v1", "v2", "dummy", "v3"], ) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 382c102f1194f..8fe13943f2d98 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -57,9 +57,9 @@ def df(): "shiny", "shiny", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) @@ -86,9 +86,9 @@ def test_crosstab_multiple(self, df): @pytest.mark.parametrize("box", 
[np.array, list, tuple]) def test_crosstab_ndarray(self, box): # GH 44076 - a = box(np.random.randint(0, 5, size=100)) - b = box(np.random.randint(0, 3, size=100)) - c = box(np.random.randint(0, 10, size=100)) + a = box(np.random.default_rng(2).randint(0, 5, size=100)) + b = box(np.random.default_rng(2).randint(0, 3, size=100)) + c = box(np.random.default_rng(2).randint(0, 10, size=100)) df = DataFrame({"a": a, "b": b, "c": c}) @@ -126,9 +126,9 @@ def test_crosstab_non_aligned(self): tm.assert_frame_equal(result, expected) def test_crosstab_margins(self): - a = np.random.randint(0, 7, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 5, size=100) + a = np.random.default_rng(2).randint(0, 7, size=100) + b = np.random.default_rng(2).randint(0, 3, size=100) + c = np.random.default_rng(2).randint(0, 5, size=100) df = DataFrame({"a": a, "b": b, "c": c}) @@ -157,9 +157,9 @@ def test_crosstab_margins(self): def test_crosstab_margins_set_margin_name(self): # GH 15972 - a = np.random.randint(0, 7, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 5, size=100) + a = np.random.default_rng(2).randint(0, 7, size=100) + b = np.random.default_rng(2).randint(0, 3, size=100) + c = np.random.default_rng(2).randint(0, 5, size=100) df = DataFrame({"a": a, "b": b, "c": c}) @@ -206,10 +206,10 @@ def test_crosstab_margins_set_margin_name(self): ) def test_crosstab_pass_values(self): - a = np.random.randint(0, 7, size=100) - b = np.random.randint(0, 3, size=100) - c = np.random.randint(0, 5, size=100) - values = np.random.randn(100) + a = np.random.default_rng(2).randint(0, 7, size=100) + b = np.random.default_rng(2).randint(0, 3, size=100) + c = np.random.default_rng(2).randint(0, 5, size=100) + values = np.random.default_rng(2).randn(100) table = crosstab( [a, b], c, values, aggfunc="sum", rownames=["foo", "bar"], colnames=["baz"] @@ -546,8 +546,8 @@ def test_crosstab_with_numpy_size(self): "A": ["one", "one", "two", "three"] * 6, 
"B": ["A", "B", "C"] * 8, "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, - "D": np.random.randn(24), - "E": np.random.randn(24), + "D": np.random.default_rng(2).randn(24), + "E": np.random.default_rng(2).randn(24), } ) result = crosstab( @@ -867,7 +867,7 @@ def test_margin_with_ordered_categorical_column(self): @pytest.mark.parametrize("b_dtype", ["category", "int64"]) def test_categoricals(a_dtype, b_dtype): # https://github.com/pandas-dev/pandas/issues/37465 - g = np.random.RandomState(25982704) + g = np.random.default_rng(2).RandomState(25982704) a = Series(g.randint(0, 3, size=100)).astype(a_dtype) b = Series(g.randint(0, 2, size=100)).astype(b_dtype) result = crosstab(a, b, margins=True, dropna=False) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 28a696be53e7f..0c0f16bcec5d5 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -239,7 +239,7 @@ def test_labels(right, breaks, closed): def test_cut_pass_series_name_to_factor(): name = "foo" - ser = Series(np.random.randn(100), name=name) + ser = Series(np.random.default_rng(2).randn(100), name=name) factor = cut(ser, 4) assert factor.name == name @@ -283,7 +283,7 @@ def test_inf_handling(): def test_cut_out_of_bounds(): - arr = np.random.randn(100) + arr = np.random.default_rng(2).randn(100) result = cut(arr, [-1, 0, 1]) mask = isna(result) @@ -618,7 +618,7 @@ def test_cut_incorrect_labels(labels): @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) def test_cut_nullable_integer(bins, right, include_lowest): - a = np.random.randint(0, 10, size=50).astype(float) + a = np.random.default_rng(2).randint(0, 10, size=50).astype(float) a[::2] = np.nan result = cut( pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest @@ -677,7 +677,7 @@ def test_cut_unordered_with_series_labels(): def test_cut_no_warnings(): - df = DataFrame({"value": np.random.randint(0, 100, 
20)}) + df = DataFrame({"value": np.random.default_rng(2).randint(0, 100, 20)}) labels = [f"{i} - {i + 9}" for i in range(0, 100, 10)] with tm.assert_produces_warning(False): df["group"] = cut(df.value, range(0, 105, 10), right=False, labels=labels) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index ea9da4e87240b..d4f490d9d26ed 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -322,7 +322,7 @@ def test_melt_missing_columns_raises(self): # attempted with column names absent from the dataframe # Generate data - df = DataFrame(np.random.randn(5, 4), columns=list("abcd")) + df = DataFrame(np.random.default_rng(2).randn(5, 4), columns=list("abcd")) # Try to melt with missing `value_vars` column name msg = "The following '{Var}' are not present in the DataFrame: {Col}" @@ -668,8 +668,7 @@ def test_pairs(self): class TestWideToLong: def test_simple(self): - np.random.seed(123) - x = np.random.randn(3) + x = np.random.default_rng(2).randn(3) df = DataFrame( { "A1970": {0: "a", 1: "b", 2: "c"}, @@ -704,8 +703,8 @@ def test_stubs(self): def test_separating_character(self): # GH14779 - np.random.seed(123) - x = np.random.randn(3) + + x = np.random.default_rng(2).randn(3) df = DataFrame( { "A.1970": {0: "a", 1: "b", 2: "c"}, @@ -729,8 +728,7 @@ def test_separating_character(self): tm.assert_frame_equal(result, expected) def test_escapable_characters(self): - np.random.seed(123) - x = np.random.randn(3) + x = np.random.default_rng(2).randn(3) df = DataFrame( { "A(quarterly)1970": {0: "a", 1: "b", 2: "c"}, diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1e122442cd40c..d1621d74b1505 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -83,9 +83,9 @@ def data(self): "shiny", "shiny", ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), + "D": np.random.default_rng(2).randn(11), + "E": 
np.random.default_rng(2).randn(11), + "F": np.random.default_rng(2).randn(11), } ) @@ -1068,7 +1068,13 @@ def test_pivot_table_multiindex_only(self, cols): def test_pivot_table_retains_tz(self): dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam") - df = DataFrame({"A": np.random.randn(3), "B": np.random.randn(3), "C": dti}) + df = DataFrame( + { + "A": np.random.default_rng(2).randn(3), + "B": np.random.default_rng(2).randn(3), + "C": dti, + } + ) result = df.pivot_table(index=["B", "C"], dropna=False) # check tz retention @@ -1103,7 +1109,7 @@ def test_pivot_no_level_overlap(self): "a": ["a", "a", "a", "a", "b", "b", "b", "b"] * 2, "b": [0, 0, 0, 0, 1, 1, 1, 1] * 2, "c": (["foo"] * 4 + ["bar"] * 4) * 2, - "value": np.random.randn(16), + "value": np.random.default_rng(2).randn(16), } ) @@ -1143,15 +1149,15 @@ def test_pivot_columns_lexsorted(self): dtype=[("Index", object), ("Symbol", object)], ) items = np.empty(n, dtype=dtype) - iproduct = np.random.randint(0, len(products), n) + iproduct = np.random.default_rng(2).randint(0, len(products), n) items["Index"] = products["Index"][iproduct] items["Symbol"] = products["Symbol"][iproduct] dr = date_range(date(2000, 1, 1), date(2010, 12, 31)) - dates = dr[np.random.randint(0, len(dr), n)] + dates = dr[np.random.default_rng(2).randint(0, len(dr), n)] items["Year"] = dates.year items["Month"] = dates.month items["Day"] = dates.day - items["Price"] = np.random.lognormal(4.0, 2.0, n) + items["Price"] = np.random.default_rng(2).lognormal(4.0, 2.0, n) df = DataFrame(items) @@ -1684,7 +1690,7 @@ def test_pivot_dtaccessor(self): @pytest.mark.parametrize("i", range(1, 367)) def test_daily(self, i): rng = date_range("1/1/2000", "12/31/2004", freq="D") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) annual = pivot_table( DataFrame(ts), index=ts.index.year, columns=ts.index.dayofyear @@ -1703,7 +1709,7 @@ def test_daily(self, i): 
@pytest.mark.parametrize("i", range(1, 13)) def test_monthly(self, i): rng = date_range("1/1/2000", "12/31/2004", freq="M") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month) annual.columns = annual.columns.droplevel(0) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index f7c7204d02a49..90c8e8c84cc46 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -29,7 +29,7 @@ def test_qcut(): - arr = np.random.randn(1000) + arr = np.random.default_rng(2).randn(1000) # We store the bins as Index that have been # rounded to comparisons are a bit tricky. @@ -47,14 +47,14 @@ def test_qcut(): def test_qcut_bounds(): - arr = np.random.randn(1000) + arr = np.random.default_rng(2).randn(1000) factor = qcut(arr, 10, labels=False) assert len(np.unique(factor)) == 10 def test_qcut_specify_quantiles(): - arr = np.random.randn(100) + arr = np.random.default_rng(2).randn(100) factor = qcut(arr, [0, 0.25, 0.5, 0.75, 1.0]) expected = qcut(arr, 4) @@ -82,7 +82,7 @@ def test_qcut_include_lowest(): def test_qcut_nas(): - arr = np.random.randn(100) + arr = np.random.default_rng(2).randn(100) arr[:20] = np.nan result = qcut(arr, 4) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 283a3a9e7148d..67fdc35896ada 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -290,7 +290,6 @@ def test_utc_z_designator(self): assert get_timezone(Timestamp("2014-11-02 01:00Z").tzinfo) is timezone.utc def test_asm8(self): - np.random.seed(7_960_929) ns = [Timestamp.min._value, Timestamp.max._value, 1000] for n in ns: diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 5cdeee20f3435..483409e5943bb 
100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -689,7 +689,12 @@ def test_dt_accessor_api(self): assert isinstance(ser.dt, DatetimeProperties) @pytest.mark.parametrize( - "ser", [Series(np.arange(5)), Series(list("abcde")), Series(np.random.randn(5))] + "ser", + [ + Series(np.arange(5)), + Series(list("abcde")), + Series(np.random.default_rng(2).randn(5)), + ], ) def test_dt_accessor_invalid(self, ser): # GH#9322 check that series with incorrect dtypes don't have attr diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index f47e344336a8b..813850efb0e4e 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -77,7 +77,7 @@ def test_getitem_setitem_datetime_tz(tz_source): N = 50 # testing with timezone, GH #2785 rng = date_range("1/1/1990", periods=N, freq="H", tz=tzget("US/Eastern")) - ts = Series(np.random.randn(N), index=rng) + ts = Series(np.random.default_rng(2).randn(N), index=rng) # also test Timestamp tz handling, GH #2789 result = ts.copy() @@ -108,7 +108,7 @@ def test_getitem_setitem_datetimeindex(): N = 50 # testing with timezone, GH #2785 rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") - ts = Series(np.random.randn(N), index=rng) + ts = Series(np.random.default_rng(2).randn(N), index=rng) result = ts["1990-01-01 04:00:00"] expected = ts.iloc[4] @@ -214,7 +214,7 @@ def test_getitem_setitem_datetimeindex(): def test_getitem_setitem_periodindex(): N = 50 rng = period_range("1/1/1990", periods=N, freq="H") - ts = Series(np.random.randn(N), index=rng) + ts = Series(np.random.default_rng(2).randn(N), index=rng) result = ts["1990-01-01 04"] expected = ts.iloc[4] @@ -330,11 +330,13 @@ def test_loc_getitem_over_size_cutoff(monkeypatch): d += 3 * sec # duplicate some values in the list - duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + 
duplicate_positions = np.random.default_rng(2).randint(0, len(dates) - 1, 20) for p in duplicate_positions: dates[p + 1] = dates[p] - df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")) + df = DataFrame( + np.random.default_rng(2).randn(len(dates), 4), index=dates, columns=list("ABCD") + ) pos = n * 3 timestamp = df.index[pos] @@ -354,7 +356,7 @@ def test_indexing_over_size_cutoff_period_index(monkeypatch): idx = period_range("1/1/2000", freq="T", periods=n) assert idx._engine.over_size_threshold - s = Series(np.random.randn(len(idx)), index=idx) + s = Series(np.random.default_rng(2).randn(len(idx)), index=idx) pos = n - 1 timestamp = idx[pos] @@ -368,7 +370,7 @@ def test_indexing_over_size_cutoff_period_index(monkeypatch): def test_indexing_unordered(): # GH 2437 rng = date_range(start="2011-01-01", end="2011-01-15") - ts = Series(np.random.rand(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).rand(len(rng)), index=rng) ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]]) for t in ts.index: @@ -405,7 +407,7 @@ def test_indexing_unordered2(): # diff freq rng = date_range(datetime(2005, 1, 1), periods=20, freq="M") ts = Series(np.arange(len(rng)), index=rng) - ts = ts.take(np.random.permutation(20)) + ts = ts.take(np.random.default_rng(2).permutation(20)) result = ts["2005"] for t in result.index: @@ -414,7 +416,7 @@ def test_indexing_unordered2(): def test_indexing(): idx = date_range("2001-1-1", periods=20, freq="M") - ts = Series(np.random.rand(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).rand(len(idx)), index=idx) # getting @@ -466,7 +468,7 @@ def test_getitem_str_year_with_datetimeindex(): def test_getitem_str_second_with_datetimeindex(): # GH14826, indexing with a seconds resolution string / datetime object df = DataFrame( - np.random.rand(5, 5), + np.random.default_rng(2).rand(5, 5), columns=["open", "high", "low", "close", "volume"], index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", 
freq="s"), ) diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py index e64a91d9ca581..38ebe1f8377a1 100644 --- a/pandas/tests/series/indexing/test_get.py +++ b/pandas/tests/series/indexing/test_get.py @@ -166,7 +166,10 @@ def test_get_with_default(): @pytest.mark.parametrize( "arr", - [np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")], + [ + np.random.default_rng(2).randn(10), + tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"), + ], ) def test_get_with_ea(arr): # GH#21260 diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 8bfa59c5d9f08..d9b505ad1a2fb 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -94,7 +94,10 @@ def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self): def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype): dtype = any_int_numpy_dtype - ser = Series(np.random.randn(6), index=Index([0, 0, 1, 1, 2, 2], dtype=dtype)) + ser = Series( + np.random.default_rng(2).randn(6), + index=Index([0, 0, 1, 1, 2, 2], dtype=dtype), + ) with pytest.raises(KeyError, match=r"^5$"): ser[5] @@ -103,7 +106,7 @@ def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype): ser["c"] # not monotonic - ser = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1]) + ser = Series(np.random.default_rng(2).randn(6), index=[2, 2, 0, 0, 1, 1]) with pytest.raises(KeyError, match=r"^5$"): ser[5] @@ -144,14 +147,14 @@ def test_getitem_pydatetime_tz(self, tzstr): @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_string_index_alias_tz_aware(self, tz): rng = date_range("1/1/2000", periods=10, tz=tz) - ser = Series(np.random.randn(len(rng)), index=rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), index=rng) result = ser["1/3/2000"] tm.assert_almost_equal(result, ser.iloc[2]) def test_getitem_time_object(self): 
rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) mask = (rng.hour == 9) & (rng.minute == 30) result = ts[time(9, 30)] @@ -235,7 +238,7 @@ def test_getitem_slice_strings_with_datetimeindex(self): ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"] ) - ts = Series(np.random.randn(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).randn(len(idx)), index=idx) result = ts["1/2/2000":] expected = ts[1:] @@ -284,7 +287,7 @@ def test_getitem_slice_2d(self, datetime_series): def test_getitem_median_slice_bug(self): index = date_range("20090415", "20090519", freq="2B") - ser = Series(np.random.randn(13), index=index) + ser = Series(np.random.default_rng(2).randn(13), index=index) indexer = [slice(6, 7, None)] msg = "Indexing with a single-item list" @@ -339,7 +342,9 @@ def test_getitem_slice_bug(self): tm.assert_series_equal(result, ser[:0]) def test_getitem_slice_integers(self): - ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) + ser = Series( + np.random.default_rng(2).randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16] + ) result = ser[:4] expected = Series(ser.values[:4], index=[2, 4, 6, 8]) @@ -634,7 +639,7 @@ def test_getitem_preserve_name(datetime_series): def test_getitem_with_integer_labels(): # integer indexes, be careful - ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) with pytest.raises(KeyError, match="not in index"): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index dfc8afbdf3acb..2c33a6e46d95a 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -25,7 +25,7 @@ def test_basic_indexing(): - s = Series(np.random.randn(5), index=["a", 
"b", "a", "a", "b"]) + s = Series(np.random.default_rng(2).randn(5), index=["a", "b", "a", "a", "b"]) warn_msg = "Series.__[sg]etitem__ treating keys as positions is deprecated" msg = "index 5 is out of bounds for axis 0 with size 5" @@ -98,7 +98,7 @@ def test_basic_getitem_dt64tz_values(): def test_getitem_setitem_ellipsis(): - s = Series(np.random.randn(10)) + s = Series(np.random.default_rng(2).randn(10)) result = s[...] tm.assert_series_equal(result, s) @@ -307,7 +307,7 @@ def test_preserve_refs(datetime_series): def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl): index = lexsorted_two_level_string_multiindex - ser = Series(np.random.randn(len(index)), index=index, name="sth") + ser = Series(np.random.default_rng(2).randn(len(index)), index=index, name="sth") result = indexer_sl(ser)["foo"] assert result.name == ser.name diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index 28235a8918e3f..4a4b850f8fb36 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -7,7 +7,7 @@ def test_mask(): # compare with tested results in test_where - s = Series(np.random.randn(5)) + s = Series(np.random.default_rng(2).randn(5)) cond = s > 0 rs = s.where(~cond, np.nan) @@ -56,7 +56,7 @@ def test_mask_casts2(): def test_mask_inplace(): - s = Series(np.random.randn(5)) + s = Series(np.random.default_rng(2).randn(5)) cond = s > 0 rs = s.copy() diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index e87a968dee323..9704e2e628923 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -221,7 +221,9 @@ def test_setitem_slice(self): assert (ser == 0).all() def test_setitem_slice_integers(self): - ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) + ser = Series( + np.random.default_rng(2).randn(8), index=[2, 4, 6, 8, 10, 12, 14, 
16] + ) ser[:4] = 0 assert (ser[:4] == 0).all() @@ -255,7 +257,9 @@ def test_setitem_mask_cast(self): def test_setitem_mask_align_and_promote(self): # GH#8387: test that changing types does not break alignment - ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5) + ts = Series( + np.random.default_rng(2).randn(100), index=np.arange(100, 0, -1) + ).round(5) mask = ts > 0 left = ts.copy() right = ts[mask].copy().map(str) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 0c8cb493141b7..c6d967f87702f 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -114,7 +114,7 @@ def test_where_unsafe(): def test_where(): - s = Series(np.random.randn(5)) + s = Series(np.random.default_rng(2).randn(5)) cond = s > 0 rs = s.where(cond).dropna() @@ -143,7 +143,7 @@ def test_where(): def test_where_error(): - s = Series(np.random.randn(5)) + s = Series(np.random.default_rng(2).randn(5)) cond = s > 0 msg = "Array conditional must be same shape as self" @@ -319,7 +319,7 @@ def test_broadcast(size, mask, item, box): def test_where_inplace(): - s = Series(np.random.randn(5)) + s = Series(np.random.default_rng(2).randn(5)) cond = s > 0 rs = s.copy() diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index 3edbe1b2f61f3..eb75b81d8d5e5 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -192,7 +192,7 @@ def test_align_with_dataframe_method(method): def test_align_dt64tzindex_mismatched_tzs(): idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") - ser = Series(np.random.randn(len(idx1)), index=idx1) + ser = Series(np.random.default_rng(2).randn(len(idx1)), index=idx1) ser_central = ser.tz_convert("US/Central") # different timezones convert to UTC @@ -203,7 +203,7 @@ def test_align_dt64tzindex_mismatched_tzs(): def test_align_periodindex(join_type): rng 
= period_range("1/1/2000", "1/1/2010", freq="A") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) # TODO: assert something? ts.align(ts[::2], join=join_type) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index e1d64795e235d..0106d0270b771 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -46,7 +46,7 @@ def test_argsort(self, datetime_series): tm.assert_series_equal(result, expected) def test_argsort_stable(self): - s = Series(np.random.randint(0, 100, size=10000)) + s = Series(np.random.default_rng(2).randint(0, 100, size=10000)) mindexer = s.argsort(kind="mergesort") qindexer = s.argsort() diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 5ee5671d1dea3..1d0b611decb00 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -21,7 +21,7 @@ class TestSeriesAsof: def test_asof_nanosecond_index_access(self): ts = Timestamp("20130101").as_unit("ns")._value dti = DatetimeIndex([ts + 50 + i for i in range(100)]) - ser = Series(np.random.randn(100), index=dti) + ser = Series(np.random.default_rng(2).randn(100), index=dti) first_value = ser.asof(ser.index[0]) @@ -39,7 +39,7 @@ def test_basic(self): # array or list or dates N = 50 rng = date_range("1/1/1990", periods=N, freq="53s") - ts = Series(np.random.randn(N), index=rng) + ts = Series(np.random.default_rng(2).randn(N), index=rng) ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") @@ -119,7 +119,7 @@ def test_periodindex(self): # array or list or dates N = 50 rng = period_range("1/1/1990", periods=N, freq="H") - ts = Series(np.random.randn(N), index=rng) + ts = Series(np.random.default_rng(2).randn(N), index=rng) ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="37min") @@ -177,7 
+177,7 @@ def test_errors(self): # subset with Series N = 10 rng = date_range("1/1/1990", periods=N, freq="53s") - s = Series(np.random.randn(N), index=rng) + s = Series(np.random.default_rng(2).randn(N), index=rng) with pytest.raises(ValueError, match="not valid for Series"): s.asof(s.index[0], subset="foo") diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index d72c8599dfe5e..9d943c582bacf 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -326,7 +326,7 @@ def test_astype_to_str_preserves_na(self, value, string_value): @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) def test_astype(self, dtype): - ser = Series(np.random.randn(5), name="foo") + ser = Series(np.random.default_rng(2).randn(5), name="foo") as_typed = ser.astype(dtype) assert as_typed.dtype == dtype @@ -489,7 +489,9 @@ def test_astype_string_to_extension_dtype_roundtrip( class TestAstypeCategorical: def test_astype_categorical_to_other(self): cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() + ser = Series( + np.random.default_rng(2).randint(0, 10000, 100) + ).sort_values() ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) expected = ser @@ -532,7 +534,7 @@ def cmp(a, b): def test_astype_categorical_invalid_conversions(self): # invalid conversion (these are NOT a dtype) cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series(np.random.randint(0, 10000, 100)).sort_values() + ser = Series(np.random.default_rng(2).randint(0, 10000, 100)).sort_values() ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) msg = ( diff --git a/pandas/tests/series/methods/test_autocorr.py b/pandas/tests/series/methods/test_autocorr.py index 05e3540a7e702..3ca35e8049c14 100644 --- a/pandas/tests/series/methods/test_autocorr.py +++
b/pandas/tests/series/methods/test_autocorr.py @@ -18,7 +18,7 @@ def test_autocorr(self, datetime_series): # Choose a random lag between 1 and length of Series - 2 # and compare the result with the Series corr() function - n = 1 + np.random.randint(max(1, len(datetime_series) - 2)) + n = 1 + np.random.default_rng(2).randint(max(1, len(datetime_series) - 2)) corr1 = datetime_series.corr(datetime_series.shift(n)) corr2 = datetime_series.autocorr(lag=n) diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index fb6f7e386d5d5..6ee8df47a433c 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -52,7 +52,7 @@ def test_combine_first(self): # mixed types index = tm.makeStringIndex(20) - floats = Series(np.random.randn(20), index=index) + floats = Series(np.random.default_rng(2).randn(20), index=index) strings = Series(tm.makeStringIndex(10), index=index[::2]) combined = strings.combine_first(floats) diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py index 6ab255cfa3d25..2e7d51fad48d8 100644 --- a/pandas/tests/series/methods/test_cov_corr.py +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -45,8 +45,8 @@ def test_cov(self, datetime_series): @pytest.mark.parametrize("dtype", ["float64", "Float64"]) def test_cov_ddof(self, test_ddof, dtype): # GH#34611 - np_array1 = np.random.rand(10) - np_array2 = np.random.rand(10) + np_array1 = np.random.default_rng(2).rand(10) + np_array2 = np.random.default_rng(2).rand(10) s1 = Series(np_array1, dtype=dtype) s2 = Series(np_array2, dtype=dtype) @@ -142,8 +142,8 @@ def test_corr_rank(self): def test_corr_invalid_method(self): # GH PR #22298 - s1 = Series(np.random.randn(10)) - s2 = Series(np.random.randn(10)) + s1 = Series(np.random.default_rng(2).randn(10)) + s2 = Series(np.random.default_rng(2).randn(10)) msg = "method must be either 'pearson', 
'spearman', 'kendall', or a callable, " with pytest.raises(ValueError, match=msg): s1.corr(s2, method="____") diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index aaffd52b78f95..15c5c73c5159b 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -801,7 +801,7 @@ def test_fillna_invalid_method(self, datetime_series): assert "ffil" in str(inst) def test_fillna_listlike_invalid(self): - ser = Series(np.random.randint(-100, 100, 50)) + ser = Series(np.random.default_rng(2).randint(-100, 100, 50)) msg = '"value" parameter must be a scalar or dict, but you passed a "list"' with pytest.raises(TypeError, match=msg): ser.fillna([1, 2]) @@ -910,7 +910,7 @@ def test_pad_nan(self): def test_series_fillna_limit(self): index = np.arange(10) - s = Series(np.random.randn(10), index=index) + s = Series(np.random.default_rng(2).randn(10), index=index) result = s[:2].reindex(index) result = result.fillna(method="pad", limit=5) @@ -928,7 +928,7 @@ def test_series_fillna_limit(self): def test_series_pad_backfill_limit(self): index = np.arange(10) - s = Series(np.random.randn(10), index=index) + s = Series(np.random.default_rng(2).randn(10), index=index) result = s[:2].reindex(index, method="pad", limit=5) @@ -943,7 +943,7 @@ def test_series_pad_backfill_limit(self): tm.assert_series_equal(result, expected) def test_fillna_int(self): - ser = Series(np.random.randint(-100, 100, 50)) + ser = Series(np.random.default_rng(2).randint(-100, 100, 50)) return_value = ser.fillna(method="ffill", inplace=True) assert return_value is None tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 4a8bf554742dd..0e3755281640f 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -134,7 +134,7 @@ def 
test_interpolate_cubicspline(self): @td.skip_if_no_scipy def test_interpolate_pchip(self): - ser = Series(np.sort(np.random.uniform(size=100))) + ser = Series(np.sort(np.random.default_rng(2).uniform(size=100))) # interpolate at new_index new_index = ser.index.union( @@ -217,8 +217,8 @@ def test_interpolate_corners(self, kwargs): tm.assert_series_equal(s.interpolate(**kwargs), s) def test_interpolate_index_values(self): - s = Series(np.nan, index=np.sort(np.random.rand(30))) - s[::3] = np.random.randn(10) + s = Series(np.nan, index=np.sort(np.random.default_rng(2).rand(30))) + s[::3] = np.random.default_rng(2).randn(10) vals = s.index.values.astype(float) @@ -743,7 +743,7 @@ def test_spline_smooth(self): def test_spline_interpolation(self): # Explicit cast to float to avoid implicit cast when setting np.nan s = Series(np.arange(10) ** 2, dtype="float") - s[np.random.randint(0, 9, 3)] = np.nan + s[np.random.default_rng(2).randint(0, 9, 3)] = np.nan result1 = s.interpolate(method="spline", order=1) expected1 = s.interpolate(method="spline", order=1) tm.assert_series_equal(result1, expected1) @@ -764,7 +764,7 @@ def test_interp_timedelta64(self): def test_series_interpolate_method_values(self): # GH#1646 rng = date_range("1/1/2000", "1/20/2000", freq="D") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) ts[::2] = np.nan diff --git a/pandas/tests/series/methods/test_is_monotonic.py b/pandas/tests/series/methods/test_is_monotonic.py index b60c3cc8fcb5a..c36399f09afcb 100644 --- a/pandas/tests/series/methods/test_is_monotonic.py +++ b/pandas/tests/series/methods/test_is_monotonic.py @@ -8,7 +8,7 @@ class TestIsMonotonic: def test_is_monotonic_numeric(self): - ser = Series(np.random.randint(0, 10, size=1000)) + ser = Series(np.random.default_rng(2).randint(0, 10, size=1000)) assert not ser.is_monotonic_increasing ser = Series(np.arange(1000)) assert ser.is_monotonic_increasing is True diff --git 
a/pandas/tests/series/methods/test_is_unique.py b/pandas/tests/series/methods/test_is_unique.py index db77f77467b42..678b016fcf301 100644 --- a/pandas/tests/series/methods/test_is_unique.py +++ b/pandas/tests/series/methods/test_is_unique.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize( "data, expected", [ - (np.random.randint(0, 10, size=1000), False), + (np.random.default_rng(2).randint(0, 10, size=1000), False), (np.arange(1000), True), ([], True), ([np.nan], True), diff --git a/pandas/tests/series/methods/test_matmul.py b/pandas/tests/series/methods/test_matmul.py index b944395bff29f..7041142caf80f 100644 --- a/pandas/tests/series/methods/test_matmul.py +++ b/pandas/tests/series/methods/test_matmul.py @@ -13,9 +13,11 @@ class TestMatmul: def test_matmul(self): # matmul test is for GH#10259 - a = Series(np.random.randn(4), index=["p", "q", "r", "s"]) + a = Series(np.random.default_rng(2).randn(4), index=["p", "q", "r", "s"]) b = DataFrame( - np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"] + np.random.default_rng(2).randn(3, 4), + index=["1", "2", "3"], + columns=["p", "q", "r", "s"], ).T # Series @ DataFrame -> Series diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index ecc5d3060c0a2..1deb45ff3a962 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -219,9 +219,9 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype): dtype = any_numeric_ea_dtype if dtype.startswith("UInt"): # Can't cast from negative float to uint on some platforms - arr = np.random.randint(1, 10, 10) + arr = np.random.default_rng(2).randint(1, 10, 10) else: - arr = np.random.randn(10) + arr = np.random.default_rng(2).randn(10) arr = arr.astype(dtype.lower(), copy=False) ser = Series(arr.copy(), dtype=dtype) diff --git a/pandas/tests/series/methods/test_nunique.py b/pandas/tests/series/methods/test_nunique.py index 50d3b9331b2b2..76117c706b401 100644 
--- a/pandas/tests/series/methods/test_nunique.py +++ b/pandas/tests/series/methods/test_nunique.py @@ -8,7 +8,7 @@ def test_nunique(): # basics.rst doc example - series = Series(np.random.randn(500)) + series = Series(np.random.default_rng(2).randn(500)) series[20:500] = np.nan series[10:20] = 5000 result = series.nunique() diff --git a/pandas/tests/series/methods/test_quantile.py b/pandas/tests/series/methods/test_quantile.py index 40dee1bb17155..07abe19808e4d 100644 --- a/pandas/tests/series/methods/test_quantile.py +++ b/pandas/tests/series/methods/test_quantile.py @@ -43,7 +43,7 @@ def test_quantile(self, datetime_series): with pytest.raises(ValueError, match=msg): datetime_series.quantile(invalid) - s = Series(np.random.randn(100)) + s = Series(np.random.default_rng(2).randn(100)) percentile_array = [-0.5, 0.25, 1.5] with pytest.raises(ValueError, match=msg): s.quantile(percentile_array) diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index 2d71dfa24ad15..c2fd221ec711b 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -141,7 +141,7 @@ def test_rank(self, datetime_series): [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40], dtype="float64", ) - random_order = np.random.permutation(len(values)) + random_order = np.random.default_rng(2).permutation(len(values)) iseries = Series(values[random_order]) exp = Series(random_order + 1.0, dtype="float64") iranks = iseries.rank() @@ -322,9 +322,9 @@ def test_rank_desc_mix_nans_infs(self): def test_rank_methods_series(self, method, op, value): from scipy.stats import rankdata - xs = np.random.randn(9) + xs = np.random.default_rng(2).randn(9) xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates - np.random.shuffle(xs) + np.random.default_rng(2).shuffle(xs) index = [chr(ord("a") + i) for i in range(len(xs))] vals = op(xs, value) diff --git a/pandas/tests/series/methods/test_reindex.py 
b/pandas/tests/series/methods/test_reindex.py index cf042b310ff63..ee2077dfb33f1 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -87,7 +87,7 @@ def test_reindex_series_add_nat(): def test_reindex_with_datetimes(): rng = date_range("1/1/2000", periods=20) - ts = Series(np.random.randn(20), index=rng) + ts = Series(np.random.default_rng(2).randn(20), index=rng) result = ts.reindex(list(ts.index[5:10])) expected = ts[5:10] diff --git a/pandas/tests/series/methods/test_repeat.py b/pandas/tests/series/methods/test_repeat.py index e63317f685556..794ac2b1aab4b 100644 --- a/pandas/tests/series/methods/test_repeat.py +++ b/pandas/tests/series/methods/test_repeat.py @@ -10,7 +10,7 @@ class TestRepeat: def test_repeat(self): - ser = Series(np.random.randn(3), index=["a", "b", "c"]) + ser = Series(np.random.default_rng(2).randn(3), index=["a", "b", "c"]) reps = ser.repeat(5) exp = Series(ser.values.repeat(5), index=ser.index.values.repeat(5)) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 50b9714082054..fa30264f5eee7 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -51,7 +51,7 @@ def test_replace_noop_doesnt_downcast(self): def test_replace(self): N = 100 - ser = pd.Series(np.random.randn(N)) + ser = pd.Series(np.random.default_rng(2).randn(N)) ser[0:4] = np.nan ser[6:10] = 0 @@ -66,7 +66,11 @@ def test_replace(self): ser[ser == 0.0] = np.nan tm.assert_series_equal(rs, ser) - ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) + ser = pd.Series( + np.fabs(np.random.default_rng(2).randn(N)), + tm.makeDateIndex(N), + dtype=object, + ) ser[:5] = np.nan ser[6:10] = "foo" ser[20:30] = "bar" @@ -280,7 +284,11 @@ def test_replace_Int_with_na(self, any_int_ea_dtype): def test_replace2(self): N = 100 - ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), 
dtype=object) + ser = pd.Series( + np.fabs(np.random.default_rng(2).randn(N)), + tm.makeDateIndex(N), + dtype=object, + ) ser[:5] = np.nan ser[6:10] = "foo" ser[20:30] = "bar" diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index ec38c5b8b744b..36ae8a4219158 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -18,7 +18,7 @@ class TestResetIndex: def test_reset_index_dti_round_trip(self): dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) - d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti) + d1 = DataFrame({"v": np.random.default_rng(2).rand(len(dti))}, index=dti) d2 = d1.reset_index() assert d2.dtypes.iloc[0] == np.dtype("M8[ns]") d3 = d2.set_index("index") @@ -56,7 +56,7 @@ def test_reset_index(self): levels=[["bar"], ["one", "two", "three"], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], ) - s = Series(np.random.randn(6), index=index) + s = Series(np.random.default_rng(2).randn(6), index=index) rs = s.reset_index(level=1) assert len(rs.columns) == 2 diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index e0dde50bfdc7f..c5d4694f8bdbd 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -1,5 +1,3 @@ -import random - import numpy as np import pytest @@ -26,7 +24,7 @@ def test_sort_index(self, datetime_series): datetime_series.index = datetime_series.index._with_freq(None) rindex = list(datetime_series.index) - random.shuffle(rindex) + np.random.default_rng(2).shuffle(rindex) random_order = datetime_series.reindex(rindex) sorted_series = random_order.sort_index() @@ -61,7 +59,7 @@ def test_sort_index_inplace(self, datetime_series): # For GH#11402 rindex = list(datetime_series.index) - random.shuffle(rindex) + np.random.default_rng(2).shuffle(rindex) # 
descending random_order = datetime_series.reindex(rindex) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index ea239c753ecb5..6a2e4747be13e 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -77,7 +77,7 @@ def test_sort_values(self, datetime_series, using_copy_on_write): # GH#5856/5853 # Series.sort_values operating on a view - df = DataFrame(np.random.randn(10, 4)) + df = DataFrame(np.random.default_rng(2).randn(10, 4)) s = df.iloc[:, 0] msg = ( diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index 6e1c76bd170c6..1ef45c0f521fb 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -46,7 +46,7 @@ def test_unstack(): levels=[["bar"], ["one", "two", "three"], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], ) - s = Series(np.random.randn(6), index=index) + s = Series(np.random.default_rng(2).randn(6), index=index) exp_index = MultiIndex( levels=[["one", "two", "three"], [0, 1]], codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 7d70206585be4..b355930bab756 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -120,7 +120,7 @@ def test_class_axis(self): def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( - np.random.randn(1000, 3), + np.random.default_rng(2).randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) @@ -134,14 +134,14 @@ def f(x): def test_ndarray_compat_like_func(self): # using an ndarray like function - s = Series(np.random.randn(10)) + s = Series(np.random.default_rng(2).randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") 
tm.assert_series_equal(result, expected) def test_ndarray_compat_ravel(self): # ravel - s = Series(np.random.randn(10)) + s = Series(np.random.default_rng(2).randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) def test_empty_method(self): @@ -179,7 +179,7 @@ def test_inspect_getmembers(self): def test_unknown_attribute(self): # GH#9680 tdi = pd.timedelta_range(start=0, periods=10, freq="1s") - ser = Series(np.random.normal(size=10), index=tdi) + ser = Series(np.random.default_rng(2).normal(size=10), index=tdi) assert "foo" not in ser.__dict__ msg = "'Series' object has no attribute 'foo'" with pytest.raises(AttributeError, match=msg): diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index a0edfae606e3f..4981f7bab7a8e 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -38,7 +38,7 @@ def switch_numexpr_min_elements(request): def _permute(obj): - return obj.take(np.random.permutation(len(obj))) + return obj.take(np.random.default_rng(2).permutation(len(obj))) class TestSeriesFlexArithmetic: @@ -155,7 +155,7 @@ class TestSeriesArithmetic: def test_add_series_with_period_index(self): rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) result = ts + ts[::2] expected = ts + ts @@ -369,15 +369,15 @@ def test_add_list_to_masked_array_boolean(self, request): class TestSeriesFlexComparison: @pytest.mark.parametrize("axis", [0, None, "index"]) def test_comparison_flex_basic(self, axis, comparison_op): - left = Series(np.random.randn(10)) - right = Series(np.random.randn(10)) + left = Series(np.random.default_rng(2).randn(10)) + right = Series(np.random.default_rng(2).randn(10)) result = getattr(left, comparison_op.__name__)(right, axis=axis) expected = comparison_op(left, right) tm.assert_series_equal(result, expected) def 
test_comparison_bad_axis(self, comparison_op): - left = Series(np.random.randn(10)) - right = Series(np.random.randn(10)) + left = Series(np.random.default_rng(2).randn(10)) + right = Series(np.random.default_rng(2).randn(10)) msg = "No axis named 1 for object type" with pytest.raises(ValueError, match=msg): @@ -714,14 +714,16 @@ class TestTimeSeriesArithmetic: def test_series_add_tz_mismatch_converts_to_utc(self): rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") - perm = np.random.permutation(100)[:90] + perm = np.random.default_rng(2).permutation(100)[:90] ser1 = Series( - np.random.randn(90), index=rng.take(perm).tz_convert("US/Eastern") + np.random.default_rng(2).randn(90), + index=rng.take(perm).tz_convert("US/Eastern"), ) - perm = np.random.permutation(100)[:90] + perm = np.random.default_rng(2).permutation(100)[:90] ser2 = Series( - np.random.randn(90), index=rng.take(perm).tz_convert("Europe/Berlin") + np.random.default_rng(2).randn(90), + index=rng.take(perm).tz_convert("Europe/Berlin"), ) result = ser1 + ser2 @@ -735,7 +737,7 @@ def test_series_add_tz_mismatch_converts_to_utc(self): def test_series_add_aware_naive_raises(self): rng = date_range("1/1/2011", periods=10, freq="H") - ser = Series(np.random.randn(len(rng)), index=rng) + ser = Series(np.random.default_rng(2).randn(len(rng)), index=rng) ser_utc = ser.tz_localize("utc") @@ -757,7 +759,7 @@ def test_datetime_understood(self): def test_align_date_objects_with_datetimeindex(self): rng = date_range("1/1/2000", periods=20) - ts = Series(np.random.randn(20), index=rng) + ts = Series(np.random.default_rng(2).randn(20), index=rng) ts_slice = ts[5:] ts2 = ts_slice.copy() diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c7536273862c0..52334bd8b25bf 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -177,7 +177,7 @@ def test_constructor(self, datetime_series): ValueError, match=r"Data must 
be 1-dimensional, got ndarray of shape \(3, 3\) instead", ): - Series(np.random.randn(3, 3), index=np.arange(3)) + Series(np.random.default_rng(2).randn(3, 3), index=np.arange(3)) mixed.name = "Series" rs = Series(mixed).name @@ -810,7 +810,7 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype) def test_constructor_floating_data_int_dtype(self, frame_or_series): # GH#40110 - arr = np.random.randn(2) + arr = np.random.default_rng(2).randn(2) # Long-standing behavior (for Series, new in 2.0 for DataFrame) # has been to ignore the dtype on these; @@ -1843,7 +1843,9 @@ def test_constructor_subclass_dict(self, dict_subclass): def test_constructor_ordereddict(self): # GH3283 - data = OrderedDict((f"col{i}", np.random.random()) for i in range(12)) + data = OrderedDict( + (f"col{i}", np.random.default_rng(2).random()) for i in range(12) + ) series = Series(data) expected = Series(list(data.values()), list(data.keys())) @@ -2074,7 +2076,7 @@ def test_series_from_index_dtype_equal_does_not_copy(self): class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): idx = tm.makeDateIndex(10000) - ser = Series(np.random.randn(len(idx)), idx.astype(object)) + ser = Series(np.random.default_rng(2).randn(len(idx)), idx.astype(object)) # as of 2.0, we no longer silently cast the object-dtype index # to DatetimeIndex GH#39307, GH#23598 assert not isinstance(ser.index, DatetimeIndex) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 3f0078d3c1487..351b19066fbf4 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -75,7 +75,7 @@ def test_timedelta64_nan(self): def test_logical_range_select(self, datetime_series): # NumPy limitation =( # https://github.com/pandas-dev/pandas/commit/9030dc021f07c76809848925cb34828f6c8484f3 - np.random.seed(12345) + selector = -0.5 <= datetime_series <= 0.5 expected = (datetime_series >= -0.5) & 
(datetime_series <= 0.5) tm.assert_series_equal(selector, expected) diff --git a/pandas/tests/series/test_npfuncs.py b/pandas/tests/series/test_npfuncs.py index 6a575ab85943a..9c5c3c3d067de 100644 --- a/pandas/tests/series/test_npfuncs.py +++ b/pandas/tests/series/test_npfuncs.py @@ -13,7 +13,7 @@ class TestPtp: def test_ptp(self): # GH#21614 N = 1000 - arr = np.random.randn(N) + arr = np.random.default_rng(2).randn(N) ser = Series(arr) assert np.ptp(ser) == np.ptp(arr) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index c42b9f056878d..c8acdd1f60714 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -67,7 +67,7 @@ def test_empty_name_printing(self): @pytest.mark.parametrize("args", [(), (0, -1)]) def test_float_range(self, args): - str(Series(np.random.randn(1000), index=np.arange(1000, *args))) + str(Series(np.random.default_rng(2).randn(1000), index=np.arange(1000, *args))) def test_empty_object(self): # empty @@ -114,14 +114,16 @@ def test_various_names(self, name, string_series): def test_tuple_name(self): biggie = Series( - np.random.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz") + np.random.default_rng(2).randn(1000), + index=np.arange(1000), + name=("foo", "bar", "baz"), ) repr(biggie) @pytest.mark.parametrize("arg", [100, 1001]) def test_tidy_repr_name_0(self, arg): # tidy repr - ser = Series(np.random.randn(arg), name=0) + ser = Series(np.random.default_rng(2).randn(arg), name=0) rep_str = repr(ser) assert "Name: 0" in rep_str @@ -149,7 +151,7 @@ def test_tidy_repr(self): repr(a) # should not raise exception def test_repr_bool_fails(self, capsys): - s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)]) + s = Series([DataFrame(np.random.default_rng(2).randn(2, 2)) for i in range(5)]) # It works (with no Cython exception barf)! 
repr(s) @@ -197,13 +199,13 @@ def test_timeseries_repr_object_dtype(self): index = Index( [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object ) - ts = Series(np.random.randn(len(index)), index) + ts = Series(np.random.default_rng(2).randn(len(index)), index) repr(ts) ts = tm.makeTimeSeries(1000) assert repr(ts).splitlines()[-1].startswith("Freq:") - ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)] + ts2 = ts.iloc[np.random.default_rng(2).randint(0, len(ts) - 1, 400)] repr(ts2).splitlines()[-1] def test_latex_repr(self): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 38dea7dc5f8bf..6fd24e438afea 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -28,8 +28,8 @@ def arrays_for_binary_ufunc(): """ A pair of random, length-100 integer-dtype arrays, that are mostly 0. """ - a1 = np.random.randint(0, 10, 100, dtype="int64") - a2 = np.random.randint(0, 10, 100, dtype="int64") + a1 = np.random.default_rng(2).randint(0, 10, 100, dtype="int64") + a2 = np.random.default_rng(2).randint(0, 10, 100, dtype="int64") a1[::3] = 0 a2[::4] = 0 return a1, a2 @@ -38,7 +38,7 @@ def arrays_for_binary_ufunc(): @pytest.mark.parametrize("ufunc", [np.positive, np.floor, np.exp]) def test_unary_ufunc(ufunc, sparse): # Test that ufunc(pd.Series) == pd.Series(ufunc) - arr = np.random.randint(0, 10, 10, dtype="int64") + arr = np.random.default_rng(2).randint(0, 10, 10, dtype="int64") arr[::2] = 0 if sparse: arr = SparseArray(arr, dtype=pd.SparseDtype("int64", 0)) @@ -120,7 +120,7 @@ def test_binary_ufunc_with_series( series = pd.Series(a1, name=name) other = pd.Series(a2, name=name) - idx = np.random.permutation(len(a1)) + idx = np.random.default_rng(2).permutation(len(a1)) if shuffle: other = other.take(idx) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 8c26bbd209a6a..78bab1142c1d8 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ 
-527,13 +527,13 @@ def test_factorize_mixed_values(self, data, expected_codes, expected_uniques): class TestUnique: def test_ints(self): - arr = np.random.randint(0, 100, size=50) + arr = np.random.default_rng(2).randint(0, 100, size=50) result = algos.unique(arr) assert isinstance(result, np.ndarray) def test_objects(self): - arr = np.random.randint(0, 100, size=50).astype("O") + arr = np.random.default_rng(2).randint(0, 100, size=50).astype("O") result = algos.unique(arr) assert isinstance(result, np.ndarray) @@ -878,7 +878,7 @@ def test_unique_masked(self, any_numeric_ea_dtype): def test_nunique_ints(index_or_series_or_array): # GH#36327 - values = index_or_series_or_array(np.random.randint(0, 20, 30)) + values = index_or_series_or_array(np.random.default_rng(2).randint(0, 20, 30)) result = algos.nunique_ints(values) expected = len(algos.unique(values)) assert result == expected @@ -1175,9 +1175,7 @@ def test_isin_unsigned_dtype(self): class TestValueCounts: def test_value_counts(self): - np.random.seed(1234) - - arr = np.random.randn(4) + arr = np.random.default_rng(2).randn(4) factor = cut(arr, 4) # assert isinstance(factor, n) @@ -1862,8 +1860,8 @@ def test_is_lexsorted(): def test_groupsort_indexer(): - a = np.random.randint(0, 1000, 100).astype(np.intp) - b = np.random.randint(0, 1000, 100).astype(np.intp) + a = np.random.default_rng(2).randint(0, 1000, 100).astype(np.intp) + b = np.random.default_rng(2).randint(0, 1000, 100).astype(np.intp) result = libalgos.groupsort_indexer(a, 1000)[0] diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 52fded5a6ee55..47026d92d8c2e 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -52,36 +52,36 @@ def test_all_not_none(): def test_random_state(): - import numpy.random as npr - # Check with seed state = com.random_state(5) - assert state.uniform() == npr.RandomState(5).uniform() + assert state.uniform() == np.random.RandomState(5).uniform() # Check with random state 
object - state2 = npr.RandomState(10) - assert com.random_state(state2).uniform() == npr.RandomState(10).uniform() + state2 = np.random.RandomState(10) + assert com.random_state(state2).uniform() == np.random.RandomState(10).uniform() # check with no arg random state - assert com.random_state() is np.random + assert com.random_state() is np.random # check array-like # GH32503 - state_arr_like = npr.randint(0, 2**31, size=624, dtype="uint32") + state_arr_like = np.random.default_rng(None).integers( + 0, 2**31, size=624, dtype="uint32" + ) assert ( com.random_state(state_arr_like).uniform() - == npr.RandomState(state_arr_like).uniform() + == np.random.RandomState(state_arr_like).uniform() ) # Check BitGenerators # GH32503 assert ( - com.random_state(npr.MT19937(3)).uniform() - == npr.RandomState(npr.MT19937(3)).uniform() + com.random_state(np.random.MT19937(3)).uniform() + == np.random.RandomState(np.random.MT19937(3)).uniform() ) assert ( - com.random_state(npr.PCG64(11)).uniform() - == npr.RandomState(npr.PCG64(11)).uniform() + com.random_state(np.random.PCG64(11)).uniform() + == np.random.RandomState(np.random.PCG64(11)).uniform() ) # Error for floats or strings diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index ec4f8893885f3..2576b653e238d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -17,12 +17,16 @@ @pytest.fixture def _frame(): - return DataFrame(np.random.randn(10001, 4), columns=list("ABCD"), dtype="float64") + return DataFrame( + np.random.default_rng(2).randn(10001, 4), columns=list("ABCD"), dtype="float64" + ) @pytest.fixture def _frame2(): - return DataFrame(np.random.randn(100, 4), columns=list("ABCD"), dtype="float64") + return DataFrame( + np.random.default_rng(2).randn(100, 4), columns=list("ABCD"), dtype="float64" + ) @pytest.fixture @@ -52,20 +56,24 @@ def _mixed2(_frame2): @pytest.fixture def _integer(): return DataFrame( - np.random.randint(1, 100,
size=(10001, 4)), columns=list("ABCD"), dtype="int64" + np.random.default_rng(2).randint(1, 100, size=(10001, 4)), + columns=list("ABCD"), + dtype="int64", ) @pytest.fixture def _integer_randint(_integer): # randint to get a case with zeros - return _integer * np.random.randint(0, 2, size=np.shape(_integer)) + return _integer * np.random.default_rng(2).randint(0, 2, size=np.shape(_integer)) @pytest.fixture def _integer2(): return DataFrame( - np.random.randint(1, 100, size=(101, 4)), columns=list("ABCD"), dtype="int64" + np.random.default_rng(2).randint(1, 100, size=(101, 4)), + columns=list("ABCD"), + dtype="int64", ) @@ -184,8 +192,8 @@ def test_run_binary(self, request, fixture, flex, comparison_op): # self.run_binary(df.iloc[:, i], binary_comp, flex) def test_invalid(self): - array = np.random.randn(1_000_001) - array2 = np.random.randn(100) + array = np.random.default_rng(2).randn(1_000_001) + array2 = np.random.default_rng(2).randn(100) # no op result = expr._can_use_numexpr(operator.add, None, array, array, "evaluate") @@ -291,7 +299,12 @@ def testit(): "op_str,opname", [("/", "truediv"), ("//", "floordiv"), ("**", "pow")] ) def test_bool_ops_raise_on_arithmetic(self, op_str, opname): - df = DataFrame({"a": np.random.rand(10) > 0.5, "b": np.random.rand(10) > 0.5}) + df = DataFrame( + { + "a": np.random.default_rng(2).rand(10) > 0.5, + "b": np.random.default_rng(2).rand(10) > 0.5, + } + ) msg = f"operator '{opname}' not implemented for bool dtypes" f = getattr(operator, opname) @@ -320,7 +333,12 @@ def test_bool_ops_raise_on_arithmetic(self, op_str, opname): ) def test_bool_ops_warn_on_arithmetic(self, op_str, opname): n = 10 - df = DataFrame({"a": np.random.rand(n) > 0.5, "b": np.random.rand(n) > 0.5}) + df = DataFrame( + { + "a": np.random.default_rng(2).rand(n) > 0.5, + "b": np.random.default_rng(2).rand(n) > 0.5, + } + ) subs = {"+": "|", "*": "&", "-": "^"} sub_funcs = {"|": "or_", "&": "and_", "^": "xor"} diff --git a/pandas/tests/test_multilevel.py 
b/pandas/tests/test_multilevel.py index 43f1f5527c8e2..afe4aa28b9c4b 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -83,7 +83,9 @@ def test_groupby_corner(self): codes=[[0], [0], [0]], names=["one", "two", "three"], ) - df = DataFrame([np.random.rand(4)], columns=["a", "b", "c", "d"], index=midx) + df = DataFrame( + [np.random.default_rng(2).rand(4)], columns=["a", "b", "c", "d"], index=midx + ) # should work df.groupby(level="three") @@ -159,7 +161,7 @@ def test_multilevel_consolidate(self): index = MultiIndex.from_tuples( [("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")] ) - df = DataFrame(np.random.randn(4, 4), index=index, columns=index) + df = DataFrame(np.random.default_rng(2).randn(4, 4), index=index, columns=index) df["Totals", ""] = df.sum(1) df = df._consolidate() @@ -169,8 +171,8 @@ def test_level_with_tuples(self): codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) - series = Series(np.random.randn(6), index=index) - frame = DataFrame(np.random.randn(6, 4), index=index) + series = Series(np.random.default_rng(2).randn(6), index=index) + frame = DataFrame(np.random.default_rng(2).randn(6, 4), index=index) result = series[("foo", "bar", 0)] result2 = series.loc[("foo", "bar", 0)] @@ -194,8 +196,8 @@ def test_level_with_tuples(self): codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) - series = Series(np.random.randn(6), index=index) - frame = DataFrame(np.random.randn(6, 4), index=index) + series = Series(np.random.default_rng(2).randn(6), index=index) + frame = DataFrame(np.random.default_rng(2).randn(6, 4), index=index) result = series[("foo", "bar")] result2 = series.loc[("foo", "bar")] diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 6c903611e212b..2fe7f9f66d37c 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -34,8 +34,7 @@ def arr_shape(): @pytest.fixture def arr_float(arr_shape): - np.random.seed(11235) - return np.random.randn(*arr_shape) + 
return np.random.default_rng(2).randn(*arr_shape) @pytest.fixture @@ -45,14 +44,12 @@ def arr_complex(arr_float): @pytest.fixture def arr_int(arr_shape): - np.random.seed(11235) - return np.random.randint(-10, 10, arr_shape) + return np.random.default_rng(2).randint(-10, 10, arr_shape) @pytest.fixture def arr_bool(arr_shape): - np.random.seed(11235) - return np.random.randint(0, 2, arr_shape) == 0 + return np.random.default_rng(2).randint(0, 2, arr_shape) == 0 @pytest.fixture @@ -67,14 +64,12 @@ def arr_utf(arr_float): @pytest.fixture def arr_date(arr_shape): - np.random.seed(11235) - return np.random.randint(0, 20000, arr_shape).astype("M8[ns]") + return np.random.default_rng(2).randint(0, 20000, arr_shape).astype("M8[ns]") @pytest.fixture def arr_tdelta(arr_shape): - np.random.seed(11235) - return np.random.randint(0, 20000, arr_shape).astype("m8[ns]") + return np.random.default_rng(2).randint(0, 20000, arr_shape).astype("m8[ns]") @pytest.fixture @@ -191,20 +186,23 @@ def arr_nan_float1_1d(arr_nan_float1): class TestnanopsDataFrame: def setup_method(self): - np.random.seed(11235) nanops._USE_BOTTLENECK = False arr_shape = (11, 7) - self.arr_float = np.random.randn(*arr_shape) - self.arr_float1 = np.random.randn(*arr_shape) + self.arr_float = np.random.default_rng(2).randn(*arr_shape) + self.arr_float1 = np.random.default_rng(2).randn(*arr_shape) self.arr_complex = self.arr_float + self.arr_float1 * 1j - self.arr_int = np.random.randint(-10, 10, arr_shape) - self.arr_bool = np.random.randint(0, 2, arr_shape) == 0 + self.arr_int = np.random.default_rng(2).randint(-10, 10, arr_shape) + self.arr_bool = np.random.default_rng(2).randint(0, 2, arr_shape) == 0 self.arr_str = np.abs(self.arr_float).astype("S") self.arr_utf = np.abs(self.arr_float).astype("U") - self.arr_date = np.random.randint(0, 20000, arr_shape).astype("M8[ns]") - self.arr_tdelta = np.random.randint(0, 20000, arr_shape).astype("m8[ns]") + self.arr_date = ( + np.random.default_rng(2).randint(0, 20000, 
arr_shape).astype("M8[ns]") + ) + self.arr_tdelta = ( + np.random.default_rng(2).randint(0, 20000, arr_shape).astype("m8[ns]") + ) self.arr_nan = np.tile(np.nan, arr_shape) self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan]) @@ -1009,7 +1007,7 @@ def test_nanstd_roundoff(self, ddof): @property def prng(self): - return np.random.RandomState(1234) + return np.random.RandomState(1234) class TestNanskewFixedValues: @@ -1060,7 +1058,7 @@ def test_nans_skipna(self, samples, actual_skew): @property def prng(self): - return np.random.RandomState(1234) + return np.random.RandomState(1234) class TestNankurtFixedValues: @@ -1111,7 +1109,7 @@ def test_nans_skipna(self, samples, actual_kurt): @property def prng(self): - return np.random.RandomState(1234) + return np.random.RandomState(1234) class TestDatetime64NaNOps: diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 45cdc3c332a9b..be1321bd7fb47 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -34,11 +34,13 @@ @pytest.fixture def left_right(): low, high, n = -1 << 10, 1 << 10, 1 << 20 - left = DataFrame(np.random.randint(low, high, (n, 7)), columns=list("ABCDEFG")) + left = DataFrame( + np.random.default_rng(2).randint(low, high, (n, 7)), columns=list("ABCDEFG") + ) left["left"] = left.sum(axis=1) # one-2-one match - i = np.random.permutation(len(left)) + i = np.random.default_rng(2).permutation(len(left)) right = left.iloc[i].copy() right.columns = right.columns[:-1].tolist() + ["right"] right.index = np.arange(len(right)) @@ -61,7 +63,7 @@ def test_int64_overflow(self): "F": B, "G": A, "H": B, - "values": np.random.randn(2500), + "values": np.random.default_rng(2).randn(2500), } ) @@ -96,7 +98,7 @@ def test_int64_overflow_groupby_large_range(self): @pytest.mark.parametrize("agg", ["mean", "median"]) def test_int64_overflow_groupby_large_df_shuffled(self, agg): - rs = np.random.RandomState(42) + rs = 
np.random.RandomState(42) arr = rs.randint(-1 << 12, 1 << 12, (1 << 15, 5)) i = rs.choice(len(arr), len(arr) * 4) arr = np.vstack((arr, arr[i])) # add some duplicate rows @@ -198,8 +200,12 @@ def test_nargsort(self, ascending, na_position, exp): class TestMerge: def test_int64_overflow_outer_merge(self): # #2690, combinatorial explosion - df1 = DataFrame(np.random.randn(1000, 7), columns=list("ABCDEF") + ["G1"]) - df2 = DataFrame(np.random.randn(1000, 7), columns=list("ABCDEF") + ["G2"]) + df1 = DataFrame( + np.random.default_rng(2).randn(1000, 7), columns=list("ABCDEF") + ["G1"] + ) + df2 = DataFrame( + np.random.default_rng(2).randn(1000, 7), columns=list("ABCDEF") + ["G2"] + ) result = merge(df1, df2, how="outer") assert len(result) == 2000 @@ -242,7 +248,7 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort): # one-2-many/none match low, high, n = -1 << 10, 1 << 10, 1 << 11 left = DataFrame( - np.random.randint(low, high, (n, 7)).astype("int64"), + np.random.default_rng(2).randint(low, high, (n, 7)).astype("int64"), columns=list("ABCDEFG"), ) @@ -254,23 +260,23 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort): left = concat([left, left], ignore_index=True) right = DataFrame( - np.random.randint(low, high, (n // 2, 7)).astype("int64"), + np.random.default_rng(2).randint(low, high, (n // 2, 7)).astype("int64"), columns=list("ABCDEFG"), ) # add duplicates & overlap with left to the right frame - i = np.random.choice(len(left), n) + i = np.random.default_rng(2).choice(len(left), n) right = concat([right, right, left.iloc[i]], ignore_index=True) - left["left"] = np.random.randn(len(left)) - right["right"] = np.random.randn(len(right)) + left["left"] = np.random.default_rng(2).randn(len(left)) + right["right"] = np.random.default_rng(2).randn(len(right)) # shuffle left & right frames - i = np.random.permutation(len(left)) + i = np.random.default_rng(2).permutation(len(left)) left = left.iloc[i].copy() left.index = 
np.arange(len(left)) - i = np.random.permutation(len(right)) + i = np.random.default_rng(2).permutation(len(right)) right = right.iloc[i].copy() right.index = np.arange(len(right)) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 47615be32e5b0..61c1d54eadc6d 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -42,7 +42,7 @@ def dtype_fill_out_dtype(request): class TestTake: def test_1d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype - data = np.random.randint(0, 2, 4).astype(dtype) + data = np.random.default_rng(2).randint(0, 2, 4).astype(dtype) indexer = [2, 1, 0, -1] result = algos.take_nd(data, indexer, fill_value=fill_value) @@ -58,7 +58,7 @@ def test_1d_fill_nonna(self, dtype_fill_out_dtype): def test_2d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype - data = np.random.randint(0, 2, (5, 3)).astype(dtype) + data = np.random.default_rng(2).randint(0, 2, (5, 3)).astype(dtype) indexer = [2, 1, 0, -1] result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) @@ -83,7 +83,7 @@ def test_2d_fill_nonna(self, dtype_fill_out_dtype): def test_3d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype - data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + data = np.random.default_rng(2).randint(0, 2, (5, 4, 3)).astype(dtype) indexer = [2, 1, 0, -1] result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) @@ -115,7 +115,7 @@ def test_3d_fill_nonna(self, dtype_fill_out_dtype): assert result.dtype == dtype def test_1d_other_dtypes(self): - arr = np.random.randn(10).astype(np.float32) + arr = np.random.default_rng(2).randn(10).astype(np.float32) indexer = [1, 2, 3, -1] result = algos.take_nd(arr, indexer) @@ -124,7 +124,7 @@ def test_1d_other_dtypes(self): tm.assert_almost_equal(result, expected) def test_2d_other_dtypes(self): - arr = np.random.randn(10, 5).astype(np.float32) + 
arr = np.random.default_rng(2).randn(10, 5).astype(np.float32) indexer = [1, 2, 3, -1] @@ -165,7 +165,7 @@ def test_2d_bool(self): assert result.dtype == np.object_ def test_2d_float32(self): - arr = np.random.randn(4, 3).astype(np.float32) + arr = np.random.default_rng(2).randn(4, 3).astype(np.float32) indexer = [0, 2, -1, 1, -1] # axis=0 @@ -183,7 +183,10 @@ def test_2d_float32(self): def test_2d_datetime64(self): # 2005/01/01 - 2006/01/01 - arr = np.random.randint(11_045_376, 11_360_736, (5, 3)) * 100_000_000_000 + arr = ( + np.random.default_rng(2).randint(11_045_376, 11_360_736, (5, 3)) + * 100_000_000_000 + ) arr = arr.view(dtype="datetime64[ns]") indexer = [0, 2, -1, 1, -1] diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py index 6a0d0a8d92955..34d9ff820d584 100644 --- a/pandas/tests/tslibs/test_ccalendar.py +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -26,7 +26,7 @@ def test_get_day_of_year_numeric(date_tuple, expected): def test_get_day_of_year_dt(): - dt = datetime.fromordinal(1 + np.random.randint(365 * 4000)) + dt = datetime.fromordinal(1 + np.random.default_rng(2).randint(365 * 4000)) result = ccalendar.get_day_of_year(dt.year, dt.month, dt.day) expected = (dt - dt.replace(month=1, day=1)).days + 1 diff --git a/pandas/tests/util/test_make_objects.py b/pandas/tests/util/test_make_objects.py index 6f5f2d3924e1f..feeefea835423 100644 --- a/pandas/tests/util/test_make_objects.py +++ b/pandas/tests/util/test_make_objects.py @@ -10,6 +10,6 @@ def test_make_multiindex_respects_k(): # GH#38795 respect 'k' arg - N = np.random.randint(0, 100) + N = np.random.default_rng(2).randint(0, 100) mi = tm.makeMultiIndex(k=N) assert len(mi) == N diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 479829ccc7169..f45fe7443497f 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -124,7 +124,7 @@ def halflife_with_times(request): @pytest.fixture def series(): """Make 
mocked series as fixture.""" - arr = np.random.randn(100) + arr = np.random.default_rng(2).randn(100) locs = np.arange(20, 40) arr[locs] = np.NaN series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) @@ -135,7 +135,7 @@ def series(): def frame(): """Make mocked frame as fixture.""" return DataFrame( - np.random.randn(100, 10), + np.random.default_rng(2).randn(100, 10), index=bdate_range(datetime(2009, 1, 1), periods=100), ) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 6b7093b4e4c3c..db84672ed3d2b 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -21,7 +21,7 @@ def test_getitem(step): - frame = DataFrame(np.random.randn(5, 5)) + frame = DataFrame(np.random.default_rng(2).randn(5, 5)) r = frame.rolling(window=5, step=step) tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns) diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index c37fd5258874f..2ba8850218c8e 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -55,7 +55,10 @@ def test_rolling_apply_out_of_bounds(engine_and_raw): def test_rolling_apply_with_pandas_objects(window): # 5071 df = DataFrame( - {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)}, + { + "A": np.random.default_rng(2).randn(5), + "B": np.random.default_rng(2).randint(0, 10, size=5), + }, index=date_range("20130101", periods=5, freq="s"), ) @@ -184,7 +187,7 @@ def numpysum(x, par): def test_nans(raw): - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -199,7 +202,7 @@ def test_nans(raw): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.randn(20)) + obj2 = Series(np.random.default_rng(2).randn(20)) result = obj2.rolling(10, min_periods=5).apply(f, raw=raw) assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -210,7 +213,7 @@ def 
test_nans(raw): def test_center(raw): - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 352828a155f6d..089771a27d14c 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -415,7 +415,7 @@ def test_ewma_nan_handling_cases(s, adjust, ignore_na, w): def test_ewm_alpha(): # GH 10789 - arr = np.random.randn(100) + arr = np.random.default_rng(2).randn(100) locs = np.arange(20, 40) arr[locs] = np.NaN @@ -431,7 +431,7 @@ def test_ewm_alpha(): def test_ewm_domain_checks(): # GH 12492 - arr = np.random.randn(100) + arr = np.random.default_rng(2).randn(100) locs = np.arange(20, 40) arr[locs] = np.NaN @@ -483,7 +483,7 @@ def test_ew_empty_series(method): @pytest.mark.parametrize("name", ["mean", "var", "std"]) def test_ew_min_periods(min_periods, name): # excluding NaNs correctly - arr = np.random.randn(50) + arr = np.random.default_rng(2).randn(50) arr[:10] = np.NaN arr[-10:] = np.NaN s = Series(arr) @@ -524,8 +524,8 @@ def test_ew_min_periods(min_periods, name): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_ewm_corr_cov(name): - A = Series(np.random.randn(50), index=range(50)) - B = A[2:] + np.random.randn(48) + A = Series(np.random.default_rng(2).randn(50), index=range(50)) + B = A[2:] + np.random.default_rng(2).randn(48) A[:10] = np.NaN B.iloc[-10:] = np.NaN @@ -539,8 +539,8 @@ def test_ewm_corr_cov(name): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_ewm_corr_cov_min_periods(name, min_periods): # GH 7898 - A = Series(np.random.randn(50), index=range(50)) - B = A[2:] + np.random.randn(48) + A = Series(np.random.default_rng(2).randn(50), index=range(50)) + B = A[2:] + np.random.default_rng(2).randn(48) A[:10] = np.NaN B.iloc[-10:] = np.NaN @@ -565,13 +565,13 @@ def test_ewm_corr_cov_min_periods(name, min_periods): @pytest.mark.parametrize("name", ["cov", 
"corr"]) def test_different_input_array_raise_exception(name): - A = Series(np.random.randn(50), index=range(50)) + A = Series(np.random.default_rng(2).randn(50), index=range(50)) A[:10] = np.NaN msg = "other must be a DataFrame or Series" # exception raised is Exception with pytest.raises(ValueError, match=msg): - getattr(A.ewm(com=20, min_periods=5), name)(np.random.randn(50)) + getattr(A.ewm(com=20, min_periods=5), name)(np.random.default_rng(2).randn(50)) @pytest.mark.parametrize("name", ["var", "std", "mean"]) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index bbcc260aa779e..c5d90173b1fdc 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -231,7 +231,7 @@ def test_expanding_sem(frame_or_series): @pytest.mark.parametrize("method", ["skew", "kurt"]) def test_expanding_skew_kurt_numerical_stability(method): # GH: 6929 - s = Series(np.random.rand(10)) + s = Series(np.random.default_rng(2).rand(10)) expected = getattr(s.expanding(3), method)() s = s + 5000 result = getattr(s.expanding(3), method)() @@ -246,12 +246,14 @@ def test_expanding_skew_kurt_numerical_stability(method): def test_rank(window, method, pct, ascending, test_data): length = 20 if test_data == "default": - ser = Series(data=np.random.rand(length)) + ser = Series(data=np.random.default_rng(2).rand(length)) elif test_data == "duplicates": - ser = Series(data=np.random.choice(3, length)) + ser = Series(data=np.random.default_rng(2).choice(3, length)) elif test_data == "nans": ser = Series( - data=np.random.choice([1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length) + data=np.random.default_rng(2).choice( + [1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length + ) ) expected = ser.expanding(window).apply( @@ -264,7 +266,7 @@ def test_rank(window, method, pct, ascending, test_data): def test_expanding_corr(series): A = series.dropna() - B = (A + np.random.randn(len(A)))[:-5] + B = (A + 
np.random.default_rng(2).randn(len(A)))[:-5] result = A.expanding().corr(B) @@ -290,7 +292,7 @@ def test_expanding_quantile(series): def test_expanding_cov(series): A = series - B = (A + np.random.randn(len(A)))[:-5] + B = (A + np.random.default_rng(2).randn(len(A)))[:-5] result = A.expanding().cov(B) @@ -351,7 +353,7 @@ def test_expanding_func(func, static_comp, frame_or_series): ids=["sum", "mean", "max", "min"], ) def test_expanding_min_periods(func, static_comp): - ser = Series(np.random.randn(50)) + ser = Series(np.random.default_rng(2).randn(50)) msg = "The 'axis' keyword in Series.expanding is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): @@ -365,7 +367,7 @@ def test_expanding_min_periods(func, static_comp): assert isna(result.iloc[13]) assert notna(result.iloc[14]) - ser2 = Series(np.random.randn(20)) + ser2 = Series(np.random.default_rng(2).randn(20)) with tm.assert_produces_warning(FutureWarning, match=msg): result = getattr(ser2.expanding(min_periods=5, axis=0), func)() assert isna(result[3]) @@ -401,7 +403,7 @@ def test_expanding_apply(engine_and_raw, frame_or_series): def test_expanding_min_periods_apply(engine_and_raw): engine, raw = engine_and_raw - ser = Series(np.random.randn(50)) + ser = Series(np.random.default_rng(2).randn(50)) result = ser.expanding(min_periods=30).apply( lambda x: x.mean(), raw=raw, engine=engine @@ -416,7 +418,7 @@ def test_expanding_min_periods_apply(engine_and_raw): assert isna(result.iloc[13]) assert notna(result.iloc[14]) - ser2 = Series(np.random.randn(20)) + ser2 = Series(np.random.default_rng(2).randn(20)) result = ser2.expanding(min_periods=5).apply( lambda x: x.mean(), raw=raw, engine=engine ) @@ -623,7 +625,7 @@ def mean_w_arg(x, const): engine, raw = engine_and_raw - df = DataFrame(np.random.rand(20, 3)) + df = DataFrame(np.random.default_rng(2).rand(20, 3)) expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0 diff --git a/pandas/tests/window/test_pairwise.py 
b/pandas/tests/window/test_pairwise.py index f9bc572b41312..7417ea004e240 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -51,7 +51,7 @@ def pairwise_other_frame(): def test_rolling_cov(series): A = series - B = A + np.random.randn(len(A)) + B = A + np.random.default_rng(2).randn(len(A)) result = A.rolling(window=50, min_periods=25).cov(B) tm.assert_almost_equal(result.iloc[-1], np.cov(A[-50:], B[-50:])[0, 1]) @@ -59,7 +59,7 @@ def test_rolling_corr(series): A = series - B = A + np.random.randn(len(A)) + B = A + np.random.default_rng(2).randn(len(A)) result = A.rolling(window=50, min_periods=25).corr(B) tm.assert_almost_equal(result.iloc[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) @@ -96,7 +96,9 @@ def test_flex_binary_frame(method, frame): frame2 = frame.copy() frame2 = DataFrame( - np.random.randn(*frame2.shape), index=frame2.index, columns=frame2.columns + np.random.default_rng(2).randn(*frame2.shape), + index=frame2.index, + columns=frame2.columns, ) res3 = getattr(frame.rolling(window=10), method)(frame2) @@ -133,7 +135,7 @@ def test_corr_sanity(): res = df[0].rolling(5, center=True).corr(df[1]) assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) - df = DataFrame(np.random.rand(30, 2)) + df = DataFrame(np.random.default_rng(2).rand(30, 2)) res = df[0].rolling(5, center=True).corr(df[1]) assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 381272ff691fe..6fbbfed3e9742 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -693,7 +693,7 @@ def test_rolling_window_as_string(center, expected_data): date_today = datetime.now() days = date_range(date_today, date_today + timedelta(365), freq="D") - npr = np.random.RandomState(seed=421) + npr = np.random.RandomState(seed=421) data = npr.randint(1, high=100, size=len(days)) df = 
DataFrame({"DateCol": days, "metric": data}) @@ -1038,7 +1038,7 @@ def test_rolling_numerical_accuracy_jump(): index = date_range(start="2020-01-01", end="2020-01-02", freq="60s").append( DatetimeIndex(["2020-01-03"]) ) - data = np.random.rand(len(index)) + data = np.random.default_rng(2).rand(len(index)) df = DataFrame({"data": data}, index=index) result = df.rolling("60s").mean() @@ -1465,7 +1465,7 @@ def test_groupby_rolling_nan_included(): @pytest.mark.parametrize("method", ["skew", "kurt"]) def test_rolling_skew_kurt_numerical_stability(method): # GH#6929 - ser = Series(np.random.rand(10)) + ser = Series(np.random.default_rng(2).rand(10)) ser_copy = ser.copy() expected = getattr(ser.rolling(3), method)() tm.assert_series_equal(ser, ser_copy) @@ -1654,12 +1654,14 @@ def test_rolling_numeric_dtypes(): def test_rank(window, method, pct, ascending, test_data): length = 20 if test_data == "default": - ser = Series(data=np.random.rand(length)) + ser = Series(data=np.random.default_rng(2).rand(length)) elif test_data == "duplicates": - ser = Series(data=np.random.choice(3, length)) + ser = Series(data=np.random.default_rng(2).choice(3, length)) elif test_data == "nans": ser = Series( - data=np.random.choice([1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length) + data=np.random.default_rng(2).choice( + [1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length + ) ) expected = ser.rolling(window).apply( @@ -1676,7 +1678,9 @@ def test_rolling_quantile_np_percentile(): row = 10 col = 5 idx = date_range("20100101", periods=row, freq="B") - df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) + df = DataFrame( + np.random.default_rng(2).rand(row * col).reshape((row, -1)), index=idx + ) df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) np_percentile = np.percentile(df, [25, 50, 75], axis=0) diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py index 03c0839d00787..229c3ff303dfe 100644 --- 
a/pandas/tests/window/test_rolling_functions.py +++ b/pandas/tests/window/test_rolling_functions.py @@ -149,7 +149,7 @@ def test_time_rule_frame(raw, frame, compare_func, roll_func, kwargs, minp): ], ) def test_nans(compare_func, roll_func, kwargs): - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -164,7 +164,7 @@ def test_nans(compare_func, roll_func, kwargs): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.randn(20)) + obj2 = Series(np.random.default_rng(2).randn(20)) result = getattr(obj2.rolling(10, min_periods=5), roll_func)(**kwargs) assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -176,7 +176,7 @@ def test_nans(compare_func, roll_func, kwargs): def test_nans_count(): - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN result = obj.rolling(50, min_periods=30).count() @@ -240,7 +240,7 @@ def test_min_periods_count(series, step): ], ) def test_center(roll_func, kwargs, minp): - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -463,8 +463,8 @@ def test_rolling_median_resample(): def test_rolling_median_memory_error(): # GH11722 n = 20000 - Series(np.random.randn(n)).rolling(window=2, center=False).median() - Series(np.random.randn(n)).rolling(window=2, center=False).median() + Series(np.random.default_rng(2).randn(n)).rolling(window=2, center=False).median() + Series(np.random.default_rng(2).randn(n)).rolling(window=2, center=False).median() @pytest.mark.parametrize( diff --git a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py index e6d12d924f611..126ca345c72ff 100644 --- a/pandas/tests/window/test_rolling_quantile.py +++ b/pandas/tests/window/test_rolling_quantile.py @@ -88,7 +88,7 @@ def test_time_rule_frame(raw, frame, q): 
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) def test_nans(q): compare_func = partial(scoreatpercentile, per=q) - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -103,7 +103,7 @@ def test_nans(q): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.randn(20)) + obj2 = Series(np.random.default_rng(2).randn(20)) result = obj2.rolling(10, min_periods=5).quantile(q) assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -127,7 +127,7 @@ def test_min_periods(series, minp, q, step): @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) def test_center(q): - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 56acee542aea4..87b15253154b8 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -85,7 +85,7 @@ def test_nans(sp_func, roll_func): import scipy.stats compare_func = partial(getattr(scipy.stats, sp_func), bias=False) - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -100,7 +100,7 @@ def test_nans(sp_func, roll_func): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.randn(20)) + obj2 = Series(np.random.default_rng(2).randn(20)) result = getattr(obj2.rolling(10, min_periods=5), roll_func)() assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -128,7 +128,7 @@ def test_min_periods(series, minp, roll_func, step): @pytest.mark.parametrize("roll_func", ["kurt", "skew"]) def test_center(roll_func): - obj = Series(np.random.randn(50)) + obj = Series(np.random.default_rng(2).randn(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -186,7 +186,7 @@ def test_rolling_skew_edge_cases(step): 
expected = Series([np.NaN] * 5)[::step] # yields all NaN (window too small) - d = Series(np.random.randn(5)) + d = Series(np.random.default_rng(2).randn(5)) x = d.rolling(window=2, step=step).skew() tm.assert_series_equal(expected, x) @@ -207,7 +207,7 @@ def test_rolling_kurt_edge_cases(step): # yields all NaN (window too small) expected = Series([np.NaN] * 5)[::step] - d = Series(np.random.randn(5)) + d = Series(np.random.default_rng(2).randn(5)) x = d.rolling(window=3, step=step).kurt() tm.assert_series_equal(expected, x) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 265ef29e42c48..48807f9678a87 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -504,7 +504,8 @@ def test_perf_min(self): N = 10000 dfp = DataFrame( - {"B": np.random.randn(N)}, index=date_range("20130101", periods=N, freq="s") + {"B": np.random.default_rng(2).randn(N)}, + index=date_range("20130101", periods=N, freq="s"), ) expected = dfp.rolling(2, min_periods=1).min() result = dfp.rolling("2s").min() diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 9af1e8753ffc1..00c402d76f95c 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -112,7 +112,7 @@ def test_constructor_with_win_type_invalid(frame_or_series): @td.skip_if_no_scipy def test_window_with_args(step): # make sure that we are aggregating window functions correctly with arg - r = Series(np.random.randn(100)).rolling( + r = Series(np.random.default_rng(2).randn(100)).rolling( window=10, min_periods=1, win_type="gaussian", step=step ) expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) @@ -223,7 +223,7 @@ def test_cmov_window_corner(step): assert len(result) == 0 # shorter than window - vals = Series(np.random.randn(5)) + vals = Series(np.random.default_rng(2).randn(5)) result = vals.rolling(10, win_type="boxcar", 
step=step).mean() assert np.isnan(result).all() assert len(result) == len(range(0, 5, step or 1)) @@ -324,7 +324,7 @@ def test_cmov_window_frame(f, xp, step): @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4, 5]) @td.skip_if_no_scipy def test_cmov_window_na_min_periods(step, min_periods): - vals = Series(np.random.randn(10)) + vals = Series(np.random.default_rng(2).randn(10)) vals[4] = np.nan vals[8] = np.nan diff --git a/pyproject.toml b/pyproject.toml index 58671ee80d300..b5d39a999cab9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -240,7 +240,9 @@ select = [ # Ruff-specific rules "RUF", # flake8-bandit: exec-builtin - "S102" + "S102", + # numpy-legacy-random + "NPY002" ] ignore = [ From 7495d1e5ca61c97aa1d7f981a815b067305a3508 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:10:46 -0700 Subject: [PATCH 02/22] Fix other testing functions --- pandas/_testing/__init__.py | 31 ++++++++++++++++++------------- pandas/_testing/_random.py | 5 +++-- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index d1a729343e062..11eed7e303566 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -391,9 +391,11 @@ def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index: if is_unsigned_integer_dtype(dtype): values += 2 ** (dtype.itemsize * 8 - 1) elif dtype.kind == "f": - values = np.random.random_sample(k) - np.random.random_sample(1) + values = np.random.default_rng(2).random_sample(k) - np.random.default_rng( + 2 + ).random_sample(1) values.sort() - values = values * (10 ** np.random.randint(0, 9)) + values = values * (10 ** np.random.default_rng(2).randint(0, 9)) else: raise NotImplementedError(f"wrong dtype {dtype}") @@ -486,7 +488,7 @@ def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]: # make series def make_rand_series(name=None, dtype=np.float64) -> Series: 
index = makeStringIndex(_N) - data = np.random.randn(_N) + data = np.random.default_rng(2).randn(_N) with np.errstate(invalid="ignore"): data = data.astype(dtype, copy=False) return Series(data, index=index, name=name) @@ -509,21 +511,27 @@ def makeObjectSeries(name=None) -> Series: def getSeriesData() -> dict[str, Series]: index = makeStringIndex(_N) - return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)} + return { + c: Series(np.random.default_rng(2).randn(_N), index=index) for c in getCols(_K) + } def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series: if nper is None: nper = _N return Series( - np.random.randn(nper), index=makeDateIndex(nper, freq=freq), name=name + np.random.default_rng(2).randn(nper), + index=makeDateIndex(nper, freq=freq), + name=name, ) def makePeriodSeries(nper=None, name=None) -> Series: if nper is None: nper = _N - return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name) + return Series( + np.random.default_rng(2).randn(nper), index=makePeriodIndex(nper), name=name + ) def getTimeSeriesData(nper=None, freq: Frequency = "B") -> dict[str, Series]: @@ -786,11 +794,8 @@ def makeCustomDataframe( return DataFrame(data, index, columns, dtype=dtype) -def _create_missing_idx(nrows, ncols, density: float, random_state=None): - if random_state is None: - random_state = np.random - else: - random_state = np.random.RandomState(random_state) +def _create_missing_idx(nrows, ncols, density: float): + random_state = np.random.default_rng(2) # below is cribbed from scipy.sparse size = round((1 - density) * nrows * ncols) @@ -813,9 +818,9 @@ def _gen_unique_rand(rng, _extra_size): return i.tolist(), j.tolist() -def makeMissingDataframe(density: float = 0.9, random_state=None) -> DataFrame: +def makeMissingDataframe(density: float = 0.9) -> DataFrame: df = makeDataFrame() - i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) + i, j = _create_missing_idx(*df.shape, 
density=density) df.iloc[i, j] = np.nan return df diff --git a/pandas/_testing/_random.py b/pandas/_testing/_random.py index af8c7b4870f4c..4306a72700aff 100644 --- a/pandas/_testing/_random.py +++ b/pandas/_testing/_random.py @@ -17,7 +17,8 @@ def rands_array( Generate an array of byte strings. """ retval = ( - np.random.choice(RANDS_CHARS, size=nchars * np.prod(size), replace=replace) + np.random.default_rng(2) + .choice(RANDS_CHARS, size=nchars * np.prod(size), replace=replace) .view((np.str_, nchars)) .reshape(size) ) @@ -31,4 +32,4 @@ def rands(nchars) -> str: See `rands_array` if you want to create an array of random strings. """ - return "".join(np.random.choice(RANDS_CHARS, nchars)) + return "".join(np.random.default_rng(2).choice(RANDS_CHARS, nchars)) From efb909b25747f55e066b1886c7193afa750742e1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:28:19 -0700 Subject: [PATCH 03/22] Fix random_sample --- pandas/_testing/__init__.py | 4 +--- pandas/tests/indexing/test_iloc.py | 4 +--- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/io/excel/test_writers.py | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 11eed7e303566..3831acf3c5bbf 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -391,9 +391,7 @@ def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index: if is_unsigned_integer_dtype(dtype): values += 2 ** (dtype.itemsize * 8 - 1) elif dtype.kind == "f": - values = np.random.default_rng(2).random_sample(k) - np.random.default_rng( - 2 - ).random_sample(1) + values = np.random.default_rng(2).random(k) - np.random.default_rng(2).random(1) values.sort() values = values * (10 ** np.random.default_rng(2).randint(0, 9)) else: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 4539ff57844c2..b2edc25e41c3d 100644 --- 
a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -152,9 +152,7 @@ def test_is_scalar_access(self): def test_iloc_exceeds_bounds(self): # GH6296 # iloc should allow indexers that exceed the bounds - df = DataFrame( - np.random.default_rng(2).random_sample((20, 5)), columns=list("ABCDE") - ) + df = DataFrame(np.random.default_rng(2).random((20, 5)), columns=list("ABCDE")) # lists of positions should raise IndexError! msg = "positional indexers are out-of-bounds" diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 5530780409a52..147e57ddba253 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -305,7 +305,7 @@ def test_loc_getitem_dups(self): # GH 5678 # repeated getitems on a dup index returning a ndarray df = DataFrame( - np.random.default_rng(2).random_sample((20, 5)), + np.random.default_rng(2).random((20, 5)), index=["ABCDE"[x % 5] for x in range(20)], ) expected = df.loc["A", 0] diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 36a16a4270e20..18fecc277cdf0 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -482,7 +482,7 @@ def test_int_types(self, np_type, path): @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) def test_float_types(self, np_type, path): # Test np.float values read come back as float. 
- df = DataFrame(np.random.default_rng(2).random_sample(10), dtype=np_type) + df = DataFrame(np.random.default_rng(2).random(10), dtype=np_type) df.to_excel(path, "test1") with ExcelFile(path) as reader: From 6ca87a95928e9bf826472e479634a97a2cd305ea Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Jul 2023 16:41:37 -0700 Subject: [PATCH 04/22] Fix more usage --- pandas/_testing/__init__.py | 45 +---- pandas/tests/apply/test_frame_apply.py | 18 +- pandas/tests/apply/test_invalid_arg.py | 8 +- pandas/tests/apply/test_series_apply.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 30 +-- pandas/tests/arithmetic/test_object.py | 2 +- .../tests/arrays/categorical/test_astype.py | 4 +- .../arrays/categorical/test_constructors.py | 2 +- .../tests/arrays/categorical/test_indexing.py | 4 +- .../tests/arrays/categorical/test_missing.py | 2 +- .../arrays/categorical/test_operators.py | 2 +- .../tests/arrays/sparse/test_arithmetics.py | 4 +- pandas/tests/computation/test_eval.py | 177 ++++++++++-------- .../copy_view/test_core_functionalities.py | 4 +- pandas/tests/dtypes/test_generic.py | 2 +- pandas/tests/extension/json/array.py | 4 +- pandas/tests/extension/list/array.py | 2 +- pandas/tests/extension/test_sparse.py | 2 +- pandas/tests/frame/conftest.py | 6 +- .../frame/constructors/test_from_records.py | 46 +++-- pandas/tests/frame/indexing/test_delitem.py | 4 +- pandas/tests/frame/indexing/test_getitem.py | 10 +- pandas/tests/frame/indexing/test_indexing.py | 50 +++-- pandas/tests/frame/indexing/test_insert.py | 6 +- pandas/tests/frame/indexing/test_mask.py | 8 +- pandas/tests/frame/indexing/test_set_value.py | 4 +- pandas/tests/frame/indexing/test_setitem.py | 27 +-- pandas/tests/frame/indexing/test_where.py | 19 +- pandas/tests/frame/indexing/test_xs.py | 17 +- pandas/tests/frame/methods/test_align.py | 4 +- pandas/tests/frame/methods/test_asfreq.py | 12 +- pandas/tests/frame/methods/test_asof.py | 2 +- 
pandas/tests/frame/methods/test_astype.py | 2 +- pandas/tests/frame/methods/test_at_time.py | 16 +- .../tests/frame/methods/test_between_time.py | 20 +- pandas/tests/frame/methods/test_clip.py | 16 +- pandas/tests/frame/methods/test_copy.py | 6 +- pandas/tests/frame/methods/test_cov_corr.py | 11 +- pandas/tests/frame/methods/test_describe.py | 2 +- pandas/tests/frame/methods/test_diff.py | 6 +- pandas/tests/frame/methods/test_dot.py | 10 +- pandas/tests/frame/methods/test_drop.py | 16 +- pandas/tests/frame/methods/test_dropna.py | 12 +- pandas/tests/frame/methods/test_duplicated.py | 2 +- pandas/tests/frame/methods/test_fillna.py | 16 +- .../frame/methods/test_first_valid_index.py | 2 +- pandas/tests/frame/methods/test_head_tail.py | 2 +- pandas/tests/frame/methods/test_map.py | 2 +- pandas/tests/frame/methods/test_matmul.py | 12 +- pandas/tests/frame/methods/test_pop.py | 2 +- pandas/tests/frame/methods/test_quantile.py | 4 +- pandas/tests/frame/methods/test_rank.py | 8 +- pandas/tests/frame/methods/test_reindex.py | 34 ++-- pandas/tests/frame/methods/test_rename.py | 2 +- .../tests/frame/methods/test_reset_index.py | 8 +- pandas/tests/frame/methods/test_sample.py | 10 +- pandas/tests/frame/methods/test_set_index.py | 12 +- pandas/tests/frame/methods/test_shift.py | 16 +- pandas/tests/frame/methods/test_sort_index.py | 21 ++- .../tests/frame/methods/test_sort_values.py | 12 +- pandas/tests/frame/methods/test_swapaxes.py | 6 +- pandas/tests/frame/methods/test_to_csv.py | 36 ++-- pandas/tests/frame/methods/test_to_dict.py | 2 +- pandas/tests/frame/methods/test_to_numpy.py | 2 +- pandas/tests/frame/methods/test_to_period.py | 10 +- pandas/tests/frame/methods/test_to_records.py | 8 +- .../tests/frame/methods/test_to_timestamp.py | 8 +- pandas/tests/frame/methods/test_truncate.py | 12 +- pandas/tests/frame/methods/test_values.py | 4 +- pandas/tests/frame/test_api.py | 4 +- pandas/tests/frame/test_arithmetic.py | 66 ++++--- pandas/tests/frame/test_constructors.py | 36 
++-- pandas/tests/frame/test_iteration.py | 2 +- pandas/tests/frame/test_nonunique_indexes.py | 16 +- pandas/tests/frame/test_npfuncs.py | 2 +- pandas/tests/frame/test_query_eval.py | 115 +++++++----- pandas/tests/frame/test_reductions.py | 12 +- pandas/tests/frame/test_repr_info.py | 12 +- pandas/tests/frame/test_stack_unstack.py | 38 ++-- pandas/tests/frame/test_subclass.py | 8 +- pandas/tests/generic/test_frame.py | 16 +- pandas/tests/generic/test_generic.py | 2 +- .../tests/groupby/aggregate/test_aggregate.py | 4 +- pandas/tests/groupby/aggregate/test_cython.py | 20 +- pandas/tests/groupby/aggregate/test_numba.py | 8 +- pandas/tests/groupby/aggregate/test_other.py | 20 +- pandas/tests/groupby/conftest.py | 14 +- pandas/tests/groupby/test_apply.py | 14 +- pandas/tests/groupby/test_apply_mutate.py | 2 +- pandas/tests/groupby/test_categorical.py | 22 +-- pandas/tests/groupby/test_counting.py | 14 +- pandas/tests/groupby/test_filters.py | 6 +- pandas/tests/groupby/test_function.py | 28 +-- pandas/tests/groupby/test_groupby.py | 70 ++++--- pandas/tests/groupby/test_grouping.py | 30 +-- pandas/tests/groupby/test_indexing.py | 4 +- pandas/tests/groupby/test_libgroupby.py | 2 +- pandas/tests/groupby/test_nth.py | 2 +- pandas/tests/groupby/test_nunique.py | 2 +- pandas/tests/groupby/test_pipe.py | 4 +- pandas/tests/groupby/test_quantile.py | 2 +- pandas/tests/groupby/test_rank.py | 6 +- pandas/tests/groupby/test_skew.py | 4 +- pandas/tests/groupby/test_timegrouper.py | 2 +- pandas/tests/groupby/test_value_counts.py | 2 +- pandas/tests/groupby/transform/test_numba.py | 8 +- .../tests/groupby/transform/test_transform.py | 27 +-- .../indexes/categorical/test_category.py | 2 +- .../indexes/categorical/test_indexing.py | 2 +- .../tests/indexes/datetimes/test_indexing.py | 2 +- pandas/tests/indexes/datetimes/test_join.py | 6 +- .../indexes/datetimes/test_partial_slicing.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 4 +- pandas/tests/indexes/multi/test_get_set.py | 
2 +- pandas/tests/indexes/multi/test_sorting.py | 4 +- pandas/tests/indexes/period/test_indexing.py | 2 +- pandas/tests/indexes/period/test_join.py | 2 +- .../indexes/period/test_partial_slicing.py | 4 +- pandas/tests/indexes/test_subclass.py | 2 +- pandas/tests/indexes/timedeltas/test_join.py | 4 +- pandas/tests/indexing/conftest.py | 26 ++- .../indexing/multiindex/test_datetime.py | 2 +- .../tests/indexing/multiindex/test_getitem.py | 4 +- pandas/tests/indexing/multiindex/test_iloc.py | 6 +- .../indexing/multiindex/test_indexing_slow.py | 6 +- pandas/tests/indexing/multiindex/test_loc.py | 29 +-- .../indexing/multiindex/test_multiindex.py | 2 +- .../tests/indexing/multiindex/test_partial.py | 8 +- .../tests/indexing/multiindex/test_setitem.py | 16 +- .../tests/indexing/multiindex/test_sorted.py | 2 +- pandas/tests/indexing/test_at.py | 2 +- pandas/tests/indexing/test_categorical.py | 8 +- .../indexing/test_chaining_and_caching.py | 6 +- pandas/tests/indexing/test_floats.py | 6 +- pandas/tests/indexing/test_iat.py | 2 +- pandas/tests/indexing/test_iloc.py | 39 ++-- pandas/tests/indexing/test_indexing.py | 13 +- pandas/tests/indexing/test_loc.py | 34 ++-- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/indexing/test_scalar.py | 4 +- pandas/tests/interchange/test_impl.py | 12 +- pandas/tests/internals/test_internals.py | 20 +- pandas/tests/io/excel/test_style.py | 10 +- pandas/tests/io/excel/test_writers.py | 20 +- pandas/tests/io/formats/style/test_style.py | 4 +- pandas/tests/io/formats/test_format.py | 31 +-- pandas/tests/io/formats/test_info.py | 26 +-- pandas/tests/io/formats/test_printing.py | 4 +- pandas/tests/io/formats/test_series_info.py | 8 +- pandas/tests/io/formats/test_to_html.py | 11 +- pandas/tests/io/formats/test_to_string.py | 2 +- .../tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/json/test_pandas.py | 8 +- pandas/tests/io/json/test_ujson.py | 2 +- pandas/tests/io/parser/test_index_col.py | 9 +- 
pandas/tests/io/parser/test_network.py | 4 +- pandas/tests/io/pytables/test_append.py | 40 ++-- pandas/tests/io/pytables/test_complex.py | 4 +- pandas/tests/io/pytables/test_errors.py | 14 +- pandas/tests/io/pytables/test_put.py | 6 +- pandas/tests/io/pytables/test_read.py | 2 +- pandas/tests/io/pytables/test_round_trip.py | 23 ++- pandas/tests/io/pytables/test_select.py | 25 ++- pandas/tests/io/pytables/test_store.py | 16 +- pandas/tests/io/pytables/test_time_series.py | 8 +- pandas/tests/io/pytables/test_timezones.py | 10 +- pandas/tests/io/test_clipboard.py | 4 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/test_parquet.py | 25 ++- pandas/tests/io/test_pickle.py | 2 +- pandas/tests/io/test_sql.py | 4 +- pandas/tests/io/test_stata.py | 12 +- pandas/tests/libs/test_hashtable.py | 2 +- pandas/tests/plotting/conftest.py | 4 +- pandas/tests/plotting/frame/test_frame.py | 113 ++++++----- .../tests/plotting/frame/test_frame_color.py | 88 ++++----- .../tests/plotting/frame/test_frame_legend.py | 36 +++- .../plotting/frame/test_frame_subplots.py | 18 +- .../tests/plotting/frame/test_hist_box_by.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 42 +++-- pandas/tests/plotting/test_datetimelike.py | 159 ++++++++-------- pandas/tests/plotting/test_groupby.py | 20 +- pandas/tests/plotting/test_hist_method.py | 116 ++++++------ pandas/tests/plotting/test_misc.py | 24 +-- pandas/tests/plotting/test_series.py | 72 ++++--- pandas/tests/reductions/test_reductions.py | 12 +- pandas/tests/resample/conftest.py | 4 +- pandas/tests/resample/test_datetime_index.py | 61 +++--- pandas/tests/resample/test_period_index.py | 24 +-- pandas/tests/resample/test_resample_api.py | 6 +- .../tests/resample/test_resampler_grouper.py | 4 +- pandas/tests/resample/test_time_grouper.py | 15 +- pandas/tests/resample/test_timedelta.py | 2 +- pandas/tests/reshape/concat/test_append.py | 6 +- pandas/tests/reshape/concat/test_concat.py | 52 ++--- pandas/tests/reshape/concat/test_empty.py | 4 
+- pandas/tests/reshape/concat/test_index.py | 9 +- pandas/tests/reshape/concat/test_series.py | 20 +- pandas/tests/reshape/merge/test_join.py | 81 +++++--- pandas/tests/reshape/merge/test_merge.py | 25 +-- pandas/tests/reshape/merge/test_multi.py | 14 +- pandas/tests/reshape/test_crosstab.py | 40 ++-- pandas/tests/reshape/test_cut.py | 8 +- pandas/tests/reshape/test_melt.py | 10 +- pandas/tests/reshape/test_pivot.py | 20 +- pandas/tests/reshape/test_qcut.py | 8 +- .../series/accessors/test_dt_accessor.py | 2 +- pandas/tests/series/indexing/test_datetime.py | 14 +- pandas/tests/series/indexing/test_get.py | 2 +- pandas/tests/series/indexing/test_getitem.py | 21 ++- pandas/tests/series/indexing/test_indexing.py | 10 +- pandas/tests/series/indexing/test_mask.py | 4 +- pandas/tests/series/indexing/test_setitem.py | 5 +- pandas/tests/series/indexing/test_where.py | 6 +- pandas/tests/series/methods/test_align.py | 4 +- pandas/tests/series/methods/test_argsort.py | 2 +- pandas/tests/series/methods/test_asof.py | 8 +- pandas/tests/series/methods/test_astype.py | 6 +- pandas/tests/series/methods/test_autocorr.py | 2 +- .../series/methods/test_combine_first.py | 2 +- pandas/tests/series/methods/test_cov_corr.py | 4 +- pandas/tests/series/methods/test_fillna.py | 8 +- .../tests/series/methods/test_interpolate.py | 6 +- .../tests/series/methods/test_is_monotonic.py | 2 +- pandas/tests/series/methods/test_is_unique.py | 2 +- pandas/tests/series/methods/test_matmul.py | 6 +- pandas/tests/series/methods/test_nlargest.py | 4 +- pandas/tests/series/methods/test_nunique.py | 2 +- pandas/tests/series/methods/test_quantile.py | 2 +- pandas/tests/series/methods/test_rank.py | 2 +- pandas/tests/series/methods/test_reindex.py | 2 +- pandas/tests/series/methods/test_repeat.py | 2 +- pandas/tests/series/methods/test_replace.py | 6 +- .../tests/series/methods/test_reset_index.py | 2 +- .../tests/series/methods/test_sort_values.py | 2 +- pandas/tests/series/methods/test_unstack.py | 2 +- 
pandas/tests/series/test_api.py | 6 +- pandas/tests/series/test_arithmetic.py | 18 +- pandas/tests/series/test_constructors.py | 8 +- pandas/tests/series/test_npfuncs.py | 2 +- pandas/tests/series/test_repr.py | 22 ++- pandas/tests/series/test_ufunc.py | 6 +- pandas/tests/test_algos.py | 12 +- pandas/tests/test_common.py | 2 +- pandas/tests/test_expressions.py | 26 +-- pandas/tests/test_multilevel.py | 12 +- pandas/tests/test_nanops.py | 22 +-- pandas/tests/test_sorting.py | 20 +- pandas/tests/test_take.py | 14 +- pandas/tests/tslibs/test_ccalendar.py | 2 +- pandas/tests/util/test_hashing.py | 4 +- pandas/tests/util/test_make_objects.py | 2 +- pandas/tests/window/conftest.py | 4 +- pandas/tests/window/test_api.py | 2 +- pandas/tests/window/test_apply.py | 10 +- pandas/tests/window/test_ewm.py | 20 +- pandas/tests/window/test_expanding.py | 12 +- pandas/tests/window/test_pairwise.py | 6 +- pandas/tests/window/test_rolling.py | 2 +- pandas/tests/window/test_rolling_functions.py | 16 +- pandas/tests/window/test_rolling_quantile.py | 6 +- pandas/tests/window/test_rolling_skew_kurt.py | 10 +- pandas/tests/window/test_timeseries_window.py | 2 +- pandas/tests/window/test_win_type.py | 6 +- 264 files changed, 2084 insertions(+), 1623 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 3831acf3c5bbf..be00d371abe92 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -393,7 +393,7 @@ def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index: elif dtype.kind == "f": values = np.random.default_rng(2).random(k) - np.random.default_rng(2).random(1) values.sort() - values = values * (10 ** np.random.default_rng(2).randint(0, 9)) + values = values * (10 ** np.random.default_rng(2).integers(0, 9)) else: raise NotImplementedError(f"wrong dtype {dtype}") @@ -486,7 +486,7 @@ def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]: # make series def make_rand_series(name=None, 
dtype=np.float64) -> Series: index = makeStringIndex(_N) - data = np.random.default_rng(2).randn(_N) + data = np.random.default_rng(2).standard_normal(_N) with np.errstate(invalid="ignore"): data = data.astype(dtype, copy=False) return Series(data, index=index, name=name) @@ -510,7 +510,8 @@ def makeObjectSeries(name=None) -> Series: def getSeriesData() -> dict[str, Series]: index = makeStringIndex(_N) return { - c: Series(np.random.default_rng(2).randn(_N), index=index) for c in getCols(_K) + c: Series(np.random.default_rng(2).standard_normal(_N), index=index) + for c in getCols(_K) } @@ -518,7 +519,7 @@ def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series: if nper is None: nper = _N return Series( - np.random.default_rng(2).randn(nper), + np.random.default_rng(2).standard_normal(nper), index=makeDateIndex(nper, freq=freq), name=name, ) @@ -528,7 +529,9 @@ def makePeriodSeries(nper=None, name=None) -> Series: if nper is None: nper = _N return Series( - np.random.default_rng(2).randn(nper), index=makePeriodIndex(nper), name=name + np.random.default_rng(2).standard_normal(nper), + index=makePeriodIndex(nper), + name=name, ) @@ -792,37 +795,6 @@ def makeCustomDataframe( return DataFrame(data, index, columns, dtype=dtype) -def _create_missing_idx(nrows, ncols, density: float): - random_state = np.random.default_rng(2) - - # below is cribbed from scipy.sparse - size = round((1 - density) * nrows * ncols) - # generate a few more to ensure unique values - min_rows = 5 - fac = 1.02 - extra_size = min(size + min_rows, fac * size) - - def _gen_unique_rand(rng, _extra_size): - ind = rng.rand(int(_extra_size)) - return np.unique(np.floor(ind * nrows * ncols))[:size] - - ind = _gen_unique_rand(random_state, extra_size) - while ind.size < size: - extra_size *= 1.05 - ind = _gen_unique_rand(random_state, extra_size) - - j = np.floor(ind * 1.0 / nrows).astype(int) - i = (ind - j * nrows).astype(int) - return i.tolist(), j.tolist() - - -def 
makeMissingDataframe(density: float = 0.9) -> DataFrame: - df = makeDataFrame() - i, j = _create_missing_idx(*df.shape, density=density) - df.iloc[i, j] = np.nan - return df - - class SubclassedSeries(Series): _metadata = ["testattr", "name"] @@ -1133,7 +1105,6 @@ def shares_memory(left, right) -> bool: "makeFloatSeries", "makeIntervalIndex", "makeIntIndex", - "makeMissingDataframe", "makeMixedDataFrame", "makeMultiIndex", "makeNumericIndex", diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 10ccf12442522..158e9c083be13 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -376,7 +376,7 @@ def test_apply_reduce_to_dict(): def test_apply_differently_indexed(): - df = DataFrame(np.random.default_rng(2).randn(20, 10)) + df = DataFrame(np.random.default_rng(2).standard_normal(20, 10)) result = df.apply(Series.describe, axis=0) expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) @@ -463,9 +463,9 @@ def test_apply_convert_objects(): "shiny", "shiny", ], - "D": np.random.default_rng(2).randn(11), - "E": np.random.default_rng(2).randn(11), - "F": np.random.default_rng(2).randn(11), + "D": np.random.default_rng(2).standard_normal(11), + "E": np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) @@ -816,7 +816,7 @@ def test_with_listlike_columns(): # GH 17348 df = DataFrame( { - "a": Series(np.random.default_rng(2).randn(4)), + "a": Series(np.random.default_rng(2).standard_normal(4)), "b": ["a", "list", "of", "words"], "ts": date_range("2016-10-01", periods=4, freq="H"), } @@ -862,7 +862,9 @@ def test_infer_output_shape_columns(): def test_infer_output_shape_listlike_columns(): # GH 16353 - df = DataFrame(np.random.default_rng(2).randn(6, 3), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(6, 3), columns=["A", "B", "C"] + ) result = df.apply(lambda x: [1, 2, 3], 
axis=1) expected = Series([[1, 2, 3] for t in df.itertuples()]) @@ -911,7 +913,9 @@ def fun(x): def test_consistent_coerce_for_shapes(lst): # we want column names to NOT be propagated # just because the shape matches the input shape - df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(4, 3), columns=["A", "B", "C"] + ) result = df.apply(lambda x: lst, axis=1) expected = Series([lst for t in df.itertuples()]) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 2f7e0d56af2fa..315bbbac749eb 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -93,7 +93,7 @@ def test_series_nested_renamer(renamer): def test_apply_dict_depr(): tsdf = DataFrame( - np.random.default_rng(2).randn(10, 3), + np.random.default_rng(2).standard_normal(10, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=10), ) @@ -190,9 +190,9 @@ def test_apply_modify_traceback(): "shiny", "shiny", ], - "D": np.random.default_rng(2).randn(11), - "E": np.random.default_rng(2).randn(11), - "F": np.random.default_rng(2).randn(11), + "D": np.random.default_rng(2).standard_normal(11), + "E": np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index b8892e114b80d..aea1e03dfe0ee 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -76,7 +76,7 @@ def f(x): @pytest.mark.parametrize("convert_dtype", [True, False]) def test_apply_convert_dtype_deprecated(convert_dtype): - ser = Series(np.random.default_rng(2).randn(10)) + ser = Series(np.random.default_rng(2).standard_normal(10)) def func(x): return x if x > 0 else np.nan diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 
3c79cc773047c..4fba28ee3bca3 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -99,8 +99,8 @@ def test_df_numeric_cmp_dt64_raises(self, box_with_array, fixed_now_ts): def test_compare_invalid(self): # GH#8058 # ops testing - a = Series(np.random.default_rng(2).randn(5), name=0) - b = Series(np.random.default_rng(2).randn(5)) + a = Series(np.random.default_rng(2).standard_normal(5), name=0) + b = Series(np.random.default_rng(2).standard_normal(5)) b.name = pd.Timestamp("2000-01-01") tm.assert_series_equal(a / b, 1 / (b / a)) @@ -109,7 +109,7 @@ def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch): box = box_with_array xbox = box if box is not Index else np.ndarray - obj = Series(np.random.default_rng(2).randn(51)) + obj = Series(np.random.default_rng(2).standard_normal(51)) obj = tm.box_expected(obj, box, transpose=False) with monkeypatch.context() as m: m.setattr(expr, "_MIN_ELEMENTS", 50) @@ -459,7 +459,7 @@ def test_ser_divmod_inf(self): def test_rdiv_zero_compat(self): # GH#8674 zero_array = np.array([0] * 5) - data = np.random.default_rng(2).randn(5) + data = np.random.default_rng(2).standard_normal(5) expected = Series([0.0] * 5) result = zero_array / Series(data) @@ -535,7 +535,7 @@ def test_df_div_zero_int(self): def test_df_div_zero_series_does_not_commute(self): # integer div, but deal with the 0's (GH#9144) - df = pd.DataFrame(np.random.default_rng(2).randn(10, 5)) + df = pd.DataFrame(np.random.default_rng(2).standard_normal(10, 5)) ser = df[0] res = ser / df res2 = df / ser @@ -602,7 +602,7 @@ def test_df_mod_zero_int(self): def test_df_mod_zero_series_does_not_commute(self): # GH#3590, modulo as ints # not commutative with series - df = pd.DataFrame(np.random.default_rng(2).randn(10, 5)) + df = pd.DataFrame(np.random.default_rng(2).standard_normal(10, 5)) ser = df[0] res = ser % df res2 = df % ser @@ -770,7 +770,7 @@ def test_divmod_series(self, numeric_idx): 
@pytest.mark.parametrize("other", [np.nan, 7, -23, 2.718, -3.14, np.inf]) def test_ops_np_scalar(self, other): - vals = np.random.default_rng(2).randn(5, 3) + vals = np.random.default_rng(2).standard_normal((5, 3)) f = lambda x: pd.DataFrame( x, index=list("ABCDE"), columns=["jim", "joe", "jolie"] ) @@ -912,7 +912,7 @@ def test_series_frame_radd_bug(self, fixed_now_ts): # TODO: This came from series.test.test_operators, needs cleanup def test_datetime64_with_index(self): # arithmetic integer ops with an index - ser = Series(np.random.default_rng(2).randn(5)) + ser = Series(np.random.default_rng(2).standard_normal(5)) expected = ser - ser.index.to_series() result = ser - ser.index tm.assert_series_equal(result, expected) @@ -933,7 +933,7 @@ def test_datetime64_with_index(self): result = ser - ser.index.to_period() df = pd.DataFrame( - np.random.default_rng(2).randn(5, 2), + np.random.default_rng(2).standard_normal((5, 2)), index=pd.date_range("20130101", periods=5), ) df["date"] = pd.Timestamp("20130102") @@ -1179,7 +1179,9 @@ def test_numarr_with_dtype_add_int(self, dtype, box_with_array): def test_operators_reverse_object(self, op): # GH#56 arr = Series( - np.random.default_rng(2).randn(10), index=np.arange(10), dtype=object + np.random.default_rng(2).standard_normal(10), + index=np.arange(10), + dtype=object, ) result = op(1.0, arr) @@ -1287,13 +1289,13 @@ def test_arithmetic_with_frame_or_series(self, op): # check that we return NotImplemented when operating with Series # or DataFrame index = RangeIndex(5) - other = Series(np.random.default_rng(2).randn(5)) + other = Series(np.random.default_rng(2).standard_normal(5)) expected = op(Series(index), other) result = op(index, other) tm.assert_series_equal(result, expected) - other = pd.DataFrame(np.random.default_rng(2).randn(2, 5)) + other = pd.DataFrame(np.random.default_rng(2).standard_normal((2, 5))) expected = op(pd.DataFrame([index, index]), other) result = op(index, other) tm.assert_frame_equal(result, expected) @@
-1412,7 +1414,7 @@ def test_dataframe_div_silenced(): columns=list("ABCD"), ) pdf2 = pd.DataFrame( - np.random.default_rng(2).randn(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), index=list("abcdefghjk"), columns=list("ABCX"), ) @@ -1449,7 +1451,7 @@ def test_integer_array_add_list_like( def test_sub_multiindex_swapped_levels(): # GH 9952 df = pd.DataFrame( - {"a": np.random.default_rng(2).randn(6)}, + {"a": np.random.default_rng(2).standard_normal(6)}, index=pd.MultiIndex.from_product( [["a", "b"], [0, 1, 2]], names=["levA", "levB"] ), diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 6e2ca746663bc..5ffbf1a38e845 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -22,7 +22,7 @@ class TestObjectComparisons: def test_comparison_object_numeric_nas(self, comparison_op): - ser = Series(np.random.default_rng(2).randn(10), dtype=object) + ser = Series(np.random.default_rng(2).standard_normal(10), dtype=object) shifted = ser.shift(2) func = comparison_op diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index ffca1807dac52..c65cf5bf58d60 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -16,7 +16,7 @@ class TestAstype: def test_astype_str_int_categories_to_nullable_int(self): # GH#39616 dtype = CategoricalDtype([str(i) for i in range(5)]) - codes = np.random.default_rng(2).randint(5, size=20) + codes = np.random.default_rng(2).integers(5, size=20) arr = Categorical.from_codes(codes, dtype=dtype) res = arr.astype("Int64") @@ -26,7 +26,7 @@ def test_astype_str_int_categories_to_nullable_int(self): def test_astype_str_int_categories_to_nullable_float(self): # GH#39616 dtype = CategoricalDtype([str(i / 2) for i in range(5)]) - codes = np.random.default_rng(2).randint(5, size=20) + codes = np.random.default_rng(2).integers(5, size=20) arr =
Categorical.from_codes(codes, dtype=dtype) res = arr.astype("Float64") diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 1f2c2fd709eb7..e25e31e2f2e9e 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -513,7 +513,7 @@ def test_construction_with_null(self, klass, nulls_fixture): def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype, validate): # GH#39649 cats = pd.array(range(5), dtype=any_numeric_ea_dtype) - codes = np.random.default_rng(2).randint(5, size=3) + codes = np.random.default_rng(2).integers(5, size=3) dtype = CategoricalDtype(cats) arr = Categorical.from_codes(codes, dtype=dtype, validate=validate) assert arr.categories.dtype == cats.dtype diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 4046eca5d2416..e15bac9f0b8aa 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -107,7 +107,7 @@ def test_setitem_listlike(self): # properly coerce the input indexers cat = Categorical( - np.random.default_rng(2).randint(0, 5, size=150000).astype(np.int8) + np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8) ).add_categories([-1000]) indexer = np.array([100000]).astype(np.int64) cat[indexer] = -1000 @@ -133,7 +133,7 @@ def test_getitem_listlike(self): # properly coerce the input indexers c = Categorical( - np.random.default_rng(2).randint(0, 5, size=150000).astype(np.int8) + np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8) ) result = c.codes[np.array([100000]).astype(np.int64)] expected = c[np.array([100000]).astype(np.int64)].codes diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 19ac81102f7f2..0eeb01b746088 100644 --- 
a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -28,7 +28,7 @@ def test_na_flags_int_categories(self): # #1457 categories = list(range(10)) - labels = np.random.default_rng(2).randint(0, 10, 20) + labels = np.random.default_rng(2).integers(0, 10, 20) labels[::5] = -1 cat = Categorical(labels, categories) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index cac0c4e61e82d..508ef6cb9c025 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -345,7 +345,7 @@ def test_compare_unordered_different_order(self): assert not a.equals(b) def test_numeric_like_ops(self): - df = DataFrame({"value": np.random.default_rng(2).randint(0, 10000, 100)}) + df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 100)}) labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index b336a2658a79b..ffc93b4e4f176 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -475,8 +475,8 @@ def test_mismatched_length_cmp_op(cons): @pytest.mark.parametrize("fill_value", [np.nan, 3]) def test_binary_operators(op, fill_value): op = getattr(operator, op) - data1 = np.random.default_rng(2).randn(20) - data2 = np.random.default_rng(2).randn(20) + data1 = np.random.default_rng(2).standard_normal(20) + data2 = np.random.default_rng(2).standard_normal(20) data1[::2] = fill_value data2[::3] = fill_value diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 2697c6d0107ea..d1aa1e2d05f89 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -98,15 +98,15 @@ def _eval_single_bin(lhs, cmp1, rhs, engine): 
ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"], ) def lhs(request): - nan_df1 = DataFrame(np.random.default_rng(2).rand(10, 5)) + nan_df1 = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) nan_df1[nan_df1 > 0.5] = np.nan opts = ( - DataFrame(np.random.default_rng(2).randn(10, 5)), - Series(np.random.default_rng(2).randn(5)), + DataFrame(np.random.default_rng(2).standard_normal(10, 5)), + Series(np.random.default_rng(2).standard_normal(5)), Series([1, 2, np.nan, np.nan, 5]), nan_df1, - np.random.default_rng(2).randn(), + np.random.default_rng(2).standard_normal(), ) return opts[request.param] @@ -366,7 +366,7 @@ def test_frame_invert(self, engine, parser): # ~ ## # frame # float always raises - lhs = DataFrame(np.random.default_rng(2).randn(5, 2)) + lhs = DataFrame(np.random.default_rng(2).standard_normal(5, 2)) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert_dd'" with pytest.raises(NotImplementedError, match=msg): @@ -377,7 +377,7 @@ def test_frame_invert(self, engine, parser): pd.eval(expr, engine=engine, parser=parser) # int raises on numexpr - lhs = DataFrame(np.random.default_rng(2).randint(5, size=(5, 2))) + lhs = DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert" with pytest.raises(NotImplementedError, match=msg): @@ -388,14 +388,14 @@ def test_frame_invert(self, engine, parser): tm.assert_frame_equal(expect, result) # bool always works - lhs = DataFrame(np.random.default_rng(2).rand(5, 2) > 0.5) + lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5) expect = ~lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_frame_equal(expect, result) # object raises lhs = DataFrame( - {"b": ["a", 1, 2.0], "c": np.random.default_rng(2).rand(3) > 0.5} + {"b": ["a", 1, 2.0], "c": np.random.default_rng(2).standard_normal(3) > 0.5} ) if engine == "numexpr": with pytest.raises(ValueError, match="unknown 
type object"): @@ -411,7 +411,7 @@ def test_series_invert(self, engine, parser): # series # float raises - lhs = Series(np.random.default_rng(2).randn(5)) + lhs = Series(np.random.default_rng(2).standard_normal(5)) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert_dd'" with pytest.raises(NotImplementedError, match=msg): @@ -422,7 +422,7 @@ def test_series_invert(self, engine, parser): pd.eval(expr, engine=engine, parser=parser) # int raises on numexpr - lhs = Series(np.random.default_rng(2).randint(5, size=5)) + lhs = Series(np.random.default_rng(2).integers(5, size=5)) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert" with pytest.raises(NotImplementedError, match=msg): @@ -433,7 +433,7 @@ def test_series_invert(self, engine, parser): tm.assert_series_equal(expect, result) # bool - lhs = Series(np.random.default_rng(2).rand(5) > 0.5) + lhs = Series(np.random.default_rng(2).standard_normal(5) > 0.5) expect = ~lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_series_equal(expect, result) @@ -456,19 +456,19 @@ def test_frame_negate(self, engine, parser): expr = "-lhs" # float - lhs = DataFrame(np.random.default_rng(2).randn(5, 2)) + lhs = DataFrame(np.random.default_rng(2).standard_normal(5, 2)) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_frame_equal(expect, result) # int - lhs = DataFrame(np.random.default_rng(2).randint(5, size=(5, 2))) + lhs = DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_frame_equal(expect, result) # bool doesn't work with numexpr but works elsewhere - lhs = DataFrame(np.random.default_rng(2).rand(5, 2) > 0.5) + lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5) if engine == "numexpr": msg = "couldn't find matching opcode for 'neg_bb'" with pytest.raises(NotImplementedError, match=msg): @@ -482,19 +482,19 @@ def test_series_negate(self, 
engine, parser): expr = "-lhs" # float - lhs = Series(np.random.default_rng(2).randn(5)) + lhs = Series(np.random.default_rng(2).standard_normal(5)) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_series_equal(expect, result) # int - lhs = Series(np.random.default_rng(2).randint(5, size=5)) + lhs = Series(np.random.default_rng(2).integers(5, size=5)) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_series_equal(expect, result) # bool doesn't work with numexpr but works elsewhere - lhs = Series(np.random.default_rng(2).rand(5) > 0.5) + lhs = Series(np.random.default_rng(2).standard_normal(5) > 0.5) if engine == "numexpr": msg = "couldn't find matching opcode for 'neg_bb'" with pytest.raises(NotImplementedError, match=msg): @@ -508,11 +508,11 @@ def test_series_negate(self, engine, parser): "lhs", [ # Float - DataFrame(np.random.default_rng(2).randn(5, 2)), + DataFrame(np.random.default_rng(2).standard_normal((5, 2))), # Int - DataFrame(np.random.default_rng(2).randint(5, size=(5, 2))), + DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))), # bool doesn't work with numexpr but works elsewhere - DataFrame(np.random.default_rng(2).rand(5, 2) > 0.5), + DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5), ], ) def test_frame_pos(self, lhs, engine, parser): @@ -526,11 +526,11 @@ def test_frame_pos(self, lhs, engine, parser): "lhs", [ # Float - Series(np.random.default_rng(2).randn(5)), + Series(np.random.default_rng(2).standard_normal(5)), # Int - Series(np.random.default_rng(2).randint(5, size=5)), + Series(np.random.default_rng(2).integers(5, size=5)), # bool doesn't work with numexpr but works elsewhere - Series(np.random.default_rng(2).rand(5) > 0.5), + Series(np.random.default_rng(2).standard_normal(5) > 0.5), ], ) def test_series_pos(self, lhs, engine, parser): @@ -623,8 +623,8 @@ def test_unary_in_function(self): ), ) def test_disallow_scalar_bool_ops(self, ex, engine, parser): - x, a, 
b = np.random.default_rng(2).randn(3), 1, 2  # noqa: F841 - df = DataFrame(np.random.default_rng(2).randn(3, 2)) # noqa: F841 + x, a, b = np.random.default_rng(2).standard_normal(3), 1, 2  # noqa: F841 + df = DataFrame(np.random.default_rng(2).standard_normal((3, 2)))  # noqa: F841 msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" with pytest.raises(NotImplementedError, match=msg): @@ -717,7 +717,7 @@ def test_and_logic_string_match(self): assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}") -f = lambda *args, **kwargs: np.random.default_rng(2).randn() +f = lambda *args, **kwargs: np.random.default_rng(2).standard_normal() # ------------------------------------- @@ -795,7 +795,7 @@ def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type): tm.assert_frame_equal(res, df < 2) df3 = DataFrame( - np.random.default_rng(2).randn(*df.shape), + np.random.default_rng(2).standard_normal(df.shape), index=df.index, columns=df.columns, ) @@ -838,7 +838,7 @@ def test_basic_frame_series_alignment( 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type ) index = getattr(df, index_name) - s = Series(np.random.default_rng(2).randn(5), index[:5]) + s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) if should_warn(df.index, s.index): with tm.assert_produces_warning(RuntimeWarning): @@ -878,7 +878,7 @@ def test_basic_series_frame_alignment( 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type ) index = getattr(df, index_name) - s = Series(np.random.default_rng(2).randn(5), index[:5]) + s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) if should_warn(s.index, df.index): with tm.assert_produces_warning(RuntimeWarning): res = pd.eval("s + df", engine=engine, parser=parser) @@ -905,7 +905,7 @@ def test_series_frame_commutativity( 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type ) index = getattr(df, index_name) - s = Series(np.random.default_rng(2).randn(5),
index[:5]) + s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) lhs = f"s {op} df" rhs = f"df {op} s" @@ -944,7 +944,7 @@ def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 ) index = getattr(locals().get(obj_name), index_name) - ser = Series(np.random.default_rng(2).randn(n), index[:n]) + ser = Series(np.random.default_rng(2).standard_normal(n), index[:n]) if r2 == "dt" or c2 == "dt": if engine == "numexpr": @@ -971,8 +971,8 @@ def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): tm.assert_frame_equal(res, expected) def test_performance_warning_for_poor_alignment(self, engine, parser): - df = DataFrame(np.random.default_rng(2).randn(1000, 10)) - s = Series(np.random.default_rng(2).randn(10000)) + df = DataFrame(np.random.default_rng(2).standard_normal(1000, 10)) + s = Series(np.random.default_rng(2).standard_normal(10000)) if engine == "numexpr": seen = PerformanceWarning else: @@ -981,17 +981,17 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): with tm.assert_produces_warning(seen): pd.eval("df + s", engine=engine, parser=parser) - s = Series(np.random.default_rng(2).randn(1000)) + s = Series(np.random.default_rng(2).standard_normal(1000)) with tm.assert_produces_warning(False): pd.eval("df + s", engine=engine, parser=parser) - df = DataFrame(np.random.default_rng(2).randn(10, 10000)) - s = Series(np.random.default_rng(2).randn(10000)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 10000)) + s = Series(np.random.default_rng(2).standard_normal(10000)) with tm.assert_produces_warning(False): pd.eval("df + s", engine=engine, parser=parser) - df = DataFrame(np.random.default_rng(2).randn(10, 10)) - s = Series(np.random.default_rng(2).randn(10000)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 10)) + s = Series(np.random.default_rng(2).standard_normal(10000)) is_python_engine = engine == "python" 
@@ -1092,8 +1092,8 @@ def test_bool_ops_with_constants(self, rhs, lhs, op): assert res == exp def test_4d_ndarray_fails(self): - x = np.random.default_rng(2).randn(3, 4, 5, 6) - y = Series(np.random.default_rng(2).randn(10)) + x = np.random.default_rng(2).standard_normal(3, 4, 5, 6) + y = Series(np.random.default_rng(2).standard_normal(10)) msg = "N-dimensional objects, where N > 2, are not supported with eval" with pytest.raises(NotImplementedError, match=msg): self.eval("x + y", local_dict={"x": x, "y": y}) @@ -1103,23 +1103,25 @@ def test_constant(self): assert x == 1 def test_single_variable(self): - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) df2 = self.eval("df", local_dict={"df": df}) tm.assert_frame_equal(df, df2) def test_failing_subscript_with_name_error(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) # noqa: F841 + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) # noqa: F841 with pytest.raises(NameError, match="name 'x' is not defined"): self.eval("df[x > 2] > 2") def test_lhs_expression_subscript(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) result = self.eval("(df + 1)[df > 2]", local_dict={"df": df}) expected = (df + 1)[df > 2] tm.assert_frame_equal(result, expected) def test_attr_expression(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=list("abc")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 3), columns=list("abc") + ) expr1 = "df.a < df.b" expec1 = df.a < df.b expr2 = "df.a + df.b + df.c" @@ -1132,34 +1134,44 @@ def test_attr_expression(self): tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df})) def test_assignment_fails(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=list("abc")) - df2 = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame( + 
np.random.default_rng(2).standard_normal(5, 3), columns=list("abc") + ) + df2 = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) expr1 = "df = df2" msg = "cannot assign without a target object" with pytest.raises(ValueError, match=msg): self.eval(expr1, local_dict={"df": df, "df2": df2}) def test_assignment_column_multiple_raise(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) # multiple assignees with pytest.raises(SyntaxError, match="invalid syntax"): df.eval("d c = a + b") def test_assignment_column_invalid_assign(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) # invalid assignees msg = "left hand side of an assignment must be a single name" with pytest.raises(SyntaxError, match=msg): df.eval("d,c = a + b") def test_assignment_column_invalid_assign_function_call(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) msg = "cannot assign to function call" with pytest.raises(SyntaxError, match=msg): df.eval('Timestamp("20131001") = a + b') def test_assignment_single_assign_existing(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) # single assignment - existing variable expected = df.copy() expected["a"] = expected["a"] + expected["b"] @@ -1167,7 +1179,9 @@ def test_assignment_single_assign_existing(self): tm.assert_frame_equal(df, expected) def test_assignment_single_assign_new(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) # single assignment - new 
variable expected = df.copy() expected["c"] = expected["a"] + expected["b"] @@ -1175,7 +1189,9 @@ def test_assignment_single_assign_new(self): tm.assert_frame_equal(df, expected) def test_assignment_single_assign_local_overlap(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) df = df.copy() a = 1 # noqa: F841 df.eval("a = 1 + b", inplace=True) @@ -1185,7 +1201,9 @@ def test_assignment_single_assign_local_overlap(self): tm.assert_frame_equal(df, expected) def test_assignment_single_assign_name(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) a = 1 # noqa: F841 old_a = df.a.copy() @@ -1195,7 +1213,9 @@ def test_assignment_single_assign_name(self): assert result.name is None def test_assignment_multiple_raises(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) # multiple assignment df.eval("c = a + b", inplace=True) msg = "can only assign a single expression" @@ -1203,7 +1223,9 @@ def test_assignment_multiple_raises(self): df.eval("c = a = b") def test_assignment_explicit(self): - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) # explicit targets self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True) expected = df.copy() @@ -1222,7 +1244,9 @@ def test_column_in(self): @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.") def test_assignment_not_inplace(self): # see gh-9297 - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + ) actual = df.eval("c = 
a + b", inplace=False) assert actual is not None @@ -1427,7 +1451,7 @@ def test_nested_period_index_subscript_expression(self): tm.assert_frame_equal(r, e) def test_date_boolean(self, engine, parser): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) res = self.eval( "df.dates1 < 20130101", @@ -1501,7 +1525,7 @@ def test_check_many_exprs(self, engine, parser): ], ) def test_fails_and_or_not(self, expr, engine, parser): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) if parser == "python": msg = "'BoolOp' nodes are not implemented" if "not" in expr: @@ -1525,7 +1549,7 @@ def test_fails_and_or_not(self, expr, engine, parser): @pytest.mark.parametrize("char", ["|", "&"]) def test_fails_ampersand_pipe(self, char, engine, parser): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) # noqa: F841 + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) # noqa: F841 ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)" if parser == "python": msg = "cannot evaluate scalar only bool ops" @@ -1546,7 +1570,7 @@ def eval(self, *args, **kwargs): ) @pytest.mark.parametrize("fn", _unary_math_ops) def test_unary_functions(self, fn): - df = DataFrame({"a": np.random.default_rng(2).randn(10)}) + df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) a = df.a expr = f"{fn}(a)" @@ -1559,8 +1583,8 @@ def test_unary_functions(self, fn): def test_binary_functions(self, fn): df = DataFrame( { - "a": np.random.default_rng(2).randn(10), - "b": np.random.default_rng(2).randn(10), + "a": np.random.default_rng(2).standard_normal(10), + "b": np.random.default_rng(2).standard_normal(10), } ) a = df.a @@ -1575,8 +1599,8 @@ def test_binary_functions(self, fn): def test_df_use_case(self, engine, parser): df = DataFrame( { - "a": np.random.default_rng(2).randn(10), - "b": 
np.random.default_rng(2).randn(10), + "a": np.random.default_rng(2).standard_normal(10), + "b": np.random.default_rng(2).standard_normal(10), } ) df.eval( @@ -1592,8 +1616,8 @@ def test_df_use_case(self, engine, parser): def test_df_arithmetic_subexpression(self, engine, parser): df = DataFrame( { - "a": np.random.default_rng(2).randn(10), - "b": np.random.default_rng(2).randn(10), + "a": np.random.default_rng(2).standard_normal(10), + "b": np.random.default_rng(2).standard_normal(10), } ) df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True) @@ -1617,7 +1641,9 @@ def test_result_types(self, dtype, expect_dtype, engine, parser): # Did not test complex64 because DataFrame is converting it to # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952 - df = DataFrame({"a": np.random.default_rng(2).randn(10).astype(dtype)}) + df = DataFrame( + {"a": np.random.default_rng(2).standard_normal(10).astype(dtype)} + ) assert df.a.dtype == dtype df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True) got = df.b @@ -1627,21 +1653,21 @@ def test_result_types(self, dtype, expect_dtype, engine, parser): tm.assert_series_equal(got, expect, check_names=False) def test_undefined_func(self, engine, parser): - df = DataFrame({"a": np.random.default_rng(2).randn(10)}) + df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) msg = '"mysin" is not a supported function' with pytest.raises(ValueError, match=msg): df.eval("mysin(a)", engine=engine, parser=parser) def test_keyword_arg(self, engine, parser): - df = DataFrame({"a": np.random.default_rng(2).randn(10)}) + df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) msg = 'Function "sin" does not support keyword arguments' with pytest.raises(TypeError, match=msg): df.eval("sin(x=a)", engine=engine, parser=parser) -_var_s = np.random.default_rng(2).randn(10) +_var_s = np.random.default_rng(2).standard_normal(10) class TestScope: @@ -1801,8 +1827,8 @@ def 
test_more_than_one_expression_raises(engine, parser): @pytest.mark.parametrize("rhs", (int, float)) def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): gen = { - int: lambda: np.random.default_rng(2).randint(10), - float: np.random.default_rng(2).randn, + int: lambda: np.random.default_rng(2).integers(10), + float: np.random.default_rng(2).standard_normal, } mid = gen[lhs]() # noqa: F841 @@ -1846,7 +1872,9 @@ def test_inf(engine, parser): @pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"]) def test_query_token(engine, column): # See: https://github.com/pandas-dev/pandas/pull/42826 - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=[column, "b"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=[column, "b"] + ) expected = df[df[column] > 5] query_string = f"`{column}` > 5" result = df.query(query_string, engine=engine) @@ -1884,7 +1912,8 @@ def test_eval_no_support_column_name(request, column): ) df = DataFrame( - np.random.default_rng(2).randint(0, 100, size=(10, 2)), columns=[column, "col1"] + np.random.default_rng(2).integers(0, 100, size=(10, 2)), + columns=[column, "col1"], ) expected = df[df[column] > 6] result = df.query(f"{column}>6") diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index 3a0a1ea3be5a0..1ffa97a41ab69 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -75,7 +75,9 @@ def func(): def test_delete(using_copy_on_write): - df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(4, 3), columns=["a", "b", "c"] + ) del df["b"] if using_copy_on_write: # TODO: This should not have references, delete makes a shallow copy diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 5b9442dc857ed..31f25ca373096 100644 --- 
a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -19,7 +19,7 @@ class TestABCClasses: categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index) - sparse_array = pd.arrays.SparseArray(np.random.default_rng(2).randn(10)) + sparse_array = pd.arrays.SparseArray(np.random.default_rng(2).standard_normal(10)) datetime_array = pd.core.arrays.DatetimeArray(datetime_index) timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 0a5ccd662bc1f..49dce278a7413 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -239,8 +239,8 @@ def make_data(): return [ UserDict( [ - (rng.choice(string.ascii_letters), rng.randint(0, 100)) - for _ in range(rng.randint(0, 10)) + (rng.choice(string.ascii_letters), rng.integers(0, 100)) + for _ in range(rng.integers(0, 10)) ] ) for _ in range(100) diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 5324b10455eaa..9554aef3842fb 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -128,7 +128,7 @@ def make_data(): rng = np.random.default_rng(2) data = np.empty(100, dtype=object) data[:] = [ - [rng.choice(string.ascii_letters) for _ in range(rng.randint(0, 10))] + [rng.choice(string.ascii_letters) for _ in range(rng.integers(0, 10))] for _ in range(100) ] return data diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 5d77ef67b440a..def46bf848682 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -30,7 +30,7 @@ def make_data(fill_value): if np.isnan(fill_value): data = np.random.default_rng(2).uniform(size=100) else: - data = np.random.default_rng(2).randint(1, 
100, size=100) + data = np.random.default_rng(2).integers(1, 100, size=100) if data[0] == data[1]: data[0] += 1 diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 86ddd8edfb210..fb2df0b82e5f4 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -253,9 +253,9 @@ def frame_of_index_cols(): "A": ["foo", "foo", "foo", "bar", "bar"], "B": ["one", "two", "three", "one", "two"], "C": ["a", "b", "c", "d", "e"], - "D": np.random.default_rng(2).randn(5), - "E": np.random.default_rng(2).randn(5), - ("tuple", "as", "label"): np.random.default_rng(2).randn(5), + "D": np.random.default_rng(2).standard_normal(5), + "E": np.random.default_rng(2).standard_normal(5), + ("tuple", "as", "label"): np.random.default_rng(2).standard_normal(5), } ) return df diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 1a7142d59d839..133bd130a3add 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -59,13 +59,21 @@ def test_from_records_with_datetimes(self): def test_from_records_sequencelike(self): df = DataFrame( { - "A": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), - "A1": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), + "A": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float64 + ), + "A1": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float64 + ), "B": np.array(np.arange(6), dtype=np.int64), "C": ["foo"] * 6, "D": np.array([True, False] * 3, dtype=bool), - "E": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), - "E1": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), + "E": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float32 + ), + "E1": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float32 + ), "F": np.array(np.arange(6), dtype=np.int32), } ) 
@@ -140,13 +148,21 @@ def test_from_records_dictlike(self): # test the dict methods df = DataFrame( { - "A": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), - "A1": np.array(np.random.default_rng(2).randn(6), dtype=np.float64), + "A": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float64 + ), + "A1": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float64 + ), "B": np.array(np.arange(6), dtype=np.int64), "C": ["foo"] * 6, "D": np.array([True, False] * 3, dtype=bool), - "E": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), - "E1": np.array(np.random.default_rng(2).randn(6), dtype=np.float32), + "E": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float32 + ), + "E1": np.array( + np.random.default_rng(2).standard_normal(6), dtype=np.float32 + ), "F": np.array(np.arange(6), dtype=np.int32), } ) @@ -175,15 +191,19 @@ def test_from_records_dictlike(self): tm.assert_frame_equal(r, df) def test_from_records_with_index_data(self): - df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), columns=["A", "B", "C"] + ) - data = np.random.default_rng(2).randn(10) + data = np.random.default_rng(2).standard_normal(10) with tm.assert_produces_warning(FutureWarning): df1 = DataFrame.from_records(df, index=data) tm.assert_index_equal(df1.index, Index(data)) def test_from_records_bad_index_column(self): - df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), columns=["A", "B", "C"] + ) # should pass with tm.assert_produces_warning(FutureWarning): @@ -390,8 +410,8 @@ def test_from_records_set_index_name(self): def create_dict(order_id): return { "order_id": order_id, - "quantity": np.random.default_rng(2).randint(1, 10), - "price": np.random.default_rng(2).randint(1, 10), + "quantity": 
np.random.default_rng(2).integers(1, 10), + "price": np.random.default_rng(2).integers(1, 10), } documents = [create_dict(i) for i in range(10)] diff --git a/pandas/tests/frame/indexing/test_delitem.py b/pandas/tests/frame/indexing/test_delitem.py index 0f76576c2a526..b6b0bf03f49a3 100644 --- a/pandas/tests/frame/indexing/test_delitem.py +++ b/pandas/tests/frame/indexing/test_delitem.py @@ -16,7 +16,7 @@ def test_delitem(self, float_frame): def test_delitem_multiindex(self): midx = MultiIndex.from_product([["A", "B"], [1, 2]]) - df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=midx) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), columns=midx) assert len(df.columns) == 4 assert ("A",) in df.columns assert "A" in df.columns @@ -55,6 +55,6 @@ def test_delitem_col_still_multiindex(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).randn(3, 4), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 4), columns=index) del df[("a", "", "")] assert isinstance(df.columns, MultiIndex) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 31f1696b0e3c6..dad6aa35e2a3a 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -37,7 +37,7 @@ def test_getitem_unused_level_raises(self): def test_getitem_periodindex(self): rng = period_range("1/1/2000", periods=5) - df = DataFrame(np.random.default_rng(2).randn(10, 5), columns=rng) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 5), columns=rng) ts = df[rng[0]] tm.assert_series_equal(ts, df.iloc[:, 0]) @@ -93,7 +93,9 @@ def test_getitem_list_missing_key(self): def test_getitem_list_duplicates(self): # GH#1943 - df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=list("AABC")) + df = DataFrame( + np.random.default_rng(2).standard_normal(4, 4), columns=list("AABC") + ) df.columns.name = 
"foo" result = df[["B", "C"]] @@ -129,7 +131,7 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): else: # MultiIndex columns frame = DataFrame( - np.random.default_rng(2).randn(8, 3), + np.random.default_rng(2).standard_normal(8, 3), columns=Index( [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")], name=("sth", "sth2"), @@ -425,7 +427,7 @@ def test_getitem_slice_float64(self, frame_or_series): start, end = values[[5, 15]] - data = np.random.default_rng(2).randn(20, 3) + data = np.random.default_rng(2).standard_normal(20, 3) if frame_or_series is not DataFrame: data = data[:, 0] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9e88b09e3b244..7c4a6bf0a64b4 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -68,9 +68,9 @@ def test_getitem_numeric_should_not_fallback_to_positional(self, any_numeric_dty def test_getitem2(self, float_frame): df = float_frame.copy() - df["$10"] = np.random.default_rng(2).randn(len(df)) + df["$10"] = np.random.default_rng(2).standard_normal(len(df)) - ad = np.random.default_rng(2).randn(len(df)) + ad = np.random.default_rng(2).standard_normal(len(df)) df["@awesome_domain"] = ad with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")): @@ -224,7 +224,7 @@ def test_getitem_boolean_list(self, lst): tm.assert_frame_equal(result, expected) def test_getitem_boolean_iadd(self): - arr = np.random.default_rng(2).randn(5, 5) + arr = np.random.default_rng(2).standard_normal(5, 5) df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"]) @@ -245,7 +245,7 @@ def test_boolean_index_empty_corner(self): def test_getitem_ix_mixed_integer(self): df = DataFrame( - np.random.default_rng(2).randn(4, 3), + np.random.default_rng(2).standard_normal(4, 3), index=[1, 10, "C", "E"], columns=[1, 2, 3], ) @@ -306,7 +306,7 @@ def test_setitem(self, float_frame, using_copy_on_write): tm.assert_series_equal(series, 
float_frame["col6"], check_names=False) # set ndarray - arr = np.random.default_rng(2).randn(len(float_frame)) + arr = np.random.default_rng(2).standard_normal(len(float_frame)) float_frame["col9"] = arr assert (float_frame["col9"] == arr).all() @@ -524,7 +524,7 @@ def test_loc_setitem_boolean_mask_allfalse(self): tm.assert_frame_equal(result, df) def test_getitem_fancy_slice_integers_step(self): - df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) # this is OK df.iloc[:8:2] @@ -532,7 +532,9 @@ def test_getitem_fancy_slice_integers_step(self): assert isna(df.iloc[:8:2]).values.all() def test_getitem_setitem_integer_slice_keyerrors(self): - df = DataFrame(np.random.default_rng(2).randn(10, 5), index=range(0, 20, 2)) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 5), index=range(0, 20, 2) + ) # this is OK cp = df.copy() @@ -602,13 +604,13 @@ def test_getitem_setitem_non_ix_labels(self): tm.assert_frame_equal(result2, expected) def test_ix_multi_take(self): - df = DataFrame(np.random.default_rng(2).randn(3, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 2)) rs = df.loc[df.index == 0, :] xp = df.reindex([0]) tm.assert_frame_equal(rs, xp) # GH#1321 - df = DataFrame(np.random.default_rng(2).randn(3, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 2)) rs = df.loc[df.index == 0, df.columns == 1] xp = df.reindex(index=[0], columns=[1]) tm.assert_frame_equal(rs, xp) @@ -634,7 +636,7 @@ def test_setitem_fancy_scalar(self, float_frame): f[col] for idx in f.index[::5]: i = f.index.get_loc(idx) - val = np.random.default_rng(2).randn() + val = np.random.default_rng(2).standard_normal() expected.iloc[i, j] = val ix[idx, col] = val @@ -708,7 +710,7 @@ def test_getitem_setitem_boolean_misaligned(self, float_frame): tm.assert_frame_equal(cp, expected) def test_getitem_setitem_boolean_multi(self): - df = DataFrame(np.random.default_rng(2).randn(3, 2)) + df = 
DataFrame(np.random.default_rng(2).standard_normal(3, 2)) # get k1 = np.array([True, False, True]) @@ -724,7 +726,7 @@ def test_getitem_setitem_boolean_multi(self): def test_getitem_setitem_float_labels(self, using_array_manager): index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(np.random.default_rng(2).randn(5, 5), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5), index=index) result = df.loc[1.5:4] expected = df.reindex([1.5, 2, 3, 4]) @@ -752,7 +754,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) - df = DataFrame(np.random.default_rng(2).randn(5, 5), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5), index=index) # positional slicing only via iloc! msg = ( @@ -811,7 +813,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): def test_setitem_single_column_mixed_datetime(self): df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), index=["a", "b", "c", "d", "e"], columns=["foo", "bar", "baz"], ) @@ -942,7 +944,7 @@ def test_setitem_frame_align(self, float_frame): def test_getitem_setitem_ix_duplicates(self): # #1201 df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), index=["foo", "foo", "bar", "baz", "bar"], ) @@ -961,7 +963,7 @@ def test_getitem_setitem_ix_duplicates(self): def test_getitem_ix_boolean_duplicates_multiple(self): # #1201 df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), index=["foo", "foo", "bar", "baz", "bar"], ) @@ -1009,7 +1011,9 @@ def test_single_element_ix_dont_upcast(self, float_frame): tm.assert_series_equal(result, expected) def test_iloc_row(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4), index=range(0, 20, 2)) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 4), index=range(0, 20, 2) + ) result = 
df.iloc[1] exp = df.loc[2] @@ -1030,7 +1034,9 @@ def test_iloc_row(self): tm.assert_frame_equal(result, expected) def test_iloc_row_slice_view(self, using_copy_on_write, request): - df = DataFrame(np.random.default_rng(2).randn(10, 4), index=range(0, 20, 2)) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 4), index=range(0, 20, 2) + ) original = df.copy() # verify slice is view @@ -1050,7 +1056,9 @@ def test_iloc_row_slice_view(self, using_copy_on_write, request): tm.assert_series_equal(df[2], exp_col) def test_iloc_col(self): - df = DataFrame(np.random.default_rng(2).randn(4, 10), columns=range(0, 20, 2)) + df = DataFrame( + np.random.default_rng(2).standard_normal(4, 10), columns=range(0, 20, 2) + ) result = df.iloc[:, 1] exp = df.loc[:, 2] @@ -1071,7 +1079,9 @@ def test_iloc_col(self): tm.assert_frame_equal(result, expected) def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write): - df = DataFrame(np.random.default_rng(2).randn(4, 10), columns=range(0, 20, 2)) + df = DataFrame( + np.random.default_rng(2).standard_normal(4, 10), columns=range(0, 20, 2) + ) original = df.copy() subset = df.iloc[:, slice(4, 8)] diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 11bd11840655f..137102ac86e29 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -18,7 +18,7 @@ class TestDataFrameInsert: def test_insert(self): df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), index=np.arange(5), columns=["c", "b", "a"], ) @@ -74,7 +74,7 @@ def test_insert_with_columns_dups(self): tm.assert_frame_equal(df, exp) def test_insert_item_cache(self, using_array_manager, using_copy_on_write): - df = DataFrame(np.random.default_rng(2).randn(4, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 3)) ser = df[0] if using_array_manager: @@ -100,7 +100,7 @@ def 
test_insert_EA_no_warning(self): # PerformanceWarning about fragmented frame should not be raised when # using EAs (https://github.com/pandas-dev/pandas/issues/44098) df = DataFrame( - np.random.default_rng(2).randint(0, 100, size=(3, 100)), dtype="Int64" + np.random.default_rng(2).integers(0, 100, size=(3, 100)), dtype="Int64" ) with tm.assert_produces_warning(None): df["a"] = np.array([1, 2, 3]) diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 8e8749ffe4dbc..374a02c6f5acc 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -18,14 +18,14 @@ class TestDataFrameMask: def test_mask(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) cond = df > 0 rs = df.where(cond, np.nan) tm.assert_frame_equal(rs, df.mask(df <= 0)) tm.assert_frame_equal(rs, df.mask(~cond)) - other = DataFrame(np.random.default_rng(2).randn(5, 3)) + other = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) rs = df.where(cond, other) tm.assert_frame_equal(rs, df.mask(df <= 0, other)) tm.assert_frame_equal(rs, df.mask(~cond, other)) @@ -40,7 +40,7 @@ def test_mask2(self): def test_mask_inplace(self): # GH#8801 - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) cond = df > 0 rdf = df.copy() @@ -85,7 +85,7 @@ def test_mask_callable(self): def test_mask_dtype_bool_conversion(self): # GH#3733 - df = DataFrame(data=np.random.default_rng(2).randn(100, 50)) + df = DataFrame(data=np.random.default_rng(2).standard_normal(100, 50)) df = df.where(df > 0) # create nans bools = df > 0 mask = isna(df) diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index 098acac2d0097..e7c973aa8ec75 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -42,7 
+42,9 @@ def test_set_value_resize(self, float_frame): def test_set_value_with_index_dtype_change(self): df_orig = DataFrame( - np.random.default_rng(2).randn(3, 3), index=range(3), columns=list("ABC") + np.random.default_rng(2).standard_normal(3, 3), + index=range(3), + columns=list("ABC"), ) # this is actually ambiguous as the 2 is interpreted as a positional diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index ebe7515691ecb..f6ef6e42ef18c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -57,14 +57,14 @@ class mystring(str): "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"] ) def test_setitem_dtype(self, dtype, float_frame): - # Use randint since casting negative floats to uints is undefined - arr = np.random.default_rng(2).randint(1, 10, len(float_frame)) + # Use integers since casting negative floats to uints is undefined + arr = np.random.default_rng(2).integers(1, 10, len(float_frame)) float_frame[dtype] = np.array(arr, dtype=dtype) assert float_frame[dtype].dtype.name == dtype def test_setitem_list_not_dataframe(self, float_frame): - data = np.random.default_rng(2).randn(len(float_frame), 2) + data = np.random.default_rng(2).standard_normal(len(float_frame), 2) float_frame[["A", "B"]] = data tm.assert_almost_equal(float_frame[["A", "B"]].values, data) @@ -85,7 +85,8 @@ def test_setitem_error_msmgs(self): # GH 4107, more descriptive error message df = DataFrame( - np.random.default_rng(2).randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"] + np.random.default_rng(2).integers(0, 2, (4, 4)), + columns=["a", "b", "c", "d"], ) msg = "Cannot set a DataFrame with multiple columns to the single column gr" @@ -97,7 +98,7 @@ def test_setitem_benchmark(self): N = 10 K = 5 df = DataFrame(index=range(N)) - new_col = np.random.default_rng(2).randn(N) + new_col = np.random.default_rng(2).standard_normal(N) for i in range(K): df[i] = 
new_col expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N)) @@ -105,7 +106,7 @@ def test_setitem_benchmark(self): def test_setitem_different_dtype(self): df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), index=np.arange(5), columns=["c", "b", "a"], ) @@ -356,7 +357,7 @@ def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): def test_setitem_periodindex(self): rng = period_range("1/1/2000", periods=5, name="index") - df = DataFrame(np.random.default_rng(2).randn(5, 3), index=rng) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3), index=rng) df["Index"] = rng rs = Index(df["Index"]) @@ -402,7 +403,7 @@ def test_setitem_period_d_dtype(self): def test_setitem_bool_with_numeric_index(self, dtype): # GH#36319 cols = Index([1, 2, 3], dtype=dtype) - df = DataFrame(np.random.default_rng(2).randn(3, 3), columns=cols) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 3), columns=cols) df[False] = ["a", "b", "c"] @@ -571,7 +572,7 @@ def test_setitem_multi_index(self): cols = MultiIndex.from_product(it) index = date_range("20141006", periods=20) - vals = np.random.default_rng(2).randint(1, 1000, (len(index), len(cols))) + vals = np.random.default_rng(2).integers(1, 1000, (len(index), len(cols))) df = DataFrame(vals, columns=cols, index=index) i, j = df.index.values.copy(), it[-1][:] @@ -776,7 +777,7 @@ def expected(self, idx): def test_setitem_dt64series(self, idx, expected): # convert to utc - df = DataFrame(np.random.default_rng(2).randn(2, 1), columns=["A"]) + df = DataFrame(np.random.default_rng(2).standard_normal(2, 1), columns=["A"]) df["B"] = idx df["B"] = idx.to_series(index=[0, 1]).dt.tz_convert(None) @@ -786,7 +787,7 @@ def test_setitem_dt64series(self, idx, expected): def test_setitem_datetimeindex(self, idx, expected): # setting a DataFrame column with a tzaware DTI retains the dtype - df = DataFrame(np.random.default_rng(2).randn(2, 1), columns=["A"]) 
+ df = DataFrame(np.random.default_rng(2).standard_normal(2, 1), columns=["A"]) # assign to frame df["B"] = idx @@ -795,7 +796,7 @@ def test_setitem_datetimeindex(self, idx, expected): def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): # setting a DataFrame column with a tzaware DTI retains the dtype - df = DataFrame(np.random.default_rng(2).randn(2, 1), columns=["A"]) + df = DataFrame(np.random.default_rng(2).standard_normal(2, 1), columns=["A"]) # object array of datetimes with a tz df["B"] = idx.to_pydatetime() @@ -844,7 +845,7 @@ def test_setitem_with_expansion_categorical_dtype(self): df = DataFrame( { "value": np.array( - np.random.default_rng(2).randint(0, 10000, 100), dtype="int32" + np.random.default_rng(2).integers(0, 10000, 100), dtype="int32" ) } ) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 906921231d806..35208b5eb4c0f 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -24,7 +24,9 @@ @pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"]) def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame): if request.param == "default": - return DataFrame(np.random.default_rng(2).randn(5, 3), columns=["A", "B", "C"]) + return DataFrame( + np.random.default_rng(2).standard_normal(5, 3), columns=["A", "B", "C"] + ) if request.param == "float_string": return float_string_frame if request.param == "mixed_float": @@ -145,7 +147,9 @@ def _check_align(df, cond, other, check_dtypes=True): def test_where_invalid(self): # invalid conditions - df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 3), columns=["A", "B", "C"] + ) cond = df > 0 err1 = (df + 1).values[0:2, :] @@ -367,7 +371,7 @@ def test_where_datetime(self): { "A": date_range("20130102", periods=5), "B": date_range("20130104", periods=5), - 
"C": np.random.default_rng(2).randn(5), + "C": np.random.default_rng(2).standard_normal(5), } ) @@ -430,7 +434,7 @@ def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): def test_where_align(self): def create(): - df = DataFrame(np.random.default_rng(2).randn(10, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 3)) df.iloc[3:5, 0] = np.nan df.iloc[4:6, 1] = np.nan df.iloc[5:8, 2] = np.nan @@ -470,7 +474,7 @@ def test_where_complex(self): def test_where_axis(self): # GH 9736 - df = DataFrame(np.random.default_rng(2).randn(2, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(2, 2)) mask = DataFrame([[False, False], [False, False]]) ser = Series([0, 1]) @@ -526,9 +530,10 @@ def test_where_axis_multiple_dtypes(self): # Multiple dtypes (=> multiple Blocks) df = pd.concat( [ - DataFrame(np.random.default_rng(2).randn(10, 2)), + DataFrame(np.random.default_rng(2).standard_normal(10, 2)), DataFrame( - np.random.default_rng(2).randint(0, 10, size=(10, 2)), dtype="int64" + np.random.default_rng(2).integers(0, 10, size=(10, 2)), + dtype="int64", ), ], ignore_index=True, diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 7d66a037d68e3..a775d4ad5922d 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -95,7 +95,8 @@ def test_xs_corner(self): def test_xs_duplicates(self): df = DataFrame( - np.random.default_rng(2).randn(5, 2), index=["b", "b", "c", "b", "a"] + np.random.default_rng(2).standard_normal(5, 2), + index=["b", "b", "c", "b", "a"], ) cross = df.xs("c") @@ -153,7 +154,9 @@ def test_xs_doc_example(self): index = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.default_rng(2).randn(3, 8), index=["A", "B", "C"], columns=index + np.random.default_rng(2).standard_normal(3, 8), + index=["A", "B", "C"], + columns=index, ) result = df.xs(("one", "bar"), level=("second", "first"), axis=1) @@ -167,7 +170,9 
@@ def test_xs_integer_key(self): ids = list("abcde") index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) df = DataFrame( - np.random.default_rng(2).randn(len(index), 3), index, ["X", "Y", "Z"] + np.random.default_rng(2).standard_normal(len(index), 3), + index, + ["X", "Y", "Z"], ) result = df.xs(20111201, level="date") @@ -182,7 +187,7 @@ def test_xs_level(self, multiindex_dataframe_random_data): tm.assert_frame_equal(result, expected) def test_xs_level_eq_2(self): - arr = np.random.default_rng(2).randn(3, 5) + arr = np.random.default_rng(2).standard_normal(3, 5) index = MultiIndex( levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], @@ -272,7 +277,7 @@ def test_xs_missing_values_in_index(self): ) def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index): # see GH#2903 - arr = np.random.default_rng(2).randn(4, 4) + arr = np.random.default_rng(2).standard_normal(4, 4) index = MultiIndex( levels=[["a", "b"], ["bar", "foo", "hello", "world"]], codes=[[0, 0, 1, 1], [0, 1, 2, 3]], @@ -343,7 +348,7 @@ def test_xs_IndexSlice_argument_not_implemented(self, frame_or_series): codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) - obj = DataFrame(np.random.default_rng(2).randn(6, 4), index=index) + obj = DataFrame(np.random.default_rng(2).standard_normal(6, 4), index=index) if frame_or_series is Series: obj = obj[0] diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 7f8953cdfa528..a52dc90b931f9 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -25,8 +25,8 @@ def test_align_asfreq_method_raises(self): def test_frame_align_aware(self): idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") - df1 = DataFrame(np.random.default_rng(2).randn(len(idx1), 3), idx1) - df2 = DataFrame(np.random.default_rng(2).randn(len(idx2), 3), 
idx2) + df1 = DataFrame(np.random.default_rng(2).standard_normal(len(idx1), 3), idx1) + df2 = DataFrame(np.random.default_rng(2).standard_normal(len(idx2), 3), idx2) new1, new2 = df1.align(df2) assert df1.index.tz == new1.index.tz assert df2.index.tz == new2.index.tz diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index c644e2b7a9c99..6da6737d9215d 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -71,7 +71,9 @@ def test_asfreq_datetimeindex_empty(self, frame_or_series): def test_tz_aware_asfreq_smoke(self, tz, frame_or_series): dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) - obj = frame_or_series(np.random.default_rng(2).randn(len(dr)), index=dr) + obj = frame_or_series( + np.random.default_rng(2).standard_normal(len(dr)), index=dr + ) # it works! obj.asfreq("T") @@ -80,7 +82,7 @@ def test_asfreq_normalize(self, frame_or_series): rng = date_range("1/1/2000 09:30", periods=20) norm = date_range("1/1/2000", periods=20) - vals = np.random.default_rng(2).randn(20, 3) + vals = np.random.default_rng(2).standard_normal(20, 3) obj = DataFrame(vals, index=rng) expected = DataFrame(vals, index=norm) @@ -103,7 +105,9 @@ def test_asfreq_keep_index_name(self, frame_or_series): def test_asfreq_ts(self, frame_or_series): index = period_range(freq="A", start="1/1/2001", end="12/31/2010") - obj = DataFrame(np.random.default_rng(2).randn(len(index), 3), index=index) + obj = DataFrame( + np.random.default_rng(2).standard_normal(len(index), 3), index=index + ) obj = tm.get_obj(obj, frame_or_series) result = obj.asfreq("D", how="end") @@ -185,7 +189,7 @@ def test_asfreq_fillvalue(self): def test_asfreq_with_date_object_index(self, frame_or_series): rng = date_range("1/1/2000", periods=20) - ts = frame_or_series(np.random.default_rng(2).randn(20), index=rng) + ts = frame_or_series(np.random.default_rng(2).standard_normal(20), index=rng) ts2 = ts.copy() ts2.index = 
[x.date() for x in ts2.index] diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index b277679510c97..5683ec60b0d88 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -179,7 +179,7 @@ def test_is_copy(self, date_range_frame): def test_asof_periodindex_mismatched_freq(self): N = 50 rng = period_range("1/1/1990", periods=N, freq="H") - df = DataFrame(np.random.default_rng(2).randn(N), index=rng) + df = DataFrame(np.random.default_rng(2).standard_normal(N), index=rng) # Mismatched freq msg = "Input has different freq" diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index ca1834defa006..a96c7bf8b3185 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -264,7 +264,7 @@ def test_astype_duplicate_col(self): def test_astype_duplicate_col_series_arg(self): # GH#44417 - vals = np.random.default_rng(2).randn(3, 4) + vals = np.random.default_rng(2).standard_normal(3, 4) df = DataFrame(vals, columns=["A", "B", "C", "A"]) dtypes = df.dtypes dtypes.iloc[0] = str diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index de168a149ff1c..43bc61138291b 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -19,7 +19,9 @@ def test_localized_at_time(self, tzstr, frame_or_series): tz = timezones.maybe_get_tz(tzstr) rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = frame_or_series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = frame_or_series( + np.random.default_rng(2).standard_normal(len(rng)), index=rng + ) ts_local = ts.tz_localize(tzstr) @@ -30,7 +32,7 @@ def test_localized_at_time(self, tzstr, frame_or_series): def test_at_time(self, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), 
index=rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) rs = ts.at_time(rng[1]) assert (rs.index.hour == rng[1].hour).all() @@ -44,7 +46,7 @@ def test_at_time(self, frame_or_series): def test_at_time_midnight(self, frame_or_series): # midnight, everything rng = date_range("1/1/2000", "1/31/2000") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), 3), index=rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), index=rng) ts = tm.get_obj(ts, frame_or_series) result = ts.at_time(time(0, 0)) @@ -53,7 +55,7 @@ def test_at_time_midnight(self, frame_or_series): def test_at_time_nonexistent(self, frame_or_series): # time doesn't exist rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = DataFrame(np.random.default_rng(2).randn(len(rng)), rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng)), rng) ts = tm.get_obj(ts, frame_or_series) rs = ts.at_time("16:00") assert len(rs) == 0 @@ -93,7 +95,7 @@ def test_at_time_raises(self, frame_or_series): def test_at_time_axis(self, axis): # issue 8839 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), len(rng))) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), len(rng))) ts.index, ts.columns = rng, rng indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] @@ -112,7 +114,9 @@ def test_at_time_axis(self, axis): def test_at_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") - df = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) + df = DataFrame( + np.random.default_rng(2).standard_normal(len(index), 5), index=index + ) akey = time(12, 0, 0) ainds = [24, 72, 120, 168] diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index b6097210cee9a..95285250b6181 100644 --- 
a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -22,7 +22,7 @@ class TestBetweenTime: def test_between_time_formats(self, frame_or_series): # GH#11818 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) strings = [ @@ -45,7 +45,7 @@ def test_localized_between_time(self, tzstr, frame_or_series): tz = timezones.maybe_get_tz(tzstr) rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) if frame_or_series is DataFrame: ts = ts.to_frame() @@ -69,7 +69,7 @@ def test_between_time_types(self, frame_or_series): def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) stime = time(0, 0) @@ -103,7 +103,7 @@ def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): # across midnight rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) ts = tm.get_obj(ts, frame_or_series) stime = time(22, 0) etime = time(9, 0) @@ -140,7 +140,7 @@ def test_between_time_raises(self, frame_or_series): def test_between_time_axis(self, frame_or_series): # GH#8839 rng = date_range("1/1/2000", periods=100, freq="10min") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) if frame_or_series is 
DataFrame: ts = ts.to_frame() @@ -156,7 +156,7 @@ def test_between_time_axis(self, frame_or_series): def test_between_time_axis_aliases(self, axis): # GH#8839 rng = date_range("1/1/2000", periods=100, freq="10min") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), len(rng))) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), len(rng))) stime, etime = ("08:00:00", "09:00:00") exp_len = 7 @@ -174,7 +174,7 @@ def test_between_time_axis_raises(self, axis): # issue 8839 rng = date_range("1/1/2000", periods=100, freq="10min") mask = np.arange(0, len(rng)) - rand_data = np.random.default_rng(2).randn(len(rng), len(rng)) + rand_data = np.random.default_rng(2).standard_normal(len(rng), len(rng)) ts = DataFrame(rand_data, index=rng, columns=rng) stime, etime = ("08:00:00", "09:00:00") @@ -193,7 +193,9 @@ def test_between_time_axis_raises(self, axis): def test_between_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") - df = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) + df = DataFrame( + np.random.default_rng(2).standard_normal(len(index), 5), index=index + ) bkey = slice(time(13, 0, 0), time(14, 0, 0)) binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] @@ -207,7 +209,7 @@ def test_between_time_datetimeindex(self): def test_between_time_incorrect_arg_inclusive(self): # GH40245 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), 2), index=rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) stime = time(0, 0) etime = time(1, 0) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index d6b9ceab852d2..7a78adcc9dc4d 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -30,7 +30,7 @@ def test_inplace_clip(self, float_frame): def test_dataframe_clip(self): # GH#2747 - df = 
DataFrame(np.random.default_rng(2).randn(1000, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) for lb, ub in [(-1, 1), (1, -1)]: clipped_df = df.clip(lb, ub) @@ -60,8 +60,8 @@ def test_clip_mixed_numeric(self): def test_clip_against_series(self, inplace): # GH#6966 - df = DataFrame(np.random.default_rng(2).randn(1000, 2)) - lb = Series(np.random.default_rng(2).randn(1000)) + df = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) + lb = Series(np.random.default_rng(2).standard_normal(1000)) ub = lb + 1 original = df.copy() @@ -107,8 +107,8 @@ def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): @pytest.mark.parametrize("axis", [0, 1, None]) def test_clip_against_frame(self, axis): - df = DataFrame(np.random.default_rng(2).randn(1000, 2)) - lb = DataFrame(np.random.default_rng(2).randn(1000, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) + lb = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) ub = lb + 1 clipped_df = df.clip(lb, ub, axis=axis) @@ -124,10 +124,12 @@ def test_clip_against_frame(self, axis): def test_clip_against_unordered_columns(self): # GH#20911 df1 = DataFrame( - np.random.default_rng(2).randn(1000, 4), columns=["A", "B", "C", "D"] + np.random.default_rng(2).standard_normal(1000, 4), + columns=["A", "B", "C", "D"], ) df2 = DataFrame( - np.random.default_rng(2).randn(1000, 4), columns=["D", "A", "B", "C"] + np.random.default_rng(2).standard_normal(1000, 4), + columns=["D", "A", "B", "C"], ) df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"]) result_upper = df1.clip(lower=0, upper=df2) diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py index 42c873081cc4f..95fcaaa473067 100644 --- a/pandas/tests/frame/methods/test_copy.py +++ b/pandas/tests/frame/methods/test_copy.py @@ -51,13 +51,13 @@ def test_copy_consolidates(self): # GH#42477 df = DataFrame( { - "a": np.random.default_rng(2).randint(0, 100, 
size=55), - "b": np.random.default_rng(2).randint(0, 100, size=55), + "a": np.random.default_rng(2).integers(0, 100, size=55), + "b": np.random.default_rng(2).integers(0, 100, size=55), } ) for i in range(0, 10): - df.loc[:, f"n_{i}"] = np.random.default_rng(2).randint(0, 100, size=55) + df.loc[:, f"n_{i}"] = np.random.default_rng(2).integers(0, 100, size=55) assert len(df._mgr.blocks) == 11 result = df.copy() diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index b311a7914cb26..97264164099e9 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -172,7 +172,8 @@ def test_corr_int_and_boolean(self, meth): def test_corr_cov_independent_index_column(self, method): # GH#14617 df = DataFrame( - np.random.default_rng(2).randn(4 * 10).reshape(10, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal(4 * 10).reshape(10, 4), + columns=list("abcd"), ) result = getattr(df, method)() assert result.index is not result.columns @@ -288,7 +289,7 @@ def test_corrwith(self, datetime_frame, dtype): datetime_frame = datetime_frame.astype(dtype) a = datetime_frame - noise = Series(np.random.default_rng(2).randn(len(a)), index=a.index) + noise = Series(np.random.default_rng(2).standard_normal(len(a)), index=a.index) b = datetime_frame.add(noise, axis=0) @@ -313,10 +314,12 @@ def test_corrwith(self, datetime_frame, dtype): index = ["a", "b", "c", "d", "e"] columns = ["one", "two", "three", "four"] df1 = DataFrame( - np.random.default_rng(2).randn(5, 4), index=index, columns=columns + np.random.default_rng(2).standard_normal(5, 4), index=index, columns=columns ) df2 = DataFrame( - np.random.default_rng(2).randn(4, 4), index=index[:4], columns=columns + np.random.default_rng(2).standard_normal(4, 4), + index=index[:4], + columns=columns, ) correls = df1.corrwith(df2, axis=1) for row in index[:4]: diff --git a/pandas/tests/frame/methods/test_describe.py 
b/pandas/tests/frame/methods/test_describe.py index c3173646ae0d7..e2f92a1e04cb5 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -92,7 +92,7 @@ def test_describe_bool_frame(self): tm.assert_frame_equal(result, expected) def test_describe_categorical(self): - df = DataFrame({"value": np.random.default_rng(2).randint(0, 10000, 100)}) + df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 100)}) labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 8dc8977a63400..bda563314306a 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -13,7 +13,7 @@ class TestDataFrameDiff: def test_diff_requires_integer(self): - df = DataFrame(np.random.default_rng(2).randn(2, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((2, 2))) with pytest.raises(ValueError, match="periods must be an integer"): df.diff(1.5) @@ -156,7 +156,7 @@ def test_diff_timedelta(self): tm.assert_frame_equal(res, exp) def test_diff_mixed_dtype(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) df["A"] = np.array([1, 2, 3, 4, 5], dtype=object) result = df.diff() @@ -285,7 +285,7 @@ def test_diff_integer_na(self, axis, expected): def test_diff_readonly(self): # https://github.com/pandas-dev/pandas/issues/35559 - arr = np.random.default_rng(2).randn(5, 2) + arr = np.random.default_rng(2).standard_normal((5, 2)) arr.flags.writeable = False df = DataFrame(arr) result = df.diff() diff --git a/pandas/tests/frame/methods/test_dot.py b/pandas/tests/frame/methods/test_dot.py index d1fd4516b69e0..addd0309d0bb9 100644 --- a/pandas/tests/frame/methods/test_dot.py +++ b/pandas/tests/frame/methods/test_dot.py @@ -83,12 +83,14 @@ def test_dot_misaligned(self, obj, other):
class TestSeriesDot(DotSharedTests): @pytest.fixture def obj(self): - return Series(np.random.default_rng(2).randn(4), index=["p", "q", "r", "s"]) + return Series( + np.random.default_rng(2).standard_normal(4), index=["p", "q", "r", "s"] + ) @pytest.fixture def other(self): return DataFrame( - np.random.default_rng(2).randn(3, 4), + np.random.default_rng(2).standard_normal(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"], ).T @@ -109,7 +111,7 @@ class TestDataFrameDot(DotSharedTests): @pytest.fixture def obj(self): return DataFrame( - np.random.default_rng(2).randn(3, 4), + np.random.default_rng(2).standard_normal(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"], ) @@ -117,7 +119,7 @@ def obj(self): @pytest.fixture def other(self): return DataFrame( - np.random.default_rng(2).randn(4, 2), + np.random.default_rng(2).standard_normal(4, 2), index=["p", "q", "r", "s"], columns=["1", "2"], ) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index c64b1f0e731d1..4382b575e7221 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -55,7 +55,7 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys(): # define dataframe with unique datetime index df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), columns=["a", "b", "c"], index=pd.date_range("2012", freq="H", periods=5), ) @@ -159,7 +159,9 @@ def test_drop(self): # inplace cache issue # GH#5628 - df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=list("abc")) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), columns=list("abc") + ) expected = df[~(df.b > 0)] return_value = df.drop(labels=df[df.b > 0].index, inplace=True) assert return_value is None @@ -303,7 +305,7 @@ def test_mixed_depth_drop(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).randn(4, 6), 
columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) result = df.drop("a", axis=1) expected = df.drop([("a", "", "")], axis=1) @@ -438,7 +440,7 @@ def test_drop_preserve_names(self): [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] ) - df = DataFrame(np.random.default_rng(2).randn(6, 3), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(6, 3), index=index) result = df.drop([(0, 2)]) assert result.index.names == ("one", "two") @@ -497,9 +499,9 @@ def test_drop_with_duplicate_columns2(self): # drop buggy GH#6240 df = DataFrame( { - "A": np.random.default_rng(2).randn(5), - "B": np.random.default_rng(2).randn(5), - "C": np.random.default_rng(2).randn(5), + "A": np.random.default_rng(2).standard_normal(5), + "B": np.random.default_rng(2).standard_normal(5), + "C": np.random.default_rng(2).standard_normal(5), "D": ["a", "b", "c", "d", "e"], } ) diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 991ea5646e006..aeb82ba586614 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -15,7 +15,7 @@ class TestDataFrameMissingData: def test_dropEmptyRows(self, float_frame): N = len(float_frame.index) - mat = np.random.default_rng(2).randn(N) + mat = np.random.default_rng(2).standard_normal(N) mat[:5] = np.nan frame = DataFrame({"foo": mat}, index=float_frame.index) @@ -39,7 +39,7 @@ def test_dropEmptyRows(self, float_frame): def test_dropIncompleteRows(self, float_frame): N = len(float_frame.index) - mat = np.random.default_rng(2).randn(N) + mat = np.random.default_rng(2).standard_normal(N) mat[:5] = np.nan frame = DataFrame({"foo": mat}, index=float_frame.index) @@ -65,7 +65,7 @@ def test_dropIncompleteRows(self, float_frame): assert return_value is None def test_dropna(self): - df = DataFrame(np.random.default_rng(2).randn(6, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(6, 4)) 
df.iloc[:2, 2] = np.nan dropped = df.dropna(axis=1) @@ -211,9 +211,9 @@ def test_dropna_categorical_interval_index(self): def test_dropna_with_duplicate_columns(self): df = DataFrame( { - "A": np.random.default_rng(2).randn(5), - "B": np.random.default_rng(2).randn(5), - "C": np.random.default_rng(2).randn(5), + "A": np.random.default_rng(2).standard_normal(5), + "B": np.random.default_rng(2).standard_normal(5), + "C": np.random.default_rng(2).standard_normal(5), "D": ["a", "b", "c", "d", "e"], } ) diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py index 6b407ced25796..788aede805110 100644 --- a/pandas/tests/frame/methods/test_duplicated.py +++ b/pandas/tests/frame/methods/test_duplicated.py @@ -26,7 +26,7 @@ def test_duplicated_implemented_no_recursion(): # gh-21524 # Ensure duplicated isn't implemented using recursion that # can fail on wide frames - df = DataFrame(np.random.default_rng(2).randint(0, 1000, (10, 1000))) + df = DataFrame(np.random.default_rng(2).integers(0, 1000, (10, 1000))) rec_limit = sys.getrecursionlimit() try: sys.setrecursionlimit(100) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 8fd0f5f35cd45..833eaa390f254 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -135,7 +135,7 @@ def test_fillna_different_dtype(self): def test_fillna_limit_and_value(self): # limit and value - df = DataFrame(np.random.default_rng(2).randn(10, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 3)) df.iloc[2:7, 0] = np.nan df.iloc[3:5, 2] = np.nan @@ -442,7 +442,7 @@ def test_bfill(self, datetime_frame): def test_frame_pad_backfill_limit(self): index = np.arange(10) - df = DataFrame(np.random.default_rng(2).randn(10, 4), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4), index=index) result = df[:2].reindex(index, method="pad", limit=5) @@ -461,7 +461,7 @@ 
def test_frame_pad_backfill_limit(self): def test_frame_fillna_limit(self): index = np.arange(10) - df = DataFrame(np.random.default_rng(2).randn(10, 4), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4), index=index) result = df[:2].reindex(index) msg = "DataFrame.fillna with 'method' is deprecated" @@ -485,14 +485,14 @@ def test_frame_fillna_limit(self): def test_fillna_skip_certain_blocks(self): # don't try to fill boolean, int blocks - df = DataFrame(np.random.default_rng(2).randn(10, 4).astype(int)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4).astype(int)) # it works! df.fillna(np.nan) @pytest.mark.parametrize("type", [int, float]) def test_fillna_positive_limit(self, type): - df = DataFrame(np.random.default_rng(2).randn(10, 4)).astype(type) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)).astype(type) msg = "Limit must be greater than 0" with pytest.raises(ValueError, match=msg): @@ -500,14 +500,14 @@ def test_fillna_positive_limit(self, type): @pytest.mark.parametrize("type", [int, float]) def test_fillna_integer_limit(self, type): - df = DataFrame(np.random.default_rng(2).randn(10, 4)).astype(type) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)).astype(type) msg = "Limit must be an integer" with pytest.raises(ValueError, match=msg): df.fillna(0, limit=0.5) def test_fillna_inplace(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) df.loc[:4, 1] = np.nan df.loc[-4:, 3] = np.nan @@ -595,7 +595,7 @@ def test_fillna_dataframe(self): tm.assert_frame_equal(result, expected) def test_fillna_columns(self): - arr = np.random.default_rng(2).randn(10, 10) + arr = np.random.default_rng(2).standard_normal(10, 10) arr[:, ::2] = np.nan df = DataFrame(arr) diff --git a/pandas/tests/frame/methods/test_first_valid_index.py b/pandas/tests/frame/methods/test_first_valid_index.py index 9273955f22146..a448768f4173d 
100644 --- a/pandas/tests/frame/methods/test_first_valid_index.py +++ b/pandas/tests/frame/methods/test_first_valid_index.py @@ -48,7 +48,7 @@ def test_first_last_valid_frame(self, data, idx, expected_first, expected_last): def test_first_last_valid(self, index_func): N = 30 index = index_func(N) - mat = np.random.default_rng(2).randn(N) + mat = np.random.default_rng(2).standard_normal(N) mat[:5] = np.nan mat[-5:] = np.nan diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py index 28b8cede58bba..4f32e5781b07a 100644 --- a/pandas/tests/frame/methods/test_head_tail.py +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -9,7 +9,7 @@ def test_head_tail_generic(index, frame_or_series): ndim = 2 if frame_or_series is DataFrame else 1 shape = (len(index),) * ndim - vals = np.random.default_rng(2).randn(*shape) + vals = np.random.default_rng(2).standard_normal(shape) obj = frame_or_series(vals, index=index) tm.assert_equal(obj.head(), obj.iloc[:5]) diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py index 7225e80f50b01..0de88114af199 100644 --- a/pandas/tests/frame/methods/test_map.py +++ b/pandas/tests/frame/methods/test_map.py @@ -106,7 +106,7 @@ def test_map_na_ignore(float_frame): # GH 23803 strlen_frame = float_frame.map(lambda x: len(str(x))) float_frame_with_na = float_frame.copy() - mask = np.random.default_rng(2).randint(0, 2, size=float_frame.shape, dtype=bool) + mask = np.random.default_rng(2).integers(0, 2, size=float_frame.shape, dtype=bool) float_frame_with_na[mask] = pd.NA strlen_frame_na_ignore = float_frame_with_na.map( lambda x: len(str(x)), na_action="ignore" diff --git a/pandas/tests/frame/methods/test_matmul.py b/pandas/tests/frame/methods/test_matmul.py index ee11e4545db45..de1c6d7c62c4b 100644 --- a/pandas/tests/frame/methods/test_matmul.py +++ b/pandas/tests/frame/methods/test_matmul.py @@ -15,12 +15,12 @@ class TestMatMul: def test_matmul
test is for GH#10259 a = DataFrame( - np.random.default_rng(2).randn(3, 4), + np.random.default_rng(2).standard_normal(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"], ) b = DataFrame( - np.random.default_rng(2).randn(4, 2), + np.random.default_rng(2).standard_normal(4, 2), index=["p", "q", "r", "s"], columns=["one", "two"], ) @@ -70,10 +70,14 @@ def test_matmul(self): # unaligned df = DataFrame( - np.random.default_rng(2).randn(3, 4), index=[1, 2, 3], columns=range(4) + np.random.default_rng(2).standard_normal(3, 4), + index=[1, 2, 3], + columns=range(4), ) df2 = DataFrame( - np.random.default_rng(2).randn(5, 3), index=range(5), columns=[1, 2, 3] + np.random.default_rng(2).standard_normal(5, 3), + index=range(5), + columns=[1, 2, 3], ) with pytest.raises(ValueError, match="aligned"): diff --git a/pandas/tests/frame/methods/test_pop.py b/pandas/tests/frame/methods/test_pop.py index e1067f22baacc..dc352181ff957 100644 --- a/pandas/tests/frame/methods/test_pop.py +++ b/pandas/tests/frame/methods/test_pop.py @@ -54,7 +54,7 @@ def test_mixed_depth_pop(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).randn(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) df1 = df.copy() df2 = df.copy() diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index f5dd10b7eab93..d9ccd1d2f6c42 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -773,7 +773,9 @@ def test_quantile_item_cache( ): # previous behavior incorrect retained an invalid _item_cache entry interpolation, method = interp_method - df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(4, 3), columns=["A", "B", "C"] + ) df["D"] = df["A"] * 2 ser = df["A"] if not using_array_manager: diff --git 
a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 4ba946bf95663..35776e49e2c50 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -64,7 +64,9 @@ def test_rank(self, float_frame): tm.assert_almost_equal(ranks1.values, exp1) # integers - df = DataFrame(np.random.default_rng(2).randint(0, 5, size=40).reshape((10, 4))) + df = DataFrame( + np.random.default_rng(2).integers(0, 5, size=40).reshape((10, 4)) + ) result = df.rank() exp = df.astype(float).rank() @@ -126,7 +128,7 @@ def test_rank2(self): def test_rank_does_not_mutate(self): # GH#18521 # Check rank does not mutate DataFrame - df = DataFrame(np.random.default_rng(2).randn(10, 3), dtype="float64") + df = DataFrame(np.random.default_rng(2).standard_normal(10, 3), dtype="float64") expected = df.copy() df.rank() result = df @@ -229,7 +231,7 @@ def test_rank_methods_frame(self): import scipy.stats # noqa: F401 from scipy.stats import rankdata - xs = np.random.default_rng(2).randint(0, 21, (100, 26)) + xs = np.random.default_rng(2).integers(0, 21, (100, 26)) xs = (xs - 10.0) / 10.0 cols = [chr(ord("z") - i) for i in range(xs.shape[1])] diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 3c4cb3adde9e8..f7be83d132fa0 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -49,7 +49,7 @@ def test_dti_set_index_reindex_freq_with_tz(self): datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" ) df = DataFrame( - np.random.default_rng(2).randn(24, 1), columns=["a"], index=index + np.random.default_rng(2).standard_normal(24, 1), columns=["a"], index=index ) new_index = date_range( datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" @@ -161,7 +161,7 @@ def test_reindex_tzaware_fill_value(self): def test_reindex_copies(self): # based on asv time_reindex_axis1 N = 10 - df = 
DataFrame(np.random.default_rng(2).randn(N * 10, N)) + df = DataFrame(np.random.default_rng(2).standard_normal(N * 10, N)) cols = np.arange(N) np.random.default_rng(2).shuffle(cols) @@ -176,7 +176,9 @@ def test_reindex_copies_ea(self, using_copy_on_write): # https://github.com/pandas-dev/pandas/pull/51197 # also ensure to honor copy keyword for ExtensionDtypes N = 10 - df = DataFrame(np.random.default_rng(2).randn(N * 10, N), dtype="Float64") + df = DataFrame( + np.random.default_rng(2).standard_normal(N * 10, N), dtype="Float64" + ) cols = np.arange(N) np.random.default_rng(2).shuffle(cols) @@ -366,7 +368,9 @@ def test_reindex_nearest_tz_empty_frame(self): def test_reindex_frame_add_nat(self): rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") - df = DataFrame({"A": np.random.default_rng(2).randn(len(rng)), "B": rng}) + df = DataFrame( + {"A": np.random.default_rng(2).standard_normal(len(rng)), "B": rng} + ) result = df.reindex(range(15)) assert np.issubdtype(result["B"].dtype, np.dtype("M8[ns]")) @@ -429,7 +433,7 @@ def test_reindex_level_verify_first_level(self, idx, check_index_type): "jim": list("B" * 4 + "A" * 2 + "C" * 3), "joe": list("abcdeabcd")[::-1], "jolie": [10, 20, 30] * 3, - "joline": np.random.default_rng(2).randint(0, 1000, 9), + "joline": np.random.default_rng(2).integers(0, 1000, 9), } ) icol = ["jim", "joe", "jolie"] @@ -486,7 +490,7 @@ def test_reindex_level_verify_first_level_repeats(self, idx): for x in [2, 3, 3, 2, 3, 2, 3, 2] ] ), - "joline": np.random.default_rng(2).randn(20).round(3) * 10, + "joline": np.random.default_rng(2).standard_normal(20).round(3) * 10, } ) icol = ["jim", "joe", "jolie"] @@ -535,7 +539,7 @@ def test_reindex_level_verify_repeats(self, idx, indexer): for x in [2, 3, 3, 2, 3, 2, 3, 2] ] ), - "joline": np.random.default_rng(2).randn(20).round(3) * 10, + "joline": np.random.default_rng(2).standard_normal(20).round(3) * 10, } ) icol = ["jim", "joe", "jolie"] @@ -562,7 +566,7 @@ def 
test_reindex_level_verify(self, idx, indexer, check_index_type): "jim": list("B" * 4 + "A" * 2 + "C" * 3), "joe": list("abcdeabcd")[::-1], "jolie": [10, 20, 30] * 3, - "joline": np.random.default_rng(2).randint(0, 1000, 9), + "joline": np.random.default_rng(2).integers(0, 1000, 9), } ) icol = ["jim", "joe", "jolie"] @@ -572,7 +576,7 @@ def test_reindex_level_verify(self, idx, indexer, check_index_type): def test_non_monotonic_reindex_methods(self): dr = date_range("2013-08-01", periods=6, freq="B") - data = np.random.default_rng(2).randn(6, 1) + data = np.random.default_rng(2).standard_normal(6, 1) df = DataFrame(data, index=dr, columns=list("A")) df_rev = DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) # index is not monotonic increasing or decreasing @@ -800,7 +804,7 @@ def test_reindex_axes(self): assert index_freq == seq_freq def test_reindex_fill_value(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) # axis=0 result = df.reindex(list(range(15))) @@ -853,7 +857,7 @@ def test_reindex_single_column_ea_index_and_columns(self, any_numeric_ea_dtype): def test_reindex_dups(self): # GH4746, reindex on duplicate index error messages - arr = np.random.default_rng(2).randn(10) + arr = np.random.default_rng(2).standard_normal(10) df = DataFrame(arr, index=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]) # set index is ok @@ -1022,21 +1026,21 @@ def test_reindex_with_nans(self): tm.assert_frame_equal(result, expected) def test_reindex_multi(self): - df = DataFrame(np.random.default_rng(2).randn(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) result = df.reindex(index=range(4), columns=range(4)) expected = df.reindex(list(range(4))).reindex(columns=range(4)) tm.assert_frame_equal(result, expected) - df = DataFrame(np.random.default_rng(2).randint(0, 10, (3, 3))) + df = DataFrame(np.random.default_rng(2).integers(0, 10, (3, 3))) result = df.reindex(index=range(4), 
columns=range(4)) expected = df.reindex(list(range(4))).reindex(columns=range(4)) tm.assert_frame_equal(result, expected) - df = DataFrame(np.random.default_rng(2).randint(0, 10, (3, 3))) + df = DataFrame(np.random.default_rng(2).integers(0, 10, (3, 3))) result = df.reindex(index=range(2), columns=range(2)) expected = df.reindex(range(2)).reindex(columns=range(2)) @@ -1044,7 +1048,7 @@ def test_reindex_multi(self): tm.assert_frame_equal(result, expected) df = DataFrame( - np.random.default_rng(2).randn(5, 3) + 1j, columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal(5, 3) + 1j, columns=["a", "b", "c"] ) result = df.reindex(index=[0, 1], columns=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 315e2150637d8..fb70e656814f9 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -87,7 +87,7 @@ def test_rename(self, float_frame): def test_rename_chainmap(self, args, kwargs): # see gh-23859 colAData = range(1, 11) - colBdata = np.random.default_rng(2).randn(10) + colBdata = np.random.default_rng(2).standard_normal(10) df = DataFrame({"A": colAData, "B": colBdata}) result = df.rename(*args, **kwargs) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 24f2b43a61230..bbc954388f0e8 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -94,7 +94,7 @@ def test_reset_index_tz(self, tz_aware_fixture): @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_frame_reset_index_tzaware_index(self, tz): dr = date_range("2012-06-02", periods=10, tz=tz) - df = DataFrame(np.random.default_rng(2).randn(len(dr)), dr) + df = DataFrame(np.random.default_rng(2).standard_normal(len(dr)), dr) roundtripped = df.reset_index().set_index("index") xp = df.index.tz rs = roundtripped.index.tz @@ -250,7 +250,7 @@ def 
test_reset_index_right_dtype(self): assert reset["time"].dtype == np.float64 def test_reset_index_multiindex_col(self): - vals = np.random.default_rng(2).randn(3, 3).astype(object) + vals = np.random.default_rng(2).standard_normal(3, 3).astype(object) idx = ["x", "y", "z"] full = np.hstack(([[x] for x in idx], vals)) df = DataFrame( @@ -602,7 +602,9 @@ def test_reset_index_delevel_infer_dtype(self): tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) df = DataFrame( - np.random.default_rng(2).randn(8, 3), columns=["A", "B", "C"], index=index + np.random.default_rng(2).standard_normal(8, 3), + columns=["A", "B", "C"], + index=index, ) deleveled = df.reset_index() assert is_integer_dtype(deleveled["prm1"]) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 02cef473a281d..2f6e839d224cc 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -14,9 +14,9 @@ class TestSample: @pytest.fixture def obj(self, frame_or_series): if frame_or_series is Series: - arr = np.random.default_rng(2).randn(10) + arr = np.random.default_rng(2).standard_normal(10) else: - arr = np.random.default_rng(2).randn(10, 10) + arr = np.random.default_rng(2).standard_normal(10, 10) return frame_or_series(arr, dtype=None) @pytest.mark.parametrize("test", list(range(10))) @@ -26,7 +26,7 @@ def test_sample(self, test, obj): # Check for stability when receives seed or random state -- run 10 # times. 
- seed = np.random.default_rng(2).randint(0, 100) + seed = np.random.default_rng(2).integers(0, 100) tm.assert_equal( obj.sample(n=4, random_state=seed), obj.sample(n=4, random_state=seed) ) @@ -344,7 +344,9 @@ def test_sample_aligns_weights_with_frame(self): def test_sample_is_copy(self): # GH#27357, GH#30784: ensure the result of sample is an actual copy and # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings - df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + ) df2 = df.sample(3) with tm.assert_produces_warning(None): diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 30f91f62cebb0..4a5f8617724dc 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -67,7 +67,7 @@ def test_set_index_empty_dataframe(self): def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) - df = DataFrame(np.random.default_rng(2).randn(3, 3), columns=columns) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 3), columns=columns) result = df.set_index(df.columns[0]) @@ -89,7 +89,7 @@ def test_set_index_cast_datetimeindex(self): df = DataFrame( { "A": [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], - "B": np.random.default_rng(2).randn(1000), + "B": np.random.default_rng(2).standard_normal(1000), } ) @@ -372,12 +372,14 @@ def test_construction_with_categorical_index(self): ci.name = "B" # with Categorical - df = DataFrame({"A": np.random.default_rng(2).randn(10), "B": ci.values}) + df = DataFrame( + {"A": np.random.default_rng(2).standard_normal(10), "B": ci.values} + ) idf = df.set_index("B") tm.assert_index_equal(idf.index, ci) # from a CategoricalIndex - df = DataFrame({"A": np.random.default_rng(2).randn(10), "B": ci}) + df = DataFrame({"A": 
np.random.default_rng(2).standard_normal(10), "B": ci}) idf = df.set_index("B") tm.assert_index_equal(idf.index, ci) @@ -569,7 +571,7 @@ def test_set_index_raise_on_len( # GH 24984 df = frame_of_index_cols # has length 5 - values = np.random.default_rng(2).randint(0, 10, (length,)) + values = np.random.default_rng(2).integers(0, 10, (length,)) msg = "Length mismatch: Expected 5 rows, received array of length.*" diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 4fa732adf7968..87eba3a232df4 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -20,7 +20,7 @@ class TestDataFrameShift: def test_shift_axis1_with_valid_fill_value_one_array(self): # Case with axis=1 that does not go through the "len(arrays)>1" path # in DataFrame.shift - data = np.random.default_rng(2).randn(5, 3) + data = np.random.default_rng(2).standard_normal((5, 3)) df = DataFrame(data) res = df.shift(axis=1, periods=1, fill_value=12345) expected = df.T.shift(periods=1, fill_value=12345).T @@ -35,7 +35,7 @@ def test_shift_axis1_with_valid_fill_value_one_array(self): def test_shift_disallow_freq_and_fill_value(self, frame_or_series): # Can't pass both!
obj = frame_or_series( - np.random.default_rng(2).randn(5), + np.random.default_rng(2).standard_normal(5), index=date_range("1/1/2000", periods=5, freq="H"), ) @@ -71,7 +71,7 @@ def test_shift_non_writable_array(self, input_data, output_data, frame_or_series def test_shift_mismatched_freq(self, frame_or_series): ts = frame_or_series( - np.random.default_rng(2).randn(5), + np.random.default_rng(2).standard_normal(5), index=date_range("1/1/2000", periods=5, freq="H"), ) @@ -399,7 +399,7 @@ def test_shift_duplicate_columns(self): # GH#9092; verify that position-based shifting works # in the presence of duplicate columns column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] - data = np.random.default_rng(2).randn(20, 5) + data = np.random.default_rng(2).standard_normal(20, 5) shifted = [] for columns in column_lists: @@ -419,8 +419,8 @@ def test_shift_duplicate_columns(self): def test_shift_axis1_multiple_blocks(self, using_array_manager): # GH#35488 - df1 = DataFrame(np.random.default_rng(2).randint(1000, size=(5, 3))) - df2 = DataFrame(np.random.default_rng(2).randint(1000, size=(5, 2))) + df1 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 3))) + df2 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 2))) df3 = pd.concat([df1, df2], axis=1) if not using_array_manager: assert len(df3._mgr.blocks) == 2 @@ -463,8 +463,8 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager): @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support def test_shift_axis1_multiple_blocks_with_int_fill(self): # GH#42719 - df1 = DataFrame(np.random.default_rng(2).randint(1000, size=(5, 3))) - df2 = DataFrame(np.random.default_rng(2).randint(1000, size=(5, 2))) + df1 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 3))) + df2 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 2))) df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) result = df3.shift(2, axis=1, fill_value=np.int_(0)) assert 
len(df3._mgr.blocks) == 2 diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index d9d19ce2992ab..21a93eebcf4e9 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -56,7 +56,7 @@ def test_sort_index_non_existent_label_multiindex(self): def test_sort_index_reorder_on_ops(self): # GH#15687 df = DataFrame( - np.random.default_rng(2).randn(8, 2), + np.random.default_rng(2).standard_normal((8, 2)), index=MultiIndex.from_product( [["a", "b"], ["big", "small"], ["red", "blu"]], names=["letter", "size", "color"], @@ -217,7 +217,7 @@ def test_sort_index_multi_index(self): def test_sort_index_inplace(self): frame = DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"], ) @@ -262,7 +262,9 @@ def test_sort_index_different_sortorder(self): A = A.take(indexer) B = B.take(indexer) - df = DataFrame({"A": A, "B": B, "C": np.random.default_rng(2).randn(100)}) + df = DataFrame( + {"A": A, "B": B, "C": np.random.default_rng(2).standard_normal(100)} + ) ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) expected = df.take(ex_indexer) @@ -376,10 +378,11 @@ def test_sort_index_multiindex(self, level): def test_sort_index_intervalindex(self): # this is a de-facto sort via unstack # confirming that we sort in the order of the bins - y = Series(np.random.default_rng(2).randn(100)) - x1 = Series(np.sign(np.random.default_rng(2).randn(100))) + y = Series(np.random.default_rng(2).standard_normal(100)) + x1 = Series(np.sign(np.random.default_rng(2).standard_normal(100))) x2 = pd.cut( - Series(np.random.default_rng(2).randn(100)), bins=[-3, -0.5, 0, 0.5, 3] + Series(np.random.default_rng(2).standard_normal(100)), + bins=[-3, -0.5, 0, 0.5, 3], ) model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"]) @@ -615,7 +618,7 @@ def test_sort_index_level_large_cardinality(self): # GH#2684
(int64) index = MultiIndex.from_arrays([np.arange(4000)] * 3) df = DataFrame( - np.random.default_rng(2).randn(4000).astype("int64"), index=index + np.random.default_rng(2).standard_normal(4000).astype("int64"), index=index ) # it works! @@ -625,7 +628,7 @@ def test_sort_index_level_large_cardinality(self): # GH#2684 (int32) index = MultiIndex.from_arrays([np.arange(4000)] * 3) df = DataFrame( - np.random.default_rng(2).randn(4000).astype("int32"), index=index + np.random.default_rng(2).standard_normal(4000).astype("int32"), index=index ) # it works! @@ -685,7 +688,7 @@ def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): ], ) def test_sort_index_multilevel_repr_8017(self, gen, extra): - data = np.random.default_rng(2).randn(3, 4) + data = np.random.default_rng(2).standard_normal((3, 4)) columns = MultiIndex.from_tuples([("red", i) for i in gen]) df = DataFrame(data, index=list("def"), columns=columns) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 734d933b878c5..272a574dcd7e8 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -95,7 +95,7 @@ def test_sort_values_by_empty_list(self): def test_sort_values_inplace(self): frame = DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"], ) @@ -131,7 +131,9 @@ def test_sort_values_multicolumn(self): B = np.tile(np.arange(5), 20) np.random.default_rng(2).shuffle(A) np.random.default_rng(2).shuffle(B) - frame = DataFrame({"A": A, "B": B, "C": np.random.default_rng(2).randn(100)}) + frame = DataFrame( + {"A": A, "B": B, "C": np.random.default_rng(2).standard_normal(100)} + ) result = frame.sort_values(by=["A", "B"]) indexer = np.lexsort((frame["B"], frame["A"])) @@ -598,7 +600,9 @@ def test_sort_values_nat_na_position_default(self): def test_sort_values_item_cache(self,
using_array_manager, using_copy_on_write): # previous behavior incorrect retained an invalid _item_cache entry - df = DataFrame(np.random.default_rng(2).randn(4, 3), columns=["A", "B", "C"]) + df = DataFrame( + np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] + ) df["D"] = df["A"] * 2 ser = df["A"] if not using_array_manager: @@ -642,7 +646,7 @@ def test_sort_values_no_op_reset_index(self): class TestDataFrameSortKey: # test key sorting (issue 27237) def test_sort_values_inplace_key(self, sort_by_key): frame = DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"], ) diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py index 68061d38de84d..a110ba700e3f7 100644 --- a/pandas/tests/frame/methods/test_swapaxes.py +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -7,20 +7,20 @@ class TestSwapAxes: def test_swapaxes(self): - df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) def test_swapaxes_noop(self): - df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): tm.assert_frame_equal(df, df.swapaxes(0, 0)) def test_swapaxes_invalid_axis(self): - df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): msg = "No axis named 2 for object type DataFrame" diff --git a/pandas/tests/frame/methods/test_to_csv.py
b/pandas/tests/frame/methods/test_to_csv.py index 06145b87677aa..2cf9cf2cc8472 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -74,7 +74,7 @@ def test_to_csv_from_csv2(self, float_frame): with tm.ensure_clean("__tmp_to_csv_from_csv2__") as path: # duplicate index df = DataFrame( - np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), index=["a", "a", "b"], columns=["x", "y", "z"], ) @@ -84,7 +84,7 @@ def test_to_csv_from_csv2(self, float_frame): midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) df = DataFrame( - np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), index=midx, columns=["x", "y", "z"], ) @@ -108,8 +108,8 @@ def test_to_csv_from_csv2(self, float_frame): def test_to_csv_from_csv3(self): with tm.ensure_clean("__tmp_to_csv_from_csv3__") as path: - df1 = DataFrame(np.random.default_rng(2).randn(3, 1)) - df2 = DataFrame(np.random.default_rng(2).randn(3, 1)) + df1 = DataFrame(np.random.default_rng(2).standard_normal(3, 1)) + df2 = DataFrame(np.random.default_rng(2).standard_normal(3, 1)) df1.to_csv(path) df2.to_csv(path, mode="a", header=False) @@ -207,9 +207,9 @@ def make_dtnat_arr(n, nnat=None): nnat = int(n * 0.1) # 10% s = list(date_range("2000", freq="5min", periods=n)) if nnat: - for i in np.random.default_rng(2).randint(0, len(s), nnat): + for i in np.random.default_rng(2).integers(0, len(s), nnat): s[i] = NaT - i = np.random.default_rng(2).randint(100) + i = np.random.default_rng(2).integers(100) s[-i] = NaT s[i] = NaT return s @@ -536,7 +536,7 @@ def _make_frame(names=None): if names is True: names = ["first", "second"] return DataFrame( - np.random.default_rng(2).randint(0, 10, size=(3, 3)), + np.random.default_rng(2).integers(0, 10, size=(3, 3)), columns=MultiIndex.from_tuples( [("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names ), @@ -626,7 +626,9 @@ def test_to_csv_interval_index(self): 
tm.assert_frame_equal(result, expected) def test_to_csv_float32_nanrep(self): - df = DataFrame(np.random.default_rng(2).randn(1, 4).astype(np.float32)) + df = DataFrame( + np.random.default_rng(2).standard_normal(1, 4).astype(np.float32) + ) df[1] = np.nan with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: @@ -650,12 +652,12 @@ def create_cols(name): return [f"{name}{i:03d}" for i in range(5)] df_float = DataFrame( - np.random.default_rng(2).randn(100, 5), + np.random.default_rng(2).standard_normal(100, 5), dtype="float64", columns=create_cols("float"), ) df_int = DataFrame( - np.random.default_rng(2).randn(100, 5).astype("int64"), + np.random.default_rng(2).standard_normal(100, 5).astype("int64"), dtype="int64", columns=create_cols("int"), ) @@ -698,7 +700,7 @@ def create_cols(name): def test_to_csv_dups_cols(self): df = DataFrame( - np.random.default_rng(2).randn(1000, 30), + np.random.default_rng(2).standard_normal(1000, 30), columns=list(range(15)) + list(range(15)), dtype="float64", ) @@ -709,8 +711,12 @@ def test_to_csv_dups_cols(self): result.columns = df.columns tm.assert_frame_equal(result, df) - df_float = DataFrame(np.random.default_rng(2).randn(1000, 3), dtype="float64") - df_int = DataFrame(np.random.default_rng(2).randn(1000, 3)).astype("int64") + df_float = DataFrame( + np.random.default_rng(2).standard_normal(1000, 3), dtype="float64" + ) + df_int = DataFrame(np.random.default_rng(2).standard_normal(1000, 3)).astype( + "int64" + ) df_bool = DataFrame(True, index=df_float.index, columns=range(3)) df_object = DataFrame("foo", index=df_float.index, columns=range(3)) df_dt = DataFrame( @@ -764,7 +770,9 @@ def test_to_csv_wide_frame_formatting(self, monkeypatch): # Issue #8621 chunksize = 100 df = DataFrame( - np.random.default_rng(2).randn(1, chunksize + 10), columns=None, index=None + np.random.default_rng(2).standard_normal(1, chunksize + 10), + columns=None, + index=None, ) with tm.ensure_clean() as filename: with 
monkeypatch.context() as m: diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index d159e2e384845..ac689357a1458 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -149,7 +149,7 @@ def test_to_dict(self, mapping): @pytest.mark.parametrize("mapping", [list, defaultdict, []]) def test_to_dict_errors(self, mapping): # GH#16122 - df = DataFrame(np.random.default_rng(2).randn(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) msg = "|".join( [ "unsupported type: ", diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index 037e50e3a8452..a0cd3344008fc 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -24,7 +24,7 @@ def test_to_numpy_dtype(self): @td.skip_array_manager_invalid_test def test_to_numpy_copy(self, using_copy_on_write): - arr = np.random.default_rng(2).randn(4, 3) + arr = np.random.default_rng(2).standard_normal(4, 3) df = DataFrame(arr) if using_copy_on_write: assert df.values.base is not arr diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py index d00a8fd7418ee..199753025effe 100644 --- a/pandas/tests/frame/methods/test_to_period.py +++ b/pandas/tests/frame/methods/test_to_period.py @@ -18,7 +18,7 @@ def test_to_period(self, frame_or_series): dr = date_range("1/1/2000", "1/1/2001", freq="D") obj = DataFrame( - np.random.default_rng(2).randn(len(dr), K), + np.random.default_rng(2).standard_normal(len(dr), K), index=dr, columns=["A", "B", "C", "D", "E"], ) @@ -41,7 +41,9 @@ def test_to_period_without_freq(self, frame_or_series): ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" ) - obj = DataFrame(np.random.default_rng(2).randn(4, 4), index=idx, columns=idx) + obj = DataFrame( + np.random.default_rng(2).standard_normal(4, 4), index=idx, columns=idx + ) 
obj = tm.get_obj(obj, frame_or_series) expected = obj.copy() expected.index = exp_idx @@ -54,7 +56,7 @@ def test_to_period_without_freq(self, frame_or_series): def test_to_period_columns(self): dr = date_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.default_rng(2).randn(len(dr), 5), index=dr) + df = DataFrame(np.random.default_rng(2).standard_normal(len(dr), 5), index=dr) df["mix"] = "a" df = df.T @@ -68,7 +70,7 @@ def test_to_period_columns(self): def test_to_period_invalid_axis(self): dr = date_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.default_rng(2).randn(len(dr), 5), index=dr) + df = DataFrame(np.random.default_rng(2).standard_normal(len(dr), 5), index=dr) df["mix"] = "a" msg = "No axis named 2 for object type DataFrame" diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 79a693e4b58d8..00790de651517 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -20,7 +20,9 @@ class TestDataFrameToRecords: def test_to_records_timeseries(self): index = date_range("1/1/2000", periods=10) df = DataFrame( - np.random.default_rng(2).randn(10, 3), index=index, columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal(10, 3), + index=index, + columns=["a", "b", "c"], ) result = df.to_records() @@ -80,12 +82,12 @@ def test_to_records_floats(self): df.to_records() def test_to_records_index_name(self): - df = DataFrame(np.random.default_rng(2).randn(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) df.index.name = "X" rs = df.to_records() assert "X" in rs.dtype.fields - df = DataFrame(np.random.default_rng(2).randn(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) rs = df.to_records() assert "index" in rs.dtype.fields diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index 44d8676a63966..525b85a51512b 100644 --- 
a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -29,7 +29,7 @@ def test_to_timestamp(self, frame_or_series): K = 5 index = period_range(freq="A", start="1/1/2001", end="12/1/2009") obj = DataFrame( - np.random.default_rng(2).randn(len(index), K), + np.random.default_rng(2).standard_normal(len(index), K), index=index, columns=["A", "B", "C", "D", "E"], ) @@ -73,7 +73,7 @@ def test_to_timestamp_columns(self): K = 5 index = period_range(freq="A", start="1/1/2001", end="12/1/2009") df = DataFrame( - np.random.default_rng(2).randn(len(index), K), + np.random.default_rng(2).standard_normal(len(index), K), index=index, columns=["A", "B", "C", "D", "E"], ) @@ -123,7 +123,9 @@ def test_to_timestamp_columns(self): def test_to_timestamp_invalid_axis(self): index = period_range(freq="A", start="1/1/2001", end="12/1/2009") - obj = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) + obj = DataFrame( + np.random.default_rng(2).standard_normal(len(index), 5), index=index + ) # invalid axis with pytest.raises(ValueError, match="axis"): diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index cbc1597c4158f..4c4b04076c8d5 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -80,8 +80,8 @@ def test_sort_values_nonsortedindex(self): rng = date_range("2011-01-01", "2012-01-01", freq="W") ts = DataFrame( { - "A": np.random.default_rng(2).randn(len(rng)), - "B": np.random.default_rng(2).randn(len(rng)), + "A": np.random.default_rng(2).standard_normal(len(rng)), + "B": np.random.default_rng(2).standard_normal(len(rng)), }, index=rng, ) @@ -97,10 +97,10 @@ def test_truncate_nonsortedindex_axis1(self): df = DataFrame( { - 3: np.random.default_rng(2).randn(5), - 20: np.random.default_rng(2).randn(5), - 2: np.random.default_rng(2).randn(5), - 0: np.random.default_rng(2).randn(5), + 3: 
np.random.default_rng(2).standard_normal(5), + 20: np.random.default_rng(2).standard_normal(5), + 2: np.random.default_rng(2).standard_normal(5), + 0: np.random.default_rng(2).standard_normal(5), }, columns=[3, 20, 2, 0], ) diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index e9a90a1cd8b3b..bbca4ee1b88b1 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -72,7 +72,9 @@ def test_values_casts_datetimelike_to_object(self, constructor): expected = series.astype("object") - df = DataFrame({"a": series, "b": np.random.default_rng(2).randn(len(series))}) + df = DataFrame( + {"a": series, "b": np.random.default_rng(2).standard_normal(len(series))} + ) result = df.values.squeeze() assert (result[:, 0] == expected.values).all() diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 176adaae5d1a5..45ecec8ee318b 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -126,8 +126,8 @@ def test_column_name_contains_unicode_surrogate(self): assert df.columns[0] == colname def test_new_empty_index(self): - df1 = DataFrame(np.random.default_rng(2).randn(0, 3)) - df2 = DataFrame(np.random.default_rng(2).randn(0, 3)) + df1 = DataFrame(np.random.default_rng(2).standard_normal(0, 3)) + df2 = DataFrame(np.random.default_rng(2).standard_normal(0, 3)) df1.index.name = "foo" assert df2.index.name is None diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index ad099233873c9..01c482a3adf8e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -89,7 +89,9 @@ def test_comparison_with_categorical_dtype(self): def test_frame_in_list(self): # GH#12689 this should raise at the DataFrame level, not blocks - df = DataFrame(np.random.default_rng(2).randn(6, 4), columns=list("ABCD")) + df = DataFrame( + np.random.default_rng(2).standard_normal(6, 4), 
columns=list("ABCD") + ) msg = "The truth value of a DataFrame is ambiguous" with pytest.raises(ValueError, match=msg): df in [None] @@ -99,21 +101,21 @@ def test_frame_in_list(self): [ [ { - "a": np.random.default_rng(2).randint(10, size=10), + "a": np.random.default_rng(2).integers(10, size=10), "b": pd.date_range("20010101", periods=10), }, { - "a": np.random.default_rng(2).randint(10, size=10), - "b": np.random.default_rng(2).randint(10, size=10), + "a": np.random.default_rng(2).integers(10, size=10), + "b": np.random.default_rng(2).integers(10, size=10), }, ], [ { - "a": np.random.default_rng(2).randint(10, size=10), - "b": np.random.default_rng(2).randint(10, size=10), + "a": np.random.default_rng(2).integers(10, size=10), + "b": np.random.default_rng(2).integers(10, size=10), }, { - "a": np.random.default_rng(2).randint(10, size=10), + "a": np.random.default_rng(2).integers(10, size=10), "b": pd.date_range("20010101", periods=10), }, ], @@ -123,13 +125,13 @@ def test_frame_in_list(self): "b": pd.date_range("20010101", periods=10), }, { - "a": np.random.default_rng(2).randint(10, size=10), - "b": np.random.default_rng(2).randint(10, size=10), + "a": np.random.default_rng(2).integers(10, size=10), + "b": np.random.default_rng(2).integers(10, size=10), }, ], [ { - "a": np.random.default_rng(2).randint(10, size=10), + "a": np.random.default_rng(2).integers(10, size=10), "b": pd.date_range("20010101", periods=10), }, { @@ -199,8 +201,8 @@ def test_timestamp_compare(self, left, right): { "dates1": pd.date_range("20010101", periods=10), "dates2": pd.date_range("20010102", periods=10), - "intcol": np.random.default_rng(2).randint(1000000000, size=10), - "floatcol": np.random.default_rng(2).randn(10), + "intcol": np.random.default_rng(2).integers(1000000000, size=10), + "floatcol": np.random.default_rng(2).standard_normal(10), "stringcol": list(tm.rands(10)), } ) @@ -266,7 +268,7 @@ def test_df_boolean_comparison_error(self): def test_df_float_none_comparison(self): 
df = DataFrame( - np.random.default_rng(2).randn(8, 3), + np.random.default_rng(2).standard_normal(8, 3), index=range(8), columns=["A", "B", "C"], ) @@ -289,8 +291,8 @@ class TestFrameFlexComparisons: # TODO: test_bool_flex_frame needs a better name @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"]) def test_bool_flex_frame(self, op): - data = np.random.default_rng(2).randn(5, 3) - other_data = np.random.default_rng(2).randn(5, 3) + data = np.random.default_rng(2).standard_normal(5, 3) + other_data = np.random.default_rng(2).standard_normal(5, 3) df = DataFrame(data) other = DataFrame(other_data) ndim_5 = np.ones(df.shape + (1, 3)) @@ -321,10 +323,10 @@ def test_bool_flex_frame(self, op): def test_bool_flex_series(self, box): # Series # list/tuple - data = np.random.default_rng(2).randn(5, 3) + data = np.random.default_rng(2).standard_normal(5, 3) df = DataFrame(data) - idx_ser = box(np.random.default_rng(2).randn(5)) - col_ser = box(np.random.default_rng(2).randn(3)) + idx_ser = box(np.random.default_rng(2).standard_normal(5)) + col_ser = box(np.random.default_rng(2).standard_normal(3)) idx_eq = df.eq(idx_ser, axis=0) col_eq = df.eq(col_ser) @@ -357,11 +359,11 @@ def test_bool_flex_series(self, box): tm.assert_frame_equal(idx_ge, -idx_lt) tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) - idx_ser = Series(np.random.default_rng(2).randn(5)) - col_ser = Series(np.random.default_rng(2).randn(3)) + idx_ser = Series(np.random.default_rng(2).standard_normal(5)) + col_ser = Series(np.random.default_rng(2).standard_normal(3)) def test_bool_flex_frame_na(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) # NA df.loc[0, 0] = np.nan rs = df.eq(df) @@ -1166,7 +1168,9 @@ def test_arithmetic_midx_cols_different_dtypes_different_order(self): def test_frame_with_zero_len_series_corner_cases(): # GH#28600 # easy all-float case - df = DataFrame(np.random.default_rng(2).randn(6).reshape(3, 
2), columns=["A", "B"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(6).reshape(3, 2), columns=["A", "B"] + ) ser = Series(dtype=np.float64) result = df + ser @@ -1215,7 +1219,7 @@ class TestFrameArithmeticUnsorted: def test_frame_add_tz_mismatch_converts_to_utc(self): rng = pd.date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") df = DataFrame( - np.random.default_rng(2).randn(len(rng)), index=rng, columns=["a"] + np.random.default_rng(2).standard_normal(len(rng)), index=rng, columns=["a"] ) df_moscow = df.tz_convert("Europe/Moscow") @@ -1227,7 +1231,7 @@ def test_frame_add_tz_mismatch_converts_to_utc(self): def test_align_frame(self): rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") - ts = DataFrame(np.random.default_rng(2).randn(len(rng), 3), index=rng) + ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), index=rng) result = ts + ts[::2] expected = ts + ts @@ -1701,7 +1705,7 @@ def test_inplace_ops_identity(self): # make sure that we are actually changing the object s_orig = Series([1, 2, 3]) df_orig = DataFrame( - np.random.default_rng(2).randint(0, 5, size=10).reshape(-1, 5) + np.random.default_rng(2).integers(0, 5, size=10).reshape(-1, 5) ) # no dtype change @@ -1737,7 +1741,7 @@ def test_inplace_ops_identity(self): assert df._mgr is df2._mgr # mixed dtype - arr = np.random.default_rng(2).randint(0, 10, size=5) + arr = np.random.default_rng(2).integers(0, 10, size=5) df_orig = DataFrame({"A": arr.copy(), "B": "foo"}) df = df_orig.copy() df2 = df @@ -1806,7 +1810,7 @@ def test_alignment_non_pandas(self, val): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] df = DataFrame( - np.random.default_rng(2).randn(3, 3), index=index, columns=columns + np.random.default_rng(2).standard_normal(3, 3), index=index, columns=columns ) align = DataFrame._align_for_op @@ -1824,7 +1828,7 @@ def test_alignment_non_pandas_length_mismatch(self, val): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] df = DataFrame( - 
np.random.default_rng(2).randn(3, 3), index=index, columns=columns + np.random.default_rng(2).standard_normal(3, 3), index=index, columns=columns ) align = DataFrame._align_for_op @@ -1840,7 +1844,7 @@ def test_alignment_non_pandas_index_columns(self): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] df = DataFrame( - np.random.default_rng(2).randn(3, 3), index=index, columns=columns + np.random.default_rng(2).standard_normal(3, 3), index=index, columns=columns ) align = DataFrame._align_for_op @@ -1927,7 +1931,7 @@ def test_pow_nan_with_zero(): def test_dataframe_series_extension_dtypes(): # https://github.com/pandas-dev/pandas/issues/34311 df = DataFrame( - np.random.default_rng(2).randint(0, 100, (10, 3)), columns=["a", "b", "c"] + np.random.default_rng(2).integers(0, 100, (10, 3)), columns=["a", "b", "c"] ) ser = Series([1, 2, 3], index=["a", "b", "c"]) @@ -1943,7 +1947,7 @@ def test_dataframe_series_extension_dtypes(): def test_dataframe_blockwise_slicelike(): # GH#34367 - arr = np.random.default_rng(2).randint(0, 1000, (100, 10)) + arr = np.random.default_rng(2).integers(0, 1000, (100, 10)) df1 = DataFrame(arr) # Explicit cast to float to avoid implicit cast when setting nan df2 = df1.copy().astype({1: "float", 3: "float", 7: "float"}) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2b8d824e0aa2d..48fcf59458730 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -370,7 +370,7 @@ def test_constructor_mixed_dtypes(self, typ, ad): elif typ == "float": dtypes = MIXED_FLOAT_DTYPES arrays = [ - np.array(np.random.default_rng(2).randint(10, size=10), dtype=d) + np.array(np.random.default_rng(2).integers(10, size=10), dtype=d) for d in dtypes ] @@ -777,7 +777,7 @@ def test_constructor_dict_cast2(self): # can't cast to float test_data = { "A": dict(zip(range(20), tm.makeStringIndex(20))), - "B": dict(zip(range(15), np.random.default_rng(2).randn(15))), + "B": 
dict(zip(range(15), np.random.default_rng(2).standard_normal(15))), } with pytest.raises(ValueError, match="could not convert string"): DataFrame(test_data, dtype=float) @@ -953,7 +953,7 @@ def test_constructor_extension_scalar_data(self, data, dtype): def test_nested_dict_frame_constructor(self): rng = pd.period_range("1/1/2000", periods=5) - df = DataFrame(np.random.default_rng(2).randn(10, 5), columns=rng) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 5), columns=rng) data = {} for col in df.columns: @@ -1196,7 +1196,7 @@ def test_constructor_scalar_inference(self): assert df["object"].dtype == np.object_ def test_constructor_arrays_and_scalars(self): - df = DataFrame({"a": np.random.default_rng(2).randn(10), "b": True}) + df = DataFrame({"a": np.random.default_rng(2).standard_normal(10), "b": True}) exp = DataFrame({"a": df["a"].values, "b": [True] * 10}) tm.assert_frame_equal(df, exp) @@ -1218,11 +1218,11 @@ def test_constructor_empty_dataframe(self): def test_constructor_more(self, float_frame): # used to be in test_matrix.py - arr = np.random.default_rng(2).randn(10) + arr = np.random.default_rng(2).standard_normal(10) dm = DataFrame(arr, columns=["A"], index=np.arange(10)) assert dm.values.ndim == 2 - arr = np.random.default_rng(2).randn(0) + arr = np.random.default_rng(2).standard_normal(0) dm = DataFrame(arr) assert dm.values.ndim == 2 assert dm.values.ndim == 2 @@ -1500,8 +1500,8 @@ class CustomDict(dict): def test_constructor_ragged(self): data = { - "A": np.random.default_rng(2).randn(10), - "B": np.random.default_rng(2).randn(8), + "A": np.random.default_rng(2).standard_normal(10), + "B": np.random.default_rng(2).standard_normal(8), } with pytest.raises(ValueError, match="All arrays must be of the same length"): DataFrame(data) @@ -1635,7 +1635,7 @@ def test_constructor_Series_named(self): tm.assert_index_equal(df.index, a.index) # ndarray like - arr = np.random.default_rng(2).randn(10) + arr = 
np.random.default_rng(2).standard_normal(10) s = Series(arr, name="x") df = DataFrame(s) expected = DataFrame({"x": s}) @@ -2447,20 +2447,24 @@ def test_frame_ctor_datetime64_column(self): rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") dates = np.asarray(rng) - df = DataFrame({"A": np.random.default_rng(2).randn(len(rng)), "B": dates}) + df = DataFrame( + {"A": np.random.default_rng(2).standard_normal(len(rng)), "B": dates} + ) assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) def test_dataframe_constructor_infer_multiindex(self): index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]] multi = DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), index=[np.array(x) for x in index_lists], ) assert isinstance(multi.index, MultiIndex) assert not isinstance(multi.columns, MultiIndex) - multi = DataFrame(np.random.default_rng(2).randn(4, 4), columns=index_lists) + multi = DataFrame( + np.random.default_rng(2).standard_normal(4, 4), columns=index_lists + ) assert isinstance(multi.columns, MultiIndex) @pytest.mark.parametrize( @@ -2679,10 +2683,10 @@ def test_construct_with_strings_and_none(self): class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M") - s1 = Series(np.random.default_rng(2).randn(len(rng1)), rng1) + s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1) rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M") - s2 = Series(np.random.default_rng(2).randn(len(rng2)), rng2) + s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2) df = DataFrame({"s1": s1, "s2": s2}) exp = pd.period_range("1/1/1980", "1/1/2012", freq="M") @@ -2756,7 +2760,7 @@ def test_floating_values_integer_dtype(self): # GH#40110 make DataFrame behavior with arraylike floating data and # inty dtype match Series behavior - arr = 
np.random.default_rng(2).randn(10, 5) + arr = np.random.default_rng(2).standard_normal(10, 5) # GH#49599 in 2.0 we raise instead of either # a) silently ignoring dtype and returningfloat (the old Series behavior) or @@ -2988,7 +2992,7 @@ def test_construction_from_ndarray_datetimelike(self): assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays) def test_construction_from_ndarray_with_eadtype_mismatched_columns(self): - arr = np.random.default_rng(2).randn(10, 2) + arr = np.random.default_rng(2).standard_normal(10, 2) dtype = pd.array([2.0]).dtype msg = r"len\(arrays\) must match len\(columns\)" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/frame/test_iteration.py b/pandas/tests/frame/test_iteration.py index a0cae2ffa4e8b..8bc26bff41767 100644 --- a/pandas/tests/frame/test_iteration.py +++ b/pandas/tests/frame/test_iteration.py @@ -92,7 +92,7 @@ def test_itertuples(self, float_frame): tm.assert_series_equal(ser, expected) df = DataFrame( - {"floats": np.random.default_rng(2).randn(5), "ints": range(5)}, + {"floats": np.random.default_rng(2).standard_normal(5), "ints": range(5)}, columns=["floats", "ints"], ) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 0ca53b17177c4..206ce857df960 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -21,7 +21,7 @@ class TestDataFrameNonuniqueIndexes: def test_setattr_columns_vs_construct_with_columns(self): # assignment # GH 3687 - arr = np.random.default_rng(2).randn(3, 2) + arr = np.random.default_rng(2).standard_normal(3, 2) idx = list(range(2)) df = DataFrame(arr, columns=["A", "A"]) df.columns = idx @@ -164,7 +164,7 @@ def test_dup_across_dtypes(self): def test_column_dups_indexes(self): # check column dups with index equal and not equal to df's index df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), index=["a", 
"b", "c", "d", "e"], columns=["A", "B", "A"], ) @@ -182,7 +182,9 @@ def test_changing_dtypes_with_duplicate_columns(self): # multiple assignments that change dtypes # the location indexer is a slice # GH 6120 - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=["that", "that"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=["that", "that"] + ) expected = DataFrame(1.0, index=range(5), columns=["that", "that"]) df["that"] = 1.0 @@ -295,8 +297,12 @@ def test_multi_dtype2(self): def test_dups_across_blocks(self, using_array_manager): # dups across blocks - df_float = DataFrame(np.random.default_rng(2).randn(10, 3), dtype="float64") - df_int = DataFrame(np.random.default_rng(2).randn(10, 3).astype("int64")) + df_float = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), dtype="float64" + ) + df_int = DataFrame( + np.random.default_rng(2).standard_normal(10, 3).astype("int64") + ) df_bool = DataFrame(True, index=df_float.index, columns=df_float.columns) df_object = DataFrame("foo", index=df_float.index, columns=df_float.columns) df_dt = DataFrame( diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index 083421fe72b90..b214babd5351c 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -31,7 +31,7 @@ def test_sum_deprecated_axis_behavior(self): # GH#52042 deprecated behavior of df.sum(axis=None), which gets # called when we do np.sum(df) - arr = np.random.default_rng(2).randn(4, 3) + arr = np.random.default_rng(2).standard_normal(4, 3) df = DataFrame(arr) msg = "The behavior of DataFrame.sum with axis=None is deprecated" diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 4f2a139767f5a..ef0ea07ec26e6 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -132,7 +132,7 @@ def test_ops(self, op_str, op, rop, n): def test_dataframe_sub_numexpr_path(self): # GH7192: 
Note we need a large number of rows to ensure this # goes through the numexpr path - df = DataFrame({"A": np.random.default_rng(2).randn(25000)}) + df = DataFrame({"A": np.random.default_rng(2).standard_normal(25000)}) df.iloc[0:5] = np.nan expected = 1 - np.isnan(df.iloc[0:25]) result = (1 - np.isnan(df)).iloc[0:25] @@ -159,7 +159,9 @@ def test_query_empty_string(self): def test_eval_resolvers_as_list(self): # GH 14095 - df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 2), columns=list("ab") + ) dict1 = {"a": 1} dict2 = {"b": 2} assert df.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"] @@ -167,7 +169,9 @@ def test_eval_resolvers_as_list(self): def test_eval_resolvers_combined(self): # GH 34966 - df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 2), columns=list("ab") + ) dict1 = {"c": 2} # Both input and default index/column resolvers should be usable @@ -190,7 +194,7 @@ def test_query_with_named_multiindex(self, parser, engine): a = np.random.default_rng(2).choice(["red", "green"], size=10) b = np.random.default_rng(2).choice(["eggs", "ham"], size=10) index = MultiIndex.from_arrays([a, b], names=["color", "food"]) - df = DataFrame(np.random.default_rng(2).randn(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2), index=index) ind = Series( df.index.get_level_values("color").values, index=index, name="color" ) @@ -240,7 +244,7 @@ def test_query_with_unnamed_multiindex(self, parser, engine): a = np.random.default_rng(2).choice(["red", "green"], size=10) b = np.random.default_rng(2).choice(["eggs", "ham"], size=10) index = MultiIndex.from_arrays([a, b]) - df = DataFrame(np.random.default_rng(2).randn(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2), index=index) ind = 
Series(df.index.get_level_values(0).values, index=index) res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) @@ -329,7 +333,7 @@ def test_query_with_partially_named_multiindex(self, parser, engine): b = np.arange(10) index = MultiIndex.from_arrays([a, b]) index.names = [None, "rating"] - df = DataFrame(np.random.default_rng(2).randn(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2), index=index) res = df.query("rating == 1", parser=parser, engine=engine) ind = Series( df.index.get_level_values("rating").values, index=index, name="rating" @@ -395,7 +399,7 @@ def parser(self): def test_date_query_with_attribute_access(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -406,7 +410,7 @@ def test_date_query_with_attribute_access(self, engine, parser): tm.assert_frame_equal(res, expec) def test_date_query_no_attribute_access(self, engine, parser): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -416,7 +420,7 @@ def test_date_query_no_attribute_access(self, engine, parser): def test_date_query_with_NaT(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).randn(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) @@ -428,7 +432,7 @@ def test_date_query_with_NaT(self, engine, parser): def test_date_index_query(self, engine, parser): n = 10 - df = 
DataFrame(np.random.default_rng(2).randn(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) return_value = df.set_index("dates1", inplace=True, drop=True) @@ -440,7 +444,9 @@ def test_date_index_query(self, engine, parser): def test_date_index_query_with_NaT(self, engine, parser): n = 10 # Cast to object to avoid implicit cast when setting entry to pd.NaT below - df = DataFrame(np.random.default_rng(2).randn(n, 3)).astype({0: object}) + df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)).astype( + {0: object} + ) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT @@ -489,7 +495,9 @@ def test_query_syntax_error(self, engine, parser): def test_query_scope(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.default_rng(2).randn(20, 2), columns=list("ab")) + df = DataFrame( + np.random.default_rng(2).standard_normal(20, 2), columns=list("ab") + ) a, b = 1, 2 # noqa: F841 res = df.query("a > b", engine=engine, parser=parser) @@ -513,7 +521,7 @@ def test_query_scope(self, engine, parser): def test_query_doesnt_pickup_local(self, engine, parser): n = m = 10 df = DataFrame( - np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + np.random.default_rng(2).integers(m, size=(n, 3)), columns=list("abc") ) # we don't pick up the local 'sin' @@ -523,7 +531,7 @@ def test_query_doesnt_pickup_local(self, engine, parser): def test_query_builtin(self, engine, parser): n = m = 10 df = DataFrame( - np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + np.random.default_rng(2).integers(m, size=(n, 3)), columns=list("abc") ) df.index.name = "sin" @@ -532,7 +540,9 @@ def test_query_builtin(self, engine, parser): df.query("sin > 5", engine=engine, parser=parser) def test_query(self, engine, parser): - df = 
DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + ) tm.assert_frame_equal( df.query("a < b", engine=engine, parser=parser), df[df.a < df.b] @@ -544,7 +554,7 @@ def test_query(self, engine, parser): def test_query_index_with_name(self, engine, parser): df = DataFrame( - np.random.default_rng(2).randint(10, size=(10, 3)), + np.random.default_rng(2).integers(10, size=(10, 3)), index=Index(range(10), name="blob"), columns=["a", "b", "c"], ) @@ -559,7 +569,7 @@ def test_query_index_with_name(self, engine, parser): def test_query_index_without_name(self, engine, parser): df = DataFrame( - np.random.default_rng(2).randint(10, size=(10, 3)), + np.random.default_rng(2).integers(10, size=(10, 3)), index=range(10), columns=["a", "b", "c"], ) @@ -577,8 +587,8 @@ def test_query_index_without_name(self, engine, parser): def test_nested_scope(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.default_rng(2).randn(5, 3)) - df2 = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df2 = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) expected = df[(df > 0) & (df2 > 0)] result = df.query("(@df > 0) & (@df2 > 0)", engine=engine, parser=parser) @@ -598,7 +608,7 @@ def test_nested_scope(self, engine, parser): tm.assert_frame_equal(result, expected) def test_nested_raises_on_local_self_reference(self, engine, parser): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) # can't reference ourself b/c we're a local so @ is necessary with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): @@ -608,7 +618,8 @@ def test_local_syntax(self, engine, parser): skip_if_no_pandas_parser(parser) df = DataFrame( - np.random.default_rng(2).randn(100, 10), columns=list("abcdefghij") + 
np.random.default_rng(2).standard_normal((100, 10)), + columns=list("abcdefghij"), ) b = 1 expect = df[df.a < b] @@ -622,7 +633,9 @@ def test_chained_cmp_and_in(self, engine, parser): skip_if_no_pandas_parser(parser) cols = list("abc") - df = DataFrame(np.random.default_rng(2).randn(100, len(cols)), columns=cols) + df = DataFrame( + np.random.default_rng(2).standard_normal((100, len(cols))), columns=cols + ) res = df.query( "a < b < c and a not in b not in c", engine=engine, parser=parser ) @@ -632,15 +645,15 @@ def test_chained_cmp_and_in(self, engine, parser): def test_local_variable_with_in(self, engine, parser): skip_if_no_pandas_parser(parser) - a = Series(np.random.default_rng(2).randint(3, size=15), name="a") - b = Series(np.random.default_rng(2).randint(10, size=15), name="b") + a = Series(np.random.default_rng(2).integers(3, size=15), name="a") + b = Series(np.random.default_rng(2).integers(10, size=15), name="b") df = DataFrame({"a": a, "b": b}) expected = df.loc[(df.b - 1).isin(a)] result = df.query("b - 1 in a", engine=engine, parser=parser) tm.assert_frame_equal(expected, result) - b = Series(np.random.default_rng(2).randint(10, size=15), name="b") + b = Series(np.random.default_rng(2).integers(10, size=15), name="b") expected = df.loc[(b - 1).isin(a)] result = df.query("@b - 1 in a", engine=engine, parser=parser) tm.assert_frame_equal(expected, result) @@ -669,18 +682,22 @@ def test_index_resolvers_come_after_columns_with_the_same_name( n = 1 # noqa: F841 a = np.r_[20:101:20] - df = DataFrame({"index": a, "b": np.random.default_rng(2).randn(a.size)}) + df = DataFrame( + {"index": a, "b": np.random.default_rng(2).standard_normal(a.size)} + ) df.index.name = "index" result = df.query("index > 5", engine=engine, parser=parser) expected = df[df["index"] > 5] tm.assert_frame_equal(result, expected) - df = DataFrame({"index": a, "b": np.random.default_rng(2).randn(a.size)}) + df = DataFrame( + {"index": a, "b":
np.random.default_rng(2).standard_normal(a.size)} + ) result = df.query("ilevel_0 > 5", engine=engine, parser=parser) expected = df.loc[df.index[df.index > 5]] tm.assert_frame_equal(result, expected) - df = DataFrame({"a": a, "b": np.random.default_rng(2).randn(a.size)}) + df = DataFrame({"a": a, "b": np.random.default_rng(2).standard_normal(a.size)}) df.index.name = "a" result = df.query("a > 5", engine=engine, parser=parser) expected = df[df.a > 5] @@ -738,9 +755,9 @@ def test_method_calls_in_query(self, engine, parser): "a": np.where( np.random.default_rng(2).rand(n) < 0.5, np.nan, - np.random.default_rng(2).randn(n), + np.random.default_rng(2).standard_normal(n), ), - "b": np.random.default_rng(2).randn(n), + "b": np.random.default_rng(2).standard_normal(n), } ) expected = df[df["a"].notnull()] @@ -759,7 +776,7 @@ def parser(self): return "python" def test_date_query_no_attribute_access(self, engine, parser): - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -771,7 +788,7 @@ def test_date_query_no_attribute_access(self, engine, parser): def test_date_query_with_NaT(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).randn(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) @@ -785,7 +802,7 @@ def test_date_query_with_NaT(self, engine, parser): def test_date_index_query(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).randn(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) return_value = df.set_index("dates1", 
inplace=True, drop=True) @@ -799,7 +816,9 @@ def test_date_index_query(self, engine, parser): def test_date_index_query_with_NaT(self, engine, parser): n = 10 # Cast to object to avoid implicit cast when setting entry to pd.NaT below - df = DataFrame(np.random.default_rng(2).randn(n, 3)).astype({0: object}) + df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)).astype( + {0: object} + ) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT @@ -813,7 +832,7 @@ def test_date_index_query_with_NaT(self, engine, parser): def test_date_index_query_with_NaT_duplicates(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).randn(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT @@ -829,8 +848,8 @@ def test_nested_scope(self, engine, parser): result = pd.eval("x + 1", engine=engine, parser=parser) assert result == 2 - df = DataFrame(np.random.default_rng(2).randn(5, 3)) - df2 = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df2 = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) # don't have the pandas parser msg = r"The '@' prefix is only supported by the pandas parser" @@ -879,7 +898,7 @@ def parser(self): def test_query_builtin(self, engine, parser): n = m = 10 df = DataFrame( - np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + np.random.default_rng(2).integers(m, size=(n, 3)), columns=list("abc") ) df.index.name = "sin" @@ -900,7 +919,7 @@ def parser(self): def test_query_builtin(self, engine, parser): n = m = 10 df = DataFrame( - np.random.default_rng(2).randint(m, size=(n, 3)), columns=list("abc") + np.random.default_rng(2).integers(m, size=(n, 3)), columns=list("abc") ) df.index.name = "sin" @@ 
-911,7 +930,7 @@ def test_query_builtin(self, engine, parser): class TestDataFrameQueryStrings: def test_str_query_method(self, parser, engine): - df = DataFrame(np.random.default_rng(2).randn(10, 1), columns=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 1), columns=["b"]) df["strings"] = Series(list("aabbccddee")) expect = df[df.strings == "a"] @@ -952,7 +971,7 @@ def test_str_query_method(self, parser, engine): tm.assert_frame_equal(res, df[~df.strings.isin(["a"])]) def test_str_list_query_method(self, parser, engine): - df = DataFrame(np.random.default_rng(2).randn(10, 1), columns=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 1), columns=["b"]) df["strings"] = Series(list("aabbccddee")) expect = df[df.strings.isin(["a", "b"])] @@ -991,8 +1010,8 @@ def test_query_with_string_columns(self, parser, engine): { "a": list("aaaabbbbcccc"), "b": list("aabbccddeeff"), - "c": np.random.default_rng(2).randint(5, size=12), - "d": np.random.default_rng(2).randint(9, size=12), + "c": np.random.default_rng(2).integers(5, size=12), + "d": np.random.default_rng(2).integers(9, size=12), } ) if parser == "pandas": @@ -1017,8 +1036,8 @@ def test_object_array_eq_ne(self, parser, engine): { "a": list("aaaabbbbcccc"), "b": list("aabbccddeeff"), - "c": np.random.default_rng(2).randint(5, size=12), - "d": np.random.default_rng(2).randint(9, size=12), + "c": np.random.default_rng(2).integers(5, size=12), + "d": np.random.default_rng(2).integers(9, size=12), } ) res = df.query("a == b", parser=parser, engine=engine) @@ -1075,7 +1094,7 @@ def test_query_lex_compare_strings(self, parser, engine, op, func): def test_query_single_element_booleans(self, parser, engine): columns = "bid", "bidsize", "ask", "asksize" - data = np.random.default_rng(2).randint(2, size=(1, len(columns))).astype(bool) + data = np.random.default_rng(2).integers(2, size=(1, len(columns))).astype(bool) df = DataFrame(data, columns=columns) res = df.query("bid & ask", 
engine=engine, parser=parser) expected = df[df.bid & df.ask] @@ -1098,7 +1117,9 @@ def test_query_string_scalar_variable(self, parser, engine): class TestDataFrameEvalWithFrame: @pytest.fixture def frame(self): - return DataFrame(np.random.default_rng(2).randn(10, 3), columns=list("abc")) + return DataFrame( + np.random.default_rng(2).standard_normal((10, 3)), columns=list("abc") + ) def test_simple_expr(self, frame, parser, engine): res = frame.eval("a + b", engine=engine, parser=parser) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index f0c6d97ac7627..29750074c6cf9 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -465,7 +465,8 @@ def test_var_std(self, datetime_frame): def test_numeric_only_flag(self, meth): # GH 9201 df1 = DataFrame( - np.random.default_rng(2).randn(5, 3), columns=["foo", "bar", "baz"] + np.random.default_rng(2).standard_normal((5, 3)), + columns=["foo", "bar", "baz"], ) # Cast to object to avoid implicit cast when setting entry to "100" below df1 = df1.astype({"foo": object}) @@ -473,7 +474,8 @@ def test_numeric_only_flag(self, meth): df1.loc[0, "foo"] = "100" df2 = DataFrame( - np.random.default_rng(2).randn(5, 3), columns=["foo", "bar", "baz"] + np.random.default_rng(2).standard_normal((5, 3)), + columns=["foo", "bar", "baz"], ) # Cast to object to avoid implicit cast when setting entry to "a" below df2 = df2.astype({"foo": object}) @@ -936,7 +938,7 @@ def test_mean_datetimelike_numeric_only_false(self): def test_mean_extensionarray_numeric_only_true(self): # https://github.com/pandas-dev/pandas/issues/33256 - arr = np.random.default_rng(2).randint(1000, size=(10, 5)) + arr = np.random.default_rng(2).integers(1000, size=(10, 5)) df = DataFrame(arr, dtype="Int64") result = df.mean(numeric_only=True) expected = DataFrame(arr).mean() @@ -1124,7 +1126,7 @@ def test_idxmax_dt64_multicolumn_axis1(self): def test_any_all_mixed_float(self, opname, axis,
bool_only, float_string_frame): # make sure op works on mixed-type frame mixed = float_string_frame - mixed["_bool_"] = np.random.default_rng(2).randn(len(mixed)) > 0.5 + mixed["_bool_"] = np.random.default_rng(2).standard_normal(len(mixed)) > 0.5 getattr(mixed, opname)(axis=axis, bool_only=bool_only) @@ -1755,7 +1757,7 @@ def test_prod_sum_min_count_mixed_object(): def test_reduction_axis_none_returns_scalar(method, numeric_only): # GH#21597 As of 2.0, axis=None reduces over all axes. - df = DataFrame(np.random.default_rng(2).randn(4, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) result = getattr(df, method)(axis=None, numeric_only=numeric_only) np_arr = df.to_numpy() diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 4c48d4ac3e762..bdd865576f7ed 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -46,7 +46,7 @@ def test_repr_bytes_61_lines(self): def test_repr_unicode_level_names(self, frame_or_series): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) - obj = DataFrame(np.random.default_rng(2).randn(2, 4), index=index) + obj = DataFrame(np.random.default_rng(2).standard_normal(2, 4), index=index) obj = tm.get_obj(obj, frame_or_series) repr(obj) @@ -156,7 +156,10 @@ def test_repr_mixed(self, float_string_frame): def test_repr_mixed_big(self): # big mixed biggie = DataFrame( - {"A": np.random.default_rng(2).randn(200), "B": tm.makeStringIndex(200)}, + { + "A": np.random.default_rng(2).standard_normal(200), + "B": tm.makeStringIndex(200), + }, index=range(200), ) biggie.loc[:20, "A"] = np.nan @@ -261,7 +264,8 @@ def test_str_to_bytes_raises(self): def test_very_wide_info_repr(self): df = DataFrame( - np.random.default_rng(2).randn(10, 20), columns=tm.rands_array(10, 20) + np.random.default_rng(2).standard_normal(10, 20), + columns=tm.rands_array(10, 20), ) repr(df) @@ -341,7 +345,7 @@ def test_frame_datetime64_pre1900_repr(self): def 
test_frame_to_string_with_periodindex(self): index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") - frame = DataFrame(np.random.default_rng(2).randn(3, 4), index=index) + frame = DataFrame(np.random.default_rng(2).standard_normal(3, 4), index=index) # it works! frame.to_string() diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 290bde4242282..5be064261f88c 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -384,7 +384,9 @@ def unstack_and_compare(df, column_name): def test_stack_ints(self): columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3))) - df = DataFrame(np.random.default_rng(2).randn(30, 27), columns=columns) + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 27), columns=columns + ) tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) tm.assert_frame_equal( @@ -409,7 +411,7 @@ def test_stack_mixed_levels(self): ], names=["exp", "animal", "hair_length"], ) - df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=columns) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), columns=columns) animal_hair_stacked = df.stack(level=["animal", "hair_length"]) exp_hair_stacked = df.stack(level=["exp", "hair_length"]) @@ -453,7 +455,7 @@ def test_stack_int_level_names(self): ], names=["exp", "animal", "hair_length"], ) - df = DataFrame(np.random.default_rng(2).randn(4, 4), columns=columns) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), columns=columns) exp_animal_stacked = df.stack(level=["exp", "animal"]) animal_hair_stacked = df.stack(level=["animal", "hair_length"]) @@ -973,7 +975,7 @@ def test_unstack_nan_index5(self): "1st": [1, 2, 1, 2, 1, 2], "2nd": date_range("2014-02-01", periods=6, freq="D"), "jim": 100 + np.arange(6), - "joe": (np.random.default_rng(2).randn(6) * 10).round(2), + "joe": (np.random.default_rng(2).standard_normal(6) * 
10).round(2), } ) @@ -1384,10 +1386,10 @@ def test_unstack_non_slice_like_blocks(using_array_manager): mi = MultiIndex.from_product([range(5), ["A", "B", "C"]]) df = DataFrame( { - 0: np.random.default_rng(2).randn(15), - 1: np.random.default_rng(2).randn(15).astype(np.int64), - 2: np.random.default_rng(2).randn(15), - 3: np.random.default_rng(2).randn(15), + 0: np.random.default_rng(2).standard_normal(15), + 1: np.random.default_rng(2).standard_normal(15).astype(np.int64), + 2: np.random.default_rng(2).standard_normal(15), + 3: np.random.default_rng(2).standard_normal(15), }, index=mi, ) @@ -1504,7 +1506,7 @@ def test_unstack_multiple_no_empty_columns(self): [(0, "foo", 0), (0, "bar", 0), (1, "baz", 1), (1, "qux", 1)] ) - s = Series(np.random.default_rng(2).randn(4), index=index) + s = Series(np.random.default_rng(2).standard_normal(4), index=index) unstacked = s.unstack([1, 2]) expected = unstacked.dropna(axis=1, how="all") @@ -1891,7 +1893,7 @@ def test_stack_multiple_bug(self): id_col = ([1] * 3) + ([2] * 3) name = (["a"] * 3) + (["b"] * 3) date = pd.to_datetime(["2013-01-03", "2013-01-04", "2013-01-05"] * 2) - var1 = np.random.default_rng(2).randint(0, 100, 6) + var1 = np.random.default_rng(2).integers(0, 100, 6) df = DataFrame({"ID": id_col, "NAME": name, "DATE": date, "VAR1": var1}) multi = df.set_index(["DATE", "ID"]) @@ -1940,12 +1942,12 @@ def test_unstack_sparse_keyspace(self): df = DataFrame( { - "A": np.random.default_rng(2).randint(100, size=NUM_ROWS), - "B": np.random.default_rng(2).randint(300, size=NUM_ROWS), - "C": np.random.default_rng(2).randint(-7, 7, size=NUM_ROWS), - "D": np.random.default_rng(2).randint(-19, 19, size=NUM_ROWS), - "E": np.random.default_rng(2).randint(3000, size=NUM_ROWS), - "F": np.random.default_rng(2).randn(NUM_ROWS), + "A": np.random.default_rng(2).integers(100, size=NUM_ROWS), + "B": np.random.default_rng(2).integers(300, size=NUM_ROWS), + "C": np.random.default_rng(2).integers(-7, 7, size=NUM_ROWS), + "D": 
np.random.default_rng(2).integers(-19, 19, size=NUM_ROWS), + "E": np.random.default_rng(2).integers(3000, size=NUM_ROWS), + "F": np.random.default_rng(2).standard_normal(NUM_ROWS), } ) @@ -1961,7 +1963,7 @@ def test_unstack_unobserved_keys(self): index = MultiIndex(levels, codes) - df = DataFrame(np.random.default_rng(2).randn(4, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 2), index=index) result = df.unstack() assert len(result.columns) == 4 @@ -1983,7 +1985,7 @@ def __init__(self, *args, **kwargs) -> None: with monkeypatch.context() as m: m.setattr(reshape_lib, "_Unstacker", MockUnstacker) df = DataFrame( - np.random.default_rng(2).randn(2**16, 2), + np.random.default_rng(2).standard_normal(2**16, 2), index=[np.arange(2**16), np.arange(2**16)], ) msg = "The following operation may generate" diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index f49f1165094b1..6215342f31dc0 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -497,7 +497,7 @@ def test_subclassed_melt(self): def test_subclassed_wide_to_long(self): # GH 9762 - x = np.random.default_rng(2).randn(3) + x = np.random.default_rng(2).standard_normal(3) df = tm.SubclassedDataFrame( { "A1970": {0: "a", 1: "b", 2: "c"}, @@ -660,10 +660,12 @@ def test_corrwith(self): index = ["a", "b", "c", "d", "e"] columns = ["one", "two", "three", "four"] df1 = tm.SubclassedDataFrame( - np.random.default_rng(2).randn(5, 4), index=index, columns=columns + np.random.default_rng(2).standard_normal(5, 4), index=index, columns=columns ) df2 = tm.SubclassedDataFrame( - np.random.default_rng(2).randn(4, 4), index=index[:4], columns=columns + np.random.default_rng(2).standard_normal(4, 4), + index=index[:4], + columns=columns, ) correls = df1.corrwith(df2, axis=1, drop=True, method="kendall") diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 46064afb12b4e..37709d612714f 100644 
--- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -74,8 +74,8 @@ def test_metadata_propagation_indiv_groupby(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) result = df.groupby("A").sum() @@ -84,7 +84,7 @@ def test_metadata_propagation_indiv_groupby(self): def test_metadata_propagation_indiv_resample(self): # resample df = DataFrame( - np.random.default_rng(2).randn(1000, 2), + np.random.default_rng(2).standard_normal(1000, 2), index=date_range("20130101", periods=1000, freq="s"), ) result = df.resample("1T") @@ -115,10 +115,10 @@ def finalize(self, other, method=None, **kwargs): m.setattr(DataFrame, "__finalize__", finalize) df1 = DataFrame( - np.random.default_rng(2).randint(0, 4, (3, 2)), columns=["a", "b"] + np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["a", "b"] ) df2 = DataFrame( - np.random.default_rng(2).randint(0, 4, (3, 2)), columns=["c", "d"] + np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["c", "d"] ) DataFrame._metadata = ["filename"] df1.filename = "fname1.csv" @@ -130,7 +130,7 @@ def finalize(self, other, method=None, **kwargs): # concat # GH#6927 df1 = DataFrame( - np.random.default_rng(2).randint(0, 4, (3, 2)), columns=list("ab") + np.random.default_rng(2).integers(0, 4, (3, 2)), columns=list("ab") ) df1.filename = "foo" @@ -188,7 +188,9 @@ def test_validate_bool_args(self, value): def test_unexpected_keyword(self): # GH8597 - df = DataFrame(np.random.default_rng(2).randn(5, 2), columns=["jim", "joe"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=["jim", "joe"] + ) ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) ts = df["joe"].copy() ts[2] = np.nan diff --git 
a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 55e5fc8076fed..9cbdab713d1ef 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -46,7 +46,7 @@ def construct(box, shape, value=None, dtype=None, **kwargs): arr = np.repeat(arr, new_shape).reshape(shape) else: - arr = np.random.default_rng(2).randn(*shape) + arr = np.random.default_rng(2).standard_normal(shape) return box(arr, dtype=dtype, **kwargs) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 6ae3049e2b4d3..abce0f6d14f70 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -359,7 +359,7 @@ def test_agg_multiple_functions_maintain_order(df): def test_agg_multiple_functions_same_name(): # GH 30880 df = DataFrame( - np.random.default_rng(2).randn(1000, 3), + np.random.default_rng(2).standard_normal((1000, 3)), index=pd.date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], ) @@ -381,7 +381,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): # GH 30880 # ohlc expands dimensions, so different test to the above is required.
df = DataFrame( - np.random.default_rng(2).randn(1000, 3), + np.random.default_rng(2).standard_normal(1000, 3), index=pd.date_range("1/1/2012", freq="S", periods=1000, name="dti"), columns=Index(["A", "B", "C"], name="alpha"), ) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 6011593f442c8..6320da3f1f661 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -48,7 +48,7 @@ def test_cythonized_aggers(op_name): data = { "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan], "B": ["A", "B"] * 6, - "C": np.random.default_rng(2).randn(12), + "C": np.random.default_rng(2).standard_normal(12), } df = DataFrame(data) df.loc[2:10:2, "C"] = np.nan @@ -80,8 +80,8 @@ def test_cythonized_aggers(op_name): def test_cython_agg_boolean(): frame = DataFrame( { - "a": np.random.default_rng(2).randint(0, 5, 50), - "b": np.random.default_rng(2).randint(0, 2, 50).astype("bool"), + "a": np.random.default_rng(2).integers(0, 5, 50), + "b": np.random.default_rng(2).integers(0, 2, 50).astype("bool"), } ) result = frame.groupby("a")["b"].mean() @@ -95,7 +95,7 @@ def test_cython_agg_boolean(): def test_cython_agg_nothing_to_agg(): frame = DataFrame( - {"a": np.random.default_rng(2).randint(0, 5, 50), "b": ["foo", "bar"] * 25} + {"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25} ) msg = "Cannot use numeric_only=True with SeriesGroupBy.mean and non-numeric dtypes" @@ -103,7 +103,7 @@ def test_cython_agg_nothing_to_agg(): frame.groupby("a")["b"].mean(numeric_only=True) frame = DataFrame( - {"a": np.random.default_rng(2).randint(0, 5, 50), "b": ["foo", "bar"] * 25} + {"a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25} ) result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) @@ -116,7 +116,7 @@ def test_cython_agg_nothing_to_agg(): def test_cython_agg_nothing_to_agg_with_dates(): frame = DataFrame( { - "a": 
np.random.default_rng(2).randint(0, 5, 50), + "a": np.random.default_rng(2).integers(0, 5, 50), "b": ["foo", "bar"] * 25, "dates": pd.date_range("now", periods=50, freq="T"), } @@ -147,8 +147,8 @@ def test_cython_agg_return_dict(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) @@ -189,8 +189,8 @@ def test_cython_fail_agg(): ], ) def test__cython_agg_general(op, targop): - df = DataFrame(np.random.default_rng(2).randn(1000)) - labels = np.random.default_rng(2).randint(0, 50, size=1000).astype(float) + df = DataFrame(np.random.default_rng(2).standard_normal(1000)) + labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) result = df.groupby(labels)._cython_agg_general(op, alt=None, numeric_only=True) warn = FutureWarning if targop in com._cython_table else None diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index ba2fb04eae62a..19fbac682dccb 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -354,8 +354,8 @@ def test_multilabel_numba_vs_cython(numba_supported_reductions): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) gb = df.groupby(["A", "B"]) @@ -374,8 +374,8 @@ def test_multilabel_udf_numba_vs_cython(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": 
np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) gb = df.groupby(["A", "B"]) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 34051ae3c996a..0260a4ec2af93 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -30,8 +30,8 @@ def test_agg_partial_failure_raises(): df = DataFrame( { - "data1": np.random.default_rng(2).randn(5), - "data2": np.random.default_rng(2).randn(5), + "data1": np.random.default_rng(2).standard_normal(5), + "data2": np.random.default_rng(2).standard_normal(5), "key1": ["a", "a", "b", "b", "a"], "key2": ["one", "two", "one", "two", "one"], } @@ -87,7 +87,7 @@ def test_agg_datetimes_mixed(): def test_agg_period_index(): prng = period_range("2012-1-1", freq="M", periods=3) - df = DataFrame(np.random.default_rng(2).randn(3, 2), index=prng) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 2)), index=prng) rs = df.groupby(level=0).sum() assert isinstance(rs.index, PeriodIndex) @@ -175,7 +175,7 @@ def test_aggregate_api_consistency(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": np.random.default_rng(2).randn(8) + 1.0, + "C": np.random.default_rng(2).standard_normal(8) + 1.0, "D": np.arange(8), } ) @@ -239,7 +239,7 @@ def test_agg_compat(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": np.random.default_rng(2).randn(8) + 1.0, + "C": np.random.default_rng(2).standard_normal(8) + 1.0, "D": np.arange(8), } ) @@ -260,7 +260,7 @@ def test_agg_nested_dicts(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": np.random.default_rng(2).randn(8) + 1.0, + "C":
np.random.default_rng(2).standard_normal(8) + 1.0, "D": np.arange(8), } ) @@ -284,7 +284,7 @@ def test_agg_nested_dicts(): def test_agg_item_by_item_raise_typeerror(): - df = DataFrame(np.random.default_rng(2).randint(10, size=(20, 10))) + df = DataFrame(np.random.default_rng(2).integers(10, size=(20, 10))) def raiseException(df): pprint_thing("----------------------------------------") @@ -346,9 +346,9 @@ def test_series_agg_multi_pure_python(): "shiny", "shiny", ], - "D": np.random.default_rng(2).randn(11), - "E": np.random.default_rng(2).randn(11), - "F": np.random.default_rng(2).randn(11), + "D": np.random.default_rng(2).standard_normal(11), + "E": np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index 67e1aabda05bb..49fa9dc51f0d3 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -45,8 +45,8 @@ def df(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) @@ -72,8 +72,8 @@ def df_mixed_floats(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.array(np.random.default_rng(2).randn(8), dtype="float32"), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.array(np.random.default_rng(2).standard_normal(8), dtype="float32"), } ) @@ -121,9 +121,9 @@ def three_group(): "shiny", "shiny", ], - "D": np.random.default_rng(2).randn(11), - "E": np.random.default_rng(2).randn(11), - "F": np.random.default_rng(2).randn(11), + "D": np.random.default_rng(2).standard_normal(11), + "E": 
np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 7df60a06b6eda..3162eadb73fd5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -250,7 +250,7 @@ def test_apply_with_mixed_dtype(): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 df = DataFrame( { - "foo1": np.random.default_rng(2).randn(6), + "foo1": np.random.default_rng(2).standard_normal(6), "foo2": ["one", "two", "two", "three", "one", "two"], } ) @@ -347,7 +347,7 @@ def f(piece): ) dr = bdate_range("1/1/2000", periods=100) - ts = Series(np.random.default_rng(2).randn(100), index=dr) + ts = Series(np.random.default_rng(2).standard_normal(100), index=dr) grouped = ts.groupby(lambda x: x.month, group_keys=False) result = grouped.apply(f) @@ -401,9 +401,9 @@ def trans2(group): df = DataFrame( { - "A": np.random.default_rng(2).randint(0, 5, 1000), - "B": np.random.default_rng(2).randint(0, 5, 1000), - "C": np.random.default_rng(2).randn(1000), + "A": np.random.default_rng(2).integers(0, 5, 1000), + "B": np.random.default_rng(2).integers(0, 5, 1000), + "C": np.random.default_rng(2).standard_normal(1000), } ) @@ -586,11 +586,11 @@ def test_apply_corner_cases(): # #535, can't use sliding iterator N = 1000 - labels = np.random.default_rng(2).randint(0, 100, size=N) + labels = np.random.default_rng(2).integers(0, 100, size=N) df = DataFrame( { "key": labels, - "value1": np.random.default_rng(2).randn(N), + "value1": np.random.default_rng(2).standard_normal(N), "value2": ["foo", "bar", "baz", "qux"] * (N // 4), } ) diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index a437f226b48b3..9bc07b584e9d1 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -34,7 +34,7 @@ def test_mutate_groups(): + ["d"] * 2 + ["e"] * 2, "cat3": 
[f"g{x}" for x in range(1, 15)], - "val": np.random.default_rng(2).randint(100, size=14), + "val": np.random.default_rng(2).integers(100, size=14), } ) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index ab7e3a36d7506..6cbac17d3fb62 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -207,11 +207,11 @@ def f(x): # more basic levels = ["foo", "bar", "baz", "qux"] - codes = np.random.default_rng(2).randint(0, 4, size=100) + codes = np.random.default_rng(2).integers(0, 4, size=100) cats = Categorical.from_codes(codes, levels, ordered=True) - data = DataFrame(np.random.default_rng(2).randn(100, 4)) + data = DataFrame(np.random.default_rng(2).standard_normal((100, 4))) result = data.groupby(cats, observed=False).mean() @@ -462,9 +462,9 @@ def test_observed_perf(): # gh-14942 df = DataFrame( { - "cat": np.random.default_rng(2).randint(0, 255, size=30000), - "int_id": np.random.default_rng(2).randint(0, 255, size=30000), - "other_id": np.random.default_rng(2).randint(0, 10000, size=30000), + "cat": np.random.default_rng(2).integers(0, 255, size=30000), + "int_id": np.random.default_rng(2).integers(0, 255, size=30000), + "other_id": np.random.default_rng(2).integers(0, 10000, size=30000), "foo": 0, } ) @@ -642,11 +642,11 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): def test_datetime(): # GH9049: ensure backward compatibility levels = pd.date_range("2014-01-01", periods=4) - codes = np.random.default_rng(2).randint(0, 4, size=100) + codes = np.random.default_rng(2).integers(0, 4, size=100) cats = Categorical.from_codes(codes, levels, ordered=True) - data = DataFrame(np.random.default_rng(2).randn(100, 4)) + data = DataFrame(np.random.default_rng(2).standard_normal((100, 4))) result = data.groupby(cats, observed=False).mean() expected = data.groupby(np.asarray(cats), observed=False).mean() @@ -681,7 +681,7 @@ def test_datetime(): def
test_categorical_index(): s = np.random.default_rng(2).RandomState(12345) levels = ["foo", "bar", "baz", "qux"] - codes = s.randint(0, 4, size=20) + codes = s.integers(0, 4, size=20) cats = Categorical.from_codes(codes, levels, ordered=True) df = DataFrame(np.repeat(np.arange(20), 4).reshape(-1, 4), columns=list("abcd")) df["cats"] = cats @@ -710,7 +710,7 @@ def test_describe_categorical_columns(): categories=["foo", "bar", "baz", "qux"], ordered=True, ) - df = DataFrame(np.random.default_rng(2).randn(20, 4), columns=cats) + df = DataFrame(np.random.default_rng(2).standard_normal(20, 4), columns=cats) result = df.groupby([1, 2, 3, 4] * 5).describe() tm.assert_index_equal(result.stack().columns, cats) @@ -920,7 +920,7 @@ def test_preserve_on_ordered_ops(func, values): def test_categorical_no_compress(): - data = Series(np.random.default_rng(2).randn(9)) + data = Series(np.random.default_rng(2).standard_normal(9)) codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) @@ -977,7 +977,7 @@ def test_sort(): # has a sorted x axis # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') - df = DataFrame({"value": np.random.default_rng(2).randint(0, 10000, 100)}) + df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 100)}) labels = [f"{i} - {i+499}" for i in range(0, 10000, 500)] cat_labels = Categorical(labels, labels) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 5ed1540c1ed1f..fd5018d05380c 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -270,12 +270,12 @@ def test_count(): df = DataFrame( { "1st": np.random.default_rng(2).choice(list(ascii_lowercase), n), - "2nd": np.random.default_rng(2).randint(0, 5, n), - "3rd": np.random.default_rng(2).randn(n).round(3), - "4th": np.random.default_rng(2).randint(-10, 10, n), + "2nd": np.random.default_rng(2).integers(0, 5, n), + "3rd": 
np.random.default_rng(2).standard_normal(n).round(3), + "4th": np.random.default_rng(2).integers(-10, 10, n), "5th": np.random.default_rng(2).choice(dr, n), - "6th": np.random.default_rng(2).randn(n).round(3), - "7th": np.random.default_rng(2).randn(n).round(3), + "6th": np.random.default_rng(2).standard_normal(n).round(3), + "7th": np.random.default_rng(2).standard_normal(n).round(3), "8th": np.random.default_rng(2).choice(dr, n) - np.random.default_rng(2).choice(dr, 1), "9th": np.random.default_rng(2).choice(list(ascii_lowercase), n), @@ -330,8 +330,8 @@ def test_count_cross_type(): # Set float64 dtype to avoid upcast when setting nan below vals = np.hstack( ( - np.random.default_rng(2).randint(0, 5, (100, 2)), - np.random.default_rng(2).randint(0, 2, (100, 2)), + np.random.default_rng(2).integers(0, 5, (100, 2)), + np.random.default_rng(2).integers(0, 2, (100, 2)), ) ).astype("float64") diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 4d7aaea03a04d..77ad4c7070ebd 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -192,7 +192,7 @@ def test_filter_pdna_is_false(): def test_filter_against_workaround(): # Series of ints - s = Series(np.random.default_rng(2).randint(0, 100, 1000)) + s = Series(np.random.default_rng(2).integers(0, 100, 1000)) grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 @@ -213,10 +213,10 @@ def test_filter_against_workaround(): # Set up DataFrame of ints, floats, strings. 
letters = np.array(list(ascii_lowercase)) N = 1000 - random_letters = letters.take(np.random.default_rng(2).randint(0, 26, N)) + random_letters = letters.take(np.random.default_rng(2).integers(0, 26, N)) df = DataFrame( { - "ints": Series(np.random.default_rng(2).randint(0, 100, N)), + "ints": Series(np.random.default_rng(2).integers(0, 100, N)), "floats": N / 10 * Series(np.random.default_rng(2).random(N)), "letters": Series(random_letters), } diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 1789a0f4f5205..852de27052053 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -74,9 +74,9 @@ def test_intercept_builtin_sum(): @pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key def test_builtins_apply(keys, f): # see gh-8155 - rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.randint(1, 7, (10, 2)), columns=["jim", "joe"]) - df["jolie"] = rs.randn(10) + rs = np.random.default_rng(2) + df = DataFrame(rs.integers(1, 7, (10, 2)), columns=["jim", "joe"]) + df["jolie"] = rs.standard_normal(10) gb = df.groupby(keys) @@ -371,11 +371,11 @@ def test_cython_api2(): def test_cython_median(): - arr = np.random.default_rng(2).randn(1000) + arr = np.random.default_rng(2).standard_normal(1000) arr[::2] = np.nan df = DataFrame(arr) - labels = np.random.default_rng(2).randint(0, 50, size=1000).astype(float) + labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) labels[::17] = np.nan result = df.groupby(labels).median() @@ -384,7 +384,7 @@ def test_cython_median(): exp = df.groupby(labels).agg(np.nanmedian) tm.assert_frame_equal(result, exp) - df = DataFrame(np.random.default_rng(2).randn(1000, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(1000, 5)) msg = "using DataFrameGroupBy.median" with tm.assert_produces_warning(FutureWarning, match=msg): rs = df.groupby(labels).agg(np.median) @@ -393,7 +393,7 @@ def 
test_cython_median(): def test_median_empty_bins(observed): - df = DataFrame(np.random.default_rng(2).randint(0, 44, 500)) + df = DataFrame(np.random.default_rng(2).integers(0, 44, 500)) grps = range(0, 55, 5) bins = pd.cut(df[0], grps) @@ -517,7 +517,9 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): def test_idxmin_idxmax_axis1(): - df = DataFrame(np.random.default_rng(2).randn(10, 4), columns=["A", "B", "C", "D"]) + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] + ) df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] gb = df.groupby("A") @@ -548,7 +550,9 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only): msg = "GH#47723 groupby.corrwith and skew do not correctly implement axis=1" request.node.add_marker(pytest.mark.xfail(reason=msg)) - df = DataFrame(np.random.default_rng(2).randn(10, 4), columns=["A", "B", "C", "D"]) + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] + ) df["E"] = "x" groups = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] gb = df.groupby(groups) @@ -691,8 +695,8 @@ def scipy_sem(*args, **kwargs): ], ) def test_ops_general(op, targop): - df = DataFrame(np.random.default_rng(2).randn(1000)) - labels = np.random.default_rng(2).randint(0, 50, size=1000).astype(float) + df = DataFrame(np.random.default_rng(2).standard_normal(1000)) + labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) result = getattr(df.groupby(labels), op)() warn = None if op in ("first", "last", "count", "sem") else FutureWarning @@ -745,7 +749,7 @@ def test_nlargest_mi_grouper(): iterables = [dts, ["one", "two"]] idx = MultiIndex.from_product(iterables, names=["first", "second"]) - s = Series(npr.randn(20), index=idx) + s = Series(npr.standard_normal(20), index=idx) result = s.groupby("first").nlargest(1) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 81779b2bf651d..243e8e59fca70 100644 ---
a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -539,8 +539,8 @@ def test_multi_func(df): # some "groups" with no data df = DataFrame( { - "v1": np.random.default_rng(2).randn(6), - "v2": np.random.default_rng(2).randn(6), + "v1": np.random.default_rng(2).standard_normal(6), + "v2": np.random.default_rng(2).standard_normal(6), "k1": np.array(["b", "b", "b", "a", "a", "a"]), "k2": np.array(["1", "1", "1", "2", "2", "2"]), }, @@ -588,9 +588,9 @@ def test_frame_multi_key_function_list(): "two", "one", ], - "D": np.random.default_rng(2).randn(11), - "E": np.random.default_rng(2).randn(11), - "F": np.random.default_rng(2).randn(11), + "D": np.random.default_rng(2).standard_normal(11), + "E": np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) @@ -649,9 +649,9 @@ def test_frame_multi_key_function_list_partial_failure(): "shiny", "shiny", ], - "D": np.random.default_rng(2).randn(11), - "E": np.random.default_rng(2).randn(11), - "F": np.random.default_rng(2).randn(11), + "D": np.random.default_rng(2).standard_normal(11), + "E": np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) @@ -774,10 +774,10 @@ def test_groupby_as_index_agg(df): # GH7115 & GH8112 & GH8582 df = DataFrame( - np.random.default_rng(2).randint(0, 100, (50, 3)), + np.random.default_rng(2).integers(0, 100, (50, 3)), columns=["jim", "joe", "jolie"], ) - ts = Series(np.random.default_rng(2).randint(5, 10, 50), name="jim") + ts = Series(np.random.default_rng(2).integers(5, 10, 50), name="jim") gr = df.groupby(ts) gr.nth(0) # invokes set_selection_from_grouper internally @@ -807,7 +807,7 @@ def test_ops_not_as_index(reduction_func): pytest.skip(f"GH 5755: Test not applicable for {reduction_func}") df = DataFrame( - np.random.default_rng(2).randint(0, 5, size=(100, 2)), columns=["a", "b"] + np.random.default_rng(2).integers(0, 5, size=(100, 2)), columns=["a", "b"] ) expected 
= getattr(df.groupby("a"), reduction_func)() if reduction_func == "size": @@ -1041,8 +1041,8 @@ def test_empty_groups_corner(mframe): "k1": np.array(["b", "b", "b", "a", "a", "a"]), "k2": np.array(["1", "1", "1", "2", "2", "2"]), "k3": ["foo", "bar"] * 3, - "v1": np.random.default_rng(2).randn(6), - "v2": np.random.default_rng(2).randn(6), + "v1": np.random.default_rng(2).standard_normal(6), + "v2": np.random.default_rng(2).standard_normal(6), } ) @@ -1210,7 +1210,9 @@ def test_groupby_with_hier_columns(): columns = MultiIndex.from_tuples( [("A", "cat"), ("B", "dog"), ("B", "cat"), ("A", "dog")] ) - df = DataFrame(np.random.default_rng(2).randn(8, 4), index=index, columns=columns) + df = DataFrame( + np.random.default_rng(2).standard_normal(8, 4), index=index, columns=columns + ) result = df.groupby(level=0).mean() tm.assert_index_equal(result.columns, columns) @@ -1297,7 +1299,7 @@ def test_consistency_name(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], - "C": np.random.default_rng(2).randn(8) + 1.0, + "C": np.random.default_rng(2).standard_normal(8) + 1.0, "D": np.arange(8), } ) @@ -1359,8 +1361,8 @@ def test_cython_grouper_series_bug_noncontig(): def test_series_grouper_noncontig_index(): index = Index(tm.rands_array(10, 100)) - values = Series(np.random.default_rng(2).randn(50), index=index[::2]) - labels = np.random.default_rng(2).randint(0, 5, 50) + values = Series(np.random.default_rng(2).standard_normal(50), index=index[::2]) + labels = np.random.default_rng(2).integers(0, 5, 50) # it works! 
grouped = values.groupby(labels) @@ -1426,7 +1428,11 @@ def test_groupby_list_infer_array_like(df): # pathological case of ambiguity df = DataFrame( - {"foo": [0, 1], "bar": [3, 4], "val": np.random.default_rng(2).randn(2)} + { + "foo": [0, 1], + "bar": [3, 4], + "val": np.random.default_rng(2).standard_normal(2), + } ) result = df.groupby(["foo", "bar"]).mean() @@ -1449,10 +1455,14 @@ def test_groupby_keys_same_size_as_index(): def test_groupby_one_row(): # GH 11741 msg = r"^'Z'$" - df1 = DataFrame(np.random.default_rng(2).randn(1, 4), columns=list("ABCD")) + df1 = DataFrame( + np.random.default_rng(2).standard_normal((1, 4)), columns=list("ABCD") + ) with pytest.raises(KeyError, match=msg): df1.groupby("Z") - df2 = DataFrame(np.random.default_rng(2).randn(2, 4), columns=list("ABCD")) + df2 = DataFrame( + np.random.default_rng(2).standard_normal((2, 4)), columns=list("ABCD") + ) with pytest.raises(KeyError, match=msg): df2.groupby("Z") @@ -1461,7 +1471,7 @@ def test_groupby_nat_exclude(): # GH 6992 df = DataFrame( { - "values": np.random.default_rng(2).randn(8), + "values": np.random.default_rng(2).standard_normal(8), "dt": [ np.nan, Timestamp("2013-01-01"), @@ -1543,7 +1553,13 @@ def test_int32_overflow(): B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000))) A = np.arange(25000) df = DataFrame( - {"A": A, "B": B, "C": A, "D": B, "E": np.random.default_rng(2).randn(25000)} + { + "A": A, + "B": B, + "C": A, + "D": B, + "E": np.random.default_rng(2).standard_normal(25000), + } ) left = df.groupby(["A", "B", "C", "D"]).sum() @@ -1557,7 +1573,7 @@ def test_groupby_sort_multi(): "a": ["foo", "bar", "baz"], "b": [3, 2, 1], "c": [0, 1, 2], - "d": np.random.default_rng(2).randn(3), + "d": np.random.default_rng(2).standard_normal(3), } ) @@ -1580,7 +1596,7 @@ def test_groupby_sort_multi(): { "a": [0, 1, 2, 0, 1, 2], "b": [0, 0, 0, 1, 1, 1], - "d": np.random.default_rng(2).randn(6), + "d": np.random.default_rng(2).standard_normal(6), } ) grouped =
df.groupby(["a", "b"])["d"] @@ -2083,7 +2099,7 @@ def get_categorical_invalid_expected(): def test_empty_groupby_apply_nonunique_columns(): # GH#44417 - df = DataFrame(np.random.default_rng(2).randn(0, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((0, 4))) df[3] = df[3].astype(np.int64) df.columns = [0, 1, 2, 0] gb = df.groupby(df[1], group_keys=False) @@ -2424,7 +2440,7 @@ def test_groupby_list_level(): ) def test_groups_repr_truncates(max_seq_items, expected): # GH 1135 - df = DataFrame(np.random.default_rng(2).randn(5, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 1))) df["a"] = df.index with pd.option_context("display.max_seq_items", max_seq_items): @@ -2547,7 +2563,7 @@ def test_groupby_numerical_stability_cumsum(): def test_groupby_cumsum_skipna_false(): # GH#46216 don't propagate np.nan above the diagonal - arr = np.random.default_rng(2).randn(5, 5) + arr = np.random.default_rng(2).standard_normal((5, 5)) df = DataFrame(arr) for i in range(5): df.iloc[i, i] = np.nan diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 5e889ba776eb0..1a0395e0eaf20 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -71,9 +71,9 @@ def test_getitem_list_of_columns(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), - "E": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), + "E": np.random.default_rng(2).standard_normal(8), } ) @@ -90,9 +90,9 @@ def test_getitem_numeric_column_names(self): df = DataFrame( { 0: list("abcd") * 2, - 2: np.random.default_rng(2).randn(8), - 4: np.random.default_rng(2).randn(8), - 6: np.random.default_rng(2).randn(8), + 2: np.random.default_rng(2).standard_normal(8), + 4:
np.random.default_rng(2).standard_normal(8), + 6: np.random.default_rng(2).standard_normal(8), } ) result = df.groupby(0)[df.columns[1:3]].mean() @@ -117,9 +117,9 @@ def test_getitem_single_column(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), - "E": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), + "E": np.random.default_rng(2).standard_normal(8), } ) @@ -138,7 +138,7 @@ def test_indices_grouped_by_tuple_with_lambda(self): "Tuples": ( (x, y) for x in [0, 1] - for y in np.random.default_rng(2).randint(3, 5, 5) + for y in np.random.default_rng(2).integers(3, 5, 5) ) } ) @@ -184,7 +184,7 @@ def test_grouper_multilevel_freq(self): d0 = date.today() - timedelta(days=14) dates = date_range(d0, date.today()) date_index = MultiIndex.from_product([dates, dates], names=["foo", "bar"]) - df = DataFrame(np.random.default_rng(2).randint(0, 100, 225), index=date_index) + df = DataFrame(np.random.default_rng(2).integers(0, 100, 225), index=date_index) # Check string level expected = ( @@ -440,7 +440,7 @@ def test_groupby_series_named_with_tuple(self, frame_or_series, index): def test_groupby_grouper_f_sanity_checked(self): dates = date_range("01-Jan-2013", periods=12, freq="MS") - ts = Series(np.random.default_rng(2).randn(12), index=dates) + ts = Series(np.random.default_rng(2).standard_normal(12), index=dates) # GH51979 # simple check that the passed function doesn't operates on the whole index @@ -876,7 +876,7 @@ def test_get_group_grouped_by_tuple_with_lambda(self): "Tuples": ( (x, y) for x in [0, 1] - for y in np.random.default_rng(2).randint(3, 5, 5) + for y in np.random.default_rng(2).integers(3, 5, 5) ) } ) @@ -977,8 +977,8 @@ def test_multi_iter_frame(self, three_group): k2 = np.array(["1", "2", "1", "2", 
"1", "2"]) df = DataFrame( { - "v1": np.random.default_rng(2).randn(6), - "v2": np.random.default_rng(2).randn(6), + "v1": np.random.default_rng(2).standard_normal(6), + "v2": np.random.default_rng(2).standard_normal(6), "k1": k1, "k2": k2, }, diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index d2d2342118128..e2737d62f4ead 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -128,9 +128,9 @@ def multiindex_data(): data = {} for date in dates: - nitems_for_date = nitems - rng.randint(0, 12) + nitems_for_date = nitems - rng.integers(0, 12) levels = [ - (item, rng.randint(0, 10000) / 100, rng.randint(0, 10000) / 100) + (item, rng.integers(0, 10000) / 100, rng.integers(0, 10000) / 100) for item in items[:nitems_for_date] ] levels.sort(key=lambda x: x[1]) diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index 23de180553080..ed810ebb439e4 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -133,7 +133,7 @@ class TestGroupVarFloat32(GroupVarTestMixin): @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_group_ohlc(dtype): - obj = np.array(np.random.default_rng(2).randn(20), dtype=dtype) + obj = np.array(np.random.default_rng(2).standard_normal(20), dtype=dtype) bins = np.array([6, 12, 20]) out = np.zeros((3, 4), dtype) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 0834d32b8e585..1cf4a90e25f1b 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -238,7 +238,7 @@ def test_nth(): # GH 7559 # from the vbench - df = DataFrame(np.random.default_rng(2).randint(1, 10, (100, 2)), dtype="int64") + df = DataFrame(np.random.default_rng(2).integers(1, 10, (100, 2)), dtype="int64") s = df[1] g = df[0] expected = s.groupby(g).first() diff --git a/pandas/tests/groupby/test_nunique.py 
b/pandas/tests/groupby/test_nunique.py index 90932fa96ce3e..9c9e32d9ce226 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -30,7 +30,7 @@ def test_series_groupby_nunique(sort, dropna, as_index, with_nan, keys): { "jim": np.random.default_rng(2).choice(list(ascii_lowercase), n), "joe": np.random.default_rng(2).choice(days, n), - "julie": np.random.default_rng(2).randint(0, m, n), + "julie": np.random.default_rng(2).integers(0, m, n), } ) if with_nan: diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 249c316dda697..3e3086f9095cc 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -17,8 +17,8 @@ def test_pipe(): df = DataFrame( { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], - "B": random_state.randn(8), - "C": random_state.randn(8), + "B": random_state.standard_normal(8), + "C": random_state.standard_normal(8), } ) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index e2d6175f17f87..84ba768d64728 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -96,7 +96,7 @@ def test_quantile_array2(): arr = ( np.random.default_rng(2) .RandomState(0) - .randint(0, 5, size=(10, 3), dtype=np.int64) + .integers(0, 5, size=(10, 3), dtype=np.int64) ) df = DataFrame(arr, columns=list("ABC")) result = df.groupby("A").quantile([0.3, 0.7]) diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 21355d07a606b..bd1deb58c6195 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -33,12 +33,12 @@ def test_rank_unordered_categorical_typeerror(): def test_rank_apply(): lev1 = tm.rands_array(10, 100) lev2 = tm.rands_array(10, 130) - lab1 = np.random.default_rng(2).randint(0, 100, size=500) - lab2 = np.random.default_rng(2).randint(0, 130, size=500) + lab1 = np.random.default_rng(2).integers(0, 100, 
size=500) + lab2 = np.random.default_rng(2).integers(0, 130, size=500) df = DataFrame( { - "value": np.random.default_rng(2).randn(500), + "value": np.random.default_rng(2).standard_normal(500), "key1": lev1.take(lab1), "key2": lev2.take(lab2), } diff --git a/pandas/tests/groupby/test_skew.py b/pandas/tests/groupby/test_skew.py index 203c1cce213ee..89883c04519df 100644 --- a/pandas/tests/groupby/test_skew.py +++ b/pandas/tests/groupby/test_skew.py @@ -12,11 +12,11 @@ def test_groupby_skew_equivalence(): ncols = 2 nan_frac = 0.05 - arr = np.random.default_rng(2).randn(nrows, ncols) + arr = np.random.default_rng(2).standard_normal(nrows, ncols) arr[np.random.default_rng(2).random(nrows) < nan_frac] = np.nan df = pd.DataFrame(arr) - grps = np.random.default_rng(2).randint(0, ngroups, size=nrows) + grps = np.random.default_rng(2).integers(0, ngroups, size=nrows) gb = df.groupby(grps) result = gb.skew() diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 917fc56e0bd19..9f7f537ac2402 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -735,7 +735,7 @@ def test_groupby_with_timezone_selection(self): df = DataFrame( { - "factor": np.random.default_rng(2).randint(0, 3, size=60), + "factor": np.random.default_rng(2).integers(0, 3, size=60), "time": date_range("01/01/2000 00:00", periods=60, freq="s", tz="UTC"), } ) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 6da9113f18d8d..7c50124e57e29 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -52,7 +52,7 @@ def seed_df(seed_nans, n, m): { "1st": np.random.default_rng(2).choice(list("abcd"), n), "2nd": np.random.default_rng(2).choice(days, n), - "3rd": np.random.default_rng(2).randint(1, m + 1, n), + "3rd": np.random.default_rng(2).integers(1, m + 1, n), } ) diff --git a/pandas/tests/groupby/transform/test_numba.py 
b/pandas/tests/groupby/transform/test_numba.py index 3216e84e52bb3..965691e31d772 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -245,8 +245,8 @@ def test_multilabel_numba_vs_cython(numba_supported_reductions): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) gb = df.groupby(["A", "B"]) @@ -261,8 +261,8 @@ def test_multilabel_udf_numba_vs_cython(): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) gb = df.groupby(["A", "B"]) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 444b41f7cc36e..8d7bcf367144f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -59,7 +59,7 @@ def demean(arr): return arr - arr.mean(axis=0) people = DataFrame( - np.random.default_rng(2).randn(5, 5), + np.random.default_rng(2).standard_normal(5, 5), columns=["a", "b", "c", "d", "e"], index=["Joe", "Steve", "Wes", "Jim", "Travis"], ) @@ -84,7 +84,10 @@ def demean(arr): def test_transform_fast(): df = DataFrame( - {"id": np.arange(100000) / 3, "val": np.random.default_rng(2).randn(100000)} + { + "id": np.arange(100000) / 3, + "val": np.random.default_rng(2).standard_normal(100000), + } ) grp = df.groupby("id")["val"] @@ -222,7 +225,7 @@ def test_transform_axis_ts(tsframe): r = len(base.index) c = len(base.columns) tso = DataFrame( - 
np.random.default_rng(2).randn(r, c), + np.random.default_rng(2).standard_normal((r, c)), index=base.index, columns=base.columns, dtype="float64", @@ -655,10 +658,10 @@ def f(group): ) def test_cython_transform_series(op, args, targop): # GH 4095 - s = Series(np.random.default_rng(2).randn(1000)) + s = Series(np.random.default_rng(2).standard_normal(1000)) s_missing = s.copy() s_missing.iloc[2:10] = np.nan - labels = np.random.default_rng(2).randint(0, 50, size=1000).astype(float) + labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) # series for data in [s, s_missing]: @@ -727,7 +730,7 @@ def test_groupby_cum_skipna(op, skipna, input, exp): @pytest.fixture def frame(): - floating = Series(np.random.default_rng(2).randn(10)) + floating = Series(np.random.default_rng(2).standard_normal(10)) floating_missing = floating.copy() floating_missing.iloc[2:7] = np.nan strings = list("abcde") * 2 @@ -769,7 +772,7 @@ def frame_mi(frame): @pytest.mark.parametrize( "gb_target", [ - {"by": np.random.default_rng(2).randint(0, 50, size=10).astype(float)}, + {"by": np.random.default_rng(2).integers(0, 50, size=10).astype(float)}, {"level": 0}, {"by": "string"}, # {"by": 'string_missing'}]: @@ -819,7 +822,7 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target): @pytest.mark.parametrize( "gb_target", [ - {"by": np.random.default_rng(2).randint(0, 50, size=10).astype(float)}, + {"by": np.random.default_rng(2).integers(0, 50, size=10).astype(float)}, {"level": 0}, {"by": "string"}, # {"by": 'string_missing'}]: @@ -894,7 +897,7 @@ def test_transform_with_non_scalar_group(): ] ) df = DataFrame( - np.random.default_rng(2).randint(1, 10, (4, 12)), + np.random.default_rng(2).integers(1, 10, (4, 12)), columns=cols, index=["A", "C", "G", "T"], ) @@ -1423,11 +1426,11 @@ def test_null_group_lambda_self(sort, dropna, keys): # Whether a group contains a null value or not nulls_grouper = nulls1 if len(keys) == 1 else nulls1 | nulls2 - a1 =
np.random.default_rng(2).randint(0, 5, size=size).astype(float) + a1 = np.random.default_rng(2).integers(0, 5, size=size).astype(float) a1[nulls1] = np.nan - a2 = np.random.default_rng(2).randint(0, 5, size=size).astype(float) + a2 = np.random.default_rng(2).integers(0, 5, size=size).astype(float) a2[nulls2] = np.nan - values = np.random.default_rng(2).randint(0, 5, size=a1.shape) + values = np.random.default_rng(2).integers(0, 5, size=a1.shape) df = DataFrame({"A1": a1, "A2": a2, "B": values}) expected_values = values diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 917cc50347fba..64cbe657a8aff 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -204,7 +204,7 @@ def test_repr_roundtrip(self): # long format # this is not reprable - ci = CategoricalIndex(np.random.default_rng(2).randint(0, 5, size=100)) + ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100)) str(ci) def test_isin(self): diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 3a1e8150b3691..49eb79da616e7 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -224,7 +224,7 @@ def test_get_indexer_requires_unique(self): msg = "Reindexing only valid with uniquely valued Index objects" for n in [1, 2, 5, len(ci)]: - finder = oidx[np.random.default_rng(2).randint(0, len(ci), size=n)] + finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)] with pytest.raises(InvalidIndexError, match=msg): ci.get_indexer(finder) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index f2d518aba1432..8e1b41095e056 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -427,7 +427,7 @@ def 
test_get_loc_time_obj2(self): for n in ns: idx = date_range("2014-11-26", periods=n, freq="S") - ts = pd.Series(np.random.default_rng(2).randn(n), index=idx) + ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx) locs = np.arange(start, n, step, dtype=np.intp) result = ts.index.get_loc(key) diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index 56d35e6c0c930..ccfdb55fc8119 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -26,7 +26,9 @@ def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe( 10, 10, - data_gen_f=lambda *args, **kwargs: np.random.default_rng(2).randn(), + data_gen_f=lambda *args, **kwargs: np.random.default_rng( + 2 + ).standard_normal(), r_idx_type="i", c_idx_type="dt", ) @@ -45,7 +47,7 @@ def test_join_with_period_index(self, join_type): df = tm.makeCustomDataframe( 10, 10, - data_gen_f=lambda *args: np.random.default_rng(2).randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).integers(2), c_idx_type="p", r_idx_type="dt", ) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index ae872ee72d4c0..3f163ad882fca 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -21,7 +21,7 @@ class TestSlicing: def test_string_index_series_name_converted(self): # GH#1644 df = DataFrame( - np.random.default_rng(2).randn(10, 4), + np.random.default_rng(2).standard_normal(10, 4), index=date_range("1/1/2000", periods=10), ) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 5149bd68e10b7..adf7acfa59e0c 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -172,10 +172,10 @@ def test_union_freq_infer(self): def 
test_union_dataframe_index(self): rng1 = date_range("1/1/1999", "1/1/2012", freq="MS") - s1 = Series(np.random.default_rng(2).randn(len(rng1)), rng1) + s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1) rng2 = date_range("1/1/1980", "12/1/2001", freq="MS") - s2 = Series(np.random.default_rng(2).randn(len(rng2)), rng2) + s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2) df = DataFrame({"s1": s1, "s2": s2}) exp = date_range("1/1/1980", "1/1/2012", freq="MS") diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 9100ba100afc1..9687ac34da4b7 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -322,7 +322,7 @@ def test_set_value_keeps_names(): lev2 = ["1", "2", "3"] * 2 idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"]) df = pd.DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), columns=["one", "two", "three", "four"], index=idx, ) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 9d39e8f0dcd09..2aa6325a7dab7 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -246,8 +246,8 @@ def test_remove_unused_levels_large(first_type, second_type): size = 1 << 16 df = DataFrame( { - "first": rng.randint(0, 1 << 13, size).astype(first_type), - "second": rng.randint(0, 1 << 10, size).astype(second_type), + "first": rng.integers(0, 1 << 13, size).astype(first_type), + "second": rng.integers(0, 1 << 10, size).astype(second_type), - "third": rng.rand(size), + "third": rng.random(size), } ) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index fe241198d0703..f93f79967d07a 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -110,7 +110,7 @@ def test_getitem_index(self): def
test_getitem_partial(self): rng = period_range("2007-01", periods=50, freq="M") - ts = Series(np.random.default_rng(2).randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) with pytest.raises(KeyError, match=r"^'2006'$"): ts["2006"] diff --git a/pandas/tests/indexes/period/test_join.py b/pandas/tests/indexes/period/test_join.py index e70a6922a3d3c..191dba2be0c5d 100644 --- a/pandas/tests/indexes/period/test_join.py +++ b/pandas/tests/indexes/period/test_join.py @@ -38,7 +38,7 @@ def test_join_does_not_recur(self): df = tm.makeCustomDataframe( 3, 2, - data_gen_f=lambda *args: np.random.default_rng(2).randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).integers(2), c_idx_type="p", r_idx_type="dt", ) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index aa2231d1b8f2b..9a2eece108652 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -15,7 +15,7 @@ class TestPeriodIndex: def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") - ts = Series(np.random.default_rng(2).randn(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx) original = ts.copy() result = ts["2007"] @@ -29,7 +29,7 @@ def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") - ts = Series(np.random.default_rng(2).randn(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx) result = ts["2007"] expected = ts[idx == "2007"] diff --git a/pandas/tests/indexes/test_subclass.py b/pandas/tests/indexes/test_subclass.py index 721ce0bc30a95..aa7433cd38f32 100644 --- a/pandas/tests/indexes/test_subclass.py +++ 
b/pandas/tests/indexes/test_subclass.py @@ -32,7 +32,7 @@ def test_insert_fallback_to_base_index(): tm.assert_index_equal(result, expected) df = DataFrame( - np.random.default_rng(2).randn(2, 3), + np.random.default_rng(2).standard_normal(2, 3), columns=idx, index=Index([1, 2], name="string"), ) diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py index 1acae47ef3859..f3b12aa22bab0 100644 --- a/pandas/tests/indexes/timedeltas/test_join.py +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -28,7 +28,9 @@ def test_does_not_convert_mixed_integer(self): df = tm.makeCustomDataframe( 10, 10, - data_gen_f=lambda *args, **kwargs: np.random.default_rng(2).randn(), + data_gen_f=lambda *args, **kwargs: np.random.default_rng( + 2 + ).standard_normal(), r_idx_type="i", c_idx_type="td", ) diff --git a/pandas/tests/indexing/conftest.py b/pandas/tests/indexing/conftest.py index 075bb43a207f7..a9c4782246b75 100644 --- a/pandas/tests/indexing/conftest.py +++ b/pandas/tests/indexing/conftest.py @@ -18,7 +18,7 @@ def series_ints(): @pytest.fixture def frame_ints(): return DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3), ) @@ -35,7 +35,7 @@ def series_uints(): @pytest.fixture def frame_uints(): return DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), index=Index(range(0, 8, 2), dtype=np.uint64), columns=Index(range(0, 12, 3), dtype=np.uint64), ) @@ -43,27 +43,31 @@ def frame_uints(): @pytest.fixture def series_labels(): - return Series(np.random.default_rng(2).randn(4), index=list("abcd")) + return Series(np.random.default_rng(2).standard_normal(4), index=list("abcd")) @pytest.fixture def frame_labels(): return DataFrame( - np.random.default_rng(2).randn(4, 4), index=list("abcd"), columns=list("ABCD") + np.random.default_rng(2).standard_normal(4, 4), + index=list("abcd"), + 
columns=list("ABCD"), ) @pytest.fixture def series_ts(): return Series( - np.random.default_rng(2).randn(4), index=date_range("20130101", periods=4) + np.random.default_rng(2).standard_normal(4), + index=date_range("20130101", periods=4), ) @pytest.fixture def frame_ts(): return DataFrame( - np.random.default_rng(2).randn(4, 4), index=date_range("20130101", periods=4) + np.random.default_rng(2).standard_normal(4, 4), + index=date_range("20130101", periods=4), ) @@ -77,7 +81,7 @@ def series_floats(): @pytest.fixture def frame_floats(): return DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), index=Index(range(0, 8, 2), dtype=np.float64), columns=Index(range(0, 12, 3), dtype=np.float64), ) @@ -85,12 +89,14 @@ def frame_floats(): @pytest.fixture def series_mixed(): - return Series(np.random.default_rng(2).randn(4), index=[2, 4, "null", 8]) + return Series(np.random.default_rng(2).standard_normal(4), index=[2, 4, "null", 8]) @pytest.fixture def frame_mixed(): - return DataFrame(np.random.default_rng(2).randn(4, 4), index=[2, 4, "null", 8]) + return DataFrame( + np.random.default_rng(2).standard_normal(4, 4), index=[2, 4, "null", 8] + ) @pytest.fixture @@ -106,7 +112,7 @@ def series_empty(): @pytest.fixture def frame_multi(): return DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), index=MultiIndex.from_product([[1, 2], [3, 4]]), columns=MultiIndex.from_product([[5, 6], [7, 8]]), ) diff --git a/pandas/tests/indexing/multiindex/test_datetime.py b/pandas/tests/indexing/multiindex/test_datetime.py index 7876a65648a7f..d325971e7baf6 100644 --- a/pandas/tests/indexing/multiindex/test_datetime.py +++ b/pandas/tests/indexing/multiindex/test_datetime.py @@ -19,7 +19,7 @@ def test_multiindex_period_datetime(): idx1 = Index(["a", "a", "a", "b", "b"]) idx2 = period_range("2012-01", periods=len(idx1), freq="M") - s = Series(np.random.default_rng(2).randn(len(idx1)), [idx1, idx2]) + 
s = Series(np.random.default_rng(2).standard_normal(len(idx1)), [idx1, idx2]) # try Period as index expected = s.iloc[0] diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index cae149248f7be..f6b9a8bfdd210 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -45,7 +45,7 @@ def test_series_getitem_duplicates_multiindex(level0_value): codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=["tag", "day"], ) - arr = np.random.default_rng(2).randn(len(index), 1) + arr = np.random.default_rng(2).standard_normal(len(index), 1) df = DataFrame(arr, index=index, columns=["val"]) # confirm indexing on missing value raises KeyError @@ -187,7 +187,7 @@ def test_frame_mixed_depth_get(): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).randn(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) result = df["a"] expected = df["a", "", ""].rename("a") diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index c387ad1f4c444..9275cc376ad44 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -17,7 +17,7 @@ def simple_multiindex_dataframe(): random data by default. 
""" - data = np.random.default_rng(2).randn(3, 3) + data = np.random.default_rng(2).standard_normal(3, 3) return DataFrame( data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] ) @@ -67,7 +67,7 @@ def test_iloc_getitem_multiple_items(): # GH 5528 tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]]) index = MultiIndex.from_tuples(tup) - df = DataFrame(np.random.default_rng(2).randn(4, 4), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), index=index) result = df.iloc[[2, 3]] expected = df.xs("b", drop_level=False) tm.assert_frame_equal(result, expected) @@ -75,7 +75,7 @@ def test_iloc_getitem_multiple_items(): def test_iloc_getitem_labels(): # this is basically regular indexing - arr = np.random.default_rng(2).randn(4, 3) + arr = np.random.default_rng(2).standard_normal(4, 3) df = DataFrame( arr, columns=[["i", "i", "j"], ["A", "A", "B"]], diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index 94e3fbd9b6946..c6fc1659500e6 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -27,13 +27,13 @@ def cols(): @pytest.fixture def vals(n): vals = [ - np.random.default_rng(2).randint(0, 10, n), + np.random.default_rng(2).integers(0, 10, n), np.random.default_rng(2).choice(list("abcdefghij"), n), np.random.default_rng(2).choice( pd.date_range("20141009", periods=10).tolist(), n ), np.random.default_rng(2).choice(list("ZYXWVUTSRQ"), n), - np.random.default_rng(2).randn(n), + np.random.default_rng(2).standard_normal(n), ] vals = list(map(tuple, zip(*vals))) return vals @@ -43,7 +43,7 @@ def vals(n): def keys(n, m, vals): # bunch of keys for testing keys = [ - np.random.default_rng(2).randint(0, 11, m), + np.random.default_rng(2).integers(0, 11, m), np.random.default_rng(2).choice(list("abcdefghijk"), m), np.random.default_rng(2).choice( pd.date_range("20141009", 
periods=11).tolist(), m diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index b8333da5121be..7b1840469f471 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -29,7 +29,7 @@ def frame_random_data_integer_multi_index(): levels = [[0, 1], [0, 1, 2]] codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, codes=codes) - return DataFrame(np.random.default_rng(2).randn(6, 2), index=index) + return DataFrame(np.random.default_rng(2).standard_normal(6, 2), index=index) class TestMultiIndexLoc: @@ -76,7 +76,7 @@ def test_loc_getitem_general(self, any_real_numpy_dtype): def test_loc_getitem_multiindex_missing_label_raises(self): # GH#21593 df = DataFrame( - np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -155,7 +155,7 @@ def test_loc_getitem_array(self): def test_loc_multiindex_labels(self): df = DataFrame( - np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), columns=[["i", "i", "j"], ["A", "A", "B"]], index=[["i", "i", "j"], ["X", "X", "Y"]], ) @@ -182,7 +182,7 @@ def test_loc_multiindex_labels(self): def test_loc_multiindex_ints(self): df = DataFrame( - np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -192,7 +192,7 @@ def test_loc_multiindex_ints(self): def test_loc_multiindex_missing_label_raises(self): df = DataFrame( - np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -204,7 +204,7 @@ def test_loc_multiindex_missing_label_raises(self): def test_loc_multiindex_list_missing_label(self, key, pos): # GH 27148 - lists with missing labels _do_ raise df = DataFrame( - 
np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -233,7 +233,7 @@ def test_loc_multiindex_indexer_none(self): attribute_values = ["Value" + str(i) for i in range(5)] index = MultiIndex.from_product([attributes, attribute_values]) - df = 0.1 * np.random.default_rng(2).randn(10, 1 * 5) + 0.5 + df = 0.1 * np.random.default_rng(2).standard_normal(10, 1 * 5) + 0.5 df = DataFrame(df, columns=index) result = df[attributes] tm.assert_frame_equal(result, df) @@ -280,7 +280,8 @@ def test_loc_multiindex_incomplete(self): def test_get_loc_single_level(self, single_level_multiindex): single_level = single_level_multiindex s = Series( - np.random.default_rng(2).randn(len(single_level)), index=single_level + np.random.default_rng(2).standard_normal(len(single_level)), + index=single_level, ) for k in single_level.values: s[k] @@ -290,13 +291,13 @@ def test_loc_getitem_int_slice(self): # loc should treat integer slices like label slices index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]]) - df = DataFrame(np.random.default_rng(2).randn(6, 6), index, index) + df = DataFrame(np.random.default_rng(2).standard_normal(6, 6), index, index) result = df.loc[6:8, :] expected = df tm.assert_frame_equal(result, expected) index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]]) - df = DataFrame(np.random.default_rng(2).randn(6, 6), index, index) + df = DataFrame(np.random.default_rng(2).standard_normal(6, 6), index, index) result = df.loc[20:30, :] expected = df.iloc[2:] tm.assert_frame_equal(result, expected) @@ -475,7 +476,9 @@ def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer): # empty indexer multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"])) df = DataFrame( - np.random.default_rng(2).randn(5, 6), index=range(5), columns=multi_index + np.random.default_rng(2).standard_normal(5, 6), + index=range(5), + columns=multi_index, 
) df = df.sort_index(level=0, axis=1) @@ -503,8 +506,8 @@ def test_loc_getitem_tuple_plus_slice(): { "a": np.arange(10), "b": np.arange(10), - "c": np.random.default_rng(2).randn(10), - "d": np.random.default_rng(2).randn(10), + "c": np.random.default_rng(2).standard_normal(10), + "d": np.random.default_rng(2).standard_normal(10), } ).set_index(["a", "b"]) expected = df.loc[0, 0] diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 2bab1c2b77b1c..90ccaa693546f 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -117,7 +117,7 @@ def test_multiindex_with_datatime_level_preserves_freq(self): idx = Index(range(2), name="A") dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B") mi = MultiIndex.from_product([idx, dti]) - df = DataFrame(np.random.default_rng(2).randn(14, 2), index=mi) + df = DataFrame(np.random.default_rng(2).standard_normal(14, 2), index=mi) result = df.loc[0].index tm.assert_index_equal(result, dti) assert result.freq == dti.freq diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index e8c5aa789f714..31896fbd4126e 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -66,7 +66,9 @@ def test_xs_partial( ], ) df = DataFrame( - np.random.default_rng(2).randn(8, 4), index=index, columns=list("abcd") + np.random.default_rng(2).standard_normal(8, 4), + index=index, + columns=list("abcd"), ) result = df.xs(("foo", "one")) @@ -252,7 +254,9 @@ def test_loc_getitem_partial_both_axis(): iterables = [["a", "b"], [2, 1]] columns = MultiIndex.from_product(iterables, names=["col1", "col2"]) rows = MultiIndex.from_product(iterables, names=["row1", "row2"]) - df = DataFrame(np.random.default_rng(2).randn(4, 4), index=rows, columns=columns) + df = DataFrame( + 
np.random.default_rng(2).standard_normal(4, 4), index=rows, columns=columns + ) expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1) result = df.loc["a", "b"] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index e7c07afbcf42a..f7dd8def0ced5 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -140,7 +140,9 @@ def test_multiindex_setitem(self): ] df_orig = DataFrame( - np.random.default_rng(2).randn(6, 3), index=arrays, columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal(6, 3), + index=arrays, + columns=["A", "B", "C"], ).sort_index() expected = df_orig.loc[["bar"]] * 2 @@ -189,7 +191,7 @@ def test_multiindex_assignment(self): # mixed dtype df = DataFrame( - np.random.default_rng(2).randint(5, 10, size=9).reshape(3, 3), + np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3), columns=list("abc"), index=[[4, 4, 8], [8, 10, 12]], ) @@ -205,7 +207,7 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): arr = np.array([0.0, 1.0]) df = DataFrame( - np.random.default_rng(2).randint(5, 10, size=9).reshape(3, 3), + np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3), columns=list("abc"), index=[[4, 4, 8], [8, 10, 12]], dtype=np.int64, @@ -254,7 +256,7 @@ def test_groupby_example(self): index_cols = col_names[:5] df = DataFrame( - np.random.default_rng(2).randint(5, size=(NUM_ROWS, NUM_COLS)), + np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)), dtype=np.int64, columns=col_names, ) @@ -334,7 +336,7 @@ def test_frame_getitem_setitem_multislice(self): def test_frame_setitem_multi_column(self): df = DataFrame( - np.random.default_rng(2).randn(10, 4), + np.random.default_rng(2).standard_normal(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]], ) @@ -387,7 +389,7 @@ def test_loc_getitem_setitem_slice_integers(self, 
frame_or_series): ) obj = DataFrame( - np.random.default_rng(2).randn(len(index), 4), + np.random.default_rng(2).standard_normal(len(index), 4), index=index, columns=["a", "b", "c", "d"], ) @@ -472,7 +474,7 @@ def test_setitem_new_column_mixed_depth(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).randn(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) result = df.copy() expected = df.copy() diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py index 9b310c32920dc..cf3fa5296c97c 100644 --- a/pandas/tests/indexing/multiindex/test_sorted.py +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -141,7 +141,7 @@ def test_series_getitem_not_sorted(self): ] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) - s = Series(np.random.default_rng(2).randn(8), index=index) + s = Series(np.random.default_rng(2).standard_normal(8), index=index) arrays = [np.array(x) for x in zip(*index.values)] diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index dc48bf261746b..ea80b7e8ccd83 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -142,7 +142,7 @@ def test_at_with_duplicate_axes_requires_scalar_lookup(self): # GH#33041 check that falling back to loc doesn't allow non-scalar # args to slip in - arr = np.random.default_rng(2).randn(6).reshape(3, 2) + arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2) df = DataFrame(arr, columns=["A", "A"]) msg = "Invalid call for scalar access" diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 8f27af641014f..c8957dd02ef82 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -403,7 +403,9 @@ def test_loc_getitem_listlike_unused_category_raises_keyerror(self): def 
test_ix_categorical_index(self): # GH 12531 df = DataFrame( - np.random.default_rng(2).randn(3, 3), index=list("ABC"), columns=list("XYZ") + np.random.default_rng(2).standard_normal(3, 3), + index=list("ABC"), + columns=list("XYZ"), ) cdf = df.copy() cdf.index = CategoricalIndex(df.index) @@ -426,7 +428,9 @@ def test_ix_categorical_index(self): def test_ix_categorical_index_non_unique(self): # non-unique df = DataFrame( - np.random.default_rng(2).randn(3, 3), index=list("ABA"), columns=list("XYX") + np.random.default_rng(2).standard_normal(3, 3), + index=list("ABA"), + columns=list("XYX"), ) cdf = df.copy() cdf.index = CategoricalIndex(df.index) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 91b77d8ecc6af..bb30f9c844429 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -24,7 +24,7 @@ def random_text(nobs=100): # Construct a DataFrame where each row is a random slice from 'letters' - idxs = np.random.default_rng(2).randint(len(letters), size=(nobs, 2)) + idxs = np.random.default_rng(2).integers(len(letters), size=(nobs, 2)) idxs.sort(axis=1) strings = [letters[x[0] : x[1]] for x in idxs] @@ -400,7 +400,7 @@ def test_detect_chained_assignment_is_copy(self): @pytest.mark.arm_slow def test_detect_chained_assignment_sorting(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) ser = df.iloc[:, 0].sort_values() tm.assert_series_equal(ser, df.iloc[:, 0].sort_values()) @@ -445,7 +445,7 @@ def test_detect_chained_assignment_changing_dtype( df = DataFrame( { "A": date_range("20130101", periods=5), - "B": np.random.default_rng(2).randn(5), + "B": np.random.default_rng(2).standard_normal(5), "C": np.arange(5, dtype="int64"), "D": ["a", "b", "c", "d", "e"], } diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 
807df5e08862b..0a90cff2cee21 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -15,7 +15,7 @@ def gen_obj(klass, index): obj = Series(np.arange(len(index)), index=index) else: obj = DataFrame( - np.random.default_rng(2).randn(len(index), len(index)), + np.random.default_rng(2).standard_normal(len(index), len(index)), index=index, columns=index, ) @@ -353,7 +353,7 @@ def test_slice_integer_frame_getitem(self, index_func): # similar to above, but on the getitem dim (of a DataFrame) index = index_func(5) - s = DataFrame(np.random.default_rng(2).randn(5, 2), index=index) + s = DataFrame(np.random.default_rng(2).standard_normal(5, 2), index=index) # getitem for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: @@ -408,7 +408,7 @@ def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func): # similar to above, but on the getitem dim (of a DataFrame) index = index_func(5) - s = DataFrame(np.random.default_rng(2).randn(5, 2), index=index) + s = DataFrame(np.random.default_rng(2).standard_normal(5, 2), index=index) # setitem sc = s.copy() diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index 083a6e83649b1..4497c16efdfda 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -24,7 +24,7 @@ def test_iat_duplicate_columns(): def test_iat_getitem_series_with_period_index(): # GH#4390, iat incorrectly indexing index = period_range("1/1/2001", periods=10) - ser = Series(np.random.default_rng(2).randn(10), index=index) + ser = Series(np.random.default_rng(2).standard_normal(10), index=index) expected = ser[index[0]] result = ser.iat[0] assert expected == result diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index b2edc25e41c3d..f4fca2f1ec8bf 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -238,7 +238,9 @@ def check(result, expected): result.dtypes 
tm.assert_frame_equal(result, expected) - dfl = DataFrame(np.random.default_rng(2).randn(5, 2), columns=list("AB")) + dfl = DataFrame( + np.random.default_rng(2).standard_normal(5, 2), columns=list("AB") + ) check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[])) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) @@ -262,7 +264,7 @@ def check(result, expected): def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): # GH 25753 df = DataFrame( - np.random.default_rng(2).randn(len(index), len(columns)), + np.random.default_rng(2).standard_normal(len(index), len(columns)), index=index, columns=columns, ) @@ -404,10 +406,11 @@ def test_iloc_getitem_slice(self): def test_iloc_getitem_slice_dups(self): df1 = DataFrame( - np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"] + np.random.default_rng(2).standard_normal(10, 4), + columns=["A", "A", "B", "B"], ) df2 = DataFrame( - np.random.default_rng(2).randint(0, 10, size=20).reshape(10, 2), + np.random.default_rng(2).integers(0, 10, size=20).reshape(10, 2), columns=["A", "C"], ) @@ -432,7 +435,7 @@ def test_iloc_getitem_slice_dups(self): def test_iloc_setitem(self): df = DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3), ) @@ -550,7 +553,7 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( # is redundant with another test comparing iloc against loc def test_iloc_getitem_frame(self): df = DataFrame( - np.random.default_rng(2).randn(10, 4), + np.random.default_rng(2).standard_normal(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2), ) @@ -600,7 +603,7 @@ def test_iloc_getitem_frame(self): def test_iloc_getitem_labelled_frame(self): # try with labelled frame df = DataFrame( - np.random.default_rng(2).randn(10, 4), + np.random.default_rng(2).standard_normal(10, 4), index=list("abcdefghij"), columns=list("ABCD"), ) @@ -636,7 +639,7 
@@ def test_iloc_getitem_doc_issue(self, using_array_manager): # multi axis slicing issue with single block # surfaced in GH 6059 - arr = np.random.default_rng(2).randn(6, 4) + arr = np.random.default_rng(2).standard_normal(6, 4) index = date_range("20130101", periods=6) columns = list("ABCD") df = DataFrame(arr, index=index, columns=columns) @@ -661,7 +664,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): tm.assert_frame_equal(result, expected) # related - arr = np.random.default_rng(2).randn(6, 4) + arr = np.random.default_rng(2).standard_normal(6, 4) index = list(range(0, 12, 2)) columns = list(range(0, 8, 2)) df = DataFrame(arr, index=index, columns=columns) @@ -676,7 +679,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): def test_iloc_setitem_series(self): df = DataFrame( - np.random.default_rng(2).randn(10, 4), + np.random.default_rng(2).standard_normal(10, 4), index=list("abcdefghij"), columns=list("ABCD"), ) @@ -690,7 +693,7 @@ def test_iloc_setitem_series(self): result = df.iloc[:, 2:3] tm.assert_frame_equal(result, expected) - s = Series(np.random.default_rng(2).randn(10), index=range(0, 20, 2)) + s = Series(np.random.default_rng(2).standard_normal(10), index=range(0, 20, 2)) s.iloc[1] = 1 result = s.iloc[1] @@ -976,9 +979,11 @@ def test_setitem_mix_of_nan_and_interval(self, not_na, nulls_fixture): def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): idx = Index([]) obj = DataFrame( - np.random.default_rng(2).randn(len(idx), len(idx)), index=idx, columns=idx + np.random.default_rng(2).standard_normal(len(idx), len(idx)), + index=idx, + columns=idx, ) - nd3 = np.random.default_rng(2).randint(5, size=(2, 2, 2)) + nd3 = np.random.default_rng(2).integers(5, size=(2, 2, 2)) msg = f"Cannot set values with ndim > {obj.ndim}" with pytest.raises(ValueError, match=msg): @@ -1055,7 +1060,7 @@ def test_iloc_setitem_dictionary_value(self): def test_iloc_getitem_float_duplicates(self): df = DataFrame( - 
np.random.default_rng(2).randn(3, 3), + np.random.default_rng(2).standard_normal(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc"), ) @@ -1073,7 +1078,7 @@ def test_iloc_getitem_float_duplicates(self): tm.assert_series_equal(df.loc[0.2, "a"], expect) df = DataFrame( - np.random.default_rng(2).randn(4, 3), + np.random.default_rng(2).standard_normal(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc"), ) @@ -1411,7 +1416,9 @@ def test_frame_iloc_setitem_callable(self): class TestILocSeries: def test_iloc(self, using_copy_on_write): - ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) + ser = Series( + np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2)) + ) ser_original = ser.copy() for i in range(len(ser)): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 321772e73deff..67bd45fb2807a 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -81,7 +81,7 @@ def test_getitem_ndarray_3d( # GH 25567 obj = gen_obj(frame_or_series, index) idxr = indexer_sli(obj) - nd3 = np.random.default_rng(2).randint(5, size=(2, 2, 2)) + nd3 = np.random.default_rng(2).integers(5, size=(2, 2, 2)) msgs = [] if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]: @@ -125,7 +125,7 @@ def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): # GH 25567 obj = gen_obj(frame_or_series, index) idxr = indexer_sli(obj) - nd3 = np.random.default_rng(2).randint(5, size=(2, 2, 2)) + nd3 = np.random.default_rng(2).integers(5, size=(2, 2, 2)) if indexer_sli is tm.iloc: err = ValueError @@ -279,7 +279,9 @@ def test_dups_fancy_indexing_not_in_order(self): def test_dups_fancy_indexing_only_missing_label(self): # List containing only missing label - dfnu = DataFrame(np.random.default_rng(2).randn(5, 3), index=list("AABCD")) + dfnu = DataFrame( + np.random.default_rng(2).standard_normal(5, 3), index=list("AABCD") + ) with pytest.raises( 
KeyError, match=re.escape( @@ -305,7 +307,8 @@ def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values df = DataFrame( - np.random.default_rng(2).randn(5, 5), columns=["A", "B", "B", "B", "A"] + np.random.default_rng(2).standard_normal(5, 5), + columns=["A", "B", "B", "B", "A"], ) with pytest.raises(KeyError, match="not in index"): @@ -314,7 +317,7 @@ def test_dups_fancy_indexing2(self): def test_dups_fancy_indexing3(self): # GH 6504, multi-axis indexing df = DataFrame( - np.random.default_rng(2).randn(9, 2), + np.random.default_rng(2).standard_normal(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"], ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 147e57ddba253..17f5894a64d2c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -782,7 +782,7 @@ def test_loc_setitem_empty_frame(self): def test_loc_setitem_frame(self): df = DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), index=list("abcd"), columns=list("ABCD"), ) @@ -1027,7 +1027,7 @@ def test_loc_non_unique_memory_error(self, length, l2): df = pd.concat( [ DataFrame( - np.random.default_rng(2).randn(length, len(columns)), + np.random.default_rng(2).standard_normal(length, len(columns)), index=np.arange(length), columns=columns, ), @@ -1098,7 +1098,7 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): assert (sliced_df["a"] == 4).all() # These should not return copies - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) if using_copy_on_write: assert df[0] is not df.loc[:, 0] else: @@ -1269,7 +1269,9 @@ def test_loc_getitem_time_object(self, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") mask = (rng.hour == 9) & (rng.minute == 30) - obj = DataFrame(np.random.default_rng(2).randn(len(rng), 3), index=rng) + obj = DataFrame( + 
np.random.default_rng(2).standard_normal(len(rng), 3), index=rng + ) obj = tm.get_obj(obj, frame_or_series) result = obj.loc[time(9, 30)] @@ -1467,7 +1469,9 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): def test_loc_setitem_time_key(self, using_array_manager): index = date_range("2012-01-01", "2012-01-05", freq="30min") - df = DataFrame(np.random.default_rng(2).randn(len(index), 5), index=index) + df = DataFrame( + np.random.default_rng(2).standard_normal(len(index), 5), index=index + ) akey = time(12, 0, 0) bkey = slice(time(13, 0, 0), time(14, 0, 0)) ainds = [24, 72, 120, 168] @@ -1535,7 +1539,7 @@ def test_loc_setitem_td64_non_nano(self): tm.assert_series_equal(ser, expected) def test_loc_setitem_2d_to_1d_raises(self): - data = np.random.default_rng(2).randn(2, 2) + data = np.random.default_rng(2).standard_normal(2, 2) # float64 dtype to avoid upcast when trying to set float data ser = Series(range(2), dtype="float64") @@ -1602,7 +1606,7 @@ def test_loc_getitem_index_namedtuple(self): def test_loc_setitem_single_column_mixed(self): df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), index=["a", "b", "c", "d", "e"], columns=["foo", "bar", "baz"], ) @@ -2757,7 +2761,7 @@ def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_col def test_loc_setitem_float_intindex(): # GH 8720 - rand_data = np.random.default_rng(2).randn(8, 4) + rand_data = np.random.default_rng(2).standard_normal(8, 4) result = DataFrame(rand_data) result.loc[:, 0.5] = np.nan expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) @@ -2844,7 +2848,7 @@ def test_loc_datetimelike_mismatched_dtypes(): # GH#32650 dont mix and match datetime/timedelta/period dtypes df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), columns=["a", "b", "c"], index=date_range("2012", freq="H", periods=5), ) @@ -2865,7 +2869,7 @@ def 
test_loc_datetimelike_mismatched_dtypes(): def test_loc_with_period_index_indexer(): # GH#4125 idx = pd.period_range("2002-01", "2003-12", freq="M") - df = DataFrame(np.random.default_rng(2).randn(24, 10), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal(24, 10), index=idx) tm.assert_frame_equal(df, df.loc[idx]) tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df, df.loc[list(idx)]) @@ -2875,7 +2879,7 @@ def test_loc_with_period_index_indexer(): def test_loc_setitem_multiindex_timestamp(): # GH#13831 - vals = np.random.default_rng(2).randn(8, 6) + vals = np.random.default_rng(2).standard_normal(8, 6) idx = date_range("1/1/2000", periods=8) cols = ["A", "B", "C", "D", "E", "F"] exp = DataFrame(vals, index=idx, columns=cols) @@ -3050,7 +3054,9 @@ def test_loc_getitem_not_monotonic(self, datetime_series): ts2.loc[d1:d2] = 0 def test_loc_getitem_setitem_integer_slice_keyerrors(self): - ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) + ser = Series( + np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2)) + ) # this is OK cp = ser.copy() @@ -3114,7 +3120,9 @@ def test_basic_setitem_with_labels(self, datetime_series): def test_loc_setitem_listlike_of_ints(self): # integer indexes, be careful - ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) + ser = Series( + np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2)) + ) inds = [0, 4, 6] arr_inds = np.array([0, 4, 6]) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 3506efe9f09d9..507b86452efa2 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -332,7 +332,7 @@ def test_partial_setting2(self): # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame( - np.random.default_rng(2).randn(8, 4), + np.random.default_rng(2).standard_normal(8, 4), index=dates, columns=["A", "B", "C", "D"], ) 
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index fc0dd43a3c744..37ea891ce18d7 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -76,7 +76,7 @@ def test_at_iat_coercion(self): # as timestamp is not a tuple! dates = date_range("1/1/2000", periods=8) df = DataFrame( - np.random.default_rng(2).randn(8, 4), + np.random.default_rng(2).standard_normal(8, 4), index=dates, columns=["A", "B", "C", "D"], ) @@ -136,7 +136,7 @@ def test_imethods_with_dups(self): def test_frame_at_with_duplicate_axes(self): # GH#33041 - arr = np.random.default_rng(2).randn(6).reshape(3, 2) + arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2) df = DataFrame(arr, columns=["A", "A"]) result = df.at[0, "A"] diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 2e5ee9f894eae..35e4c2b261e49 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -128,14 +128,14 @@ def test_bitmasks_pyarrow(offset, length, expected_values): @pytest.mark.parametrize( "data", [ - lambda: np.random.default_rng(2).randint(-100, 100), - lambda: np.random.default_rng(2).randint(1, 100), + lambda: np.random.default_rng(2).integers(-100, 100), + lambda: np.random.default_rng(2).integers(1, 100), lambda: np.random.default_rng(2).random(), lambda: np.random.default_rng(2).choice([True, False]), lambda: datetime( - year=np.random.default_rng(2).randint(1900, 2100), - month=np.random.default_rng(2).randint(1, 12), - day=np.random.default_rng(2).randint(1, 20), + year=np.random.default_rng(2).integers(1900, 2100), + month=np.random.default_rng(2).integers(1, 12), + day=np.random.default_rng(2).integers(1, 20), ), ], ) @@ -177,7 +177,7 @@ def test_missing_from_masked(): df2 = df.__dataframe__() rng = np.random.default_rng(2).RandomState(42) - dict_null = {col: rng.randint(low=0, high=len(df)) for col in df.columns} + dict_null = {col: 
rng.integers(low=0, high=len(df)) for col in df.columns} for col, num_nulls in dict_null.items(): null_idx = df.index[ rng.choice(np.arange(len(df)), size=num_nulls, replace=False) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index a6e624f91b241..6969a49c87f65 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -350,7 +350,7 @@ def test_delete_datetimelike(self): def test_split(self): # GH#37799 - values = np.random.default_rng(2).randn(3, 4) + values = np.random.default_rng(2).standard_normal(3, 4) blk = new_block(values, placement=BlockPlacement([3, 1, 6]), ndim=2) result = blk._split() @@ -463,12 +463,16 @@ def test_set_change_dtype(self, mgr): assert mgr2.iget(idx).dtype == np.object_ mgr2.insert( - len(mgr2.items), "quux", np.random.default_rng(2).randn(N).astype(int) + len(mgr2.items), + "quux", + np.random.default_rng(2).standard_normal(N).astype(int), ) idx = mgr2.items.get_loc("quux") assert mgr2.iget(idx).dtype == np.int_ - mgr2.iset(mgr2.items.get_loc("quux"), np.random.default_rng(2).randn(N)) + mgr2.iset( + mgr2.items.get_loc("quux"), np.random.default_rng(2).standard_normal(N) + ) assert mgr2.iget(idx).dtype == np.float_ def test_copy(self, mgr): @@ -702,11 +706,11 @@ def test_interleave_dtype(self, mgr_string, dtype): assert mgr.as_array().dtype == "object" def test_consolidate_ordering_issues(self, mgr): - mgr.iset(mgr.items.get_loc("f"), np.random.default_rng(2).randn(N)) - mgr.iset(mgr.items.get_loc("d"), np.random.default_rng(2).randn(N)) - mgr.iset(mgr.items.get_loc("b"), np.random.default_rng(2).randn(N)) - mgr.iset(mgr.items.get_loc("g"), np.random.default_rng(2).randn(N)) - mgr.iset(mgr.items.get_loc("h"), np.random.default_rng(2).randn(N)) + mgr.iset(mgr.items.get_loc("f"), np.random.default_rng(2).standard_normal(N)) + mgr.iset(mgr.items.get_loc("d"), np.random.default_rng(2).standard_normal(N)) + mgr.iset(mgr.items.get_loc("b"), 
np.random.default_rng(2).standard_normal(N)) + mgr.iset(mgr.items.get_loc("g"), np.random.default_rng(2).standard_normal(N)) + mgr.iset(mgr.items.get_loc("h"), np.random.default_rng(2).standard_normal(N)) # we have datetime/tz blocks in mgr cons = mgr.consolidate() diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index c4576ad57a8ff..f5bd983fa77fd 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -38,7 +38,7 @@ def assert_equal_cell_styles(cell1, cell2): def test_styler_to_excel_unstyled(engine): # compare DataFrame.to_excel and Styler.to_excel when no styles applied pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).randn(2, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(2, 2)) with tm.ensure_clean(".xlsx") as path: with ExcelWriter(path, engine=engine) as writer: df.to_excel(writer, sheet_name="dataframe") @@ -130,7 +130,7 @@ def test_styler_to_excel_unstyled(engine): @pytest.mark.parametrize("css, attrs, expected", shared_style_params) def test_styler_to_excel_basic(engine, css, attrs, expected): pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).randn(1, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal(1, 1)) styler = df.style.map(lambda x: css) with tm.ensure_clean(".xlsx") as path: @@ -161,7 +161,7 @@ def test_styler_to_excel_basic(engine, css, attrs, expected): @pytest.mark.parametrize("css, attrs, expected", shared_style_params) def test_styler_to_excel_basic_indexes(engine, css, attrs, expected): pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).randn(1, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal(1, 1)) styler = df.style styler.map_index(lambda x: css, axis=0) @@ -230,7 +230,7 @@ def test_styler_to_excel_border_style(engine, border_style): expected = border_style pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).randn(1, 1)) + df = 
DataFrame(np.random.default_rng(2).standard_normal(1, 1)) styler = df.style.map(lambda x: css) with tm.ensure_clean(".xlsx") as path: @@ -260,7 +260,7 @@ def test_styler_custom_converter(): def custom_converter(css): return {"font": {"color": {"rgb": "111222"}}} - df = DataFrame(np.random.default_rng(2).randn(1, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal(1, 1)) styler = df.style.map(lambda x: "color: #888999") with tm.ensure_clean(".xlsx") as path: with ExcelWriter(path, engine="openpyxl") as writer: diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 18fecc277cdf0..871be620d18d1 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -357,7 +357,7 @@ def test_excel_sheet_size(self, path): col_df.to_excel(path) def test_excel_sheet_by_name_raise(self, path): - gt = DataFrame(np.random.default_rng(2).randn(10, 2)) + gt = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) gt.to_excel(path) with ExcelFile(path) as xl: @@ -466,7 +466,7 @@ def test_int_types(self, np_type, path): # Test np.int values read come back as int # (rather than float which is Excel's format). 
df = DataFrame( - np.random.default_rng(2).randint(-10, 10, size=(10, 2)), dtype=np_type + np.random.default_rng(2).integers(-10, 10, size=(10, 2)), dtype=np_type ) df.to_excel(path, "test1") @@ -567,7 +567,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): frame.to_excel(path, "test1", index=False) # test index_label - df = DataFrame(np.random.default_rng(2).randn(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) >= 0 df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) with ExcelFile(path) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( @@ -576,7 +576,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.default_rng(2).randn(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) >= 0 df.to_excel( path, "test1", @@ -590,7 +590,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.default_rng(2).randn(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) >= 0 df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) with ExcelFile(path) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( @@ -615,7 +615,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): tm.assert_frame_equal(df, recons) def test_excel_roundtrip_indexname(self, merge_cells, path): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) df.index.name = "foo" df.to_excel(path, merge_cells=merge_cells) @@ -692,7 +692,7 @@ def test_to_excel_interval_no_labels(self, path): # # Test writing Interval without labels. 
df = DataFrame( - np.random.default_rng(2).randint(-10, 10, size=(20, 1)), dtype=np.int64 + np.random.default_rng(2).integers(-10, 10, size=(20, 1)), dtype=np.int64 ) expected = df.copy() @@ -709,7 +709,7 @@ def test_to_excel_interval_labels(self, path): # # Test writing Interval with labels. df = DataFrame( - np.random.default_rng(2).randint(-10, 10, size=(20, 1)), dtype=np.int64 + np.random.default_rng(2).integers(-10, 10, size=(20, 1)), dtype=np.int64 ) expected = df.copy() intervals = pd.cut( @@ -728,7 +728,7 @@ def test_to_excel_timedelta(self, path): # # Test writing timedelta to xls. df = DataFrame( - np.random.default_rng(2).randint(-10, 10, size=(20, 1)), + np.random.default_rng(2).integers(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64, ) @@ -1120,7 +1120,7 @@ def test_datetimes(self, path): def test_bytes_io(self, engine): # see gh-7074 with BytesIO() as bio: - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) # Pass engine explicitly, as there is no file path to infer from. 
with ExcelWriter(bio, engine=engine) as writer: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 42b3290c57c1c..0775b0cdbdca3 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -87,13 +87,13 @@ def blank_value(): @pytest.fixture def df(): - df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).randn(2)}) + df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).standard_normal(2)}) return df @pytest.fixture def styler(df): - df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).randn(2)}) + df = DataFrame({"A": [0, 1], "B": np.random.default_rng(2).standard_normal(2)}) return Styler(df) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 1ca535cc65cc1..47b16119a0b37 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -212,10 +212,10 @@ def test_repr_truncation(self): with option_context("display.max_colwidth", max_len): df = DataFrame( { - "A": np.random.default_rng(2).randn(10), + "A": np.random.default_rng(2).standard_normal(10), "B": [ tm.rands( - np.random.default_rng(2).randint(max_len - 1, max_len + 1) + np.random.default_rng(2).integers(max_len - 1, max_len + 1) ) for i in range(10) ], @@ -327,7 +327,7 @@ def test_repr_should_return_str(self): def test_repr_no_backslash(self): with option_context("mode.sim_interactive", True): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) assert "\\" not in repr(df) def test_expand_frame_repr(self): @@ -576,7 +576,7 @@ def test_to_string_repr_unicode(self): repr(df) idx = Index(["abc", "\u03c3a", "aegdvg"]) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) rs = repr(ser).split("\n") line_len = len(rs[0]) for line in rs[1:]: @@ -1193,7 +1193,7 @@ 
def test_wide_repr(self): def test_wide_repr_wide_columns(self): with option_context("mode.sim_interactive", True, "display.max_columns", 20): df = DataFrame( - np.random.default_rng(2).randn(5, 3), + np.random.default_rng(2).standard_normal(5, 3), columns=["a" * 90, "b" * 90, "c" * 90], ) rep_str = repr(df) @@ -1281,7 +1281,7 @@ def test_wide_repr_wide_long_columns(self): def test_long_series(self): n = 1000 s = Series( - np.random.default_rng(2).randint(-50, 50, n), + np.random.default_rng(2).integers(-50, 50, n), index=[f"s{x:04d}" for x in range(n)], dtype="int64", ) @@ -1370,7 +1370,10 @@ def test_index_with_nan(self): def test_to_string(self): # big mixed biggie = DataFrame( - {"A": np.random.default_rng(2).randn(200), "B": tm.makeStringIndex(200)}, + { + "A": np.random.default_rng(2).standard_normal(200), + "B": tm.makeStringIndex(200), + }, ) biggie.loc[:20, "A"] = np.nan @@ -1972,7 +1975,9 @@ def test_repr_html_long_multiindex(self): tuples = list(itertools.product(np.arange(max_L1), ["foo", "bar"])) idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.default_rng(2).randn(max_L1 * 2, 2), index=idx, columns=["A", "B"] + np.random.default_rng(2).standard_normal(max_L1 * 2, 2), + index=idx, + columns=["A", "B"], ) with option_context("display.max_rows", 60, "display.max_columns", 20): reg_repr = df._repr_html_() @@ -1981,7 +1986,7 @@ def test_repr_html_long_multiindex(self): tuples = list(itertools.product(np.arange(max_L1 + 1), ["foo", "bar"])) idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.default_rng(2).randn((max_L1 + 1) * 2, 2), + np.random.default_rng(2).standard_normal((max_L1 + 1) * 2, 2), index=idx, columns=["A", "B"], ) @@ -2027,7 +2032,7 @@ def test_info_repr(self): def test_info_repr_max_cols(self): # GH #6939 - df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) with option_context( 
"display.large_repr", "info", @@ -2280,7 +2285,7 @@ def test_to_string(self): def test_freq_name_separation(self): s = Series( - np.random.default_rng(2).randn(10), + np.random.default_rng(2).standard_normal(10), index=date_range("1/1/2000", periods=10), name=0, ) @@ -2731,7 +2736,7 @@ def test_max_multi_index_display(self): ] tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=["first", "second"]) - s = Series(np.random.default_rng(2).randn(8), index=index) + s = Series(np.random.default_rng(2).standard_normal(8), index=index) with option_context("display.max_rows", 10): assert len(str(s).split("\n")) == 10 @@ -2745,7 +2750,7 @@ def test_max_multi_index_display(self): assert len(str(s).split("\n")) == 10 # index - s = Series(np.random.default_rng(2).randn(8), None) + s = Series(np.random.default_rng(2).standard_normal(8), None) with option_context("display.max_rows", 10): assert len(str(s).split("\n")) == 9 diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index a809793109d52..326cfc6c00af7 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -27,7 +27,7 @@ def duplicate_columns_frame(): """Dataframe with duplicate column names.""" return DataFrame( - np.random.default_rng(2).randn(1500, 4), columns=["a", "a", "b", "b"] + np.random.default_rng(2).standard_normal(1500, 4), columns=["a", "a", "b", "b"] ) @@ -48,10 +48,10 @@ def test_info_empty(): def test_info_categorical_column_smoke_test(): n = 2500 - df = DataFrame({"int64": np.random.default_rng(2).randint(100, size=n)}) + df = DataFrame({"int64": np.random.default_rng(2).integers(100, size=n)}) df["category"] = Series( np.array(list("abcdefghij")).take( - np.random.default_rng(2).randint(0, 10, size=n) + np.random.default_rng(2).integers(0, 10, size=n) ) ).astype("category") df.isna() @@ -91,7 +91,7 @@ def test_info_smoke_test(fixture_func_name, request): ], ) def 
test_info_default_verbose_selection(num_columns, max_info_columns, verbose): - frame = DataFrame(np.random.default_rng(2).randn(5, num_columns)) + frame = DataFrame(np.random.default_rng(2).standard_normal(5, num_columns)) with option_context("display.max_info_columns", max_info_columns): io_default = StringIO() frame.info(buf=io_default) @@ -108,7 +108,7 @@ def test_info_verbose_check_header_separator_body(): buf = StringIO() size = 1001 start = 5 - frame = DataFrame(np.random.default_rng(2).randn(3, size)) + frame = DataFrame(np.random.default_rng(2).standard_normal(3, size)) frame.info(verbose=True, buf=buf) res = buf.getvalue() @@ -170,7 +170,7 @@ def test_info_verbose_with_counts_spacing( size, header_exp, separator_exp, first_line_exp, last_line_exp ): """Test header column, spacer, first line and last line in verbose mode.""" - frame = DataFrame(np.random.default_rng(2).randn(3, size)) + frame = DataFrame(np.random.default_rng(2).standard_normal(3, size)) with StringIO() as buf: frame.info(verbose=True, show_counts=True, buf=buf) all_lines = buf.getvalue().splitlines() @@ -208,7 +208,7 @@ def test_info_memory(): def test_info_wide(): io = StringIO() - df = DataFrame(np.random.default_rng(2).randn(5, 101)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 101)) df.info(buf=io) io = StringIO() @@ -247,7 +247,7 @@ def test_info_shows_column_dtypes(): data = {} n = 10 for i, dtype in enumerate(dtypes): - data[i] = np.random.default_rng(2).randint(2, size=n).astype(dtype) + data[i] = np.random.default_rng(2).integers(2, size=n).astype(dtype) df = DataFrame(data) buf = StringIO() df.info(buf=buf) @@ -263,7 +263,7 @@ def test_info_shows_column_dtypes(): def test_info_max_cols(): - df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) for len_, verbose in [(5, None), (5, False), (12, True)]: # For verbose always ^ setting ^ summarize ^ full output with option_context("max_info_columns", 4): 
@@ -310,7 +310,7 @@ def test_info_memory_usage(): data = {} n = 10 for i, dtype in enumerate(dtypes): - data[i] = np.random.default_rng(2).randint(2, size=n).astype(dtype) + data[i] = np.random.default_rng(2).integers(2, size=n).astype(dtype) df = DataFrame(data) buf = StringIO() @@ -341,7 +341,7 @@ def test_info_memory_usage(): data = {} n = 100 for i, dtype in enumerate(dtypes): - data[i] = np.random.default_rng(2).randint(2, size=n).astype(dtype) + data[i] = np.random.default_rng(2).integers(2, size=n).astype(dtype) df = DataFrame(data) df.columns = dtypes @@ -456,7 +456,9 @@ def memory_usage(f): [list(uppercase), date_range("20160101", periods=N)], names=["id", "date"], ) - df = DataFrame({"value": np.random.default_rng(2).randn(N * M)}, index=index) + df = DataFrame( + {"value": np.random.default_rng(2).standard_normal(N * M)}, index=index + ) unstacked = df.unstack("id") assert df.values.nbytes == unstacked.values.nbytes diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index d24f9bcbc89d9..bf6a2bd4bae13 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -154,7 +154,9 @@ def test_publishes_not_implemented(self, ip): # column MultiIndex # GH 15996 midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) - df = pd.DataFrame(np.random.default_rng(2).randn(5, len(midx)), columns=midx) + df = pd.DataFrame( + np.random.default_rng(2).standard_normal(5, len(midx)), columns=midx + ) opt = pd.option_context("display.html.table_schema", True) diff --git a/pandas/tests/io/formats/test_series_info.py b/pandas/tests/io/formats/test_series_info.py index e6dd61cb72d91..bf20a5cb954c4 100644 --- a/pandas/tests/io/formats/test_series_info.py +++ b/pandas/tests/io/formats/test_series_info.py @@ -18,7 +18,7 @@ def test_info_categorical_column_just_works(): n = 2500 data = np.array(list("abcdefghij")).take( - np.random.default_rng(2).randint(0, 10, size=n) + 
np.random.default_rng(2).integers(0, 10, size=n) ) s = Series(data).astype("category") s.isna() @@ -92,7 +92,7 @@ def test_info_memory(): def test_info_wide(): - s = Series(np.random.default_rng(2).randn(101)) + s = Series(np.random.default_rng(2).standard_normal(101)) msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info" with pytest.raises(ValueError, match=msg): s.info(max_cols=1) @@ -110,7 +110,7 @@ def test_info_shows_dtypes(): ] n = 10 for dtype in dtypes: - s = Series(np.random.default_rng(2).randint(2, size=n).astype(dtype)) + s = Series(np.random.default_rng(2).integers(2, size=n).astype(dtype)) buf = StringIO() s.info(buf=buf) res = buf.getvalue() @@ -170,7 +170,7 @@ def test_info_memory_usage_bug_on_multiindex(): [list(uppercase), date_range("20160101", periods=N)], names=["id", "date"], ) - s = Series(np.random.default_rng(2).randn(N * M), index=index) + s = Series(np.random.default_rng(2).standard_normal(N * M), index=index) unstacked = s.unstack("id") assert s.values.nbytes == unstacked.values.nbytes diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 507831a97b039..87eac4263e79b 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -54,7 +54,10 @@ def biggie_df_fixture(request): """Fixture for a big mixed Dataframe and an empty Dataframe""" if request.param == "mixed": df = DataFrame( - {"A": np.random.default_rng(2).randn(200), "B": tm.makeStringIndex(200)}, + { + "A": np.random.default_rng(2).standard_normal(200), + "B": tm.makeStringIndex(200), + }, index=np.arange(200), ) df.loc[:20, "A"] = np.nan @@ -284,8 +287,8 @@ def test_to_html_regression_GH6098(): { "clé1": ["a", "a", "b", "b", "a"], "clé2": ["1er", "2ème", "1er", "2ème", "1er"], - "données1": np.random.default_rng(2).randn(5), - "données2": np.random.default_rng(2).randn(5), + "données1": np.random.default_rng(2).standard_normal(5), + "données2": 
np.random.default_rng(2).standard_normal(5), } ) @@ -398,7 +401,7 @@ def test_to_html_filename(biggie_df_fixture, tmpdir): def test_to_html_with_no_bold(): - df = DataFrame({"x": np.random.default_rng(2).randn(5)}) + df = DataFrame({"x": np.random.default_rng(2).standard_normal(5)}) html = df.to_html(bold_rows=False) result = html[html.find("")] assert " 0 @@ -277,7 +284,7 @@ def test_select_with_many_inputs(setup_path): df = DataFrame( { "ts": bdate_range("2012-01-01", periods=300), - "A": np.random.default_rng(2).randn(300), + "A": np.random.default_rng(2).standard_normal(300), "B": range(300), "users": ["a"] * 50 + ["b"] * 50 @@ -657,7 +664,7 @@ def test_frame_select_complex2(tmp_path): selection = read_hdf(pp, "df", where="A=[2,3]") hist = DataFrame( - np.random.default_rng(2).randn(25, 1), + np.random.default_rng(2).standard_normal(25, 1), columns=["data"], index=MultiIndex.from_tuples( [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"] diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 2e2271be3f668..da1124268393b 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -500,7 +500,7 @@ def test_remove(setup_path): def test_same_name_scoping(setup_path): with ensure_clean_store(setup_path) as store: df = DataFrame( - np.random.default_rng(2).randn(20, 2), + np.random.default_rng(2).standard_normal(20, 2), index=date_range("20130101", periods=20), ) store.put("df", df, format="table") @@ -624,7 +624,7 @@ def test_coordinates(setup_path): # pass array/mask as the coordinates with ensure_clean_store(setup_path) as store: df = DataFrame( - np.random.default_rng(2).randn(1000, 2), + np.random.default_rng(2).standard_normal(1000, 2), index=date_range("20000101", periods=1000), ) store.append("df", df) @@ -664,7 +664,7 @@ def test_coordinates(setup_path): tm.assert_frame_equal(result, expected) # list - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = 
DataFrame(np.random.default_rng(2).standard_normal(10, 2)) store.append("df2", df) result = store.select("df2", where=[0, 3, 5]) expected = df.iloc[[0, 3, 5]] @@ -763,7 +763,7 @@ def test_start_stop_fixed(setup_path): def test_select_filter_corner(setup_path): - df = DataFrame(np.random.default_rng(2).randn(50, 100)) + df = DataFrame(np.random.default_rng(2).standard_normal(50, 100)) df.index = [f"{c:3d}" for c in df.index] df.columns = [f"{c:3d}" for c in df.columns] @@ -941,7 +941,9 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path): ] for index in types_should_fail: - df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=index(2)) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 2), columns=index(2) + ) path = tmp_path / setup_path with catch_warnings(record=True): msg = "cannot have non-object label DataIndexableCol" @@ -949,7 +951,9 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path): df.to_hdf(path, "df", format="table", data_columns=True) for index in types_should_run: - df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=index(2)) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 2), columns=index(2) + ) path = tmp_path / setup_path with catch_warnings(record=True): df.to_hdf(path, "df", format="table", data_columns=True) diff --git a/pandas/tests/io/pytables/test_time_series.py b/pandas/tests/io/pytables/test_time_series.py index 08ce04606982a..8b96390b611fe 100644 --- a/pandas/tests/io/pytables/test_time_series.py +++ b/pandas/tests/io/pytables/test_time_series.py @@ -24,7 +24,7 @@ def test_store_datetime_fractional_secs(setup_path): def test_tseries_indices_series(setup_path): with ensure_clean_store(setup_path) as store: idx = tm.makeDateIndex(10) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) store["a"] = ser result = store["a"] @@ -33,7 +33,7 @@ def test_tseries_indices_series(setup_path): 
tm.assert_class_equal(result.index, ser.index, obj="series index") idx = tm.makePeriodIndex(10) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) store["a"] = ser result = store["a"] @@ -45,7 +45,7 @@ def test_tseries_indices_frame(setup_path): with ensure_clean_store(setup_path) as store: idx = tm.makeDateIndex(10) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx) store["a"] = df result = store["a"] @@ -54,7 +54,7 @@ def test_tseries_indices_frame(setup_path): tm.assert_class_equal(result.index, df.index, obj="dataframe index") idx = tm.makePeriodIndex(10) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), idx) + df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 3)), idx) store["a"] = df result = store["a"] diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index 217b1f7fc1375..e80874e515731 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -180,7 +180,7 @@ def test_tseries_select_index_column(setup_path): # check that no tz still works rng = date_range("1/1/2000", "1/30/2000") - frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng) with ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -189,7 +189,7 @@ def test_tseries_select_index_column(setup_path): # check utc rng = date_range("1/1/2000", "1/30/2000", tz="UTC") - frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng) with ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -198,7 +198,7 @@ def 
test_tseries_select_index_column(setup_path): # double check non-utc rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") - frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) with ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -211,7 +211,7 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): # index rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") rng = rng._with_freq(None) # freq doesn't round-trip - df = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) + df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) store["df"] = df result = store["df"] tm.assert_frame_equal(result, df) @@ -262,7 +262,7 @@ def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): def test_fixed_offset_tz(setup_path): rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") - frame = DataFrame(np.random.default_rng(2).randn(len(rng), 4), index=rng) + frame = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) with ensure_clean_store(setup_path) as store: store["frame"] = frame diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 6ec89d822c60a..e13a866f6ac27 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -78,7 +78,7 @@ def df(request): return tm.makeCustomDataframe( max_rows + 1, 3, - data_gen_f=lambda *args: np.random.default_rng(2).randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).integers(2), c_idx_type="s", r_idx_type="i", c_idx_names=[None], @@ -119,7 +119,7 @@ def df(request): return tm.makeCustomDataframe( 5, 3, - data_gen_f=lambda *args: np.random.default_rng(2).randint(2), + data_gen_f=lambda *args: np.random.default_rng(2).integers(2), c_idx_type="s", r_idx_type="i", c_idx_names=[None], diff --git a/pandas/tests/io/test_html.py 
b/pandas/tests/io/test_html.py index 74616421184dd..31ad48ed2b330 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1278,7 +1278,7 @@ def test_fallback_success(self, datapath): def test_to_html_timestamp(self): rng = date_range("2000-01-01", periods=10) - df = DataFrame(np.random.default_rng(2).randn(10, 4), index=rng) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4), index=rng) result = df.to_html() assert "2000-01-01" in result diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 02852708ef1fc..36764c3803a60 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -466,7 +466,8 @@ def test_multiindex_with_columns(self, pa): engine = pa dates = pd.date_range("01-Jan-2018", "01-Dec-2018", freq="MS") df = pd.DataFrame( - np.random.default_rng(2).randn(2 * len(dates), 3), columns=list("ABC") + np.random.default_rng(2).standard_normal(2 * len(dates), 3), + columns=list("ABC"), ) index1 = pd.MultiIndex.from_product( [["Level1", "Level2"], dates], names=["level", "date"] @@ -515,7 +516,9 @@ def test_write_ignoring_index(self, engine): def test_write_column_multiindex(self, engine): # Not able to write column multi-indexes with non-string column names. 
mi_columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) - df = pd.DataFrame(np.random.default_rng(2).randn(4, 3), columns=mi_columns) + df = pd.DataFrame( + np.random.default_rng(2).standard_normal(4, 3), columns=mi_columns + ) if engine == "fastparquet": self.check_error_on_write( @@ -532,7 +535,9 @@ def test_write_column_multiindex_nonstring(self, engine): ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], [1, 2, 1, 2, 1, 2, 1, 2], ] - df = pd.DataFrame(np.random.default_rng(2).randn(8, 8), columns=arrays) + df = pd.DataFrame( + np.random.default_rng(2).standard_normal(8, 8), columns=arrays + ) df.columns.names = ["Level1", "Level2"] if engine == "fastparquet": self.check_error_on_write(df, engine, ValueError, "Column name") @@ -549,7 +554,9 @@ def test_write_column_multiindex_string(self, pa): ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] - df = pd.DataFrame(np.random.default_rng(2).randn(8, 8), columns=arrays) + df = pd.DataFrame( + np.random.default_rng(2).standard_normal(8, 8), columns=arrays + ) df.columns.names = ["ColLevel1", "ColLevel2"] check_round_trip(df, engine) @@ -561,7 +568,9 @@ def test_write_column_index_string(self, pa): # Write column indexes with string column names arrays = ["bar", "baz", "foo", "qux"] - df = pd.DataFrame(np.random.default_rng(2).randn(8, 4), columns=arrays) + df = pd.DataFrame( + np.random.default_rng(2).standard_normal(8, 4), columns=arrays + ) df.columns.name = "StringCol" check_round_trip(df, engine) @@ -571,7 +580,9 @@ def test_write_column_index_nonstring(self, engine): # Write column indexes with string column names arrays = [1, 2, 3, 4] - df = pd.DataFrame(np.random.default_rng(2).randn(8, 4), columns=arrays) + df = pd.DataFrame( + np.random.default_rng(2).standard_normal(8, 4), columns=arrays + ) df.columns.name = "NonStringCol" if engine == "fastparquet": self.check_error_on_write( @@ -986,7 +997,7 @@ def 
test_filter_row_groups(self, pa): def test_read_parquet_manager(self, pa, using_array_manager): # ensure that read_parquet honors the pandas.options.mode.data_manager option df = pd.DataFrame( - np.random.default_rng(2).randn(10, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal(10, 3), columns=["A", "B", "C"] ) with tm.ensure_clean() as path: diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index bb7a2263b321b..d66d5d532962c 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -527,7 +527,7 @@ def _test_roundtrip(frame): def test_pickle_timeseries_periodindex(): # GH#2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") - ts = Series(np.random.default_rng(2).randn(len(prng)), prng) + ts = Series(np.random.default_rng(2).standard_normal(len(prng)), prng) new_ts = tm.round_trip_pickle(ts) assert new_ts.index.freq == "M" diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e912fe525fdc3..c39217f08ae35 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1543,7 +1543,9 @@ def test_get_schema_keys(self, test_frame1): assert constraint_sentence in create_sql def test_chunksize_read(self): - df = DataFrame(np.random.default_rng(2).randn(22, 5), columns=list("abcde")) + df = DataFrame( + np.random.default_rng(2).standard_normal(22, 5), columns=list("abcde") + ) df.to_sql("test_chunksize", self.conn, index=False) # reading the query in one time diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 0f38711108175..b0e5422540b1c 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -372,13 +372,17 @@ def test_read_write_dta10(self, version): def test_stata_doc_examples(self): with tm.ensure_clean() as path: - df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 2), columns=list("AB") + ) df.to_stata(path) 
def test_write_preserves_original(self): # 9795 - df = DataFrame(np.random.default_rng(2).randn(5, 4), columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).standard_normal((5, 4)), columns=list("abcd") + ) df.loc[2, "a":"c"] = np.nan df_copy = df.copy() with tm.ensure_clean() as path: @@ -1978,7 +1982,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten if use_dict: compression_arg = {"method": compression} - df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB")) df.index.name = "index" with tm.ensure_clean(file_name) as path: df.to_stata(path, version=version, compression=compression_arg) @@ -2016,7 +2020,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten def test_compression_dict(method, file_ext): file_name = f"test.{file_ext}" archive_name = "test.dta" - df = DataFrame(np.random.default_rng(2).randn(10, 2), columns=list("AB")) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB")) df.index.name = "index" with tm.ensure_clean(file_name) as path: compression = {"method": method, "archive_name": archive_name} diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index 32888ddd7e58b..b78e6426ca17f 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -660,7 +660,7 @@ def test_unique_label_indices_intp(writable): def test_unique_label_indices(): - a = np.random.default_rng(2).randint(1, 1 << 10, 1 << 15).astype(np.intp) + a = np.random.default_rng(2).integers(1, 1 << 10, 1 << 15).astype(np.intp) left = ht.unique_label_indices(a) right = np.unique(a, return_index=True)[1] diff --git a/pandas/tests/plotting/conftest.py b/pandas/tests/plotting/conftest.py index 01847608257c8..92a2d2144f04d 100644 --- a/pandas/tests/plotting/conftest.py +++ b/pandas/tests/plotting/conftest.py @@ -42,9 
+42,9 @@ def hist_df(): "classroom": classroom, "height": rng.normal(66, 4, size=n), "weight": rng.normal(161, 32, size=n), - "category": rng.randint(4, size=n), + "category": rng.integers(4, size=n), "datetime": to_datetime( - rng.randint( + rng.integers( 812419200000000000, 819331200000000000, size=n, diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index e61d31532fb49..eb93dc344aab9 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -178,7 +178,7 @@ def test_plot_multiindex_unicode(self): [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"] ) df = DataFrame( - np.random.default_rng(2).randint(0, 10, (8, 2)), + np.random.default_rng(2).integers(0, 10, (8, 2)), columns=columns, index=index, ) @@ -264,13 +264,17 @@ def test_nonnumeric_exclude(self): assert len(ax.get_lines()) == 1 # B was plotted def test_implicit_label(self): - df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + ) ax = df.plot(x="a", y="b") _check_text_labels(ax.xaxis.get_label(), "a") def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(np.random.default_rng(2).randn(2, 2), columns=["a", "b"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(2, 2), columns=["a", "b"] + ) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -470,7 +474,7 @@ def test_line_area_stacked_sep_df(self, kind): def test_line_area_stacked_mixed(self): mixed_df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -612,14 +616,14 @@ def test_area_sharey_dont_overwrite(self): @pytest.mark.parametrize("stacked", [True, False]) def test_bar_linewidth(self, stacked): - df = DataFrame(np.random.default_rng(2).randn(5, 
5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.bar(stacked=stacked, linewidth=2) for r in ax.patches: assert r.get_linewidth() == 2 def test_bar_linewidth_subplots(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # subplots axes = df.plot.bar(linewidth=2, subplots=True) _check_axes_shape(axes, axes_num=5, layout=(5, 1)) @@ -632,7 +636,7 @@ def test_bar_linewidth_subplots(self): ) @pytest.mark.parametrize("stacked", [True, False]) def test_bar_barwidth(self, meth, dim, stacked): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) width = 0.9 @@ -647,7 +651,7 @@ def test_bar_barwidth(self, meth, dim, stacked): "meth, dim", [("bar", "get_width"), ("barh", "get_height")] ) def test_barh_barwidth_subplots(self, meth, dim): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) width = 0.9 @@ -710,7 +714,7 @@ def test_bar_nan_stacked(self): def test_bar_categorical(self, idx): # GH 13019 df = DataFrame( - np.random.default_rng(2).randn(6, 5), + np.random.default_rng(2).standard_normal(6, 5), index=idx(list("ABCDEF")), columns=idx(list("abcde")), ) @@ -732,7 +736,7 @@ def test_bar_categorical(self, idx): @pytest.mark.parametrize("x, y", [("x", "y"), (1, 2)]) def test_plot_scatter(self, x, y): df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -741,7 +745,7 @@ def test_plot_scatter(self, x, y): def test_plot_scatter_error(self): df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -754,7 +758,7 @@ def test_plot_scatter_error(self): def test_plot_scatter_shape(self): df = 
DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -764,7 +768,7 @@ def test_plot_scatter_shape(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = DataFrame(np.random.default_rng(2).randn(10), columns=["a"]) + df = DataFrame(np.random.default_rng(2).standard_normal(10), columns=["a"]) df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a (real )?number, not 'datetime.time'" @@ -829,7 +833,7 @@ def test_plot_scatter_with_categorical_data(self, x, y): @pytest.mark.parametrize("x, y, c", [("x", "y", "z"), (0, 1, 2)]) def test_plot_scatter_with_c(self, x, y, c): df = DataFrame( - np.random.default_rng(2).randint(low=0, high=100, size=(6, 4)), + np.random.default_rng(2).integers(low=0, high=100, size=(6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -842,7 +846,7 @@ def test_plot_scatter_with_c(self, x, y, c): def test_plot_scatter_with_c_props(self): df = DataFrame( - np.random.default_rng(2).randint(low=0, high=100, size=(6, 4)), + np.random.default_rng(2).integers(low=0, high=100, size=(6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -924,7 +928,7 @@ def test_plot_scatter_without_norm(self): ) def test_plot_bar(self, kwargs): df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -934,7 +938,7 @@ def test_plot_bar(self, kwargs): @pytest.mark.slow def test_plot_bar_int_col(self): df = DataFrame( - np.random.default_rng(2).randn(10, 15), + np.random.default_rng(2).standard_normal(10, 15), index=list(string.ascii_letters[:10]), columns=range(15), ) @@ -1029,7 +1033,7 @@ def test_boxplot_vertical_positions(self, 
hist_df): def test_boxplot_return_type_invalid(self): df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -1040,7 +1044,7 @@ def test_boxplot_return_type_invalid(self): @pytest.mark.parametrize("return_type", ["dict", "axes", "both"]) def test_boxplot_return_type_invalid_type(self, return_type): df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -1049,7 +1053,7 @@ def test_boxplot_return_type_invalid_type(self, return_type): @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(np.random.default_rng(2).randn(100, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] _check_legend_labels(ax, labels=expected) @@ -1057,13 +1061,13 @@ def test_kde_df(self): @td.skip_if_no_scipy def test_kde_df_rot(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) ax = df.plot(kind="kde", rot=20, fontsize=5) _check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) @td.skip_if_no_scipy def test_kde_df_subplots(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) axes = _check_plot_works( df.plot, default_axes=True, @@ -1074,7 +1078,7 @@ def test_kde_df_subplots(self): @td.skip_if_no_scipy def test_kde_df_logy(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) axes = df.plot(kind="kde", logy=True, subplots=True) _check_ax_scales(axes, yaxis="log") @@ -1085,7 +1089,7 @@ def test_kde_missing_vals(self): _check_plot_works(df.plot, kind="kde") def test_hist_df(self): - df = 
DataFrame(np.random.default_rng(2).randn(100, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(100, 4)) ax = _check_plot_works(df.plot.hist) expected = [pprint_thing(c) for c in df.columns] @@ -1124,7 +1128,7 @@ def test_hist_df_series_cumulative(self): tm.assert_almost_equal(rects[-2].get_height(), 10.0) def test_hist_df_orientation(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) # if horizontal, yticklabels are rotated axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal") _check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8) @@ -1135,7 +1139,9 @@ def test_hist_df_orientation(self): def test_hist_weights(self, weights): # GH 33173 - df = DataFrame(dict(zip(["A", "B"], np.random.default_rng(2).randn(2, 100)))) + df = DataFrame( + dict(zip(["A", "B"], np.random.default_rng(2).standard_normal(2, 100))) + ) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1301,7 +1307,7 @@ def test_hist_df_coord(self, data): ) def test_plot_int_columns(self): - df = DataFrame(np.random.default_rng(2).randn(100, 4)).cumsum() + df = DataFrame(np.random.default_rng(2).standard_normal(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.parametrize( @@ -1319,7 +1325,7 @@ def test_style_by_column(self, markers): fig = plt.gcf() fig.clf() fig.add_subplot(111) - df = DataFrame(np.random.default_rng(2).randn(10, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 3)) ax = df.plot(style=markers) for idx, line in enumerate(ax.get_lines()[: len(markers)]): assert line.get_marker() == markers[idx] @@ -1387,7 +1393,8 @@ def test_all_invalid_plot_data(self, kind): ) def test_partially_invalid_plot_data_numeric(self, kind): df = DataFrame( - np.random.default_rng(2).RandomState(42).randn(10, 2), dtype=object + np.random.default_rng(2).RandomState(42).standard_normal(10, 2), + dtype=object, ) 
df[np.random.default_rng(2).rand(df.shape[0]) > 0.5] = "a" msg = "no numeric data to plot" @@ -1395,7 +1402,7 @@ def test_partially_invalid_plot_data_numeric(self, kind): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) msg = "invalid_plot_kind is not a valid plot kind" with pytest.raises(ValueError, match=msg): df.plot(kind="invalid_plot_kind") @@ -1654,7 +1661,7 @@ def test_errorbar_plot_external_valueerror(self): d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} df = DataFrame(d) with tm.external_error_raised(ValueError): - df.plot(yerr=np.random.default_rng(2).randn(11)) + df.plot(yerr=np.random.default_rng(2).standard_normal(11)) @pytest.mark.slow def test_errorbar_plot_external_typeerror(self): @@ -1724,8 +1731,8 @@ def test_errorbar_plot_iterator(self): def test_errorbar_with_integer_column_names(self): # test with integer column names - df = DataFrame(np.abs(np.random.default_rng(2).randn(10, 2))) - df_err = DataFrame(np.abs(np.random.default_rng(2).randn(10, 2))) + df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 2))) + df_err = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 2))) ax = _check_plot_works(df.plot, yerr=df_err) _check_has_errorbars(ax, xerr=0, yerr=2) ax = _check_plot_works(df.plot, y=0, yerr=1) @@ -1734,18 +1741,18 @@ def test_errorbar_with_integer_column_names(self): @pytest.mark.slow @pytest.mark.parametrize("kind", ["line", "bar"]) def test_errorbar_with_partial_columns_kind(self, kind): - df = DataFrame(np.abs(np.random.default_rng(2).randn(10, 3))) + df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 3))) df_err = DataFrame( - np.abs(np.random.default_rng(2).randn(10, 2)), columns=[0, 2] + np.abs(np.random.default_rng(2).standard_normal(10, 2)), columns=[0, 2] ) ax = _check_plot_works(df.plot, yerr=df_err, kind=kind) _check_has_errorbars(ax, xerr=0, yerr=2) 
@pytest.mark.slow def test_errorbar_with_partial_columns_dti(self): - df = DataFrame(np.abs(np.random.default_rng(2).randn(10, 3))) + df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 3))) df_err = DataFrame( - np.abs(np.random.default_rng(2).randn(10, 2)), columns=[0, 2] + np.abs(np.random.default_rng(2).standard_normal(10, 2)), columns=[0, 2] ) ix = date_range("1/1/2000", periods=10, freq="M") df.set_index(ix, inplace=True) @@ -1830,12 +1837,12 @@ def test_table(self): def test_errorbar_scatter(self): df = DataFrame( - np.abs(np.random.default_rng(2).randn(5, 2)), + np.abs(np.random.default_rng(2).standard_normal(5, 2)), index=range(5), columns=["x", "y"], ) df_err = DataFrame( - np.abs(np.random.default_rng(2).randn(5, 2)) / 5, + np.abs(np.random.default_rng(2).standard_normal(5, 2)) / 5, index=range(5), columns=["x", "y"], ) @@ -1864,7 +1871,7 @@ def _check_errorbar_color(containers, expected, has_err="has_xerr"): # GH 8081 df = DataFrame( - np.abs(np.random.default_rng(2).randn(10, 5)), + np.abs(np.random.default_rng(2).standard_normal(10, 5)), columns=["a", "b", "c", "d", "e"], ) ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red") @@ -2047,11 +2054,14 @@ def test_df_gridspec_patterns_vert_horiz(self): import matplotlib.pyplot as plt ts = Series( - np.random.default_rng(2).randn(10), index=date_range("1/1/2000", periods=10) + np.random.default_rng(2).standard_normal(10), + index=date_range("1/1/2000", periods=10), ) df = DataFrame( - np.random.default_rng(2).randn(10, 2), index=ts.index, columns=list("AB") + np.random.default_rng(2).standard_normal(10, 2), + index=ts.index, + columns=list("AB"), ) def _get_vertical_grid(): @@ -2127,7 +2137,8 @@ def test_df_gridspec_patterns_boxed(self): import matplotlib.pyplot as plt ts = Series( - np.random.default_rng(2).randn(10), index=date_range("1/1/2000", periods=10) + np.random.default_rng(2).standard_normal(10), + index=date_range("1/1/2000", periods=10), ) # boxed @@ -2142,7 +2153,9 @@ 
def _get_boxed_grid(): axes = _get_boxed_grid() df = DataFrame( - np.random.default_rng(2).randn(10, 4), index=ts.index, columns=list("ABCD") + np.random.default_rng(2).standard_normal(10, 4), + index=ts.index, + columns=list("ABCD"), ) axes = df.plot(subplots=True, ax=axes) for ax in axes: @@ -2191,8 +2204,8 @@ def test_plain_axes_df(self): # a new ax is created for the colorbar -> also multiples axes (GH11520) df = DataFrame( { - "a": np.random.default_rng(2).randn(8), - "b": np.random.default_rng(2).randn(8), + "a": np.random.default_rng(2).standard_normal(8), + "b": np.random.default_rng(2).standard_normal(8), } ) fig = mpl.pyplot.figure() @@ -2221,7 +2234,9 @@ def test_plain_axes_make_inset_axes(self): def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - DataFrame(np.random.default_rng(2).randn(15, 2), columns=list("AB")) + DataFrame( + np.random.default_rng(2).standard_normal(15, 2), columns=list("AB") + ) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2257,7 +2272,9 @@ def test_x_multiindex_values_ticks(self): # GH: 15912 index = MultiIndex.from_product([[2012, 2013], [1, 2]]) df = DataFrame( - np.random.default_rng(2).randn(4, 2), columns=["A", "B"], index=index + np.random.default_rng(2).standard_normal(4, 2), + columns=["A", "B"], + index=index, ) ax = df.plot() ax.set_xlim(-1, 4) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 8bd1dce2a885c..f438b6301ef90 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -37,7 +37,9 @@ class TestDataFrameColor: ) def test_mpl2_color_cycle_str(self, color): # GH 15516 - df = DataFrame(np.random.default_rng(2).randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + ) _check_plot_works(df.plot, color=color) def test_color_single_series_list(self): @@ -52,7 +54,7 @@ def 
test_rgb_tuple_color(self, color): _check_plot_works(df.plot, x="x", y="y", color=color) def test_color_empty_string(self): - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) with pytest.raises(ValueError, match="Invalid color argument:"): df.plot(color="") @@ -98,31 +100,31 @@ def test_color_and_marker(self, color, expected): def test_bar_colors(self): default_colors = _unpack_cycler(plt.rcParams) - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.bar() _check_colors(ax.patches[::5], facecolors=default_colors[:5]) def test_bar_colors_custom(self): custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.bar(color=custom_colors) _check_colors(ax.patches[::5], facecolors=custom_colors) @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_bar_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.bar(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] _check_colors(ax.patches[::5], facecolors=rgba_colors) def test_bar_colors_single_col(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") _check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) def test_bar_colors_green(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot(kind="bar", color="green") _check_colors(ax.patches[::5], facecolors=["green"] * 5) @@ -244,7 +246,7 @@ def test_scatter_colorbar_different_cmap(self): def test_line_colors(self): custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).randn(5, 5)) 
+ df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -259,26 +261,26 @@ def test_line_colors(self): @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_line_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=rgba_colors) def test_line_colors_single_col(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') ax = df.loc[:, [0]].plot(color="DodgerBlue") _check_colors(ax.lines, linecolors=["DodgerBlue"]) def test_line_colors_single_color(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot(color="red") _check_colors(ax.get_lines(), linecolors=["red"] * 5) def test_line_colors_hex(self): # GH 10299 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] ax = df.plot(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -292,7 +294,7 @@ def test_line_colors_and_styles_subplots(self): # GH 9894 default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -300,7 +302,7 @@ def test_line_colors_and_styles_subplots(self): @pytest.mark.parametrize("color", ["k", "green"]) def test_line_colors_and_styles_subplots_single_color_str(self, 
color): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) axes = df.plot(subplots=True, color=color) for ax in axes: _check_colors(ax.get_lines(), linecolors=[color]) @@ -308,14 +310,14 @@ def test_line_colors_and_styles_subplots_single_color_str(self, color): @pytest.mark.parametrize("color", ["rgcby", list("rgcby")]) def test_line_colors_and_styles_subplots_custom_colors(self, color): # GH 9894 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) axes = df.plot(color=color, subplots=True) for ax, c in zip(axes, list(color)): _check_colors(ax.get_lines(), linecolors=[c]) def test_line_colors_and_styles_subplots_colormap_hex(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # GH 10299 custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] axes = df.plot(color=custom_colors, subplots=True) @@ -325,7 +327,7 @@ def test_line_colors_and_styles_subplots_colormap_hex(self): @pytest.mark.parametrize("cmap", ["jet", cm.jet]) def test_line_colors_and_styles_subplots_colormap_subplot(self, cmap): # GH 9894 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] axes = df.plot(colormap=cmap, subplots=True) for ax, c in zip(axes, rgba_colors): @@ -333,7 +335,7 @@ def test_line_colors_and_styles_subplots_colormap_subplot(self, cmap): def test_line_colors_and_styles_subplots_single_col(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) @@ -341,7 +343,7 @@ def 
test_line_colors_and_styles_subplots_single_col(self): def test_line_colors_and_styles_subplots_single_char(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # single character style axes = df.plot(style="r", subplots=True) for ax in axes: @@ -349,7 +351,7 @@ def test_line_colors_and_styles_subplots_single_char(self): def test_line_colors_and_styles_subplots_list_styles(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # list of styles styles = list("rgcby") axes = df.plot(style=styles, subplots=True) @@ -411,30 +413,30 @@ def test_area_colors_stacked_false(self): def test_hist_colors(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.hist() _check_colors(ax.patches[::10], facecolors=default_colors[:5]) def test_hist_colors_single_custom(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) custom_colors = "rgcby" ax = df.plot.hist(color=custom_colors) _check_colors(ax.patches[::10], facecolors=custom_colors) @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_hist_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.hist(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] _check_colors(ax.patches[::10], facecolors=rgba_colors) def test_hist_colors_single_col(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") _check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) def test_hist_colors_single_color(self): - df = 
DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot(kind="hist", color="green") _check_colors(ax.patches[::10], facecolors=["green"] * 5) @@ -449,7 +451,7 @@ def test_kde_colors(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_kde_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.kde(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=rgba_colors) @@ -458,7 +460,7 @@ def test_kde_colors_cmap(self, colormap): def test_kde_colors_and_styles_subplots(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -467,14 +469,14 @@ def test_kde_colors_and_styles_subplots(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["k", "red"]) def test_kde_colors_and_styles_subplots_single_col_str(self, colormap): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) axes = df.plot(kind="kde", color=colormap, subplots=True) for ax in axes: _check_colors(ax.get_lines(), linecolors=[colormap]) @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_custom_color(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) custom_colors = "rgcby" axes = df.plot(kind="kde", color=custom_colors, subplots=True) for ax, c in zip(axes, list(custom_colors)): @@ -483,7 +485,7 @@ def test_kde_colors_and_styles_subplots_custom_color(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def 
test_kde_colors_and_styles_subplots_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] axes = df.plot(kind="kde", colormap=colormap, subplots=True) for ax, c in zip(axes, rgba_colors): @@ -491,7 +493,7 @@ def test_kde_colors_and_styles_subplots_cmap(self, colormap): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_single_col(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) @@ -499,7 +501,7 @@ def test_kde_colors_and_styles_subplots_single_col(self): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_single_char(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # list of styles # single character style axes = df.plot(kind="kde", style="r", subplots=True) @@ -508,7 +510,7 @@ def test_kde_colors_and_styles_subplots_single_char(self): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_list(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # list of styles styles = list("rgcby") axes = df.plot(kind="kde", style=styles, subplots=True) @@ -518,7 +520,7 @@ def test_kde_colors_and_styles_subplots_list(self): def test_boxplot_colors(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) bp = df.plot.box(return_type="dict") _check_colors_box( bp, @@ -529,7 +531,7 @@ def test_boxplot_colors(self): ) def test_boxplot_colors_dict_colors(self): - df = 
DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) dict_colors = { "boxes": "#572923", "whiskers": "#982042", @@ -548,7 +550,7 @@ def test_boxplot_colors_dict_colors(self): def test_boxplot_colors_default_color(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # partial colors dict_colors = {"whiskers": "c", "medians": "m"} bp = df.plot.box(color=dict_colors, return_type="dict") @@ -556,7 +558,7 @@ def test_boxplot_colors_default_color(self): @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_boxplot_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) bp = df.plot.box(colormap=colormap, return_type="dict") jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] _check_colors_box( @@ -564,19 +566,19 @@ def test_boxplot_colors_cmap(self, colormap): ) def test_boxplot_colors_single(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # string color is applied to all artists except fliers bp = df.plot.box(color="DodgerBlue", return_type="dict") _check_colors_box(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") def test_boxplot_colors_tuple(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # tuple is also applied to all artists except fliers bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") _check_colors_box(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") def test_boxplot_colors_invalid(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) msg = re.escape( "color dict contains invalid key 'xxxx'. 
The key must be either " "['boxes', 'whiskers', 'medians', 'caps']" @@ -591,7 +593,7 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(np.random.default_rng(2).randn(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) ax = df.plot() expected = _unpack_cycler(plt.rcParams)[:3] @@ -648,7 +650,9 @@ def test_colors_of_columns_with_same_name(self): assert legend.get_color() == line.get_color() def test_invalid_colormap(self): - df = DataFrame(np.random.default_rng(2).randn(3, 2), columns=["A", "B"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(3, 2), columns=["A", "B"] + ) msg = "(is not a valid value)|(is not a known colormap)" with pytest.raises((ValueError, KeyError), match=msg): df.plot(colormap="invalid_colormap") diff --git a/pandas/tests/plotting/frame/test_frame_legend.py b/pandas/tests/plotting/frame/test_frame_legend.py index 83855fe4d8106..41db738442549 100644 --- a/pandas/tests/plotting/frame/test_frame_legend.py +++ b/pandas/tests/plotting/frame/test_frame_legend.py @@ -98,13 +98,19 @@ def test_df_legend_labels_time_series(self): # Time Series ind = date_range("1/1/2014", periods=3) df = DataFrame( - np.random.default_rng(2).randn(3, 3), columns=["a", "b", "c"], index=ind + np.random.default_rng(2).standard_normal(3, 3), + columns=["a", "b", "c"], + index=ind, ) df2 = DataFrame( - np.random.default_rng(2).randn(3, 3), columns=["d", "e", "f"], index=ind + np.random.default_rng(2).standard_normal(3, 3), + columns=["d", "e", "f"], + index=ind, ) df3 = DataFrame( - np.random.default_rng(2).randn(3, 3), columns=["g", "h", "i"], index=ind + np.random.default_rng(2).standard_normal(3, 3), + columns=["g", "h", "i"], + index=ind, ) ax = df.plot(legend=True, secondary_y="b") _check_legend_labels(ax, labels=["a", "b (right)", "c"]) @@ -118,13 +124,19 @@ def test_df_legend_labels_time_series_scatter(self): # Time Series ind = 
date_range("1/1/2014", periods=3) df = DataFrame( - np.random.default_rng(2).randn(3, 3), columns=["a", "b", "c"], index=ind + np.random.default_rng(2).standard_normal(3, 3), + columns=["a", "b", "c"], + index=ind, ) df2 = DataFrame( - np.random.default_rng(2).randn(3, 3), columns=["d", "e", "f"], index=ind + np.random.default_rng(2).standard_normal(3, 3), + columns=["d", "e", "f"], + index=ind, ) df3 = DataFrame( - np.random.default_rng(2).randn(3, 3), columns=["g", "h", "i"], index=ind + np.random.default_rng(2).standard_normal(3, 3), + columns=["g", "h", "i"], + index=ind, ) # scatter ax = df.plot.scatter(x="a", y="b", label="data1") @@ -138,7 +150,9 @@ def test_df_legend_labels_time_series_scatter(self): def test_df_legend_labels_time_series_no_mutate(self): ind = date_range("1/1/2014", periods=3) df = DataFrame( - np.random.default_rng(2).randn(3, 3), columns=["a", "b", "c"], index=ind + np.random.default_rng(2).standard_normal(3, 3), + columns=["a", "b", "c"], + index=ind, ) # ensure label args pass through and # index name does not mutate @@ -178,7 +192,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def test_legend_name(self): multi = DataFrame( - np.random.default_rng(2).randn(4, 4), + np.random.default_rng(2).standard_normal(4, 4), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -187,7 +201,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() _check_text_labels(leg_title, "group,individual") - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() _check_text_labels(leg_title, "group,individual") @@ -219,7 +233,9 @@ def test_no_legend(self, kind): def test_missing_markers_legend(self): # 14958 - df = DataFrame(np.random.default_rng(2).randn(8, 3), columns=["A", "B", "C"]) + df = DataFrame( + 
np.random.default_rng(2).standard_normal(8, 3), columns=["A", "B", "C"] + ) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index ac24fda59d64b..bac92d8ee5f84 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -276,7 +276,7 @@ def test_subplots_layout_single_column( def test_subplots_warnings(self, idx): # GH 9464 with tm.assert_produces_warning(None): - df = DataFrame(np.random.default_rng(2).randn(5, 4), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 4), index=idx) df.plot(subplots=True, layout=(3, 2)) def test_subplots_multiple_axes(self): @@ -353,7 +353,7 @@ def test_subplots_ts_share_axes(self): _, axes = mpl.pyplot.subplots(3, 3, sharex=True, sharey=True) mpl.pyplot.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) df = DataFrame( - np.random.default_rng(2).randn(10, 9), + np.random.default_rng(2).standard_normal(10, 9), index=date_range(start="2014-07-01", freq="M", periods=10), ) for i, ax in enumerate(axes.ravel()): @@ -469,7 +469,7 @@ def test_boxplot_subplots_return_type(self, hist_df, rt): def test_df_subplots_patterns_minorticks(self): # GH 10657 df = DataFrame( - np.random.default_rng(2).randn(10, 2), + np.random.default_rng(2).standard_normal(10, 2), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -489,7 +489,7 @@ def test_df_subplots_patterns_minorticks(self): def test_df_subplots_patterns_minorticks_1st_ax_hidden(self): # GH 10657 df = DataFrame( - np.random.default_rng(2).randn(10, 2), + np.random.default_rng(2).standard_normal(10, 2), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -508,7 +508,7 @@ def test_df_subplots_patterns_minorticks_1st_ax_hidden(self): def 
test_df_subplots_patterns_minorticks_not_shared(self): # GH 10657 df = DataFrame( - np.random.default_rng(2).randn(10, 2), + np.random.default_rng(2).standard_normal(10, 2), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -628,7 +628,7 @@ def test_bar_align_multiple_columns(self, kwargs): ], ) def test_bar_align_single_column(self, kwargs): - df = DataFrame(np.random.default_rng(2).randn(5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5)) self._check_bar_alignment(df, **kwargs) @pytest.mark.parametrize( @@ -643,13 +643,13 @@ def test_bar_align_single_column(self, kwargs): ], ) def test_bar_barwidth_position(self, kwargs): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) self._check_bar_alignment(df, width=0.9, position=0.2, **kwargs) @pytest.mark.parametrize("w", [1, 1.0]) def test_bar_barwidth_position_int(self, w): # GH 12979 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) ax = df.plot.bar(stacked=True, width=w) ticks = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) @@ -670,7 +670,7 @@ def test_bar_barwidth_position_int(self, w): ) def test_bar_barwidth_position_int_width_1(self, kind, kwargs): # GH 12979 - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) self._check_bar_alignment(df, kind=kind, width=1, **kwargs) def _check_bar_alignment( diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index a2e0b87bf41e9..a275aa5b9f458 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -17,7 +17,7 @@ @pytest.fixture def hist_df(): - df = DataFrame(np.random.default_rng(2).randn(30, 2), columns=["A", "B"]) + df = DataFrame(np.random.default_rng(2).standard_normal(30, 2), 
columns=["A", "B"]) df["C"] = np.random.default_rng(2).choice(["a", "b", "c"], 30) df["D"] = np.random.default_rng(2).choice(["a", "b", "c"], 30) return df diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 53ee98e2146da..04ccbde58d28e 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -73,7 +73,7 @@ def test_stacked_boxplot_set_axis(self): ) def test_boxplot_legacy1(self, kwargs, warn): df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -85,7 +85,7 @@ def test_boxplot_legacy1(self, kwargs, warn): _check_plot_works(df.boxplot, **kwargs) def test_boxplot_legacy1_series(self): - ser = Series(np.random.default_rng(2).randn(6)) + ser = Series(np.random.default_rng(2).standard_normal(6)) _check_plot_works(plotting._core.boxplot, data=ser, return_type="dict") def test_boxplot_legacy2(self): @@ -147,7 +147,7 @@ def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -163,7 +163,7 @@ def test_boxplot_return_type_legacy_return_type(self, return_type): # API change in https://github.com/pandas-dev/pandas/pull/7096 df = DataFrame( - np.random.default_rng(2).randn(6, 4), + np.random.default_rng(2).standard_normal(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -173,7 +173,7 @@ def test_boxplot_return_type_legacy_return_type(self, return_type): def test_boxplot_axis_limits(self, hist_df): df = hist_df.copy() - df["age"] = np.random.default_rng(2).randint(1, 20, df.shape[0]) + df["age"] = np.random.default_rng(2).integers(1, 20, df.shape[0]) # 
One full row height_ax, weight_ax = df.boxplot(["height", "weight"], by="category") _check_ax_limits(df["height"], height_ax) @@ -182,7 +182,7 @@ def test_boxplot_axis_limits(self, hist_df): def test_boxplot_axis_limits_two_rows(self, hist_df): df = hist_df.copy() - df["age"] = np.random.default_rng(2).randint(1, 20, df.shape[0]) + df["age"] = np.random.default_rng(2).integers(1, 20, df.shape[0]) # Two rows, one partial p = df.boxplot(["height", "weight", "age"], by="category") height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0] @@ -196,7 +196,7 @@ def test_boxplot_axis_limits_two_rows(self, hist_df): assert dummy_ax._sharey is None def test_boxplot_empty_column(self): - df = DataFrame(np.random.default_rng(2).randn(20, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(20, 4)) df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type="axes") @@ -217,8 +217,8 @@ def test_boxplot_numeric_data(self): df = DataFrame( { "a": date_range("2012-01-01", periods=100), - "b": np.random.default_rng(2).randn(100), - "c": np.random.default_rng(2).randn(100) + 2, + "b": np.random.default_rng(2).standard_normal(100), + "c": np.random.default_rng(2).standard_normal(100) + 2, "d": date_range("2012-01-01", periods=100).astype(str), "e": date_range("2012-01-01", periods=100, tz="UTC"), "f": timedelta_range("1 days", periods=100), @@ -309,8 +309,8 @@ def test_specified_props_kwd(self, props, expected): def test_plot_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.default_rng(2).randn(10), - "b": np.random.default_rng(2).randn(10), + "a": np.random.default_rng(2).standard_normal(10), + "b": np.random.default_rng(2).standard_normal(10), "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) @@ -323,8 +323,8 @@ def test_plot_xlabel_ylabel(self, vert): def test_boxplot_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.default_rng(2).randn(10), - "b": np.random.default_rng(2).randn(10), + "a": 
np.random.default_rng(2).standard_normal(10), + "b": np.random.default_rng(2).standard_normal(10), "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) @@ -337,8 +337,8 @@ def test_boxplot_xlabel_ylabel(self, vert): def test_boxplot_group_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.default_rng(2).randn(10), - "b": np.random.default_rng(2).randn(10), + "a": np.random.default_rng(2).standard_normal(10), + "b": np.random.default_rng(2).standard_normal(10), "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) @@ -353,8 +353,8 @@ def test_boxplot_group_xlabel_ylabel(self, vert): def test_boxplot_group_no_xlabel_ylabel(self, vert): df = DataFrame( { - "a": np.random.default_rng(2).randn(10), - "b": np.random.default_rng(2).randn(10), + "a": np.random.default_rng(2).standard_normal(10), + "b": np.random.default_rng(2).standard_normal(10), "group": np.random.default_rng(2).choice(["group1", "group2"], 10), } ) @@ -481,7 +481,9 @@ def test_grouped_box_return_type_arg(self, hist_df, return_type): @pytest.mark.parametrize("return_type", ["dict", "axes", "both"]) def test_grouped_box_return_type_arg_duplcate_cats(self, return_type): columns2 = "X B C D A".split() - df2 = DataFrame(np.random.default_rng(2).randn(6, 5), columns=columns2) + df2 = DataFrame( + np.random.default_rng(2).standard_normal(6, 5), columns=columns2 + ) categories2 = "A B".split() df2["category"] = categories2 * 3 @@ -721,7 +723,9 @@ def test_boxplot_multiindex_column(self): tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.default_rng(2).randn(3, 8), index=["A", "B", "C"], columns=index + np.random.default_rng(2).standard_normal(3, 8), + index=["A", "B", "C"], + columns=index, ) col = [("bar", "one"), ("bar", "two")] diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index ff13feeb413cf..f07441543dd94 100644 --- 
a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -60,7 +60,7 @@ def test_ts_plot_with_tz(self, tz_aware_fixture): def test_fontsize_set_correctly(self): # For issue #8765 - df = DataFrame(np.random.default_rng(2).randn(10, 9), index=range(10)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 9), index=range(10)) _, ax = mpl.pyplot.subplots() df.plot(fontsize=2, ax=ax) for label in ax.get_xticklabels() + ax.get_yticklabels(): @@ -71,19 +71,21 @@ def test_frame_inferred(self): idx = date_range("1/1/1987", freq="MS", periods=100) idx = DatetimeIndex(idx.values, freq=None) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), index=idx) _check_plot_works(df.plot) # axes freq idx = idx[0:40].union(idx[45:99]) - df2 = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) + df2 = DataFrame( + np.random.default_rng(2).standard_normal(len(idx), 3), index=idx + ) _check_plot_works(df2.plot) def test_frame_inferred_n_gt_1(self): # N > 1 idx = date_range("2008-1-1 00:15:00", freq="15T", periods=10) idx = DatetimeIndex(idx.values, freq=None) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), index=idx) _check_plot_works(df.plot) def test_is_error_nozeroindex(self): @@ -112,7 +114,7 @@ def test_nonnumeric_exclude_error(self): @pytest.mark.parametrize("freq", ["S", "T", "H", "D", "W", "M", "Q", "A"]) def test_tsplot_period(self, freq): idx = period_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) _, ax = mpl.pyplot.subplots() _check_plot_works(ser.plot, ax=ax) @@ -121,7 +123,7 @@ def test_tsplot_period(self, freq): ) def test_tsplot_datetime(self, freq): idx = date_range("12/31/1999", freq=freq, 
periods=100) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) _, ax = mpl.pyplot.subplots() _check_plot_works(ser.plot, ax=ax) @@ -150,7 +152,7 @@ def test_both_style_and_color(self): def test_high_freq(self, freq): _, ax = mpl.pyplot.subplots() rng = date_range("1/1/2012", periods=100, freq=freq) - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _check_plot_works(ser.plot, ax=ax) def test_get_datevalue(self): @@ -182,7 +184,7 @@ def check_format_of_first_point(ax, expected_string): @pytest.mark.parametrize("freq", ["S", "T", "H", "D", "W", "M", "Q", "A"]) def test_line_plot_period_series(self, freq): idx = period_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) _check_plot_works(ser.plot, ser.index.freq) @pytest.mark.parametrize( @@ -192,7 +194,7 @@ def test_line_plot_period_mlt_series(self, frqncy): # test period index line plot for series with multiples (`mlt`) of the # frequency (`frqncy`) rule code. 
tests resolution of issue #14763 idx = period_range("12/31/1999", freq=frqncy, periods=100) - s = Series(np.random.default_rng(2).randn(len(idx)), idx) + s = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) _check_plot_works(s.plot, s.index.freq.rule_code) @pytest.mark.parametrize( @@ -200,14 +202,14 @@ def test_line_plot_period_mlt_series(self, frqncy): ) def test_line_plot_datetime_series(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) _check_plot_works(ser.plot, ser.index.freq.rule_code) @pytest.mark.parametrize("freq", ["S", "T", "H", "D", "W", "M", "Q", "A"]) def test_line_plot_period_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) df = DataFrame( - np.random.default_rng(2).randn(len(idx), 3), + np.random.default_rng(2).standard_normal(len(idx), 3), index=idx, columns=["A", "B", "C"], ) @@ -222,7 +224,7 @@ def test_line_plot_period_mlt_frame(self, frqncy): # #14763 idx = period_range("12/31/1999", freq=frqncy, periods=100) df = DataFrame( - np.random.default_rng(2).randn(len(idx), 3), + np.random.default_rng(2).standard_normal(len(idx), 3), index=idx, columns=["A", "B", "C"], ) @@ -235,7 +237,7 @@ def test_line_plot_period_mlt_frame(self, frqncy): def test_line_plot_datetime_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) df = DataFrame( - np.random.default_rng(2).randn(len(idx), 3), + np.random.default_rng(2).standard_normal(len(idx), 3), index=idx, columns=["A", "B", "C"], ) @@ -247,7 +249,7 @@ def test_line_plot_datetime_frame(self, freq): ) def test_line_plot_inferred_freq(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) ser = Series(ser.values, Index(np.asarray(ser.index))) 
_check_plot_works(ser.plot, ser.index.inferred_freq) @@ -268,12 +270,12 @@ def test_plot_offset_freq(self): def test_plot_offset_freq_business(self): dr = date_range("2023-01-01", freq="BQS", periods=10) - ser = Series(np.random.default_rng(2).randn(len(dr)), index=dr) + ser = Series(np.random.default_rng(2).standard_normal(len(dr)), index=dr) _check_plot_works(ser.plot) def test_plot_multiple_inferred_freq(self): dr = Index([datetime(2000, 1, 1), datetime(2000, 1, 6), datetime(2000, 1, 11)]) - ser = Series(np.random.default_rng(2).randn(len(dr)), index=dr) + ser = Series(np.random.default_rng(2).standard_normal(len(dr)), index=dr) _check_plot_works(ser.plot) @pytest.mark.xfail(reason="Api changed in 3.6.0") @@ -281,7 +283,7 @@ def test_uhf(self): import pandas.plotting._matplotlib.converter as conv idx = date_range("2012-6-22 21:59:51.960928", freq="L", periods=500) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), index=idx) _, ax = mpl.pyplot.subplots() df.plot(ax=ax) @@ -297,7 +299,7 @@ def test_uhf(self): def test_irreg_hf(self): idx = date_range("2012-6-22 21:59:51", freq="S", periods=10) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), index=idx) irreg = df.iloc[[0, 1, 3, 4]] _, ax = mpl.pyplot.subplots() @@ -309,7 +311,9 @@ def test_irreg_hf(self): def test_irreg_hf_object(self): idx = date_range("2012-6-22 21:59:51", freq="S", periods=10) - df2 = DataFrame(np.random.default_rng(2).randn(len(idx), 2), index=idx) + df2 = DataFrame( + np.random.default_rng(2).standard_normal(len(idx), 2), index=idx + ) _, ax = mpl.pyplot.subplots() df2.index = df2.index.astype(object) df2.plot(ax=ax) @@ -424,7 +428,7 @@ def test_finder_daily(self): rs2 = [] for n in day_lst: rng = bdate_range("1999-1-1", periods=n) - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = 
Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -446,7 +450,7 @@ def test_finder_quarterly(self): rs2 = [] for n in yrs: rng = period_range("1987Q2", periods=int(n * 4), freq="Q") - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -468,7 +472,7 @@ def test_finder_monthly(self): rs2 = [] for n in yrs: rng = period_range("1987Q2", periods=int(n * 12), freq="M") - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -484,7 +488,7 @@ def test_finder_monthly(self): def test_finder_monthly_long(self): rng = period_range("1988Q1", periods=24 * 12, freq="M") - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -498,7 +502,7 @@ def test_finder_annual(self): rs = [] for nyears in [5, 10, 19, 49, 99, 199, 599, 1001]: rng = period_range("1987", periods=nyears, freq="A") - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -511,7 +515,7 @@ def test_finder_annual(self): def test_finder_minutely(self): nminutes = 50 * 24 * 60 rng = date_range("1/1/1999", freq="Min", periods=nminutes) - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -523,7 +527,7 @@ def test_finder_minutely(self): def test_finder_hourly(self): nhours = 23 rng = 
date_range("1/1/1999", freq="H", periods=nhours) - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) xaxis = ax.get_xaxis() @@ -571,7 +575,7 @@ def test_gaps_irregular(self): def test_gaps_non_ts(self): # non-ts idx = [0, 1, 2, 5, 7, 9, 12, 15, 20] - ser = Series(np.random.default_rng(2).randn(len(idx)), idx) + ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx) ser.iloc[2:5] = np.nan _, ax = mpl.pyplot.subplots() ser.plot(ax=ax) @@ -592,7 +596,7 @@ def test_gap_upsample(self): low.plot(ax=ax) idxh = date_range(low.index[0], low.index[-1], freq="12h") - s = Series(np.random.default_rng(2).randn(len(idxh)), idxh) + s = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) s.plot(secondary_y=True) lines = ax.get_lines() assert len(lines) == 1 @@ -607,7 +611,7 @@ def test_gap_upsample(self): assert mask[5:25, 1].all() def test_secondary_y(self): - ser = Series(np.random.default_rng(2).randn(10)) + ser = Series(np.random.default_rng(2).standard_normal(10)) fig, _ = mpl.pyplot.subplots() ax = ser.plot(secondary_y=True) assert hasattr(ax, "left_ax") @@ -621,16 +625,16 @@ def test_secondary_y(self): mpl.pyplot.close(fig) def test_secondary_y_yaxis(self): - Series(np.random.default_rng(2).randn(10)) - ser2 = Series(np.random.default_rng(2).randn(10)) + Series(np.random.default_rng(2).standard_normal(10)) + ser2 = Series(np.random.default_rng(2).standard_normal(10)) _, ax2 = mpl.pyplot.subplots() ser2.plot(ax=ax2) assert ax2.get_yaxis().get_ticks_position() == "left" mpl.pyplot.close(ax2.get_figure()) def test_secondary_both(self): - ser = Series(np.random.default_rng(2).randn(10)) - ser2 = Series(np.random.default_rng(2).randn(10)) + ser = Series(np.random.default_rng(2).standard_normal(10)) + ser2 = Series(np.random.default_rng(2).standard_normal(10)) ax = ser2.plot() ax2 = ser.plot(secondary_y=True) assert 
ax.get_yaxis().get_visible() @@ -641,7 +645,7 @@ def test_secondary_both(self): def test_secondary_y_ts(self): idx = date_range("1/1/2000", periods=10) - ser = Series(np.random.default_rng(2).randn(10), idx) + ser = Series(np.random.default_rng(2).standard_normal(10), idx) fig, _ = mpl.pyplot.subplots() ax = ser.plot(secondary_y=True) assert hasattr(ax, "left_ax") @@ -656,7 +660,7 @@ def test_secondary_y_ts(self): def test_secondary_y_ts_yaxis(self): idx = date_range("1/1/2000", periods=10) - ser2 = Series(np.random.default_rng(2).randn(10), idx) + ser2 = Series(np.random.default_rng(2).standard_normal(10), idx) _, ax2 = mpl.pyplot.subplots() ser2.plot(ax=ax2) assert ax2.get_yaxis().get_ticks_position() == "left" @@ -664,13 +668,13 @@ def test_secondary_y_ts_yaxis(self): def test_secondary_y_ts_visible(self): idx = date_range("1/1/2000", periods=10) - ser2 = Series(np.random.default_rng(2).randn(10), idx) + ser2 = Series(np.random.default_rng(2).standard_normal(10), idx) ax = ser2.plot() assert ax.get_yaxis().get_visible() @td.skip_if_no_scipy def test_secondary_kde(self): - ser = Series(np.random.default_rng(2).randn(10)) + ser = Series(np.random.default_rng(2).standard_normal(10)) fig, ax = mpl.pyplot.subplots() ax = ser.plot(secondary_y=True, kind="density", ax=ax) assert hasattr(ax, "left_ax") @@ -679,21 +683,25 @@ def test_secondary_kde(self): assert axes[1].get_yaxis().get_ticks_position() == "right" def test_secondary_bar(self): - ser = Series(np.random.default_rng(2).randn(10)) + ser = Series(np.random.default_rng(2).standard_normal(10)) fig, ax = mpl.pyplot.subplots() ser.plot(secondary_y=True, kind="bar", ax=ax) axes = fig.get_axes() assert axes[1].get_yaxis().get_ticks_position() == "right" def test_secondary_frame(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 3), columns=["a", "b", "c"] + ) axes = df.plot(secondary_y=["a", "c"], subplots=True) assert 
axes[0].get_yaxis().get_ticks_position() == "right" assert axes[1].get_yaxis().get_ticks_position() == "left" assert axes[2].get_yaxis().get_ticks_position() == "right" def test_secondary_bar_frame(self): - df = DataFrame(np.random.default_rng(2).randn(5, 3), columns=["a", "b", "c"]) + df = DataFrame( + np.random.default_rng(2).standard_normal(5, 3), columns=["a", "b", "c"] + ) axes = df.plot(kind="bar", secondary_y=["a", "c"], subplots=True) assert axes[0].get_yaxis().get_ticks_position() == "right" assert axes[1].get_yaxis().get_ticks_position() == "left" @@ -768,8 +776,8 @@ def test_mixed_freq_irregular_first_df(self): def test_mixed_freq_hf_first(self): idxh = date_range("1/1/1999", periods=365, freq="D") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() high.plot(ax=ax) low.plot(ax=ax) @@ -778,7 +786,7 @@ def test_mixed_freq_hf_first(self): def test_mixed_freq_alignment(self): ts_ind = date_range("2012-01-01 13:00", "2012-01-02", freq="H") - ts_data = np.random.default_rng(2).randn(12) + ts_data = np.random.default_rng(2).standard_normal(12) ts = Series(ts_data, index=ts_ind) ts2 = ts.asfreq("T").interpolate() @@ -792,8 +800,8 @@ def test_mixed_freq_alignment(self): def test_mixed_freq_lf_first(self): idxh = date_range("1/1/1999", periods=365, freq="D") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() low.plot(legend=True, ax=ax) 
high.plot(legend=True, ax=ax) @@ -806,8 +814,8 @@ def test_mixed_freq_lf_first(self): def test_mixed_freq_lf_first_hourly(self): idxh = date_range("1/1/1999", periods=240, freq="T") idxl = date_range("1/1/1999", periods=4, freq="H") - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() low.plot(ax=ax) high.plot(ax=ax) @@ -818,7 +826,7 @@ def test_mixed_freq_irreg_period(self): ts = tm.makeTimeSeries() irreg = ts.iloc[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 16, 17, 18, 29]] rng = period_range("1/3/2000", periods=30, freq="B") - ps = Series(np.random.default_rng(2).randn(len(rng)), rng) + ps = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() irreg.plot(ax=ax) ps.plot(ax=ax) @@ -875,8 +883,8 @@ def test_nat_handling(self): def test_to_weekly_resampling(self): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() high.plot(ax=ax) low.plot(ax=ax) @@ -886,8 +894,8 @@ def test_to_weekly_resampling(self): def test_from_weekly_resampling(self): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = 
mpl.pyplot.subplots() low.plot(ax=ax) high.plot(ax=ax) @@ -1011,8 +1019,8 @@ def test_mixed_freq_second_millisecond(self): # GH 7772, GH 7760 idxh = date_range("2014-07-01 09:00", freq="S", periods=50) idxl = date_range("2014-07-01 09:00", freq="100L", periods=500) - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) # high to low _, ax = mpl.pyplot.subplots() high.plot(ax=ax) @@ -1025,8 +1033,8 @@ def test_mixed_freq_second_millisecond_low_to_high(self): # GH 7772, GH 7760 idxh = date_range("2014-07-01 09:00", freq="S", periods=50) idxl = date_range("2014-07-01 09:00", freq="100L", periods=500) - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) # low to high _, ax = mpl.pyplot.subplots() low.plot(ax=ax) @@ -1039,7 +1047,8 @@ def test_irreg_dtypes(self): # date idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] df = DataFrame( - np.random.default_rng(2).randn(len(idx), 3), Index(idx, dtype=object) + np.random.default_rng(2).standard_normal(len(idx), 3), + Index(idx, dtype=object), ) _check_plot_works(df.plot) @@ -1047,18 +1056,18 @@ def test_irreg_dtypes_dt64(self): # np.datetime64 idx = date_range("1/1/2000", periods=10) idx = idx[[0, 2, 5, 9]].astype(object) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 3), idx) + df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), idx) _, ax = mpl.pyplot.subplots() _check_plot_works(df.plot, ax=ax) def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) - deltas = np.random.default_rng(2).randint(1, 20, 3).cumsum() + deltas = 
np.random.default_rng(2).integers(1, 20, 3).cumsum() ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) df = DataFrame( { - "a": np.random.default_rng(2).randn(len(ts)), - "b": np.random.default_rng(2).randn(len(ts)), + "a": np.random.default_rng(2).standard_normal(len(ts)), + "b": np.random.default_rng(2).standard_normal(len(ts)), }, index=ts, ) @@ -1081,12 +1090,12 @@ def test_time(self): def test_time_change_xlim(self): t = datetime(1, 1, 1, 3, 30, 0) - deltas = np.random.default_rng(2).randint(1, 20, 3).cumsum() + deltas = np.random.default_rng(2).integers(1, 20, 3).cumsum() ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) df = DataFrame( { - "a": np.random.default_rng(2).randn(len(ts)), - "b": np.random.default_rng(2).randn(len(ts)), + "a": np.random.default_rng(2).standard_normal(len(ts)), + "b": np.random.default_rng(2).standard_normal(len(ts)), }, index=ts, ) @@ -1126,12 +1135,12 @@ def test_time_change_xlim(self): def test_time_musec(self): t = datetime(1, 1, 1, 3, 30, 0) - deltas = np.random.default_rng(2).randint(1, 20, 3).cumsum() + deltas = np.random.default_rng(2).integers(1, 20, 3).cumsum() ts = np.array([(t + timedelta(microseconds=int(x))).time() for x in deltas]) df = DataFrame( { - "a": np.random.default_rng(2).randn(len(ts)), - "b": np.random.default_rng(2).randn(len(ts)), + "a": np.random.default_rng(2).standard_normal(len(ts)), + "b": np.random.default_rng(2).standard_normal(len(ts)), }, index=ts, ) @@ -1162,8 +1171,8 @@ def test_time_musec(self): def test_secondary_upsample(self): idxh = date_range("1/1/1999", periods=365, freq="D") idxl = date_range("1/1/1999", periods=12, freq="M") - high = Series(np.random.default_rng(2).randn(len(idxh)), idxh) - low = Series(np.random.default_rng(2).randn(len(idxl)), idxl) + high = Series(np.random.default_rng(2).standard_normal(len(idxh)), idxh) + low = Series(np.random.default_rng(2).standard_normal(len(idxl)), idxl) _, ax = mpl.pyplot.subplots() 
low.plot(ax=ax) ax = high.plot(secondary_y=True, ax=ax) @@ -1280,7 +1289,7 @@ def test_secondary_legend_nonts_multi_col(self): @pytest.mark.xfail(reason="Api changed in 3.6.0") def test_format_date_axis(self): rng = date_range("1/1/2012", periods=12, freq="M") - df = DataFrame(np.random.default_rng(2).randn(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), rng) _, ax = mpl.pyplot.subplots() ax = df.plot(ax=ax) xaxis = ax.get_xaxis() @@ -1408,7 +1417,7 @@ def test_format_timedelta_ticks_narrow(self): expected_labels = [f"00:00:00.0000000{i:0>2d}" for i in np.arange(10)] rng = timedelta_range("0", periods=10, freq="ns") - df = DataFrame(np.random.default_rng(2).randn(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), rng) _, ax = mpl.pyplot.subplots() df.plot(fontsize=2, ax=ax) mpl.pyplot.draw() @@ -1432,7 +1441,7 @@ def test_format_timedelta_ticks_wide(self): ] rng = timedelta_range("0", periods=10, freq="1 d") - df = DataFrame(np.random.default_rng(2).randn(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), rng) _, ax = mpl.pyplot.subplots() ax = df.plot(fontsize=2, ax=ax) mpl.pyplot.draw() @@ -1451,14 +1460,14 @@ def test_timedelta_plot(self): def test_timedelta_long_period(self): # test long period index = timedelta_range("1 day 2 hr 30 min 10 s", periods=10, freq="1 d") - s = Series(np.random.default_rng(2).randn(len(index)), index) + s = Series(np.random.default_rng(2).standard_normal(len(index)), index) _, ax = mpl.pyplot.subplots() _check_plot_works(s.plot, ax=ax) def test_timedelta_short_period(self): # test short period index = timedelta_range("1 day 2 hr 30 min 10 s", periods=10, freq="1 ns") - s = Series(np.random.default_rng(2).randn(len(index)), index) + s = Series(np.random.default_rng(2).standard_normal(len(index)), index) _, ax = mpl.pyplot.subplots() _check_plot_works(s.plot, ax=ax) @@ -1503,7 +1512,7 @@ def 
test_add_matplotlib_datetime64(self): # datetime64 data. This still fails because matplotlib overrides the # ax.xaxis.converter with a DatetimeConverter s = Series( - np.random.default_rng(2).randn(10), + np.random.default_rng(2).standard_normal(10), index=date_range("1970-01-02", periods=10), ) ax = s.plot() diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 848131882f2e6..22fa71f480124 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -49,7 +49,7 @@ def test_plotting_with_float_index_works(self): df = DataFrame( { "def": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "val": np.random.default_rng(2).randn(9), + "val": np.random.default_rng(2).standard_normal(9), }, index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0], ) @@ -61,7 +61,7 @@ def test_plotting_with_float_index_works_apply(self): df = DataFrame( { "def": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "val": np.random.default_rng(2).randn(9), + "val": np.random.default_rng(2).standard_normal(9), }, index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0], ) @@ -108,7 +108,9 @@ def test_groupby_hist_frame_with_legend(self, column, expected_axes_num): index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + np.random.default_rng(2).standard_normal(30, 2), + index=index, + columns=["a", "b"], ) g = df.groupby("c") @@ -122,7 +124,9 @@ def test_groupby_hist_frame_with_legend_raises(self, column): # GH 6279 - DataFrameGroupBy histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + np.random.default_rng(2).standard_normal(30, 2), + index=index, + columns=["a", "b"], ) g = df.groupby("c") @@ -133,7 +137,9 @@ def test_groupby_hist_series_with_legend(self): # GH 6279 - SeriesGroupBy histogram can have a legend index = Index(15 * ["1"] + 15 * ["2"], 
name="c") df = DataFrame( - np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + np.random.default_rng(2).standard_normal(30, 2), + index=index, + columns=["a", "b"], ) g = df.groupby("c") @@ -145,7 +151,9 @@ def test_groupby_hist_series_with_legend_raises(self): # GH 6279 - SeriesGroupBy histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + np.random.default_rng(2).standard_normal(30, 2), + index=index, + columns=["a", "b"], ) g = df.groupby("c") diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 9daeb83871801..cee67a9c85531 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -72,7 +72,7 @@ def test_hist_legacy_by_fig_error(self, ts): ts.hist(by=ts.index, figure=fig) def test_hist_bins_legacy(self): - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 @@ -120,8 +120,8 @@ def test_hist_no_overlap(self): subplot, ) - x = Series(np.random.default_rng(2).randn(2)) - y = Series(np.random.default_rng(2).randn(2)) + x = Series(np.random.default_rng(2).standard_normal(2)) + y = Series(np.random.default_rng(2).standard_normal(2)) subplot(121) x.hist() subplot(122) @@ -156,7 +156,7 @@ def test_plot_fails_when_ax_differs_from_figure(self, ts): ) def test_histtype_argument(self, histtype, expected): # GH23992 Verify functioning of histtype argument - ser = Series(np.random.default_rng(2).randint(1, 10)) + ser = Series(np.random.default_rng(2).integers(1, 10)) ax = ser.hist(histtype=histtype) _check_patches_all_filled(ax, filled=expected) @@ -166,7 +166,7 @@ def test_histtype_argument(self, histtype, expected): def test_hist_with_legend(self, by, expected_axes_num, expected_layout): # GH 6279 - Series 
histogram can have a legend index = 15 * ["1"] + 15 * ["2"] - s = Series(np.random.default_rng(2).randn(30), index=index, name="a") + s = Series(np.random.default_rng(2).standard_normal(30), index=index, name="a") s.index.name = "b" # Use default_axes=True when plotting method generate subplots itself @@ -178,7 +178,7 @@ def test_hist_with_legend(self, by, expected_axes_num, expected_layout): def test_hist_with_legend_raises(self, by): # GH 6279 - Series histogram with legend and label raises index = 15 * ["1"] + 15 * ["2"] - s = Series(np.random.default_rng(2).randn(30), index=index, name="a") + s = Series(np.random.default_rng(2).standard_normal(30), index=index, name="a") s.index.name = "b" with pytest.raises(ValueError, match="Cannot use both legend and label"): @@ -259,9 +259,9 @@ def test_hist_df_legacy(self, hist_df): @pytest.mark.slow def test_hist_df_legacy_layout(self): # make sure layout is handled - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) df[2] = to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 812419200000000000, 819331200000000000, size=10, @@ -277,15 +277,15 @@ def test_hist_df_legacy_layout(self): @pytest.mark.slow def test_hist_df_legacy_layout2(self): - df = DataFrame(np.random.default_rng(2).randn(10, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 1)) _check_plot_works(df.hist) @pytest.mark.slow def test_hist_df_legacy_layout3(self): # make sure layout is handled - df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) df[5] = to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 812419200000000000, 819331200000000000, size=10, @@ -301,9 +301,9 @@ def test_hist_df_legacy_layout3(self): "kwargs", [{"sharex": True, "sharey": True}, {"figsize": (8, 10)}, {"bins": 5}] ) def test_hist_df_legacy_layout_kwargs(self, kwargs): 
- df = DataFrame(np.random.default_rng(2).randn(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) df[5] = to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 812419200000000000, 819331200000000000, size=10, @@ -354,14 +354,14 @@ def test_hist_non_numerical_or_datetime_raises(self): df = DataFrame( { "a": np.random.default_rng(2).rand(10), - "b": np.random.default_rng(2).randint(0, 10, 10), + "b": np.random.default_rng(2).integers(0, 10, 10), "c": to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 ) ), "d": to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 1582800000000000000, 1583500000000000000, 10, dtype=np.int64 ), utc=True, @@ -389,9 +389,9 @@ def test_hist_non_numerical_or_datetime_raises(self): ), ) def test_hist_layout(self, layout_test): - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) df[2] = to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 812419200000000000, 819331200000000000, size=10, @@ -403,9 +403,9 @@ def test_hist_layout(self, layout_test): _check_axes_shape(axes, axes_num=3, layout=expected) def test_hist_layout_error(self): - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) df[2] = to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 812419200000000000, 819331200000000000, size=10, @@ -427,9 +427,9 @@ def test_hist_layout_error(self): # GH 9351 def test_tight_layout(self): - df = DataFrame(np.random.default_rng(2).randn(100, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(100, 2)) df[2] = to_datetime( - np.random.default_rng(2).randint( + np.random.default_rng(2).integers( 812419200000000000, 819331200000000000, size=100, @@ -500,7 
+500,7 @@ def test_hist_column_order_unchanged(self, column, expected): def test_histtype_argument(self, histtype, expected): # GH23992 Verify functioning of histtype argument df = DataFrame( - np.random.default_rng(2).randint(1, 10, size=(100, 2)), columns=["a", "b"] + np.random.default_rng(2).integers(1, 10, size=(100, 2)), columns=["a", "b"] ) ax = df.hist(histtype=histtype) _check_patches_all_filled(ax, filled=expected) @@ -517,7 +517,9 @@ def test_hist_with_legend(self, by, column): index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + np.random.default_rng(2).standard_normal(30, 2), + index=index, + columns=["a", "b"], ) # Use default_axes=True when plotting method generate subplots itself @@ -541,14 +543,16 @@ def test_hist_with_legend_raises(self, by, column): # GH 6279 - DataFrame histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).randn(30, 2), index=index, columns=["a", "b"] + np.random.default_rng(2).standard_normal(30, 2), + index=index, + columns=["a", "b"], ) with pytest.raises(ValueError, match="Cannot use both legend and label"): df.hist(legend=True, by=by, column=column, label="d") def test_hist_df_kwargs(self): - df = DataFrame(np.random.default_rng(2).randn(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) _, ax = mpl.pyplot.subplots() ax = df.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 10 @@ -556,7 +560,7 @@ def test_hist_df_kwargs(self): def test_hist_df_with_nonnumerics(self): # GH 9853 df = DataFrame( - np.random.default_rng(2).RandomState(42).randn(10, 4), + np.random.default_rng(2).RandomState(42).standard_normal(10, 4), columns=["A", "B", "C", "D"], ) df["E"] = ["x", "y"] * 5 @@ -567,7 +571,7 @@ def test_hist_df_with_nonnumerics(self): def test_hist_df_with_nonnumerics_no_bins(self): # GH 9853 df = DataFrame( - 
np.random.default_rng(2).RandomState(42).randn(10, 4), + np.random.default_rng(2).RandomState(42).standard_normal(10, 4), columns=["A", "B", "C", "D"], ) df["E"] = ["x", "y"] * 5 @@ -577,7 +581,9 @@ def test_hist_df_with_nonnumerics_no_bins(self): def test_hist_secondary_legend(self): # GH 9610 - df = DataFrame(np.random.default_rng(2).randn(30, 4), columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 4), columns=list("abcd") + ) # primary -> secondary _, ax = mpl.pyplot.subplots() @@ -591,7 +597,9 @@ def test_hist_secondary_legend(self): def test_hist_secondary_secondary(self): # GH 9610 - df = DataFrame(np.random.default_rng(2).randn(30, 4), columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 4), columns=list("abcd") + ) # secondary -> secondary _, ax = mpl.pyplot.subplots() ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax) @@ -604,7 +612,9 @@ def test_hist_secondary_secondary(self): def test_hist_secondary_primary(self): # GH 9610 - df = DataFrame(np.random.default_rng(2).randn(30, 4), columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 4), columns=list("abcd") + ) # secondary -> primary _, ax = mpl.pyplot.subplots() ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax) @@ -652,16 +662,16 @@ def test_grouped_hist_legacy(self): from pandas.plotting._matplotlib.hist import _grouped_hist rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.randn(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( - rs.randint( + rs.integers( 812419200000000000, 819331200000000000, size=10, dtype=np.int64, ) ) - df["C"] = rs.randint(0, 4, 10) + df["C"] = rs.integers(0, 4, 10) df["D"] = ["X"] * 10 axes = _grouped_hist(df.A, by=df.C) @@ -669,32 +679,32 @@ def test_grouped_hist_legacy(self): def test_grouped_hist_legacy_axes_shape_no_col(self): rs = np.random.default_rng(2).RandomState(42) - df = 
DataFrame(rs.randn(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( - rs.randint( + rs.integers( 812419200000000000, 819331200000000000, size=10, dtype=np.int64, ) ) - df["C"] = rs.randint(0, 4, 10) + df["C"] = rs.integers(0, 4, 10) df["D"] = ["X"] * 10 axes = df.hist(by=df.C) _check_axes_shape(axes, axes_num=4, layout=(2, 2)) def test_grouped_hist_legacy_single_key(self): rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.randn(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( - rs.randint( + rs.integers( 812419200000000000, 819331200000000000, size=10, dtype=np.int64, ) ) - df["C"] = rs.randint(0, 4, 10) + df["C"] = rs.integers(0, 4, 10) df["D"] = ["X"] * 10 # group by a key with single value axes = df.hist(by="D", rot=30) @@ -707,16 +717,16 @@ def test_grouped_hist_legacy_grouped_hist_kwargs(self): from pandas.plotting._matplotlib.hist import _grouped_hist rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.randn(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( - rs.randint( + rs.integers( 812419200000000000, 819331200000000000, size=10, dtype=np.int64, ) ) - df["C"] = rs.randint(0, 4, 10) + df["C"] = rs.integers(0, 4, 10) # make sure kwargs to hist are handled xf, yf = 20, 18 xrot, yrot = 30, 40 @@ -743,16 +753,16 @@ def test_grouped_hist_legacy_grouped_hist(self): from pandas.plotting._matplotlib.hist import _grouped_hist rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.randn(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( - rs.randint( + rs.integers( 812419200000000000, 819331200000000000, size=10, dtype=np.int64, ) ) - df["C"] = rs.randint(0, 4, 10) + df["C"] = rs.integers(0, 4, 10) df["D"] = ["X"] * 10 axes = _grouped_hist(df.A, by=df.C, log=True) # scale of y must be 'log' @@ -762,16 +772,16 
@@ def test_grouped_hist_legacy_external_err(self): from pandas.plotting._matplotlib.hist import _grouped_hist rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.randn(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( - rs.randint( + rs.integers( 812419200000000000, 819331200000000000, size=10, dtype=np.int64, ) ) - df["C"] = rs.randint(0, 4, 10) + df["C"] = rs.integers(0, 4, 10) df["D"] = ["X"] * 10 # propagate attr exception from matplotlib.Axes.hist with tm.external_error_raised(AttributeError): @@ -779,16 +789,16 @@ def test_grouped_hist_legacy_external_err(self): def test_grouped_hist_legacy_figsize_err(self): rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.randn(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( - rs.randint( + rs.integers( 812419200000000000, 819331200000000000, size=10, dtype=np.int64, ) ) - df["C"] = rs.randint(0, 4, 10) + df["C"] = rs.integers(0, 4, 10) df["D"] = ["X"] * 10 msg = "Specify figure size by tuple instead" with pytest.raises(ValueError, match=msg): @@ -952,7 +962,7 @@ def test_axis_share_xy(self, hist_df): def test_histtype_argument(self, histtype, expected): # GH23992 Verify functioning of histtype argument df = DataFrame( - np.random.default_rng(2).randint(1, 10, size=(10, 2)), columns=["a", "b"] + np.random.default_rng(2).integers(1, 10, size=(10, 2)), columns=["a", "b"] ) ax = df.hist(by="a", histtype=histtype) _check_patches_all_filled(ax, filled=expected) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index f28834842d4af..20de9c24cc4f8 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -106,7 +106,7 @@ def test_scatter_matrix_axis(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.default_rng(2).RandomState(42).randn(100, 3)) + df = 
DataFrame(np.random.default_rng(2).RandomState(42).standard_normal(100, 3)) # we are plotting multiples on a sub-plot with tm.assert_produces_warning(UserWarning, check_stacklevel=False): @@ -131,7 +131,7 @@ def test_scatter_matrix_axis_smaller(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.default_rng(2).RandomState(42).randn(100, 3)) + df = DataFrame(np.random.default_rng(2).RandomState(42).standard_normal(100, 3)) df[0] = (df[0] - 2) / 3 # we are plotting multiples on a sub-plot @@ -170,9 +170,9 @@ def test_andrews_curves_no_warning(self, iris): "iris", DataFrame( { - "A": np.random.default_rng(2).rand(10), - "B": np.random.default_rng(2).rand(10), - "C": np.random.default_rng(2).rand(10), + "A": np.random.default_rng(2).standard_normal(10), + "B": np.random.default_rng(2).standard_normal(10), + "C": np.random.default_rng(2).standard_normal(10), "Name": ["A"] * 10, } ), @@ -197,9 +197,9 @@ def test_andrews_curves_linecolors(self, request, df, linecolors): "iris", DataFrame( { - "A": np.random.default_rng(2).rand(10), - "B": np.random.default_rng(2).rand(10), - "C": np.random.default_rng(2).rand(10), + "A": np.random.default_rng(2).standard_normal(10), + "B": np.random.default_rng(2).standard_normal(10), + "C": np.random.default_rng(2).standard_normal(10), "Name": ["A"] * 10, } ), @@ -467,7 +467,9 @@ def test_get_standard_colors_no_appending(self): color_after = get_standard_colors(1, color=color_before) assert len(color_after) == len(color_before) - df = DataFrame(np.random.default_rng(2).randn(48, 4), columns=list("ABCD")) + df = DataFrame( + np.random.default_rng(2).standard_normal(48, 4), columns=list("ABCD") + ) color_list = cm.gnuplot(np.linspace(0, 1, 16)) p = df.A.plot.bar(figsize=(16, 7), color=color_list) @@ -597,8 +599,8 @@ def test_externally_shared_axes(self): # Create data df = DataFrame( { - "a": np.random.default_rng(2).randn(1000), - "b": np.random.default_rng(2).randn(1000), + "a": 
np.random.default_rng(2).standard_normal(1000), + "b": np.random.default_rng(2).standard_normal(1000), } ) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 6c179786d8bb4..175679e171ad2 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -104,7 +104,7 @@ def test_plot_series_barh(self, series): def test_plot_series_bar_ax(self): ax = _check_plot_works( - Series(np.random.default_rng(2).randn(10)).plot.bar, color="black" + Series(np.random.default_rng(2).standard_normal(10)).plot.bar, color="black" ) _check_colors([ax.patches[0]], facecolors=["black"]) @@ -327,14 +327,14 @@ def test_bar_user_colors(self): assert result == expected def test_rotation_default(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) # Default rot 0 _, ax = mpl.pyplot.subplots() axes = df.plot(ax=ax) _check_ticks_props(axes, xrot=0) def test_rotation_30(self): - df = DataFrame(np.random.default_rng(2).randn(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) _, ax = mpl.pyplot.subplots() axes = df.plot(rot=30, ax=ax) _check_ticks_props(axes, xrot=30) @@ -344,7 +344,7 @@ def test_irregular_datetime(self): rng = date_range("1/1/2000", "3/1/2000") rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]] - ser = Series(np.random.default_rng(2).randn(len(rng)), rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) _, ax = mpl.pyplot.subplots() ax = ser.plot(ax=ax) xp = DatetimeConverter.convert(datetime(1999, 1, 1), "", ax) @@ -368,7 +368,7 @@ def test_pie_series(self): # if sum of values is less than 1.0, pie handle them as rate and draw # semicircle. 
series = Series( - np.random.default_rng(2).randint(1, 5), + np.random.default_rng(2).integers(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL", ) @@ -378,7 +378,7 @@ def test_pie_series(self): def test_pie_series_no_label(self): series = Series( - np.random.default_rng(2).randint(1, 5), + np.random.default_rng(2).integers(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL", ) @@ -387,7 +387,7 @@ def test_pie_series_no_label(self): def test_pie_series_less_colors_than_elements(self): series = Series( - np.random.default_rng(2).randint(1, 5), + np.random.default_rng(2).integers(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL", ) @@ -399,7 +399,7 @@ def test_pie_series_less_colors_than_elements(self): def test_pie_series_labels_and_colors(self): series = Series( - np.random.default_rng(2).randint(1, 5), + np.random.default_rng(2).integers(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL", ) @@ -412,7 +412,7 @@ def test_pie_series_labels_and_colors(self): def test_pie_series_autopct_and_fontsize(self): series = Series( - np.random.default_rng(2).randint(1, 5), + np.random.default_rng(2).integers(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL", ) @@ -448,8 +448,10 @@ def test_pie_nan(self): def test_df_series_secondary_legend(self): # GH 9779 - df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) - s = Series(np.random.default_rng(2).randn(30), name="x") + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + ) + s = Series(np.random.default_rng(2).standard_normal(30), name="x") # primary -> secondary (without passing ax) _, ax = mpl.pyplot.subplots() @@ -463,8 +465,10 @@ def test_df_series_secondary_legend(self): def test_df_series_secondary_legend_with_axes(self): # GH 9779 - df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) - s = Series(np.random.default_rng(2).randn(30), name="x") + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 3), 
columns=list("abc") + ) + s = Series(np.random.default_rng(2).standard_normal(30), name="x") # primary -> secondary (with passing ax) _, ax = mpl.pyplot.subplots() ax = df.plot(ax=ax) @@ -477,8 +481,10 @@ def test_df_series_secondary_legend_with_axes(self): def test_df_series_secondary_legend_both(self): # GH 9779 - df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) - s = Series(np.random.default_rng(2).randn(30), name="x") + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + ) + s = Series(np.random.default_rng(2).standard_normal(30), name="x") # secondary -> secondary (without passing ax) _, ax = mpl.pyplot.subplots() ax = df.plot(secondary_y=True, ax=ax) @@ -492,8 +498,10 @@ def test_df_series_secondary_legend_both(self): def test_df_series_secondary_legend_both_with_axis(self): # GH 9779 - df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) - s = Series(np.random.default_rng(2).randn(30), name="x") + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + ) + s = Series(np.random.default_rng(2).standard_normal(30), name="x") # secondary -> secondary (with passing ax) _, ax = mpl.pyplot.subplots() ax = df.plot(secondary_y=True, ax=ax) @@ -507,8 +515,10 @@ def test_df_series_secondary_legend_both_with_axis(self): def test_df_series_secondary_legend_both_with_axis_2(self): # GH 9779 - df = DataFrame(np.random.default_rng(2).randn(30, 3), columns=list("abc")) - s = Series(np.random.default_rng(2).randn(30), name="x") + df = DataFrame( + np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + ) + s = Series(np.random.default_rng(2).standard_normal(30), name="x") # secondary -> secondary (with passing ax) _, ax = mpl.pyplot.subplots() ax = df.plot(secondary_y=True, mark_right=False, ax=ax) @@ -525,8 +535,8 @@ def test_df_series_secondary_legend_both_with_axis_2(self): ) def test_secondary_logy(self, input_logy, expected_scale): # GH 
25545 - s1 = Series(np.random.default_rng(2).randn(100)) - s2 = Series(np.random.default_rng(2).randn(100)) + s1 = Series(np.random.default_rng(2).standard_normal(100)) + s2 = Series(np.random.default_rng(2).standard_normal(100)) # GH 24980 ax1 = s1.plot(logy=input_logy) @@ -536,7 +546,7 @@ def test_secondary_logy(self, input_logy, expected_scale): assert ax2.get_yscale() == expected_scale def test_plot_fails_with_dupe_color_and_style(self): - x = Series(np.random.default_rng(2).randn(2)) + x = Series(np.random.default_rng(2).standard_normal(2)) _, ax = mpl.pyplot.subplots() msg = ( "Cannot pass 'style' string with a color symbol and 'color' keyword " @@ -644,7 +654,7 @@ def test_dup_datetime_index_plot(self): dr1 = date_range("1/1/2009", periods=4) dr2 = date_range("1/2/2009", periods=4) index = dr1.append(dr2) - values = np.random.default_rng(2).randn(index.size) + values = np.random.default_rng(2).standard_normal(index.size) s = Series(values, index=index) _check_plot_works(s.plot) @@ -671,11 +681,12 @@ def test_errorbar_asymmetrical(self): @pytest.mark.parametrize( "yerr", [ - Series(np.abs(np.random.default_rng(2).randn(10))), - np.abs(np.random.default_rng(2).randn(10)), - list(np.abs(np.random.default_rng(2).randn(10))), + Series(np.abs(np.random.default_rng(2).standard_normal(10))), + np.abs(np.random.default_rng(2).standard_normal(10)), + list(np.abs(np.random.default_rng(2).standard_normal(10))), DataFrame( - np.abs(np.random.default_rng(2).randn(10, 2)), columns=["x", "y"] + np.abs(np.random.default_rng(2).standard_normal((10, 2))), + columns=["x", "y"], ), ], ) @@ -687,7 +698,7 @@ def test_errorbar_plot(self, kind, yerr): @pytest.mark.slow def test_errorbar_plot_yerr_0(self): s = Series(np.arange(10), name="x") - s_err = np.abs(np.random.default_rng(2).randn(10)) + s_err = np.abs(np.random.default_rng(2).standard_normal(10)) ax = _check_plot_works(s.plot, xerr=s_err) _check_has_errorbars(ax, xerr=1, yerr=0) @@ -695,9 +706,10 @@ def 
test_errorbar_plot_yerr_0(self): @pytest.mark.parametrize( "yerr", [ - Series(np.abs(np.random.default_rng(2).randn(12))), + Series(np.abs(np.random.default_rng(2).standard_normal(12))), DataFrame( - np.abs(np.random.default_rng(2).randn(12, 2)), columns=["x", "y"] + np.abs(np.random.default_rng(2).standard_normal((12, 2))), + columns=["x", "y"], ), ], ) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index f42cc9b868d63..1143dc147eee9 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -40,7 +40,7 @@ def get_objs(): tm.makeStringIndex(10, name="a"), ] - arr = np.random.default_rng(2).randn(10) + arr = np.random.default_rng(2).standard_normal(10) series = [Series(arr, index=idx, name="a") for idx in indexes] objs = indexes + series @@ -548,7 +548,7 @@ class TestSeriesReductions: # intended long-term to be series-specific def test_sum_inf(self): - s = Series(np.random.default_rng(2).randn(10)) + s = Series(np.random.default_rng(2).standard_normal(10)) s2 = s.copy() s[5:8] = np.inf @@ -556,7 +556,7 @@ def test_sum_inf(self): assert np.isinf(s.sum()) - arr = np.random.default_rng(2).randn(100, 100).astype("f4") + arr = np.random.default_rng(2).standard_normal(100, 100).astype("f4") arr[:, 2] = np.inf msg = "use_inf_as_na option is deprecated" @@ -1172,7 +1172,11 @@ def test_min_max_series(self): rng = date_range("1/1/2000", periods=10, freq="4h") lvls = ["A", "A", "A", "B", "B", "B", "C", "C", "C", "C"] df = DataFrame( - {"TS": rng, "V": np.random.default_rng(2).randn(len(rng)), "L": lvls} + { + "TS": rng, + "V": np.random.default_rng(2).standard_normal(len(rng)), + "L": lvls, + } ) result = df.TS.max() diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index 7ab711abcc8e7..cdb0ba3121f45 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -51,7 +51,7 @@ def simple_date_range_series(): def 
_simple_date_range_series(start, end, freq="D"): rng = date_range(start, end, freq=freq) - return Series(np.random.default_rng(2).randn(len(rng)), index=rng) + return Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) return _simple_date_range_series @@ -64,7 +64,7 @@ def simple_period_range_series(): def _simple_period_range_series(start, end, freq="D"): rng = period_range(start, end, freq=freq) - return Series(np.random.default_rng(2).randn(len(rng)), index=rng) + return Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) return _simple_period_range_series diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 86ae77953a646..e54cada0e25b8 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -509,7 +509,7 @@ def test_resample_extra_index_point(unit): def test_upsample_with_limit(unit): rng = date_range("1/1/2000", periods=3, freq="5t").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) result = ts.resample("t").ffill(limit=2) expected = ts.reindex(result.index, method="ffill", limit=2) @@ -523,7 +523,7 @@ def test_nearest_upsample_with_limit(tz_aware_fixture, freq, rule, unit): rng = date_range("1/1/2000", periods=3, freq=freq, tz=tz_aware_fixture).as_unit( unit ) - ts = Series(np.random.default_rng(2).randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) result = ts.resample(rule).nearest(limit=2) expected = ts.reindex(result.index, method="nearest", limit=2) @@ -637,7 +637,7 @@ def test_resample_dup_index(): # GH 4812 # dup columns with resample raising df = DataFrame( - np.random.default_rng(2).randn(4, 12), + np.random.default_rng(2).standard_normal(4, 12), index=[2000, 2000, 2000, 2000], columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)], ) @@ -692,7 +692,7 @@ 
def _ohlc(group): return [group.iloc[0], group.max(), group.min(), group.iloc[-1]] rng = date_range("1/1/2000 00:00:00", "1/1/2000 5:59:50", freq="10s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) resampled = ts.resample("5min", closed="right", label="right").ohlc() @@ -708,7 +708,7 @@ def _ohlc(group): def test_downsample_non_unique(unit): rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) rng2 = rng.repeat(5).values - ts = Series(np.random.default_rng(2).randn(len(rng2)), index=rng2) + ts = Series(np.random.default_rng(2).standard_normal(len(rng2)), index=rng2) result = ts.resample("M").mean() @@ -722,7 +722,7 @@ def test_asfreq_non_unique(unit): # GH #1077 rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) rng2 = rng.repeat(2).values - ts = Series(np.random.default_rng(2).randn(len(rng2)), index=rng2) + ts = Series(np.random.default_rng(2).standard_normal(len(rng2)), index=rng2) msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): @@ -732,7 +732,9 @@ def test_asfreq_non_unique(unit): def test_resample_axis1(unit): rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) df = DataFrame( - np.random.default_rng(2).randn(3, len(rng)), columns=rng, index=["a", "b", "c"] + np.random.default_rng(2).standard_normal(3, len(rng)), + columns=rng, + index=["a", "b", "c"], ) warning_msg = "DataFrame.resample with axis=1 is deprecated." 
@@ -750,7 +752,7 @@ def test_resample_anchored_ticks(freq, unit): # middle of a desired interval rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) ts[:2] = np.nan # so results are the same result = ts[2:].resample(freq, closed="left", label="left").mean() expected = ts.resample(freq, closed="left", label="left").mean() @@ -762,7 +764,7 @@ def test_resample_single_group(end, unit): mysum = lambda x: x.sum() rng = date_range("2000-1-1", f"2000-{end}-10", freq="D").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum)) @@ -784,7 +786,7 @@ def test_resample_offset(unit): # GH 31809 rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) resampled = ts.resample("5min", offset="2min").mean() exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min").as_unit( @@ -807,7 +809,7 @@ def test_resample_offset(unit): def test_resample_origin(kwargs, unit): # GH 31809 rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) exp_rng = date_range( "1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min" @@ -822,7 +824,7 @@ def test_resample_origin(kwargs, unit): ) def test_resample_bad_origin(origin, unit): rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = 
Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) msg = ( "'origin' should be equal to 'epoch', 'start', 'start_day', " "'end', 'end_day' or should be a Timestamp convertible type. Got " @@ -835,7 +837,7 @@ def test_resample_bad_origin(origin, unit): @pytest.mark.parametrize("offset", ["invalid_value", "12dayys", "2000-30-30", object()]) def test_resample_bad_offset(offset, unit): rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) msg = f"'offset' should be a Timedelta convertible type. Got '{offset}' instead." with pytest.raises(ValueError, match=msg): ts.resample("5min", offset=offset) @@ -845,7 +847,7 @@ def test_resample_origin_prime_freq(unit): # GH 31809 start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00" rng = date_range(start, end, freq="7min").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) exp_rng = date_range( "2000-10-01 23:14:00", "2000-10-02 00:22:00", freq="17min" @@ -886,7 +888,7 @@ def test_resample_origin_with_tz(unit): rng = date_range( "2000-01-01 00:00:00", "2000-01-01 02:00", freq="s", tz=tz ).as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) exp_rng = date_range( "1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min", tz=tz @@ -906,7 +908,7 @@ def test_resample_origin_with_tz(unit): # if the series is not tz aware, origin should not be tz aware rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) with pytest.raises(ValueError, match=msg): ts.resample("5min", 
origin="12/31/1999 23:57:00+03:00").mean() @@ -915,7 +917,7 @@ def test_resample_origin_epoch_with_tz_day_vs_24h(unit): # GH 34474 start, end = "2000-10-01 23:30:00+0500", "2000-12-02 00:30:00+0500" rng = date_range(start, end, freq="7min").as_unit(unit) - random_values = np.random.default_rng(2).randn(len(rng)) + random_values = np.random.default_rng(2).standard_normal(len(rng)) ts_1 = Series(random_values, index=rng) result_1 = ts_1.resample("D", origin="epoch").mean() @@ -996,7 +998,7 @@ def _create_series(values, timestamps, freq="D"): def test_resample_daily_anchored(unit): rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) ts[:2] = np.nan # so results are the same result = ts[2:].resample("D", closed="left", label="left").mean() @@ -1008,7 +1010,7 @@ def test_resample_to_period_monthly_buglet(unit): # GH #1259 rng = date_range("1/1/2000", "12/31/2000").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) result = ts.resample("M", kind="period").mean() exp_index = period_range("Jan-2000", "Dec-2000", freq="M") @@ -1018,7 +1020,7 @@ def test_resample_to_period_monthly_buglet(unit): def test_period_with_agg(): # aggregate a period resampler with a lambda s2 = Series( - np.random.default_rng(2).randint(0, 5, 50), + np.random.default_rng(2).integers(0, 5, 50), index=period_range("2012-01-01", freq="H", periods=50), dtype="float64", ) @@ -1085,7 +1087,7 @@ def test_resample_dtype_coercion(unit): def test_weekly_resample_buglet(unit): # #1327 rng = date_range("1/1/2000", freq="B", periods=20).as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) resampled = ts.resample("W").mean() expected = 
ts.resample("W-SUN").mean() @@ -1095,7 +1097,7 @@ def test_weekly_resample_buglet(unit): def test_monthly_resample_error(unit): # #1451 dates = date_range("4/16/2012 20:00", periods=5000, freq="h").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(dates)), index=dates) + ts = Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates) # it works! ts.resample("M") @@ -1183,7 +1185,7 @@ def test_resample_anchored_multiday(label, sec): index2 = date_range("2014-10-15 23:00:00", periods=2, freq="2200L") index = index1.union(index2) - s = Series(np.random.default_rng(2).randn(5), index=index) + s = Series(np.random.default_rng(2).standard_normal(5), index=index) # Ensure left closing works result = s.resample("2200L", label=label).mean() @@ -1194,7 +1196,7 @@ def test_corner_cases(unit): # miscellaneous test coverage rng = date_range("1/1/2000", periods=12, freq="t").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) result = ts.resample("5t", closed="right", label="left").mean() ex_index = date_range("1999-12-31 23:55", periods=4, freq="5t").as_unit(unit) @@ -1220,7 +1222,7 @@ def test_corner_cases_date(simple_date_range_series, unit): def test_anchored_lowercase_buglet(unit): dates = date_range("4/16/2012 20:00", periods=50000, freq="s").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(dates)), index=dates) + ts = Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates) # it works! 
ts.resample("d").mean() @@ -1229,7 +1231,7 @@ def test_upsample_apply_functions(unit): # #1596 rng = date_range("2012-06-12", periods=4, freq="h").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) result = ts.resample("20min").aggregate(["mean", "sum"]) assert isinstance(result, DataFrame) @@ -1237,7 +1239,7 @@ def test_upsample_apply_functions(unit): def test_resample_not_monotonic(unit): rng = date_range("2012-06-12", periods=200, freq="h").as_unit(unit) - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) ts = ts.take(np.random.default_rng(2).permutation(len(ts))) @@ -1459,7 +1461,7 @@ def test_resample_group_info(n, k, unit): prng = np.random.default_rng(2).RandomState(1234) dr = date_range(start="2015-08-27", periods=n // 10, freq="T").as_unit(unit) - ts = Series(prng.randint(0, n // k, n).astype("int64"), index=prng.choice(dr, n)) + ts = Series(prng.integers(0, n // k, n).astype("int64"), index=prng.choice(dr, n)) left = ts.resample("30T").nunique() ix = date_range(start=ts.index.min(), end=ts.index.max(), freq="30T").as_unit(unit) @@ -1483,7 +1485,8 @@ def test_resample_size(unit): n = 10000 dr = date_range("2015-09-19", periods=n, freq="T").as_unit(unit) ts = Series( - np.random.default_rng(2).randn(n), index=np.random.default_rng(2).choice(dr, n) + np.random.default_rng(2).standard_normal(n), + index=np.random.default_rng(2).choice(dr, n), ) left = ts.resample("7T").size() diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 2d76f3ad97328..d2223731867bf 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -157,7 +157,7 @@ def test_basic_upsample(self, freq, simple_period_range_series): def test_upsample_with_limit(self): rng = period_range("1/1/2000", 
periods=5, freq="A") - ts = Series(np.random.default_rng(2).randn(len(rng)), rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), rng) result = ts.resample("M", convention="end").ffill(limit=2) expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2) @@ -408,7 +408,7 @@ def test_resample_to_quarterly_start_end(self, simple_period_range_series, how): def test_resample_fill_missing(self): rng = PeriodIndex([2000, 2005, 2007, 2009], freq="A") - s = Series(np.random.default_rng(2).randn(4), index=rng) + s = Series(np.random.default_rng(2).standard_normal(4), index=rng) stamps = s.to_timestamp() filled = s.resample("A").ffill() @@ -417,7 +417,7 @@ def test_resample_fill_missing(self): def test_cant_fill_missing_dups(self): rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A") - s = Series(np.random.default_rng(2).randn(5), index=rng) + s = Series(np.random.default_rng(2).standard_normal(5), index=rng) msg = "Reindexing only valid with uniquely valued Index objects" with pytest.raises(InvalidIndexError, match=msg): s.resample("A").ffill() @@ -426,7 +426,7 @@ def test_cant_fill_missing_dups(self): @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) def test_resample_5minute(self, freq, kind): rng = period_range("1/1/2000", "1/5/2000", freq="T") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) expected = ts.to_timestamp().resample(freq).mean() if kind != "timestamp": expected = expected.to_period(freq) @@ -458,7 +458,7 @@ def test_resample_irregular_sparse(self): def test_resample_weekly_all_na(self): rng = date_range("1/1/2000", periods=10, freq="W-WED") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) result = ts.resample("W-THU").asfreq() @@ -513,8 +513,8 @@ def test_resample_tz_localized(self): rng = date_range("1/1/2011", 
periods=20000, freq="H") rng = rng.tz_localize("EST") ts = DataFrame(index=rng) - ts["first"] = np.random.default_rng(2).randn(len(rng)) - ts["second"] = np.cumsum(np.random.default_rng(2).randn(len(rng))) + ts["first"] = np.random.default_rng(2).standard_normal(len(rng)) + ts["second"] = np.cumsum(np.random.default_rng(2).standard_normal(len(rng))) expected = DataFrame( { "first": ts.resample("A").sum()["first"], @@ -532,7 +532,7 @@ def test_resample_tz_localized(self): def test_closed_left_corner(self): # #1465 s = Series( - np.random.default_rng(2).randn(21), + np.random.default_rng(2).standard_normal(21), index=date_range(start="1/1/2012 9:30", freq="1min", periods=21), ) s.iloc[0] = np.nan @@ -631,7 +631,7 @@ def test_monthly_convention_span(self): ) def test_default_right_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), idx) + df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), idx) resampled = df.resample(to_freq).mean() tm.assert_frame_equal( @@ -644,7 +644,7 @@ def test_default_right_closed_label(self, from_freq, to_freq): ) def test_default_left_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.default_rng(2).randn(len(idx), 2), idx) + df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), idx) resampled = df.resample(to_freq).mean() tm.assert_frame_equal( @@ -654,7 +654,7 @@ def test_default_left_closed_label(self, from_freq, to_freq): def test_all_values_single_bin(self): # 2070 index = period_range(start="2012-01-01", end="2012-12-31", freq="M") - s = Series(np.random.default_rng(2).randn(len(index)), index=index) + s = Series(np.random.default_rng(2).standard_normal(len(index)), index=index) result = s.resample("A").mean() tm.assert_almost_equal(result.iloc[0], s.mean()) @@ -664,7 +664,7 @@ def 
test_evenly_divisible_with_no_extra_bins(self): # when the frequency is evenly divisible, sometimes extra bins df = DataFrame( - np.random.default_rng(2).randn(9, 3), + np.random.default_rng(2).standard_normal(9, 3), index=date_range("2000-1-1", periods=9), ) result = df.resample("5D").mean() diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index a346c9be15415..caf6fdc1a6239 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -89,7 +89,7 @@ def test_groupby_resample_on_api(): { "key": ["A", "B"] * 5, "dates": date_range("2016-01-01", periods=10), - "values": np.random.default_rng(2).randn(10), + "values": np.random.default_rng(2).standard_normal(10), } ) @@ -343,7 +343,7 @@ def test_agg_consistency(): # make sure that we are consistent across # similar aggregations with and w/o selection list df = DataFrame( - np.random.default_rng(2).randn(1000, 3), + np.random.default_rng(2).standard_normal(1000, 3), index=date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], ) @@ -358,7 +358,7 @@ def test_agg_consistency(): def test_agg_consistency_int_str_column_mix(): # GH#39025 df = DataFrame( - np.random.default_rng(2).randn(1000, 2), + np.random.default_rng(2).standard_normal(1000, 2), index=date_range("1/1/2012", freq="S", periods=1000), columns=[1, "a"], ) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 113dfe4c1ced1..856e2e152bb39 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -142,7 +142,7 @@ def test_groupby_with_origin(): middle = "1/15/2000 00:00:00" rng = date_range(start, end, freq="1231min") # prime number - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) ts2 = ts[middle:end] # proves that grouper without a 
fixed origin does not work @@ -364,7 +364,7 @@ def test_median_duplicate_columns(): # GH 14233 df = DataFrame( - np.random.default_rng(2).randn(20, 3), + np.random.default_rng(2).standard_normal(20, 3), columns=list("aaa"), index=date_range("2012-01-01", periods=20, freq="s"), ) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index e27f9ed0c6d1d..6f44ea2340bb4 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -18,7 +18,8 @@ @pytest.fixture def test_series(): return Series( - np.random.default_rng(2).randn(1000), index=date_range("1/1/2000", periods=1000) + np.random.default_rng(2).standard_normal(1000), + index=date_range("1/1/2000", periods=1000), ) @@ -97,7 +98,7 @@ def test_fails_on_no_datetime_index(func): n = 2 index = func(n) name = type(index).__name__ - df = DataFrame({"a": np.random.default_rng(2).randn(n)}, index=index) + df = DataFrame({"a": np.random.default_rng(2).standard_normal(n)}, index=index) msg = ( "Only valid with DatetimeIndex, TimedeltaIndex " @@ -111,7 +112,7 @@ def test_aaa_group_order(): # GH 12840 # check TimeGrouper perform stable sorts n = 20 - data = np.random.default_rng(2).randn(n, 4) + data = np.random.default_rng(2).standard_normal(n, 4) df = DataFrame(data, columns=["A", "B", "C", "D"]) df["key"] = [ datetime(2013, 1, 1), @@ -132,7 +133,7 @@ def test_aaa_group_order(): def test_aggregate_normal(resample_method): """Check TimeGrouper's aggregation is identical as normal groupby.""" - data = np.random.default_rng(2).randn(20, 4) + data = np.random.default_rng(2).standard_normal(20, 4) normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, 3, 4, 5] * 4 @@ -158,7 +159,7 @@ def test_aggregate_normal(resample_method): def test_aggregate_nth(): """Check TimeGrouper's aggregation is identical as normal groupby.""" - data = np.random.default_rng(2).randn(20, 4) + data = 
np.random.default_rng(2).standard_normal(20, 4) normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, 3, 4, 5] * 4 @@ -210,7 +211,7 @@ def test_aggregate_with_nat(func, fill_value): # and 'nth' doesn't work yet n = 20 - data = np.random.default_rng(2).randn(n, 4).astype("int64") + data = np.random.default_rng(2).standard_normal(n, 4).astype("int64") normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 @@ -241,7 +242,7 @@ def test_aggregate_with_nat(func, fill_value): def test_aggregate_with_nat_size(): # GH 9925 n = 20 - data = np.random.default_rng(2).randn(n, 4).astype("int64") + data = np.random.default_rng(2).standard_normal(n, 4).astype("int64") normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 14708694ca1c9..a119a911e5fbe 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -81,7 +81,7 @@ def test_resample_timedelta_idempotency(): def test_resample_offset_with_timedeltaindex(): # GH 10530 & 31809 rng = timedelta_range(start="0s", periods=25, freq="s") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) with_base = ts.resample("2s", offset="5s").mean() without_base = ts.resample("2s").mean() diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index d0740aa558cd2..386f363c81557 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -123,9 +123,9 @@ def test_append_sorts(self, sort): def test_append_different_columns(self, sort): df = DataFrame( { - "bools": np.random.default_rng(2).randn(10) > 0, - "ints": np.random.default_rng(2).randint(0, 10, 10), - "floats": 
np.random.default_rng(2).randn(10), + "bools": np.random.default_rng(2).standard_normal(10) > 0, + "ints": np.random.default_rng(2).integers(0, 10, 10), + "floats": np.random.default_rng(2).standard_normal(10), "strings": ["foo", "bar"] * 5, } ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 89cfb8a2081c4..f013771f7d4e1 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -40,8 +40,8 @@ def test_append_concat(self): d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") - s1 = Series(np.random.default_rng(2).randn(10), d1) - s2 = Series(np.random.default_rng(2).randn(10), d2) + s1 = Series(np.random.default_rng(2).standard_normal(10), d1) + s2 = Series(np.random.default_rng(2).standard_normal(10), d2) s1 = s1.to_period() s2 = s2.to_period() @@ -52,8 +52,8 @@ def test_append_concat(self): assert result.index[0] == s1.index[0] def test_concat_copy(self, using_array_manager, using_copy_on_write): - df = DataFrame(np.random.default_rng(2).randn(4, 3)) - df2 = DataFrame(np.random.default_rng(2).randint(0, 10, size=4).reshape(4, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) + df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: "foo"}, index=range(4)) # These are actual copies. @@ -86,7 +86,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): assert arr.base is not None # Float block was consolidated.
- df4 = DataFrame(np.random.default_rng(2).randn(4, 1)) + df4 = DataFrame(np.random.default_rng(2).standard_normal(4, 1)) result = concat([df, df2, df3, df4], axis=1, copy=False) for arr in result._mgr.arrays: if arr.dtype.kind == "f": @@ -107,8 +107,8 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): def test_concat_with_group_keys(self): # axis=0 - df = DataFrame(np.random.default_rng(2).randn(3, 4)) - df2 = DataFrame(np.random.default_rng(2).randn(4, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(3, 4)) + df2 = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) result = concat([df, df2], keys=[0, 1]) exp_index = MultiIndex.from_arrays( @@ -123,8 +123,8 @@ def test_concat_with_group_keys(self): tm.assert_frame_equal(result, expected) # axis=1 - df = DataFrame(np.random.default_rng(2).randn(4, 3)) - df2 = DataFrame(np.random.default_rng(2).randn(4, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(4, 3)) + df2 = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) result = concat([df, df2], keys=[0, 1], axis=1) expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index) @@ -135,7 +135,7 @@ def test_concat_with_group_keys(self): tm.assert_frame_equal(result, expected) def test_concat_keys_specific_levels(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]] level = ["three", "two", "one", "zero"] result = concat( @@ -156,10 +156,10 @@ def test_concat_mapping(self, mapping, non_dict_mapping_subclass): constructor = dict if mapping == "dict" else non_dict_mapping_subclass frames = constructor( { - "foo": DataFrame(np.random.default_rng(2).randn(4, 3)), - "bar": DataFrame(np.random.default_rng(2).randn(4, 3)), - "baz": DataFrame(np.random.default_rng(2).randn(4, 3)), - "qux": DataFrame(np.random.default_rng(2).randn(4, 3)), + "foo": 
DataFrame(np.random.default_rng(2).standard_normal(4, 3)), + "bar": DataFrame(np.random.default_rng(2).standard_normal(4, 3)), + "baz": DataFrame(np.random.default_rng(2).standard_normal(4, 3)), + "qux": DataFrame(np.random.default_rng(2).standard_normal(4, 3)), } ) @@ -179,8 +179,8 @@ def test_concat_mapping(self, mapping, non_dict_mapping_subclass): tm.assert_frame_equal(result, expected) def test_concat_keys_and_levels(self): - df = DataFrame(np.random.default_rng(2).randn(1, 3)) - df2 = DataFrame(np.random.default_rng(2).randn(1, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(1, 3)) + df2 = DataFrame(np.random.default_rng(2).standard_normal(1, 4)) levels = [["foo", "baz"], ["one", "two"]] names = ["first", "second"] @@ -221,8 +221,8 @@ def test_concat_keys_and_levels(self): def test_concat_keys_levels_no_overlap(self): # GH #1406 - df = DataFrame(np.random.default_rng(2).randn(1, 3), index=["a"]) - df2 = DataFrame(np.random.default_rng(2).randn(1, 4), index=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal(1, 3), index=["a"]) + df2 = DataFrame(np.random.default_rng(2).standard_normal(1, 4), index=["b"]) msg = "Values not found in passed level" with pytest.raises(ValueError, match=msg): @@ -260,8 +260,8 @@ def test_crossed_dtypes_weird_corner(self): ) tm.assert_frame_equal(appended, expected) - df = DataFrame(np.random.default_rng(2).randn(1, 3), index=["a"]) - df2 = DataFrame(np.random.default_rng(2).randn(1, 4), index=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal(1, 3), index=["a"]) + df2 = DataFrame(np.random.default_rng(2).standard_normal(1, 4), index=["b"]) result = concat([df, df2], keys=["one", "two"], names=["first", "second"]) assert result.index.names == ("first", "second") @@ -360,7 +360,7 @@ def test_dtype_coercion(self): tm.assert_series_equal(result.dtypes, df.dtypes) def test_concat_single_with_key(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = 
DataFrame(np.random.default_rng(2).standard_normal(10, 4)) result = concat([df], keys=["foo"]) expected = concat([df, df], keys=["foo", "bar"]) @@ -371,7 +371,7 @@ def test_concat_no_items_raises(self): concat([]) def test_concat_exclude_none(self): - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) pieces = [df[:5], None, None, df[5:]] result = concat(pieces) @@ -500,10 +500,14 @@ def test_concat_duplicate_indices_raise(self): # GH 45888: test raise for concat DataFrames with duplicate indices # https://github.com/pandas-dev/pandas/issues/36263 df1 = DataFrame( - np.random.default_rng(2).randn(5), index=[0, 1, 2, 3, 3], columns=["a"] + np.random.default_rng(2).standard_normal(5), + index=[0, 1, 2, 3, 3], + columns=["a"], ) df2 = DataFrame( - np.random.default_rng(2).randn(5), index=[0, 1, 2, 2, 4], columns=["b"] + np.random.default_rng(2).standard_normal(5), + index=[0, 1, 2, 2, 4], + columns=["b"], ) msg = "Reindexing only valid with uniquely valued Index objects" with pytest.raises(InvalidIndexError, match=msg): diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 026cda27bc567..5aab96f0fe299 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -14,7 +14,9 @@ class TestEmptyConcat: def test_handle_empty_objects(self, sort): - df = DataFrame(np.random.default_rng(2).randn(10, 4), columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 4), columns=list("abcd") + ) dfcopy = df[:5].copy() dfcopy["foo"] = "bar" diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 1c1bc43df713d..ee7f33a6109f0 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -160,7 +160,7 @@ def test_dups_index(self): # single dtypes df = DataFrame( - np.random.default_rng(2).randint(0, 10, 
size=40).reshape(10, 4), + np.random.default_rng(2).integers(0, 10, size=40).reshape(10, 4), columns=["A", "A", "C", "C"], ) @@ -176,10 +176,11 @@ def test_dups_index(self): df = concat( [ DataFrame( - np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"] + np.random.default_rng(2).standard_normal(10, 4), + columns=["A", "A", "B", "B"], ), DataFrame( - np.random.default_rng(2).randint(0, 10, size=20).reshape(10, 2), + np.random.default_rng(2).integers(0, 10, size=20).reshape(10, 2), columns=["A", "C"], ), ], @@ -243,7 +244,7 @@ def test_concat_multiindex_rangeindex(self): # when multi-index levels are RangeIndex objects # there is a bug in concat with objects of len 1 - df = DataFrame(np.random.default_rng(2).randn(9, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal(9, 2)) df.index = MultiIndex( levels=[pd.RangeIndex(3), pd.RangeIndex(3)], codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)], diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py index 9becb807ae028..4bef86ce6a941 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -60,8 +60,8 @@ def test_concat_series_axis1(self): def test_concat_series_axis1_preserves_series_names(self): # preserve series names, #2489 - s = Series(np.random.default_rng(2).randn(5), name="A") - s2 = Series(np.random.default_rng(2).randn(5), name="B") + s = Series(np.random.default_rng(2).standard_normal(5), name="A") + s2 = Series(np.random.default_rng(2).standard_normal(5), name="B") result = concat([s, s2], axis=1) expected = DataFrame({"A": s, "B": s2}) @@ -73,9 +73,13 @@ def test_concat_series_axis1_preserves_series_names(self): def test_concat_series_axis1_with_reindex(self, sort): # must reindex, #2603 - s = Series(np.random.default_rng(2).randn(3), index=["c", "a", "b"], name="A") + s = Series( + np.random.default_rng(2).standard_normal(3), index=["c", "a", "b"], name="A" + ) s2 = Series( 
- np.random.default_rng(2).randn(4), index=["d", "a", "b", "c"], name="B" + np.random.default_rng(2).standard_normal(4), + index=["d", "a", "b", "c"], + name="B", ) result = concat([s, s2], axis=1, sort=sort) expected = DataFrame({"A": s, "B": s2}, index=["c", "a", "b", "d"]) @@ -103,10 +107,14 @@ def test_concat_series_axis1_names_applied(self): def test_concat_series_axis1_same_names_ignore_index(self): dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1] s1 = Series( - np.random.default_rng(2).randn(len(dates)), index=dates, name="value" + np.random.default_rng(2).standard_normal(len(dates)), + index=dates, + name="value", ) s2 = Series( - np.random.default_rng(2).randn(len(dates)), index=dates, name="value" + np.random.default_rng(2).standard_normal(len(dates)), + index=dates, + name="value", ) result = concat([s1, s2], axis=1, ignore_index=True) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index e73b4fe0d98fc..eb673cb5fd727 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -36,8 +36,8 @@ def df(self): { "key1": get_test_data(), "key2": get_test_data(), - "data1": np.random.default_rng(2).randn(50), - "data2": np.random.default_rng(2).randn(50), + "data1": np.random.default_rng(2).standard_normal(50), + "data2": np.random.default_rng(2).standard_normal(50), } ) @@ -51,7 +51,7 @@ def df2(self): { "key1": get_test_data(n=10), "key2": get_test_data(ngroups=4, n=10), - "value": np.random.default_rng(2).randn(10), + "value": np.random.default_rng(2).standard_normal(10), } ) @@ -154,13 +154,13 @@ def test_join_on_fails_with_different_right_index(self): df = DataFrame( { "a": np.random.default_rng(2).choice(["m", "f"], size=3), - "b": np.random.default_rng(2).randn(3), + "b": np.random.default_rng(2).standard_normal(3), } ) df2 = DataFrame( { "a": np.random.default_rng(2).choice(["m", "f"], size=10), - "b": np.random.default_rng(2).randn(10), + "b": 
np.random.default_rng(2).standard_normal(10), }, index=tm.makeCustomIndex(10, 2), ) @@ -172,14 +172,14 @@ def test_join_on_fails_with_different_left_index(self): df = DataFrame( { "a": np.random.default_rng(2).choice(["m", "f"], size=3), - "b": np.random.default_rng(2).randn(3), + "b": np.random.default_rng(2).standard_normal(3), }, index=tm.makeCustomIndex(3, 2), ) df2 = DataFrame( { "a": np.random.default_rng(2).choice(["m", "f"], size=10), - "b": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).standard_normal(10), } ) msg = r'len\(right_on\) must equal the number of levels in the index of "left"' @@ -190,13 +190,13 @@ def test_join_on_fails_with_different_column_counts(self): df = DataFrame( { "a": np.random.default_rng(2).choice(["m", "f"], size=3), - "b": np.random.default_rng(2).randn(3), + "b": np.random.default_rng(2).standard_normal(3), } ) df2 = DataFrame( { "a": np.random.default_rng(2).choice(["m", "f"], size=10), - "b": np.random.default_rng(2).randn(10), + "b": np.random.default_rng(2).standard_normal(10), }, index=tm.makeCustomIndex(10, 2), ) @@ -335,10 +335,12 @@ def test_join_empty_bug(self): def test_join_unconsolidated(self): # GH #331 - a = DataFrame(np.random.default_rng(2).randn(30, 2), columns=["a", "b"]) - c = Series(np.random.default_rng(2).randn(30)) + a = DataFrame( + np.random.default_rng(2).standard_normal(30, 2), columns=["a", "b"] + ) + c = Series(np.random.default_rng(2).standard_normal(30)) a["c"] = c - d = DataFrame(np.random.default_rng(2).randn(30, 1), columns=["q"]) + d = DataFrame(np.random.default_rng(2).standard_normal(30, 1), columns=["q"]) # it works! 
a.join(d) @@ -356,10 +358,14 @@ def test_join_multiindex(self): ) df1 = DataFrame( - data=np.random.default_rng(2).randn(6), index=index1, columns=["var X"] + data=np.random.default_rng(2).standard_normal(6), + index=index1, + columns=["var X"], ) df2 = DataFrame( - data=np.random.default_rng(2).randn(6), index=index2, columns=["var Y"] + data=np.random.default_rng(2).standard_normal(6), + index=index2, + columns=["var Y"], ) df1 = df1.sort_index(level=0) @@ -398,12 +404,12 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): "one", ] - data = np.random.default_rng(2).randn(len(key1)) + data = np.random.default_rng(2).standard_normal(len(key1)) data = DataFrame({"key1": key1, "key2": key2, "data": data}) index = lexsorted_two_level_string_multiindex to_join = DataFrame( - np.random.default_rng(2).randn(10, 3), + np.random.default_rng(2).standard_normal(10, 3), index=index, columns=["j_one", "j_two", "j_three"], ) @@ -460,17 +466,21 @@ def test_join_hierarchical_mixed_raises(self): def test_join_float64_float32(self): a = DataFrame( - np.random.default_rng(2).randn(10, 2), columns=["a", "b"], dtype=np.float64 + np.random.default_rng(2).standard_normal(10, 2), + columns=["a", "b"], + dtype=np.float64, ) b = DataFrame( - np.random.default_rng(2).randn(10, 1), columns=["c"], dtype=np.float32 + np.random.default_rng(2).standard_normal(10, 1), + columns=["c"], + dtype=np.float32, ) joined = a.join(b) assert joined.dtypes["a"] == "float64" assert joined.dtypes["b"] == "float64" assert joined.dtypes["c"] == "float32" - a = np.random.default_rng(2).randint(0, 5, 100).astype("int64") + a = np.random.default_rng(2).integers(0, 5, 100).astype("int64") b = np.random.default_rng(2).random(100).astype("float64") c = np.random.default_rng(2).random(100).astype("float32") df = DataFrame({"a": a, "b": b, "c": c}) @@ -526,8 +536,8 @@ def test_join_many_non_unique_index(self): { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], "B": ["one", 
"one", "two", "three", "two", "two", "one", "three"], - "C": np.random.default_rng(2).randn(8), - "D": np.random.default_rng(2).randn(8), + "C": np.random.default_rng(2).standard_normal(8), + "D": np.random.default_rng(2).standard_normal(8), } ) s = Series( @@ -595,7 +605,7 @@ def test_join_non_unique_period_index(self): def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame( - np.random.default_rng(2).randn(20, 6), + np.random.default_rng(2).standard_normal(20, 6), columns=["a", "b", "c", "d", "e", "f"], ) df.insert(0, "id", 0) @@ -612,7 +622,9 @@ def test_mixed_type_join_with_suffix(self): mn.join(cn, rsuffix="_right") def test_join_many(self): - df = DataFrame(np.random.default_rng(2).randn(10, 6), columns=list("abcdef")) + df = DataFrame( + np.random.default_rng(2).standard_normal(10, 6), columns=list("abcdef") + ) df_list = [df[["a", "b"]], df[["c", "d"]], df[["e", "f"]]] joined = df_list[0].join(df_list[1:]) @@ -641,7 +653,7 @@ def _check_diff_index(df_list, result, exp_index): def test_join_many_mixed(self): df = DataFrame( - np.random.default_rng(2).randn(8, 4), columns=["A", "B", "C", "D"] + np.random.default_rng(2).standard_normal(8, 4), columns=["A", "B", "C", "D"] ) df["key"] = ["foo", "bar"] * 4 df1 = df.loc[:, ["A", "B"]] @@ -656,10 +668,11 @@ def test_join_dups(self): df = concat( [ DataFrame( - np.random.default_rng(2).randn(10, 4), columns=["A", "A", "B", "B"] + np.random.default_rng(2).standard_normal(10, 4), + columns=["A", "A", "B", "B"], ), DataFrame( - np.random.default_rng(2).randint(0, 10, size=20).reshape(10, 2), + np.random.default_rng(2).integers(0, 10, size=20).reshape(10, 2), columns=["A", "C"], ), ], @@ -672,10 +685,18 @@ def test_join_dups(self): tm.assert_frame_equal(result, expected) # GH 4975, invalid join on dups - w = DataFrame(np.random.default_rng(2).randn(4, 2), columns=["x", "y"]) - x = DataFrame(np.random.default_rng(2).randn(4, 2), columns=["x", "y"]) - y = DataFrame(np.random.default_rng(2).randn(4, 2), 
columns=["x", "y"]) - z = DataFrame(np.random.default_rng(2).randn(4, 2), columns=["x", "y"]) + w = DataFrame( + np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + ) + x = DataFrame( + np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + ) + y = DataFrame( + np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + ) + z = DataFrame( + np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + ) dta = x.merge(y, left_index=True, right_index=True).merge( z, left_index=True, right_index=True, how="outer" diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 990e4a655269c..194ce58416cc4 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -113,8 +113,8 @@ def df(self): { "key1": get_test_data(), "key2": get_test_data(), - "data1": np.random.default_rng(2).randn(50), - "data2": np.random.default_rng(2).randn(50), + "data1": np.random.default_rng(2).standard_normal(50), + "data2": np.random.default_rng(2).standard_normal(50), } ) @@ -128,7 +128,7 @@ def df2(self): { "key1": get_test_data(n=10), "key2": get_test_data(ngroups=4, n=10), - "value": np.random.default_rng(2).randn(10), + "value": np.random.default_rng(2).standard_normal(10), } ) @@ -137,14 +137,15 @@ def left(self): return DataFrame( { "key": ["a", "b", "c", "d", "e", "e", "a"], - "v1": np.random.default_rng(2).randn(7), + "v1": np.random.default_rng(2).standard_normal(7), } ) @pytest.fixture def right(self): return DataFrame( - {"v2": np.random.default_rng(2).randn(4)}, index=["d", "b", "c", "a"] + {"v2": np.random.default_rng(2).standard_normal(4)}, + index=["d", "b", "c", "a"], ) def test_merge_inner_join_empty(self): @@ -185,11 +186,12 @@ def test_merge_index_singlekey_right_vs_left(self): left = DataFrame( { "key": ["a", "b", "c", "d", "e", "e", "a"], - "v1": np.random.default_rng(2).randn(7), + "v1": np.random.default_rng(2).standard_normal(7), } ) 
right = DataFrame( - {"v2": np.random.default_rng(2).randn(4)}, index=["d", "b", "c", "a"] + {"v2": np.random.default_rng(2).standard_normal(4)}, + index=["d", "b", "c", "a"], ) merged1 = merge( @@ -212,11 +214,12 @@ def test_merge_index_singlekey_inner(self): left = DataFrame( { "key": ["a", "b", "c", "d", "e", "e", "a"], - "v1": np.random.default_rng(2).randn(7), + "v1": np.random.default_rng(2).standard_normal(7), } ) right = DataFrame( - {"v2": np.random.default_rng(2).randn(4)}, index=["d", "b", "c", "a"] + {"v2": np.random.default_rng(2).standard_normal(4)}, + index=["d", "b", "c", "a"], ) # inner join @@ -613,8 +616,8 @@ def test_merge_nosort(self): # GH#2098 d = { - "var1": np.random.default_rng(2).randint(0, 10, size=10), - "var2": np.random.default_rng(2).randint(0, 10, size=10), + "var1": np.random.default_rng(2).integers(0, 10, size=10), + "var2": np.random.default_rng(2).integers(0, 10, size=10), "var3": [ datetime(2012, 1, 12), datetime(2011, 2, 4), diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index f0801e3aad49c..b43275f3ce4af 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -22,7 +22,7 @@ def left(): key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"] key2 = ["two", "one", "three", "one", "two", "one", "two", "two", "three", "one"] - data = np.random.default_rng(2).randn(len(key1)) + data = np.random.default_rng(2).standard_normal(len(key1)) return DataFrame({"key1": key1, "key2": key2, "data": data}) @@ -130,7 +130,7 @@ def run_asserts(left, right, sort): left.insert( 1, "2nd", - np.random.default_rng(2).randint(0, 1000, len(left)).astype("float"), + np.random.default_rng(2).integers(0, 1000, len(left)).astype("float"), ) i = np.random.default_rng(2).permutation(len(left)) @@ -201,7 +201,7 @@ def test_compress_group_combinations(self): { "key1": key1, "key2": key2, - "value1": np.random.default_rng(2).randn(20000), + 
"value1": np.random.default_rng(2).standard_normal(20000), } ) @@ -209,7 +209,7 @@ def test_compress_group_combinations(self): { "key1": key1[::2], "key2": key2[::2], - "value2": np.random.default_rng(2).randn(10000), + "value2": np.random.default_rng(2).standard_normal(10000), } ) @@ -393,10 +393,10 @@ def test_left_merge_na_buglet(self): left = DataFrame( { "id": list("abcde"), - "v1": np.random.default_rng(2).randn(5), - "v2": np.random.default_rng(2).randn(5), + "v1": np.random.default_rng(2).standard_normal(5), + "v2": np.random.default_rng(2).standard_normal(5), "dummy": list("abcde"), - "v3": np.random.default_rng(2).randn(5), + "v3": np.random.default_rng(2).standard_normal(5), }, columns=["id", "v1", "v2", "dummy", "v3"], ) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 8fe13943f2d98..408142def7fe5 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -57,9 +57,9 @@ def df(): "shiny", "shiny", ], - "D": np.random.default_rng(2).randn(11), - "E": np.random.default_rng(2).randn(11), - "F": np.random.default_rng(2).randn(11), + "D": np.random.default_rng(2).standard_normal(11), + "E": np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) @@ -86,9 +86,9 @@ def test_crosstab_multiple(self, df): @pytest.mark.parametrize("box", [np.array, list, tuple]) def test_crosstab_ndarray(self, box): # GH 44076 - a = box(np.random.default_rng(2).randint(0, 5, size=100)) - b = box(np.random.default_rng(2).randint(0, 3, size=100)) - c = box(np.random.default_rng(2).randint(0, 10, size=100)) + a = box(np.random.default_rng(2).integers(0, 5, size=100)) + b = box(np.random.default_rng(2).integers(0, 3, size=100)) + c = box(np.random.default_rng(2).integers(0, 10, size=100)) df = DataFrame({"a": a, "b": b, "c": c}) @@ -126,9 +126,9 @@ def test_crosstab_non_aligned(self): tm.assert_frame_equal(result, expected) def 
test_crosstab_margins(self): - a = np.random.default_rng(2).randint(0, 7, size=100) - b = np.random.default_rng(2).randint(0, 3, size=100) - c = np.random.default_rng(2).randint(0, 5, size=100) + a = np.random.default_rng(2).integers(0, 7, size=100) + b = np.random.default_rng(2).integers(0, 3, size=100) + c = np.random.default_rng(2).integers(0, 5, size=100) df = DataFrame({"a": a, "b": b, "c": c}) @@ -157,9 +157,9 @@ def test_crosstab_margins(self): def test_crosstab_margins_set_margin_name(self): # GH 15972 - a = np.random.default_rng(2).randint(0, 7, size=100) - b = np.random.default_rng(2).randint(0, 3, size=100) - c = np.random.default_rng(2).randint(0, 5, size=100) + a = np.random.default_rng(2).integers(0, 7, size=100) + b = np.random.default_rng(2).integers(0, 3, size=100) + c = np.random.default_rng(2).integers(0, 5, size=100) df = DataFrame({"a": a, "b": b, "c": c}) @@ -206,10 +206,10 @@ def test_crosstab_margins_set_margin_name(self): ) def test_crosstab_pass_values(self): - a = np.random.default_rng(2).randint(0, 7, size=100) - b = np.random.default_rng(2).randint(0, 3, size=100) - c = np.random.default_rng(2).randint(0, 5, size=100) - values = np.random.default_rng(2).randn(100) + a = np.random.default_rng(2).integers(0, 7, size=100) + b = np.random.default_rng(2).integers(0, 3, size=100) + c = np.random.default_rng(2).integers(0, 5, size=100) + values = np.random.default_rng(2).standard_normal(100) table = crosstab( [a, b], c, values, aggfunc="sum", rownames=["foo", "bar"], colnames=["baz"] @@ -546,8 +546,8 @@ def test_crosstab_with_numpy_size(self): "A": ["one", "one", "two", "three"] * 6, "B": ["A", "B", "C"] * 8, "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, - "D": np.random.default_rng(2).randn(24), - "E": np.random.default_rng(2).randn(24), + "D": np.random.default_rng(2).standard_normal(24), + "E": np.random.default_rng(2).standard_normal(24), } ) result = crosstab( @@ -868,8 +868,8 @@ def 
test_margin_with_ordered_categorical_column(self): def test_categoricals(a_dtype, b_dtype): # https://github.com/pandas-dev/pandas/issues/37465 g = np.random.default_rng(25982704) - a = Series(g.randint(0, 3, size=100)).astype(a_dtype) - b = Series(g.randint(0, 2, size=100)).astype(b_dtype) + a = Series(g.integers(0, 3, size=100)).astype(a_dtype) + b = Series(g.integers(0, 2, size=100)).astype(b_dtype) result = crosstab(a, b, margins=True, dropna=False) columns = Index([0, 1, "All"], dtype="object", name="col_0") index = Index([0, 1, 2, "All"], dtype="object", name="row_0") diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 0c0f16bcec5d5..b2a6ac49fdff2 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -239,7 +239,7 @@ def test_labels(right, breaks, closed): def test_cut_pass_series_name_to_factor(): name = "foo" - ser = Series(np.random.default_rng(2).randn(100), name=name) + ser = Series(np.random.default_rng(2).standard_normal(100), name=name) factor = cut(ser, 4) assert factor.name == name @@ -283,7 +283,7 @@ def test_inf_handling(): def test_cut_out_of_bounds(): - arr = np.random.default_rng(2).randn(100) + arr = np.random.default_rng(2).standard_normal(100) result = cut(arr, [-1, 0, 1]) mask = isna(result) @@ -618,7 +618,7 @@ def test_cut_incorrect_labels(labels): @pytest.mark.parametrize("right", [True, False]) @pytest.mark.parametrize("include_lowest", [True, False]) def test_cut_nullable_integer(bins, right, include_lowest): - a = np.random.default_rng(2).randint(0, 10, size=50).astype(float) + a = np.random.default_rng(2).integers(0, 10, size=50).astype(float) a[::2] = np.nan result = cut( pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest @@ -677,7 +677,7 @@ def test_cut_unordered_with_series_labels(): def test_cut_no_warnings(): - df = DataFrame({"value":
np.random.default_rng(2).integers(0, 100, 20)}) labels = [f"{i} - {i + 9}" for i in range(0, 100, 10)] with tm.assert_produces_warning(False): df["group"] = cut(df.value, range(0, 105, 10), right=False, labels=labels) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index d4f490d9d26ed..d68626c566e51 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -322,7 +322,9 @@ def test_melt_missing_columns_raises(self): # attempted with column names absent from the dataframe # Generate data - df = DataFrame(np.random.default_rng(2).randn(5, 4), columns=list("abcd")) + df = DataFrame( + np.random.default_rng(2).standard_normal((5, 4)), columns=list("abcd") + ) # Try to melt with missing `value_vars` column name msg = "The following '{Var}' are not present in the DataFrame: {Col}" @@ -668,7 +670,7 @@ def test_pairs(self): class TestWideToLong: def test_simple(self): - x = np.random.default_rng(2).randn(3) + x = np.random.default_rng(2).standard_normal(3) df = DataFrame( { "A1970": {0: "a", 1: "b", 2: "c"}, @@ -704,7 +706,7 @@ def test_stubs(self): def test_separating_character(self): # GH14779 - x = np.random.default_rng(2).randn(3) + x = np.random.default_rng(2).standard_normal(3) df = DataFrame( { "A.1970": {0: "a", 1: "b", 2: "c"}, @@ -728,7 +730,7 @@ def test_separating_character(self): tm.assert_frame_equal(result, expected) def test_escapable_characters(self): - x = np.random.default_rng(2).randn(3) + x = np.random.default_rng(2).standard_normal(3) df = DataFrame( { "A(quarterly)1970": {0: "a", 1: "b", 2: "c"}, diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index d1621d74b1505..18cace98f04d6 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -83,9 +83,9 @@ def data(self): "shiny", "shiny", ],
"D": np.random.default_rng(2).standard_normal(11), + "E": np.random.default_rng(2).standard_normal(11), + "F": np.random.default_rng(2).standard_normal(11), } ) @@ -1070,8 +1070,8 @@ def test_pivot_table_retains_tz(self): dti = date_range("2016-01-01", periods=3, tz="Europe/Amsterdam") df = DataFrame( { - "A": np.random.default_rng(2).randn(3), - "B": np.random.default_rng(2).randn(3), + "A": np.random.default_rng(2).standard_normal(3), + "B": np.random.default_rng(2).standard_normal(3), "C": dti, } ) @@ -1109,7 +1109,7 @@ def test_pivot_no_level_overlap(self): "a": ["a", "a", "a", "a", "b", "b", "b", "b"] * 2, "b": [0, 0, 0, 0, 1, 1, 1, 1] * 2, "c": (["foo"] * 4 + ["bar"] * 4) * 2, - "value": np.random.default_rng(2).randn(16), + "value": np.random.default_rng(2).standard_normal(16), } ) @@ -1149,11 +1149,11 @@ def test_pivot_columns_lexsorted(self): dtype=[("Index", object), ("Symbol", object)], ) items = np.empty(n, dtype=dtype) - iproduct = np.random.default_rng(2).randint(0, len(products), n) + iproduct = np.random.default_rng(2).integers(0, len(products), n) items["Index"] = products["Index"][iproduct] items["Symbol"] = products["Symbol"][iproduct] dr = date_range(date(2000, 1, 1), date(2010, 12, 31)) - dates = dr[np.random.default_rng(2).randint(0, len(dr), n)] + dates = dr[np.random.default_rng(2).integers(0, len(dr), n)] items["Year"] = dates.year items["Month"] = dates.month items["Day"] = dates.day @@ -1690,7 +1690,7 @@ def test_pivot_dtaccessor(self): @pytest.mark.parametrize("i", range(1, 367)) def test_daily(self, i): rng = date_range("1/1/2000", "12/31/2004", freq="D") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) annual = pivot_table( DataFrame(ts), index=ts.index.year, columns=ts.index.dayofyear @@ -1709,7 +1709,7 @@ def test_daily(self, i): @pytest.mark.parametrize("i", range(1, 13)) def test_monthly(self, i): rng = date_range("1/1/2000", 
"12/31/2004", freq="M") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) annual = pivot_table(DataFrame(ts), index=ts.index.year, columns=ts.index.month) annual.columns = annual.columns.droplevel(0) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 90c8e8c84cc46..907eeca6e9b5e 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -29,7 +29,7 @@ def test_qcut(): - arr = np.random.default_rng(2).randn(1000) + arr = np.random.default_rng(2).standard_normal(1000) # We store the bins as Index that have been # rounded to comparisons are a bit tricky. @@ -47,14 +47,14 @@ def test_qcut(): def test_qcut_bounds(): - arr = np.random.default_rng(2).randn(1000) + arr = np.random.default_rng(2).standard_normal(1000) factor = qcut(arr, 10, labels=False) assert len(np.unique(factor)) == 10 def test_qcut_specify_quantiles(): - arr = np.random.default_rng(2).randn(100) + arr = np.random.default_rng(2).standard_normal(100) factor = qcut(arr, [0, 0.25, 0.5, 0.75, 1.0]) expected = qcut(arr, 4) @@ -82,7 +82,7 @@ def test_qcut_include_lowest(): def test_qcut_nas(): - arr = np.random.default_rng(2).randn(100) + arr = np.random.default_rng(2).standard_normal(100) arr[:20] = np.nan result = qcut(arr, 4) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 483409e5943bb..e7fea9aa597b8 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -693,7 +693,7 @@ def test_dt_accessor_api(self): [ Series(np.arange(5)), Series(list("abcde")), - Series(np.random.default_rng(2).randn(5)), + Series(np.random.default_rng(2).standard_normal(5)), ], ) def test_dt_accessor_invalid(self, ser): diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 
813850efb0e4e..560cf39299cc9 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -77,7 +77,7 @@ def test_getitem_setitem_datetime_tz(tz_source): N = 50 # testing with timezone, GH #2785 rng = date_range("1/1/1990", periods=N, freq="H", tz=tzget("US/Eastern")) - ts = Series(np.random.default_rng(2).randn(N), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(N), index=rng) # also test Timestamp tz handling, GH #2789 result = ts.copy() @@ -108,7 +108,7 @@ def test_getitem_setitem_datetimeindex(): N = 50 # testing with timezone, GH #2785 rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") - ts = Series(np.random.default_rng(2).randn(N), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(N), index=rng) result = ts["1990-01-01 04:00:00"] expected = ts.iloc[4] @@ -214,7 +214,7 @@ def test_getitem_setitem_datetimeindex(): def test_getitem_setitem_periodindex(): N = 50 rng = period_range("1/1/1990", periods=N, freq="H") - ts = Series(np.random.default_rng(2).randn(N), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(N), index=rng) result = ts["1990-01-01 04"] expected = ts.iloc[4] @@ -330,12 +330,14 @@ def test_loc_getitem_over_size_cutoff(monkeypatch): d += 3 * sec # duplicate some values in the list - duplicate_positions = np.random.default_rng(2).randint(0, len(dates) - 1, 20) + duplicate_positions = np.random.default_rng(2).integers(0, len(dates) - 1, 20) for p in duplicate_positions: dates[p + 1] = dates[p] df = DataFrame( - np.random.default_rng(2).randn(len(dates), 4), index=dates, columns=list("ABCD") + np.random.default_rng(2).standard_normal(len(dates), 4), + index=dates, + columns=list("ABCD"), ) pos = n * 3 @@ -356,7 +358,7 @@ def test_indexing_over_size_cutoff_period_index(monkeypatch): idx = period_range("1/1/2000", freq="T", periods=n) assert idx._engine.over_size_threshold - s = Series(np.random.default_rng(2).randn(len(idx)), 
index=idx) + s = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx) pos = n - 1 timestamp = idx[pos] diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py index 38ebe1f8377a1..61007c08b50e0 100644 --- a/pandas/tests/series/indexing/test_get.py +++ b/pandas/tests/series/indexing/test_get.py @@ -167,7 +167,7 @@ def test_get_with_default(): @pytest.mark.parametrize( "arr", [ - np.random.default_rng(2).randn(10), + np.random.default_rng(2).standard_normal(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"), ], ) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index d9b505ad1a2fb..93ccc336468ea 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -95,7 +95,7 @@ def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self): def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype): dtype = any_int_numpy_dtype ser = Series( - np.random.default_rng(2).randn(6), + np.random.default_rng(2).standard_normal(6), index=Index([0, 0, 1, 1, 2, 2], dtype=dtype), ) @@ -106,7 +106,9 @@ def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype): ser["c"] # not monotonic - ser = Series(np.random.default_rng(2).randn(6), index=[2, 2, 0, 0, 1, 1]) + ser = Series( + np.random.default_rng(2).standard_normal(6), index=[2, 2, 0, 0, 1, 1] + ) with pytest.raises(KeyError, match=r"^5$"): ser[5] @@ -147,14 +149,14 @@ def test_getitem_pydatetime_tz(self, tzstr): @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) def test_string_index_alias_tz_aware(self, tz): rng = date_range("1/1/2000", periods=10, tz=tz) - ser = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) result = ser["1/3/2000"] tm.assert_almost_equal(result, ser.iloc[2]) def test_getitem_time_object(self): rng = 
date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) mask = (rng.hour == 9) & (rng.minute == 30) result = ts[time(9, 30)] @@ -238,7 +240,7 @@ def test_getitem_slice_strings_with_datetimeindex(self): ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"] ) - ts = Series(np.random.default_rng(2).randn(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx) result = ts["1/2/2000":] expected = ts[1:] @@ -287,7 +289,7 @@ def test_getitem_slice_2d(self, datetime_series): def test_getitem_median_slice_bug(self): index = date_range("20090415", "20090519", freq="2B") - ser = Series(np.random.default_rng(2).randn(13), index=index) + ser = Series(np.random.default_rng(2).standard_normal(13), index=index) indexer = [slice(6, 7, None)] msg = "Indexing with a single-item list" @@ -343,7 +345,8 @@ def test_getitem_slice_bug(self): def test_getitem_slice_integers(self): ser = Series( - np.random.default_rng(2).randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16] + np.random.default_rng(2).standard_normal(8), + index=[2, 4, 6, 8, 10, 12, 14, 16], ) result = ser[:4] @@ -639,7 +642,9 @@ def test_getitem_preserve_name(datetime_series): def test_getitem_with_integer_labels(): # integer indexes, be careful - ser = Series(np.random.default_rng(2).randn(10), index=list(range(0, 20, 2))) + ser = Series( + np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2)) + ) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) with pytest.raises(KeyError, match="not in index"): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 2c33a6e46d95a..93b8b11c14f25 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -25,7 +25,9 @@ def test_basic_indexing(): - s = 
Series(np.random.default_rng(2).randn(5), index=["a", "b", "a", "a", "b"]) + s = Series( + np.random.default_rng(2).standard_normal(5), index=["a", "b", "a", "a", "b"] + ) warn_msg = "Series.__[sg]etitem__ treating keys as positions is deprecated" msg = "index 5 is out of bounds for axis 0 with size 5" @@ -98,7 +100,7 @@ def test_basic_getitem_dt64tz_values(): def test_getitem_setitem_ellipsis(): - s = Series(np.random.default_rng(2).randn(10)) + s = Series(np.random.default_rng(2).standard_normal(10)) result = s[...] tm.assert_series_equal(result, s) @@ -307,7 +309,9 @@ def test_preserve_refs(datetime_series): def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl): index = lexsorted_two_level_string_multiindex - ser = Series(np.random.default_rng(2).randn(len(index)), index=index, name="sth") + ser = Series( + np.random.default_rng(2).standard_normal(len(index)), index=index, name="sth" + ) result = indexer_sl(ser)["foo"] assert result.name == ser.name diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index 4a4b850f8fb36..3c21cd0d5ca64 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -7,7 +7,7 @@ def test_mask(): # compare with tested results in test_where - s = Series(np.random.default_rng(2).randn(5)) + s = Series(np.random.default_rng(2).standard_normal(5)) cond = s > 0 rs = s.where(~cond, np.nan) @@ -56,7 +56,7 @@ def test_mask_casts2(): def test_mask_inplace(): - s = Series(np.random.default_rng(2).randn(5)) + s = Series(np.random.default_rng(2).standard_normal(5)) cond = s > 0 rs = s.copy() diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 9704e2e628923..8bb9d38cd1d27 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -222,7 +222,8 @@ def test_setitem_slice(self): def test_setitem_slice_integers(self): 
ser = Series( - np.random.default_rng(2).randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16] + np.random.default_rng(2).standard_normal(8), + index=[2, 4, 6, 8, 10, 12, 14, 16], ) ser[:4] = 0 @@ -258,7 +259,7 @@ def test_setitem_mask_cast(self): def test_setitem_mask_align_and_promote(self): # GH#8387: test that changing types does not break alignment ts = Series( - np.random.default_rng(2).randn(100), index=np.arange(100, 0, -1) + np.random.default_rng(2).standard_normal(100), index=np.arange(100, 0, -1) ).round(5) mask = ts > 0 left = ts.copy() diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index c6d967f87702f..fb0d4de74e75e 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -114,7 +114,7 @@ def test_where_unsafe(): def test_where(): - s = Series(np.random.default_rng(2).randn(5)) + s = Series(np.random.default_rng(2).standard_normal(5)) cond = s > 0 rs = s.where(cond).dropna() @@ -143,7 +143,7 @@ def test_where(): def test_where_error(): - s = Series(np.random.default_rng(2).randn(5)) + s = Series(np.random.default_rng(2).standard_normal(5)) cond = s > 0 msg = "Array conditional must be same shape as self" @@ -319,7 +319,7 @@ def test_broadcast(size, mask, item, box): def test_where_inplace(): - s = Series(np.random.default_rng(2).randn(5)) + s = Series(np.random.default_rng(2).standard_normal(5)) cond = s > 0 rs = s.copy() diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index eb75b81d8d5e5..82261be61f9d1 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -192,7 +192,7 @@ def test_align_with_dataframe_method(method): def test_align_dt64tzindex_mismatched_tzs(): idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") - ser = Series(np.random.default_rng(2).randn(len(idx1)), index=idx1) + ser = 
Series(np.random.default_rng(2).standard_normal(len(idx1)), index=idx1) ser_central = ser.tz_convert("US/Central") # different timezones convert to UTC @@ -203,7 +203,7 @@ def test_align_dt64tzindex_mismatched_tzs(): def test_align_periodindex(join_type): rng = period_range("1/1/2000", "1/1/2010", freq="A") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) # TODO: assert something? ts.align(ts[::2], join=join_type) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 0106d0270b771..a86af8d7ade0c 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -46,7 +46,7 @@ def test_argsort(self, datetime_series): tm.assert_series_equal(result, expected) def test_argsort_stable(self): - s = Series(np.random.default_rng(2).randint(0, 100, size=10000)) + s = Series(np.random.default_rng(2).integers(0, 100, size=10000)) mindexer = s.argsort(kind="mergesort") qindexer = s.argsort() diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 1d0b611decb00..d5f99f721d323 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -21,7 +21,7 @@ class TestSeriesAsof: def test_asof_nanosecond_index_access(self): ts = Timestamp("20130101").as_unit("ns")._value dti = DatetimeIndex([ts + 50 + i for i in range(100)]) - ser = Series(np.random.default_rng(2).randn(100), index=dti) + ser = Series(np.random.default_rng(2).standard_normal(100), index=dti) first_value = ser.asof(ser.index[0]) @@ -39,7 +39,7 @@ def test_basic(self): # array or list or dates N = 50 rng = date_range("1/1/1990", periods=N, freq="53s") - ts = Series(np.random.default_rng(2).randn(N), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(N), index=rng) ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N 
* 3, freq="25s") @@ -119,7 +119,7 @@ def test_periodindex(self): # array or list or dates N = 50 rng = period_range("1/1/1990", periods=N, freq="H") - ts = Series(np.random.default_rng(2).randn(N), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(N), index=rng) ts.iloc[15:30] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="37min") @@ -177,7 +177,7 @@ def test_errors(self): # subset with Series N = 10 rng = date_range("1/1/1990", periods=N, freq="53s") - s = Series(np.random.default_rng(2).randn(N), index=rng) + s = Series(np.random.default_rng(2).standard_normal(N), index=rng) with pytest.raises(ValueError, match="not valid for Series"): s.asof(s.index[0], subset="foo") diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9d943c582bacf..9ac40876a8a09 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -326,7 +326,7 @@ def test_astype_to_str_preserves_na(self, value, string_value): @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) def test_astype(self, dtype): - ser = Series(np.random.default_rng(2).randn(5), name="foo") + ser = Series(np.random.default_rng(2).standard_normal(5), name="foo") as_typed = ser.astype(dtype) assert as_typed.dtype == dtype @@ -490,7 +490,7 @@ class TestAstypeCategorical: def test_astype_categorical_to_other(self): cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) ser = Series( - np.random.default_rng(2).RandomState(0).randint(0, 10000, 100) + np.random.default_rng(2).RandomState(0).integers(0, 10000, 100) ).sort_values() ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) @@ -534,7 +534,7 @@ def cmp(a, b): def test_astype_categorical_invalid_conversions(self): # invalid conversion (these are NOT a dtype) cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series(np.random.default_rng(2).randint(0, 10000, 
100)).sort_values() + ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values() ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) msg = ( diff --git a/pandas/tests/series/methods/test_autocorr.py b/pandas/tests/series/methods/test_autocorr.py index 3ca35e8049c14..c1d768cf02f37 100644 --- a/pandas/tests/series/methods/test_autocorr.py +++ b/pandas/tests/series/methods/test_autocorr.py @@ -18,7 +18,7 @@ def test_autocorr(self, datetime_series): # Choose a random lag between 1 and length of Series - 2 # and compare the result with the Series corr() function - n = 1 + np.random.default_rng(2).randint(max(1, len(datetime_series) - 2)) + n = 1 + np.random.default_rng(2).integers(max(1, len(datetime_series) - 2)) corr1 = datetime_series.corr(datetime_series.shift(n)) corr2 = datetime_series.autocorr(lag=n) diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index 6ee8df47a433c..c7ca73da9ae66 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -52,7 +52,7 @@ def test_combine_first(self): # mixed types index = tm.makeStringIndex(20) - floats = Series(np.random.default_rng(2).randn(20), index=index) + floats = Series(np.random.default_rng(2).standard_normal(20), index=index) strings = Series(tm.makeStringIndex(10), index=index[::2]) combined = strings.combine_first(floats) diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py index 2e7d51fad48d8..153e3b49278bd 100644 --- a/pandas/tests/series/methods/test_cov_corr.py +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -142,8 +142,8 @@ def test_corr_rank(self): def test_corr_invalid_method(self): # GH PR #22298 - s1 = Series(np.random.default_rng(2).randn(10)) - s2 = Series(np.random.default_rng(2).randn(10)) + s1 = Series(np.random.default_rng(2).standard_normal(10)) + s2 = 
Series(np.random.default_rng(2).standard_normal(10)) msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " with pytest.raises(ValueError, match=msg): s1.corr(s2, method="____") diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 15c5c73c5159b..54121736c8a80 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -801,7 +801,7 @@ def test_fillna_invalid_method(self, datetime_series): assert "ffil" in str(inst) def test_fillna_listlike_invalid(self): - ser = Series(np.random.default_rng(2).randint(-100, 100, 50)) + ser = Series(np.random.default_rng(2).integers(-100, 100, 50)) msg = '"value" parameter must be a scalar or dict, but you passed a "list"' with pytest.raises(TypeError, match=msg): ser.fillna([1, 2]) @@ -910,7 +910,7 @@ def test_pad_nan(self): def test_series_fillna_limit(self): index = np.arange(10) - s = Series(np.random.default_rng(2).randn(10), index=index) + s = Series(np.random.default_rng(2).standard_normal(10), index=index) result = s[:2].reindex(index) result = result.fillna(method="pad", limit=5) @@ -928,7 +928,7 @@ def test_series_fillna_limit(self): def test_series_pad_backfill_limit(self): index = np.arange(10) - s = Series(np.random.default_rng(2).randn(10), index=index) + s = Series(np.random.default_rng(2).standard_normal(10), index=index) result = s[:2].reindex(index, method="pad", limit=5) @@ -943,7 +943,7 @@ def test_series_pad_backfill_limit(self): tm.assert_series_equal(result, expected) def test_fillna_int(self): - ser = Series(np.random.default_rng(2).randint(-100, 100, 50)) + ser = Series(np.random.default_rng(2).integers(-100, 100, 50)) return_value = ser.fillna(method="ffill", inplace=True) assert return_value is None tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py 
index 0e3755281640f..d0026c165dddf 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -218,7 +218,7 @@ def test_interpolate_corners(self, kwargs): def test_interpolate_index_values(self): s = Series(np.nan, index=np.sort(np.random.default_rng(2).rand(30))) - s[::3] = np.random.default_rng(2).randn(10) + s[::3] = np.random.default_rng(2).standard_normal(10) vals = s.index.values.astype(float) @@ -743,7 +743,7 @@ def test_spline_smooth(self): def test_spline_interpolation(self): # Explicit cast to float to avoid implicit cast when setting np.nan s = Series(np.arange(10) ** 2, dtype="float") - s[np.random.default_rng(2).randint(0, 9, 3)] = np.nan + s[np.random.default_rng(2).integers(0, 9, 3)] = np.nan result1 = s.interpolate(method="spline", order=1) expected1 = s.interpolate(method="spline", order=1) tm.assert_series_equal(result1, expected1) @@ -764,7 +764,7 @@ def test_interp_timedelta64(self): def test_series_interpolate_method_values(self): # GH#1646 rng = date_range("1/1/2000", "1/20/2000", freq="D") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) ts[::2] = np.nan diff --git a/pandas/tests/series/methods/test_is_monotonic.py b/pandas/tests/series/methods/test_is_monotonic.py index c36399f09afcb..073ec4172aff6 100644 --- a/pandas/tests/series/methods/test_is_monotonic.py +++ b/pandas/tests/series/methods/test_is_monotonic.py @@ -8,7 +8,7 @@ class TestIsMonotonic: def test_is_monotonic_numeric(self): - ser = Series(np.random.default_rng(2).randint(0, 10, size=1000)) + ser = Series(np.random.default_rng(2).integers(0, 10, size=1000)) assert not ser.is_monotonic_increasing ser = Series(np.arange(1000)) assert ser.is_monotonic_increasing is True diff --git a/pandas/tests/series/methods/test_is_unique.py b/pandas/tests/series/methods/test_is_unique.py index 678b016fcf301..edf3839c2cebb 100644 --- 
a/pandas/tests/series/methods/test_is_unique.py +++ b/pandas/tests/series/methods/test_is_unique.py @@ -7,7 +7,7 @@ @pytest.mark.parametrize( "data, expected", [ - (np.random.default_rng(2).randint(0, 10, size=1000), False), + (np.random.default_rng(2).integers(0, 10, size=1000), False), (np.arange(1000), True), ([], True), ([np.nan], True), diff --git a/pandas/tests/series/methods/test_matmul.py b/pandas/tests/series/methods/test_matmul.py index 7041142caf80f..6c7040c2ebc7b 100644 --- a/pandas/tests/series/methods/test_matmul.py +++ b/pandas/tests/series/methods/test_matmul.py @@ -13,9 +13,11 @@ class TestMatmul: def test_matmul(self): # matmul test is for GH#10259 - a = Series(np.random.default_rng(2).randn(4), index=["p", "q", "r", "s"]) + a = Series( + np.random.default_rng(2).standard_normal(4), index=["p", "q", "r", "s"] + ) b = DataFrame( - np.random.default_rng(2).randn(3, 4), + np.random.default_rng(2).standard_normal(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"], ).T diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 1deb45ff3a962..e8de1cd89e397 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -219,9 +219,9 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype): dtype = any_numeric_ea_dtype if dtype.startswith("UInt"): # Can't cast from negative float to uint on some platforms - arr = np.random.default_rng(2).randint(1, 10, 10) + arr = np.random.default_rng(2).integers(1, 10, 10) else: - arr = np.random.default_rng(2).randn(10) + arr = np.random.default_rng(2).standard_normal(10) arr = arr.astype(dtype.lower(), copy=False) ser = Series(arr.copy(), dtype=dtype) diff --git a/pandas/tests/series/methods/test_nunique.py b/pandas/tests/series/methods/test_nunique.py index 76117c706b401..826132eb28162 100644 --- a/pandas/tests/series/methods/test_nunique.py +++ b/pandas/tests/series/methods/test_nunique.py @@ -8,7 +8,7 @@ 
def test_nunique(): # basics.rst doc example - series = Series(np.random.default_rng(2).randn(500)) + series = Series(np.random.default_rng(2).standard_normal(500)) series[20:500] = np.nan series[10:20] = 5000 result = series.nunique() diff --git a/pandas/tests/series/methods/test_quantile.py b/pandas/tests/series/methods/test_quantile.py index 07abe19808e4d..70ad6b3016a57 100644 --- a/pandas/tests/series/methods/test_quantile.py +++ b/pandas/tests/series/methods/test_quantile.py @@ -43,7 +43,7 @@ def test_quantile(self, datetime_series): with pytest.raises(ValueError, match=msg): datetime_series.quantile(invalid) - s = Series(np.random.default_rng(2).randn(100)) + s = Series(np.random.default_rng(2).standard_normal(100)) percentile_array = [-0.5, 0.25, 1.5] with pytest.raises(ValueError, match=msg): s.quantile(percentile_array) diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index c2fd221ec711b..da7c4612cbc49 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -322,7 +322,7 @@ def test_rank_desc_mix_nans_infs(self): def test_rank_methods_series(self, method, op, value): from scipy.stats import rankdata - xs = np.random.default_rng(2).randn(9) + xs = np.random.default_rng(2).standard_normal(9) xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates np.random.default_rng(2).shuffle(xs) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index ee2077dfb33f1..52446f96009d5 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -87,7 +87,7 @@ def test_reindex_series_add_nat(): def test_reindex_with_datetimes(): rng = date_range("1/1/2000", periods=20) - ts = Series(np.random.default_rng(2).randn(20), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(20), index=rng) result = ts.reindex(list(ts.index[5:10])) expected = ts[5:10] diff 
--git a/pandas/tests/series/methods/test_repeat.py b/pandas/tests/series/methods/test_repeat.py index 794ac2b1aab4b..8ecc8052ff49c 100644 --- a/pandas/tests/series/methods/test_repeat.py +++ b/pandas/tests/series/methods/test_repeat.py @@ -10,7 +10,7 @@ class TestRepeat: def test_repeat(self): - ser = Series(np.random.default_rng(2).randn(3), index=["a", "b", "c"]) + ser = Series(np.random.default_rng(2).standard_normal(3), index=["a", "b", "c"]) reps = ser.repeat(5) exp = Series(ser.values.repeat(5), index=ser.index.values.repeat(5)) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index fa30264f5eee7..ec9db8c3830d6 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -51,7 +51,7 @@ def test_replace_noop_doesnt_downcast(self): def test_replace(self): N = 100 - ser = pd.Series(np.random.default_rng(2).randn(N)) + ser = pd.Series(np.random.default_rng(2).standard_normal(N)) ser[0:4] = np.nan ser[6:10] = 0 @@ -67,7 +67,7 @@ def test_replace(self): tm.assert_series_equal(rs, ser) ser = pd.Series( - np.fabs(np.random.default_rng(2).randn(N)), + np.fabs(np.random.default_rng(2).standard_normal(N)), tm.makeDateIndex(N), dtype=object, ) @@ -285,7 +285,7 @@ def test_replace_Int_with_na(self, any_int_ea_dtype): def test_replace2(self): N = 100 ser = pd.Series( - np.fabs(np.random.default_rng(2).randn(N)), + np.fabs(np.random.default_rng(2).standard_normal(N)), tm.makeDateIndex(N), dtype=object, ) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 36ae8a4219158..13d85d182ed0a 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -56,7 +56,7 @@ def test_reset_index(self): levels=[["bar"], ["one", "two", "three"], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], ) - s = Series(np.random.default_rng(2).randn(6), 
index=index) + s = Series(np.random.default_rng(2).standard_normal(6), index=index) rs = s.reset_index(level=1) assert len(rs.columns) == 2 diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 6a2e4747be13e..364394d1d484a 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -77,7 +77,7 @@ def test_sort_values(self, datetime_series, using_copy_on_write): # GH#5856/5853 # Series.sort_values operating on a view - df = DataFrame(np.random.default_rng(2).randn(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) s = df.iloc[:, 0] msg = ( diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index 1ef45c0f521fb..e37f955a91cd3 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -46,7 +46,7 @@ def test_unstack(): levels=[["bar"], ["one", "two", "three"], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], ) - s = Series(np.random.default_rng(2).randn(6), index=index) + s = Series(np.random.default_rng(2).standard_normal(6), index=index) exp_index = MultiIndex( levels=[["one", "two", "three"], [0, 1]], codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index b355930bab756..096cbe3ab7e41 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -120,7 +120,7 @@ def test_class_axis(self): def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( - np.random.default_rng(2).randn(1000, 3), + np.random.default_rng(2).standard_normal(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) @@ -134,14 +134,14 @@ def f(x): def test_ndarray_compat_like_func(self): # using an ndarray like function - s = 
Series(np.random.default_rng(2).randn(10)) + s = Series(np.random.default_rng(2).standard_normal(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) def test_ndarray_compat_ravel(self): # ravel - s = Series(np.random.default_rng(2).randn(10)) + s = Series(np.random.default_rng(2).standard_normal(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) def test_empty_method(self): diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 4981f7bab7a8e..80fd2fd7c0a06 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -155,7 +155,7 @@ class TestSeriesArithmetic: def test_add_series_with_period_index(self): rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") - ts = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) result = ts + ts[::2] expected = ts + ts @@ -369,15 +369,15 @@ def test_add_list_to_masked_array_boolean(self, request): class TestSeriesFlexComparison: @pytest.mark.parametrize("axis", [0, None, "index"]) def test_comparison_flex_basic(self, axis, comparison_op): - left = Series(np.random.default_rng(2).randn(10)) - right = Series(np.random.default_rng(2).randn(10)) + left = Series(np.random.default_rng(2).standard_normal(10)) + right = Series(np.random.default_rng(2).standard_normal(10)) result = getattr(left, comparison_op.__name__)(right, axis=axis) expected = comparison_op(left, right) tm.assert_series_equal(result, expected) def test_comparison_bad_axis(self, comparison_op): - left = Series(np.random.default_rng(2).randn(10)) - right = Series(np.random.default_rng(2).randn(10)) + left = Series(np.random.default_rng(2).standard_normal(10)) + right = Series(np.random.default_rng(2).standard_normal(10)) msg = "No axis named 1 for object type" with pytest.raises(ValueError, 
match=msg): @@ -716,13 +716,13 @@ def test_series_add_tz_mismatch_converts_to_utc(self): perm = np.random.default_rng(2).permutation(100)[:90] ser1 = Series( - np.random.default_rng(2).randn(90), + np.random.default_rng(2).standard_normal(90), index=rng.take(perm).tz_convert("US/Eastern"), ) perm = np.random.default_rng(2).permutation(100)[:90] ser2 = Series( - np.random.default_rng(2).randn(90), + np.random.default_rng(2).standard_normal(90), index=rng.take(perm).tz_convert("Europe/Berlin"), ) @@ -737,7 +737,7 @@ def test_series_add_tz_mismatch_converts_to_utc(self): def test_series_add_aware_naive_raises(self): rng = date_range("1/1/2011", periods=10, freq="H") - ser = Series(np.random.default_rng(2).randn(len(rng)), index=rng) + ser = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) ser_utc = ser.tz_localize("utc") @@ -759,7 +759,7 @@ def test_datetime_understood(self): def test_align_date_objects_with_datetimeindex(self): rng = date_range("1/1/2000", periods=20) - ts = Series(np.random.default_rng(2).randn(20), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(20), index=rng) ts_slice = ts[5:] ts2 = ts_slice.copy() diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 52334bd8b25bf..6d2dc22f1cf23 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -177,7 +177,7 @@ def test_constructor(self, datetime_series): ValueError, match=r"Data must be 1-dimensional, got ndarray of shape \(3, 3\) instead", ): - Series(np.random.default_rng(2).randn(3, 3), index=np.arange(3)) + Series(np.random.default_rng(2).standard_normal((3, 3)), index=np.arange(3)) mixed.name = "Series" rs = Series(mixed).name @@ -810,7 +810,7 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype) def test_constructor_floating_data_int_dtype(self, frame_or_series): # GH#40110 - arr = np.random.default_rng(2).randn(2) + arr = 
np.random.default_rng(2).standard_normal(2) # Long-standing behavior (for Series, new in 2.0 for DataFrame) # has been to ignore the dtype on these; @@ -2076,7 +2076,9 @@ def test_series_from_index_dtype_equal_does_not_copy(self): class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): idx = tm.makeDateIndex(10000) - ser = Series(np.random.default_rng(2).randn(len(idx)), idx.astype(object)) + ser = Series( + np.random.default_rng(2).standard_normal(len(idx)), idx.astype(object) + ) # as of 2.0, we no longer silently cast the object-dtype index # to DatetimeIndex GH#39307, GH#23598 assert not isinstance(ser.index, DatetimeIndex) diff --git a/pandas/tests/series/test_npfuncs.py b/pandas/tests/series/test_npfuncs.py index 9c5c3c3d067de..08950db25b282 100644 --- a/pandas/tests/series/test_npfuncs.py +++ b/pandas/tests/series/test_npfuncs.py @@ -13,7 +13,7 @@ class TestPtp: def test_ptp(self): # GH#21614 N = 1000 - arr = np.random.default_rng(2).randn(N) + arr = np.random.default_rng(2).standard_normal(N) ser = Series(arr) assert np.ptp(ser) == np.ptp(arr) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index c8acdd1f60714..6c3a4d94a4f12 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -67,7 +67,12 @@ def test_empty_name_printing(self): @pytest.mark.parametrize("args", [(), (0, -1)]) def test_float_range(self, args): - str(Series(np.random.default_rng(2).randn(1000), index=np.arange(1000, *args))) + str( + Series( + np.random.default_rng(2).standard_normal(1000), + index=np.arange(1000, *args), + ) + ) def test_empty_object(self): # empty @@ -114,7 +119,7 @@ def test_various_names(self, name, string_series): def test_tuple_name(self): biggie = Series( - np.random.default_rng(2).randn(1000), + np.random.default_rng(2).standard_normal(1000), index=np.arange(1000), name=("foo", "bar", "baz"), ) @@ -123,7 +128,7 @@ def test_tuple_name(self): 
@pytest.mark.parametrize("arg", [100, 1001]) def test_tidy_repr_name_0(self, arg): # tidy repr - ser = Series(np.random.default_rng(2).randn(arg), name=0) + ser = Series(np.random.default_rng(2).standard_normal(arg), name=0) rep_str = repr(ser) assert "Name: 0" in rep_str @@ -151,7 +156,12 @@ def test_tidy_repr(self): repr(a) # should not raise exception def test_repr_bool_fails(self, capsys): - s = Series([DataFrame(np.random.default_rng(2).randn(2, 2)) for i in range(5)]) + s = Series( + [ + DataFrame(np.random.default_rng(2).standard_normal((2, 2))) + for i in range(5) + ] + ) # It works (with no Cython exception barf)! repr(s) @@ -199,13 +209,13 @@ def test_timeseries_repr_object_dtype(self): index = Index( [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object ) - ts = Series(np.random.default_rng(2).randn(len(index)), index) + ts = Series(np.random.default_rng(2).standard_normal(len(index)), index) repr(ts) ts = tm.makeTimeSeries(1000) assert repr(ts).splitlines()[-1].startswith("Freq:") - ts2 = ts.iloc[np.random.default_rng(2).randint(0, len(ts) - 1, 400)] + ts2 = ts.iloc[np.random.default_rng(2).integers(0, len(ts) - 1, 400)] repr(ts2).splitlines()[-1] def test_latex_repr(self): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 6fd24e438afea..698c727f1beb8 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -28,8 +28,8 @@ def arrays_for_binary_ufunc(): """ A pair of random, length-100 integer-dtype arrays, that are mostly 0. 
""" - a1 = np.random.default_rng(2).randint(0, 10, 100, dtype="int64") - a2 = np.random.default_rng(2).randint(0, 10, 100, dtype="int64") + a1 = np.random.default_rng(2).integers(0, 10, 100, dtype="int64") + a2 = np.random.default_rng(2).integers(0, 10, 100, dtype="int64") a1[::3] = 0 a2[::4] = 0 return a1, a2 @@ -38,7 +38,7 @@ def arrays_for_binary_ufunc(): @pytest.mark.parametrize("ufunc", [np.positive, np.floor, np.exp]) def test_unary_ufunc(ufunc, sparse): # Test that ufunc(pd.Series) == pd.Series(ufunc) - arr = np.random.default_rng(2).randint(0, 10, 10, dtype="int64") + arr = np.random.default_rng(2).integers(0, 10, 10, dtype="int64") arr[::2] = 0 if sparse: arr = SparseArray(arr, dtype=pd.SparseDtype("int64", 0)) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 78bab1142c1d8..d93ca92843cfe 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -527,13 +527,13 @@ def test_factorize_mixed_values(self, data, expected_codes, expected_uniques): class TestUnique: def test_ints(self): - arr = np.random.default_rng(2).randint(0, 100, size=50) + arr = np.random.default_rng(2).integers(0, 100, size=50) result = algos.unique(arr) assert isinstance(result, np.ndarray) def test_objects(self): - arr = np.random.default_rng(2).randint(0, 100, size=50).astype("O") + arr = np.random.default_rng(2).integers(0, 100, size=50).astype("O") result = algos.unique(arr) assert isinstance(result, np.ndarray) @@ -878,7 +878,7 @@ def test_unique_masked(self, any_numeric_ea_dtype): def test_nunique_ints(index_or_series_or_array): # GH#36327 - values = index_or_series_or_array(np.random.default_rng(2).randint(0, 20, 30)) + values = index_or_series_or_array(np.random.default_rng(2).integers(0, 20, 30)) result = algos.nunique_ints(values) expected = len(algos.unique(values)) assert result == expected @@ -1175,7 +1175,7 @@ def test_isin_unsigned_dtype(self): class TestValueCounts: def test_value_counts(self): - arr = 
np.random.default_rng(2).randn(4) + arr = np.random.default_rng(2).standard_normal(4) factor = cut(arr, 4) # assert isinstance(factor, n) @@ -1860,8 +1860,8 @@ def test_is_lexsorted(): def test_groupsort_indexer(): - a = np.random.default_rng(2).randint(0, 1000, 100).astype(np.intp) - b = np.random.default_rng(2).randint(0, 1000, 100).astype(np.intp) + a = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp) + b = np.random.default_rng(2).integers(0, 1000, 100).astype(np.intp) result = libalgos.groupsort_indexer(a, 1000)[0] diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 47026d92d8c2e..89206640eb663 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -65,7 +65,7 @@ def test_random_state(): # check array-like # GH32503 - state_arr_like = np.random.default_rng(None).randint( + state_arr_like = np.random.default_rng(None).integers( 0, 2**31, size=624, dtype="uint32" ) assert ( diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2576b653e238d..5c6591e623561 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -18,14 +18,18 @@ @pytest.fixture def _frame(): return DataFrame( - np.random.default_rng(2).randn(10001, 4), columns=list("ABCD"), dtype="float64" + np.random.default_rng(2).standard_normal((10001, 4)), + columns=list("ABCD"), + dtype="float64", ) @pytest.fixture def _frame2(): return DataFrame( - np.random.default_rng(2).randn(100, 4), columns=list("ABCD"), dtype="float64" + np.random.default_rng(2).standard_normal((100, 4)), + columns=list("ABCD"), + dtype="float64", ) @@ -56,22 +60,22 @@ def _mixed2(_frame2): @pytest.fixture def _integer(): return DataFrame( - np.random.default_rng(2).randint(1, 100, size=(10001, 4)), + np.random.default_rng(2).integers(1, 100, size=(10001, 4)), columns=list("ABCD"), dtype="int64", ) @pytest.fixture -def _integer_randint(_integer): - # randint to get a case with zeros - return _integer * 
np.random.default_rng(2).randint(0, 2, size=np.shape(_integer)) +def _integer_integers(_integer): + # integers to get a case with zeros + return _integer * np.random.default_rng(2).integers(0, 2, size=np.shape(_integer)) @pytest.fixture def _integer2(): return DataFrame( - np.random.default_rng(2).randint(1, 100, size=(101, 4)), + np.random.default_rng(2).integers(1, 100, size=(101, 4)), columns=list("ABCD"), dtype="int64", ) @@ -126,7 +130,7 @@ def call_op(df, other, flex: bool, opname: str): [ "_integer", "_integer2", - "_integer_randint", + "_integer_integers", "_frame", "_frame2", "_mixed", @@ -157,7 +161,7 @@ def test_run_arithmetic(self, request, fixture, flex, arith): [ "_integer", "_integer2", - "_integer_randint", + "_integer_integers", "_frame", "_frame2", "_mixed", @@ -192,8 +196,8 @@ def test_run_binary(self, request, fixture, flex, comparison_op): # self.run_binary(df.iloc[:, i], binary_comp, flex) def test_invalid(self): - array = np.random.default_rng(2).randn(1_000_001) - array2 = np.random.default_rng(2).randn(100) + array = np.random.default_rng(2).standard_normal(1_000_001) + array2 = np.random.default_rng(2).standard_normal(100) # no op result = expr._can_use_numexpr(operator.add, None, array, array, "evaluate") diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index afe4aa28b9c4b..0410599945eb5 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -161,7 +161,9 @@ def test_multilevel_consolidate(self): index = MultiIndex.from_tuples( [("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")] ) - df = DataFrame(np.random.default_rng(2).randn(4, 4), index=index, columns=index) + df = DataFrame( + np.random.default_rng(2).standard_normal((4, 4)), index=index, columns=index + ) df["Totals", ""] = df.sum(1) df = df._consolidate() @@ -171,8 +173,8 @@ def test_level_with_tuples(self): codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) - series = Series(np.random.default_rng(2).randn(6), 
index=index) - frame = DataFrame(np.random.default_rng(2).randn(6, 4), index=index) + series = Series(np.random.default_rng(2).standard_normal(6), index=index) + frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index) result = series[("foo", "bar", 0)] result2 = series.loc[("foo", "bar", 0)] @@ -196,8 +198,8 @@ def test_level_with_tuples(self): codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) - series = Series(np.random.default_rng(2).randn(6), index=index) - frame = DataFrame(np.random.default_rng(2).randn(6, 4), index=index) + series = Series(np.random.default_rng(2).standard_normal(6), index=index) + frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index) result = series[("foo", "bar")] result2 = series.loc[("foo", "bar")] diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2fe7f9f66d37c..13728520d502d 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -34,7 +34,7 @@ def arr_shape(): @pytest.fixture def arr_float(arr_shape): - return np.random.default_rng(2).randn(*arr_shape) + return np.random.default_rng(2).standard_normal(arr_shape) @pytest.fixture @@ -44,12 +44,12 @@ def arr_complex(arr_float): @pytest.fixture def arr_int(arr_shape): - return np.random.default_rng(2).randint(-10, 10, arr_shape) + return np.random.default_rng(2).integers(-10, 10, arr_shape) @pytest.fixture def arr_bool(arr_shape): - return np.random.default_rng(2).randint(0, 2, arr_shape) == 0 + return np.random.default_rng(2).integers(0, 2, arr_shape) == 0 @pytest.fixture @@ -64,12 +64,12 @@ def arr_utf(arr_float): @pytest.fixture def arr_date(arr_shape): - return np.random.default_rng(2).randint(0, 20000, arr_shape).astype("M8[ns]") + return np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]") @pytest.fixture def arr_tdelta(arr_shape): - return np.random.default_rng(2).randint(0, 20000, arr_shape).astype("m8[ns]") + return np.random.default_rng(2).integers(0, 20000, 
arr_shape).astype("m8[ns]") @pytest.fixture @@ -190,18 +190,18 @@ def setup_method(self): arr_shape = (11, 7) - self.arr_float = np.random.default_rng(2).randn(*arr_shape) - self.arr_float1 = np.random.default_rng(2).randn(*arr_shape) + self.arr_float = np.random.default_rng(2).standard_normal(arr_shape) + self.arr_float1 = np.random.default_rng(2).standard_normal(arr_shape) self.arr_complex = self.arr_float + self.arr_float1 * 1j - self.arr_int = np.random.default_rng(2).randint(-10, 10, arr_shape) - self.arr_bool = np.random.default_rng(2).randint(0, 2, arr_shape) == 0 + self.arr_int = np.random.default_rng(2).integers(-10, 10, arr_shape) + self.arr_bool = np.random.default_rng(2).integers(0, 2, arr_shape) == 0 self.arr_str = np.abs(self.arr_float).astype("S") self.arr_utf = np.abs(self.arr_float).astype("U") self.arr_date = ( - np.random.default_rng(2).randint(0, 20000, arr_shape).astype("M8[ns]") + np.random.default_rng(2).integers(0, 20000, arr_shape).astype("M8[ns]") ) self.arr_tdelta = ( - np.random.default_rng(2).randint(0, 20000, arr_shape).astype("m8[ns]") + np.random.default_rng(2).integers(0, 20000, arr_shape).astype("m8[ns]") ) self.arr_nan = np.tile(np.nan, arr_shape) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index be1321bd7fb47..e36d0627d08d3 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -35,7 +35,7 @@ def left_right(): low, high, n = -1 << 10, 1 << 10, 1 << 20 left = DataFrame( - np.random.default_rng(2).randint(low, high, (n, 7)), columns=list("ABCDEFG") + np.random.default_rng(2).integers(low, high, (n, 7)), columns=list("ABCDEFG") ) left["left"] = left.sum(axis=1) @@ -63,7 +63,7 @@ def test_int64_overflow(self): "F": B, "G": A, "H": B, - "values": np.random.default_rng(2).randn(2500), + "values": np.random.default_rng(2).standard_normal(2500), } ) @@ -99,7 +99,7 @@ def test_int64_overflow_groupby_large_range(self): @pytest.mark.parametrize("agg", ["mean", "median"]) def 
test_int64_overflow_groupby_large_df_shuffled(self, agg): rs = np.random.default_rng(2).RandomState(42) - arr = rs.randint(-1 << 12, 1 << 12, (1 << 15, 5)) + arr = rs.integers(-1 << 12, 1 << 12, (1 << 15, 5)) i = rs.choice(len(arr), len(arr) * 4) arr = np.vstack((arr, arr[i])) # add some duplicate rows @@ -201,10 +201,12 @@ class TestMerge: def test_int64_overflow_outer_merge(self): # #2690, combinatorial explosion df1 = DataFrame( - np.random.default_rng(2).randn(1000, 7), columns=list("ABCDEF") + ["G1"] + np.random.default_rng(2).standard_normal((1000, 7)), + columns=list("ABCDEF") + ["G1"], ) df2 = DataFrame( - np.random.default_rng(2).randn(1000, 7), columns=list("ABCDEF") + ["G2"] + np.random.default_rng(2).standard_normal((1000, 7)), + columns=list("ABCDEF") + ["G2"], ) result = merge(df1, df2, how="outer") assert len(result) == 2000 @@ -248,7 +250,7 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort): # one-2-many/none match low, high, n = -1 << 10, 1 << 10, 1 << 11 left = DataFrame( - np.random.default_rng(2).randint(low, high, (n, 7)).astype("int64"), + np.random.default_rng(2).integers(low, high, (n, 7)).astype("int64"), columns=list("ABCDEFG"), ) @@ -260,7 +262,7 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort): left = concat([left, left], ignore_index=True) right = DataFrame( - np.random.default_rng(2).randint(low, high, (n // 2, 7)).astype("int64"), + np.random.default_rng(2).integers(low, high, (n // 2, 7)).astype("int64"), columns=list("ABCDEFG"), ) @@ -268,8 +270,8 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort): i = np.random.default_rng(2).choice(len(left), n) right = concat([right, right, left.iloc[i]], ignore_index=True) - left["left"] = np.random.default_rng(2).randn(len(left)) - right["right"] = np.random.default_rng(2).randn(len(right)) + left["left"] = np.random.default_rng(2).standard_normal(len(left)) + right["right"] = np.random.default_rng(2).standard_normal(len(right)) # shuffle left & 
right frames i = np.random.default_rng(2).permutation(len(left)) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 61c1d54eadc6d..a29e42d0b6688 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -42,7 +42,7 @@ def dtype_fill_out_dtype(request): class TestTake: def test_1d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype - data = np.random.default_rng(2).randint(0, 2, 4).astype(dtype) + data = np.random.default_rng(2).integers(0, 2, 4).astype(dtype) indexer = [2, 1, 0, -1] result = algos.take_nd(data, indexer, fill_value=fill_value) @@ -58,7 +58,7 @@ def test_1d_fill_nonna(self, dtype_fill_out_dtype): def test_2d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype - data = np.random.default_rng(2).randint(0, 2, (5, 3)).astype(dtype) + data = np.random.default_rng(2).integers(0, 2, (5, 3)).astype(dtype) indexer = [2, 1, 0, -1] result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) @@ -83,7 +83,7 @@ def test_2d_fill_nonna(self, dtype_fill_out_dtype): def test_3d_fill_nonna(self, dtype_fill_out_dtype): dtype, fill_value, out_dtype = dtype_fill_out_dtype - data = np.random.default_rng(2).randint(0, 2, (5, 4, 3)).astype(dtype) + data = np.random.default_rng(2).integers(0, 2, (5, 4, 3)).astype(dtype) indexer = [2, 1, 0, -1] result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) @@ -115,7 +115,7 @@ def test_3d_fill_nonna(self, dtype_fill_out_dtype): assert result.dtype == dtype def test_1d_other_dtypes(self): - arr = np.random.default_rng(2).randn(10).astype(np.float32) + arr = np.random.default_rng(2).standard_normal(10).astype(np.float32) indexer = [1, 2, 3, -1] result = algos.take_nd(arr, indexer) @@ -124,7 +124,7 @@ def test_1d_other_dtypes(self): tm.assert_almost_equal(result, expected) def test_2d_other_dtypes(self): - arr = np.random.default_rng(2).randn(10, 5).astype(np.float32) + arr = 
np.random.default_rng(2).standard_normal((10, 5)).astype(np.float32) indexer = [1, 2, 3, -1] @@ -165,7 +165,7 @@ def test_2d_bool(self): assert result.dtype == np.object_ def test_2d_float32(self): - arr = np.random.default_rng(2).randn(4, 3).astype(np.float32) + arr = np.random.default_rng(2).standard_normal((4, 3)).astype(np.float32) indexer = [0, 2, -1, 1, -1] # axis=0 @@ -184,7 +184,7 @@ def test_2d_float32(self): def test_2d_datetime64(self): # 2005/01/01 - 2006/01/01 arr = ( - np.random.default_rng(2).randint(11_045_376, 11_360_736, (5, 3)) + np.random.default_rng(2).integers(11_045_376, 11_360_736, (5, 3)) * 100_000_000_000 ) arr = arr.view(dtype="datetime64[ns]") diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py index 34d9ff820d584..8dd1bd47e4728 100644 --- a/pandas/tests/tslibs/test_ccalendar.py +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -26,7 +26,7 @@ def test_get_day_of_year_numeric(date_tuple, expected): def test_get_day_of_year_dt(): - dt = datetime.fromordinal(1 + np.random.default_rng(2).randint(365 * 4000)) + dt = datetime.fromordinal(1 + np.random.default_rng(2).integers(365 * 4000)) result = ccalendar.get_day_of_year(dt.year, dt.month, dt.day) expected = (dt - dt.replace(month=1, day=1)).days + 1 diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index c2977b81a9b4a..a23d0c1c13e09 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -135,7 +135,7 @@ def test_multiindex_objects(): Series(dtype=object), DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), DataFrame(), - tm.makeMissingDataframe(), + DataFrame(np.full((10, 4), np.nan)), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), @@ -161,7 +161,7 @@ def test_hash_pandas_object(obj, index): Series(["a", None, "c"]), Series([True, False, True]), DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), - tm.makeMissingDataframe(), + DataFrame(np.full((10, 4), np.nan)), 
tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), diff --git a/pandas/tests/util/test_make_objects.py b/pandas/tests/util/test_make_objects.py index feeefea835423..74e2366db2a1c 100644 --- a/pandas/tests/util/test_make_objects.py +++ b/pandas/tests/util/test_make_objects.py @@ -10,6 +10,6 @@ def test_make_multiindex_respects_k(): # GH#38795 respect 'k' arg - N = np.random.default_rng(2).randint(0, 100) + N = np.random.default_rng(2).integers(0, 100) mi = tm.makeMultiIndex(k=N) assert len(mi) == N diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index f45fe7443497f..57698d6dad4d0 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -124,7 +124,7 @@ def halflife_with_times(request): @pytest.fixture def series(): """Make mocked series as fixture.""" - arr = np.random.default_rng(2).randn(100) + arr = np.random.default_rng(2).standard_normal(100) locs = np.arange(20, 40) arr[locs] = np.NaN series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100)) @@ -135,7 +135,7 @@ def frame(): """Make mocked frame as fixture.""" return DataFrame( - np.random.default_rng(2).randn(100, 10), + np.random.default_rng(2).standard_normal((100, 10)), index=bdate_range(datetime(2009, 1, 1), periods=100), ) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index db84672ed3d2b..d765ae0f4ad75 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -21,7 +21,7 @@ def test_getitem(step): - frame = DataFrame(np.random.default_rng(2).randn(5, 5)) + frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) r = frame.rolling(window=5, step=step) tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns) diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index 2ba8850218c8e..d2114c425218c 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -56,8 
+56,8 @@ def test_rolling_apply_with_pandas_objects(window): # 5071 df = DataFrame( { - "A": np.random.default_rng(2).randn(5), - "B": np.random.default_rng(2).randint(0, 10, size=5), + "A": np.random.default_rng(2).standard_normal(5), + "B": np.random.default_rng(2).integers(0, 10, size=5), }, index=date_range("20130101", periods=5, freq="s"), ) @@ -187,7 +187,7 @@ def numpysum(x, par): def test_nans(raw): - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -202,7 +202,7 @@ def test_nans(raw): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.default_rng(2).randn(20)) + obj2 = Series(np.random.default_rng(2).standard_normal(20)) result = obj2.rolling(10, min_periods=5).apply(f, raw=raw) assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -213,7 +213,7 @@ def test_nans(raw): def test_center(raw): - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 089771a27d14c..c510bf8d51e09 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -415,7 +415,7 @@ def test_ewma_nan_handling_cases(s, adjust, ignore_na, w): def test_ewm_alpha(): # GH 10789 - arr = np.random.default_rng(2).randn(100) + arr = np.random.default_rng(2).standard_normal(100) locs = np.arange(20, 40) arr[locs] = np.NaN @@ -431,7 +431,7 @@ def test_ewm_alpha(): def test_ewm_domain_checks(): # GH 12492 - arr = np.random.default_rng(2).randn(100) + arr = np.random.default_rng(2).standard_normal(100) locs = np.arange(20, 40) arr[locs] = np.NaN @@ -483,7 +483,7 @@ def test_ew_empty_series(method): @pytest.mark.parametrize("name", ["mean", "var", "std"]) def test_ew_min_periods(min_periods, name): # excluding NaNs correctly - arr = 
np.random.default_rng(2).randn(50) + arr = np.random.default_rng(2).standard_normal(50) arr[:10] = np.NaN arr[-10:] = np.NaN s = Series(arr) @@ -524,8 +524,8 @@ def test_ew_min_periods(min_periods, name): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_ewm_corr_cov(name): - A = Series(np.random.default_rng(2).randn(50), index=range(50)) - B = A[2:] + np.random.default_rng(2).randn(48) + A = Series(np.random.default_rng(2).standard_normal(50), index=range(50)) + B = A[2:] + np.random.default_rng(2).standard_normal(48) A[:10] = np.NaN B.iloc[-10:] = np.NaN @@ -539,8 +539,8 @@ def test_ewm_corr_cov(name): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_ewm_corr_cov_min_periods(name, min_periods): # GH 7898 - A = Series(np.random.default_rng(2).randn(50), index=range(50)) - B = A[2:] + np.random.default_rng(2).randn(48) + A = Series(np.random.default_rng(2).standard_normal(50), index=range(50)) + B = A[2:] + np.random.default_rng(2).standard_normal(48) A[:10] = np.NaN B.iloc[-10:] = np.NaN @@ -565,13 +565,15 @@ def test_ewm_corr_cov_min_periods(name, min_periods): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_different_input_array_raise_exception(name): - A = Series(np.random.default_rng(2).randn(50), index=range(50)) + A = Series(np.random.default_rng(2).standard_normal(50), index=range(50)) A[:10] = np.NaN msg = "other must be a DataFrame or Series" # exception raised is Exception with pytest.raises(ValueError, match=msg): - getattr(A.ewm(com=20, min_periods=5), name)(np.random.default_rng(2).randn(50)) + getattr(A.ewm(com=20, min_periods=5), name)( + np.random.default_rng(2).standard_normal(50) + ) @pytest.mark.parametrize("name", ["var", "std", "mean"]) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index c5d90173b1fdc..432117dbd2155 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -266,7 +266,7 @@ def test_rank(window, method, pct, 
ascending, test_data): def test_expanding_corr(series): A = series.dropna() - B = (A + np.random.default_rng(2).randn(len(A)))[:-5] + B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5] result = A.expanding().corr(B) @@ -292,7 +292,7 @@ def test_expanding_quantile(series): def test_expanding_cov(series): A = series - B = (A + np.random.default_rng(2).randn(len(A)))[:-5] + B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5] result = A.expanding().cov(B) @@ -353,7 +353,7 @@ def test_expanding_func(func, static_comp, frame_or_series): ids=["sum", "mean", "max", "min"], ) def test_expanding_min_periods(func, static_comp): - ser = Series(np.random.default_rng(2).randn(50)) + ser = Series(np.random.default_rng(2).standard_normal(50)) msg = "The 'axis' keyword in Series.expanding is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): @@ -367,7 +367,7 @@ def test_expanding_min_periods(func, static_comp): assert isna(result.iloc[13]) assert notna(result.iloc[14]) - ser2 = Series(np.random.default_rng(2).randn(20)) + ser2 = Series(np.random.default_rng(2).standard_normal(20)) with tm.assert_produces_warning(FutureWarning, match=msg): result = getattr(ser2.expanding(min_periods=5, axis=0), func)() assert isna(result[3]) @@ -403,7 +403,7 @@ def test_expanding_apply(engine_and_raw, frame_or_series): def test_expanding_min_periods_apply(engine_and_raw): engine, raw = engine_and_raw - ser = Series(np.random.default_rng(2).randn(50)) + ser = Series(np.random.default_rng(2).standard_normal(50)) result = ser.expanding(min_periods=30).apply( lambda x: x.mean(), raw=raw, engine=engine @@ -418,7 +418,7 @@ def test_expanding_min_periods_apply(engine_and_raw): assert isna(result.iloc[13]) assert notna(result.iloc[14]) - ser2 = Series(np.random.default_rng(2).randn(20)) + ser2 = Series(np.random.default_rng(2).standard_normal(20)) result = ser2.expanding(min_periods=5).apply( lambda x: x.mean(), raw=raw, engine=engine ) diff --git 
a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 7417ea004e240..5e04c56dfc712 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -51,7 +51,7 @@ def pairwise_other_frame(): def test_rolling_cov(series): A = series - B = A + np.random.default_rng(2).randn(len(A)) + B = A + np.random.default_rng(2).standard_normal(len(A)) result = A.rolling(window=50, min_periods=25).cov(B) tm.assert_almost_equal(result.iloc[-1], np.cov(A[-50:], B[-50:])[0, 1]) @@ -59,7 +59,7 @@ def test_rolling_corr(series): A = series - B = A + np.random.default_rng(2).randn(len(A)) + B = A + np.random.default_rng(2).standard_normal(len(A)) result = A.rolling(window=50, min_periods=25).corr(B) tm.assert_almost_equal(result.iloc[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) @@ -96,7 +96,7 @@ def test_flex_binary_frame(method, frame): frame2 = frame.copy() frame2 = DataFrame( - np.random.default_rng(2).randn(*frame2.shape), + np.random.default_rng(2).standard_normal(frame2.shape), index=frame2.index, columns=frame2.columns, ) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 6fbbfed3e9742..82274296beb9d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -695,7 +695,7 @@ def test_rolling_window_as_string(center, expected_data): npr = np.random.default_rng(2).RandomState(seed=421) - data = npr.randint(1, high=100, size=len(days)) + data = npr.integers(1, high=100, size=len(days)) df = DataFrame({"DateCol": days, "metric": data}) df.set_index("DateCol", inplace=True) diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py index 229c3ff303dfe..bc0b3e496038c 100644 --- a/pandas/tests/window/test_rolling_functions.py +++ b/pandas/tests/window/test_rolling_functions.py @@ -149,7 +149,7 @@ def test_time_rule_frame(raw, frame, compare_func, roll_func, kwargs, minp): ], ) 
def test_nans(compare_func, roll_func, kwargs): - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -164,7 +164,7 @@ def test_nans(compare_func, roll_func, kwargs): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.default_rng(2).randn(20)) + obj2 = Series(np.random.default_rng(2).standard_normal(20)) result = getattr(obj2.rolling(10, min_periods=5), roll_func)(**kwargs) assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -176,7 +176,7 @@ def test_nans(compare_func, roll_func, kwargs): def test_nans_count(): - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN result = obj.rolling(50, min_periods=30).count() @@ -240,7 +240,7 @@ def test_min_periods_count(series, step): ], ) def test_center(roll_func, kwargs, minp): - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -463,8 +463,12 @@ def test_rolling_median_resample(): def test_rolling_median_memory_error(): # GH11722 n = 20000 - Series(np.random.default_rng(2).randn(n)).rolling(window=2, center=False).median() - Series(np.random.default_rng(2).randn(n)).rolling(window=2, center=False).median() + Series(np.random.default_rng(2).standard_normal(n)).rolling( + window=2, center=False + ).median() + Series(np.random.default_rng(2).standard_normal(n)).rolling( + window=2, center=False + ).median() @pytest.mark.parametrize( diff --git a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py index 126ca345c72ff..32296ae3f2470 100644 --- a/pandas/tests/window/test_rolling_quantile.py +++ b/pandas/tests/window/test_rolling_quantile.py @@ -88,7 +88,7 @@ def test_time_rule_frame(raw, frame, q): @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) def 
test_nans(q): compare_func = partial(scoreatpercentile, per=q) - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -103,7 +103,7 @@ def test_nans(q): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.default_rng(2).randn(20)) + obj2 = Series(np.random.default_rng(2).standard_normal(20)) result = obj2.rolling(10, min_periods=5).quantile(q) assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -127,7 +127,7 @@ def test_min_periods(series, minp, q, step): @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) def test_center(q): - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 87b15253154b8..cd8e555c829fb 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -85,7 +85,7 @@ def test_nans(sp_func, roll_func): import scipy.stats compare_func = partial(getattr(scipy.stats, sp_func), bias=False) - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = np.NaN obj[-10:] = np.NaN @@ -100,7 +100,7 @@ def test_nans(sp_func, roll_func): assert not isna(result.iloc[-6]) assert isna(result.iloc[-5]) - obj2 = Series(np.random.default_rng(2).randn(20)) + obj2 = Series(np.random.default_rng(2).standard_normal(20)) result = getattr(obj2.rolling(10, min_periods=5), roll_func)() assert isna(result.iloc[3]) assert notna(result.iloc[4]) @@ -128,7 +128,7 @@ def test_min_periods(series, minp, roll_func, step): @pytest.mark.parametrize("roll_func", ["kurt", "skew"]) def test_center(roll_func): - obj = Series(np.random.default_rng(2).randn(50)) + obj = Series(np.random.default_rng(2).standard_normal(50)) obj[:10] = 
np.NaN obj[-10:] = np.NaN @@ -186,7 +186,7 @@ def test_rolling_skew_edge_cases(step): expected = Series([np.NaN] * 5)[::step] # yields all NaN (window too small) - d = Series(np.random.default_rng(2).randn(5)) + d = Series(np.random.default_rng(2).standard_normal(5)) x = d.rolling(window=2, step=step).skew() tm.assert_series_equal(expected, x) @@ -207,7 +207,7 @@ def test_rolling_kurt_edge_cases(step): # yields all NaN (window too small) expected = Series([np.NaN] * 5)[::step] - d = Series(np.random.default_rng(2).randn(5)) + d = Series(np.random.default_rng(2).standard_normal(5)) x = d.rolling(window=3, step=step).kurt() tm.assert_series_equal(expected, x) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 48807f9678a87..caea3e98f262f 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -504,7 +504,7 @@ def test_perf_min(self): N = 10000 dfp = DataFrame( - {"B": np.random.default_rng(2).randn(N)}, + {"B": np.random.default_rng(2).standard_normal(N)}, index=date_range("20130101", periods=N, freq="s"), ) expected = dfp.rolling(2, min_periods=1).min() diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 00c402d76f95c..53612f87a67d3 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -112,7 +112,7 @@ def test_constructor_with_win_type_invalid(frame_or_series): @td.skip_if_no_scipy def test_window_with_args(step): # make sure that we are aggregating window functions correctly with arg - r = Series(np.random.default_rng(2).randn(100)).rolling( + r = Series(np.random.default_rng(2).standard_normal(100)).rolling( window=10, min_periods=1, win_type="gaussian", step=step ) expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) @@ -223,7 +223,7 @@ def test_cmov_window_corner(step): assert len(result) == 0 # shorter than window - vals = 
Series(np.random.default_rng(2).randn(5)) + vals = Series(np.random.default_rng(2).standard_normal(5)) result = vals.rolling(10, win_type="boxcar", step=step).mean() assert np.isnan(result).all() assert len(result) == len(range(0, 5, step or 1)) @@ -324,7 +324,7 @@ def test_cmov_window_frame(f, xp, step): @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4, 5]) @td.skip_if_no_scipy def test_cmov_window_na_min_periods(step, min_periods): - vals = Series(np.random.default_rng(2).randn(10)) + vals = Series(np.random.default_rng(2).standard_normal(10)) vals[4] = np.nan vals[8] = np.nan From b2f42adf477cb4ddc883600074650b6ca6898d59 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 12 Jul 2023 10:48:46 -0700 Subject: [PATCH 05/22] Fix more --- pandas/tests/apply/test_frame_apply.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 6 +- pandas/tests/computation/test_eval.py | 30 +++--- pandas/tests/frame/indexing/test_getitem.py | 4 +- pandas/tests/frame/indexing/test_indexing.py | 4 +- pandas/tests/frame/indexing/test_where.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 2 +- pandas/tests/frame/methods/test_clip.py | 8 +- pandas/tests/frame/methods/test_cov_corr.py | 4 +- pandas/tests/frame/methods/test_diff.py | 2 +- pandas/tests/frame/methods/test_fillna.py | 2 +- .../tests/frame/methods/test_interpolate.py | 2 +- pandas/tests/frame/methods/test_matmul.py | 4 +- pandas/tests/frame/methods/test_reindex.py | 4 +- pandas/tests/frame/methods/test_replace.py | 4 +- .../tests/frame/methods/test_reset_index.py | 8 +- pandas/tests/frame/methods/test_round.py | 2 +- pandas/tests/frame/methods/test_shift.py | 6 +- pandas/tests/frame/methods/test_swapaxes.py | 6 +- pandas/tests/frame/methods/test_to_csv.py | 2 +- pandas/tests/frame/methods/test_to_records.py | 2 +- pandas/tests/frame/test_arithmetic.py | 8 +- pandas/tests/frame/test_constructors.py | 18 ++-- pandas/tests/frame/test_nonunique_indexes.py | 4 +- 
pandas/tests/frame/test_query_eval.py | 34 +++--- pandas/tests/frame/test_stack_unstack.py | 2 +- pandas/tests/generic/test_frame.py | 4 +- pandas/tests/generic/test_series.py | 2 +- pandas/tests/groupby/aggregate/test_other.py | 4 +- pandas/tests/groupby/test_filters.py | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 2 +- .../indexes/datetimes/test_partial_slicing.py | 8 +- pandas/tests/indexes/multi/test_sorting.py | 2 +- pandas/tests/indexes/period/test_indexing.py | 4 +- .../indexes/period/test_partial_slicing.py | 8 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexing/conftest.py | 9 +- .../multiindex/test_chaining_and_caching.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 2 +- .../indexing/multiindex/test_multiindex.py | 2 +- .../tests/indexing/multiindex/test_partial.py | 2 +- pandas/tests/indexing/test_floats.py | 4 +- pandas/tests/indexing/test_iloc.py | 6 +- pandas/tests/indexing/test_indexing.py | 4 +- pandas/tests/indexing/test_loc.py | 10 +- pandas/tests/internals/test_internals.py | 2 +- pandas/tests/io/excel/test_writers.py | 10 +- pandas/tests/io/formats/style/test_style.py | 2 +- pandas/tests/io/formats/test_format.py | 6 +- pandas/tests/io/formats/test_info.py | 4 +- pandas/tests/io/json/test_pandas.py | 2 +- .../io/parser/dtypes/test_dtypes_basic.py | 2 +- pandas/tests/io/parser/test_c_parser_only.py | 2 +- pandas/tests/io/parser/test_multi_thread.py | 10 +- pandas/tests/io/pytables/test_append.py | 10 +- pandas/tests/io/pytables/test_complex.py | 8 +- pandas/tests/io/pytables/test_errors.py | 4 +- pandas/tests/io/pytables/test_put.py | 4 +- pandas/tests/io/pytables/test_read.py | 12 +-- pandas/tests/io/pytables/test_select.py | 10 +- pandas/tests/io/pytables/test_store.py | 18 ++-- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/test_stata.py | 10 +- pandas/tests/plotting/frame/test_frame.py | 101 +++++++++--------- .../tests/plotting/frame/test_frame_color.py | 12 +-- 
.../tests/plotting/frame/test_frame_legend.py | 18 ++-- .../plotting/frame/test_frame_subplots.py | 50 +++++---- pandas/tests/plotting/test_boxplot_method.py | 28 ++--- pandas/tests/plotting/test_datetimelike.py | 8 +- pandas/tests/plotting/test_hist_method.py | 16 +-- pandas/tests/plotting/test_misc.py | 2 +- pandas/tests/plotting/test_series.py | 4 +- pandas/tests/resample/test_datetime_index.py | 10 +- pandas/tests/resample/test_resample_api.py | 16 +-- .../tests/resample/test_resampler_grouper.py | 2 +- pandas/tests/reshape/concat/test_index.py | 4 +- pandas/tests/reshape/concat/test_invalid.py | 2 +- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/series/indexing/test_datetime.py | 6 +- pandas/tests/series/methods/test_cov_corr.py | 4 +- .../tests/series/methods/test_interpolate.py | 2 +- .../tests/series/methods/test_reset_index.py | 2 +- pandas/tests/test_expressions.py | 8 +- pandas/tests/test_multilevel.py | 4 +- pandas/tests/test_nanops.py | 6 +- pandas/tests/test_take.py | 2 +- pandas/tests/window/test_expanding.py | 6 +- pandas/tests/window/test_pairwise.py | 2 +- pandas/tests/window/test_rolling.py | 8 +- 90 files changed, 361 insertions(+), 335 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 158e9c083be13..195056874479e 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -659,7 +659,7 @@ def test_apply_category_equalness(val): def test_infer_row_shape(): # GH 17437 # if row shape is changing, infer it - df = DataFrame(np.random.default_rng(2).rand(10, 2)) + df = DataFrame(np.random.default_rng(2).random(10, 2)) result = df.apply(np.fft.fft, axis=0).shape assert result == (10, 2) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 4fba28ee3bca3..9d018ea8157ff 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -535,7 +535,7 @@ def 
test_df_div_zero_int(self): def test_df_div_zero_series_does_not_commute(self): # integer div, but deal with the 0's (GH#9144) - df = pd.DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = pd.DataFrame(np.random.default_rng(2).standard_normal((10, 5))) ser = df[0] res = ser / df res2 = df / ser @@ -602,7 +602,7 @@ def test_df_mod_zero_int(self): def test_df_mod_zero_series_does_not_commute(self): # GH#3590, modulo as ints # not commutative with series - df = pd.DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = pd.DataFrame(np.random.default_rng(2).standard_normal((10, 5))) ser = df[0] res = ser % df res2 = df % ser @@ -933,7 +933,7 @@ def test_datetime64_with_index(self): result = ser - ser.index.to_period() df = pd.DataFrame( - np.random.default_rng(2).standard_normal(5, 2), + np.random.default_rng(2).standard_normal((5, 2)), index=pd.date_range("20130101", periods=5), ) df["date"] = pd.Timestamp("20130102") diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index cce20aa201a3a..2b3eb166834c1 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -102,7 +102,7 @@ def lhs(request): nan_df1[nan_df1 > 0.5] = np.nan opts = ( - DataFrame(np.random.default_rng(2).standard_normal(10, 5)), + DataFrame(np.random.default_rng(2).standard_normal((10, 5))), Series(np.random.default_rng(2).standard_normal(5)), Series([1, 2, np.nan, np.nan, 5]), nan_df1, @@ -366,7 +366,7 @@ def test_frame_invert(self, engine, parser): # ~ ## # frame # float always raises - lhs = DataFrame(np.random.default_rng(2).standard_normal(5, 2)) + lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2))) if engine == "numexpr": msg = "couldn't find matching opcode for 'invert_dd'" with pytest.raises(NotImplementedError, match=msg): @@ -456,7 +456,7 @@ def test_frame_negate(self, engine, parser): expr = "-lhs" # float - lhs = DataFrame(np.random.default_rng(2).standard_normal(5, 2)) + 
lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2))) expect = -lhs result = pd.eval(expr, engine=engine, parser=parser) tm.assert_frame_equal(expect, result) @@ -1103,7 +1103,7 @@ def test_constant(self): assert x == 1 def test_single_variable(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) df2 = self.eval("df", local_dict={"df": df}) tm.assert_frame_equal(df, df2) @@ -1145,7 +1145,7 @@ def test_assignment_fails(self): def test_assignment_column_multiple_raise(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) # multiple assignees with pytest.raises(SyntaxError, match="invalid syntax"): @@ -1153,7 +1153,7 @@ def test_assignment_column_multiple_raise(self): def test_assignment_column_invalid_assign(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) # invalid assignees msg = "left hand side of an assignment must be a single name" @@ -1162,7 +1162,7 @@ def test_assignment_column_invalid_assign(self): def test_assignment_column_invalid_assign_function_call(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) msg = "cannot assign to function call" with pytest.raises(SyntaxError, match=msg): @@ -1170,7 +1170,7 @@ def test_assignment_column_invalid_assign_function_call(self): def test_assignment_single_assign_existing(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) # single assignment - existing variable expected = df.copy() @@ -1180,7 +1180,7 @@ def test_assignment_single_assign_existing(self): def 
test_assignment_single_assign_new(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) # single assignment - new variable expected = df.copy() @@ -1190,7 +1190,7 @@ def test_assignment_single_assign_new(self): def test_assignment_single_assign_local_overlap(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) df = df.copy() a = 1 # noqa: F841 @@ -1202,7 +1202,7 @@ def test_assignment_single_assign_local_overlap(self): def test_assignment_single_assign_name(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) a = 1 # noqa: F841 @@ -1214,7 +1214,7 @@ def test_assignment_single_assign_name(self): def test_assignment_multiple_raises(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) # multiple assignment df.eval("c = a + b", inplace=True) @@ -1224,7 +1224,7 @@ def test_assignment_multiple_raises(self): def test_assignment_explicit(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) # explicit targets self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True) @@ -1245,7 +1245,7 @@ def test_column_in(self): def test_assignment_not_inplace(self): # see gh-9297 df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) actual = df.eval("c = a + b", inplace=False) @@ -1873,7 +1873,7 @@ def test_inf(engine, parser): def test_query_token(engine, column): # See: https://github.com/pandas-dev/pandas/pull/42826 
df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=[column, "b"] + np.random.default_rng(2).standard_normal((5, 2)), columns=[column, "b"] ) expected = df[df[column] > 5] query_string = f"`{column}` > 5" diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index dad6aa35e2a3a..bf53cb6f8e680 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -37,7 +37,7 @@ def test_getitem_unused_level_raises(self): def test_getitem_periodindex(self): rng = period_range("1/1/2000", periods=5) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5), columns=rng) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)), columns=rng) ts = df[rng[0]] tm.assert_series_equal(ts, df.iloc[:, 0]) @@ -131,7 +131,7 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): else: # MultiIndex columns frame = DataFrame( - np.random.default_rng(2).standard_normal(8, 3), + np.random.default_rng(2).standard_normal((8, 3)), columns=Index( [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")], name=("sth", "sth2"), diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 7c4a6bf0a64b4..9154add5120fd 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -524,7 +524,7 @@ def test_loc_setitem_boolean_mask_allfalse(self): tm.assert_frame_equal(result, df) def test_getitem_fancy_slice_integers_step(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) # this is OK df.iloc[:8:2] @@ -533,7 +533,7 @@ def test_getitem_fancy_slice_integers_step(self): def test_getitem_setitem_integer_slice_keyerrors(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 5), index=range(0, 20, 2) + np.random.default_rng(2).standard_normal((10, 5)), index=range(0, 
20, 2) ) # this is OK diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 35208b5eb4c0f..b80117f9258ba 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -530,7 +530,7 @@ def test_where_axis_multiple_dtypes(self): # Multiple dtypes (=> multiple Blocks) df = pd.concat( [ - DataFrame(np.random.default_rng(2).standard_normal(10, 2)), + DataFrame(np.random.default_rng(2).standard_normal((10, 2))), DataFrame( np.random.default_rng(2).integers(0, 10, size=(10, 2)), dtype="int64", diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index a775d4ad5922d..6fad30555b83b 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -95,7 +95,7 @@ def test_xs_corner(self): def test_xs_duplicates(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), + np.random.default_rng(2).standard_normal((5, 2)), index=["b", "b", "c", "b", "a"], ) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index 7a78adcc9dc4d..9bfcc39c28b08 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -30,7 +30,7 @@ def test_inplace_clip(self, float_frame): def test_dataframe_clip(self): # GH#2747 - df = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2))) for lb, ub in [(-1, 1), (1, -1)]: clipped_df = df.clip(lb, ub) @@ -60,7 +60,7 @@ def test_clip_mixed_numeric(self): def test_clip_against_series(self, inplace): # GH#6966 - df = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2))) lb = Series(np.random.default_rng(2).standard_normal(1000)) ub = lb + 1 @@ -107,8 +107,8 @@ def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): 
@pytest.mark.parametrize("axis", [0, 1, None]) def test_clip_against_frame(self, axis): - df = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) - lb = DataFrame(np.random.default_rng(2).standard_normal(1000, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2))) + lb = DataFrame(np.random.default_rng(2).standard_normal((1000, 2))) ub = lb + 1 clipped_df = df.clip(lb, ub, axis=axis) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 97264164099e9..30b6789cac41c 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -66,8 +66,8 @@ def test_cov(self, float_frame, float_string_frame): @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) def test_cov_ddof(self, test_ddof): # GH#34611 - np_array1 = np.random.default_rng(2).rand(10) - np_array2 = np.random.default_rng(2).rand(10) + np_array1 = np.random.default_rng(2).random(10) + np_array2 = np.random.default_rng(2).random(10) df = DataFrame({0: np_array1, 1: np_array2}) result = df.cov(ddof=test_ddof) expected_np = np.cov(np_array1, np_array2, ddof=test_ddof) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index bda563314306a..031b08c585ccb 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -285,7 +285,7 @@ def test_diff_integer_na(self, axis, expected): def test_diff_readonly(self): # https://github.com/pandas-dev/pandas/issues/35559 - arr = np.random.default_rng(2).standard_normal(5, 2) + arr = np.random.default_rng(2).standard_normal((5, 2)) arr.flags.writeable = False df = DataFrame(arr) result = df.diff() diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 833eaa390f254..371afd7cc8712 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -635,7 +635,7 @@ def 
test_fillna_invalid_value(self, float_frame): def test_fillna_col_reordering(self): cols = ["COL." + str(i) for i in range(5, 0, -1)] - data = np.random.default_rng(2).rand(20, 5) + data = np.random.default_rng(2).random(20, 5) df = DataFrame(index=range(20), columns=cols, data=data) msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 7d9f5d9e0d3bb..609495a3d9fec 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -430,7 +430,7 @@ def test_interp_time_inplace_axis(self): # GH 9687 periods = 5 idx = date_range(start="2014-01-01", periods=periods) - data = np.random.default_rng(2).rand(periods, periods) + data = np.random.default_rng(2).random(periods, periods) data[data < 0.5] = np.nan expected = DataFrame(index=idx, columns=idx, data=data) diff --git a/pandas/tests/frame/methods/test_matmul.py b/pandas/tests/frame/methods/test_matmul.py index de1c6d7c62c4b..7d566406f366d 100644 --- a/pandas/tests/frame/methods/test_matmul.py +++ b/pandas/tests/frame/methods/test_matmul.py @@ -86,8 +86,8 @@ def test_matmul(self): def test_matmul_message_shapes(self): # GH#21581 exception message should reflect original shapes, # not transposed shapes - a = np.random.default_rng(2).rand(10, 4) - b = np.random.default_rng(2).rand(5, 3) + a = np.random.default_rng(2).random((10, 4)) + b = np.random.default_rng(2).random((5, 3)) df = DataFrame(b) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index f7be83d132fa0..cc1be2066a8a3 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -698,7 +698,7 @@ def test_reindex_nan(self): tm.assert_frame_equal(left, right) def test_reindex_name_remains(self): - s = Series(np.random.default_rng(2).rand(10)) + s 
= Series(np.random.default_rng(2).random(10)) df = DataFrame(s, index=np.arange(len(s))) i = Series(np.arange(10), name="iname") @@ -708,7 +708,7 @@ def test_reindex_name_remains(self): df = df.reindex(Index(np.arange(10), name="tmpname")) assert df.index.name == "tmpname" - s = Series(np.random.default_rng(2).rand(10)) + s = Series(np.random.default_rng(2).random(10)) df = DataFrame(s.T, index=np.arange(len(s))) i = Series(np.arange(10), name="iname") df = df.reindex(columns=i) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 7773fa3d9dd26..933851526f28f 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -983,12 +983,12 @@ def test_replace_bool_with_string(self): tm.assert_frame_equal(result, expected) def test_replace_pure_bool_with_string_no_op(self): - df = DataFrame(np.random.default_rng(2).rand(2, 2) > 0.5) + df = DataFrame(np.random.default_rng(2).random((2, 2)) > 0.5) result = df.replace("asdf", "fdsa") tm.assert_frame_equal(df, result) def test_replace_bool_with_bool(self): - df = DataFrame(np.random.default_rng(2).rand(2, 2) > 0.5) + df = DataFrame(np.random.default_rng(2).random((2, 2)) > 0.5) result = df.replace(False, True) expected = DataFrame(np.ones((2, 2), dtype=bool)) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index bbc954388f0e8..d61306636c6f2 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -312,7 +312,7 @@ def test_reset_index_multiindex_nan(self): { "A": ["a", "b", "c"], "B": [0, 1, np.nan], - "C": np.random.default_rng(2).rand(3), + "C": np.random.default_rng(2).random(3), } ) rs = df.set_index(["A", "B"]).reset_index() @@ -322,7 +322,7 @@ def test_reset_index_multiindex_nan(self): { "A": [np.nan, "b", "c"], "B": [0, 1, 2], - "C": np.random.default_rng(2).rand(3),
+ "C": np.random.default_rng(2).random(3), } ) rs = df.set_index(["A", "B"]).reset_index() @@ -336,7 +336,7 @@ def test_reset_index_multiindex_nan(self): { "A": ["a", "b", "c"], "B": [np.nan, np.nan, np.nan], - "C": np.random.default_rng(2).rand(3), + "C": np.random.default_rng(2).random(3), } ) rs = df.set_index(["A", "B"]).reset_index() @@ -602,7 +602,7 @@ def test_reset_index_delevel_infer_dtype(self): tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) df = DataFrame( - np.random.default_rng(2).standard_normal(8, 3), + np.random.default_rng(2).standard_normal((8, 3)), columns=["A", "B", "C"], index=index, ) diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index 45d51ed52fe7c..5dab5a0172cc2 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -196,7 +196,7 @@ def test_round_builtin(self): def test_round_nonunique_categorical(self): # See GH#21809 idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) - df = DataFrame(np.random.default_rng(2).rand(6, 3), columns=list("abc")) + df = DataFrame(np.random.default_rng(2).random(6, 3), columns=list("abc")) expected = df.round(3) expected.index = idx diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 87eba3a232df4..993e4892606cd 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -270,7 +270,7 @@ def test_shift_with_periodindex(self, frame_or_series): def test_shift_other_axis(self): # shift other axis # GH#6371 - df = DataFrame(np.random.default_rng(2).rand(10, 5)) + df = DataFrame(np.random.default_rng(2).random(10, 5)) expected = pd.concat( [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, @@ -281,7 +281,7 @@ def test_shift_other_axis(self): def test_shift_named_axis(self): # shift named axis - df = 
DataFrame(np.random.default_rng(2).rand(10, 5)) + df = DataFrame(np.random.default_rng(2).random(10, 5)) expected = pd.concat( [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, @@ -650,7 +650,7 @@ def test_shift_axis1_categorical_columns(self): def test_shift_axis1_many_periods(self): # GH#44978 periods > len(columns) - df = DataFrame(np.random.default_rng(2).rand(5, 3)) + df = DataFrame(np.random.default_rng(2).random(5, 3)) shifted = df.shift(6, axis=1, fill_value=None) expected = df * np.nan diff --git a/pandas/tests/frame/methods/test_swapaxes.py b/pandas/tests/frame/methods/test_swapaxes.py index a110ba700e3f7..53a4691d48b1c 100644 --- a/pandas/tests/frame/methods/test_swapaxes.py +++ b/pandas/tests/frame/methods/test_swapaxes.py @@ -7,20 +7,20 @@ class TestSwapAxes: def test_swapaxes(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) def test_swapaxes_noop(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): tm.assert_frame_equal(df, df.swapaxes(0, 0)) def test_swapaxes_invalid_axis(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) msg = "'DataFrame.swapaxes' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): msg = "No axis named 2 for object type DataFrame" diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 2cf9cf2cc8472..74308aab77247 100644 --- a/pandas/tests/frame/methods/test_to_csv.py 
+++ b/pandas/tests/frame/methods/test_to_csv.py @@ -418,7 +418,7 @@ def test_to_csv_params(self, nrows, df_params, func_params, ncols): def test_to_csv_from_csv_w_some_infs(self, float_frame): # test roundtrip with inf, -inf, nan, as full columns and mix float_frame["G"] = np.nan - f = lambda x: [np.inf, np.nan][np.random.default_rng(2).rand() < 0.5] + f = lambda x: [np.inf, np.nan][np.random.default_rng(2).random() < 0.5] float_frame["H"] = float_frame.index.map(f) with tm.ensure_clean() as path: diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 00790de651517..44875be57f1a4 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -78,7 +78,7 @@ def test_to_records_with_Mapping_type(self): all(x in frame for x in ["Type", "Subject", "From"]) def test_to_records_floats(self): - df = DataFrame(np.random.default_rng(2).rand(10, 10)) + df = DataFrame(np.random.default_rng(2).random((10, 10))) df.to_records() def test_to_records_index_name(self): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 01c482a3adf8e..82a433fa6e811 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -206,7 +206,7 @@ def test_timestamp_compare(self, left, right): "stringcol": list(tm.rands(10)), } ) - df.loc[np.random.default_rng(2).rand(len(df)) > 0.5, "dates2"] = pd.NaT + df.loc[np.random.default_rng(2).random(len(df)) > 0.5, "dates2"] = pd.NaT left_f = getattr(operator, left) right_f = getattr(operator, right) @@ -268,7 +268,7 @@ def test_df_boolean_comparison_error(self): def test_df_float_none_comparison(self): df = DataFrame( - np.random.default_rng(2).standard_normal(8, 3), + np.random.default_rng(2).standard_normal((8, 3)), index=range(8), columns=["A", "B", "C"], ) @@ -707,7 +707,7 @@ def test_sub_alignment_with_duplicate_index(self): @pytest.mark.parametrize("op", ["__add__",
"__mul__", "__sub__", "__truediv__"]) def test_arithmetic_with_duplicate_columns(self, op): # operations - df = DataFrame({"A": np.arange(10), "B": np.random.default_rng(2).rand(10)}) + df = DataFrame({"A": np.arange(10), "B": np.random.default_rng(2).random(10)}) expected = getattr(df, op)(df) expected.columns = ["A", "A"] df.columns = ["A", "A"] @@ -1015,7 +1015,7 @@ def test_arith_non_pandas_object(self): added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val2, axis="index"), added) - val3 = np.random.default_rng(2).rand(*df.shape) + val3 = np.random.default_rng(2).random(*df.shape) added = DataFrame(df.values + val3, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val3), added) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 48fcf59458730..86a77ed2e9ba8 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -365,7 +365,7 @@ def test_constructor_mixed_dtypes(self, typ, ad): if typ == "int": dtypes = MIXED_INT_DTYPES arrays = [ - np.array(np.random.default_rng(2).rand(10), dtype=d) for d in dtypes + np.array(np.random.default_rng(2).random(10), dtype=d) for d in dtypes ] elif typ == "float": dtypes = MIXED_FLOAT_DTYPES @@ -386,8 +386,8 @@ def test_constructor_mixed_dtypes(self, typ, ad): def test_constructor_complex_dtypes(self): # GH10952 - a = np.random.default_rng(2).rand(10).astype(np.complex64) - b = np.random.default_rng(2).rand(10).astype(np.complex128) + a = np.random.default_rng(2).random(10).astype(np.complex64) + b = np.random.default_rng(2).random(10).astype(np.complex128) df = DataFrame({"a": a, "b": b}) assert a.dtype == df.a.dtype @@ -707,13 +707,15 @@ def test_constructor_error_msgs(self): msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" with pytest.raises(ValueError, match=msg): DataFrame( - np.random.default_rng(2).rand(2, 3), columns=["A", "B", "C"], 
index=[1] + np.random.default_rng(2).random(2, 3), + columns=["A", "B", "C"], + index=[1], ) msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" with pytest.raises(ValueError, match=msg): DataFrame( - np.random.default_rng(2).rand(2, 3), columns=["A", "B"], index=[1, 2] + np.random.default_rng(2).random(2, 3), columns=["A", "B"], index=[1, 2] ) # gh-26429 @@ -953,7 +955,7 @@ def test_constructor_extension_scalar_data(self, data, dtype): def test_nested_dict_frame_constructor(self): rng = pd.period_range("1/1/2000", periods=5) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5), columns=rng) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)), columns=rng) data = {} for col in df.columns: @@ -2760,7 +2762,7 @@ def test_floating_values_integer_dtype(self): # GH#40110 make DataFrame behavior with arraylike floating data and # inty dtype match Series behavior - arr = np.random.default_rng(2).standard_normal(10, 5) + arr = np.random.default_rng(2).standard_normal((10, 5)) # GH#49599 in 2.0 we raise instead of either # a) silently ignoring dtype and returningfloat (the old Series behavior) or @@ -2992,7 +2994,7 @@ def test_construction_from_ndarray_datetimelike(self): assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays) def test_construction_from_ndarray_with_eadtype_mismatched_columns(self): - arr = np.random.default_rng(2).standard_normal(10, 2) + arr = np.random.default_rng(2).standard_normal((10, 2)) dtype = pd.array([2.0]).dtype msg = r"len\(arrays\) must match len\(columns\)" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 206ce857df960..1203b0841b7a2 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -183,14 +183,14 @@ def test_changing_dtypes_with_duplicate_columns(self): # the location indexer is a slice # GH 6120 df = DataFrame( - 
np.random.default_rng(2).standard_normal(5, 2), columns=["that", "that"] + np.random.default_rng(2).standard_normal((5, 2)), columns=["that", "that"] ) expected = DataFrame(1.0, index=range(5), columns=["that", "that"]) df["that"] = 1.0 check(df, expected) - df = DataFrame(np.random.default_rng(2).rand(5, 2), columns=["that", "that"]) + df = DataFrame(np.random.default_rng(2).random(5, 2), columns=["that", "that"]) expected = DataFrame(1, index=range(5), columns=["that", "that"]) df["that"] = 1 diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index ef0ea07ec26e6..7ecbf1f6e122b 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,7 +160,7 @@ def test_query_empty_string(self): def test_eval_resolvers_as_list(self): # GH 14095 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((10, 2)), columns=list("ab") ) dict1 = {"a": 1} dict2 = {"b": 2} @@ -170,7 +170,7 @@ def test_eval_resolvers_as_list(self): def test_eval_resolvers_combined(self): # GH 34966 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((10, 2)), columns=list("ab") ) dict1 = {"c": 2} @@ -194,7 +194,7 @@ def test_query_with_named_multiindex(self, parser, engine): a = np.random.default_rng(2).choice(["red", "green"], size=10) b = np.random.default_rng(2).choice(["eggs", "ham"], size=10) index = MultiIndex.from_arrays([a, b], names=["color", "food"]) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)), index=index) ind = Series( df.index.get_level_values("color").values, index=index, name="color" ) @@ -244,7 +244,7 @@ def test_query_with_unnamed_multiindex(self, parser, engine): a = np.random.default_rng(2).choice(["red", "green"], size=10) b = 
np.random.default_rng(2).choice(["eggs", "ham"], size=10) index = MultiIndex.from_arrays([a, b]) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)), index=index) ind = Series(df.index.get_level_values(0).values, index=index) res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) @@ -333,7 +333,7 @@ def test_query_with_partially_named_multiindex(self, parser, engine): b = np.arange(10) index = MultiIndex.from_arrays([a, b]) index.names = [None, "rating"] - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2)), index=index) res = df.query("rating == 1", parser=parser, engine=engine) ind = Series( df.index.get_level_values("rating").values, index=index, name="rating" @@ -424,8 +424,8 @@ def test_date_query_with_NaT(self, engine, parser): df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) - df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT - df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates3"] = pd.NaT + df.loc[np.random.default_rng(2).random(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).random(n) > 0.5, "dates3"] = pd.NaT res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] tm.assert_frame_equal(res, expec) @@ -462,7 +462,7 @@ def test_date_index_query_with_NaT_duplicates(self, engine, parser): d["dates1"] = date_range("1/1/2012", periods=n) d["dates3"] = date_range("1/1/2014", periods=n) df = DataFrame(d) - df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).random(n) > 0.5, "dates1"] = pd.NaT return_value = df.set_index("dates1", inplace=True, drop=True) assert return_value is None res = df.query("dates1 < 
20130101 < dates3", engine=engine, parser=parser) @@ -670,7 +670,7 @@ def test_query_undefined_local(self): engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).random(10, 2), columns=list("ab")) with pytest.raises( UndefinedVariableError, match="local variable 'c' is not defined" ): @@ -712,8 +712,8 @@ def test_inf(self, op, f, engine, parser): n = 10 df = DataFrame( { - "a": np.random.default_rng(2).rand(n), - "b": np.random.default_rng(2).rand(n), + "a": np.random.default_rng(2).random(n), + "b": np.random.default_rng(2).random(n), } ) df.loc[::2, 0] = np.inf @@ -742,8 +742,8 @@ def test_method_calls_in_query(self, engine, parser): n = 10 df = DataFrame( { - "a": 2 * np.random.default_rng(2).rand(n), - "b": np.random.default_rng(2).rand(n), + "a": 2 * np.random.default_rng(2).random(n), + "b": np.random.default_rng(2).random(n), } ) expected = df[df["a"].astype("int") == 0] @@ -753,7 +753,7 @@ def test_method_calls_in_query(self, engine, parser): df = DataFrame( { "a": np.where( - np.random.default_rng(2).rand(n) < 0.5, + np.random.default_rng(2).random(n) < 0.5, np.nan, np.random.default_rng(2).standard_normal(n), ), @@ -792,8 +792,8 @@ def test_date_query_with_NaT(self, engine, parser): df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) - df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT - df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates3"] = pd.NaT + df.loc[np.random.default_rng(2).random(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).random(n) > 0.5, "dates3"] = pd.NaT res = df.query( "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser ) @@ -835,7 +835,7 @@ def test_date_index_query_with_NaT_duplicates(self, engine, parser): df = 
DataFrame(np.random.default_rng(2).standard_normal(n, 3)) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) - df.loc[np.random.default_rng(2).rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.default_rng(2).random(n) > 0.5, "dates1"] = pd.NaT return_value = df.set_index("dates1", inplace=True, drop=True) assert return_value is None msg = r"'BoolOp' nodes are not implemented" diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 5be064261f88c..318fa95dbc1e0 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -347,7 +347,7 @@ def test_unstack_preserve_dtypes(self): "state": ["IL", "MI", "NC"], "index": ["a", "b", "c"], "some_categories": Series(["a", "b", "c"]).astype("category"), - "A": np.random.default_rng(2).rand(3), + "A": np.random.default_rng(2).random(3), "B": 1, "C": "foo", "D": pd.Timestamp("20010102"), diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 37709d612714f..620d5055f5d3b 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -84,7 +84,7 @@ def test_metadata_propagation_indiv_groupby(self): def test_metadata_propagation_indiv_resample(self): # resample df = DataFrame( - np.random.default_rng(2).standard_normal(1000, 2), + np.random.default_rng(2).standard_normal((1000, 2)), index=date_range("20130101", periods=1000, freq="s"), ) result = df.resample("1T") @@ -189,7 +189,7 @@ def test_validate_bool_args(self, value): def test_unexpected_keyword(self): # GH8597 df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=["jim", "joe"] + np.random.default_rng(2).standard_normal((5, 2)), columns=["jim", "joe"] ) ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) ts = df["joe"].copy() diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index f384ced88389e..4ea205ac13c47 100644 --- 
a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -107,7 +107,7 @@ def test_nonbool_single_element_raise(self, data): def test_metadata_propagation_indiv_resample(self): # resample ts = Series( - np.random.default_rng(2).rand(1000), + np.random.default_rng(2).random(1000), index=date_range("20130101", periods=1000, freq="s"), name="foo", ) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 0260a4ec2af93..beec7f6a824fe 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -93,8 +93,8 @@ def test_agg_period_index(): # GH 3579 index = period_range(start="1999-01", periods=5, freq="M") - s1 = Series(np.random.default_rng(2).rand(len(index)), index=index) - s2 = Series(np.random.default_rng(2).rand(len(index)), index=index) + s1 = Series(np.random.default_rng(2).random(len(index)), index=index) + s2 = Series(np.random.default_rng(2).random(len(index)), index=index) df = DataFrame.from_dict({"s1": s1, "s2": s2}) grouped = df.groupby(df.index.month) list(grouped) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 77ad4c7070ebd..9f5e2c87349e4 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -606,7 +606,7 @@ def test_filter_non_bool_raises(): def test_filter_dropna_with_empty_groups(): # GH 10780 - data = Series(np.random.default_rng(2).rand(9), index=np.repeat([1, 2, 3], 3)) + data = Series(np.random.default_rng(2).random(9), index=np.repeat([1, 2, 3], 3)) grouped = data.groupby(level=0) result_false = grouped.filter(lambda x: x.mean() > 1, dropna=False) expected_false = Series([np.nan] * 9, index=np.repeat([1, 2, 3], 3)) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 243e8e59fca70..1965b75932c5c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py 
@@ -2440,7 +2440,7 @@ def test_groupby_list_level(): ) def test_groups_repr_truncates(max_seq_items, expected): # GH 1135 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 1))) df["a"] = df.index with pd.option_context("display.max_seq_items", max_seq_items): diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 81a32eb9047bf..6c87c88f75108 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -139,7 +139,7 @@ def test_misc_coverage(self): def test_groupby_function_tuple_1677(self): df = DataFrame( - np.random.default_rng(2).rand(100), + np.random.default_rng(2).random(100), index=date_range("1/1/2000", periods=100), ) monthly_group = df.groupby(lambda x: (x.year, x.month)) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 3f163ad882fca..4c5a831f6b1f6 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -127,7 +127,7 @@ def test_slice_year(self): expected = s[s.index.year == 2005] tm.assert_series_equal(result, expected) - df = DataFrame(np.random.default_rng(2).rand(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).random(len(dti), 5), index=dti) result = df.loc["2005"] expected = df[df.index.year == 2005] tm.assert_frame_equal(result, expected) @@ -158,7 +158,7 @@ def test_slice_quarter(self): s = Series(np.arange(len(dti)), index=dti) assert len(s["2001Q1"]) == 90 - df = DataFrame(np.random.default_rng(2).rand(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).random(len(dti), 5), index=dti) assert len(df.loc["1Q01"]) == 90 def test_slice_month(self): @@ -166,7 +166,7 @@ def test_slice_month(self): s = Series(np.arange(len(dti)), index=dti) assert len(s["2005-11"]) == 30 - 
df = DataFrame(np.random.default_rng(2).rand(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).random(len(dti), 5), index=dti) assert len(df.loc["2005-11"]) == 30 tm.assert_series_equal(s["2005-11"], s["11-2005"]) @@ -361,7 +361,7 @@ def test_partial_slicing_with_multiindex_series(self): # GH 4294 # partial slice on a series mi ser = DataFrame( - np.random.default_rng(2).rand(1000, 1000), + np.random.default_rng(2).random(1000, 1000), index=date_range("2000-1-1", periods=1000), ).stack() diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 2aa6325a7dab7..4d650b7c8c545 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -141,7 +141,7 @@ def test_unsortedindex_doc_examples(): { "jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], - "jolie": np.random.default_rng(2).rand(4), + "jolie": np.random.default_rng(2).random(4), } ) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index f93f79967d07a..c0c6f3c977ceb 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -193,7 +193,7 @@ def test_getitem_seconds(self): with pytest.raises(IndexError, match="only integers, slices"): idx[val] - ser = Series(np.random.default_rng(2).rand(len(idx)), index=idx) + ser = Series(np.random.default_rng(2).random(len(idx)), index=idx) tm.assert_series_equal(ser["2013/01/01 10:00"], ser[3600:3660]) tm.assert_series_equal(ser["2013/01/01 9H"], ser[:3600]) for d in ["2013/01/01", "2013/01", "2013"]: @@ -225,7 +225,7 @@ def test_getitem_day(self, idx_range): with pytest.raises(IndexError, match="only integers, slices"): idx[val] - ser = Series(np.random.default_rng(2).rand(len(idx)), index=idx) + ser = Series(np.random.default_rng(2).random(len(idx)), index=idx) tm.assert_series_equal(ser["2013/01"], ser[0:31]) tm.assert_series_equal(ser["2013/02"], ser[31:59]) 
tm.assert_series_equal(ser["2014"], ser[365:]) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 9a2eece108652..e52866abbe234 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -37,13 +37,13 @@ def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): def test_getitem_periodindex_quarter_string(self): pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") - ser = Series(np.random.default_rng(2).rand(len(pi)), index=pi).cumsum() + ser = Series(np.random.default_rng(2).random(len(pi)), index=pi).cumsum() # Todo: fix these accessors! assert ser["05Q4"] == ser.iloc[2] def test_pindex_slice_index(self): pi = period_range(start="1/1/10", end="12/31/12", freq="M") - s = Series(np.random.default_rng(2).rand(len(pi)), index=pi) + s = Series(np.random.default_rng(2).random(len(pi)), index=pi) res = s["2010"] exp = s[0:12] tm.assert_series_equal(res, exp) @@ -69,7 +69,7 @@ def test_range_slice_day(self, make_range): with pytest.raises(TypeError, match=msg): idx[v:] - s = Series(np.random.default_rng(2).rand(len(idx)), index=idx) + s = Series(np.random.default_rng(2).random(len(idx)), index=idx) tm.assert_series_equal(s["2013/01/02":], s[1:]) tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5]) @@ -99,7 +99,7 @@ def test_range_slice_seconds(self, make_range): with pytest.raises(TypeError, match=msg): idx[v:] - s = Series(np.random.default_rng(2).rand(len(idx)), index=idx) + s = Series(np.random.default_rng(2).random(len(idx)), index=idx) tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660]) tm.assert_series_equal(s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960]) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 251b152d9344f..b0d2cf1736e57 100644 --- a/pandas/tests/indexes/test_base.py +++ 
b/pandas/tests/indexes/test_base.py @@ -150,7 +150,7 @@ def test_constructor_from_frame_series_freq(self): dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] expected = DatetimeIndex(dts, freq="MS") - df = DataFrame(np.random.default_rng(2).rand(5, 3)) + df = DataFrame(np.random.default_rng(2).random(5, 3)) df["date"] = dts result = DatetimeIndex(df["date"], freq="MS") diff --git a/pandas/tests/indexing/conftest.py b/pandas/tests/indexing/conftest.py index a9c4782246b75..08156bc2c400b 100644 --- a/pandas/tests/indexing/conftest.py +++ b/pandas/tests/indexing/conftest.py @@ -12,7 +12,7 @@ @pytest.fixture def series_ints(): - return Series(np.random.default_rng(2).rand(4), index=np.arange(0, 8, 2)) + return Series(np.random.default_rng(2).random(4), index=np.arange(0, 8, 2)) @pytest.fixture @@ -27,7 +27,7 @@ def frame_ints(): @pytest.fixture def series_uints(): return Series( - np.random.default_rng(2).rand(4), + np.random.default_rng(2).random(4), index=Index(np.arange(0, 8, 2, dtype=np.uint64)), ) @@ -74,7 +74,8 @@ def frame_ts(): @pytest.fixture def series_floats(): return Series( - np.random.default_rng(2).rand(4), index=Index(range(0, 8, 2), dtype=np.float64) + np.random.default_rng(2).random(4), + index=Index(range(0, 8, 2), dtype=np.float64), ) @@ -121,6 +122,6 @@ def frame_multi(): @pytest.fixture def series_multi(): return Series( - np.random.default_rng(2).rand(4), + np.random.default_rng(2).random(4), index=MultiIndex.from_product([[1, 2], [3, 4]]), ) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 6ff086f5c3c59..60eec0c9e4342 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -42,7 +42,7 @@ def test_detect_chained_assignment(using_copy_on_write): def test_cache_updating(using_copy_on_write): # 5216 # make sure that we don't try to set a dead cache - a 
= np.random.default_rng(2).rand(10, 3) + a = np.random.default_rng(2).random(10, 3) df = DataFrame(a, columns=["x", "y", "z"]) df_original = df.copy() tuples = [(i, j) for i in range(5) for j in range(2)] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 7b1840469f471..478fe788f5ef7 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -752,7 +752,7 @@ def test_missing_key_combination(self): ], names=["one", "two", "three"], ) - df = DataFrame(np.random.default_rng(2).rand(4, 3), index=mi) + df = DataFrame(np.random.default_rng(2).random(4, 3), index=mi) msg = r"\('b', '1', slice\(None, None, None\)\)" with pytest.raises(KeyError, match=msg): df.loc[("b", "1", slice(None)), :] diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 90ccaa693546f..9a54e2d0ae12e 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -22,7 +22,7 @@ def test_multiindex_perf_warn(self): { "jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], - "jolie": np.random.default_rng(2).rand(4), + "jolie": np.random.default_rng(2).random(4), } ).set_index(["jim", "joe"]) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 31896fbd4126e..80479a2016181 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -105,7 +105,7 @@ def test_getitem_partial_column_select(self): codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], levels=[["a", "b"], ["x", "y"], ["p", "q"]], ) - df = DataFrame(np.random.default_rng(2).rand(3, 2), index=idx) + df = DataFrame(np.random.default_rng(2).random(3, 2), index=idx) result = df.loc[("a", "y"), :] expected = df.loc[("a", "y")] diff --git a/pandas/tests/indexing/test_floats.py 
b/pandas/tests/indexing/test_floats.py index 0a90cff2cee21..d818df3fcb963 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -353,7 +353,7 @@ def test_slice_integer_frame_getitem(self, index_func): # similar to above, but on the getitem dim (of a DataFrame) index = index_func(5) - s = DataFrame(np.random.default_rng(2).standard_normal(5, 2), index=index) + s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index) # getitem for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: @@ -408,7 +408,7 @@ def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func): # similar to above, but on the getitem dim (of a DataFrame) index = index_func(5) - s = DataFrame(np.random.default_rng(2).standard_normal(5, 2), index=index) + s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index) # setitem sc = s.copy() diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f4fca2f1ec8bf..36209c08deddd 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -239,7 +239,7 @@ def check(result, expected): tm.assert_frame_equal(result, expected) dfl = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("AB") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB") ) check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[])) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) @@ -1131,7 +1131,9 @@ def view(self): def test_iloc_getitem_with_duplicates(self): df = DataFrame( - np.random.default_rng(2).rand(3, 3), columns=list("ABC"), index=list("aab") + np.random.default_rng(2).random(3, 3), + columns=list("ABC"), + index=list("aab"), ) result = df.iloc[0] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 67bd45fb2807a..16b510aa1f792 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -645,8 
+645,8 @@ class TestMisc: def test_float_index_to_mixed(self): df = DataFrame( { - 0.0: np.random.default_rng(2).rand(10), - 1.0: np.random.default_rng(2).rand(10), + 0.0: np.random.default_rng(2).random(10), + 1.0: np.random.default_rng(2).random(10), } ) df["a"] = 10 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 17f5894a64d2c..4d9c322b5f709 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -538,7 +538,7 @@ def test_loc_index(self): def test_loc_general(self): df = DataFrame( - np.random.default_rng(2).rand(4, 4), + np.random.default_rng(2).random(4, 4), columns=["A", "B", "C", "D"], index=["A", "B", "C", "D"], ) @@ -1618,7 +1618,7 @@ def test_loc_setitem_single_column_mixed(self): def test_loc_setitem_cast2(self): # GH#7704 # dtype conversion on setting - df = DataFrame(np.random.default_rng(2).rand(30, 3), columns=tuple("ABC")) + df = DataFrame(np.random.default_rng(2).random(30, 3), columns=tuple("ABC")) df["event"] = np.nan df.loc[10, "event"] = "foo" result = df.dtypes @@ -2414,7 +2414,7 @@ def test_loc_getitem_label_slice_period_timedelta(self, index): def test_loc_getitem_slice_floats_inexact(self): index = [52195.504153, 52196.303147, 52198.369883] - df = DataFrame(np.random.default_rng(2).rand(3, 2), index=index) + df = DataFrame(np.random.default_rng(2).random(3, 2), index=index) s1 = df.loc[52195.1:52196.5] assert len(s1) == 2 @@ -2428,7 +2428,7 @@ def test_loc_getitem_slice_floats_inexact(self): def test_loc_getitem_float_slice_floatindex(self, float_numpy_dtype): dtype = float_numpy_dtype ser = Series( - np.random.default_rng(2).rand(10), index=np.arange(10, 20, dtype=dtype) + np.random.default_rng(2).random(10), index=np.arange(10, 20, dtype=dtype) ) assert len(ser.loc[12.0:]) == 8 @@ -2751,7 +2751,7 @@ def test_loc_named_index(self): def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns): # gh-14836 df = DataFrame( - 
np.random.default_rng(2).rand(3, 3), columns=columns, index=list("ABC") + np.random.default_rng(2).random(3, 3), columns=columns, index=list("ABC") ) expected = df.iloc[:, expected_columns] result = df.loc[["A", "B", "C"], column_key] diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 6969a49c87f65..5780f8af93116 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -424,7 +424,7 @@ def test_categorical_block_pickle(self): def test_iget(self): cols = Index(list("abc")) - values = np.random.default_rng(2).rand(3, 3) + values = np.random.default_rng(2).random(3, 3) block = new_block( values=values.copy(), placement=BlockPlacement(np.arange(3, dtype=np.intp)), diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 871be620d18d1..c80220da41728 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -357,7 +357,7 @@ def test_excel_sheet_size(self, path): col_df.to_excel(path) def test_excel_sheet_by_name_raise(self, path): - gt = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + gt = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) gt.to_excel(path) with ExcelFile(path) as xl: @@ -567,7 +567,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): frame.to_excel(path, "test1", index=False) # test index_label - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0 df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) with ExcelFile(path) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( @@ -576,7 +576,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) >= 0 + df = 
DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0 df.to_excel( path, "test1", @@ -590,7 +590,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): df.index.names = ["test"] assert df.index.names == recons.index.names - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) >= 0 + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) >= 0 df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) with ExcelFile(path) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype( @@ -1120,7 +1120,7 @@ def test_datetimes(self, path): def test_bytes_io(self, engine): # see gh-7074 with BytesIO() as bio: - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) # Pass engine explicitly, as there is no file path to infer from. with ExcelWriter(bio, engine=engine) as writer: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 0775b0cdbdca3..d046178f1258f 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -731,7 +731,7 @@ def test_map_subset_multiindex(self, slice_): idx = MultiIndex.from_product([["a", "b"], [1, 2]]) col = MultiIndex.from_product([["x", "y"], ["A", "B"]]) - df = DataFrame(np.random.default_rng(2).rand(4, 4), columns=col, index=idx) + df = DataFrame(np.random.default_rng(2).random(4, 4), columns=col, index=idx) with ctx: df.style.map(lambda x: "color: red;", subset=slice_).to_html() diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 47b16119a0b37..37ae131b7486f 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -407,7 +407,7 @@ def test_repr_truncates_terminal_size(self, monkeypatch): def test_repr_truncates_terminal_size_full(self, monkeypatch): # GH 22984 ensure entire window is filled 
terminal_size = (80, 24) - df = DataFrame(np.random.default_rng(2).rand(1, 7)) + df = DataFrame(np.random.default_rng(2).random(1, 7)) monkeypatch.setattr( "pandas.io.formats.format.get_terminal_size", lambda: terminal_size @@ -2032,7 +2032,7 @@ def test_info_repr(self): def test_info_repr_max_cols(self): # GH #6939 - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) with option_context( "display.large_repr", "info", @@ -2215,7 +2215,7 @@ def test_max_rows_fitted(self, length, min_rows, max_rows, expected): https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options """ formatter = fmt.DataFrameFormatter( - DataFrame(np.random.default_rng(2).rand(length, 3)), + DataFrame(np.random.default_rng(2).random(length, 3)), max_rows=max_rows, min_rows=min_rows, ) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 326cfc6c00af7..70f6940ce0297 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -91,7 +91,7 @@ def test_info_smoke_test(fixture_func_name, request): ], ) def test_info_default_verbose_selection(num_columns, max_info_columns, verbose): - frame = DataFrame(np.random.default_rng(2).standard_normal(5, num_columns)) + frame = DataFrame(np.random.default_rng(2).standard_normal((5, num_columns))) with option_context("display.max_info_columns", max_info_columns): io_default = StringIO() frame.info(buf=io_default) @@ -263,7 +263,7 @@ def test_info_shows_column_dtypes(): def test_info_max_cols(): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) for len_, verbose in [(5, None), (5, False), (12, True)]: # For verbose always ^ setting ^ summarize ^ full output with option_context("max_info_columns", 4): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 
1c772677cc4b8..21d9bb84fdfef 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -977,7 +977,7 @@ def test_weird_nested_json(self): def test_doc_example(self): dfj2 = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=list("AB") + np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB") ) dfj2["date"] = Timestamp("20130101") dfj2["ints"] = range(5) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index f8d8a88a31cb4..8cc9df8e6768a 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -31,7 +31,7 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig): parser = all_parsers df = DataFrame( - np.random.default_rng(2).rand(5, 2).round(4), + np.random.default_rng(2).random(5, 2).round(4), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"], ) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index d244a06d72566..3ae1f9241d58c 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -130,7 +130,7 @@ def test_dtype_and_names_error(c_parser_only): def test_unsupported_dtype(c_parser_only, match, kwargs): parser = c_parser_only df = DataFrame( - np.random.default_rng(2).rand(5, 2), + np.random.default_rng(2).random(5, 2), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"], ) diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index cf897107be4fa..c5b757d619e7a 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -125,11 +125,11 @@ def test_multi_thread_path_multipart_read_csv(all_parsers): file_name = "__thread_pool_reader__.csv" df = DataFrame( { - "a": np.random.default_rng(2).rand(num_rows), - "b": 
np.random.default_rng(2).rand(num_rows), - "c": np.random.default_rng(2).rand(num_rows), - "d": np.random.default_rng(2).rand(num_rows), - "e": np.random.default_rng(2).rand(num_rows), + "a": np.random.default_rng(2).random(num_rows), + "b": np.random.default_rng(2).random(num_rows), + "c": np.random.default_rng(2).random(num_rows), + "d": np.random.default_rng(2).random(num_rows), + "e": np.random.default_rng(2).random(num_rows), "foo": ["foo"] * num_rows, "bar": ["bar"] * num_rows, "baz": ["baz"] * num_rows, diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 6f8ce09d6b1d8..2e45a64c87103 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -129,7 +129,7 @@ def test_append_series(setup_path): tm.assert_series_equal(result, expected, check_index_type=True) # multi-index - mi = DataFrame(np.random.default_rng(2).standard_normal(5, 1), columns=["A"]) + mi = DataFrame(np.random.default_rng(2).standard_normal((5, 1)), columns=["A"]) mi["B"] = np.arange(len(mi)) mi["C"] = "foo" mi.loc[3:5, "C"] = "bar" @@ -308,7 +308,7 @@ def test_append_with_different_block_ordering(setup_path): with ensure_clean_store(setup_path) as store: for i in range(10): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), columns=list("AB") + np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB") ) df["index"] = range(10) df["index"] += i * 10 @@ -330,7 +330,7 @@ def test_append_with_different_block_ordering(setup_path): # combinations) with ensure_clean_store(setup_path) as store: df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), + np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB"), dtype="float64", ) @@ -609,7 +609,7 @@ def check_col(key, name, size): index = date_range("1/1/2000", periods=8) df_dc = DataFrame( - np.random.default_rng(2).standard_normal(8, 3), + np.random.default_rng(2).standard_normal((8, 3)), index=index, 
columns=["A", "B", "C"], ) @@ -690,7 +690,7 @@ def test_append_misc_empty_frame(setup_path): store.select("df") # repeated append of 0/non-zero frames - df = DataFrame(np.random.default_rng(2).rand(10, 3), columns=list("ABC")) + df = DataFrame(np.random.default_rng(2).random(10, 3), columns=list("ABC")) store.append("df", df) tm.assert_frame_equal(store.select("df"), df) store.append("df", df_empty) diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py index b02f2fb0441ce..86659f6f4c65c 100644 --- a/pandas/tests/io/pytables/test_complex.py +++ b/pandas/tests/io/pytables/test_complex.py @@ -16,7 +16,7 @@ def test_complex_fixed(tmp_path, setup_path): df = DataFrame( - np.random.default_rng(2).rand(4, 5).astype(np.complex64), + np.random.default_rng(2).random(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) @@ -27,7 +27,7 @@ def test_complex_fixed(tmp_path, setup_path): tm.assert_frame_equal(df, reread) df = DataFrame( - np.random.default_rng(2).rand(4, 5).astype(np.complex128), + np.random.default_rng(2).random(4, 5).astype(np.complex128), index=list("abcd"), columns=list("ABCDE"), ) @@ -39,7 +39,7 @@ def test_complex_fixed(tmp_path, setup_path): def test_complex_table(tmp_path, setup_path): df = DataFrame( - np.random.default_rng(2).rand(4, 5).astype(np.complex64), + np.random.default_rng(2).random(4, 5).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) @@ -50,7 +50,7 @@ def test_complex_table(tmp_path, setup_path): tm.assert_frame_equal(df, reread) df = DataFrame( - np.random.default_rng(2).rand(4, 5).astype(np.complex128), + np.random.default_rng(2).random(4, 5).astype(np.complex128), index=list("abcd"), columns=list("ABCDE"), ) diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 9dc7d8212c407..fbcbaba7fc6a7 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -166,7 +166,7 @@ def 
test_append_with_diff_col_name_types_raises_value_error(setup_path): def test_invalid_complib(setup_path): df = DataFrame( - np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), index=list("abcd"), columns=list("ABCDE") ) with tm.ensure_clean(setup_path) as path: msg = r"complib only supports \[.*\] compression." @@ -204,7 +204,7 @@ def test_unsuppored_hdf_file_error(datapath): def test_read_hdf_errors(setup_path, tmp_path): df = DataFrame( - np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), index=list("abcd"), columns=list("ABCDE") ) path = tmp_path / setup_path diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index c3b539ea3dc13..93b00abad61ec 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -223,7 +223,7 @@ def test_store_index_types(setup_path, format, index): with ensure_clean_store(setup_path) as store: df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), columns=list("AB") + np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB") ) df.index = index(len(df)) @@ -354,7 +354,7 @@ def test_store_periodindex(tmp_path, setup_path, format): # GH 7796 # test of PeriodIndex in HDFStore df = DataFrame( - np.random.default_rng(2).standard_normal(5, 1), + np.random.default_rng(2).standard_normal((5, 1)), index=pd.period_range("20220101", freq="M", periods=5), ) diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index d783286241003..a0ea1301adf83 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -214,7 +214,7 @@ def test_read_hdf_open_store(tmp_path, setup_path): # GH10330 # No check for non-string path_or-buf, and no test of open store df = DataFrame(
np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -233,7 +233,7 @@ def test_read_hdf_index_not_view(tmp_path, setup_path): # Ensure that the index of the DataFrame is not a view # into the original recarray that pytables reads in df = DataFrame( - np.random.default_rng(2).rand(4, 5), index=[0, 1, 2, 3], columns=list("ABCDE") + np.random.default_rng(2).random(4, 5), index=[0, 1, 2, 3], columns=list("ABCDE") ) path = tmp_path / setup_path @@ -246,7 +246,7 @@ def test_read_hdf_index_not_view(tmp_path, setup_path): def test_read_hdf_iterator(tmp_path, setup_path): df = DataFrame( - np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -264,7 +264,7 @@ def test_read_hdf_iterator(tmp_path, setup_path): def test_read_nokey(tmp_path, setup_path): # GH10443 df = DataFrame( - np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") ) # Categorical dtype not supported for "fixed" format. 
So no need @@ -310,7 +310,7 @@ def test_read_nokey_empty(tmp_path, setup_path): def test_read_from_pathlib_path(tmp_path, setup_path): # GH11773 expected = DataFrame( - np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") ) filename = tmp_path / setup_path path_obj = Path(filename) @@ -327,7 +327,7 @@ def test_read_from_py_localpath(tmp_path, setup_path): from py.path import local as LocalPath expected = DataFrame( - np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") ) filename = tmp_path / setup_path path_obj = LocalPath(filename) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 1a5e34e8cc544..6530b7c74aa3e 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -185,7 +185,7 @@ def test_select_dtypes(setup_path): # bool columns (GH #2849) df = DataFrame( - np.random.default_rng(2).standard_normal(5, 2), columns=["A", "B"] + np.random.default_rng(2).standard_normal((5, 2)), columns=["A", "B"] ) df["object"] = "foo" df.loc[4:5, "object"] = "bar" @@ -206,8 +206,8 @@ def test_select_dtypes(setup_path): # integer index df = DataFrame( { - "A": np.random.default_rng(2).rand(20), - "B": np.random.default_rng(2).rand(20), + "A": np.random.default_rng(2).random(20), + "B": np.random.default_rng(2).random(20), } ) _maybe_remove(store, "df_int") @@ -219,8 +219,8 @@ def test_select_dtypes(setup_path): # float index df = DataFrame( { - "A": np.random.default_rng(2).rand(20), - "B": np.random.default_rng(2).rand(20), + "A": np.random.default_rng(2).random(20), + "B": np.random.default_rng(2).random(20), "index": np.arange(20, dtype="f8"), } ) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 
da1124268393b..9461df18e7491 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -624,7 +624,7 @@ def test_coordinates(setup_path): # pass array/mask as the coordinates with ensure_clean_store(setup_path) as store: df = DataFrame( - np.random.default_rng(2).standard_normal(1000, 2), + np.random.default_rng(2).standard_normal((1000, 2)), index=date_range("20000101", periods=1000), ) store.append("df", df) @@ -664,7 +664,7 @@ def test_coordinates(setup_path): tm.assert_frame_equal(result, expected) # list - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) store.append("df2", df) result = store.select("df2", where=[0, 3, 5]) expected = df.iloc[[0, 3, 5]] @@ -688,8 +688,8 @@ def test_start_stop_table(setup_path): # table df = DataFrame( { - "A": np.random.default_rng(2).rand(20), - "B": np.random.default_rng(2).rand(20), + "A": np.random.default_rng(2).random(20), + "B": np.random.default_rng(2).random(20), } ) store.append("df", df) @@ -725,8 +725,8 @@ def test_start_stop_fixed(setup_path): # fixed, GH 8287 df = DataFrame( { - "A": np.random.default_rng(2).rand(20), - "B": np.random.default_rng(2).rand(20), + "A": np.random.default_rng(2).random(20), + "B": np.random.default_rng(2).random(20), }, index=date_range("20130101", periods=20), ) @@ -903,7 +903,7 @@ def test_columns_multiindex_modified(tmp_path, setup_path): # BUG: 7212 df = DataFrame( - np.random.default_rng(2).rand(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -942,7 +942,7 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path): for index in types_should_fail: df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), columns=index(2) + np.random.default_rng(2).standard_normal((10, 2)), columns=index(2) 
) path = tmp_path / setup_path with catch_warnings(record=True): @@ -952,7 +952,7 @@ def test_to_hdf_with_object_column_names(tmp_path, setup_path): for index in types_should_run: df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), columns=index(2) + np.random.default_rng(2).standard_normal((10, 2)), columns=index(2) ) path = tmp_path / setup_path with catch_warnings(record=True): diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 31ad48ed2b330..c201f36c0d076 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -157,7 +157,7 @@ def test_to_html_compat(self): tm.makeCustomDataframe( 4, 3, - data_gen_f=lambda *args: np.random.default_rng(2).rand(), + data_gen_f=lambda *args: np.random.default_rng(2).random(), c_idx_names=False, r_idx_names=False, ) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b0e5422540b1c..99392b9b1b3d7 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -373,7 +373,7 @@ def test_read_write_dta10(self, version): def test_stata_doc_examples(self): with tm.ensure_clean() as path: df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), columns=list("AB") + np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB") ) df.to_stata(path) @@ -1982,7 +1982,9 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten if use_dict: compression_arg = {"method": compression} - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2), columns=list("AB")) + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB") + ) df.index.name = "index" with tm.ensure_clean(file_name) as path: df.to_stata(path, version=version, compression=compression_arg) @@ -2020,7 +2022,9 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten def test_compression_dict(method, file_ext): file_name = f"test.{file_ext}" archive_name = "test.dta" - df 
= DataFrame(np.random.default_rng(2).standard_normal(10, 2), columns=list("AB")) + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB") + ) df.index.name = "index" with tm.ensure_clean(file_name) as path: compression = {"method": method, "archive_name": archive_name} diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index eb93dc344aab9..5ee766b964b18 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -97,7 +97,8 @@ def test_plot_invalid_arg(self): @pytest.mark.slow def test_plot_tick_props(self): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) ax = _check_plot_works(df.plot, use_index=True) @@ -115,14 +116,16 @@ def test_plot_tick_props(self): ) def test_plot_other_args(self, kwargs): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) _check_plot_works(df.plot, **kwargs) @pytest.mark.slow def test_plot_visible_ax(self): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) # We have to redo it here because _check_plot_works does two plots, # once without an ax kwarg and once with an ax kwarg and the new sharex @@ -145,7 +148,8 @@ def test_plot_visible_ax(self): @pytest.mark.slow def test_plot_title(self): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) _check_plot_works(df.plot, title="blah") @@ -153,7 +157,7 @@ def test_plot_title(self): def test_plot_multiindex(self): tuples = zip(string.ascii_letters[:10], range(10)) df 
= DataFrame( - np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) ) ax = _check_plot_works(df.plot, use_index=True) _check_ticks_props(ax, xrot=0) @@ -189,7 +193,7 @@ def test_plot_multiindex_unicode(self): def test_plot_single_column_bar(self, layout): # GH 6951 # Test with single column - df = DataFrame({"x": np.random.default_rng(2).rand(10)}) + df = DataFrame({"x": np.random.default_rng(2).random(10)}) axes = _check_plot_works(df.plot.bar, subplots=True, layout=layout) _check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -197,7 +201,7 @@ def test_plot_single_column_bar(self, layout): def test_plot_passed_ax(self): # When ax is supplied and required number of axes is 1, # passed ax should be used: - df = DataFrame({"x": np.random.default_rng(2).rand(10)}) + df = DataFrame({"x": np.random.default_rng(2).random(10)}) _, ax = mpl.pyplot.subplots() axes = df.plot.bar(subplots=True, ax=ax) assert len(axes) == 1 @@ -380,7 +384,7 @@ def test_period_compat(self): # GH 9012 # period-array conversions df = DataFrame( - np.random.default_rng(2).rand(21, 2), + np.random.default_rng(2).random(21, 2), index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)), columns=["a", "b"], ) @@ -428,7 +432,7 @@ def test_unsorted_index_lims_x_y(self): def test_negative_log(self): df = -DataFrame( - np.random.default_rng(2).rand(6, 4), + np.random.default_rng(2).random(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -449,7 +453,7 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): @pytest.mark.parametrize("mult", [1, -1]) def test_line_area_stacked(self, kind, mult): df = mult * DataFrame( - np.random.default_rng(2).rand(6, 4), columns=["w", "x", "y", "z"] + np.random.default_rng(2).random(6, 4), columns=["w", "x", "y", "z"] ) ax1 = _check_plot_works(df.plot, kind=kind, stacked=False) @@ -461,10 +465,10 @@ def 
test_line_area_stacked_sep_df(self, kind): # each column has either positive or negative value sep_df = DataFrame( { - "w": np.random.default_rng(2).rand(6), - "x": np.random.default_rng(2).rand(6), - "y": -np.random.default_rng(2).rand(6), - "z": -np.random.default_rng(2).rand(6), + "w": np.random.default_rng(2).random(6), + "x": np.random.default_rng(2).random(6), + "y": -np.random.default_rng(2).random(6), + "z": -np.random.default_rng(2).random(6), } ) ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False) @@ -491,7 +495,7 @@ def test_line_area_stacked_mixed(self): @pytest.mark.parametrize("kind", ["line", "area"]) def test_line_area_stacked_positive_idx(self, kind): df = DataFrame( - np.random.default_rng(2).rand(6, 4), columns=["w", "x", "y", "z"] + np.random.default_rng(2).random(6, 4), columns=["w", "x", "y", "z"] ) # Use an index with strictly positive values, preventing # matplotlib from warning about ignoring xlim @@ -559,7 +563,7 @@ def test_line_area_nan_df_stacked_area(self, idx, kwargs): @pytest.mark.parametrize("kwargs", [{}, {"secondary_y": True}]) def test_line_lim(self, kwargs): - df = DataFrame(np.random.default_rng(2).rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.default_rng(2).random(6, 3), columns=["x", "y", "z"]) ax = df.plot(**kwargs) xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -567,7 +571,7 @@ def test_line_lim(self, kwargs): assert xmax >= lines[0].get_data()[0][-1] def test_line_lim_subplots(self): - df = DataFrame(np.random.default_rng(2).rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.default_rng(2).random(6, 3), columns=["x", "y", "z"]) axes = df.plot(secondary_y=True, subplots=True) _check_axes_shape(axes, axes_num=3, layout=(3, 1)) for ax in axes: @@ -586,7 +590,7 @@ def test_line_lim_subplots(self): @pytest.mark.parametrize("stacked", [True, False]) def test_area_lim(self, stacked): df = DataFrame( - np.random.default_rng(2).rand(6, 4), columns=["x", "y", "z", "four"] + 
np.random.default_rng(2).random(6, 4), columns=["x", "y", "z", "four"] ) neg_df = -df @@ -605,7 +609,7 @@ def test_area_lim(self, stacked): def test_area_sharey_dont_overwrite(self): # GH37942 - df = DataFrame(np.random.default_rng(2).rand(4, 2), columns=["x", "y"]) + df = DataFrame(np.random.default_rng(2).random(4, 2), columns=["x", "y"]) fig, (ax1, ax2) = mpl.pyplot.subplots(1, 2, sharey=True) df.plot(ax=ax1, kind="area") @@ -661,7 +665,7 @@ def test_barh_barwidth_subplots(self, meth, dim): assert getattr(r, dim)() == width def test_bar_bottom_left_bottom(self): - df = DataFrame(np.random.default_rng(2).rand(5, 5)) + df = DataFrame(np.random.default_rng(2).random(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -671,7 +675,7 @@ def test_bar_bottom_left_bottom(self): assert result == [-1, -2, -3, -4, -5] def test_bar_bottom_left_left(self): - df = DataFrame(np.random.default_rng(2).rand(5, 5)) + df = DataFrame(np.random.default_rng(2).random(5, 5)) ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) result = [p.get_x() for p in ax.patches] assert result == [1] * 25 @@ -681,7 +685,7 @@ def test_bar_bottom_left_left(self): assert result == [1, 2, 3, 4, 5] def test_bar_bottom_left_subplots(self): - df = DataFrame(np.random.default_rng(2).rand(5, 5)) + df = DataFrame(np.random.default_rng(2).random(5, 5)) axes = df.plot.bar(subplots=True, bottom=-1) for ax in axes: result = [p.get_y() for p in ax.patches] @@ -1105,14 +1109,14 @@ def test_hist_df(self): _check_ax_scales(axes, yaxis="log") def test_hist_df_series(self): - series = Series(np.random.default_rng(2).rand(10)) + series = Series(np.random.default_rng(2).random(10)) axes = series.plot.hist(rot=40) _check_ticks_props(axes, xrot=40, yrot=0) def test_hist_df_series_cumulative_density(self): from matplotlib.patches import Rectangle - series = Series(np.random.default_rng(2).rand(10)) + series = 
Series(np.random.default_rng(2).random(10)) ax = series.plot.hist(cumulative=True, bins=4, density=True) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -1121,7 +1125,7 @@ def test_hist_df_series_cumulative_density(self): def test_hist_df_series_cumulative(self): from matplotlib.patches import Rectangle - series = Series(np.random.default_rng(2).rand(10)) + series = Series(np.random.default_rng(2).random(10)) ax = series.plot.hist(cumulative=True, bins=4) rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -1393,16 +1397,16 @@ def test_all_invalid_plot_data(self, kind): ) def test_partially_invalid_plot_data_numeric(self, kind): df = DataFrame( - np.random.default_rng(2).RandomState(42).standard_normal(10, 2), + np.random.default_rng(2).standard_normal((10, 2)), dtype=object, ) - df[np.random.default_rng(2).rand(df.shape[0]) > 0.5] = "a" + df[np.random.default_rng(2).random(df.shape[0]) > 0.5] = "a" msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) msg = "invalid_plot_kind is not a valid plot kind" with pytest.raises(ValueError, match=msg): df.plot(kind="invalid_plot_kind") @@ -1520,7 +1524,7 @@ def test_hexbin_cmap(self, kwargs, expected): def test_pie_df_err(self): df = DataFrame( - np.random.default_rng(2).rand(5, 3), + np.random.default_rng(2).random((5, 3)), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1531,7 +1535,7 @@ def test_pie_df_err(self): @pytest.mark.parametrize("y", ["Y", 2]) def test_pie_df(self, y): df = DataFrame( - np.random.default_rng(2).rand(5, 3), + np.random.default_rng(2).random((5, 3)), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1540,7 +1544,7 @@ def test_pie_df_subplots(self):
df = DataFrame( - np.random.default_rng(2).rand(5, 3), + np.random.default_rng(2).random(5, 3), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1557,7 +1561,7 @@ def test_pie_df_subplots(self): def test_pie_df_labels_colors(self): df = DataFrame( - np.random.default_rng(2).rand(5, 3), + np.random.default_rng(2).random(5, 3), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1577,7 +1581,7 @@ def test_pie_df_labels_colors(self): _check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): - df = DataFrame(np.random.default_rng(2).rand(4, 4)) + df = DataFrame(np.random.default_rng(2).random(4, 4)) for i in range(4): df.iloc[i, i] = np.nan _, axes = mpl.pyplot.subplots(ncols=4) @@ -1731,8 +1735,8 @@ def test_errorbar_plot_iterator(self): def test_errorbar_with_integer_column_names(self): # test with integer column names - df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 2))) - df_err = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 2))) + df = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 2)))) + df_err = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 2)))) ax = _check_plot_works(df.plot, yerr=df_err) _check_has_errorbars(ax, xerr=0, yerr=2) ax = _check_plot_works(df.plot, y=0, yerr=1) @@ -1743,7 +1747,7 @@ def test_errorbar_with_integer_column_names(self): def test_errorbar_with_partial_columns_kind(self, kind): df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 3))) df_err = DataFrame( - np.abs(np.random.default_rng(2).standard_normal(10, 2)), columns=[0, 2] + np.abs(np.random.default_rng(2).standard_normal((10, 2))), columns=[0, 2] ) ax = _check_plot_works(df.plot, yerr=df_err, kind=kind) _check_has_errorbars(ax, xerr=0, yerr=2) @@ -1752,7 +1756,7 @@ def test_errorbar_with_partial_columns_kind(self, kind): def test_errorbar_with_partial_columns_dti(self): df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 3))) df_err = 
DataFrame( - np.abs(np.random.default_rng(2).standard_normal(10, 2)), columns=[0, 2] + np.abs(np.random.default_rng(2).standard_normal((10, 2))), columns=[0, 2] ) ix = date_range("1/1/2000", periods=10, freq="M") df.set_index(ix, inplace=True) @@ -1804,7 +1808,7 @@ def test_errorbar_timeseries(self, kind): _check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): - err = np.random.default_rng(2).rand(3, 2, 5) + err = np.random.default_rng(2).random(3, 2, 5) # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]... df = DataFrame(np.arange(15).reshape(3, 5)).T @@ -1823,7 +1827,8 @@ def test_errorbar_asymmetrical(self): def test_table(self): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) _check_plot_works(df.plot, table=True) _check_plot_works(df.plot, table=df) @@ -1837,12 +1842,12 @@ def test_table(self): def test_errorbar_scatter(self): df = DataFrame( - np.abs(np.random.default_rng(2).standard_normal(5, 2)), + np.abs(np.random.default_rng(2).standard_normal((5, 2))), index=range(5), columns=["x", "y"], ) df_err = DataFrame( - np.abs(np.random.default_rng(2).standard_normal(5, 2)) / 5, + np.abs(np.random.default_rng(2).standard_normal((5, 2))) / 5, index=range(5), columns=["x", "y"], ) @@ -1871,7 +1876,7 @@ def _check_errorbar_color(containers, expected, has_err="has_xerr"): # GH 8081 df = DataFrame( - np.abs(np.random.default_rng(2).standard_normal(10, 5)), + np.abs(np.random.default_rng(2).standard_normal((10, 5))), columns=["a", "b", "c", "d", "e"], ) ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red") @@ -2059,7 +2064,7 @@ def test_df_gridspec_patterns_vert_horiz(self): ) df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), + np.random.default_rng(2).standard_normal((10, 2)), index=ts.index, columns=list("AB"), ) @@ -2197,7 +2202,7 @@ def test_plain_axes(self): # a plain Axes object 
(GH11556) fig, ax = mpl.pyplot.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(np.random.default_rng(2).rand(10)).plot(ax=ax) + Series(np.random.default_rng(2).random(10)).plot(ax=ax) def test_plain_axes_df(self): # supplied ax itself is a plain Axes, but because the cmap keyword @@ -2219,16 +2224,16 @@ def test_plain_axes_make_axes_locatable(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(np.random.default_rng(2).rand(10)).plot(ax=ax) - Series(np.random.default_rng(2).rand(10)).plot(ax=cax) + Series(np.random.default_rng(2).random(10)).plot(ax=ax) + Series(np.random.default_rng(2).random(10)).plot(ax=cax) def test_plain_axes_make_inset_axes(self): fig, ax = mpl.pyplot.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(np.random.default_rng(2).rand(10)).plot(ax=ax) - Series(np.random.default_rng(2).rand(10)).plot(ax=iax) + Series(np.random.default_rng(2).random(10)).plot(ax=ax) + Series(np.random.default_rng(2).random(10)).plot(ax=iax) @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index f438b6301ef90..c42185642676f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -54,7 +54,7 @@ def test_rgb_tuple_color(self, color): _check_plot_works(df.plot, x="x", y="y", color=color) def test_color_empty_string(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) with pytest.raises(ValueError, match="Invalid color argument:"): df.plot(color="") @@ -287,7 +287,7 @@ def test_line_colors_hex(self): def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.default_rng(2).rand(10, 
2)).plot(color=colors) + DataFrame(np.random.default_rng(2).random(10, 2)).plot(color=colors) assert len(colors) == 3 def test_line_colors_and_styles_subplots(self): @@ -362,7 +362,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).rand(5, 5)) + df = DataFrame(np.random.default_rng(2).random(5, 5)) ax = df.plot.area(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -379,7 +379,7 @@ def test_area_colors_poly(self): from matplotlib import cm from matplotlib.collections import PolyCollection - df = DataFrame(np.random.default_rng(2).rand(5, 5)) + df = DataFrame(np.random.default_rng(2).random(5, 5)) ax = df.plot.area(colormap="jet") jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=jet_colors) @@ -395,7 +395,7 @@ def test_area_colors_stacked_false(self): from matplotlib import cm from matplotlib.collections import PolyCollection - df = DataFrame(np.random.default_rng(2).rand(5, 5)) + df = DataFrame(np.random.default_rng(2).random(5, 5)) jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] # When stacked=False, alpha is set to 0.5 ax = df.plot.area(colormap=cm.jet, stacked=False) @@ -443,7 +443,7 @@ def test_hist_colors_single_color(self): @td.skip_if_no_scipy def test_kde_colors(self): custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).rand(5, 5)) + df = DataFrame(np.random.default_rng(2).random(5, 5)) ax = df.plot.kde(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) diff --git a/pandas/tests/plotting/frame/test_frame_legend.py b/pandas/tests/plotting/frame/test_frame_legend.py index 41db738442549..da0438dfb8260 100644 --- a/pandas/tests/plotting/frame/test_frame_legend.py +++ b/pandas/tests/plotting/frame/test_frame_legend.py @@ -62,10 +62,10 @@ def test_legend_false(self): @td.skip_if_no_scipy @pytest.mark.parametrize("kind", ["line", 
"bar", "barh", "kde", "area", "hist"]) def test_df_legend_labels(self, kind): - df = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["g", "h", "i"]) + df4 = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["j", "k", "l"]) ax = df.plot(kind=kind, legend=True) _check_legend_labels(ax, labels=df.columns) @@ -82,9 +82,9 @@ def test_df_legend_labels(self, kind): @td.skip_if_no_scipy def test_df_legend_labels_secondary_y(self): - df = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["g", "h", "i"]) + df = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["g", "h", "i"]) # Secondary Y ax = df.plot(legend=True, secondary_y="b") _check_legend_labels(ax, labels=["a", "b (right)", "c"]) @@ -227,14 +227,14 @@ def test_legend_name(self): ], ) def test_no_legend(self, kind): - df = DataFrame(np.random.default_rng(2).rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["a", "b", "c"]) ax = df.plot(kind=kind, legend=False) _check_legend_labels(ax, visible=False) def test_missing_markers_legend(self): # 14958 df = DataFrame( - np.random.default_rng(2).standard_normal(8, 3), columns=["A", "B", "C"] + 
np.random.default_rng(2).standard_normal((8, 3)), columns=["A", "B", "C"] ) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index bac92d8ee5f84..14701413e26a0 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -36,7 +36,8 @@ class TestDataFramePlotsSubplots: @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots(self, kind): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) @@ -65,7 +66,8 @@ def test_subplots(self, kind): @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots_no_share_x(self, kind): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) axes = df.plot(kind=kind, subplots=True, sharex=False) for ax in axes: @@ -79,7 +81,8 @@ def test_subplots_no_share_x(self, kind): @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots_no_legend(self, kind): df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) axes = df.plot(kind=kind, subplots=True, legend=False) for ax in axes: @@ -88,7 +91,7 @@ def test_subplots_no_legend(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries(self, kind): idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.default_rng(2).rand(10, 3), index=idx) + df = 
DataFrame(np.random.default_rng(2).random(10, 3), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=True) _check_axes_shape(axes, axes_num=3, layout=(3, 1)) @@ -111,7 +114,7 @@ def test_subplots_timeseries(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries_rot(self, kind): idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.default_rng(2).rand(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).random(10, 3), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) for ax in axes: _check_visible(ax.xaxis) @@ -228,7 +231,8 @@ def test_subplots_timeseries_y_axis_not_supported(self): def test_subplots_layout_multi_column(self, layout, exp_layout): # GH 6667 df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) axes = df.plot(subplots=True, layout=layout) @@ -238,7 +242,8 @@ def test_subplots_layout_multi_column(self, layout, exp_layout): def test_subplots_layout_multi_column_error(self): # GH 6667 df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) msg = "Layout of 1x1 must be larger than required size 3" @@ -261,7 +266,8 @@ def test_subplots_layout_single_column( ): # GH 6667 df = DataFrame( - np.random.default_rng(2).rand(10, 1), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 1), + index=list(string.ascii_letters[:10]), ) axes = df.plot(subplots=True, **kwargs) _check_axes_shape( @@ -283,7 +289,8 @@ def test_subplots_multiple_axes(self): # GH 5353, 6970, GH 7069 fig, axes = mpl.pyplot.subplots(2, 3) df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + 
index=list(string.ascii_letters[:10]), ) returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) @@ -300,7 +307,8 @@ def test_subplots_multiple_axes(self): def test_subplots_multiple_axes_error(self): # GH 5353, 6970, GH 7069 df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 3), + index=list(string.ascii_letters[:10]), ) msg = "The number of passed axes must be 3, the same as the output plot" _, axes = mpl.pyplot.subplots(2, 3) @@ -325,7 +333,8 @@ def test_subplots_multiple_axes_2_dim(self, layout, exp_layout): # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes _, axes = mpl.pyplot.subplots(2, 2) df = DataFrame( - np.random.default_rng(2).rand(10, 4), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 4), + index=list(string.ascii_letters[:10]), ) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) @@ -341,7 +350,8 @@ def test_subplots_multiple_axes_single_col(self): # single column _, axes = mpl.pyplot.subplots(1, 1) df = DataFrame( - np.random.default_rng(2).rand(10, 1), index=list(string.ascii_letters[:10]) + np.random.default_rng(2).random(10, 1), + index=list(string.ascii_letters[:10]), ) axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) @@ -392,7 +402,7 @@ def test_subplots_sharex_axes_existing_axes(self): def test_subplots_dup_columns(self): # GH 10962 - df = DataFrame(np.random.default_rng(2).rand(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).random(5, 5), columns=list("aaaaa")) axes = df.plot(subplots=True) for ax in axes: _check_legend_labels(ax, labels=["a"]) @@ -400,7 +410,7 @@ def test_subplots_dup_columns(self): def test_subplots_dup_columns_secondary_y(self): # GH 10962 - df = DataFrame(np.random.default_rng(2).rand(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).random(5, 5), columns=list("aaaaa")) axes = 
df.plot(subplots=True, secondary_y="a") for ax in axes: # (right) is only attached when subplots=False @@ -409,7 +419,7 @@ def test_subplots_dup_columns_secondary_y(self): def test_subplots_dup_columns_secondary_y_no_subplot(self): # GH 10962 - df = DataFrame(np.random.default_rng(2).rand(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).random(5, 5), columns=list("aaaaa")) ax = df.plot(secondary_y="a") _check_legend_labels(ax, labels=["a (right)"] * 5) assert len(ax.lines) == 0 @@ -469,7 +479,7 @@ def test_boxplot_subplots_return_type(self, hist_df, rt): def test_df_subplots_patterns_minorticks(self): # GH 10657 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), + np.random.default_rng(2).standard_normal((10, 2)), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -489,7 +499,7 @@ def test_df_subplots_patterns_minorticks(self): def test_df_subplots_patterns_minorticks_1st_ax_hidden(self): # GH 10657 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), + np.random.default_rng(2).standard_normal((10, 2)), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -508,7 +518,7 @@ def test_df_subplots_patterns_minorticks_1st_ax_hidden(self): def test_df_subplots_patterns_minorticks_not_shared(self): # GH 10657 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), + np.random.default_rng(2).standard_normal((10, 2)), index=date_range("1/1/2000", periods=10), columns=list("AB"), ) @@ -524,7 +534,7 @@ def test_df_subplots_patterns_minorticks_not_shared(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 - df = DataFrame(np.random.default_rng(2).rand(10, 2)) + df = DataFrame(np.random.default_rng(2).random(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -540,7 +550,7 @@ def test_subplots_sharex_false(self): def test_subplots_constrained_layout(self): # GH 25261 idx = date_range(start="now", 
periods=10) - df = DataFrame(np.random.default_rng(2).rand(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).random(10, 3), index=idx) kwargs = {} if hasattr(mpl.pyplot.Figure, "get_constrained_layout"): kwargs["constrained_layout"] = True diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 04ccbde58d28e..cf50201cca1db 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -89,14 +89,14 @@ def test_boxplot_legacy1_series(self): _check_plot_works(plotting._core.boxplot, data=ser, return_type="dict") def test_boxplot_legacy2(self): - df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) with tm.assert_produces_warning(UserWarning, check_stacklevel=False): _check_plot_works(df.boxplot, by="X") def test_boxplot_legacy2_with_ax(self): - df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # When ax is supplied and required number of axes is 1, @@ -107,7 +107,7 @@ def test_boxplot_legacy2_with_ax(self): assert ax_axes is axes def test_boxplot_legacy2_with_ax_return_type(self): - df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) fig, ax = mpl.pyplot.subplots() @@ -116,7 +116,7 @@ def test_boxplot_legacy2_with_ax_return_type(self): assert ax_axes is axes["A"] def test_boxplot_legacy2_with_multi_col(self): - df = 
DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # Multiple columns with an ax argument should use same figure @@ -128,7 +128,7 @@ def test_boxplot_legacy2_with_multi_col(self): assert axes["Col1"].get_figure() is fig def test_boxplot_legacy2_by_none(self): - df = DataFrame(np.random.default_rng(2).rand(10, 2), columns=["Col1", "Col2"]) + df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # When by is None, check that all relevant lines are present in the @@ -202,7 +202,7 @@ def test_boxplot_empty_column(self): def test_figsize(self): df = DataFrame( - np.random.default_rng(2).rand(10, 5), columns=["A", "B", "C", "D", "E"] + np.random.default_rng(2).random(10, 5), columns=["A", "B", "C", "D", "E"] ) result = df.boxplot(return_type="axes", figsize=(12, 8)) assert result.figure.bbox_inches.width == 12 @@ -240,7 +240,7 @@ def test_boxplot_numeric_data(self): ) def test_color_kwd(self, colors_kwd, expected): # GH: 26214 - df = DataFrame(np.random.default_rng(2).rand(10, 2)) + df = DataFrame(np.random.default_rng(2).random(10, 2)) result = df.boxplot(color=colors_kwd, return_type="dict") for k, v in expected.items(): assert result[k][0].get_color() == v @@ -270,7 +270,7 @@ def test_color_kwd(self, colors_kwd, expected): ) def test_colors_in_theme(self, scheme, expected): # GH: 40769 - df = DataFrame(np.random.default_rng(2).rand(10, 2)) + df = DataFrame(np.random.default_rng(2).random(10, 2)) import matplotlib.pyplot as plt plt.style.use(scheme) @@ -284,7 +284,7 @@ def test_colors_in_theme(self, scheme, expected): ) def test_color_kwd_errors(self, dict_colors, msg): # GH: 26214 - df = DataFrame(np.random.default_rng(2).rand(10, 2)) + df = 
DataFrame(np.random.default_rng(2).random(10, 2)) with pytest.raises(ValueError, match=msg): df.boxplot(color=dict_colors, return_type="dict") @@ -381,7 +381,7 @@ def test_boxplot_legacy1_return_type(self, hist_df): def test_boxplot_legacy2(self): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) ) grouped = df.groupby(level=1) with tm.assert_produces_warning(UserWarning, check_stacklevel=False): @@ -392,7 +392,7 @@ def test_boxplot_legacy2(self): def test_boxplot_legacy2_return_type(self): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) ) grouped = df.groupby(level=1) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") @@ -405,7 +405,7 @@ def test_boxplot_legacy2_return_type(self): def test_boxplot_legacy3(self, subplots, warn, axes_num, layout): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame( - np.random.default_rng(2).rand(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) ) msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): @@ -693,8 +693,8 @@ def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): df = DataFrame( { "cat": np.random.default_rng(2).choice(list("abcde"), 100), - "v": np.random.default_rng(2).rand(100), - "v1": np.random.default_rng(2).rand(100), + "v": np.random.default_rng(2).random(100), + "v1": np.random.default_rng(2).random(100), } ) grouped = df.groupby("cat") diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index f07441543dd94..21685b973709c 100644 --- 
a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -918,10 +918,10 @@ def test_from_resampling_area_line_mixed(self, kind1, kind2): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") high = DataFrame( - np.random.default_rng(2).rand(len(idxh), 3), index=idxh, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxh), 3)), index=idxh, columns=[0, 1, 2] ) low = DataFrame( - np.random.default_rng(2).rand(len(idxl), 3), index=idxl, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxl), 3)), index=idxl, columns=[0, 1, 2] ) _, ax = mpl.pyplot.subplots() @@ -970,10 +970,10 @@ def test_from_resampling_area_line_mixed_high_to_low(self, kind1, kind2): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") high = DataFrame( - np.random.default_rng(2).rand(len(idxh), 3), index=idxh, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxh), 3)), index=idxh, columns=[0, 1, 2] ) low = DataFrame( - np.random.default_rng(2).rand(len(idxl), 3), index=idxl, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxl), 3)), index=idxl, columns=[0, 1, 2] ) _, ax = mpl.pyplot.subplots() high.plot(kind=kind1, stacked=True, ax=ax) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index cee67a9c85531..257dfc9eec5e4 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -72,7 +72,7 @@ def test_hist_legacy_by_fig_error(self, ts): ts.hist(by=ts.index, figure=fig) def test_hist_bins_legacy(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) ax = df.hist(bins=2)[0][0] assert len(ax.patches) == 2 @@ -259,7 +259,7 @@ def test_hist_df_legacy(self, hist_df): @pytest.mark.slow def test_hist_df_legacy_layout(self): # make sure layout is handled - df = 
DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) df[2] = to_datetime( np.random.default_rng(2).integers( 812419200000000000, @@ -283,7 +283,7 @@ def test_hist_df_legacy_layout2(self): @pytest.mark.slow def test_hist_df_legacy_layout3(self): # make sure layout is handled - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) df[5] = to_datetime( np.random.default_rng(2).integers( 812419200000000000, @@ -301,7 +301,7 @@ def test_hist_df_legacy_layout3(self): "kwargs", [{"sharex": True, "sharey": True}, {"figsize": (8, 10)}, {"bins": 5}] ) def test_hist_df_legacy_layout_kwargs(self, kwargs): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) df[5] = to_datetime( np.random.default_rng(2).integers( 812419200000000000, @@ -353,7 +353,7 @@ def test_hist_non_numerical_or_datetime_raises(self): # gh-10444, GH32590 df = DataFrame( { - "a": np.random.default_rng(2).rand(10), + "a": np.random.default_rng(2).random(10), "b": np.random.default_rng(2).integers(0, 10, 10), "c": to_datetime( np.random.default_rng(2).integers( @@ -389,7 +389,7 @@ def test_hist_non_numerical_or_datetime_raises(self): ), ) def test_hist_layout(self, layout_test): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) df[2] = to_datetime( np.random.default_rng(2).integers( 812419200000000000, @@ -403,7 +403,7 @@ def test_hist_layout(self, layout_test): _check_axes_shape(axes, axes_num=3, layout=expected) def test_hist_layout_error(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) df[2] = to_datetime( np.random.default_rng(2).integers( 812419200000000000, @@ -552,7 +552,7 @@ def 
test_hist_with_legend_raises(self, by, column): df.hist(legend=True, by=by, column=column, label="d") def test_hist_df_kwargs(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) _, ax = mpl.pyplot.subplots() ax = df.plot.hist(bins=5, ax=ax) assert len(ax.patches) == 10 diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 20de9c24cc4f8..a2aab6c3c596d 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -483,7 +483,7 @@ def test_dictionary_color(self, kind): expected = [(0.5, 0.24, 0.6), (0.3, 0.7, 0.7)] - df1 = DataFrame(np.random.default_rng(2).rand(2, 2), columns=data_files) + df1 = DataFrame(np.random.default_rng(2).random((2, 2)), columns=data_files) dic_color = {"b": (0.3, 0.7, 0.7), "a": (0.5, 0.24, 0.6)} ax = df1.plot(kind=kind, color=dic_color) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 175679e171ad2..1297eba930c41 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -661,7 +661,7 @@ def test_dup_datetime_index_plot(self): def test_errorbar_asymmetrical(self): # GH9536 s = Series(np.arange(10), name="x") - err = np.random.default_rng(2).rand(2, 10) + err = np.random.default_rng(2).random((2, 10)) ax = s.plot(yerr=err, xerr=err) @@ -674,7 +674,7 @@ def test_errorbar_asymmetrical(self): f"with the shape \\(2, {len(s)}\\)" ) with pytest.raises(ValueError, match=msg): - s.plot(yerr=np.random.default_rng(2).rand(2, 11)) + s.plot(yerr=np.random.default_rng(2).random((2, 11))) @pytest.mark.slow @pytest.mark.parametrize("kind", ["line", "bar"]) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index e54cada0e25b8..2f66cfb027921 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -46,7 +46,7 @@ def 
_index_freq(): @pytest.fixture def _static_values(index): - return np.random.default_rng(2).rand(len(index)) + return np.random.default_rng(2).random(len(index)) @pytest.fixture(params=["s", "ms", "us", "ns"]) @@ -94,7 +94,7 @@ def test_custom_grouper_df(index, unit): b = Grouper(freq=Minute(5), closed="right", label="right") dti = index.as_unit(unit) df = DataFrame( - np.random.default_rng(2).rand(len(dti), 10), index=dti, dtype="float64" + np.random.default_rng(2).random((len(dti), 10)), index=dti, dtype="float64" ) r = df.groupby(b).agg("sum") @@ -341,7 +341,7 @@ def test_resample_basic_from_daily(unit): start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" ).as_unit(unit) - s = Series(np.random.default_rng(2).rand(len(dti)), dti) + s = Series(np.random.default_rng(2).random(len(dti)), dti) # to weekly result = s.resample("w-sun").last() @@ -456,7 +456,7 @@ def test_resample_upsample(unit): start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" ).as_unit(unit) - s = Series(np.random.default_rng(2).rand(len(dti)), dti) + s = Series(np.random.default_rng(2).random(len(dti)), dti) # to minutely, by padding result = s.resample("Min").ffill() @@ -657,7 +657,7 @@ def test_resample_reresample(unit): dti = date_range( start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D" ).as_unit(unit) - s = Series(np.random.default_rng(2).rand(len(dti)), dti) + s = Series(np.random.default_rng(2).random(len(dti)), dti) bs = s.resample("B", closed="right", label="right").mean() result = bs.resample("8H").mean() assert len(result) == 22 diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index caf6fdc1a6239..026fe0ee5cc9a 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -24,7 +24,7 @@ def dti(): @pytest.fixture def _test_series(dti): - return Series(np.random.default_rng(2).rand(len(dti)), dti) + return 
Series(np.random.default_rng(2).random(len(dti)), dti) @pytest.fixture @@ -279,7 +279,7 @@ def test_transform_frame(on): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ) expected = df.groupby(pd.Grouper(freq="20min")).transform("mean") if on == "date": @@ -358,7 +358,7 @@ def test_agg_consistency(): def test_agg_consistency_int_str_column_mix(): # GH#39025 df = DataFrame( - np.random.default_rng(2).standard_normal(1000, 2), + np.random.default_rng(2).standard_normal((1000, 2)), index=date_range("1/1/2012", freq="S", periods=1000), columns=[1, "a"], ) @@ -380,7 +380,7 @@ def test_agg(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ) df_col = df.reset_index() df_mult = df_col.copy() @@ -494,7 +494,7 @@ def test_agg_misc(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ) df_col = df.reset_index() df_mult = df_col.copy() @@ -593,7 +593,7 @@ def test_multi_agg_axis_1_raises(func): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ).T warning_msg = "DataFrame.resample with axis=1 is deprecated." 
with tm.assert_produces_warning(FutureWarning, match=warning_msg): @@ -608,7 +608,7 @@ def test_agg_nested_dicts(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index ) df_col = df.reset_index() df_mult = df_col.copy() @@ -1015,7 +1015,7 @@ def test_df_axis_param_depr(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).rand(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index ).T # Deprecation error when axis=1 is explicitly passed diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 856e2e152bb39..63239b6c82d4e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -273,7 +273,7 @@ def test_apply_with_mutated_index(): # GH 15169 index = date_range("1-1-2015", "12-31-15", freq="D") df = DataFrame( - data={"col1": np.random.default_rng(2).rand(len(index))}, index=index + data={"col1": np.random.default_rng(2).random(len(index))}, index=index ) def f(x): diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index ee7f33a6109f0..b2b5b885e809e 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -80,12 +80,12 @@ def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out): def test_concat_rename_index(self): a = DataFrame( - np.random.default_rng(2).rand(3, 3), + np.random.default_rng(2).random(3, 3), columns=list("ABC"), index=Index(list("abc"), name="index_a"), ) b = DataFrame( - np.random.default_rng(2).rand(3, 3), + np.random.default_rng(2).random(3, 3), columns=list("ABC"), 
index=Index(list("abc"), name="index_b"), ) diff --git a/pandas/tests/reshape/concat/test_invalid.py b/pandas/tests/reshape/concat/test_invalid.py index f2317c5ab4fac..c84571c5c7107 100644 --- a/pandas/tests/reshape/concat/test_invalid.py +++ b/pandas/tests/reshape/concat/test_invalid.py @@ -34,7 +34,7 @@ def test_concat_invalid_first_argument(self): def test_concat_generator_obj(self): # generator ok though - concat(DataFrame(np.random.default_rng(2).rand(5, 5)) for _ in range(3)) + concat(DataFrame(np.random.default_rng(2).random(5, 5)) for _ in range(3)) def test_concat_textreader_obj(self): # text reader ok diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index eb673cb5fd727..3ddfdd530f330 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -466,7 +466,7 @@ def test_join_hierarchical_mixed_raises(self): def test_join_float64_float32(self): a = DataFrame( - np.random.default_rng(2).standard_normal(10, 2), + np.random.default_rng(2).standard_normal((10, 2)), columns=["a", "b"], dtype=np.float64, ) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 560cf39299cc9..4ad0d1e35d6d0 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -372,7 +372,7 @@ def test_indexing_over_size_cutoff_period_index(monkeypatch): def test_indexing_unordered(): # GH 2437 rng = date_range(start="2011-01-01", end="2011-01-15") - ts = Series(np.random.default_rng(2).rand(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).random(len(rng)), index=rng) ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]]) for t in ts.index: @@ -418,7 +418,7 @@ def test_indexing_unordered2(): def test_indexing(): idx = date_range("2001-1-1", periods=20, freq="M") - ts = Series(np.random.default_rng(2).rand(len(idx)), index=idx) + ts = Series(np.random.default_rng(2).random(len(idx)), index=idx) 
# getting @@ -470,7 +470,7 @@ def test_getitem_str_year_with_datetimeindex(): def test_getitem_str_second_with_datetimeindex(): # GH14826, indexing with a seconds resolution string / datetime object df = DataFrame( - np.random.default_rng(2).rand(5, 5), + np.random.default_rng(2).random(5, 5), columns=["open", "high", "low", "close", "volume"], index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"), ) diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py index 153e3b49278bd..51866800399a0 100644 --- a/pandas/tests/series/methods/test_cov_corr.py +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -45,8 +45,8 @@ def test_cov(self, datetime_series): @pytest.mark.parametrize("dtype", ["float64", "Float64"]) def test_cov_ddof(self, test_ddof, dtype): # GH#34611 - np_array1 = np.random.default_rng(2).rand(10) - np_array2 = np.random.default_rng(2).rand(10) + np_array1 = np.random.default_rng(2).random(10) + np_array2 = np.random.default_rng(2).random(10) s1 = Series(np_array1, dtype=dtype) s2 = Series(np_array2, dtype=dtype) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index d0026c165dddf..3b099720dcdf2 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -217,7 +217,7 @@ def test_interpolate_corners(self, kwargs): tm.assert_series_equal(s.interpolate(**kwargs), s) def test_interpolate_index_values(self): - s = Series(np.nan, index=np.sort(np.random.default_rng(2).rand(30))) + s = Series(np.nan, index=np.sort(np.random.default_rng(2).random(30))) s[::3] = np.random.default_rng(2).standard_normal(10) vals = s.index.values.astype(float) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 13d85d182ed0a..39578212d4af0 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ 
b/pandas/tests/series/methods/test_reset_index.py @@ -18,7 +18,7 @@ class TestResetIndex: def test_reset_index_dti_round_trip(self): dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) - d1 = DataFrame({"v": np.random.default_rng(2).rand(len(dti))}, index=dti) + d1 = DataFrame({"v": np.random.default_rng(2).random(len(dti))}, index=dti) d2 = d1.reset_index() assert d2.dtypes.iloc[0] == np.dtype("M8[ns]") d3 = d2.set_index("index") diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 5c6591e623561..ad0371acfe7a6 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -305,8 +305,8 @@ def testit(): def test_bool_ops_raise_on_arithmetic(self, op_str, opname): df = DataFrame( { - "a": np.random.default_rng(2).rand(10) > 0.5, - "b": np.random.default_rng(2).rand(10) > 0.5, + "a": np.random.default_rng(2).random(10) > 0.5, + "b": np.random.default_rng(2).random(10) > 0.5, } ) @@ -339,8 +339,8 @@ def test_bool_ops_warn_on_arithmetic(self, op_str, opname): n = 10 df = DataFrame( { - "a": np.random.default_rng(2).rand(n) > 0.5, - "b": np.random.default_rng(2).rand(n) > 0.5, + "a": np.random.default_rng(2).random(n) > 0.5, + "b": np.random.default_rng(2).random(n) > 0.5, } ) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0410599945eb5..a406c3047b9b3 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -84,7 +84,9 @@ def test_groupby_corner(self): names=["one", "two", "three"], ) df = DataFrame( - [np.random.default_rng(2).rand(4)], columns=["a", "b", "c", "d"], index=midx + [np.random.default_rng(2).random(4)], + columns=["a", "b", "c", "d"], + index=midx, ) # should work df.groupby(level="three") diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 13728520d502d..3188ac70b8254 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -34,7 +34,7 @@ def arr_shape(): 
@pytest.fixture def arr_float(arr_shape): - return np.random.default_rng(2).standard_normal(*arr_shape) + return np.random.default_rng(2).standard_normal(arr_shape) @pytest.fixture @@ -190,8 +190,8 @@ def setup_method(self): arr_shape = (11, 7) - self.arr_float = np.random.default_rng(2).standard_normal(*arr_shape) - self.arr_float1 = np.random.default_rng(2).standard_normal(*arr_shape) + self.arr_float = np.random.default_rng(2).standard_normal(arr_shape) + self.arr_float1 = np.random.default_rng(2).standard_normal(arr_shape) self.arr_complex = self.arr_float + self.arr_float1 * 1j self.arr_int = np.random.default_rng(2).integers(-10, 10, arr_shape) self.arr_bool = np.random.default_rng(2).integers(0, 2, arr_shape) == 0 diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index a29e42d0b6688..1b69ff1a35e20 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -124,7 +124,7 @@ def test_1d_other_dtypes(self): tm.assert_almost_equal(result, expected) def test_2d_other_dtypes(self): - arr = np.random.default_rng(2).standard_normal(10, 5).astype(np.float32) + arr = np.random.default_rng(2).standard_normal((10, 5)).astype(np.float32) indexer = [1, 2, 3, -1] diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 432117dbd2155..a0882feb5dd5c 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -231,7 +231,7 @@ def test_expanding_sem(frame_or_series): @pytest.mark.parametrize("method", ["skew", "kurt"]) def test_expanding_skew_kurt_numerical_stability(method): # GH: 6929 - s = Series(np.random.default_rng(2).rand(10)) + s = Series(np.random.default_rng(2).random(10)) expected = getattr(s.expanding(3), method)() s = s + 5000 result = getattr(s.expanding(3), method)() @@ -246,7 +246,7 @@ def test_expanding_skew_kurt_numerical_stability(method): def test_rank(window, method, pct, ascending, test_data): length = 20 if test_data == "default": - ser = 
Series(data=np.random.default_rng(2).rand(length)) + ser = Series(data=np.random.default_rng(2).random(length)) elif test_data == "duplicates": ser = Series(data=np.random.default_rng(2).choice(3, length)) elif test_data == "nans": @@ -625,7 +625,7 @@ def mean_w_arg(x, const): engine, raw = engine_and_raw - df = DataFrame(np.random.default_rng(2).rand(20, 3)) + df = DataFrame(np.random.default_rng(2).random((20, 3))) expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0 diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 5e04c56dfc712..9f2782831a32d 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -135,7 +135,7 @@ def test_corr_sanity(): res = df[0].rolling(5, center=True).corr(df[1]) assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) - df = DataFrame(np.random.default_rng(2).rand(30, 2)) + df = DataFrame(np.random.default_rng(2).random((30, 2))) res = df[0].rolling(5, center=True).corr(df[1]) assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 82274296beb9d..7d138727dd339 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1038,7 +1038,7 @@ def test_rolling_numerical_accuracy_jump(): index = date_range(start="2020-01-01", end="2020-01-02", freq="60s").append( DatetimeIndex(["2020-01-03"]) ) - data = np.random.default_rng(2).rand(len(index)) + data = np.random.default_rng(2).random(len(index)) df = DataFrame({"data": data}, index=index) result = df.rolling("60s").mean() @@ -1465,7 +1465,7 @@ def test_groupby_rolling_nan_included(): @pytest.mark.parametrize("method", ["skew", "kurt"]) def test_rolling_skew_kurt_numerical_stability(method): # GH#6929 - ser = Series(np.random.default_rng(2).rand(10)) + ser = Series(np.random.default_rng(2).random(10)) ser_copy = ser.copy() expected = getattr(ser.rolling(3), method)() 
tm.assert_series_equal(ser, ser_copy) @@ -1654,7 +1654,7 @@ def test_rolling_numeric_dtypes(): def test_rank(window, method, pct, ascending, test_data): length = 20 if test_data == "default": - ser = Series(data=np.random.default_rng(2).rand(length)) + ser = Series(data=np.random.default_rng(2).random(length)) elif test_data == "duplicates": ser = Series(data=np.random.default_rng(2).choice(3, length)) elif test_data == "nans": @@ -1679,7 +1679,7 @@ def test_rolling_quantile_np_percentile(): col = 5 idx = date_range("20100101", periods=row, freq="B") df = DataFrame( - np.random.default_rng(2).rand(row * col).reshape((row, -1)), index=idx + np.random.default_rng(2).random(row * col).reshape((row, -1)), index=idx ) df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) From af5829b17bb72ee78da463bd77acb0d14a9d4b20 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:10:36 -0700 Subject: [PATCH 06/22] Replace more --- pandas/tests/apply/test_invalid_arg.py | 2 +- pandas/tests/extension/json/array.py | 2 +- pandas/tests/extension/list/array.py | 2 +- .../frame/constructors/test_from_records.py | 4 +- pandas/tests/frame/indexing/test_indexing.py | 6 +- pandas/tests/frame/indexing/test_set_value.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/frame/indexing/test_where.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 2 +- pandas/tests/frame/methods/test_drop.py | 2 +- pandas/tests/frame/methods/test_dropna.py | 2 +- pandas/tests/frame/methods/test_fillna.py | 2 +- pandas/tests/frame/methods/test_rank.py | 4 +- pandas/tests/frame/methods/test_reindex.py | 2 +- .../tests/frame/methods/test_reset_index.py | 2 +- pandas/tests/frame/methods/test_sample.py | 18 ++--- pandas/tests/frame/methods/test_set_index.py | 4 +- pandas/tests/frame/methods/test_to_csv.py | 4 +- pandas/tests/frame/methods/test_to_dict.py | 2 +- pandas/tests/frame/methods/test_to_records.py | 6 +- 
pandas/tests/frame/test_arithmetic.py | 14 +++- pandas/tests/frame/test_nonunique_indexes.py | 4 +- pandas/tests/frame/test_query_eval.py | 4 +- pandas/tests/groupby/test_categorical.py | 2 +- pandas/tests/groupby/test_function.py | 2 +- pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/groupby/test_libgroupby.py | 10 +-- pandas/tests/groupby/test_pipe.py | 2 +- pandas/tests/groupby/test_quantile.py | 6 +- .../tests/groupby/transform/test_transform.py | 2 +- pandas/tests/indexes/multi/test_get_set.py | 2 +- pandas/tests/indexes/multi/test_sorting.py | 2 +- pandas/tests/indexing/multiindex/test_iloc.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 10 +-- .../tests/indexing/multiindex/test_partial.py | 2 +- pandas/tests/indexing/test_categorical.py | 4 +- pandas/tests/indexing/test_iloc.py | 6 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/indexing/test_scalar.py | 2 +- pandas/tests/interchange/test_impl.py | 2 +- pandas/tests/io/excel/test_style.py | 8 +- pandas/tests/io/parser/test_index_col.py | 2 +- pandas/tests/io/pytables/test_complex.py | 8 +- pandas/tests/io/pytables/test_errors.py | 8 +- pandas/tests/io/pytables/test_read.py | 24 ++++-- pandas/tests/io/pytables/test_round_trip.py | 6 +- pandas/tests/io/pytables/test_select.py | 4 +- pandas/tests/io/pytables/test_store.py | 6 +- pandas/tests/io/pytables/test_time_series.py | 6 +- pandas/tests/io/pytables/test_timezones.py | 20 +++-- pandas/tests/io/test_parquet.py | 6 +- pandas/tests/plotting/frame/test_frame.py | 40 +++++----- .../tests/plotting/frame/test_frame_color.py | 78 +++++++++---------- .../tests/plotting/frame/test_frame_legend.py | 16 ++-- .../plotting/frame/test_frame_subplots.py | 8 +- pandas/tests/plotting/test_boxplot_method.py | 14 ++-- pandas/tests/plotting/test_datetimelike.py | 24 +++--- pandas/tests/plotting/test_groupby.py | 12 +-- pandas/tests/plotting/test_hist_method.py 
| 30 +++---- pandas/tests/plotting/test_misc.py | 4 +- pandas/tests/plotting/test_series.py | 4 +- pandas/tests/resample/test_datetime_index.py | 2 +- pandas/tests/reshape/merge/test_join.py | 5 +- pandas/tests/reshape/test_crosstab.py | 2 +- pandas/tests/series/methods/test_astype.py | 4 +- pandas/tests/series/test_constructors.py | 2 +- pandas/tests/test_expressions.py | 2 +- pandas/tests/test_multilevel.py | 4 +- pandas/tests/test_nanops.py | 6 +- pandas/tests/test_sorting.py | 2 +- pandas/tests/window/test_api.py | 2 +- pandas/tests/window/test_expanding.py | 2 +- pandas/tests/window/test_rolling.py | 2 +- 75 files changed, 274 insertions(+), 249 deletions(-) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 315bbbac749eb..5e6026a9348c4 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -93,7 +93,7 @@ def test_series_nested_renamer(renamer): def test_apply_dict_depr(): tsdf = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), + np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=10), ) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 49dce278a7413..1f0eaaaaa589e 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -239,7 +239,7 @@ def make_data(): return [ UserDict( [ - (rng.choice(string.ascii_letters), rng.integers(0, 100)) + (rng.choice(list(string.ascii_letters)), rng.integers(0, 100)) for _ in range(rng.integers(0, 10)) ] ) diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 9554aef3842fb..5b8955087436e 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -128,7 +128,7 @@ def make_data(): rng = np.random.default_rng(2) data = np.empty(100, dtype=object) data[:] = [ - [rng.choice(string.ascii_letters) for _ in 
range(rng.integers(0, 10))] + [rng.choice(list(string.ascii_letters)) for _ in range(rng.integers(0, 10))] for _ in range(100) ] return data diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 133bd130a3add..95f9f2ba4051e 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -192,7 +192,7 @@ def test_from_records_dictlike(self): def test_from_records_with_index_data(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"] ) data = np.random.default_rng(2).standard_normal(10) @@ -202,7 +202,7 @@ def test_from_records_with_index_data(self): def test_from_records_bad_index_column(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"] ) # should pass diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9154add5120fd..deddbcb79706b 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -224,7 +224,7 @@ def test_getitem_boolean_list(self, lst): tm.assert_frame_equal(result, expected) def test_getitem_boolean_iadd(self): - arr = np.random.default_rng(2).standard_normal(5, 5) + arr = np.random.default_rng(2).standard_normal((5, 5)) df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"]) @@ -726,7 +726,7 @@ def test_getitem_setitem_boolean_multi(self): def test_getitem_setitem_float_labels(self, using_array_manager): index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5)), index=index) result = df.loc[1.5:4] expected = df.reindex([1.5, 2, 3, 4]) @@ -754,7 
+754,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): # #2727 index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5)), index=index) # positional slicing only via iloc! msg = ( diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index e7c973aa8ec75..b4e4f238b4fa1 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -42,7 +42,7 @@ def test_set_value_resize(self, float_frame): def test_set_value_with_index_dtype_change(self): df_orig = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), index=range(3), columns=list("ABC"), ) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index f6ef6e42ef18c..b379c12465361 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -403,7 +403,7 @@ def test_setitem_period_d_dtype(self): def test_setitem_bool_with_numeric_index(self, dtype): # GH#36319 cols = Index([1, 2, 3], dtype=dtype) - df = DataFrame(np.random.default_rng(2).standard_normal(3, 3), columns=cols) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 3)), columns=cols) df[False] = ["a", "b", "c"] diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index b80117f9258ba..f8bef2ca4c97a 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -434,7 +434,7 @@ def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): def test_where_align(self): def create(): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 3))) df.iloc[3:5, 0] = np.nan df.iloc[4:6, 1] = 
np.nan df.iloc[5:8, 2] = np.nan diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 6fad30555b83b..9bad3a12e0e8f 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -348,7 +348,7 @@ def test_xs_IndexSlice_argument_not_implemented(self, frame_or_series): codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], ) - obj = DataFrame(np.random.default_rng(2).standard_normal(6, 4), index=index) + obj = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index) if frame_or_series is Series: obj = obj[0] diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 4382b575e7221..cf00934fd1c52 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -160,7 +160,7 @@ def test_drop(self): # inplace cache issue # GH#5628 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=list("abc") + np.random.default_rng(2).standard_normal((10, 3)), columns=list("abc") ) expected = df[~(df.b > 0)] return_value = df.drop(labels=df[df.b > 0].index, inplace=True) diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index aeb82ba586614..11edf665b5494 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -65,7 +65,7 @@ def test_dropIncompleteRows(self, float_frame): assert return_value is None def test_dropna(self): - df = DataFrame(np.random.default_rng(2).standard_normal(6, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((6, 4))) df.iloc[:2, 2] = np.nan dropped = df.dropna(axis=1) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 371afd7cc8712..df1f9715e5792 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -135,7 +135,7 @@ def test_fillna_different_dtype(self): def 
test_fillna_limit_and_value(self): # limit and value - df = DataFrame(np.random.default_rng(2).standard_normal(10, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 3))) df.iloc[2:7, 0] = np.nan df.iloc[3:5, 2] = np.nan diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 35776e49e2c50..efe4b17f95ce8 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -128,7 +128,9 @@ def test_rank2(self): def test_rank_does_not_mutate(self): # GH#18521 # Check rank does not mutate DataFrame - df = DataFrame(np.random.default_rng(2).standard_normal(10, 3), dtype="float64") + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 3)), dtype="float64" + ) expected = df.copy() df.rank() result = df diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index cc1be2066a8a3..36d5938095276 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1026,7 +1026,7 @@ def test_reindex_with_nans(self): tm.assert_frame_equal(result, expected) def test_reindex_multi(self): - df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 3))) result = df.reindex(index=range(4), columns=range(4)) expected = df.reindex(list(range(4))).reindex(columns=range(4)) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index d61306636c6f2..fa28ebc16e942 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -250,7 +250,7 @@ def test_reset_index_right_dtype(self): assert reset["time"].dtype == np.float64 def test_reset_index_multiindex_col(self): - vals = np.random.default_rng(2).standard_normal(3, 3).astype(object) + vals = np.random.default_rng(2).standard_normal((3, 3)).astype(object) idx = ["x", "y", "z"] 
full = np.hstack(([[x] for x in idx], vals)) df = DataFrame( diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 2f6e839d224cc..d11b330491378 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -37,29 +37,25 @@ def test_sample(self, test, obj): ) tm.assert_equal( - obj.sample(n=4, random_state=np.random.default_rng(2).RandomState(test)), - obj.sample(n=4, random_state=np.random.default_rng(2).RandomState(test)), + obj.sample(n=4, random_state=np.random.default_rng(test)), + obj.sample(n=4, random_state=np.random.default_rng(test)), ) tm.assert_equal( - obj.sample( - frac=0.7, random_state=np.random.default_rng(2).RandomState(test) - ), - obj.sample( - frac=0.7, random_state=np.random.default_rng(2).RandomState(test) - ), + obj.sample(frac=0.7, random_state=np.random.default_rng(test)), + obj.sample(frac=0.7, random_state=np.random.default_rng(test)), ) tm.assert_equal( obj.sample( frac=2, replace=True, - random_state=np.random.default_rng(2).RandomState(test), + random_state=np.random.default_rng(test), ), obj.sample( frac=2, replace=True, - random_state=np.random.default_rng(2).RandomState(test), + random_state=np.random.default_rng(test), ), ) @@ -345,7 +341,7 @@ def test_sample_is_copy(self): # GH#27357, GH#30784: ensure the result of sample is an actual copy and # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((10, 3)), columns=["a", "b", "c"] ) df2 = df.sample(3) diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 4a5f8617724dc..5984e591dd6c1 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -67,7 +67,9 @@ def test_set_index_empty_dataframe(self): def 
test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) - df = DataFrame(np.random.default_rng(2).standard_normal(3, 3), columns=columns) + df = DataFrame( + np.random.default_rng(2).standard_normal((3, 3)), columns=columns + ) result = df.set_index(df.columns[0]) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 74308aab77247..5db059448402d 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -74,7 +74,7 @@ def test_to_csv_from_csv2(self, float_frame): with tm.ensure_clean("__tmp_to_csv_from_csv2__") as path: # duplicate index df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), index=["a", "a", "b"], columns=["x", "y", "z"], ) @@ -84,7 +84,7 @@ def test_to_csv_from_csv2(self, float_frame): midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), index=midx, columns=["x", "y", "z"], ) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index ac689357a1458..1446a74b29c32 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -149,7 +149,7 @@ def test_to_dict(self, mapping): @pytest.mark.parametrize("mapping", [list, defaultdict, []]) def test_to_dict_errors(self, mapping): # GH#16122 - df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 3))) msg = "|".join( [ "unsupported type: ", diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 44875be57f1a4..1236cb135842e 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -20,7 
+20,7 @@ class TestDataFrameToRecords: def test_to_records_timeseries(self): index = date_range("1/1/2000", periods=10) df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), + np.random.default_rng(2).standard_normal((10, 3)), index=index, columns=["a", "b", "c"], ) @@ -82,12 +82,12 @@ def test_to_records_floats(self): df.to_records() def test_to_records_index_name(self): - df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 3))) df.index.name = "X" rs = df.to_records() assert "X" in rs.dtype.fields - df = DataFrame(np.random.default_rng(2).standard_normal(3, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 3))) rs = df.to_records() assert "index" in rs.dtype.fields diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 82a433fa6e811..68ae8a4cb2a79 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -90,7 +90,7 @@ def test_comparison_with_categorical_dtype(self): def test_frame_in_list(self): # GH#12689 this should raise at the DataFrame level, not blocks df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), columns=list("ABCD") + np.random.default_rng(2).standard_normal((6, 4)), columns=list("ABCD") ) msg = "The truth value of a DataFrame is ambiguous" with pytest.raises(ValueError, match=msg): @@ -1810,7 +1810,9 @@ def test_alignment_non_pandas(self, val): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), index=index, columns=columns + np.random.default_rng(2).standard_normal((3, 3)), + index=index, + columns=columns, ) align = DataFrame._align_for_op @@ -1828,7 +1830,9 @@ def test_alignment_non_pandas_length_mismatch(self, val): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), index=index, columns=columns + 
np.random.default_rng(2).standard_normal((3, 3)), + index=index, + columns=columns, ) align = DataFrame._align_for_op @@ -1844,7 +1848,9 @@ def test_alignment_non_pandas_index_columns(self): index = ["A", "B", "C"] columns = ["X", "Y", "Z"] df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), index=index, columns=columns + np.random.default_rng(2).standard_normal((3, 3)), + index=index, + columns=columns, ) align = DataFrame._align_for_op diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 1203b0841b7a2..3b65ee9a431db 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -298,10 +298,10 @@ def test_multi_dtype2(self): def test_dups_across_blocks(self, using_array_manager): # dups across blocks df_float = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), dtype="float64" + np.random.default_rng(2).standard_normal((10, 3)), dtype="float64" ) df_int = DataFrame( - np.random.default_rng(2).standard_normal(10, 3).astype("int64") + np.random.default_rng(2).standard_normal((10, 3)).astype("int64") ) df_bool = DataFrame(True, index=df_float.index, columns=df_float.columns) df_object = DataFrame("foo", index=df_float.index, columns=df_float.columns) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 7ecbf1f6e122b..3de7790efdafe 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -541,7 +541,7 @@ def test_query_builtin(self, engine, parser): def test_query(self, engine, parser): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((10, 3)), columns=["a", "b", "c"] ) tm.assert_frame_equal( @@ -1118,7 +1118,7 @@ class TestDataFrameEvalWithFrame: @pytest.fixture def frame(self): return DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=list("abc") + 
np.random.default_rng(2).standard_normal((10, 3)), columns=list("abc") ) def test_simple_expr(self, frame, parser, engine): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 6cbac17d3fb62..229582f007710 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -679,7 +679,7 @@ def test_datetime(): def test_categorical_index(): - s = np.random.default_rng(2).RandomState(12345) + s = np.random.default_rng(2) levels = ["foo", "bar", "baz", "qux"] codes = s.integers(0, 4, size=20) cats = Categorical.from_codes(codes, levels, ordered=True) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 852de27052053..dc3941bd9dfee 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -743,7 +743,7 @@ def test_nlargest(): def test_nlargest_mi_grouper(): # see gh-21411 - npr = np.random.default_rng(2).RandomState(123456789) + npr = np.random.default_rng(2) dts = date_range("20180101", periods=10) iterables = [dts, ["one", "two"]] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 1965b75932c5c..11b5561b8c3bb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1211,7 +1211,7 @@ def test_groupby_with_hier_columns(): [("A", "cat"), ("B", "dog"), ("B", "cat"), ("A", "dog")] ) df = DataFrame( - np.random.default_rng(2).standard_normal(8, 4), index=index, columns=columns + np.random.default_rng(2).standard_normal((8, 4)), index=index, columns=columns ) result = df.groupby(level=0).mean() @@ -2563,7 +2563,7 @@ def test_groupby_numerical_stability_cumsum(): def test_groupby_cumsum_skipna_false(): # GH#46216 don't propagate np.nan above the diagonal - arr = np.random.default_rng(2).standard_normal(5, 5) + arr = np.random.default_rng(2).standard_normal((5, 5)) df = DataFrame(arr) for i in range(5): df.iloc[i, i] = np.nan 
diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index ed810ebb439e4..3e842eeb6faad 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -18,7 +18,7 @@ class GroupVarTestMixin: def test_group_var_generic_1d(self): - prng = np.random.default_rng(2).RandomState(1234) + prng = np.random.default_rng(2) out = (np.nan * np.ones((5, 1))).astype(self.dtype) counts = np.zeros(5, dtype="int64") @@ -35,7 +35,7 @@ def test_group_var_generic_1d(self): tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_1d_flat_labels(self): - prng = np.random.default_rng(2).RandomState(1234) + prng = np.random.default_rng(2) out = (np.nan * np.ones((1, 1))).astype(self.dtype) counts = np.zeros(1, dtype="int64") @@ -51,7 +51,7 @@ def test_group_var_generic_1d_flat_labels(self): tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_2d_all_finite(self): - prng = np.random.default_rng(2).RandomState(1234) + prng = np.random.default_rng(2) out = (np.nan * np.ones((5, 2))).astype(self.dtype) counts = np.zeros(5, dtype="int64") @@ -66,7 +66,7 @@ def test_group_var_generic_2d_all_finite(self): tm.assert_numpy_array_equal(counts, expected_counts) def test_group_var_generic_2d_some_nan(self): - prng = np.random.default_rng(2).RandomState(1234) + prng = np.random.default_rng(2) out = (np.nan * np.ones((5, 2))).astype(self.dtype) counts = np.zeros(5, dtype="int64") @@ -109,7 +109,7 @@ class TestGroupVarFloat64(GroupVarTestMixin): rtol = 1e-5 def test_group_var_large_inputs(self): - prng = np.random.default_rng(2).RandomState(1234) + prng = np.random.default_rng(2) out = np.array([[np.nan]], dtype=self.dtype) counts = np.array([0], dtype="int64") diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 3e3086f9095cc..ee13c37391065 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -12,7 
+12,7 @@ def test_pipe(): # Test the pipe method of DataFrameGroupBy. # Issue #17871 - random_state = np.random.default_rng(2).RandomState(1234567890) + random_state = np.random.default_rng(2) df = DataFrame( { diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 84ba768d64728..8f3de4d8ff7cc 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -93,11 +93,7 @@ def test_quantile_array(): def test_quantile_array2(): # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 - arr = ( - np.random.default_rng(2) - .RandomState(0) - .integers(0, 5, size=(10, 3), dtype=np.int64) - ) + arr = np.random.default_rng(2).integers(0, 5, size=(10, 3), dtype=np.int64) df = DataFrame(arr, columns=list("ABC")) result = df.groupby("A").quantile([0.3, 0.7]) expected = DataFrame( diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 8d7bcf367144f..8823467032370 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -59,7 +59,7 @@ def demean(arr): return arr - arr.mean(axis=0) people = DataFrame( - np.random.default_rng(2).standard_normal(5, 5), + np.random.default_rng(2).standard_normal((5, 5)), columns=["a", "b", "c", "d", "e"], index=["Joe", "Steve", "Wes", "Jim", "Travis"], ) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 9687ac34da4b7..0720a1e1c648c 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -322,7 +322,7 @@ def test_set_value_keeps_names(): lev2 = ["1", "2", "3"] * 2 idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"]) df = pd.DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), columns=["one", "two", "three", "four"], index=idx, ) diff --git 
a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 4d650b7c8c545..bd8eb87a8e9e2 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -241,7 +241,7 @@ def test_remove_unused_levels_large(first_type, second_type): # because tests should be deterministic (and this test in particular # checks that levels are removed, which is not the case for every # random input): - rng = np.random.default_rng(2).RandomState(4) # seed is arbitrary value that works + rng = np.random.default_rng(2) # seed is arbitrary value that works size = 1 << 16 df = DataFrame( diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index 9275cc376ad44..77f9511b63c3a 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -17,7 +17,7 @@ def simple_multiindex_dataframe(): random data by default. """ - data = np.random.default_rng(2).standard_normal(3, 3) + data = np.random.default_rng(2).standard_normal((3, 3)) return DataFrame( data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] ) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 478fe788f5ef7..ee160e7dcabcd 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -76,7 +76,7 @@ def test_loc_getitem_general(self, any_real_numpy_dtype): def test_loc_getitem_multiindex_missing_label_raises(self): # GH#21593 df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -155,7 +155,7 @@ def test_loc_getitem_array(self): def test_loc_multiindex_labels(self): df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=[["i", "i", 
"j"], ["A", "A", "B"]], index=[["i", "i", "j"], ["X", "X", "Y"]], ) @@ -182,7 +182,7 @@ def test_loc_multiindex_labels(self): def test_loc_multiindex_ints(self): df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -192,7 +192,7 @@ def test_loc_multiindex_ints(self): def test_loc_multiindex_missing_label_raises(self): df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) @@ -204,7 +204,7 @@ def test_loc_multiindex_missing_label_raises(self): def test_loc_multiindex_list_missing_label(self, key, pos): # GH 27148 - lists with missing labels _do_ raise df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 80479a2016181..dd1d5a8fbcc57 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -66,7 +66,7 @@ def test_xs_partial( ], ) df = DataFrame( - np.random.default_rng(2).standard_normal(8, 4), + np.random.default_rng(2).standard_normal((8, 4)), index=index, columns=list("abcd"), ) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index c8957dd02ef82..b45d197af332e 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -403,7 +403,7 @@ def test_loc_getitem_listlike_unused_category_raises_keyerror(self): def test_ix_categorical_index(self): # GH 12531 df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), index=list("ABC"), 
columns=list("XYZ"), ) @@ -428,7 +428,7 @@ def test_ix_categorical_index(self): def test_ix_categorical_index_non_unique(self): # non-unique df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), index=list("ABA"), columns=list("XYX"), ) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 36209c08deddd..61d4dc05d4ac3 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -639,7 +639,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): # multi axis slicing issue with single block # surfaced in GH 6059 - arr = np.random.default_rng(2).standard_normal(6, 4) + arr = np.random.default_rng(2).standard_normal((6, 4)) index = date_range("20130101", periods=6) columns = list("ABCD") df = DataFrame(arr, index=index, columns=columns) @@ -664,7 +664,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): tm.assert_frame_equal(result, expected) # related - arr = np.random.default_rng(2).standard_normal(6, 4) + arr = np.random.default_rng(2).standard_normal((6, 4)) index = list(range(0, 12, 2)) columns = list(range(0, 8, 2)) df = DataFrame(arr, index=index, columns=columns) @@ -1060,7 +1060,7 @@ def test_iloc_setitem_dictionary_value(self): def test_iloc_getitem_float_duplicates(self): df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), index=[0.1, 0.2, 0.2], columns=list("abc"), ) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 16b510aa1f792..be562f4003094 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -307,7 +307,7 @@ def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values df = DataFrame( - np.random.default_rng(2).standard_normal(5, 5), + np.random.default_rng(2).standard_normal((5, 5)), columns=["A", "B", "B", "B", "A"], ) 
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4d9c322b5f709..ade08ed269b1a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2761,7 +2761,7 @@ def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_col def test_loc_setitem_float_intindex(): # GH 8720 - rand_data = np.random.default_rng(2).standard_normal(8, 4) + rand_data = np.random.default_rng(2).standard_normal((8, 4)) result = DataFrame(rand_data) result.loc[:, 0.5] = np.nan expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 507b86452efa2..c184f5141d566 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -332,7 +332,7 @@ def test_partial_setting2(self): # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame( - np.random.default_rng(2).standard_normal(8, 4), + np.random.default_rng(2).standard_normal((8, 4)), index=dates, columns=["A", "B", "C", "D"], ) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 37ea891ce18d7..2753b3574e583 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -76,7 +76,7 @@ def test_at_iat_coercion(self): # as timestamp is not a tuple! 
dates = date_range("1/1/2000", periods=8) df = DataFrame( - np.random.default_rng(2).standard_normal(8, 4), + np.random.default_rng(2).standard_normal((8, 4)), index=dates, columns=["A", "B", "C", "D"], ) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 35e4c2b261e49..358c4b581de8d 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -176,7 +176,7 @@ def test_missing_from_masked(): df2 = df.__dataframe__() - rng = np.random.default_rng(2).RandomState(42) + rng = np.random.default_rng(2) dict_null = {col: rng.integers(low=0, high=len(df)) for col in df.columns} for col, num_nulls in dict_null.items(): null_idx = df.index[ diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index f5bd983fa77fd..d683ab58594b7 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -130,7 +130,7 @@ def test_styler_to_excel_unstyled(engine): @pytest.mark.parametrize("css, attrs, expected", shared_style_params) def test_styler_to_excel_basic(engine, css, attrs, expected): pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).standard_normal(1, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((1, 1))) styler = df.style.map(lambda x: css) with tm.ensure_clean(".xlsx") as path: @@ -161,7 +161,7 @@ def test_styler_to_excel_basic(engine, css, attrs, expected): @pytest.mark.parametrize("css, attrs, expected", shared_style_params) def test_styler_to_excel_basic_indexes(engine, css, attrs, expected): pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).standard_normal(1, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((1, 1))) styler = df.style styler.map_index(lambda x: css, axis=0) @@ -230,7 +230,7 @@ def test_styler_to_excel_border_style(engine, border_style): expected = border_style pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).standard_normal(1, 
1)) + df = DataFrame(np.random.default_rng(2).standard_normal((1, 1))) styler = df.style.map(lambda x: css) with tm.ensure_clean(".xlsx") as path: @@ -260,7 +260,7 @@ def test_styler_custom_converter(): def custom_converter(css): return {"font": {"color": {"rgb": "111222"}}} - df = DataFrame(np.random.default_rng(2).standard_normal(1, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((1, 1))) styler = df.style.map(lambda x: "color: #888999") with tm.ensure_clean(".xlsx") as path: with ExcelWriter(path, engine="openpyxl") as writer: diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 8fa196bbd00f1..a7ded00e758b7 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -197,7 +197,7 @@ def test_no_multi_index_level_names_empty(all_parsers): parser = all_parsers midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) expected = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), index=midx, columns=["x", "y", "z"], ) diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py index 86659f6f4c65c..7df6223df70e7 100644 --- a/pandas/tests/io/pytables/test_complex.py +++ b/pandas/tests/io/pytables/test_complex.py @@ -16,7 +16,7 @@ def test_complex_fixed(tmp_path, setup_path): df = DataFrame( - np.random.default_rng(2).random(4, 5).astype(np.complex64), + np.random.default_rng(2).random((4, 5)).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) @@ -27,7 +27,7 @@ def test_complex_fixed(tmp_path, setup_path): tm.assert_frame_equal(df, reread) df = DataFrame( - np.random.default_rng(2).random(4, 5).astype(np.complex128), + np.random.default_rng(2).random((4, 5)).astype(np.complex128), index=list("abcd"), columns=list("ABCDE"), ) @@ -39,7 +39,7 @@ def test_complex_fixed(tmp_path, setup_path): def test_complex_table(tmp_path, setup_path): df = 
DataFrame( - np.random.default_rng(2).random(4, 5).astype(np.complex64), + np.random.default_rng(2).random((4, 5)).astype(np.complex64), index=list("abcd"), columns=list("ABCDE"), ) @@ -50,7 +50,7 @@ def test_complex_table(tmp_path, setup_path): tm.assert_frame_equal(df, reread) df = DataFrame( - np.random.default_rng(2).random(4, 5).astype(np.complex128), + np.random.default_rng(2).random((4, 5)).astype(np.complex128), index=list("abcd"), columns=list("ABCDE"), ) diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index fbcbaba7fc6a7..84103d2bd79ca 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -166,7 +166,9 @@ def test_append_with_diff_col_name_types_raises_value_error(setup_path): def test_invalid_complib(setup_path): df = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) with tm.ensure_clean(setup_path) as path: msg = r"complib only supports \[.*\] compression." 
@@ -204,7 +206,9 @@ def test_unsuppored_hdf_file_error(datapath): def test_read_hdf_errors(setup_path, tmp_path): df = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) path = tmp_path / setup_path diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index a0ea1301adf83..85b0fc2abf963 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -214,7 +214,9 @@ def test_read_hdf_open_store(tmp_path, setup_path): # GH10330 # No check for non-string path_or-buf, and no test of open store df = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -233,7 +235,9 @@ def test_read_hdf_index_not_view(tmp_path, setup_path): # Ensure that the index of the DataFrame is not a view # into the original recarray that pytables reads in df = DataFrame( - np.random.default_rng(2).random(4, 5), index=[0, 1, 2, 3], columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=[0, 1, 2, 3], + columns=list("ABCDE"), ) path = tmp_path / setup_path @@ -246,7 +250,9 @@ def test_read_hdf_index_not_view(tmp_path, setup_path): def test_read_hdf_iterator(tmp_path, setup_path): df = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) df.index.name = "letters" df = df.set_index(keys="E", append=True) @@ -264,7 +270,9 @@ def test_read_hdf_iterator(tmp_path, setup_path): def test_read_nokey(tmp_path, setup_path): # GH10443 df = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + 
np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) # Categorical dtype not supported for "fixed" format. So no need @@ -310,7 +318,9 @@ def test_read_nokey_empty(tmp_path, setup_path): def test_read_from_pathlib_path(tmp_path, setup_path): # GH11773 expected = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) filename = tmp_path / setup_path path_obj = Path(filename) @@ -327,7 +337,9 @@ def test_read_from_py_localpath(tmp_path, setup_path): from py.path import local as LocalPath expected = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) filename = tmp_path / setup_path path_obj = LocalPath(filename) diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index ad8833f141305..84c8c0a314342 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -183,7 +183,7 @@ def test_get(setup_path): def test_put_integer(setup_path): # non-date, non-string index - df = DataFrame(np.random.default_rng(2).standard_normal(50, 100)) + df = DataFrame(np.random.default_rng(2).standard_normal((50, 100))) _check_roundtrip(df, tm.assert_frame_equal, setup_path) @@ -407,7 +407,9 @@ def test_empty_series(dtype, setup_path): def test_can_serialize_dates(setup_path): rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] - frame = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) + frame = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng + ) _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 
6530b7c74aa3e..4fdcb02e0e501 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -41,7 +41,7 @@ def test_select_columns_in_where(setup_path): # With a DataFrame df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), + np.random.default_rng(2).standard_normal((10, 3)), index=index, columns=["A", "B", "C"], ) @@ -664,7 +664,7 @@ def test_frame_select_complex2(tmp_path): selection = read_hdf(pp, "df", where="A=[2,3]") hist = DataFrame( - np.random.default_rng(2).standard_normal(25, 1), + np.random.default_rng(2).standard_normal((25, 1)), columns=["data"], index=MultiIndex.from_tuples( [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"] diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 9461df18e7491..0ead7541bb375 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -763,7 +763,7 @@ def test_start_stop_fixed(setup_path): def test_select_filter_corner(setup_path): - df = DataFrame(np.random.default_rng(2).standard_normal(50, 100)) + df = DataFrame(np.random.default_rng(2).standard_normal((50, 100))) df.index = [f"{c:3d}" for c in df.index] df.columns = [f"{c:3d}" for c in df.columns] @@ -903,7 +903,9 @@ def test_columns_multiindex_modified(tmp_path, setup_path): # BUG: 7212 df = DataFrame( - np.random.default_rng(2).random(4, 5), index=list("abcd"), columns=list("ABCDE") + np.random.default_rng(2).random((4, 5)), + index=list("abcd"), + columns=list("ABCDE"), ) df.index.name = "letters" df = df.set_index(keys="E", append=True) diff --git a/pandas/tests/io/pytables/test_time_series.py b/pandas/tests/io/pytables/test_time_series.py index 8b96390b611fe..c7f99e5b136ea 100644 --- a/pandas/tests/io/pytables/test_time_series.py +++ b/pandas/tests/io/pytables/test_time_series.py @@ -45,7 +45,9 @@ def test_tseries_indices_series(setup_path): def test_tseries_indices_frame(setup_path): with 
ensure_clean_store(setup_path) as store: idx = tm.makeDateIndex(10) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), index=idx) + df = DataFrame( + np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx + ) store["a"] = df result = store["a"] @@ -54,7 +56,7 @@ def test_tseries_indices_frame(setup_path): tm.assert_class_equal(result.index, df.index, obj="dataframe index") idx = tm.makePeriodIndex(10) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), idx) + df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 3)), idx) store["a"] = df result = store["a"] diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index e80874e515731..1eb7a34bead56 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -180,7 +180,9 @@ def test_tseries_select_index_column(setup_path): # check that no tz still works rng = date_range("1/1/2000", "1/30/2000") - frame = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) + frame = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng + ) with ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -189,7 +191,9 @@ def test_tseries_select_index_column(setup_path): # check utc rng = date_range("1/1/2000", "1/30/2000", tz="UTC") - frame = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) + frame = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng + ) with ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -198,7 +202,9 @@ def test_tseries_select_index_column(setup_path): # double check non-utc rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") - frame = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) + frame = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng + ) with 
ensure_clean_store(setup_path) as store: store.append("frame", frame) @@ -211,7 +217,9 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): # index rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") rng = rng._with_freq(None) # freq doesn't round-trip - df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) + df = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng + ) store["df"] = df result = store["df"] tm.assert_frame_equal(result, df) @@ -262,7 +270,9 @@ def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): def test_fixed_offset_tz(setup_path): rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") - frame = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 4), index=rng) + frame = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng + ) with ensure_clean_store(setup_path) as store: store["frame"] = frame diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 36764c3803a60..0eb623937f21c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -569,7 +569,7 @@ def test_write_column_index_string(self, pa): # Write column indexes with string column names arrays = ["bar", "baz", "foo", "qux"] df = pd.DataFrame( - np.random.default_rng(2).standard_normal(8, 4), columns=arrays + np.random.default_rng(2).standard_normal((8, 4)), columns=arrays ) df.columns.name = "StringCol" @@ -581,7 +581,7 @@ def test_write_column_index_nonstring(self, engine): # Write column indexes with string column names arrays = [1, 2, 3, 4] df = pd.DataFrame( - np.random.default_rng(2).standard_normal(8, 4), columns=arrays + np.random.default_rng(2).standard_normal((8, 4)), columns=arrays ) df.columns.name = "NonStringCol" if engine == "fastparquet": @@ -997,7 +997,7 @@ def test_filter_row_groups(self, pa): def test_read_parquet_manager(self, pa, using_array_manager): # ensure that 
read_parquet honors the pandas.options.mode.data_manager option df = pd.DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"] ) with tm.ensure_clean() as path: diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 5ee766b964b18..1a73f87c1f589 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -269,7 +269,7 @@ def test_nonnumeric_exclude(self): def test_implicit_label(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((10, 3)), columns=["a", "b", "c"] ) ax = df.plot(x="a", y="b") _check_text_labels(ax.xaxis.get_label(), "a") @@ -432,7 +432,7 @@ def test_unsorted_index_lims_x_y(self): def test_negative_log(self): df = -DataFrame( - np.random.default_rng(2).random(6, 4), + np.random.default_rng(2).random((6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -453,7 +453,7 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): @pytest.mark.parametrize("mult", [1, -1]) def test_line_area_stacked(self, kind, mult): df = mult * DataFrame( - np.random.default_rng(2).random(6, 4), columns=["w", "x", "y", "z"] + np.random.default_rng(2).random((6, 4)), columns=["w", "x", "y", "z"] ) ax1 = _check_plot_works(df.plot, kind=kind, stacked=False) @@ -478,7 +478,7 @@ def test_line_area_stacked_sep_df(self, kind): def test_line_area_stacked_mixed(self): mixed_df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -495,7 +495,7 @@ def test_line_area_stacked_mixed(self): @pytest.mark.parametrize("kind", ["line", "area"]) def test_line_area_stacked_positive_idx(self, kind): df = DataFrame( - 
np.random.default_rng(2).random(6, 4), columns=["w", "x", "y", "z"] + np.random.default_rng(2).random((6, 4)), columns=["w", "x", "y", "z"] ) # Use an index with strictly positive values, preventing # matplotlib from warning about ignoring xlim @@ -590,7 +590,7 @@ def test_line_lim_subplots(self): @pytest.mark.parametrize("stacked", [True, False]) def test_area_lim(self, stacked): df = DataFrame( - np.random.default_rng(2).random(6, 4), columns=["x", "y", "z", "four"] + np.random.default_rng(2).random((6, 4)), columns=["x", "y", "z", "four"] ) neg_df = -df @@ -620,14 +620,14 @@ def test_area_sharey_dont_overwrite(self): @pytest.mark.parametrize("stacked", [True, False]) def test_bar_linewidth(self, stacked): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.bar(stacked=stacked, linewidth=2) for r in ax.patches: assert r.get_linewidth() == 2 def test_bar_linewidth_subplots(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # subplots axes = df.plot.bar(linewidth=2, subplots=True) _check_axes_shape(axes, axes_num=5, layout=(5, 1)) @@ -640,7 +640,7 @@ def test_bar_linewidth_subplots(self): ) @pytest.mark.parametrize("stacked", [True, False]) def test_bar_barwidth(self, meth, dim, stacked): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) width = 0.9 @@ -655,7 +655,7 @@ def test_bar_barwidth(self, meth, dim, stacked): "meth, dim", [("bar", "get_width"), ("barh", "get_height")] ) def test_barh_barwidth_subplots(self, meth, dim): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) width = 0.9 @@ -740,7 +740,7 @@ def test_bar_categorical(self, idx): @pytest.mark.parametrize("x, y", [("x", "y"), (1, 2)]) def test_plot_scatter(self, 
x, y): df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -749,7 +749,7 @@ def test_plot_scatter(self, x, y): def test_plot_scatter_error(self): df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -762,7 +762,7 @@ def test_plot_scatter_error(self): def test_plot_scatter_shape(self): df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -932,7 +932,7 @@ def test_plot_scatter_without_norm(self): ) def test_plot_bar(self, kwargs): df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -1037,7 +1037,7 @@ def test_boxplot_vertical_positions(self, hist_df): def test_boxplot_return_type_invalid(self): df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -1048,7 +1048,7 @@ def test_boxplot_return_type_invalid(self): @pytest.mark.parametrize("return_type", ["dict", "axes", "both"]) def test_boxplot_return_type_invalid_type(self, return_type): df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -1329,7 +1329,7 @@ def test_style_by_column(self, markers): fig = plt.gcf() fig.clf() fig.add_subplot(111) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 3)) + df = 
DataFrame(np.random.default_rng(2).standard_normal((10, 3))) ax = df.plot(style=markers) for idx, line in enumerate(ax.get_lines()[: len(markers)]): assert line.get_marker() == markers[idx] @@ -1397,7 +1397,7 @@ def test_all_invalid_plot_data(self, kind): ) def test_partially_invalid_plot_data_numeric(self, kind): df = DataFrame( - np.random.default_rng(2).RandomState(42).standard_normal((10, 2)), + np.random.default_rng(2).standard_normal((10, 2)), dtype=object, ) df[np.random.default_rng(2).random(df.shape[0]) > 0.5] = "a" @@ -1745,7 +1745,7 @@ def test_errorbar_with_integer_column_names(self): @pytest.mark.slow @pytest.mark.parametrize("kind", ["line", "bar"]) def test_errorbar_with_partial_columns_kind(self, kind): - df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 3))) + df = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 3)))) df_err = DataFrame( np.abs(np.random.default_rng(2).standard_normal((10, 2))), columns=[0, 2] ) @@ -1754,7 +1754,7 @@ def test_errorbar_with_partial_columns_kind(self, kind): @pytest.mark.slow def test_errorbar_with_partial_columns_dti(self): - df = DataFrame(np.abs(np.random.default_rng(2).standard_normal(10, 3))) + df = DataFrame(np.abs(np.random.default_rng(2).standard_normal((10, 3)))) df_err = DataFrame( np.abs(np.random.default_rng(2).standard_normal((10, 2))), columns=[0, 2] ) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index c42185642676f..ff239e684f465 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -38,7 +38,7 @@ class TestDataFrameColor: def test_mpl2_color_cycle_str(self, color): # GH 15516 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((10, 3)), columns=["a", "b", "c"] ) _check_plot_works(df.plot, color=color) @@ -100,31 +100,31 @@ def test_color_and_marker(self, color, 
expected): def test_bar_colors(self): default_colors = _unpack_cycler(plt.rcParams) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.bar() _check_colors(ax.patches[::5], facecolors=default_colors[:5]) def test_bar_colors_custom(self): custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.bar(color=custom_colors) _check_colors(ax.patches[::5], facecolors=custom_colors) @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_bar_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.bar(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] _check_colors(ax.patches[::5], facecolors=rgba_colors) def test_bar_colors_single_col(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") _check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) def test_bar_colors_green(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot(kind="bar", color="green") _check_colors(ax.patches[::5], facecolors=["green"] * 5) @@ -246,7 +246,7 @@ def test_scatter_colorbar_different_cmap(self): def test_line_colors(self): custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -261,26 +261,26 @@ def test_line_colors(self): @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_line_colors_cmap(self, colormap): - df = 
DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=rgba_colors) def test_line_colors_single_col(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') ax = df.loc[:, [0]].plot(color="DodgerBlue") _check_colors(ax.lines, linecolors=["DodgerBlue"]) def test_line_colors_single_color(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot(color="red") _check_colors(ax.get_lines(), linecolors=["red"] * 5) def test_line_colors_hex(self): # GH 10299 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] ax = df.plot(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -294,7 +294,7 @@ def test_line_colors_and_styles_subplots(self): # GH 9894 default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -302,7 +302,7 @@ def test_line_colors_and_styles_subplots(self): @pytest.mark.parametrize("color", ["k", "green"]) def test_line_colors_and_styles_subplots_single_color_str(self, color): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) axes = df.plot(subplots=True, color=color) for ax in axes: _check_colors(ax.get_lines(), linecolors=[color]) 
@@ -310,14 +310,14 @@ def test_line_colors_and_styles_subplots_single_color_str(self, color): @pytest.mark.parametrize("color", ["rgcby", list("rgcby")]) def test_line_colors_and_styles_subplots_custom_colors(self, color): # GH 9894 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) axes = df.plot(color=color, subplots=True) for ax, c in zip(axes, list(color)): _check_colors(ax.get_lines(), linecolors=[c]) def test_line_colors_and_styles_subplots_colormap_hex(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # GH 10299 custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] axes = df.plot(color=custom_colors, subplots=True) @@ -327,7 +327,7 @@ def test_line_colors_and_styles_subplots_colormap_hex(self): @pytest.mark.parametrize("cmap", ["jet", cm.jet]) def test_line_colors_and_styles_subplots_colormap_subplot(self, cmap): # GH 9894 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] axes = df.plot(colormap=cmap, subplots=True) for ax, c in zip(axes, rgba_colors): @@ -335,7 +335,7 @@ def test_line_colors_and_styles_subplots_colormap_subplot(self, cmap): def test_line_colors_and_styles_subplots_single_col(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) @@ -343,7 +343,7 @@ def test_line_colors_and_styles_subplots_single_col(self): def test_line_colors_and_styles_subplots_single_char(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = 
DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # single character style axes = df.plot(style="r", subplots=True) for ax in axes: @@ -351,7 +351,7 @@ def test_line_colors_and_styles_subplots_single_char(self): def test_line_colors_and_styles_subplots_list_styles(self): # GH 9894 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # list of styles styles = list("rgcby") axes = df.plot(style=styles, subplots=True) @@ -413,30 +413,30 @@ def test_area_colors_stacked_false(self): def test_hist_colors(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.hist() _check_colors(ax.patches[::10], facecolors=default_colors[:5]) def test_hist_colors_single_custom(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) custom_colors = "rgcby" ax = df.plot.hist(color=custom_colors) _check_colors(ax.patches[::10], facecolors=custom_colors) @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_hist_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.hist(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] _check_colors(ax.patches[::10], facecolors=rgba_colors) def test_hist_colors_single_col(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") _check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) def test_hist_colors_single_color(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = 
df.plot(kind="hist", color="green") _check_colors(ax.patches[::10], facecolors=["green"] * 5) @@ -451,7 +451,7 @@ def test_kde_colors(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_kde_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.kde(colormap=colormap) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=rgba_colors) @@ -460,7 +460,7 @@ def test_kde_colors_cmap(self, colormap): def test_kde_colors_and_styles_subplots(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -469,14 +469,14 @@ def test_kde_colors_and_styles_subplots(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["k", "red"]) def test_kde_colors_and_styles_subplots_single_col_str(self, colormap): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) axes = df.plot(kind="kde", color=colormap, subplots=True) for ax in axes: _check_colors(ax.get_lines(), linecolors=[colormap]) @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_custom_color(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) custom_colors = "rgcby" axes = df.plot(kind="kde", color=custom_colors, subplots=True) for ax, c in zip(axes, list(custom_colors)): @@ -485,7 +485,7 @@ def test_kde_colors_and_styles_subplots_custom_color(self): @td.skip_if_no_scipy @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_kde_colors_and_styles_subplots_cmap(self, colormap): - df = 
DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] axes = df.plot(kind="kde", colormap=colormap, subplots=True) for ax, c in zip(axes, rgba_colors): @@ -493,7 +493,7 @@ def test_kde_colors_and_styles_subplots_cmap(self, colormap): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_single_col(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) @@ -501,7 +501,7 @@ def test_kde_colors_and_styles_subplots_single_col(self): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_single_char(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # list of styles # single character style axes = df.plot(kind="kde", style="r", subplots=True) @@ -510,7 +510,7 @@ def test_kde_colors_and_styles_subplots_single_char(self): @td.skip_if_no_scipy def test_kde_colors_and_styles_subplots_list(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # list of styles styles = list("rgcby") axes = df.plot(kind="kde", style=styles, subplots=True) @@ -520,7 +520,7 @@ def test_kde_colors_and_styles_subplots_list(self): def test_boxplot_colors(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) bp = df.plot.box(return_type="dict") _check_colors_box( bp, @@ -531,7 +531,7 @@ def test_boxplot_colors(self): ) def test_boxplot_colors_dict_colors(self): - df = 
DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) dict_colors = { "boxes": "#572923", "whiskers": "#982042", @@ -550,7 +550,7 @@ def test_boxplot_colors_dict_colors(self): def test_boxplot_colors_default_color(self): default_colors = _unpack_cycler(mpl.pyplot.rcParams) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # partial colors dict_colors = {"whiskers": "c", "medians": "m"} bp = df.plot.box(color=dict_colors, return_type="dict") @@ -558,7 +558,7 @@ def test_boxplot_colors_default_color(self): @pytest.mark.parametrize("colormap", ["jet", cm.jet]) def test_boxplot_colors_cmap(self, colormap): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) bp = df.plot.box(colormap=colormap, return_type="dict") jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] _check_colors_box( @@ -566,19 +566,19 @@ def test_boxplot_colors_cmap(self, colormap): ) def test_boxplot_colors_single(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # string color is applied to all artists except fliers bp = df.plot.box(color="DodgerBlue", return_type="dict") _check_colors_box(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") def test_boxplot_colors_tuple(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # tuple is also applied to all artists except fliers bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") _check_colors_box(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") def test_boxplot_colors_invalid(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) msg = 
re.escape( "color dict contains invalid key 'xxxx'. The key must be either " "['boxes', 'whiskers', 'medians', 'caps']" diff --git a/pandas/tests/plotting/frame/test_frame_legend.py b/pandas/tests/plotting/frame/test_frame_legend.py index da0438dfb8260..08e462df2a196 100644 --- a/pandas/tests/plotting/frame/test_frame_legend.py +++ b/pandas/tests/plotting/frame/test_frame_legend.py @@ -98,17 +98,17 @@ def test_df_legend_labels_time_series(self): # Time Series ind = date_range("1/1/2014", periods=3) df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=["a", "b", "c"], index=ind, ) df2 = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=["d", "e", "f"], index=ind, ) df3 = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=["g", "h", "i"], index=ind, ) @@ -124,17 +124,17 @@ def test_df_legend_labels_time_series_scatter(self): # Time Series ind = date_range("1/1/2014", periods=3) df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=["a", "b", "c"], index=ind, ) df2 = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=["d", "e", "f"], index=ind, ) df3 = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=["g", "h", "i"], index=ind, ) @@ -150,7 +150,7 @@ def test_df_legend_labels_time_series_scatter(self): def test_df_legend_labels_time_series_no_mutate(self): ind = date_range("1/1/2014", periods=3) df = DataFrame( - np.random.default_rng(2).standard_normal(3, 3), + np.random.default_rng(2).standard_normal((3, 3)), columns=["a", "b", "c"], index=ind, ) @@ -201,7 +201,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() 
_check_text_labels(leg_title, "group,individual") - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() _check_text_labels(leg_title, "group,individual") diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 14701413e26a0..fda2178535806 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -333,7 +333,7 @@ def test_subplots_multiple_axes_2_dim(self, layout, exp_layout): # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes _, axes = mpl.pyplot.subplots(2, 2) df = DataFrame( - np.random.default_rng(2).random(10, 4), + np.random.default_rng(2).random((10, 4)), index=list(string.ascii_letters[:10]), ) with warnings.catch_warnings(): @@ -653,13 +653,13 @@ def test_bar_align_single_column(self, kwargs): ], ) def test_bar_barwidth_position(self, kwargs): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) self._check_bar_alignment(df, width=0.9, position=0.2, **kwargs) @pytest.mark.parametrize("w", [1, 1.0]) def test_bar_barwidth_position_int(self, w): # GH 12979 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) ax = df.plot.bar(stacked=True, width=w) ticks = ax.xaxis.get_ticklocs() tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) @@ -680,7 +680,7 @@ def test_bar_barwidth_position_int(self, w): ) def test_bar_barwidth_position_int_width_1(self, kind, kwargs): # GH 12979 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) self._check_bar_alignment(df, kind=kind, width=1, **kwargs) def _check_bar_alignment( diff --git 
a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index cf50201cca1db..55d2481f3efcb 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -73,7 +73,7 @@ def test_stacked_boxplot_set_axis(self): ) def test_boxplot_legacy1(self, kwargs, warn): df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -147,7 +147,7 @@ def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -163,7 +163,7 @@ def test_boxplot_return_type_legacy_return_type(self, return_type): # API change in https://github.com/pandas-dev/pandas/pull/7096 df = DataFrame( - np.random.default_rng(2).standard_normal(6, 4), + np.random.default_rng(2).standard_normal((6, 4)), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -420,9 +420,7 @@ def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) height = Series(np.random.default_rng(2).normal(60, 10, size=n)) - gender = ( - np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) - ) + gender = np.random.default_rng(2).choice(["male", "female"], size=n) df = DataFrame({"height": height, "weight": weight, "gender": gender}) gb = df.groupby("gender") @@ -439,9 +437,7 @@ def test_grouped_plot_fignums_excluded_col(self): n = 10 weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) height = Series(np.random.default_rng(2).normal(60, 10, size=n)) - gender = ( - np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) - ) + gender = 
np.random.default_rng(2).choice(["male", "female"], size=n) df = DataFrame({"height": height, "weight": weight, "gender": gender}) # now works with GH 5610 as gender is excluded df.groupby("gender").hist() diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 21685b973709c..27d0a21452cb0 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -71,13 +71,15 @@ def test_frame_inferred(self): idx = date_range("1/1/1987", freq="MS", periods=100) idx = DatetimeIndex(idx.values, freq=None) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), index=idx) + df = DataFrame( + np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx + ) _check_plot_works(df.plot) # axes freq idx = idx[0:40].union(idx[45:99]) df2 = DataFrame( - np.random.default_rng(2).standard_normal(len(idx), 3), index=idx + np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx ) _check_plot_works(df2.plot) @@ -85,7 +87,9 @@ def test_frame_inferred_n_gt_1(self): # N > 1 idx = date_range("2008-1-1 00:15:00", freq="15T", periods=10) idx = DatetimeIndex(idx.values, freq=None) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), index=idx) + df = DataFrame( + np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx + ) _check_plot_works(df.plot) def test_is_error_nozeroindex(self): @@ -209,7 +213,7 @@ def test_line_plot_datetime_series(self, freq): def test_line_plot_period_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) df = DataFrame( - np.random.default_rng(2).standard_normal(len(idx), 3), + np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx, columns=["A", "B", "C"], ) @@ -224,7 +228,7 @@ def test_line_plot_period_mlt_frame(self, frqncy): # #14763 idx = period_range("12/31/1999", freq=frqncy, periods=100) df = DataFrame( - np.random.default_rng(2).standard_normal(len(idx), 3), + 
np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx, columns=["A", "B", "C"], ) @@ -237,7 +241,7 @@ def test_line_plot_period_mlt_frame(self, frqncy): def test_line_plot_datetime_frame(self, freq): idx = date_range("12/31/1999", freq=freq, periods=100) df = DataFrame( - np.random.default_rng(2).standard_normal(len(idx), 3), + np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx, columns=["A", "B", "C"], ) @@ -1047,7 +1051,7 @@ def test_irreg_dtypes(self): # date idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] df = DataFrame( - np.random.default_rng(2).standard_normal(len(idx), 3), + np.random.default_rng(2).standard_normal((len(idx), 3)), Index(idx, dtype=object), ) _check_plot_works(df.plot) @@ -1056,7 +1060,7 @@ def test_irreg_dtypes_dt64(self): # np.datetime64 idx = date_range("1/1/2000", periods=10) idx = idx[[0, 2, 5, 9]].astype(object) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 3), idx) + df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 3)), idx) _, ax = mpl.pyplot.subplots() _check_plot_works(df.plot, ax=ax) @@ -1524,9 +1528,7 @@ def test_add_matplotlib_datetime64(self): def test_matplotlib_scatter_datetime64(self): # https://github.com/matplotlib/matplotlib/issues/11391 - df = DataFrame( - np.random.default_rng(2).RandomState(0).rand(10, 2), columns=["x", "y"] - ) + df = DataFrame(np.random.default_rng(2).random((10, 2)), columns=["x", "y"]) df["time"] = date_range("2018-01-01", periods=10, freq="D") _, ax = mpl.pyplot.subplots() ax.scatter(x="time", y="y", data=df) diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 22fa71f480124..1a7c8f17fe8eb 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -21,26 +21,20 @@ class TestDataFrameGroupByPlots: def test_series_groupby_plotting_nominally_works(self): n = 10 weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) - gender = 
( - np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) - ) + gender = np.random.default_rng(2).choice(["male", "female"], size=n) weight.groupby(gender).plot() def test_series_groupby_plotting_nominally_works_hist(self): n = 10 height = Series(np.random.default_rng(2).normal(60, 10, size=n)) - gender = ( - np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) - ) + gender = np.random.default_rng(2).choice(["male", "female"], size=n) height.groupby(gender).hist() def test_series_groupby_plotting_nominally_works_alpha(self): n = 10 height = Series(np.random.default_rng(2).normal(60, 10, size=n)) - gender = ( - np.random.default_rng(2).RandomState(42).choice(["male", "female"], size=n) - ) + gender = np.random.default_rng(2).choice(["male", "female"], size=n) # Regression test for GH8733 height.groupby(gender).plot(alpha=0.5) diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 257dfc9eec5e4..45c053da4a8d8 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -560,7 +560,7 @@ def test_hist_df_kwargs(self): def test_hist_df_with_nonnumerics(self): # GH 9853 df = DataFrame( - np.random.default_rng(2).RandomState(42).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"], ) df["E"] = ["x", "y"] * 5 @@ -571,7 +571,7 @@ def test_hist_df_with_nonnumerics(self): def test_hist_df_with_nonnumerics_no_bins(self): # GH 9853 df = DataFrame( - np.random.default_rng(2).RandomState(42).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"], ) df["E"] = ["x", "y"] * 5 @@ -661,7 +661,7 @@ class TestDataFrameGroupByPlots: def test_grouped_hist_legacy(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.default_rng(2).RandomState(42) + rs = np.random.default_rng(2) df = DataFrame(rs.standard_normal(10, 1), 
columns=["A"]) df["B"] = to_datetime( rs.integers( @@ -678,8 +678,8 @@ def test_grouped_hist_legacy(self): _check_axes_shape(axes, axes_num=4, layout=(2, 2)) def test_grouped_hist_legacy_axes_shape_no_col(self): - rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) + rs = np.random.default_rng(2) + df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( 812419200000000000, @@ -694,8 +694,8 @@ def test_grouped_hist_legacy_axes_shape_no_col(self): _check_axes_shape(axes, axes_num=4, layout=(2, 2)) def test_grouped_hist_legacy_single_key(self): - rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) + rs = np.random.default_rng(2) + df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( 812419200000000000, @@ -716,8 +716,8 @@ def test_grouped_hist_legacy_grouped_hist_kwargs(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) + rs = np.random.default_rng(2) + df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( 812419200000000000, @@ -752,7 +752,7 @@ def test_grouped_hist_legacy_grouped_hist_kwargs(self): def test_grouped_hist_legacy_grouped_hist(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.default_rng(2).RandomState(42) + rs = np.random.default_rng(2) df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) df["B"] = to_datetime( rs.integers( @@ -771,8 +771,8 @@ def test_grouped_hist_legacy_grouped_hist(self): def test_grouped_hist_legacy_external_err(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) + rs = np.random.default_rng(2) + df = 
DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( 812419200000000000, @@ -788,8 +788,8 @@ def test_grouped_hist_legacy_external_err(self): _grouped_hist(df.A, by=df.C, foo="bar") def test_grouped_hist_legacy_figsize_err(self): - rs = np.random.default_rng(2).RandomState(42) - df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) + rs = np.random.default_rng(2) + df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( 812419200000000000, @@ -808,7 +808,7 @@ def test_grouped_hist_legacy2(self): n = 10 weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) height = Series(np.random.default_rng(2).normal(60, 10, size=n)) - gender_int = np.random.default_rng(2).RandomState(42).choice([0, 1], size=n) + gender_int = np.random.default_rng(2).choice([0, 1], size=n) df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int}) gb = df_int.groupby("gender") axes = gb.hist() diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index a2aab6c3c596d..ff04b0d5f4948 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -106,7 +106,7 @@ def test_scatter_matrix_axis(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.default_rng(2).RandomState(42).standard_normal(100, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((100, 3))) # we are plotting multiples on a sub-plot with tm.assert_produces_warning(UserWarning, check_stacklevel=False): @@ -131,7 +131,7 @@ def test_scatter_matrix_axis_smaller(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.default_rng(2).RandomState(42).standard_normal(100, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((100, 3))) df[0] = (df[0] - 2) / 3 # we are plotting multiples on a sub-plot diff --git a/pandas/tests/plotting/test_series.py 
b/pandas/tests/plotting/test_series.py index 1297eba930c41..7cf6b37246d09 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -327,14 +327,14 @@ def test_bar_user_colors(self): assert result == expected def test_rotation_default(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) # Default rot 0 _, ax = mpl.pyplot.subplots() axes = df.plot(ax=ax) _check_ticks_props(axes, xrot=0) def test_rotation_30(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) _, ax = mpl.pyplot.subplots() axes = df.plot(rot=30, ax=ax) _check_ticks_props(axes, xrot=30) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 2f66cfb027921..231bdfa48b2ac 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1458,7 +1458,7 @@ def test_resample_group_info(n, k, unit): # GH10914 # use a fixed seed to always have the same uniques - prng = np.random.default_rng(2).RandomState(1234) + prng = np.random.default_rng(2) dr = date_range(start="2015-08-27", periods=n // 10, freq="T").as_unit(unit) ts = Series(prng.integers(0, n // k, n).astype("int64"), index=prng.choice(dr, n)) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 3ddfdd530f330..129408d8cdfd5 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -409,7 +409,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): index = lexsorted_two_level_string_multiindex to_join = DataFrame( - np.random.default_rng(2).standard_normal(10, 3), + np.random.default_rng(2).standard_normal((10, 3)), index=index, columns=["j_one", "j_two", "j_three"], ) @@ -653,7 +653,8 @@ def _check_diff_index(df_list, result, 
exp_index): def test_join_many_mixed(self): df = DataFrame( - np.random.default_rng(2).standard_normal(8, 4), columns=["A", "B", "C", "D"] + np.random.default_rng(2).standard_normal((8, 4)), + columns=["A", "B", "C", "D"], ) df["key"] = ["foo", "bar"] * 4 df1 = df.loc[:, ["A", "B"]] diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 408142def7fe5..bb7bc5a0b690e 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -867,7 +867,7 @@ def test_margin_with_ordered_categorical_column(self): @pytest.mark.parametrize("b_dtype", ["category", "int64"]) def test_categoricals(a_dtype, b_dtype): # https://github.com/pandas-dev/pandas/issues/37465 - g = np.random.default_rng(2).RandomState(25982704) + g = np.random.default_rng(2) a = Series(g.integers(0, 3, size=100)).astype(a_dtype) b = Series(g.integers(0, 2, size=100)).astype(b_dtype) result = crosstab(a, b, margins=True, dropna=False) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 9ac40876a8a09..ca6f9937ebe97 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -489,9 +489,7 @@ def test_astype_string_to_extension_dtype_roundtrip( class TestAstypeCategorical: def test_astype_categorical_to_other(self): cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series( - np.random.default_rng(2).RandomState(0).integers(0, 10000, 100) - ).sort_values() + ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values() ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) expected = ser diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 6d2dc22f1cf23..34f39e4b745b2 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -177,7 +177,7 @@ def test_constructor(self, datetime_series): 
ValueError, match=r"Data must be 1-dimensional, got ndarray of shape \(3, 3\) instead", ): - Series(np.random.default_rng(2).standard_normal(3, 3), index=np.arange(3)) + Series(np.random.default_rng(2).standard_normal((3, 3)), index=np.arange(3)) mixed.name = "Series" rs = Series(mixed).name diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index ad0371acfe7a6..c7ea19e34891a 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -18,7 +18,7 @@ @pytest.fixture def _frame(): return DataFrame( - np.random.default_rng(2).standard_normal(10001, 4), + np.random.default_rng(2).standard_normal((10001, 4)), columns=list("ABCD"), dtype="float64", ) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a406c3047b9b3..6a72ff5f93832 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -176,7 +176,7 @@ def test_level_with_tuples(self): ) series = Series(np.random.default_rng(2).standard_normal(6), index=index) - frame = DataFrame(np.random.default_rng(2).standard_normal(6, 4), index=index) + frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index) result = series[("foo", "bar", 0)] result2 = series.loc[("foo", "bar", 0)] @@ -201,7 +201,7 @@ def test_level_with_tuples(self): ) series = Series(np.random.default_rng(2).standard_normal(6), index=index) - frame = DataFrame(np.random.default_rng(2).standard_normal(6, 4), index=index) + frame = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index) result = series[("foo", "bar")] result2 = series.loc[("foo", "bar")] diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 3188ac70b8254..5aec1c6d17513 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1007,7 +1007,7 @@ def test_nanstd_roundoff(self, ddof): @property def prng(self): - return np.random.default_rng(2).RandomState(1234) + return np.random.default_rng(2) class 
TestNanskewFixedValues: @@ -1058,7 +1058,7 @@ def test_nans_skipna(self, samples, actual_skew): @property def prng(self): - return np.random.default_rng(2).RandomState(1234) + return np.random.default_rng(2) class TestNankurtFixedValues: @@ -1109,7 +1109,7 @@ def test_nans_skipna(self, samples, actual_kurt): @property def prng(self): - return np.random.default_rng(2).RandomState(1234) + return np.random.default_rng(2) class TestDatetime64NaNOps: diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index e36d0627d08d3..f01882cc095da 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -98,7 +98,7 @@ def test_int64_overflow_groupby_large_range(self): @pytest.mark.parametrize("agg", ["mean", "median"]) def test_int64_overflow_groupby_large_df_shuffled(self, agg): - rs = np.random.default_rng(2).RandomState(42) + rs = np.random.default_rng(2) arr = rs.integers(-1 << 12, 1 << 12, (1 << 15, 5)) i = rs.choice(len(arr), len(arr) * 4) arr = np.vstack((arr, arr[i])) # add some duplicate rows diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index d765ae0f4ad75..d901fe58950e3 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -21,7 +21,7 @@ def test_getitem(step): - frame = DataFrame(np.random.default_rng(2).standard_normal(5, 5)) + frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5))) r = frame.rolling(window=5, step=step) tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index a0882feb5dd5c..aebb9e86c763f 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -625,7 +625,7 @@ def mean_w_arg(x, const): engine, raw = engine_and_raw - df = DataFrame(np.random.default_rng(2).random(20, 3)) + df = DataFrame(np.random.default_rng(2).random((20, 3))) expected = df.expanding().apply(np.mean, engine=engine, 
raw=raw) + 20.0 diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 7d138727dd339..6bb262b63f49b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -693,7 +693,7 @@ def test_rolling_window_as_string(center, expected_data): date_today = datetime.now() days = date_range(date_today, date_today + timedelta(365), freq="D") - npr = np.random.default_rng(2).RandomState(seed=421) + npr = np.random.default_rng(2) data = npr.integers(1, high=100, size=len(days)) df = DataFrame({"DateCol": days, "metric": data}) From 420d21cfee1cf07aed4c803ccc6f704ceb86e3b1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:12:08 -0700 Subject: [PATCH 07/22] address rand --- pandas/tests/groupby/test_libgroupby.py | 10 +++++----- pandas/tests/indexes/multi/test_sorting.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index 3e842eeb6faad..35b8fa93b8e03 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -22,7 +22,7 @@ def test_group_var_generic_1d(self): out = (np.nan * np.ones((5, 1))).astype(self.dtype) counts = np.zeros(5, dtype="int64") - values = 10 * prng.rand(15, 1).astype(self.dtype) + values = 10 * prng.random((15, 1)).astype(self.dtype) labels = np.tile(np.arange(5), (3,)).astype("intp") expected_out = ( @@ -39,7 +39,7 @@ def test_group_var_generic_1d_flat_labels(self): out = (np.nan * np.ones((1, 1))).astype(self.dtype) counts = np.zeros(1, dtype="int64") - values = 10 * prng.rand(5, 1).astype(self.dtype) + values = 10 * prng.random((5, 1)).astype(self.dtype) labels = np.zeros(5, dtype="intp") expected_out = np.array([[values.std(ddof=1) ** 2]]) @@ -55,7 +55,7 @@ def test_group_var_generic_2d_all_finite(self): out = (np.nan * np.ones((5, 2))).astype(self.dtype) counts = 
np.zeros(5, dtype="int64") - values = 10 * prng.rand(10, 2).astype(self.dtype) + values = 10 * prng.random((10, 2)).astype(self.dtype) labels = np.tile(np.arange(5), (2,)).astype("intp") expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2 @@ -70,7 +70,7 @@ def test_group_var_generic_2d_some_nan(self): out = (np.nan * np.ones((5, 2))).astype(self.dtype) counts = np.zeros(5, dtype="int64") - values = 10 * prng.rand(10, 2).astype(self.dtype) + values = 10 * prng.random((10, 2)).astype(self.dtype) values[:, 1] = np.nan labels = np.tile(np.arange(5), (2,)).astype("intp") @@ -113,7 +113,7 @@ def test_group_var_large_inputs(self): out = np.array([[np.nan]], dtype=self.dtype) counts = np.array([0], dtype="int64") - values = (prng.rand(10**6) + 10**12).astype(self.dtype) + values = (prng.random(10**6) + 10**12).astype(self.dtype) values.shape = (10**6, 1) labels = np.zeros(10**6, dtype="intp") diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index bd8eb87a8e9e2..b1de30ae4aa20 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -248,7 +248,7 @@ def test_remove_unused_levels_large(first_type, second_type): { "first": rng.integers(0, 1 << 13, size).astype(first_type), "second": rng.integers(0, 1 << 10, size).astype(second_type), - "third": rng.rand(size), + "third": rng.random(size), } ) df = df.groupby(["first", "second"]).sum() From d26de4e79558bb082123089c53f404868e661eaa Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 12 Jul 2023 17:41:20 -0700 Subject: [PATCH 08/22] More fixes --- pandas/tests/apply/test_frame_apply.py | 4 +- pandas/tests/arithmetic/test_numeric.py | 4 +- pandas/tests/computation/test_eval.py | 20 +++++----- .../copy_view/test_core_functionalities.py | 2 +- pandas/tests/frame/indexing/test_delitem.py | 2 +- pandas/tests/frame/indexing/test_indexing.py | 18 ++++----- 
pandas/tests/frame/indexing/test_insert.py | 4 +- pandas/tests/frame/indexing/test_mask.py | 6 +-- pandas/tests/frame/indexing/test_setitem.py | 4 +- pandas/tests/frame/indexing/test_where.py | 4 +- pandas/tests/frame/methods/test_astype.py | 2 +- pandas/tests/frame/methods/test_at_time.py | 4 +- .../tests/frame/methods/test_between_time.py | 16 ++++++-- pandas/tests/frame/methods/test_cov_corr.py | 4 +- pandas/tests/frame/methods/test_diff.py | 2 +- pandas/tests/frame/methods/test_dot.py | 4 +- pandas/tests/frame/methods/test_drop.py | 2 +- pandas/tests/frame/methods/test_fillna.py | 14 +++---- pandas/tests/frame/methods/test_matmul.py | 6 +-- pandas/tests/frame/methods/test_quantile.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 5 ++- pandas/tests/frame/methods/test_sample.py | 2 +- pandas/tests/frame/methods/test_shift.py | 4 +- pandas/tests/frame/methods/test_sort_index.py | 2 +- .../tests/frame/methods/test_sort_values.py | 2 +- pandas/tests/frame/methods/test_to_numpy.py | 2 +- pandas/tests/frame/test_arithmetic.py | 8 ++-- pandas/tests/frame/test_nonunique_indexes.py | 4 +- pandas/tests/frame/test_npfuncs.py | 2 +- pandas/tests/frame/test_query_eval.py | 26 ++++++------- pandas/tests/frame/test_reductions.py | 4 +- pandas/tests/frame/test_repr_info.py | 2 +- pandas/tests/frame/test_subclass.py | 4 +- pandas/tests/groupby/aggregate/test_other.py | 2 +- pandas/tests/groupby/test_categorical.py | 2 +- pandas/tests/groupby/test_function.py | 4 +- .../indexes/datetimes/test_partial_slicing.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- .../multiindex/test_chaining_and_caching.py | 2 +- pandas/tests/indexing/multiindex/test_iloc.py | 2 +- .../tests/indexing/multiindex/test_setitem.py | 2 +- .../indexing/test_chaining_and_caching.py | 2 +- pandas/tests/indexing/test_floats.py | 2 +- pandas/tests/indexing/test_iloc.py | 10 ++--- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 6 +-- 
pandas/tests/internals/test_internals.py | 2 +- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/formats/test_format.py | 4 +- pandas/tests/io/json/test_pandas.py | 2 +- pandas/tests/io/pytables/test_append.py | 2 +- pandas/tests/io/pytables/test_errors.py | 6 +-- pandas/tests/io/pytables/test_select.py | 4 +- pandas/tests/io/pytables/test_store.py | 2 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/test_parquet.py | 2 +- pandas/tests/io/test_stata.py | 2 +- pandas/tests/plotting/frame/test_frame.py | 37 ++++++++++--------- .../tests/plotting/frame/test_frame_color.py | 14 +++---- .../plotting/frame/test_frame_subplots.py | 34 ++++++++--------- .../tests/plotting/frame/test_hist_box_by.py | 4 +- pandas/tests/plotting/test_boxplot_method.py | 37 +++++++++++++------ pandas/tests/plotting/test_datetimelike.py | 4 +- pandas/tests/plotting/test_groupby.py | 8 ++-- pandas/tests/plotting/test_hist_method.py | 10 ++--- pandas/tests/resample/test_datetime_index.py | 4 +- pandas/tests/resample/test_resample_api.py | 12 +++--- pandas/tests/resample/test_time_grouper.py | 10 ++--- pandas/tests/reshape/concat/test_concat.py | 20 +++++----- pandas/tests/reshape/concat/test_empty.py | 2 +- pandas/tests/reshape/concat/test_index.py | 2 +- pandas/tests/reshape/concat/test_invalid.py | 2 +- pandas/tests/reshape/merge/test_join.py | 6 +-- pandas/tests/reshape/test_melt.py | 2 +- pandas/tests/series/indexing/test_datetime.py | 2 +- pandas/tests/series/methods/test_matmul.py | 2 +- .../tests/series/methods/test_sort_values.py | 2 +- pandas/tests/test_take.py | 2 +- pandas/tests/window/conftest.py | 2 +- 79 files changed, 255 insertions(+), 224 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 195056874479e..f51858ebbcf05 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -659,7 +659,7 @@ def test_apply_category_equalness(val): def test_infer_row_shape(): # 
GH 17437 # if row shape is changing, infer it - df = DataFrame(np.random.default_rng(2).random(10, 2)) + df = DataFrame(np.random.default_rng(2).random((10, 2))) result = df.apply(np.fft.fft, axis=0).shape assert result == (10, 2) @@ -914,7 +914,7 @@ def test_consistent_coerce_for_shapes(lst): # we want column names to NOT be propagated # just because the shape matches the input shape df = DataFrame( - np.random.default_rng(2).standard_normal(4, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] ) result = df.apply(lambda x: lst, axis=1) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 9d018ea8157ff..01c167b272f2c 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -770,7 +770,7 @@ def test_divmod_series(self, numeric_idx): @pytest.mark.parametrize("other", [np.nan, 7, -23, 2.718, -3.14, np.inf]) def test_ops_np_scalar(self, other): - vals = np.random.default_rng(2).standard_normal(5, 3) + vals = np.random.default_rng(2).standard_normal((5, 3)) f = lambda x: pd.DataFrame( x, index=list("ABCDE"), columns=["jim", "joe", "jolie"] ) @@ -1414,7 +1414,7 @@ def test_dataframe_div_silenced(): columns=list("ABCD"), ) pdf2 = pd.DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), index=list("abcdefghjk"), columns=list("ABCX"), ) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 2b3eb166834c1..67b9afd6054f7 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -624,7 +624,7 @@ def test_unary_in_function(self): ) def test_disallow_scalar_bool_ops(self, ex, engine, parser): x, a, b = np.random.default_rng(2).standard_normal(3), 1, 2 # noqa: F841 - df = DataFrame(np.random.default_rng(2).standard_normal(3, 2)) # noqa: F841 + df = 
DataFrame(np.random.default_rng(2).standard_normal((3, 2))) # noqa: F841 msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" with pytest.raises(NotImplementedError, match=msg): @@ -990,7 +990,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): with tm.assert_produces_warning(False): pd.eval("df + s", engine=engine, parser=parser) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 10)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 10))) s = Series(np.random.default_rng(2).standard_normal(10000)) is_python_engine = engine == "python" @@ -1108,19 +1108,19 @@ def test_single_variable(self): tm.assert_frame_equal(df, df2) def test_failing_subscript_with_name_error(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) # noqa: F841 + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) # noqa: F841 with pytest.raises(NameError, match="name 'x' is not defined"): self.eval("df[x > 2] > 2") def test_lhs_expression_subscript(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) result = self.eval("(df + 1)[df > 2]", local_dict={"df": df}) expected = (df + 1)[df > 2] tm.assert_frame_equal(result, expected) def test_attr_expression(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), columns=list("abc") + np.random.default_rng(2).standard_normal((5, 3)), columns=list("abc") ) expr1 = "df.a < df.b" expec1 = df.a < df.b @@ -1135,9 +1135,9 @@ def test_attr_expression(self): def test_assignment_fails(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), columns=list("abc") + np.random.default_rng(2).standard_normal((5, 3)), columns=list("abc") ) - df2 = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df2 = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) expr1 = "df = df2" msg = "cannot assign without a target object" with 
pytest.raises(ValueError, match=msg): @@ -1451,7 +1451,7 @@ def test_nested_period_index_subscript_expression(self): tm.assert_frame_equal(r, e) def test_date_boolean(self, engine, parser): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) df["dates1"] = date_range("1/1/2012", periods=5) res = self.eval( "df.dates1 < 20130101", @@ -1525,7 +1525,7 @@ def test_check_many_exprs(self, engine, parser): ], ) def test_fails_and_or_not(self, expr, engine, parser): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) if parser == "python": msg = "'BoolOp' nodes are not implemented" if "not" in expr: @@ -1549,7 +1549,7 @@ def test_fails_and_or_not(self, expr, engine, parser): @pytest.mark.parametrize("char", ["|", "&"]) def test_fails_ampersand_pipe(self, char, engine, parser): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) # noqa: F841 + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) # noqa: F841 ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)" if parser == "python": msg = "cannot evaluate scalar only bool ops" diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index 1ffa97a41ab69..08e49aa3813d8 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -76,7 +76,7 @@ def func(): def test_delete(using_copy_on_write): df = DataFrame( - np.random.default_rng(2).standard_normal(4, 3), columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"] ) del df["b"] if using_copy_on_write: diff --git a/pandas/tests/frame/indexing/test_delitem.py b/pandas/tests/frame/indexing/test_delitem.py index b6b0bf03f49a3..4c57955a2113b 100644 --- a/pandas/tests/frame/indexing/test_delitem.py +++ 
b/pandas/tests/frame/indexing/test_delitem.py @@ -55,6 +55,6 @@ def test_delitem_col_still_multiindex(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).standard_normal(3, 4), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 4)), columns=index) del df[("a", "", "")] assert isinstance(df.columns, MultiIndex) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index deddbcb79706b..1bc2cba7f74af 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -245,7 +245,7 @@ def test_boolean_index_empty_corner(self): def test_getitem_ix_mixed_integer(self): df = DataFrame( - np.random.default_rng(2).standard_normal(4, 3), + np.random.default_rng(2).standard_normal((4, 3)), index=[1, 10, "C", "E"], columns=[1, 2, 3], ) @@ -604,13 +604,13 @@ def test_getitem_setitem_non_ix_labels(self): tm.assert_frame_equal(result2, expected) def test_ix_multi_take(self): - df = DataFrame(np.random.default_rng(2).standard_normal(3, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 2))) rs = df.loc[df.index == 0, :] xp = df.reindex([0]) tm.assert_frame_equal(rs, xp) # GH#1321 - df = DataFrame(np.random.default_rng(2).standard_normal(3, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 2))) rs = df.loc[df.index == 0, df.columns == 1] xp = df.reindex(index=[0], columns=[1]) tm.assert_frame_equal(rs, xp) @@ -710,7 +710,7 @@ def test_getitem_setitem_boolean_misaligned(self, float_frame): tm.assert_frame_equal(cp, expected) def test_getitem_setitem_boolean_multi(self): - df = DataFrame(np.random.default_rng(2).standard_normal(3, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 2))) # get k1 = np.array([True, False, True]) @@ -813,7 +813,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): def 
test_setitem_single_column_mixed_datetime(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=["a", "b", "c", "d", "e"], columns=["foo", "bar", "baz"], ) @@ -944,7 +944,7 @@ def test_setitem_frame_align(self, float_frame): def test_getitem_setitem_ix_duplicates(self): # #1201 df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=["foo", "foo", "bar", "baz", "bar"], ) @@ -963,7 +963,7 @@ def test_getitem_setitem_ix_duplicates(self): def test_getitem_ix_boolean_duplicates_multiple(self): # #1201 df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=["foo", "foo", "bar", "baz", "bar"], ) @@ -1012,7 +1012,7 @@ def test_single_element_ix_dont_upcast(self, float_frame): def test_iloc_row(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), index=range(0, 20, 2) + np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2) ) result = df.iloc[1] @@ -1035,7 +1035,7 @@ def test_iloc_row(self): def test_iloc_row_slice_view(self, using_copy_on_write, request): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), index=range(0, 20, 2) + np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2) ) original = df.copy() diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 137102ac86e29..12229c28e0a80 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -18,7 +18,7 @@ class TestDataFrameInsert: def test_insert(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=np.arange(5), columns=["c", "b", "a"], ) @@ -74,7 +74,7 @@ def test_insert_with_columns_dups(self): tm.assert_frame_equal(df, exp) def test_insert_item_cache(self, 
using_array_manager, using_copy_on_write): - df = DataFrame(np.random.default_rng(2).standard_normal(4, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) ser = df[0] if using_array_manager: diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 374a02c6f5acc..8536d6e8ff9db 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -18,14 +18,14 @@ class TestDataFrameMask: def test_mask(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) cond = df > 0 rs = df.where(cond, np.nan) tm.assert_frame_equal(rs, df.mask(df <= 0)) tm.assert_frame_equal(rs, df.mask(~cond)) - other = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + other = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) rs = df.where(cond, other) tm.assert_frame_equal(rs, df.mask(df <= 0, other)) tm.assert_frame_equal(rs, df.mask(~cond, other)) @@ -40,7 +40,7 @@ def test_mask2(self): def test_mask_inplace(self): # GH#8801 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) cond = df > 0 rdf = df.copy() diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index b379c12465361..4e5bd74fba666 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -106,7 +106,7 @@ def test_setitem_benchmark(self): def test_setitem_different_dtype(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=np.arange(5), columns=["c", "b", "a"], ) @@ -357,7 +357,7 @@ def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): def test_setitem_periodindex(self): rng = period_range("1/1/2000", periods=5, name="index") - df = 
DataFrame(np.random.default_rng(2).standard_normal(5, 3), index=rng) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), index=rng) df["Index"] = rng rs = Index(df["Index"]) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index f8bef2ca4c97a..0d2364c0c5ffd 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -25,7 +25,7 @@ def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame): if request.param == "default": return DataFrame( - np.random.default_rng(2).standard_normal(5, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((5, 3)), columns=["A", "B", "C"] ) if request.param == "float_string": return float_string_frame @@ -148,7 +148,7 @@ def _check_align(df, cond, other, check_dtypes=True): def test_where_invalid(self): # invalid conditions df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((5, 3)), columns=["A", "B", "C"] ) cond = df > 0 diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index a96c7bf8b3185..34cbebe1b3d3f 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -264,7 +264,7 @@ def test_astype_duplicate_col(self): def test_astype_duplicate_col_series_arg(self): # GH#44417 - vals = np.random.default_rng(2).standard_normal(3, 4) + vals = np.random.default_rng(2).standard_normal((3, 4)) df = DataFrame(vals, columns=["A", "B", "C", "A"]) dtypes = df.dtypes dtypes.iloc[0] = str diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index 43bc61138291b..088e84e083084 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -32,7 +32,9 @@ def test_localized_at_time(self, tzstr, frame_or_series): def test_at_time(self, 
frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) + ts = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 2)), index=rng + ) ts = tm.get_obj(ts, frame_or_series) rs = ts.at_time(rng[1]) assert (rs.index.hour == rng[1].hour).all() diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index 95285250b6181..be6f6f05babdf 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -22,7 +22,9 @@ class TestBetweenTime: def test_between_time_formats(self, frame_or_series): # GH#11818 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) + ts = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 2)), index=rng + ) ts = tm.get_obj(ts, frame_or_series) strings = [ @@ -69,7 +71,9 @@ def test_between_time_types(self, frame_or_series): def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) + ts = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 2)), index=rng + ) ts = tm.get_obj(ts, frame_or_series) stime = time(0, 0) @@ -103,7 +107,9 @@ def test_between_time(self, inclusive_endpoints_fixture, frame_or_series): # across midnight rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) + ts = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 2)), index=rng + ) ts = tm.get_obj(ts, frame_or_series) stime = time(22, 0) etime = time(9, 0) @@ -209,7 +215,9 @@ def test_between_time_datetimeindex(self): def test_between_time_incorrect_arg_inclusive(self): # GH40245 rng = date_range("1/1/2000", 
"1/5/2000", freq="5min") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 2), index=rng) + ts = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 2)), index=rng + ) stime = time(0, 0) etime = time(1, 0) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 30b6789cac41c..ff0b795b44da6 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -314,7 +314,9 @@ def test_corrwith(self, datetime_frame, dtype): index = ["a", "b", "c", "d", "e"] columns = ["one", "two", "three", "four"] df1 = DataFrame( - np.random.default_rng(2).standard_normal(5, 4), index=index, columns=columns + np.random.default_rng(2).standard_normal((5, 4)), + index=index, + columns=columns, ) df2 = DataFrame( np.random.default_rng(2).standard_normal(4, 4), diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 031b08c585ccb..62b20232c12f2 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -156,7 +156,7 @@ def test_diff_timedelta(self): tm.assert_frame_equal(res, exp) def test_diff_mixed_dtype(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) df["A"] = np.array([1, 2, 3, 4, 5], dtype=object) result = df.diff() diff --git a/pandas/tests/frame/methods/test_dot.py b/pandas/tests/frame/methods/test_dot.py index addd0309d0bb9..5cec758a619be 100644 --- a/pandas/tests/frame/methods/test_dot.py +++ b/pandas/tests/frame/methods/test_dot.py @@ -90,7 +90,7 @@ def obj(self): @pytest.fixture def other(self): return DataFrame( - np.random.default_rng(2).standard_normal(3, 4), + np.random.default_rng(2).standard_normal((3, 4)), index=["1", "2", "3"], columns=["p", "q", "r", "s"], ).T @@ -111,7 +111,7 @@ class TestDataFrameDot(DotSharedTests): @pytest.fixture def obj(self): return DataFrame( - 
np.random.default_rng(2).standard_normal(3, 4), + np.random.default_rng(2).standard_normal((3, 4)), index=["a", "b", "c"], columns=["p", "q", "r", "s"], ) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index cf00934fd1c52..fde0c424d0418 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -55,7 +55,7 @@ def test_drop_with_non_unique_datetime_index_and_invalid_keys(): # define dataframe with unique datetime index df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), columns=["a", "b", "c"], index=pd.date_range("2012", freq="H", periods=5), ) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index df1f9715e5792..4f505e4e72a27 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -442,7 +442,7 @@ def test_bfill(self, datetime_frame): def test_frame_pad_backfill_limit(self): index = np.arange(10) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)), index=index) result = df[:2].reindex(index, method="pad", limit=5) @@ -461,7 +461,7 @@ def test_frame_pad_backfill_limit(self): def test_frame_fillna_limit(self): index = np.arange(10) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)), index=index) result = df[:2].reindex(index) msg = "DataFrame.fillna with 'method' is deprecated" @@ -485,14 +485,14 @@ def test_frame_fillna_limit(self): def test_fillna_skip_certain_blocks(self): # don't try to fill boolean, int blocks - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4).astype(int)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)).astype(int)) # it works! 
df.fillna(np.nan) @pytest.mark.parametrize("type", [int, float]) def test_fillna_positive_limit(self, type): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)).astype(type) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))).astype(type) msg = "Limit must be greater than 0" with pytest.raises(ValueError, match=msg): @@ -500,14 +500,14 @@ def test_fillna_positive_limit(self, type): @pytest.mark.parametrize("type", [int, float]) def test_fillna_integer_limit(self, type): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)).astype(type) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))).astype(type) msg = "Limit must be an integer" with pytest.raises(ValueError, match=msg): df.fillna(0, limit=0.5) def test_fillna_inplace(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) df.loc[:4, 1] = np.nan df.loc[-4:, 3] = np.nan @@ -595,7 +595,7 @@ def test_fillna_dataframe(self): tm.assert_frame_equal(result, expected) def test_fillna_columns(self): - arr = np.random.default_rng(2).standard_normal(10, 10) + arr = np.random.default_rng(2).standard_normal((10, 10)) arr[:, ::2] = np.nan df = DataFrame(arr) diff --git a/pandas/tests/frame/methods/test_matmul.py b/pandas/tests/frame/methods/test_matmul.py index 7d566406f366d..32c6e974266d9 100644 --- a/pandas/tests/frame/methods/test_matmul.py +++ b/pandas/tests/frame/methods/test_matmul.py @@ -15,7 +15,7 @@ class TestMatMul: def test_matmul(self): # matmul test is for GH#10259 a = DataFrame( - np.random.default_rng(2).standard_normal(3, 4), + np.random.default_rng(2).standard_normal((3, 4)), index=["a", "b", "c"], columns=["p", "q", "r", "s"], ) @@ -70,12 +70,12 @@ def test_matmul(self): # unaligned df = DataFrame( - np.random.default_rng(2).standard_normal(3, 4), + np.random.default_rng(2).standard_normal((3, 4)), index=[1, 2, 3], columns=range(4), ) df2 = DataFrame( - 
np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=range(5), columns=[1, 2, 3], ) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index d9ccd1d2f6c42..90bbec54502b3 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -774,7 +774,7 @@ def test_quantile_item_cache( # previous behavior incorrect retained an invalid _item_cache entry interpolation, method = interp_method df = DataFrame( - np.random.default_rng(2).standard_normal(4, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] ) df["D"] = df["A"] * 2 ser = df["A"] diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 36d5938095276..5bab0842e37f3 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -804,7 +804,7 @@ def test_reindex_axes(self): assert index_freq == seq_freq def test_reindex_fill_value(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) # axis=0 result = df.reindex(list(range(15))) @@ -1048,7 +1048,8 @@ def test_reindex_multi(self): tm.assert_frame_equal(result, expected) df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3) + 1j, columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((5, 3)) + 1j, + columns=["a", "b", "c"], ) result = df.reindex(index=[0, 1], columns=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index d11b330491378..2559b71e97fb1 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -16,7 +16,7 @@ def obj(self, frame_or_series): if frame_or_series is Series: arr = np.random.default_rng(2).standard_normal(10) else: - arr = 
np.random.default_rng(2).standard_normal(10, 10) + arr = np.random.default_rng(2).standard_normal((10, 10)) return frame_or_series(arr, dtype=None) @pytest.mark.parametrize("test", list(range(10))) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 993e4892606cd..e18d52a4cc547 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -20,7 +20,7 @@ class TestDataFrameShift: def test_shift_axis1_with_valid_fill_value_one_array(self): # Case with axis=1 that does not go through the "len(arrays)>1" path # in DataFrame.shift - data = np.random.default_rng(2).standard_normal(5, 3) + data = np.random.default_rng(2).standard_normal((5, 3)) df = DataFrame(data) res = df.shift(axis=1, periods=1, fill_value=12345) expected = df.T.shift(periods=1, fill_value=12345).T @@ -650,7 +650,7 @@ def test_shift_axis1_categorical_columns(self): def test_shift_axis1_many_periods(self): # GH#44978 periods > len(columns) - df = DataFrame(np.random.default_rng(2).random(5, 3)) + df = DataFrame(np.random.default_rng(2).random((5, 3))) shifted = df.shift(6, axis=1, fill_value=None) expected = df * np.nan diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 21a93eebcf4e9..4c8b27bfde06b 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -688,7 +688,7 @@ def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): ], ) def test_sort_index_multilevel_repr_8017(self, gen, extra): - data = np.random.default_rng(2).standard_normal(3, 4) + data = np.random.default_rng(2).standard_normal((3, 4)) columns = MultiIndex.from_tuples([("red", i) for i in gen]) df = DataFrame(data, index=list("def"), columns=columns) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 272a574dcd7e8..c7619bc4ea9a5 100644 --- 
a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -601,7 +601,7 @@ def test_sort_values_nat_na_position_default(self): def test_sort_values_item_cache(self, using_array_manager, using_copy_on_write): # previous behavior incorrect retained an invalid _item_cache entry df = DataFrame( - np.random.default_rng(2).standard_normal(4, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] ) df["D"] = df["A"] * 2 ser = df["A"] diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index a0cd3344008fc..bdb9b2c055061 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -24,7 +24,7 @@ def test_to_numpy_dtype(self): @td.skip_array_manager_invalid_test def test_to_numpy_copy(self, using_copy_on_write): - arr = np.random.default_rng(2).standard_normal(4, 3) + arr = np.random.default_rng(2).standard_normal((4, 3)) df = DataFrame(arr) if using_copy_on_write: assert df.values.base is not arr diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 68ae8a4cb2a79..5be30fecc13d3 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -291,8 +291,8 @@ class TestFrameFlexComparisons: # TODO: test_bool_flex_frame needs a better name @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"]) def test_bool_flex_frame(self, op): - data = np.random.default_rng(2).standard_normal(5, 3) - other_data = np.random.default_rng(2).standard_normal(5, 3) + data = np.random.default_rng(2).standard_normal((5, 3)) + other_data = np.random.default_rng(2).standard_normal((5, 3)) df = DataFrame(data) other = DataFrame(other_data) ndim_5 = np.ones(df.shape + (1, 3)) @@ -323,7 +323,7 @@ def test_bool_flex_frame(self, op): def test_bool_flex_series(self, box): # Series # list/tuple - data = 
np.random.default_rng(2).standard_normal(5, 3) + data = np.random.default_rng(2).standard_normal((5, 3)) df = DataFrame(data) idx_ser = box(np.random.default_rng(2).standard_normal(5)) col_ser = box(np.random.default_rng(2).standard_normal(3)) @@ -363,7 +363,7 @@ def test_bool_flex_series(self, box): col_ser = Series(np.random.default_rng(2).standard_normal(3)) def test_bool_flex_frame_na(self): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) # NA df.loc[0, 0] = np.nan rs = df.eq(df) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 3b65ee9a431db..822347d7a9a24 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -21,7 +21,7 @@ class TestDataFrameNonuniqueIndexes: def test_setattr_columns_vs_construct_with_columns(self): # assignment # GH 3687 - arr = np.random.default_rng(2).standard_normal(3, 2) + arr = np.random.default_rng(2).standard_normal((3, 2)) idx = list(range(2)) df = DataFrame(arr, columns=["A", "A"]) df.columns = idx @@ -164,7 +164,7 @@ def test_dup_across_dtypes(self): def test_column_dups_indexes(self): # check column dups with index equal and not equal to df's index df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=["a", "b", "c", "d", "e"], columns=["A", "B", "A"], ) diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index b214babd5351c..6f0e3522c3a66 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -31,7 +31,7 @@ def test_sum_deprecated_axis_behavior(self): # GH#52042 deprecated behavior of df.sum(axis=None), which gets # called when we do np.sum(df) - arr = np.random.default_rng(2).standard_normal(4, 3) + arr = np.random.default_rng(2).standard_normal((4, 3)) df = DataFrame(arr) msg = "The behavior 
of DataFrame.sum with axis=None is deprecated" diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 3de7790efdafe..086b3c8541cf2 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -399,7 +399,7 @@ def parser(self): def test_date_query_with_attribute_access(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -410,7 +410,7 @@ def test_date_query_with_attribute_access(self, engine, parser): tm.assert_frame_equal(res, expec) def test_date_query_no_attribute_access(self, engine, parser): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -496,7 +496,7 @@ def test_query_scope(self, engine, parser): skip_if_no_pandas_parser(parser) df = DataFrame( - np.random.default_rng(2).standard_normal(20, 2), columns=list("ab") + np.random.default_rng(2).standard_normal((20, 2)), columns=list("ab") ) a, b = 1, 2 # noqa: F841 @@ -587,8 +587,8 @@ def test_query_index_without_name(self, engine, parser): def test_nested_scope(self, engine, parser): skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) - df2 = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) + df2 = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) expected = df[(df > 0) & (df2 > 0)] result = df.query("(@df > 0) & (@df2 > 0)", engine=engine, parser=parser) @@ -608,7 +608,7 @@ def 
test_nested_scope(self, engine, parser): tm.assert_frame_equal(result, expected) def test_nested_raises_on_local_self_reference(self, engine, parser): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) # can't reference ourself b/c we're a local so @ is necessary with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): @@ -618,7 +618,7 @@ def test_local_syntax(self, engine, parser): skip_if_no_pandas_parser(parser) df = DataFrame( - np.random.default_rng(2).standard_normal(100, 10), + np.random.default_rng(2).standard_normal((100, 10)), columns=list("abcdefghij"), ) b = 1 @@ -670,7 +670,7 @@ def test_query_undefined_local(self): engine, parser = self.engine, self.parser skip_if_no_pandas_parser(parser) - df = DataFrame(np.random.default_rng(2).random(10, 2), columns=list("ab")) + df = DataFrame(np.random.default_rng(2).random((10, 2)), columns=list("ab")) with pytest.raises( UndefinedVariableError, match="local variable 'c' is not defined" ): @@ -776,7 +776,7 @@ def parser(self): return "python" def test_date_query_no_attribute_access(self, engine, parser): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) df["dates1"] = date_range("1/1/2012", periods=5) df["dates2"] = date_range("1/1/2013", periods=5) df["dates3"] = date_range("1/1/2014", periods=5) @@ -848,8 +848,8 @@ def test_nested_scope(self, engine, parser): result = pd.eval("x + 1", engine=engine, parser=parser) assert result == 2 - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) - df2 = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) + df2 = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) # don't have the pandas parser msg = r"The '@' prefix is only supported by the pandas parser" @@ -930,7 +930,7 @@ def 
test_query_builtin(self, engine, parser): class TestDataFrameQueryStrings: def test_str_query_method(self, parser, engine): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 1), columns=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 1)), columns=["b"]) df["strings"] = Series(list("aabbccddee")) expect = df[df.strings == "a"] @@ -971,7 +971,7 @@ def test_str_query_method(self, parser, engine): tm.assert_frame_equal(res, df[~df.strings.isin(["a"])]) def test_str_list_query_method(self, parser, engine): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 1), columns=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 1)), columns=["b"]) df["strings"] = Series(list("aabbccddee")) expect = df[df.strings.isin(["a", "b"])] diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 29750074c6cf9..f53038a7c9884 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -465,7 +465,7 @@ def test_var_std(self, datetime_frame): def test_numeric_only_flag(self, meth): # GH 9201 df1 = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), columns=["foo", "bar", "baz"], ) # Cast to object to avoid implicit cast when setting entry to "100" below @@ -474,7 +474,7 @@ def test_numeric_only_flag(self, meth): df1.loc[0, "foo"] = "100" df2 = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), columns=["foo", "bar", "baz"], ) # Cast to object to avoid implicit cast when setting entry to "a" below diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index bdd865576f7ed..70517db1478b9 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -345,7 +345,7 @@ def test_frame_datetime64_pre1900_repr(self): def test_frame_to_string_with_periodindex(self): index = 
PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") - frame = DataFrame(np.random.default_rng(2).standard_normal(3, 4), index=index) + frame = DataFrame(np.random.default_rng(2).standard_normal((3, 4)), index=index) # it works! frame.to_string() diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 6215342f31dc0..867014f467c34 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -660,7 +660,9 @@ def test_corrwith(self): index = ["a", "b", "c", "d", "e"] columns = ["one", "two", "three", "four"] df1 = tm.SubclassedDataFrame( - np.random.default_rng(2).standard_normal(5, 4), index=index, columns=columns + np.random.default_rng(2).standard_normal((5, 4)), + index=index, + columns=columns, ) df2 = tm.SubclassedDataFrame( np.random.default_rng(2).standard_normal(4, 4), diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index beec7f6a824fe..9d3ebbd3672ae 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -87,7 +87,7 @@ def test_agg_datetimes_mixed(): def test_agg_period_index(): prng = period_range("2012-1-1", freq="M", periods=3) - df = DataFrame(np.random.default_rng(2).standard_normal(3, 2), index=prng) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 2)), index=prng) rs = df.groupby(level=0).sum() assert isinstance(rs.index, PeriodIndex) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 229582f007710..43cdb8267deda 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -710,7 +710,7 @@ def test_describe_categorical_columns(): categories=["foo", "bar", "baz", "qux"], ordered=True, ) - df = DataFrame(np.random.default_rng(2).standard_normal(20, 4), columns=cats) + df = DataFrame(np.random.default_rng(2).standard_normal((20, 4)), columns=cats) result = 
df.groupby([1, 2, 3, 4] * 5).describe() tm.assert_index_equal(result.stack().columns, cats) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index dc3941bd9dfee..cb25e8dc96f92 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -518,7 +518,7 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only): def test_idxmin_idxmax_axis1(): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), columns=["A", "B", "C", "D"] + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] ) df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] @@ -551,7 +551,7 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only): request.node.add_marker(pytest.mark.xfail(reason=msg)) df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), columns=["A", "B", "C", "D"] + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] ) df["E"] = "x" groups = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 4c5a831f6b1f6..5cfee9341a38c 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -21,7 +21,7 @@ class TestSlicing: def test_string_index_series_name_converted(self): # GH#1644 df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), index=date_range("1/1/2000", periods=10), ) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index b0d2cf1736e57..2c21bc07c087f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -150,7 +150,7 @@ def test_constructor_from_frame_series_freq(self): dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] expected = DatetimeIndex(dts, freq="MS") - df = 
DataFrame(np.random.default_rng(2).random(5, 3)) + df = DataFrame(np.random.default_rng(2).random((5, 3))) df["date"] = dts result = DatetimeIndex(df["date"], freq="MS") diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 60eec0c9e4342..7adc697610f3c 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -42,7 +42,7 @@ def test_detect_chained_assignment(using_copy_on_write): def test_cache_updating(using_copy_on_write): # 5216 # make sure that we don't try to set a dead cache - a = np.random.default_rng(2).random(10, 3) + a = np.random.default_rng(2).random((10, 3)) df = DataFrame(a, columns=["x", "y", "z"]) df_original = df.copy() tuples = [(i, j) for i in range(5) for j in range(2)] diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index 77f9511b63c3a..2052886619fac 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -75,7 +75,7 @@ def test_iloc_getitem_multiple_items(): def test_iloc_getitem_labels(): # this is basically regular indexing - arr = np.random.default_rng(2).standard_normal(4, 3) + arr = np.random.default_rng(2).standard_normal((4, 3)) df = DataFrame( arr, columns=[["i", "i", "j"], ["A", "A", "B"]], diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index f7dd8def0ced5..609bf985f14a1 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -336,7 +336,7 @@ def test_frame_getitem_setitem_multislice(self): def test_frame_setitem_multi_column(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]], ) diff --git 
a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index bb30f9c844429..fe39d760028a7 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -400,7 +400,7 @@ def test_detect_chained_assignment_is_copy(self): @pytest.mark.arm_slow def test_detect_chained_assignment_sorting(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) ser = df.iloc[:, 0].sort_values() tm.assert_series_equal(ser, df.iloc[:, 0].sort_values()) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index d818df3fcb963..1c91b69597d78 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -15,7 +15,7 @@ def gen_obj(klass, index): obj = Series(np.arange(len(index)), index=index) else: obj = DataFrame( - np.random.default_rng(2).standard_normal(len(index), len(index)), + np.random.default_rng(2).standard_normal((len(index), len(index))), index=index, columns=index, ) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 61d4dc05d4ac3..6a5b68d7d6a3c 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -406,7 +406,7 @@ def test_iloc_getitem_slice(self): def test_iloc_getitem_slice_dups(self): df1 = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "A", "B", "B"], ) df2 = DataFrame( @@ -553,7 +553,7 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( # is redundant with another test comparing iloc against loc def test_iloc_getitem_frame(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2), columns=range(0, 8, 2), ) @@ -603,7 +603,7 @@ def 
test_iloc_getitem_frame(self): def test_iloc_getitem_labelled_frame(self): # try with labelled frame df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), index=list("abcdefghij"), columns=list("ABCD"), ) @@ -679,7 +679,7 @@ def test_iloc_getitem_doc_issue(self, using_array_manager): def test_iloc_setitem_series(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), index=list("abcdefghij"), columns=list("ABCD"), ) @@ -1078,7 +1078,7 @@ def test_iloc_getitem_float_duplicates(self): tm.assert_series_equal(df.loc[0.2, "a"], expect) df = DataFrame( - np.random.default_rng(2).standard_normal(4, 3), + np.random.default_rng(2).standard_normal((4, 3)), index=[1, 0.2, 0.2, 1], columns=list("abc"), ) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index be562f4003094..e110517a46a8b 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -280,7 +280,7 @@ def test_dups_fancy_indexing_not_in_order(self): def test_dups_fancy_indexing_only_missing_label(self): # List containing only missing label dfnu = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), index=list("AABCD") + np.random.default_rng(2).standard_normal((5, 3)), index=list("AABCD") ) with pytest.raises( KeyError, diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ade08ed269b1a..8b8eb977f08de 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1098,7 +1098,7 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): assert (sliced_df["a"] == 4).all() # These should not return copies - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) if using_copy_on_write: assert df[0] is not df.loc[:, 0] else: @@ -1606,7 +1606,7 
@@ def test_loc_getitem_index_namedtuple(self): def test_loc_setitem_single_column_mixed(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), index=["a", "b", "c", "d", "e"], columns=["foo", "bar", "baz"], ) @@ -2848,7 +2848,7 @@ def test_loc_datetimelike_mismatched_dtypes(): # GH#32650 dont mix and match datetime/timedelta/period dtypes df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), columns=["a", "b", "c"], index=date_range("2012", freq="H", periods=5), ) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 5780f8af93116..ed9b827e249f2 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -350,7 +350,7 @@ def test_delete_datetimelike(self): def test_split(self): # GH#37799 - values = np.random.default_rng(2).standard_normal(3, 4) + values = np.random.default_rng(2).standard_normal((3, 4)) blk = new_block(values, placement=BlockPlacement([3, 1, 6]), ndim=2) result = blk._split() diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index c80220da41728..65f4856d1ecea 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -615,7 +615,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): tm.assert_frame_equal(df, recons) def test_excel_roundtrip_indexname(self, merge_cells, path): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) df.index.name = "foo" df.to_excel(path, merge_cells=merge_cells) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 37ae131b7486f..64a4a9f42f437 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -327,7 +327,7 @@ def 
test_repr_should_return_str(self): def test_repr_no_backslash(self): with option_context("mode.sim_interactive", True): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) assert "\\" not in repr(df) def test_expand_frame_repr(self): @@ -1193,7 +1193,7 @@ def test_wide_repr(self): def test_wide_repr_wide_columns(self): with option_context("mode.sim_interactive", True, "display.max_columns", 20): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), + np.random.default_rng(2).standard_normal((5, 3)), columns=["a" * 90, "b" * 90, "c" * 90], ) rep_str = repr(df) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 21d9bb84fdfef..b91c5bcc6ae0a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1173,7 +1173,7 @@ def test_datetime_tz(self): def test_sparse(self): # GH4377 df.to_json segfaults with non-ndarray blocks - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) df.loc[:8] = np.nan sdf = df.astype("Sparse") diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 2e45a64c87103..1fc2877e70c65 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -690,7 +690,7 @@ def test_append_misc_empty_frame(setup_path): store.select("df") # repeated append of 0/non-zero frames - df = DataFrame(np.random.default_rng(2).random(10, 3), columns=list("ABC")) + df = DataFrame(np.random.default_rng(2).random((10, 3)), columns=list("ABC")) store.append("df", df) tm.assert_frame_equal(store.select("df"), df) store.append("df", df_empty) diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 84103d2bd79ca..e8e62d3fdd33b 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ 
b/pandas/tests/io/pytables/test_errors.py @@ -115,7 +115,7 @@ def test_invalid_terms(tmp_path, setup_path): # from the docs path = tmp_path / setup_path dfq = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=list("ABCD"), index=date_range("20130101", periods=10), ) @@ -128,7 +128,7 @@ def test_invalid_terms(tmp_path, setup_path): # catch the invalid reference path = tmp_path / setup_path dfq = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=list("ABCD"), index=date_range("20130101", periods=10), ) @@ -146,7 +146,7 @@ def test_invalid_terms(tmp_path, setup_path): def test_append_with_diff_col_name_types_raises_value_error(setup_path): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 1))) df2 = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) df3 = DataFrame({(1, 2): np.random.default_rng(2).standard_normal(10)}) df4 = DataFrame({("1", 2): np.random.default_rng(2).standard_normal(10)}) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 4fdcb02e0e501..4309e8fcde1f6 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -64,7 +64,7 @@ def test_select_columns_in_where(setup_path): def test_select_with_dups(setup_path): # single dtypes df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), columns=["A", "A", "B", "B"] + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "A", "B", "B"] ) df.index = date_range("20130101 9:30", periods=10, freq="T") @@ -87,7 +87,7 @@ def test_select_with_dups(setup_path): df = concat( [ DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "A", "B", "B"], ), DataFrame( diff --git 
a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 0ead7541bb375..6c271032bb80e 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -500,7 +500,7 @@ def test_remove(setup_path): def test_same_name_scoping(setup_path): with ensure_clean_store(setup_path) as store: df = DataFrame( - np.random.default_rng(2).standard_normal(20, 2), + np.random.default_rng(2).standard_normal((20, 2)), index=date_range("20130101", periods=20), ) store.put("df", df, format="table") diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index c201f36c0d076..cafe690e338d6 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1278,7 +1278,7 @@ def test_fallback_success(self, datapath): def test_to_html_timestamp(self): rng = date_range("2000-01-01", periods=10) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4), index=rng) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)), index=rng) result = df.to_html() assert "2000-01-01" in result diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 0eb623937f21c..d091cec53bfce 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -517,7 +517,7 @@ def test_write_column_multiindex(self, engine): # Not able to write column multi-indexes with non-string column names. 
mi_columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) df = pd.DataFrame( - np.random.default_rng(2).standard_normal(4, 3), columns=mi_columns + np.random.default_rng(2).standard_normal((4, 3)), columns=mi_columns ) if engine == "fastparquet": diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 99392b9b1b3d7..6deb2b0a05ecc 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -381,7 +381,7 @@ def test_write_preserves_original(self): # 9795 df = DataFrame( - np.random.default_rng(2).standard_normal(5, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal((5, 4)), columns=list("abcd") ) df.loc[2, "a":"c"] = np.nan df_copy = df.copy() diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 1a73f87c1f589..8b6c1cc9fb134 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -97,7 +97,7 @@ def test_plot_invalid_arg(self): @pytest.mark.slow def test_plot_tick_props(self): df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) @@ -116,7 +116,7 @@ def test_plot_tick_props(self): ) def test_plot_other_args(self, kwargs): df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) _check_plot_works(df.plot, **kwargs) @@ -124,7 +124,7 @@ def test_plot_other_args(self, kwargs): @pytest.mark.slow def test_plot_visible_ax(self): df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) # We have to redo it here because _check_plot_works does two plots, @@ -148,7 +148,7 @@ def test_plot_visible_ax(self): @pytest.mark.slow def test_plot_title(self): df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 
3)), index=list(string.ascii_letters[:10]), ) _check_plot_works(df.plot, title="blah") @@ -157,7 +157,8 @@ def test_plot_title(self): def test_plot_multiindex(self): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame( - np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random((10, 3)), + index=MultiIndex.from_tuples(tuples), ) ax = _check_plot_works(df.plot, use_index=True) _check_ticks_props(ax, xrot=0) @@ -665,7 +666,7 @@ def test_barh_barwidth_subplots(self, meth, dim): assert getattr(r, dim)() == width def test_bar_bottom_left_bottom(self): - df = DataFrame(np.random.default_rng(2).random(5, 5)) + df = DataFrame(np.random.default_rng(2).random((5, 5))) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -675,7 +676,7 @@ def test_bar_bottom_left_bottom(self): assert result == [-1, -2, -3, -4, -5] def test_bar_bottom_left_left(self): - df = DataFrame(np.random.default_rng(2).random(5, 5)) + df = DataFrame(np.random.default_rng(2).random((5, 5))) ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) result = [p.get_x() for p in ax.patches] assert result == [1] * 25 @@ -685,7 +686,7 @@ def test_bar_bottom_left_left(self): assert result == [1, 2, 3, 4, 5] def test_bar_bottom_left_subplots(self): - df = DataFrame(np.random.default_rng(2).random(5, 5)) + df = DataFrame(np.random.default_rng(2).random((5, 5))) axes = df.plot.bar(subplots=True, bottom=-1) for ax in axes: result = [p.get_y() for p in ax.patches] @@ -1065,13 +1066,13 @@ def test_kde_df(self): @td.skip_if_no_scipy def test_kde_df_rot(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) ax = df.plot(kind="kde", rot=20, fontsize=5) _check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) @td.skip_if_no_scipy def test_kde_df_subplots(self): - df = 
DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) axes = _check_plot_works( df.plot, default_axes=True, @@ -1082,7 +1083,7 @@ def test_kde_df_subplots(self): @td.skip_if_no_scipy def test_kde_df_logy(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) axes = df.plot(kind="kde", logy=True, subplots=True) _check_ax_scales(axes, yaxis="log") @@ -1132,7 +1133,7 @@ def test_hist_df_series_cumulative(self): tm.assert_almost_equal(rects[-2].get_height(), 10.0) def test_hist_df_orientation(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) # if horizontal, yticklabels are rotated axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal") _check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8) @@ -1524,7 +1525,7 @@ def test_hexbin_cmap(self, kwargs, expected): def test_pie_df_err(self): df = DataFrame( - np.random.default_rng(2).random(5, 3), + np.random.default_rng(2).random((5, 3)), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1535,7 +1536,7 @@ def test_pie_df_err(self): @pytest.mark.parametrize("y", ["Y", 2]) def test_pie_df(self, y): df = DataFrame( - np.random.default_rng(2).random(5, 3), + np.random.default_rng(2).random((5, 3)), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1544,7 +1545,7 @@ def test_pie_df(self, y): def test_pie_df_subplots(self): df = DataFrame( - np.random.default_rng(2).random(5, 3), + np.random.default_rng(2).random((5, 3)), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1561,7 +1562,7 @@ def test_pie_df_subplots(self): def test_pie_df_labels_colors(self): df = DataFrame( - np.random.default_rng(2).random(5, 3), + np.random.default_rng(2).random((5, 3)), columns=["X", "Y", "Z"], index=["a", "b", "c", "d", "e"], ) @@ -1827,7 +1828,7 @@ 
def test_errorbar_asymmetrical(self): def test_table(self): df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) _check_plot_works(df.plot, table=True) @@ -2158,7 +2159,7 @@ def _get_boxed_grid(): axes = _get_boxed_grid() df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), index=ts.index, columns=list("ABCD"), ) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index ff239e684f465..5a878da563d5f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -287,7 +287,7 @@ def test_line_colors_hex(self): def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.default_rng(2).random(10, 2)).plot(color=colors) + DataFrame(np.random.default_rng(2).random((10, 2))).plot(color=colors) assert len(colors) == 3 def test_line_colors_and_styles_subplots(self): @@ -362,7 +362,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).random(5, 5)) + df = DataFrame(np.random.default_rng(2).random((5, 5))) ax = df.plot.area(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -379,7 +379,7 @@ def test_area_colors_poly(self): from matplotlib import cm from matplotlib.collections import PolyCollection - df = DataFrame(np.random.default_rng(2).random(5, 5)) + df = DataFrame(np.random.default_rng(2).random((5, 5))) ax = df.plot.area(colormap="jet") jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] _check_colors(ax.get_lines(), linecolors=jet_colors) @@ -395,7 +395,7 @@ def test_area_colors_stacked_false(self): from matplotlib import cm from matplotlib.collections import PolyCollection - df = DataFrame(np.random.default_rng(2).random(5, 5)) + df = 
DataFrame(np.random.default_rng(2).random((5, 5))) jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] # When stacked=False, alpha is set to 0.5 ax = df.plot.area(colormap=cm.jet, stacked=False) @@ -443,7 +443,7 @@ def test_hist_colors_single_color(self): @td.skip_if_no_scipy def test_kde_colors(self): custom_colors = "rgcby" - df = DataFrame(np.random.default_rng(2).random(5, 5)) + df = DataFrame(np.random.default_rng(2).random((5, 5))) ax = df.plot.kde(color=custom_colors) _check_colors(ax.get_lines(), linecolors=custom_colors) @@ -593,7 +593,7 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(np.random.default_rng(2).standard_normal(5, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) ax = df.plot() expected = _unpack_cycler(plt.rcParams)[:3] @@ -651,7 +651,7 @@ def test_colors_of_columns_with_same_name(self): def test_invalid_colormap(self): df = DataFrame( - np.random.default_rng(2).standard_normal(3, 2), columns=["A", "B"] + np.random.default_rng(2).standard_normal((3, 2)), columns=["A", "B"] ) msg = "(is not a valid value)|(is not a known colormap)" with pytest.raises((ValueError, KeyError), match=msg): diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index fda2178535806..526ffd80c9efc 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -36,7 +36,7 @@ class TestDataFramePlotsSubplots: @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots(self, kind): df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) @@ -66,7 +66,7 @@ def test_subplots(self, kind): @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots_no_share_x(self, kind): df = DataFrame( - 
np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) axes = df.plot(kind=kind, subplots=True, sharex=False) @@ -81,7 +81,7 @@ def test_subplots_no_share_x(self, kind): @pytest.mark.parametrize("kind", ["bar", "barh", "line", "area"]) def test_subplots_no_legend(self, kind): df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) axes = df.plot(kind=kind, subplots=True, legend=False) @@ -91,7 +91,7 @@ def test_subplots_no_legend(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries(self, kind): idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.default_rng(2).random(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).random((10, 3)), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=True) _check_axes_shape(axes, axes_num=3, layout=(3, 1)) @@ -114,7 +114,7 @@ def test_subplots_timeseries(self, kind): @pytest.mark.parametrize("kind", ["line", "area"]) def test_subplots_timeseries_rot(self, kind): idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.default_rng(2).random(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).random((10, 3)), index=idx) axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) for ax in axes: _check_visible(ax.xaxis) @@ -231,7 +231,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): def test_subplots_layout_multi_column(self, layout, exp_layout): # GH 6667 df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) @@ -242,7 +242,7 @@ def test_subplots_layout_multi_column(self, layout, exp_layout): def test_subplots_layout_multi_column_error(self): # GH 6667 df = DataFrame( - np.random.default_rng(2).random(10, 3), + 
np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) msg = "Layout of 1x1 must be larger than required size 3" @@ -266,7 +266,7 @@ def test_subplots_layout_single_column( ): # GH 6667 df = DataFrame( - np.random.default_rng(2).random(10, 1), + np.random.default_rng(2).random((10, 1)), index=list(string.ascii_letters[:10]), ) axes = df.plot(subplots=True, **kwargs) @@ -282,14 +282,14 @@ def test_subplots_layout_single_column( def test_subplots_warnings(self, idx): # GH 9464 with tm.assert_produces_warning(None): - df = DataFrame(np.random.default_rng(2).standard_normal(5, 4), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 4)), index=idx) df.plot(subplots=True, layout=(3, 2)) def test_subplots_multiple_axes(self): # GH 5353, 6970, GH 7069 fig, axes = mpl.pyplot.subplots(2, 3) df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) @@ -307,7 +307,7 @@ def test_subplots_multiple_axes(self): def test_subplots_multiple_axes_error(self): # GH 5353, 6970, GH 7069 df = DataFrame( - np.random.default_rng(2).random(10, 3), + np.random.default_rng(2).random((10, 3)), index=list(string.ascii_letters[:10]), ) msg = "The number of passed axes must be 3, the same as the output plot" @@ -350,7 +350,7 @@ def test_subplots_multiple_axes_single_col(self): # single column _, axes = mpl.pyplot.subplots(1, 1) df = DataFrame( - np.random.default_rng(2).random(10, 1), + np.random.default_rng(2).random((10, 1)), index=list(string.ascii_letters[:10]), ) @@ -402,7 +402,7 @@ def test_subplots_sharex_axes_existing_axes(self): def test_subplots_dup_columns(self): # GH 10962 - df = DataFrame(np.random.default_rng(2).random(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).random((5, 5)), columns=list("aaaaa")) axes = df.plot(subplots=True) for ax in axes: _check_legend_labels(ax, labels=["a"]) @@ -410,7 +410,7 @@ def 
test_subplots_dup_columns(self): def test_subplots_dup_columns_secondary_y(self): # GH 10962 - df = DataFrame(np.random.default_rng(2).random(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).random((5, 5)), columns=list("aaaaa")) axes = df.plot(subplots=True, secondary_y="a") for ax in axes: # (right) is only attached when subplots=False @@ -419,7 +419,7 @@ def test_subplots_dup_columns_secondary_y(self): def test_subplots_dup_columns_secondary_y_no_subplot(self): # GH 10962 - df = DataFrame(np.random.default_rng(2).random(5, 5), columns=list("aaaaa")) + df = DataFrame(np.random.default_rng(2).random((5, 5)), columns=list("aaaaa")) ax = df.plot(secondary_y="a") _check_legend_labels(ax, labels=["a (right)"] * 5) assert len(ax.lines) == 0 @@ -534,7 +534,7 @@ def test_df_subplots_patterns_minorticks_not_shared(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 - df = DataFrame(np.random.default_rng(2).random(10, 2)) + df = DataFrame(np.random.default_rng(2).random((10, 2))) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -550,7 +550,7 @@ def test_subplots_sharex_false(self): def test_subplots_constrained_layout(self): # GH 25261 idx = date_range(start="now", periods=10) - df = DataFrame(np.random.default_rng(2).random(10, 3), index=idx) + df = DataFrame(np.random.default_rng(2).random((10, 3)), index=idx) kwargs = {} if hasattr(mpl.pyplot.Figure, "get_constrained_layout"): kwargs["constrained_layout"] = True diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index a275aa5b9f458..697aa3eefad03 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -17,7 +17,9 @@ @pytest.fixture def hist_df(): - df = DataFrame(np.random.default_rng(2).standard_normal(30, 2), columns=["A", "B"]) + df = DataFrame( + np.random.default_rng(2).standard_normal((30, 
2)), columns=["A", "B"] + ) df["C"] = np.random.default_rng(2).choice(["a", "b", "c"], 30) df["D"] = np.random.default_rng(2).choice(["a", "b", "c"], 30) return df diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 55d2481f3efcb..6217dbfa5aa44 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -89,14 +89,18 @@ def test_boxplot_legacy1_series(self): _check_plot_works(plotting._core.boxplot, data=ser, return_type="dict") def test_boxplot_legacy2(self): - df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) + df = DataFrame( + np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"] + ) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) with tm.assert_produces_warning(UserWarning, check_stacklevel=False): _check_plot_works(df.boxplot, by="X") def test_boxplot_legacy2_with_ax(self): - df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) + df = DataFrame( + np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"] + ) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # When ax is supplied and required number of axes is 1, @@ -107,7 +111,9 @@ def test_boxplot_legacy2_with_ax(self): assert ax_axes is axes def test_boxplot_legacy2_with_ax_return_type(self): - df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) + df = DataFrame( + np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"] + ) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) fig, ax = mpl.pyplot.subplots() @@ -116,7 +122,9 @@ def test_boxplot_legacy2_with_ax_return_type(self): assert ax_axes is axes["A"] def test_boxplot_legacy2_with_multi_col(self): - df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) + df = 
DataFrame( + np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"] + ) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # Multiple columns with an ax argument should use same figure @@ -128,7 +136,9 @@ def test_boxplot_legacy2_with_multi_col(self): assert axes["Col1"].get_figure() is fig def test_boxplot_legacy2_by_none(self): - df = DataFrame(np.random.default_rng(2).random(10, 2), columns=["Col1", "Col2"]) + df = DataFrame( + np.random.default_rng(2).random((10, 2)), columns=["Col1", "Col2"] + ) df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) df["Y"] = Series(["A"] * 10) # When by is None, check that all relevant lines are present in the @@ -196,7 +206,7 @@ def test_boxplot_axis_limits_two_rows(self, hist_df): assert dummy_ax._sharey is None def test_boxplot_empty_column(self): - df = DataFrame(np.random.default_rng(2).standard_normal(20, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((20, 4))) df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type="axes") @@ -240,7 +250,7 @@ def test_boxplot_numeric_data(self): ) def test_color_kwd(self, colors_kwd, expected): # GH: 26214 - df = DataFrame(np.random.default_rng(2).random(10, 2)) + df = DataFrame(np.random.default_rng(2).random((10, 2))) result = df.boxplot(color=colors_kwd, return_type="dict") for k, v in expected.items(): assert result[k][0].get_color() == v @@ -270,7 +280,7 @@ def test_color_kwd(self, colors_kwd, expected): ) def test_colors_in_theme(self, scheme, expected): # GH: 40769 - df = DataFrame(np.random.default_rng(2).random(10, 2)) + df = DataFrame(np.random.default_rng(2).random((10, 2))) import matplotlib.pyplot as plt plt.style.use(scheme) @@ -284,7 +294,7 @@ def test_colors_in_theme(self, scheme, expected): ) def test_color_kwd_errors(self, dict_colors, msg): # GH: 26214 - df = DataFrame(np.random.default_rng(2).random(10, 2)) + df = DataFrame(np.random.default_rng(2).random((10, 2))) with 
pytest.raises(ValueError, match=msg): df.boxplot(color=dict_colors, return_type="dict") @@ -381,7 +391,8 @@ def test_boxplot_legacy1_return_type(self, hist_df): def test_boxplot_legacy2(self): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame( - np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random((10, 3)), + index=MultiIndex.from_tuples(tuples), ) grouped = df.groupby(level=1) with tm.assert_produces_warning(UserWarning, check_stacklevel=False): @@ -392,7 +403,8 @@ def test_boxplot_legacy2(self): def test_boxplot_legacy2_return_type(self): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame( - np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random((10, 3)), + index=MultiIndex.from_tuples(tuples), ) grouped = df.groupby(level=1) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") @@ -405,7 +417,8 @@ def test_boxplot_legacy2_return_type(self): def test_boxplot_legacy3(self, subplots, warn, axes_num, layout): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame( - np.random.default_rng(2).random(10, 3), index=MultiIndex.from_tuples(tuples) + np.random.default_rng(2).random((10, 3)), + index=MultiIndex.from_tuples(tuples), ) msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 27d0a21452cb0..fd9c960ccf56f 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -695,7 +695,7 @@ def test_secondary_bar(self): def test_secondary_frame(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((5, 3)), columns=["a", "b", "c"] ) axes = df.plot(secondary_y=["a", "c"], subplots=True) assert 
axes[0].get_yaxis().get_ticks_position() == "right" @@ -704,7 +704,7 @@ def test_secondary_frame(self): def test_secondary_bar_frame(self): df = DataFrame( - np.random.default_rng(2).standard_normal(5, 3), columns=["a", "b", "c"] + np.random.default_rng(2).standard_normal((5, 3)), columns=["a", "b", "c"] ) axes = df.plot(kind="bar", secondary_y=["a", "c"], subplots=True) assert axes[0].get_yaxis().get_ticks_position() == "right" diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index 1a7c8f17fe8eb..5ebf93510a615 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -102,7 +102,7 @@ def test_groupby_hist_frame_with_legend(self, column, expected_axes_num): index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).standard_normal(30, 2), + np.random.default_rng(2).standard_normal((30, 2)), index=index, columns=["a", "b"], ) @@ -118,7 +118,7 @@ def test_groupby_hist_frame_with_legend_raises(self, column): # GH 6279 - DataFrameGroupBy histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).standard_normal(30, 2), + np.random.default_rng(2).standard_normal((30, 2)), index=index, columns=["a", "b"], ) @@ -131,7 +131,7 @@ def test_groupby_hist_series_with_legend(self): # GH 6279 - SeriesGroupBy histogram can have a legend index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).standard_normal(30, 2), + np.random.default_rng(2).standard_normal((30, 2)), index=index, columns=["a", "b"], ) @@ -145,7 +145,7 @@ def test_groupby_hist_series_with_legend_raises(self): # GH 6279 - SeriesGroupBy histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).standard_normal(30, 2), + np.random.default_rng(2).standard_normal((30, 2)), index=index, columns=["a", "b"], ) diff --git 
a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 45c053da4a8d8..7111146d70334 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -277,7 +277,7 @@ def test_hist_df_legacy_layout(self): @pytest.mark.slow def test_hist_df_legacy_layout2(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 1)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 1))) _check_plot_works(df.hist) @pytest.mark.slow @@ -517,7 +517,7 @@ def test_hist_with_legend(self, by, column): index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).standard_normal(30, 2), + np.random.default_rng(2).standard_normal((30, 2)), index=index, columns=["a", "b"], ) @@ -543,7 +543,7 @@ def test_hist_with_legend_raises(self, by, column): # GH 6279 - DataFrame histogram with legend and label raises index = Index(15 * ["1"] + 15 * ["2"], name="c") df = DataFrame( - np.random.default_rng(2).standard_normal(30, 2), + np.random.default_rng(2).standard_normal((30, 2)), index=index, columns=["a", "b"], ) @@ -662,7 +662,7 @@ def test_grouped_hist_legacy(self): from pandas.plotting._matplotlib.hist import _grouped_hist rs = np.random.default_rng(2) - df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( 812419200000000000, @@ -753,7 +753,7 @@ def test_grouped_hist_legacy_grouped_hist(self): from pandas.plotting._matplotlib.hist import _grouped_hist rs = np.random.default_rng(2) - df = DataFrame(rs.standard_normal(10, 1), columns=["A"]) + df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( 812419200000000000, diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 231bdfa48b2ac..db188c6c96087 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ 
b/pandas/tests/resample/test_datetime_index.py @@ -637,7 +637,7 @@ def test_resample_dup_index(): # GH 4812 # dup columns with resample raising df = DataFrame( - np.random.default_rng(2).standard_normal(4, 12), + np.random.default_rng(2).standard_normal((4, 12)), index=[2000, 2000, 2000, 2000], columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)], ) @@ -732,7 +732,7 @@ def test_asfreq_non_unique(unit): def test_resample_axis1(unit): rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) df = DataFrame( - np.random.default_rng(2).standard_normal(3, len(rng)), + np.random.default_rng(2).standard_normal((3, len(rng))), columns=rng, index=["a", "b", "c"], ) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 026fe0ee5cc9a..e0002091a9d0a 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -279,7 +279,7 @@ def test_transform_frame(on): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ) expected = df.groupby(pd.Grouper(freq="20min")).transform("mean") if on == "date": @@ -380,7 +380,7 @@ def test_agg(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ) df_col = df.reset_index() df_mult = df_col.copy() @@ -494,7 +494,7 @@ def test_agg_misc(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ) df_col = df.reset_index() 
df_mult = df_col.copy() @@ -593,7 +593,7 @@ def test_multi_agg_axis_1_raises(func): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ).T warning_msg = "DataFrame.resample with axis=1 is deprecated." with tm.assert_produces_warning(FutureWarning, match=warning_msg): @@ -608,7 +608,7 @@ def test_agg_nested_dicts(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ) df_col = df.reset_index() df_mult = df_col.copy() @@ -1015,7 +1015,7 @@ def test_df_axis_param_depr(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" df = DataFrame( - np.random.default_rng(2).random(10, 2), columns=list("AB"), index=index + np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index ).T # Deprecation error when axis=1 is explicitly passed diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 6f44ea2340bb4..8c06f1e8a1e38 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -112,7 +112,7 @@ def test_aaa_group_order(): # GH 12840 # check TimeGrouper perform stable sorts n = 20 - data = np.random.default_rng(2).standard_normal(n, 4) + data = np.random.default_rng(2).standard_normal((n, 4)) df = DataFrame(data, columns=["A", "B", "C", "D"]) df["key"] = [ datetime(2013, 1, 1), @@ -133,7 +133,7 @@ def test_aaa_group_order(): def test_aggregate_normal(resample_method): """Check TimeGrouper's aggregation is identical as normal groupby.""" - data = np.random.default_rng(2).standard_normal(20, 4) + data = 
np.random.default_rng(2).standard_normal((20, 4)) normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, 3, 4, 5] * 4 @@ -159,7 +159,7 @@ def test_aggregate_normal(resample_method): def test_aggregate_nth(): """Check TimeGrouper's aggregation is identical as normal groupby.""" - data = np.random.default_rng(2).standard_normal(20, 4) + data = np.random.default_rng(2).standard_normal((20, 4)) normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, 3, 4, 5] * 4 @@ -211,7 +211,7 @@ def test_aggregate_with_nat(func, fill_value): # and 'nth' doesn't work yet n = 20 - data = np.random.default_rng(2).standard_normal(n, 4).astype("int64") + data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64") normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 @@ -242,7 +242,7 @@ def test_aggregate_with_nat(func, fill_value): def test_aggregate_with_nat_size(): # GH 9925 n = 20 - data = np.random.default_rng(2).standard_normal(n, 4).astype("int64") + data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64") normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index f013771f7d4e1..a317a7756fed5 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -52,7 +52,7 @@ def test_append_concat(self): assert result.index[0] == s1.index[0] def test_concat_copy(self, using_array_manager, using_copy_on_write): - df = DataFrame(np.random.default_rng(2).standard_normal(4, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1)) df3 = DataFrame({5: "foo"}, index=range(4)) @@ -107,7 +107,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): def 
test_concat_with_group_keys(self): # axis=0 - df = DataFrame(np.random.default_rng(2).standard_normal(3, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((3, 4))) df2 = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) result = concat([df, df2], keys=[0, 1]) @@ -123,7 +123,7 @@ def test_concat_with_group_keys(self): tm.assert_frame_equal(result, expected) # axis=1 - df = DataFrame(np.random.default_rng(2).standard_normal(4, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) df2 = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) result = concat([df, df2], keys=[0, 1], axis=1) @@ -135,7 +135,7 @@ def test_concat_with_group_keys(self): tm.assert_frame_equal(result, expected) def test_concat_keys_specific_levels(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]] level = ["three", "two", "one", "zero"] result = concat( @@ -156,10 +156,10 @@ def test_concat_mapping(self, mapping, non_dict_mapping_subclass): constructor = dict if mapping == "dict" else non_dict_mapping_subclass frames = constructor( { - "foo": DataFrame(np.random.default_rng(2).standard_normal(4, 3)), - "bar": DataFrame(np.random.default_rng(2).standard_normal(4, 3)), - "baz": DataFrame(np.random.default_rng(2).standard_normal(4, 3)), - "qux": DataFrame(np.random.default_rng(2).standard_normal(4, 3)), + "foo": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), + "bar": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), + "baz": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), + "qux": DataFrame(np.random.default_rng(2).standard_normal((4, 3))), } ) @@ -360,7 +360,7 @@ def test_dtype_coercion(self): tm.assert_series_equal(result.dtypes, df.dtypes) def test_concat_single_with_key(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = 
DataFrame(np.random.default_rng(2).standard_normal((10, 4))) result = concat([df], keys=["foo"]) expected = concat([df, df], keys=["foo", "bar"]) @@ -371,7 +371,7 @@ def test_concat_no_items_raises(self): concat([]) def test_concat_exclude_none(self): - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) pieces = [df[:5], None, None, df[5:]] result = concat(pieces) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 5aab96f0fe299..573b945e24d38 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -15,7 +15,7 @@ class TestEmptyConcat: def test_handle_empty_objects(self, sort): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal((10, 4)), columns=list("abcd") ) dfcopy = df[:5].copy() diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index b2b5b885e809e..7d719ba57c0b1 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -176,7 +176,7 @@ def test_dups_index(self): df = concat( [ DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "A", "B", "B"], ), DataFrame( diff --git a/pandas/tests/reshape/concat/test_invalid.py b/pandas/tests/reshape/concat/test_invalid.py index c84571c5c7107..5e6703b185f27 100644 --- a/pandas/tests/reshape/concat/test_invalid.py +++ b/pandas/tests/reshape/concat/test_invalid.py @@ -34,7 +34,7 @@ def test_concat_invalid_first_argument(self): def test_concat_generator_obj(self): # generator ok though - concat(DataFrame(np.random.default_rng(2).random(5, 5)) for _ in range(3)) + concat(DataFrame(np.random.default_rng(2).random((5, 5))) for _ in range(3)) def test_concat_textreader_obj(self): # text reader ok diff 
--git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 129408d8cdfd5..fd06f67684b6f 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -336,7 +336,7 @@ def test_join_empty_bug(self): def test_join_unconsolidated(self): # GH #331 a = DataFrame( - np.random.default_rng(2).standard_normal(30, 2), columns=["a", "b"] + np.random.default_rng(2).standard_normal((30, 2)), columns=["a", "b"] ) c = Series(np.random.default_rng(2).standard_normal(30)) a["c"] = c @@ -471,7 +471,7 @@ def test_join_float64_float32(self): dtype=np.float64, ) b = DataFrame( - np.random.default_rng(2).standard_normal(10, 1), + np.random.default_rng(2).standard_normal((10, 1)), columns=["c"], dtype=np.float32, ) @@ -669,7 +669,7 @@ def test_join_dups(self): df = concat( [ DataFrame( - np.random.default_rng(2).standard_normal(10, 4), + np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "A", "B", "B"], ), DataFrame( diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index d68626c566e51..b73a23b223650 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -323,7 +323,7 @@ def test_melt_missing_columns_raises(self): # Generate data df = DataFrame( - np.random.default_rng(2).standard_normal(5, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal((5, 4)), columns=list("abcd") ) # Try to melt with missing `value_vars` column name diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 4ad0d1e35d6d0..30db5adc07d1d 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -470,7 +470,7 @@ def test_getitem_str_year_with_datetimeindex(): def test_getitem_str_second_with_datetimeindex(): # GH14826, indexing with a seconds resolution string / datetime object df = DataFrame( - np.random.default_rng(2).random(5, 5), + 
np.random.default_rng(2).random((5, 5)), columns=["open", "high", "low", "close", "volume"], index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"), ) diff --git a/pandas/tests/series/methods/test_matmul.py b/pandas/tests/series/methods/test_matmul.py index 6c7040c2ebc7b..4ca3ad3f7031e 100644 --- a/pandas/tests/series/methods/test_matmul.py +++ b/pandas/tests/series/methods/test_matmul.py @@ -17,7 +17,7 @@ def test_matmul(self): np.random.default_rng(2).standard_normal(4), index=["p", "q", "r", "s"] ) b = DataFrame( - np.random.default_rng(2).standard_normal(3, 4), + np.random.default_rng(2).standard_normal((3, 4)), index=["1", "2", "3"], columns=["p", "q", "r", "s"], ).T diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 364394d1d484a..c3e074dc68c82 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -77,7 +77,7 @@ def test_sort_values(self, datetime_series, using_copy_on_write): # GH#5856/5853 # Series.sort_values operating on a view - df = DataFrame(np.random.default_rng(2).standard_normal(10, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) s = df.iloc[:, 0] msg = ( diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index 1b69ff1a35e20..4f34ab34c35f0 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -165,7 +165,7 @@ def test_2d_bool(self): assert result.dtype == np.object_ def test_2d_float32(self): - arr = np.random.default_rng(2).standard_normal(4, 3).astype(np.float32) + arr = np.random.default_rng(2).standard_normal((4, 3)).astype(np.float32) indexer = [0, 2, -1, 1, -1] # axis=0 diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 57698d6dad4d0..2dd4458172593 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -135,7 +135,7 @@ def series(): def frame(): """Make mocked frame as 
fixture.""" return DataFrame( - np.random.default_rng(2).standard_normal(100, 10), + np.random.default_rng(2).standard_normal((100, 10)), index=bdate_range(datetime(2009, 1, 1), periods=100), ) From 7a30da03ece5d15df982d10f5e8e35351ec01a05 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 13 Jul 2023 10:38:38 -0700 Subject: [PATCH 09/22] Fix more standard_normal --- pandas/tests/apply/test_frame_apply.py | 4 ++-- pandas/tests/arithmetic/test_numeric.py | 2 +- pandas/tests/computation/test_eval.py | 6 +++--- pandas/tests/frame/indexing/test_delitem.py | 2 +- pandas/tests/frame/indexing/test_getitem.py | 4 ++-- pandas/tests/frame/indexing/test_indexing.py | 4 ++-- pandas/tests/frame/indexing/test_setitem.py | 6 +++--- pandas/tests/frame/indexing/test_where.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 6 +++--- pandas/tests/frame/methods/test_cov_corr.py | 2 +- pandas/tests/frame/methods/test_diff.py | 2 +- pandas/tests/frame/methods/test_dot.py | 2 +- pandas/tests/frame/methods/test_drop.py | 4 ++-- pandas/tests/frame/methods/test_head_tail.py | 2 +- pandas/tests/frame/methods/test_matmul.py | 2 +- pandas/tests/frame/methods/test_pop.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 2 +- pandas/tests/frame/methods/test_sort_index.py | 4 ++-- pandas/tests/frame/methods/test_sort_values.py | 4 ++-- pandas/tests/frame/methods/test_to_csv.py | 4 ++-- pandas/tests/frame/methods/test_to_period.py | 2 +- pandas/tests/frame/test_constructors.py | 4 ++-- pandas/tests/frame/test_reductions.py | 2 +- pandas/tests/frame/test_repr_info.py | 4 ++-- pandas/tests/frame/test_stack_unstack.py | 12 ++++++++---- pandas/tests/frame/test_subclass.py | 2 +- pandas/tests/generic/test_generic.py | 2 +- pandas/tests/groupby/test_categorical.py | 4 ++-- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexes/test_subclass.py | 2 +- pandas/tests/indexing/conftest.py | 14 +++++++------- 
pandas/tests/indexing/multiindex/test_getitem.py | 2 +- pandas/tests/indexing/multiindex/test_iloc.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 10 +++++----- pandas/tests/indexing/multiindex/test_partial.py | 2 +- pandas/tests/indexing/multiindex/test_setitem.py | 4 ++-- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 6 +++--- pandas/tests/io/excel/test_style.py | 2 +- pandas/tests/io/formats/test_info.py | 6 +++--- pandas/tests/io/formats/test_printing.py | 2 +- pandas/tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/test_parquet.py | 4 ++-- pandas/tests/plotting/frame/test_frame.py | 16 ++++++++-------- pandas/tests/plotting/frame/test_frame_legend.py | 2 +- .../tests/plotting/frame/test_frame_subplots.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 4 ++-- pandas/tests/plotting/test_datetimelike.py | 4 +++- pandas/tests/plotting/test_hist_method.py | 6 +++--- pandas/tests/plotting/test_series.py | 10 +++++----- pandas/tests/resample/test_period_index.py | 2 +- pandas/tests/reshape/concat/test_concat.py | 6 +++--- pandas/tests/reshape/concat/test_index.py | 2 +- pandas/tests/reshape/merge/test_join.py | 12 ++++++------ pandas/tests/series/test_repr.py | 2 +- pandas/tests/test_expressions.py | 2 +- pandas/tests/test_multilevel.py | 2 +- pandas/tests/window/test_pairwise.py | 2 +- 59 files changed, 121 insertions(+), 115 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index f51858ebbcf05..120cd1cabdc45 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -376,7 +376,7 @@ def test_apply_reduce_to_dict(): def test_apply_differently_indexed(): - df = DataFrame(np.random.default_rng(2).standard_normal(20, 10)) + df = DataFrame(np.random.default_rng(2).standard_normal((20, 10))) result = df.apply(Series.describe, axis=0) expected = DataFrame({i: v.describe() 
for i, v in df.items()}, columns=df.columns) @@ -863,7 +863,7 @@ def test_infer_output_shape_listlike_columns(): # GH 16353 df = DataFrame( - np.random.default_rng(2).standard_normal(6, 3), columns=["A", "B", "C"] + np.random.default_rng(2).standard_normal((6, 3)), columns=["A", "B", "C"] ) result = df.apply(lambda x: [1, 2, 3], axis=1) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 01c167b272f2c..42fa03b38f6ff 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1295,7 +1295,7 @@ def test_arithmetic_with_frame_or_series(self, op): result = op(index, other) tm.assert_series_equal(result, expected) - other = pd.DataFrame(np.random.default_rng(2).standard_normal(2, 5)) + other = pd.DataFrame(np.random.default_rng(2).standard_normal((2, 5))) expected = op(pd.DataFrame([index, index]), other) result = op(index, other) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 67b9afd6054f7..1aa918527a11b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -795,7 +795,7 @@ def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type): tm.assert_frame_equal(res, df < 2) df3 = DataFrame( - np.random.default_rng(2).standard_normal(*df.shape), + np.random.default_rng(2).standard_normal(df.shape), index=df.index, columns=df.columns, ) @@ -985,7 +985,7 @@ def test_performance_warning_for_poor_alignment(self, engine, parser): with tm.assert_produces_warning(False): pd.eval("df + s", engine=engine, parser=parser) - df = DataFrame(np.random.default_rng(2).standard_normal(10, 10000)) + df = DataFrame(np.random.default_rng(2).standard_normal((10, 10000))) s = Series(np.random.default_rng(2).standard_normal(10000)) with tm.assert_produces_warning(False): pd.eval("df + s", engine=engine, parser=parser) @@ -1092,7 +1092,7 @@ def 
test_bool_ops_with_constants(self, rhs, lhs, op): assert res == exp def test_4d_ndarray_fails(self): - x = np.random.default_rng(2).standard_normal(3, 4, 5, 6) + x = np.random.default_rng(2).standard_normal((3, 4, 5, 6)) y = Series(np.random.default_rng(2).standard_normal(10)) msg = "N-dimensional objects, where N > 2, are not supported with eval" with pytest.raises(NotImplementedError, match=msg): diff --git a/pandas/tests/frame/indexing/test_delitem.py b/pandas/tests/frame/indexing/test_delitem.py index 4c57955a2113b..daec991b7a8db 100644 --- a/pandas/tests/frame/indexing/test_delitem.py +++ b/pandas/tests/frame/indexing/test_delitem.py @@ -16,7 +16,7 @@ def test_delitem(self, float_frame): def test_delitem_multiindex(self): midx = MultiIndex.from_product([["A", "B"], [1, 2]]) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), columns=midx) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), columns=midx) assert len(df.columns) == 4 assert ("A",) in df.columns assert "A" in df.columns diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index bf53cb6f8e680..9fed2116b2896 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -94,7 +94,7 @@ def test_getitem_list_missing_key(self): def test_getitem_list_duplicates(self): # GH#1943 df = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), columns=list("AABC") + np.random.default_rng(2).standard_normal((4, 4)), columns=list("AABC") ) df.columns.name = "foo" @@ -427,7 +427,7 @@ def test_getitem_slice_float64(self, frame_or_series): start, end = values[[5, 15]] - data = np.random.default_rng(2).standard_normal(20, 3) + data = np.random.default_rng(2).standard_normal((20, 3)) if frame_or_series is not DataFrame: data = data[:, 0] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 1bc2cba7f74af..fdee39e443908 100644 --- 
a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1057,7 +1057,7 @@ def test_iloc_row_slice_view(self, using_copy_on_write, request): def test_iloc_col(self): df = DataFrame( - np.random.default_rng(2).standard_normal(4, 10), columns=range(0, 20, 2) + np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2) ) result = df.iloc[:, 1] @@ -1080,7 +1080,7 @@ def test_iloc_col(self): def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write): df = DataFrame( - np.random.default_rng(2).standard_normal(4, 10), columns=range(0, 20, 2) + np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2) ) original = df.copy() subset = df.iloc[:, slice(4, 8)] diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 4e5bd74fba666..3885cbef3ed93 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -777,7 +777,7 @@ def expected(self, idx): def test_setitem_dt64series(self, idx, expected): # convert to utc - df = DataFrame(np.random.default_rng(2).standard_normal(2, 1), columns=["A"]) + df = DataFrame(np.random.default_rng(2).standard_normal((2, 1)), columns=["A"]) df["B"] = idx df["B"] = idx.to_series(index=[0, 1]).dt.tz_convert(None) @@ -787,7 +787,7 @@ def test_setitem_dt64series(self, idx, expected): def test_setitem_datetimeindex(self, idx, expected): # setting a DataFrame column with a tzaware DTI retains the dtype - df = DataFrame(np.random.default_rng(2).standard_normal(2, 1), columns=["A"]) + df = DataFrame(np.random.default_rng(2).standard_normal((2, 1)), columns=["A"]) # assign to frame df["B"] = idx @@ -796,7 +796,7 @@ def test_setitem_datetimeindex(self, idx, expected): def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): # setting a DataFrame column with a tzaware DTI retains the dtype - df = DataFrame(np.random.default_rng(2).standard_normal(2, 
1), columns=["A"]) + df = DataFrame(np.random.default_rng(2).standard_normal((2, 1)), columns=["A"]) # object array of datetimes with a tz df["B"] = idx.to_pydatetime() diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 0d2364c0c5ffd..667759e0abafe 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -474,7 +474,7 @@ def test_where_complex(self): def test_where_axis(self): # GH 9736 - df = DataFrame(np.random.default_rng(2).standard_normal(2, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((2, 2))) mask = DataFrame([[False, False], [False, False]]) ser = Series([0, 1]) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 9bad3a12e0e8f..b0c3ae1f1a5e3 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -154,7 +154,7 @@ def test_xs_doc_example(self): index = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.default_rng(2).standard_normal(3, 8), + np.random.default_rng(2).standard_normal((3, 8)), index=["A", "B", "C"], columns=index, ) @@ -187,7 +187,7 @@ def test_xs_level(self, multiindex_dataframe_random_data): tm.assert_frame_equal(result, expected) def test_xs_level_eq_2(self): - arr = np.random.default_rng(2).standard_normal(3, 5) + arr = np.random.default_rng(2).standard_normal((3, 5)) index = MultiIndex( levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], @@ -277,7 +277,7 @@ def test_xs_missing_values_in_index(self): ) def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index): # see GH#2903 - arr = np.random.default_rng(2).standard_normal(4, 4) + arr = np.random.default_rng(2).standard_normal((4, 4)) index = MultiIndex( levels=[["a", "b"], ["bar", "foo", "hello", "world"]], codes=[[0, 0, 1, 1], [0, 1, 2, 3]], diff --git 
a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index ff0b795b44da6..59c195a65f89e 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -319,7 +319,7 @@ def test_corrwith(self, datetime_frame, dtype): columns=columns, ) df2 = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=index[:4], columns=columns, ) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 62b20232c12f2..b401f182242b1 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -13,7 +13,7 @@ class TestDataFrameDiff: def test_diff_requires_integer(self): - df = DataFrame(np.random.default_rng(2).standard_normal(2, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((2, 2))) with pytest.raises(ValueError, match="periods must be an integer"): df.diff(1.5) diff --git a/pandas/tests/frame/methods/test_dot.py b/pandas/tests/frame/methods/test_dot.py index 5cec758a619be..9cd63860bae27 100644 --- a/pandas/tests/frame/methods/test_dot.py +++ b/pandas/tests/frame/methods/test_dot.py @@ -119,7 +119,7 @@ def obj(self): @pytest.fixture def other(self): return DataFrame( - np.random.default_rng(2).standard_normal(4, 2), + np.random.default_rng(2).standard_normal((4, 2)), index=["p", "q", "r", "s"], columns=["1", "2"], ) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index fde0c424d0418..0a796982e9fca 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -305,7 +305,7 @@ def test_mixed_depth_drop(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index) result = df.drop("a", 
axis=1) expected = df.drop([("a", "", "")], axis=1) @@ -440,7 +440,7 @@ def test_drop_preserve_names(self): [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] ) - df = DataFrame(np.random.default_rng(2).standard_normal(6, 3), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((6, 3)), index=index) result = df.drop([(0, 2)]) assert result.index.names == ("one", "two") diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py index 4f32e5781b07a..9363c4d79983f 100644 --- a/pandas/tests/frame/methods/test_head_tail.py +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -9,7 +9,7 @@ def test_head_tail_generic(index, frame_or_series): ndim = 2 if frame_or_series is DataFrame else 1 shape = (len(index),) * ndim - vals = np.random.default_rng(2).standard_normal(*shape) + vals = np.random.default_rng(2).standard_normal(shape) obj = frame_or_series(vals, index=index) tm.assert_equal(obj.head(), obj.iloc[:5]) diff --git a/pandas/tests/frame/methods/test_matmul.py b/pandas/tests/frame/methods/test_matmul.py index 32c6e974266d9..be9462b64fa1b 100644 --- a/pandas/tests/frame/methods/test_matmul.py +++ b/pandas/tests/frame/methods/test_matmul.py @@ -20,7 +20,7 @@ def test_matmul(self): columns=["p", "q", "r", "s"], ) b = DataFrame( - np.random.default_rng(2).standard_normal(4, 2), + np.random.default_rng(2).standard_normal((4, 2)), index=["p", "q", "r", "s"], columns=["one", "two"], ) diff --git a/pandas/tests/frame/methods/test_pop.py b/pandas/tests/frame/methods/test_pop.py index dc352181ff957..617f0c3a27885 100644 --- a/pandas/tests/frame/methods/test_pop.py +++ b/pandas/tests/frame/methods/test_pop.py @@ -54,7 +54,7 @@ def test_mixed_depth_pop(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index) df1 = df.copy() 
df2 = df.copy() diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 5bab0842e37f3..d93684d9c0658 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -576,7 +576,7 @@ def test_reindex_level_verify(self, idx, indexer, check_index_type): def test_non_monotonic_reindex_methods(self): dr = date_range("2013-08-01", periods=6, freq="B") - data = np.random.default_rng(2).standard_normal(6, 1) + data = np.random.default_rng(2).standard_normal((6, 1)) df = DataFrame(data, index=dr, columns=list("A")) df_rev = DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) # index is not monotonic increasing or decreasing diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 4c8b27bfde06b..228b62a418813 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -56,7 +56,7 @@ def test_sort_index_non_existent_label_multiindex(self): def test_sort_index_reorder_on_ops(self): # GH#15687 df = DataFrame( - np.random.default_rng(2).standard_normal(8, 2), + np.random.default_rng(2).standard_normal((8, 2)), index=MultiIndex.from_product( [["a", "b"], ["big", "small"], ["red", "blu"]], names=["letter", "size", "color"], @@ -217,7 +217,7 @@ def test_sort_index_multi_index(self): def test_sort_index_inplace(self): frame = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"], ) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index c7619bc4ea9a5..bd7d882f6d94a 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -95,7 +95,7 @@ def test_sort_values_by_empty_list(self): def test_sort_values_inplace(self): frame = DataFrame( - 
np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"], ) @@ -646,7 +646,7 @@ def test_sort_values_no_op_reset_index(self): class TestDataFrameSortKey: # test key sorting (issue 27237) def test_sort_values_inplace_key(self, sort_by_key): frame = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"], ) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 5db059448402d..dabdd8e59b81d 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -108,8 +108,8 @@ def test_to_csv_from_csv2(self, float_frame): def test_to_csv_from_csv3(self): with tm.ensure_clean("__tmp_to_csv_from_csv3__") as path: - df1 = DataFrame(np.random.default_rng(2).standard_normal(3, 1)) - df2 = DataFrame(np.random.default_rng(2).standard_normal(3, 1)) + df1 = DataFrame(np.random.default_rng(2).standard_normal((3, 1))) + df2 = DataFrame(np.random.default_rng(2).standard_normal((3, 1))) df1.to_csv(path) df2.to_csv(path, mode="a", header=False) diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py index 199753025effe..cadbf11622671 100644 --- a/pandas/tests/frame/methods/test_to_period.py +++ b/pandas/tests/frame/methods/test_to_period.py @@ -42,7 +42,7 @@ def test_to_period_without_freq(self, frame_or_series): ) obj = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), index=idx, columns=idx + np.random.default_rng(2).standard_normal((4, 4)), index=idx, columns=idx ) obj = tm.get_obj(obj, frame_or_series) expected = obj.copy() diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 86a77ed2e9ba8..ad6a32ae559e8 100644 --- a/pandas/tests/frame/test_constructors.py +++ 
b/pandas/tests/frame/test_constructors.py @@ -2458,14 +2458,14 @@ def test_dataframe_constructor_infer_multiindex(self): index_lists = [["a", "a", "b", "b"], ["x", "y", "x", "y"]] multi = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=[np.array(x) for x in index_lists], ) assert isinstance(multi.index, MultiIndex) assert not isinstance(multi.columns, MultiIndex) multi = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), columns=index_lists + np.random.default_rng(2).standard_normal((4, 4)), columns=index_lists ) assert isinstance(multi.columns, MultiIndex) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 493f1cab34219..3b150e1a2718e 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1859,7 +1859,7 @@ def test_prod_sum_min_count_mixed_object(): def test_reduction_axis_none_returns_scalar(method, numeric_only): # GH#21597 As of 2.0, axis=None reduces over all axes. 
- df = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 4))) result = getattr(df, method)(axis=None, numeric_only=numeric_only) np_arr = df.to_numpy() diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 70517db1478b9..0c9e5e01fa644 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -46,7 +46,7 @@ def test_repr_bytes_61_lines(self): def test_repr_unicode_level_names(self, frame_or_series): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) - obj = DataFrame(np.random.default_rng(2).standard_normal(2, 4), index=index) + obj = DataFrame(np.random.default_rng(2).standard_normal((2, 4)), index=index) obj = tm.get_obj(obj, frame_or_series) repr(obj) @@ -264,7 +264,7 @@ def test_str_to_bytes_raises(self): def test_very_wide_info_repr(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 20), + np.random.default_rng(2).standard_normal((10, 20)), columns=tm.rands_array(10, 20), ) repr(df) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 318fa95dbc1e0..22b49854d1f99 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -385,7 +385,7 @@ def unstack_and_compare(df, column_name): def test_stack_ints(self): columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3))) df = DataFrame( - np.random.default_rng(2).standard_normal(30, 27), columns=columns + np.random.default_rng(2).standard_normal((30, 27)), columns=columns ) tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) @@ -411,7 +411,9 @@ def test_stack_mixed_levels(self): ], names=["exp", "animal", "hair_length"], ) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), columns=columns) + df = DataFrame( + np.random.default_rng(2).standard_normal((4, 4)), columns=columns + ) 
animal_hair_stacked = df.stack(level=["animal", "hair_length"]) exp_hair_stacked = df.stack(level=["exp", "hair_length"]) @@ -455,7 +457,9 @@ def test_stack_int_level_names(self): ], names=["exp", "animal", "hair_length"], ) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), columns=columns) + df = DataFrame( + np.random.default_rng(2).standard_normal((4, 4)), columns=columns + ) exp_animal_stacked = df.stack(level=["exp", "animal"]) animal_hair_stacked = df.stack(level=["animal", "hair_length"]) @@ -1963,7 +1967,7 @@ def test_unstack_unobserved_keys(self): index = MultiIndex(levels, codes) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 2), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 2)), index=index) result = df.unstack() assert len(result.columns) == 4 diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 867014f467c34..9bc790cbed8e8 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -665,7 +665,7 @@ def test_corrwith(self): columns=columns, ) df2 = tm.SubclassedDataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=index[:4], columns=columns, ) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 9cbdab713d1ef..87beab04bc586 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -46,7 +46,7 @@ def construct(box, shape, value=None, dtype=None, **kwargs): arr = np.repeat(arr, new_shape).reshape(shape) else: - arr = np.random.default_rng(2).standard_normal(*shape) + arr = np.random.default_rng(2).standard_normal(shape) return box(arr, dtype=dtype, **kwargs) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 43cdb8267deda..11ce290896073 100644 --- a/pandas/tests/groupby/test_categorical.py +++ 
b/pandas/tests/groupby/test_categorical.py @@ -211,7 +211,7 @@ def f(x): cats = Categorical.from_codes(codes, levels, ordered=True) - data = DataFrame(np.random.default_rng(2).standard_normal(100, 4)) + data = DataFrame(np.random.default_rng(2).standard_normal((100, 4))) result = data.groupby(cats, observed=False).mean() @@ -646,7 +646,7 @@ def test_datetime(): cats = Categorical.from_codes(codes, levels, ordered=True) - data = DataFrame(np.random.default_rng(2).standard_normal(100, 4)) + data = DataFrame(np.random.default_rng(2).standard_normal((100, 4))) result = data.groupby(cats, observed=False).mean() expected = data.groupby(np.asarray(cats), observed=False).mean() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 11b5561b8c3bb..d2ec85f879572 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1461,7 +1461,7 @@ def test_groupby_one_row(): with pytest.raises(KeyError, match=msg): df1.groupby("Z") df2 = DataFrame( - np.random.default_rng(2).standard_normal(2, 4), columns=list("ABCD") + np.random.default_rng(2).standard_normal((2, 4)), columns=list("ABCD") ) with pytest.raises(KeyError, match=msg): df2.groupby("Z") diff --git a/pandas/tests/indexes/test_subclass.py b/pandas/tests/indexes/test_subclass.py index aa7433cd38f32..c3287e1ddcddc 100644 --- a/pandas/tests/indexes/test_subclass.py +++ b/pandas/tests/indexes/test_subclass.py @@ -32,7 +32,7 @@ def test_insert_fallback_to_base_index(): tm.assert_index_equal(result, expected) df = DataFrame( - np.random.default_rng(2).standard_normal(2, 3), + np.random.default_rng(2).standard_normal((2, 3)), columns=idx, index=Index([1, 2], name="string"), ) diff --git a/pandas/tests/indexing/conftest.py b/pandas/tests/indexing/conftest.py index 08156bc2c400b..4184c6a0047cc 100644 --- a/pandas/tests/indexing/conftest.py +++ b/pandas/tests/indexing/conftest.py @@ -18,7 +18,7 @@ def series_ints(): @pytest.fixture def frame_ints(): return 
DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3), ) @@ -35,7 +35,7 @@ def series_uints(): @pytest.fixture def frame_uints(): return DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=Index(range(0, 8, 2), dtype=np.uint64), columns=Index(range(0, 12, 3), dtype=np.uint64), ) @@ -49,7 +49,7 @@ def series_labels(): @pytest.fixture def frame_labels(): return DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=list("abcd"), columns=list("ABCD"), ) @@ -66,7 +66,7 @@ def series_ts(): @pytest.fixture def frame_ts(): return DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=date_range("20130101", periods=4), ) @@ -82,7 +82,7 @@ def series_floats(): @pytest.fixture def frame_floats(): return DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=Index(range(0, 8, 2), dtype=np.float64), columns=Index(range(0, 12, 3), dtype=np.float64), ) @@ -96,7 +96,7 @@ def series_mixed(): @pytest.fixture def frame_mixed(): return DataFrame( - np.random.default_rng(2).standard_normal(4, 4), index=[2, 4, "null", 8] + np.random.default_rng(2).standard_normal((4, 4)), index=[2, 4, "null", 8] ) @@ -113,7 +113,7 @@ def series_empty(): @pytest.fixture def frame_multi(): return DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=MultiIndex.from_product([[1, 2], [3, 4]]), columns=MultiIndex.from_product([[5, 6], [7, 8]]), ) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index f6b9a8bfdd210..9374887a55d07 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ 
b/pandas/tests/indexing/multiindex/test_getitem.py @@ -187,7 +187,7 @@ def test_frame_mixed_depth_get(): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index) result = df["a"] expected = df["a", "", ""].rename("a") diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index 2052886619fac..8939ecc78000b 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -67,7 +67,7 @@ def test_iloc_getitem_multiple_items(): # GH 5528 tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]]) index = MultiIndex.from_tuples(tup) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 4), index=index) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), index=index) result = df.iloc[[2, 3]] expected = df.xs("b", drop_level=False) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index ee160e7dcabcd..17d500141632d 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -29,7 +29,7 @@ def frame_random_data_integer_multi_index(): levels = [[0, 1], [0, 1, 2]] codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, codes=codes) - return DataFrame(np.random.default_rng(2).standard_normal(6, 2), index=index) + return DataFrame(np.random.default_rng(2).standard_normal((6, 2)), index=index) class TestMultiIndexLoc: @@ -233,7 +233,7 @@ def test_loc_multiindex_indexer_none(self): attribute_values = ["Value" + str(i) for i in range(5)] index = MultiIndex.from_product([attributes, attribute_values]) - df = 0.1 * np.random.default_rng(2).standard_normal(10, 1 * 5) + 0.5 + df = 0.1 * 
np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5 df = DataFrame(df, columns=index) result = df[attributes] tm.assert_frame_equal(result, df) @@ -291,13 +291,13 @@ def test_loc_getitem_int_slice(self): # loc should treat integer slices like label slices index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]]) - df = DataFrame(np.random.default_rng(2).standard_normal(6, 6), index, index) + df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index) result = df.loc[6:8, :] expected = df tm.assert_frame_equal(result, expected) index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]]) - df = DataFrame(np.random.default_rng(2).standard_normal(6, 6), index, index) + df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index) result = df.loc[20:30, :] expected = df.iloc[2:] tm.assert_frame_equal(result, expected) @@ -476,7 +476,7 @@ def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer): # empty indexer multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"])) df = DataFrame( - np.random.default_rng(2).standard_normal(5, 6), + np.random.default_rng(2).standard_normal((5, 6)), index=range(5), columns=multi_index, ) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index dd1d5a8fbcc57..7e97adb97a0b3 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -255,7 +255,7 @@ def test_loc_getitem_partial_both_axis(): columns = MultiIndex.from_product(iterables, names=["col1", "col2"]) rows = MultiIndex.from_product(iterables, names=["row1", "row2"]) df = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), index=rows, columns=columns + np.random.default_rng(2).standard_normal((4, 4)), index=rows, columns=columns ) expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1) result = df.loc["a", "b"] diff --git 
a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 609bf985f14a1..de69d78c0f392 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -140,7 +140,7 @@ def test_multiindex_setitem(self): ] df_orig = DataFrame( - np.random.default_rng(2).standard_normal(6, 3), + np.random.default_rng(2).standard_normal((6, 3)), index=arrays, columns=["A", "B", "C"], ).sort_index() @@ -474,7 +474,7 @@ def test_setitem_new_column_mixed_depth(self): tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.default_rng(2).standard_normal(4, 6), columns=index) + df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index) result = df.copy() expected = df.copy() diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 6a5b68d7d6a3c..6831126ad8a35 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -435,7 +435,7 @@ def test_iloc_getitem_slice_dups(self): def test_iloc_setitem(self): df = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3), ) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index e110517a46a8b..607528ea20aba 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -317,7 +317,7 @@ def test_dups_fancy_indexing2(self): def test_dups_fancy_indexing3(self): # GH 6504, multi-axis indexing df = DataFrame( - np.random.default_rng(2).standard_normal(9, 2), + np.random.default_rng(2).standard_normal((9, 2)), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"], ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8b8eb977f08de..0cb1081feabf0 100644 --- a/pandas/tests/indexing/test_loc.py +++ 
b/pandas/tests/indexing/test_loc.py @@ -782,7 +782,7 @@ def test_loc_setitem_empty_frame(self): def test_loc_setitem_frame(self): df = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), index=list("abcd"), columns=list("ABCD"), ) @@ -1539,7 +1539,7 @@ def test_loc_setitem_td64_non_nano(self): tm.assert_series_equal(ser, expected) def test_loc_setitem_2d_to_1d_raises(self): - data = np.random.default_rng(2).standard_normal(2, 2) + data = np.random.default_rng(2).standard_normal((2, 2)) # float64 dtype to avoid upcast when trying to set float data ser = Series(range(2), dtype="float64") @@ -2879,7 +2879,7 @@ def test_loc_with_period_index_indexer(): def test_loc_setitem_multiindex_timestamp(): # GH#13831 - vals = np.random.default_rng(2).standard_normal(8, 6) + vals = np.random.default_rng(2).standard_normal((8, 6)) idx = date_range("1/1/2000", periods=8) cols = ["A", "B", "C", "D", "E", "F"] exp = DataFrame(vals, index=idx, columns=cols) diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py index d683ab58594b7..3ca8637885639 100644 --- a/pandas/tests/io/excel/test_style.py +++ b/pandas/tests/io/excel/test_style.py @@ -38,7 +38,7 @@ def assert_equal_cell_styles(cell1, cell2): def test_styler_to_excel_unstyled(engine): # compare DataFrame.to_excel and Styler.to_excel when no styles applied pytest.importorskip(engine) - df = DataFrame(np.random.default_rng(2).standard_normal(2, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((2, 2))) with tm.ensure_clean(".xlsx") as path: with ExcelWriter(path, engine=engine) as writer: df.to_excel(writer, sheet_name="dataframe") diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 70f6940ce0297..232f8ebabeee7 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -108,7 +108,7 @@ def test_info_verbose_check_header_separator_body(): buf = 
StringIO() size = 1001 start = 5 - frame = DataFrame(np.random.default_rng(2).standard_normal(3, size)) + frame = DataFrame(np.random.default_rng(2).standard_normal((3, size))) frame.info(verbose=True, buf=buf) res = buf.getvalue() @@ -170,7 +170,7 @@ def test_info_verbose_with_counts_spacing( size, header_exp, separator_exp, first_line_exp, last_line_exp ): """Test header column, spacer, first line and last line in verbose mode.""" - frame = DataFrame(np.random.default_rng(2).standard_normal(3, size)) + frame = DataFrame(np.random.default_rng(2).standard_normal((3, size))) with StringIO() as buf: frame.info(verbose=True, show_counts=True, buf=buf) all_lines = buf.getvalue().splitlines() @@ -208,7 +208,7 @@ def test_info_memory(): def test_info_wide(): io = StringIO() - df = DataFrame(np.random.default_rng(2).standard_normal(5, 101)) + df = DataFrame(np.random.default_rng(2).standard_normal((5, 101))) df.info(buf=io) io = StringIO() diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index bf6a2bd4bae13..d21da4ec93318 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -155,7 +155,7 @@ def test_publishes_not_implemented(self, ip): # GH 15996 midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) df = pd.DataFrame( - np.random.default_rng(2).standard_normal(5, len(midx)), columns=midx + np.random.default_rng(2).standard_normal((5, len(midx))), columns=midx ) opt = pd.option_context("display.html.table_schema", True) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index e4f0f163d54bf..947ecd9c8c040 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -651,7 +651,7 @@ def test_overlapping_names(self, case): def test_mi_falsey_name(self): # GH 16203 df = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + 
np.random.default_rng(2).standard_normal((4, 4)), index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]), ) result = [x["name"] for x in build_table_schema(df)["fields"]] diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index d091cec53bfce..a3a4a535a8af5 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -536,7 +536,7 @@ def test_write_column_multiindex_nonstring(self, engine): [1, 2, 1, 2, 1, 2, 1, 2], ] df = pd.DataFrame( - np.random.default_rng(2).standard_normal(8, 8), columns=arrays + np.random.default_rng(2).standard_normal((8, 8)), columns=arrays ) df.columns.names = ["Level1", "Level2"] if engine == "fastparquet": @@ -555,7 +555,7 @@ def test_write_column_multiindex_string(self, pa): ["one", "two", "one", "two", "one", "two", "one", "two"], ] df = pd.DataFrame( - np.random.default_rng(2).standard_normal(8, 8), columns=arrays + np.random.default_rng(2).standard_normal((8, 8)), columns=arrays ) df.columns.names = ["ColLevel1", "ColLevel2"] diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 8b6c1cc9fb134..a2df548577db6 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -278,7 +278,7 @@ def test_implicit_label(self): def test_donot_overwrite_index_name(self): # GH 8494 df = DataFrame( - np.random.default_rng(2).standard_normal(2, 2), columns=["a", "b"] + np.random.default_rng(2).standard_normal((2, 2)), columns=["a", "b"] ) df.index.name = "NAME" df.plot(y="b", label="LABEL") @@ -719,7 +719,7 @@ def test_bar_nan_stacked(self): def test_bar_categorical(self, idx): # GH 13019 df = DataFrame( - np.random.default_rng(2).standard_normal(6, 5), + np.random.default_rng(2).standard_normal((6, 5)), index=idx(list("ABCDEF")), columns=idx(list("abcde")), ) @@ -943,7 +943,7 @@ def test_plot_bar(self, kwargs): @pytest.mark.slow def test_plot_bar_int_col(self): df = DataFrame( - 
np.random.default_rng(2).standard_normal(10, 15), + np.random.default_rng(2).standard_normal((10, 15)), index=list(string.ascii_letters[:10]), columns=range(15), ) @@ -1058,7 +1058,7 @@ def test_boxplot_return_type_invalid_type(self, return_type): @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(np.random.default_rng(2).standard_normal(100, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((100, 4))) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] _check_legend_labels(ax, labels=expected) @@ -1094,7 +1094,7 @@ def test_kde_missing_vals(self): _check_plot_works(df.plot, kind="kde") def test_hist_df(self): - df = DataFrame(np.random.default_rng(2).standard_normal(100, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((100, 4))) ax = _check_plot_works(df.plot.hist) expected = [pprint_thing(c) for c in df.columns] @@ -1145,7 +1145,7 @@ def test_hist_weights(self, weights): # GH 33173 df = DataFrame( - dict(zip(["A", "B"], np.random.default_rng(2).standard_normal(2, 100))) + dict(zip(["A", "B"], np.random.default_rng(2).standard_normal((2, 100)))) ) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) @@ -1312,7 +1312,7 @@ def test_hist_df_coord(self, data): ) def test_plot_int_columns(self): - df = DataFrame(np.random.default_rng(2).standard_normal(100, 4)).cumsum() + df = DataFrame(np.random.default_rng(2).standard_normal((100, 4))).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.parametrize( @@ -2278,7 +2278,7 @@ def test_x_multiindex_values_ticks(self): # GH: 15912 index = MultiIndex.from_product([[2012, 2013], [1, 2]]) df = DataFrame( - np.random.default_rng(2).standard_normal(4, 2), + np.random.default_rng(2).standard_normal((4, 2)), columns=["A", "B"], index=index, ) diff --git a/pandas/tests/plotting/frame/test_frame_legend.py b/pandas/tests/plotting/frame/test_frame_legend.py index 08e462df2a196..942fa0c2bc358 100644 --- 
a/pandas/tests/plotting/frame/test_frame_legend.py +++ b/pandas/tests/plotting/frame/test_frame_legend.py @@ -192,7 +192,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def test_legend_name(self): multi = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), + np.random.default_rng(2).standard_normal((4, 4)), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 526ffd80c9efc..a2b54c91693b8 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -363,7 +363,7 @@ def test_subplots_ts_share_axes(self): _, axes = mpl.pyplot.subplots(3, 3, sharex=True, sharey=True) mpl.pyplot.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) df = DataFrame( - np.random.default_rng(2).standard_normal(10, 9), + np.random.default_rng(2).standard_normal((10, 9)), index=date_range(start="2014-07-01", freq="M", periods=10), ) for i, ax in enumerate(axes.ravel()): diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 6217dbfa5aa44..0ba4ed85d56d4 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -491,7 +491,7 @@ def test_grouped_box_return_type_arg(self, hist_df, return_type): def test_grouped_box_return_type_arg_duplcate_cats(self, return_type): columns2 = "X B C D A".split() df2 = DataFrame( - np.random.default_rng(2).standard_normal(6, 5), columns=columns2 + np.random.default_rng(2).standard_normal((6, 5)), columns=columns2 ) categories2 = "A B".split() df2["category"] = categories2 * 3 @@ -732,7 +732,7 @@ def test_boxplot_multiindex_column(self): tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - 
np.random.default_rng(2).standard_normal(3, 8), + np.random.default_rng(2).standard_normal((3, 8)), index=["A", "B", "C"], columns=index, ) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index fd9c960ccf56f..445729e3f220d 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -60,7 +60,9 @@ def test_ts_plot_with_tz(self, tz_aware_fixture): def test_fontsize_set_correctly(self): # For issue #8765 - df = DataFrame(np.random.default_rng(2).standard_normal(10, 9), index=range(10)) + df = DataFrame( + np.random.default_rng(2).standard_normal((10, 9)), index=range(10) + ) _, ax = mpl.pyplot.subplots() df.plot(fontsize=2, ax=ax) for label in ax.get_xticklabels() + ax.get_yticklabels(): diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 7111146d70334..43c964f1d311d 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -582,7 +582,7 @@ def test_hist_df_with_nonnumerics_no_bins(self): def test_hist_secondary_legend(self): # GH 9610 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal((30, 4)), columns=list("abcd") ) # primary -> secondary @@ -598,7 +598,7 @@ def test_hist_secondary_legend(self): def test_hist_secondary_secondary(self): # GH 9610 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal((30, 4)), columns=list("abcd") ) # secondary -> secondary _, ax = mpl.pyplot.subplots() @@ -613,7 +613,7 @@ def test_hist_secondary_secondary(self): def test_hist_secondary_primary(self): # GH 9610 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal((30, 4)), columns=list("abcd") ) # secondary -> primary _, ax = mpl.pyplot.subplots() diff 
--git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 7cf6b37246d09..0981f38288d1e 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -449,7 +449,7 @@ def test_pie_nan(self): def test_df_series_secondary_legend(self): # GH 9779 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + np.random.default_rng(2).standard_normal((30, 3)), columns=list("abc") ) s = Series(np.random.default_rng(2).standard_normal(30), name="x") @@ -466,7 +466,7 @@ def test_df_series_secondary_legend(self): def test_df_series_secondary_legend_with_axes(self): # GH 9779 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + np.random.default_rng(2).standard_normal((30, 3)), columns=list("abc") ) s = Series(np.random.default_rng(2).standard_normal(30), name="x") # primary -> secondary (with passing ax) @@ -482,7 +482,7 @@ def test_df_series_secondary_legend_with_axes(self): def test_df_series_secondary_legend_both(self): # GH 9779 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + np.random.default_rng(2).standard_normal((30, 3)), columns=list("abc") ) s = Series(np.random.default_rng(2).standard_normal(30), name="x") # secondary -> secondary (without passing ax) @@ -499,7 +499,7 @@ def test_df_series_secondary_legend_both(self): def test_df_series_secondary_legend_both_with_axis(self): # GH 9779 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + np.random.default_rng(2).standard_normal((30, 3)), columns=list("abc") ) s = Series(np.random.default_rng(2).standard_normal(30), name="x") # secondary -> secondary (with passing ax) @@ -516,7 +516,7 @@ def test_df_series_secondary_legend_both_with_axis(self): def test_df_series_secondary_legend_both_with_axis_2(self): # GH 9779 df = DataFrame( - np.random.default_rng(2).standard_normal(30, 3), columns=list("abc") + 
np.random.default_rng(2).standard_normal((30, 3)), columns=list("abc") ) s = Series(np.random.default_rng(2).standard_normal(30), name="x") # secondary -> secondary (with passing ax) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index d2223731867bf..be56b31efb778 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -664,7 +664,7 @@ def test_evenly_divisible_with_no_extra_bins(self): # when the frequency is evenly divisible, sometimes extra bins df = DataFrame( - np.random.default_rng(2).standard_normal(9, 3), + np.random.default_rng(2).standard_normal((9, 3)), index=date_range("2000-1-1", periods=9), ) result = df.resample("5D").mean() diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 77b57c7b35078..600cac34f1074 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -86,7 +86,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): assert arr.base is not None # Float block was consolidated. 
- df4 = DataFrame(np.random.default_rng(2).standard_normal(4, 1)) + df4 = DataFrame(np.random.default_rng(2).standard_normal((4, 1))) result = concat([df, df2, df3, df4], axis=1, copy=False) for arr in result._mgr.arrays: if arr.dtype.kind == "f": @@ -108,7 +108,7 @@ def test_concat_copy(self, using_array_manager, using_copy_on_write): def test_concat_with_group_keys(self): # axis=0 df = DataFrame(np.random.default_rng(2).standard_normal((3, 4))) - df2 = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) + df2 = DataFrame(np.random.default_rng(2).standard_normal((4, 4))) result = concat([df, df2], keys=[0, 1]) exp_index = MultiIndex.from_arrays( @@ -124,7 +124,7 @@ def test_concat_with_group_keys(self): # axis=1 df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) - df2 = DataFrame(np.random.default_rng(2).standard_normal(4, 4)) + df2 = DataFrame(np.random.default_rng(2).standard_normal((4, 4))) result = concat([df, df2], keys=[0, 1], axis=1) expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 7d719ba57c0b1..fce3a08100ce4 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -244,7 +244,7 @@ def test_concat_multiindex_rangeindex(self): # when multi-index levels are RangeIndex objects # there is a bug in concat with objects of len 1 - df = DataFrame(np.random.default_rng(2).standard_normal(9, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((9, 2))) df.index = MultiIndex( levels=[pd.RangeIndex(3), pd.RangeIndex(3)], codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)], diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index fd06f67684b6f..dab9fa5d7491c 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -340,7 +340,7 @@ def test_join_unconsolidated(self): ) c = 
Series(np.random.default_rng(2).standard_normal(30)) a["c"] = c - d = DataFrame(np.random.default_rng(2).standard_normal(30, 1), columns=["q"]) + d = DataFrame(np.random.default_rng(2).standard_normal((30, 1)), columns=["q"]) # it works! a.join(d) @@ -623,7 +623,7 @@ def test_mixed_type_join_with_suffix(self): def test_join_many(self): df = DataFrame( - np.random.default_rng(2).standard_normal(10, 6), columns=list("abcdef") + np.random.default_rng(2).standard_normal((10, 6)), columns=list("abcdef") ) df_list = [df[["a", "b"]], df[["c", "d"]], df[["e", "f"]]] @@ -687,16 +687,16 @@ def test_join_dups(self): # GH 4975, invalid join on dups w = DataFrame( - np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + np.random.default_rng(2).standard_normal((4, 2)), columns=["x", "y"] ) x = DataFrame( - np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + np.random.default_rng(2).standard_normal((4, 2)), columns=["x", "y"] ) y = DataFrame( - np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + np.random.default_rng(2).standard_normal((4, 2)), columns=["x", "y"] ) z = DataFrame( - np.random.default_rng(2).standard_normal(4, 2), columns=["x", "y"] + np.random.default_rng(2).standard_normal((4, 2)), columns=["x", "y"] ) dta = x.merge(y, left_index=True, right_index=True).merge( diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 6c3a4d94a4f12..4c92b5694c43b 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -158,7 +158,7 @@ def test_tidy_repr(self): def test_repr_bool_fails(self, capsys): s = Series( [ - DataFrame(np.random.default_rng(2).standard_normal(2, 2)) + DataFrame(np.random.default_rng(2).standard_normal((2, 2))) for i in range(5) ] ) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index c7ea19e34891a..e61539a9e97a7 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -27,7 +27,7 @@ def 
_frame(): @pytest.fixture def _frame2(): return DataFrame( - np.random.default_rng(2).standard_normal(100, 4), + np.random.default_rng(2).standard_normal((100, 4)), columns=list("ABCD"), dtype="float64", ) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 6a72ff5f93832..453e34abacfd2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -164,7 +164,7 @@ def test_multilevel_consolidate(self): [("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")] ) df = DataFrame( - np.random.default_rng(2).standard_normal(4, 4), index=index, columns=index + np.random.default_rng(2).standard_normal((4, 4)), index=index, columns=index ) df["Totals", ""] = df.sum(1) df = df._consolidate() diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 9f2782831a32d..890e2624194d7 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -96,7 +96,7 @@ def test_flex_binary_frame(method, frame): frame2 = frame.copy() frame2 = DataFrame( - np.random.default_rng(2).standard_normal(*frame2.shape), + np.random.default_rng(2).standard_normal(frame2.shape), index=frame2.index, columns=frame2.columns, ) From ff12f6111aa315a6156c90462dad4732caad6435 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 13 Jul 2023 11:43:14 -0700 Subject: [PATCH 10/22] Fix more --- pandas/tests/computation/test_eval.py | 2 +- pandas/tests/frame/indexing/test_mask.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 2 +- pandas/tests/frame/methods/test_align.py | 4 ++-- pandas/tests/frame/methods/test_asfreq.py | 2 +- pandas/tests/frame/methods/test_at_time.py | 6 ++++-- pandas/tests/frame/methods/test_between_time.py | 2 +- pandas/tests/frame/methods/test_clip.py | 4 ++-- pandas/tests/frame/methods/test_to_csv.py | 14 +++++++------- 
pandas/tests/frame/methods/test_to_period.py | 6 +++--- pandas/tests/frame/methods/test_to_timestamp.py | 6 +++--- pandas/tests/frame/test_arithmetic.py | 4 +++- pandas/tests/frame/test_query_eval.py | 2 +- pandas/tests/groupby/aggregate/test_aggregate.py | 4 ++-- pandas/tests/groupby/test_function.py | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexing/multiindex/test_getitem.py | 2 +- .../tests/indexing/multiindex/test_multiindex.py | 2 +- pandas/tests/indexing/test_iloc.py | 4 ++-- pandas/tests/indexing/test_loc.py | 6 +++--- pandas/tests/io/formats/test_info.py | 3 ++- pandas/tests/io/parser/test_network.py | 2 +- pandas/tests/plotting/test_datetimelike.py | 16 ++++++++++------ pandas/tests/plotting/test_hist_method.py | 2 +- pandas/tests/reductions/test_reductions.py | 2 +- pandas/tests/resample/test_period_index.py | 4 ++-- pandas/tests/resample/test_resample_api.py | 2 +- pandas/tests/reshape/concat/test_concat.py | 12 ++++++------ pandas/tests/series/indexing/test_datetime.py | 2 +- pandas/tests/series/test_api.py | 2 +- pandas/tests/test_sorting.py | 4 ++-- 32 files changed, 70 insertions(+), 61 deletions(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 1aa918527a11b..43083d65f14ed 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -971,7 +971,7 @@ def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2): tm.assert_frame_equal(res, expected) def test_performance_warning_for_poor_alignment(self, engine, parser): - df = DataFrame(np.random.default_rng(2).standard_normal(1000, 10)) + df = DataFrame(np.random.default_rng(2).standard_normal((1000, 10))) s = Series(np.random.default_rng(2).standard_normal(10000)) if engine == "numexpr": seen = PerformanceWarning diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 8536d6e8ff9db..264e27c9c122e 100644 --- 
a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -85,7 +85,7 @@ def test_mask_callable(self): def test_mask_dtype_bool_conversion(self): # GH#3733 - df = DataFrame(data=np.random.default_rng(2).standard_normal(100, 50)) + df = DataFrame(data=np.random.default_rng(2).standard_normal((100, 50))) df = df.where(df > 0) # create nans bools = df > 0 mask = isna(df) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 3885cbef3ed93..347a4a1edacb3 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -64,7 +64,7 @@ def test_setitem_dtype(self, dtype, float_frame): assert float_frame[dtype].dtype.name == dtype def test_setitem_list_not_dataframe(self, float_frame): - data = np.random.default_rng(2).standard_normal(len(float_frame), 2) + data = np.random.default_rng(2).standard_normal((len(float_frame), 2)) float_frame[["A", "B"]] = data tm.assert_almost_equal(float_frame[["A", "B"]].values, data) diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index b0c3ae1f1a5e3..492dd387971c8 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -170,7 +170,7 @@ def test_xs_integer_key(self): ids = list("abcde") index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) df = DataFrame( - np.random.default_rng(2).standard_normal(len(index), 3), + np.random.default_rng(2).standard_normal((len(index), 3)), index, ["X", "Y", "Z"], ) diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index a52dc90b931f9..87a56c0736287 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -25,8 +25,8 @@ def test_align_asfreq_method_raises(self): def test_frame_align_aware(self): idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") idx2 = 
date_range("2001", periods=5, freq="2H", tz="US/Eastern") - df1 = DataFrame(np.random.default_rng(2).standard_normal(len(idx1), 3), idx1) - df2 = DataFrame(np.random.default_rng(2).standard_normal(len(idx2), 3), idx2) + df1 = DataFrame(np.random.default_rng(2).standard_normal((len(idx1), 3)), idx1) + df2 = DataFrame(np.random.default_rng(2).standard_normal((len(idx2), 3)), idx2) new1, new2 = df1.align(df2) assert df1.index.tz == new1.index.tz assert df2.index.tz == new2.index.tz diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 6da6737d9215d..a7102c1944338 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -106,7 +106,7 @@ def test_asfreq_keep_index_name(self, frame_or_series): def test_asfreq_ts(self, frame_or_series): index = period_range(freq="A", start="1/1/2001", end="12/31/2010") obj = DataFrame( - np.random.default_rng(2).standard_normal(len(index), 3), index=index + np.random.default_rng(2).standard_normal((len(index), 3)), index=index ) obj = tm.get_obj(obj, frame_or_series) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index 088e84e083084..f949ca7083860 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -48,7 +48,9 @@ def test_at_time(self, frame_or_series): def test_at_time_midnight(self, frame_or_series): # midnight, everything rng = date_range("1/1/2000", "1/31/2000") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), index=rng) + ts = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 3)), index=rng + ) ts = tm.get_obj(ts, frame_or_series) result = ts.at_time(time(0, 0)) @@ -97,7 +99,7 @@ def test_at_time_raises(self, frame_or_series): def test_at_time_axis(self, axis): # issue 8839 rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = 
DataFrame(np.random.default_rng(2).standard_normal(len(rng), len(rng))) + ts = DataFrame(np.random.default_rng(2).standard_normal((len(rng), len(rng)))) ts.index, ts.columns = rng, rng indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index be6f6f05babdf..8110b86b35845 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -200,7 +200,7 @@ def test_between_time_axis_raises(self, axis): def test_between_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame( - np.random.default_rng(2).standard_normal(len(index), 5), index=index + np.random.default_rng(2).standard_normal((len(index), 5)), index=index ) bkey = slice(time(13, 0, 0), time(14, 0, 0)) binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index 9bfcc39c28b08..710978057460a 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -124,11 +124,11 @@ def test_clip_against_frame(self, axis): def test_clip_against_unordered_columns(self): # GH#20911 df1 = DataFrame( - np.random.default_rng(2).standard_normal(1000, 4), + np.random.default_rng(2).standard_normal((1000, 4)), columns=["A", "B", "C", "D"], ) df2 = DataFrame( - np.random.default_rng(2).standard_normal(1000, 4), + np.random.default_rng(2).standard_normal((1000, 4)), columns=["D", "A", "B", "C"], ) df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"]) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index dabdd8e59b81d..97c043052c4a7 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -627,7 +627,7 @@ def test_to_csv_interval_index(self): def 
test_to_csv_float32_nanrep(self): df = DataFrame( - np.random.default_rng(2).standard_normal(1, 4).astype(np.float32) + np.random.default_rng(2).standard_normal((1, 4)).astype(np.float32) ) df[1] = np.nan @@ -652,12 +652,12 @@ def create_cols(name): return [f"{name}{i:03d}" for i in range(5)] df_float = DataFrame( - np.random.default_rng(2).standard_normal(100, 5), + np.random.default_rng(2).standard_normal((100, 5)), dtype="float64", columns=create_cols("float"), ) df_int = DataFrame( - np.random.default_rng(2).standard_normal(100, 5).astype("int64"), + np.random.default_rng(2).standard_normal((100, 5)).astype("int64"), dtype="int64", columns=create_cols("int"), ) @@ -700,7 +700,7 @@ def create_cols(name): def test_to_csv_dups_cols(self): df = DataFrame( - np.random.default_rng(2).standard_normal(1000, 30), + np.random.default_rng(2).standard_normal((1000, 30)), columns=list(range(15)) + list(range(15)), dtype="float64", ) @@ -712,9 +712,9 @@ def test_to_csv_dups_cols(self): tm.assert_frame_equal(result, df) df_float = DataFrame( - np.random.default_rng(2).standard_normal(1000, 3), dtype="float64" + np.random.default_rng(2).standard_normal((1000, 3)), dtype="float64" ) - df_int = DataFrame(np.random.default_rng(2).standard_normal(1000, 3)).astype( + df_int = DataFrame(np.random.default_rng(2).standard_normal((1000, 3))).astype( "int64" ) df_bool = DataFrame(True, index=df_float.index, columns=range(3)) @@ -770,7 +770,7 @@ def test_to_csv_wide_frame_formatting(self, monkeypatch): # Issue #8621 chunksize = 100 df = DataFrame( - np.random.default_rng(2).standard_normal(1, chunksize + 10), + np.random.default_rng(2).standard_normal((1, chunksize + 10)), columns=None, index=None, ) diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py index cadbf11622671..6a3e6b8c0e059 100644 --- a/pandas/tests/frame/methods/test_to_period.py +++ b/pandas/tests/frame/methods/test_to_period.py @@ -18,7 +18,7 @@ def test_to_period(self, 
frame_or_series): dr = date_range("1/1/2000", "1/1/2001", freq="D") obj = DataFrame( - np.random.default_rng(2).standard_normal(len(dr), K), + np.random.default_rng(2).standard_normal((len(dr), K)), index=dr, columns=["A", "B", "C", "D", "E"], ) @@ -56,7 +56,7 @@ def test_to_period_without_freq(self, frame_or_series): def test_to_period_columns(self): dr = date_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.default_rng(2).standard_normal(len(dr), 5), index=dr) + df = DataFrame(np.random.default_rng(2).standard_normal((len(dr), 5)), index=dr) df["mix"] = "a" df = df.T @@ -70,7 +70,7 @@ def test_to_period_columns(self): def test_to_period_invalid_axis(self): dr = date_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.default_rng(2).standard_normal(len(dr), 5), index=dr) + df = DataFrame(np.random.default_rng(2).standard_normal((len(dr), 5)), index=dr) df["mix"] = "a" msg = "No axis named 2 for object type DataFrame" diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index 525b85a51512b..2f73e3d58b516 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -29,7 +29,7 @@ def test_to_timestamp(self, frame_or_series): K = 5 index = period_range(freq="A", start="1/1/2001", end="12/1/2009") obj = DataFrame( - np.random.default_rng(2).standard_normal(len(index), K), + np.random.default_rng(2).standard_normal((len(index), K)), index=index, columns=["A", "B", "C", "D", "E"], ) @@ -73,7 +73,7 @@ def test_to_timestamp_columns(self): K = 5 index = period_range(freq="A", start="1/1/2001", end="12/1/2009") df = DataFrame( - np.random.default_rng(2).standard_normal(len(index), K), + np.random.default_rng(2).standard_normal((len(index), K)), index=index, columns=["A", "B", "C", "D", "E"], ) @@ -124,7 +124,7 @@ def test_to_timestamp_columns(self): def test_to_timestamp_invalid_axis(self): index = period_range(freq="A", start="1/1/2001", 
end="12/1/2009") obj = DataFrame( - np.random.default_rng(2).standard_normal(len(index), 5), index=index + np.random.default_rng(2).standard_normal((len(index), 5)), index=index ) # invalid axis diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 5be30fecc13d3..db8387d729d47 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1231,7 +1231,9 @@ def test_frame_add_tz_mismatch_converts_to_utc(self): def test_align_frame(self): rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), index=rng) + ts = DataFrame( + np.random.default_rng(2).standard_normal((len(rng), 3)), index=rng + ) result = ts + ts[::2] expected = ts + ts diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 086b3c8541cf2..db2bdac093522 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -634,7 +634,7 @@ def test_chained_cmp_and_in(self, engine, parser): skip_if_no_pandas_parser(parser) cols = list("abc") df = DataFrame( - np.random.default_rng(2).standard_normal(100, len(cols)), columns=cols + np.random.default_rng(2).standard_normal((100, len(cols))), columns=cols ) res = df.query( "a < b < c and a not in b not in c", engine=engine, parser=parser diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index abce0f6d14f70..498a7c03c6d9f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -359,7 +359,7 @@ def test_agg_multiple_functions_maintain_order(df): def test_agg_multiple_functions_same_name(): # GH 30880 df = DataFrame( - np.random.default_rng(2).standard_normal(1000, 3), + np.random.default_rng(2).standard_normal((1000, 3)), index=pd.date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], ) @@ -381,7 
+381,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): # GH 30880 # ohlc expands dimensions, so different test to the above is required. df = DataFrame( - np.random.default_rng(2).standard_normal(1000, 3), + np.random.default_rng(2).standard_normal((1000, 3)), index=pd.date_range("1/1/2012", freq="S", periods=1000, name="dti"), columns=Index(["A", "B", "C"], name="alpha"), ) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index cb25e8dc96f92..77d7a54ed8c79 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -384,7 +384,7 @@ def test_cython_median(): exp = df.groupby(labels).agg(np.nanmedian) tm.assert_frame_equal(result, exp) - df = DataFrame(np.random.default_rng(2).standard_normal(1000, 5)) + df = DataFrame(np.random.default_rng(2).standard_normal((1000, 5))) msg = "using DataFrameGroupBy.median" with tm.assert_produces_warning(FutureWarning, match=msg): rs = df.groupby(labels).agg(np.median) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d2ec85f879572..944003c993b99 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1456,7 +1456,7 @@ def test_groupby_one_row(): # GH 11741 msg = r"^'Z'$" df1 = DataFrame( - np.random.default_rng(2).standard_normal(1, 4), columns=list("ABCD") + np.random.default_rng(2).standard_normal((1, 4)), columns=list("ABCD") ) with pytest.raises(KeyError, match=msg): df1.groupby("Z") diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 9374887a55d07..e2fbc0653695b 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -45,7 +45,7 @@ def test_series_getitem_duplicates_multiindex(level0_value): codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=["tag", "day"], ) - arr = 
np.random.default_rng(2).standard_normal(len(index), 1) + arr = np.random.default_rng(2).standard_normal((len(index), 1)) df = DataFrame(arr, index=index, columns=["val"]) # confirm indexing on missing value raises KeyError diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 9a54e2d0ae12e..3d2ed1d168040 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -117,7 +117,7 @@ def test_multiindex_with_datatime_level_preserves_freq(self): idx = Index(range(2), name="A") dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B") mi = MultiIndex.from_product([idx, dti]) - df = DataFrame(np.random.default_rng(2).standard_normal(14, 2), index=mi) + df = DataFrame(np.random.default_rng(2).standard_normal((14, 2)), index=mi) result = df.loc[0].index tm.assert_index_equal(result, dti) assert result.freq == dti.freq diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 6831126ad8a35..e9d4e9fbc1d80 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -264,7 +264,7 @@ def check(result, expected): def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): # GH 25753 df = DataFrame( - np.random.default_rng(2).standard_normal(len(index), len(columns)), + np.random.default_rng(2).standard_normal((len(index), len(columns))), index=index, columns=columns, ) @@ -979,7 +979,7 @@ def test_setitem_mix_of_nan_and_interval(self, not_na, nulls_fixture): def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): idx = Index([]) obj = DataFrame( - np.random.default_rng(2).standard_normal(len(idx), len(idx)), + np.random.default_rng(2).standard_normal((len(idx), len(idx))), index=idx, columns=idx, ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0cb1081feabf0..b3ad6e270e476 100644 --- 
a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1027,7 +1027,7 @@ def test_loc_non_unique_memory_error(self, length, l2): df = pd.concat( [ DataFrame( - np.random.default_rng(2).standard_normal(length, len(columns)), + np.random.default_rng(2).standard_normal((length, len(columns))), index=np.arange(length), columns=columns, ), @@ -1270,7 +1270,7 @@ def test_loc_getitem_time_object(self, frame_or_series): mask = (rng.hour == 9) & (rng.minute == 30) obj = DataFrame( - np.random.default_rng(2).standard_normal(len(rng), 3), index=rng + np.random.default_rng(2).standard_normal((len(rng), 3)), index=rng ) obj = tm.get_obj(obj, frame_or_series) @@ -1470,7 +1470,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): def test_loc_setitem_time_key(self, using_array_manager): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame( - np.random.default_rng(2).standard_normal(len(index), 5), index=index + np.random.default_rng(2).standard_normal((len(index), 5)), index=index ) akey = time(12, 0, 0) bkey = slice(time(13, 0, 0), time(14, 0, 0)) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 232f8ebabeee7..71696ee1e8b1d 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -27,7 +27,8 @@ def duplicate_columns_frame(): """Dataframe with duplicate column names.""" return DataFrame( - np.random.default_rng(2).standard_normal(1500, 4), columns=["a", "a", "b", "b"] + np.random.default_rng(2).standard_normal((1500, 4)), + columns=["a", "a", "b", "b"], ) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 47af81fe67ff0..dd702259a9558 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -297,7 +297,7 @@ def test_read_csv_chunked_download(self, s3_public_bucket, caplog, s3so): import s3fs df = DataFrame( - 
np.random.default_rng(2).standard_normal(100000, 4), columns=list("abcd") + np.random.default_rng(2).standard_normal((100000, 4)), columns=list("abcd") ) str_buf = StringIO() diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 445729e3f220d..b79b0adf8d664 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -289,7 +289,9 @@ def test_uhf(self): import pandas.plotting._matplotlib.converter as conv idx = date_range("2012-6-22 21:59:51.960928", freq="L", periods=500) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), index=idx) + df = DataFrame( + np.random.default_rng(2).standard_normal((len(idx), 2)), index=idx + ) _, ax = mpl.pyplot.subplots() df.plot(ax=ax) @@ -305,7 +307,9 @@ def test_uhf(self): def test_irreg_hf(self): idx = date_range("2012-6-22 21:59:51", freq="S", periods=10) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), index=idx) + df = DataFrame( + np.random.default_rng(2).standard_normal((len(idx), 2)), index=idx + ) irreg = df.iloc[[0, 1, 3, 4]] _, ax = mpl.pyplot.subplots() @@ -318,7 +322,7 @@ def test_irreg_hf(self): def test_irreg_hf_object(self): idx = date_range("2012-6-22 21:59:51", freq="S", periods=10) df2 = DataFrame( - np.random.default_rng(2).standard_normal(len(idx), 2), index=idx + np.random.default_rng(2).standard_normal((len(idx), 2)), index=idx ) _, ax = mpl.pyplot.subplots() df2.index = df2.index.astype(object) @@ -1295,7 +1299,7 @@ def test_secondary_legend_nonts_multi_col(self): @pytest.mark.xfail(reason="Api changed in 3.6.0") def test_format_date_axis(self): rng = date_range("1/1/2012", periods=12, freq="M") - df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).standard_normal((len(rng), 3)), rng) _, ax = mpl.pyplot.subplots() ax = df.plot(ax=ax) xaxis = ax.get_xaxis() @@ -1423,7 +1427,7 @@ def 
test_format_timedelta_ticks_narrow(self): expected_labels = [f"00:00:00.0000000{i:0>2d}" for i in np.arange(10)] rng = timedelta_range("0", periods=10, freq="ns") - df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).standard_normal((len(rng), 3)), rng) _, ax = mpl.pyplot.subplots() df.plot(fontsize=2, ax=ax) mpl.pyplot.draw() @@ -1447,7 +1451,7 @@ def test_format_timedelta_ticks_wide(self): ] rng = timedelta_range("0", periods=10, freq="1 d") - df = DataFrame(np.random.default_rng(2).standard_normal(len(rng), 3), rng) + df = DataFrame(np.random.default_rng(2).standard_normal((len(rng), 3)), rng) _, ax = mpl.pyplot.subplots() ax = df.plot(fontsize=2, ax=ax) mpl.pyplot.draw() diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 43c964f1d311d..a9a3219b7b8f7 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -427,7 +427,7 @@ def test_hist_layout_error(self): # GH 9351 def test_tight_layout(self): - df = DataFrame(np.random.default_rng(2).standard_normal(100, 2)) + df = DataFrame(np.random.default_rng(2).standard_normal((100, 2))) df[2] = to_datetime( np.random.default_rng(2).integers( 812419200000000000, diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 1143dc147eee9..c6d50ffb472a4 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -556,7 +556,7 @@ def test_sum_inf(self): assert np.isinf(s.sum()) - arr = np.random.default_rng(2).standard_normal(100, 100).astype("f4") + arr = np.random.default_rng(2).standard_normal((100, 100)).astype("f4") arr[:, 2] = np.inf msg = "use_inf_as_na option is deprecated" diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index be56b31efb778..59d7a681e9c21 100644 --- 
a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -631,7 +631,7 @@ def test_monthly_convention_span(self): ) def test_default_right_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), idx) + df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 2)), idx) resampled = df.resample(to_freq).mean() tm.assert_frame_equal( @@ -644,7 +644,7 @@ def test_default_right_closed_label(self, from_freq, to_freq): ) def test_default_left_closed_label(self, from_freq, to_freq): idx = date_range(start="8/15/2012", periods=100, freq=from_freq) - df = DataFrame(np.random.default_rng(2).standard_normal(len(idx), 2), idx) + df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 2)), idx) resampled = df.resample(to_freq).mean() tm.assert_frame_equal( diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index e0002091a9d0a..1cfcf555355b5 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -343,7 +343,7 @@ def test_agg_consistency(): # make sure that we are consistent across # similar aggregations with and w/o selection list df = DataFrame( - np.random.default_rng(2).standard_normal(1000, 3), + np.random.default_rng(2).standard_normal((1000, 3)), index=date_range("1/1/2012", freq="S", periods=1000), columns=["A", "B", "C"], ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 600cac34f1074..4491023125fb2 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -179,8 +179,8 @@ def test_concat_mapping(self, mapping, non_dict_mapping_subclass): tm.assert_frame_equal(result, expected) def test_concat_keys_and_levels(self): - df = DataFrame(np.random.default_rng(2).standard_normal(1, 3)) - df2 
= DataFrame(np.random.default_rng(2).standard_normal(1, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((1, 3))) + df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4))) levels = [["foo", "baz"], ["one", "two"]] names = ["first", "second"] @@ -221,8 +221,8 @@ def test_concat_keys_and_levels(self): def test_concat_keys_levels_no_overlap(self): # GH #1406 - df = DataFrame(np.random.default_rng(2).standard_normal(1, 3), index=["a"]) - df2 = DataFrame(np.random.default_rng(2).standard_normal(1, 4), index=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal((1, 3)), index=["a"]) + df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4)), index=["b"]) msg = "Values not found in passed level" with pytest.raises(ValueError, match=msg): @@ -260,8 +260,8 @@ def test_crossed_dtypes_weird_corner(self): ) tm.assert_frame_equal(appended, expected) - df = DataFrame(np.random.default_rng(2).standard_normal(1, 3), index=["a"]) - df2 = DataFrame(np.random.default_rng(2).standard_normal(1, 4), index=["b"]) + df = DataFrame(np.random.default_rng(2).standard_normal((1, 3)), index=["a"]) + df2 = DataFrame(np.random.default_rng(2).standard_normal((1, 4)), index=["b"]) result = concat([df, df2], keys=["one", "two"], names=["first", "second"]) assert result.index.names == ("first", "second") diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 30db5adc07d1d..2b80e9cb1b587 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -335,7 +335,7 @@ def test_loc_getitem_over_size_cutoff(monkeypatch): dates[p + 1] = dates[p] df = DataFrame( - np.random.default_rng(2).standard_normal(len(dates), 4), + np.random.default_rng(2).standard_normal((len(dates), 4)), index=dates, columns=list("ABCD"), ) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 096cbe3ab7e41..bd33f499711db 100644 --- 
a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -120,7 +120,7 @@ def test_class_axis(self): def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( - np.random.default_rng(2).standard_normal(1000, 3), + np.random.default_rng(2).standard_normal((1000, 3)), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index f01882cc095da..51fd35fa43aa0 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -201,11 +201,11 @@ class TestMerge: def test_int64_overflow_outer_merge(self): # #2690, combinatorial explosion df1 = DataFrame( - np.random.default_rng(2).standard_normal(1000, 7), + np.random.default_rng(2).standard_normal((1000, 7)), columns=list("ABCDEF") + ["G1"], ) df2 = DataFrame( - np.random.default_rng(2).standard_normal(1000, 7), + np.random.default_rng(2).standard_normal((1000, 7)), columns=list("ABCDEF") + ["G2"], ) result = merge(df1, df2, how="outer") From cef28050bad3e5f22ed8d6bacce6eb3028f7b242 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 13 Jul 2023 13:23:48 -0700 Subject: [PATCH 11/22] Fix --- pandas/tests/frame/methods/test_asfreq.py | 2 +- pandas/tests/frame/methods/test_at_time.py | 2 +- pandas/tests/frame/methods/test_replace.py | 4 +-- pandas/tests/frame/methods/test_round.py | 2 +- pandas/tests/frame/methods/test_sample.py | 4 +-- pandas/tests/frame/test_api.py | 4 +-- pandas/tests/frame/test_arithmetic.py | 2 +- pandas/tests/frame/test_constructors.py | 6 +++-- pandas/tests/frame/test_nonunique_indexes.py | 4 ++- pandas/tests/frame/test_query_eval.py | 14 +++++----- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 2 +- .../tests/indexing/multiindex/test_partial.py | 2 +- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_loc.py | 6 ++--- 
pandas/tests/internals/test_internals.py | 2 +- pandas/tests/io/formats/style/test_style.py | 2 +- pandas/tests/io/formats/test_format.py | 2 +- .../io/parser/dtypes/test_dtypes_basic.py | 2 +- pandas/tests/io/parser/test_c_parser_only.py | 2 +- pandas/tests/plotting/frame/test_frame.py | 10 +++---- .../tests/plotting/frame/test_frame_legend.py | 26 +++++++++++++------ pandas/tests/plotting/test_misc.py | 2 +- pandas/tests/plotting/test_series.py | 4 +-- .../tests/resample/test_resampler_grouper.py | 2 +- pandas/tests/reshape/concat/test_index.py | 4 +-- 26 files changed, 65 insertions(+), 51 deletions(-) diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index a7102c1944338..2c5137db94c16 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -82,7 +82,7 @@ def test_asfreq_normalize(self, frame_or_series): rng = date_range("1/1/2000 09:30", periods=20) norm = date_range("1/1/2000", periods=20) - vals = np.random.default_rng(2).standard_normal(20, 3) + vals = np.random.default_rng(2).standard_normal((20, 3)) obj = DataFrame(vals, index=rng) expected = DataFrame(vals, index=norm) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index f949ca7083860..67200396f6375 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -119,7 +119,7 @@ def test_at_time_axis(self, axis): def test_at_time_datetimeindex(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame( - np.random.default_rng(2).standard_normal(len(index), 5), index=index + np.random.default_rng(2).standard_normal((len(index), 5)), index=index ) akey = time(12, 0, 0) ainds = [24, 72, 120, 168] diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 933851526f28f..3203482ddf724 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ 
b/pandas/tests/frame/methods/test_replace.py @@ -983,12 +983,12 @@ def test_replace_bool_with_string(self): tm.assert_frame_equal(result, expected) def test_replace_pure_bool_with_string_no_op(self): - df = DataFrame(np.random.default_rng(2).random(2, 2) > 0.5) + df = DataFrame(np.random.default_rng(2).random((2, 2)) > 0.5) result = df.replace("asdf", "fdsa") tm.assert_frame_equal(df, result) def test_replace_bool_with_bool(self): - df = DataFrame(np.random.default_rng(2).random(2, 2) > 0.5) + df = DataFrame(np.random.default_rng(2).random((2, 2)) > 0.5) result = df.replace(False, True) expected = DataFrame(np.ones((2, 2), dtype=bool)) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index 5dab5a0172cc2..a96df27b48d7d 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -196,7 +196,7 @@ def test_round_builtin(self): def test_round_nonunique_categorical(self): # See GH#21809 idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) - df = DataFrame(np.random.default_rng(2).random(6, 3), columns=list("abc")) + df = DataFrame(np.random.default_rng(2).random((6, 3)), columns=list("abc")) expected = df.round(3) expected.index = idx diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 2559b71e97fb1..c5fe4336e22d4 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -165,8 +165,8 @@ def test_sample_none_weights(self, obj): "func_str,arg", [ ("np.array", [2, 3, 1, 0]), - ("np.random.default_rng(2).MT19937", 3), - ("np.random.default_rng(2).PCG64", 11), + ("np.random.MT19937", 3), + ("np.random.PCG64", 11), ], ) def test_sample_random_state(self, func_str, arg, frame_or_series): diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 45ecec8ee318b..ac6e883ac3966 100644 --- a/pandas/tests/frame/test_api.py +++ 
b/pandas/tests/frame/test_api.py @@ -126,8 +126,8 @@ def test_column_name_contains_unicode_surrogate(self): assert df.columns[0] == colname def test_new_empty_index(self): - df1 = DataFrame(np.random.default_rng(2).standard_normal(0, 3)) - df2 = DataFrame(np.random.default_rng(2).standard_normal(0, 3)) + df1 = DataFrame(np.random.default_rng(2).standard_normal((0, 3))) + df2 = DataFrame(np.random.default_rng(2).standard_normal((0, 3))) df1.index.name = "foo" assert df2.index.name is None diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index db8387d729d47..0394241955e9b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1015,7 +1015,7 @@ def test_arith_non_pandas_object(self): added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val2, axis="index"), added) - val3 = np.random.default_rng(2).random(*df.shape) + val3 = np.random.default_rng(2).random(df.shape) added = DataFrame(df.values + val3, index=df.index, columns=df.columns) tm.assert_frame_equal(df.add(val3), added) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ad6a32ae559e8..4dfc1dffae1ea 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -707,7 +707,7 @@ def test_constructor_error_msgs(self): msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" with pytest.raises(ValueError, match=msg): DataFrame( - np.random.default_rng(2).random(2, 3), + np.random.default_rng(2).random((2, 3)), columns=["A", "B", "C"], index=[1], ) @@ -715,7 +715,9 @@ def test_constructor_error_msgs(self): msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" with pytest.raises(ValueError, match=msg): DataFrame( - np.random.default_rng(2).random(2, 3), columns=["A", "B"], index=[1, 2] + np.random.default_rng(2).random((2, 3)), + columns=["A", "B"], + index=[1, 2], ) 
# gh-26429 diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 822347d7a9a24..4f0d5ad5488c0 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -190,7 +190,9 @@ def test_changing_dtypes_with_duplicate_columns(self): df["that"] = 1.0 check(df, expected) - df = DataFrame(np.random.default_rng(2).random(5, 2), columns=["that", "that"]) + df = DataFrame( + np.random.default_rng(2).random((5, 2)), columns=["that", "that"] + ) expected = DataFrame(1, index=range(5), columns=["that", "that"]) df["that"] = 1 diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index db2bdac093522..a11bf40dde719 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -420,7 +420,7 @@ def test_date_query_no_attribute_access(self, engine, parser): def test_date_query_with_NaT(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((n, 3))) df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) @@ -432,7 +432,7 @@ def test_date_query_with_NaT(self, engine, parser): def test_date_index_query(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((n, 3))) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) return_value = df.set_index("dates1", inplace=True, drop=True) @@ -444,7 +444,7 @@ def test_date_index_query(self, engine, parser): def test_date_index_query_with_NaT(self, engine, parser): n = 10 # Cast to object to avoid implicit cast when setting entry to pd.NaT below - df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)).astype( + df = 
DataFrame(np.random.default_rng(2).standard_normal((n, 3))).astype( {0: object} ) df["dates1"] = date_range("1/1/2012", periods=n) @@ -788,7 +788,7 @@ def test_date_query_no_attribute_access(self, engine, parser): def test_date_query_with_NaT(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((n, 3))) df["dates1"] = date_range("1/1/2012", periods=n) df["dates2"] = date_range("1/1/2013", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) @@ -802,7 +802,7 @@ def test_date_query_with_NaT(self, engine, parser): def test_date_index_query(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((n, 3))) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) return_value = df.set_index("dates1", inplace=True, drop=True) @@ -816,7 +816,7 @@ def test_date_index_query(self, engine, parser): def test_date_index_query_with_NaT(self, engine, parser): n = 10 # Cast to object to avoid implicit cast when setting entry to pd.NaT below - df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)).astype( + df = DataFrame(np.random.default_rng(2).standard_normal((n, 3))).astype( {0: object} ) df["dates1"] = date_range("1/1/2012", periods=n) @@ -832,7 +832,7 @@ def test_date_index_query_with_NaT(self, engine, parser): def test_date_index_query_with_NaT_duplicates(self, engine, parser): n = 10 - df = DataFrame(np.random.default_rng(2).standard_normal(n, 3)) + df = DataFrame(np.random.default_rng(2).standard_normal((n, 3))) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.loc[np.random.default_rng(2).random(n) > 0.5, "dates1"] = pd.NaT diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 944003c993b99..1999dc5c390e4 100644 --- 
a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2099,7 +2099,7 @@ def get_categorical_invalid_expected(): def test_empty_groupby_apply_nonunique_columns(): # GH#44417 - df = DataFrame(np.random.default_rng(2).standard_normal(0, 4)) + df = DataFrame(np.random.default_rng(2).standard_normal((0, 4))) df[3] = df[3].astype(np.int64) df.columns = [0, 1, 2, 0] gb = df.groupby(df[1], group_keys=False) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 17d500141632d..80424b07dcb6a 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -752,7 +752,7 @@ def test_missing_key_combination(self): ], names=["one", "two", "three"], ) - df = DataFrame(np.random.default_rng(2).random(4, 3), index=mi) + df = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi) msg = r"\('b', '1', slice\(None, None, None\)\)" with pytest.raises(KeyError, match=msg): df.loc[("b", "1", slice(None)), :] diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 7e97adb97a0b3..de989ad550f2b 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -105,7 +105,7 @@ def test_getitem_partial_column_select(self): codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], levels=[["a", "b"], ["x", "y"], ["p", "q"]], ) - df = DataFrame(np.random.default_rng(2).random(3, 2), index=idx) + df = DataFrame(np.random.default_rng(2).random((3, 2)), index=idx) result = df.loc[("a", "y"), :] expected = df.loc[("a", "y")] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index e9d4e9fbc1d80..62172ec9a83ad 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1131,7 +1131,7 @@ def view(self): def test_iloc_getitem_with_duplicates(self): df = DataFrame( - np.random.default_rng(2).random(3, 
3), + np.random.default_rng(2).random((3, 3)), columns=list("ABC"), index=list("aab"), ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b3ad6e270e476..2e687a442c7b7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -538,7 +538,7 @@ def test_loc_index(self): def test_loc_general(self): df = DataFrame( - np.random.default_rng(2).random(4, 4), + np.random.default_rng(2).random((4, 4)), columns=["A", "B", "C", "D"], index=["A", "B", "C", "D"], ) @@ -2414,7 +2414,7 @@ def test_loc_getitem_label_slice_period_timedelta(self, index): def test_loc_getitem_slice_floats_inexact(self): index = [52195.504153, 52196.303147, 52198.369883] - df = DataFrame(np.random.default_rng(2).random(3, 2), index=index) + df = DataFrame(np.random.default_rng(2).random((3, 2)), index=index) s1 = df.loc[52195.1:52196.5] assert len(s1) == 2 @@ -2751,7 +2751,7 @@ def test_loc_named_index(self): def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns): # gh-14836 df = DataFrame( - np.random.default_rng(2).random(3, 3), columns=columns, index=list("ABC") + np.random.default_rng(2).random((3, 3)), columns=columns, index=list("ABC") ) expected = df.iloc[:, expected_columns] result = df.loc[["A", "B", "C"], column_key] diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index ed9b827e249f2..49163789f34c1 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -424,7 +424,7 @@ def test_categorical_block_pickle(self): def test_iget(self): cols = Index(list("abc")) - values = np.random.default_rng(2).random(3, 3) + values = np.random.default_rng(2).random((3, 3)) block = new_block( values=values.copy(), placement=BlockPlacement(np.arange(3, dtype=np.intp)), diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index d046178f1258f..6fa72bd48031c 100644 --- 
a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -731,7 +731,7 @@ def test_map_subset_multiindex(self, slice_): idx = MultiIndex.from_product([["a", "b"], [1, 2]]) col = MultiIndex.from_product([["x", "y"], ["A", "B"]]) - df = DataFrame(np.random.default_rng(2).random(4, 4), columns=col, index=idx) + df = DataFrame(np.random.default_rng(2).random((4, 4)), columns=col, index=idx) with ctx: df.style.map(lambda x: "color: red;", subset=slice_).to_html() diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 64a4a9f42f437..5e08f4d340604 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -407,7 +407,7 @@ def test_repr_truncates_terminal_size(self, monkeypatch): def test_repr_truncates_terminal_size_full(self, monkeypatch): # GH 22984 ensure entire window is filled terminal_size = (80, 24) - df = DataFrame(np.random.default_rng(2).random(1, 7)) + df = DataFrame(np.random.default_rng(2).random((1, 7))) monkeypatch.setattr( "pandas.io.formats.format.get_terminal_size", lambda: terminal_size diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 8cc9df8e6768a..f0e2d2eda41c2 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -31,7 +31,7 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig): parser = all_parsers df = DataFrame( - np.random.default_rng(2).random(5, 2).round(4), + np.random.default_rng(2).random((5, 2)).round(4), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"], ) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 3ae1f9241d58c..32a010b3aeb34 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -130,7 +130,7 @@ def 
test_dtype_and_names_error(c_parser_only): def test_unsupported_dtype(c_parser_only, match, kwargs): parser = c_parser_only df = DataFrame( - np.random.default_rng(2).random(5, 2), + np.random.default_rng(2).random((5, 2)), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"], ) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index a2df548577db6..9bcc0f9534ea0 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -564,7 +564,7 @@ def test_line_area_nan_df_stacked_area(self, idx, kwargs): @pytest.mark.parametrize("kwargs", [{}, {"secondary_y": True}]) def test_line_lim(self, kwargs): - df = DataFrame(np.random.default_rng(2).random(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.default_rng(2).random((6, 3)), columns=["x", "y", "z"]) ax = df.plot(**kwargs) xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -572,7 +572,7 @@ def test_line_lim(self, kwargs): assert xmax >= lines[0].get_data()[0][-1] def test_line_lim_subplots(self): - df = DataFrame(np.random.default_rng(2).random(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.default_rng(2).random((6, 3)), columns=["x", "y", "z"]) axes = df.plot(secondary_y=True, subplots=True) _check_axes_shape(axes, axes_num=3, layout=(3, 1)) for ax in axes: @@ -610,7 +610,7 @@ def test_area_lim(self, stacked): def test_area_sharey_dont_overwrite(self): # GH37942 - df = DataFrame(np.random.default_rng(2).random(4, 2), columns=["x", "y"]) + df = DataFrame(np.random.default_rng(2).random((4, 2)), columns=["x", "y"]) fig, (ax1, ax2) = mpl.pyplot.subplots(1, 2, sharey=True) df.plot(ax=ax1, kind="area") @@ -1582,7 +1582,7 @@ def test_pie_df_labels_colors(self): _check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): - df = DataFrame(np.random.default_rng(2).random(4, 4)) + df = DataFrame(np.random.default_rng(2).random((4, 4))) for i in range(4): df.iloc[i, i] = np.nan _, axes = 
mpl.pyplot.subplots(ncols=4) @@ -1809,7 +1809,7 @@ def test_errorbar_timeseries(self, kind): _check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): - err = np.random.default_rng(2).random(3, 2, 5) + err = np.random.default_rng(2).random((3, 2, 5)) # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]... df = DataFrame(np.arange(15).reshape(3, 5)).T diff --git a/pandas/tests/plotting/frame/test_frame_legend.py b/pandas/tests/plotting/frame/test_frame_legend.py index 942fa0c2bc358..2590c41664c4b 100644 --- a/pandas/tests/plotting/frame/test_frame_legend.py +++ b/pandas/tests/plotting/frame/test_frame_legend.py @@ -62,10 +62,16 @@ def test_legend_false(self): @td.skip_if_no_scipy @pytest.mark.parametrize("kind", ["line", "bar", "barh", "kde", "area", "hist"]) def test_df_legend_labels(self, kind): - df = DataFrame(np.random.default_rng(2).random(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.default_rng(2).random(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.default_rng(2).random(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(np.random.default_rng(2).random(3, 3), columns=["j", "k", "l"]) + df = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["a", "b", "c"]) + df2 = DataFrame( + np.random.default_rng(2).random((3, 3)), columns=["d", "e", "f"] + ) + df3 = DataFrame( + np.random.default_rng(2).random((3, 3)), columns=["g", "h", "i"] + ) + df4 = DataFrame( + np.random.default_rng(2).random((3, 3)), columns=["j", "k", "l"] + ) ax = df.plot(kind=kind, legend=True) _check_legend_labels(ax, labels=df.columns) @@ -82,9 +88,13 @@ def test_df_legend_labels(self, kind): @td.skip_if_no_scipy def test_df_legend_labels_secondary_y(self): - df = DataFrame(np.random.default_rng(2).random(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.default_rng(2).random(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.default_rng(2).random(3, 3), columns=["g", "h", "i"]) + df = 
DataFrame(np.random.default_rng(2).random((3, 3)), columns=["a", "b", "c"]) + df2 = DataFrame( + np.random.default_rng(2).random((3, 3)), columns=["d", "e", "f"] + ) + df3 = DataFrame( + np.random.default_rng(2).random((3, 3)), columns=["g", "h", "i"] + ) # Secondary Y ax = df.plot(legend=True, secondary_y="b") _check_legend_labels(ax, labels=["a", "b (right)", "c"]) @@ -227,7 +237,7 @@ def test_legend_name(self): ], ) def test_no_legend(self, kind): - df = DataFrame(np.random.default_rng(2).random(3, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.default_rng(2).random((3, 3)), columns=["a", "b", "c"]) ax = df.plot(kind=kind, legend=False) _check_legend_labels(ax, visible=False) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index ff04b0d5f4948..b6e6bbae76679 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -483,7 +483,7 @@ def test_dictionary_color(self, kind): expected = [(0.5, 0.24, 0.6), (0.3, 0.7, 0.7)] - df1 = DataFrame(np.random.default_rng(2).random(2, 2), columns=data_files) + df1 = DataFrame(np.random.default_rng(2).random((2, 2)), columns=data_files) dic_color = {"b": (0.3, 0.7, 0.7), "a": (0.5, 0.24, 0.6)} ax = df1.plot(kind=kind, color=dic_color) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 0981f38288d1e..8b8a69a88ca35 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -661,7 +661,7 @@ def test_dup_datetime_index_plot(self): def test_errorbar_asymmetrical(self): # GH9536 s = Series(np.arange(10), name="x") - err = np.random.default_rng(2).random(2, 10) + err = np.random.default_rng(2).random((2, 10)) ax = s.plot(yerr=err, xerr=err) @@ -674,7 +674,7 @@ def test_errorbar_asymmetrical(self): f"with the shape \\(2, {len(s)}\\)" ) with pytest.raises(ValueError, match=msg): - s.plot(yerr=np.random.default_rng(2).random(2, 11)) + s.plot(yerr=np.random.default_rng(2).random((2, 
11))) @pytest.mark.slow @pytest.mark.parametrize("kind", ["line", "bar"]) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index c5d68a6c88e0f..a3086b8b46e47 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -364,7 +364,7 @@ def test_median_duplicate_columns(): # GH 14233 df = DataFrame( - np.random.default_rng(2).standard_normal(20, 3), + np.random.default_rng(2).standard_normal((20, 3)), columns=list("aaa"), index=date_range("2012-01-01", periods=20, freq="s"), ) diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index fce3a08100ce4..f113d1d52bd6c 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -80,12 +80,12 @@ def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out): def test_concat_rename_index(self): a = DataFrame( - np.random.default_rng(2).random(3, 3), + np.random.default_rng(2).random((3, 3)), columns=list("ABC"), index=Index(list("abc"), name="index_a"), ) b = DataFrame( - np.random.default_rng(2).random(3, 3), + np.random.default_rng(2).random((3, 3)), columns=list("ABC"), index=Index(list("abc"), name="index_b"), ) From 094c39f92bfc8e2ff36057aff1664e108c775290 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 13 Jul 2023 19:27:53 -0700 Subject: [PATCH 12/22] Address more --- pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/frame/methods/test_between_time.py | 4 ++-- pandas/tests/frame/methods/test_fillna.py | 2 +- pandas/tests/frame/methods/test_interpolate.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 6 ++++-- pandas/tests/frame/methods/test_sample.py | 10 +++------- pandas/tests/frame/methods/test_shift.py | 6 +++--- pandas/tests/frame/methods/test_to_records.py | 2 +- pandas/tests/frame/test_stack_unstack.py | 2 +- 
pandas/tests/indexes/datetimes/test_partial_slicing.py | 8 ++++---- pandas/tests/indexing/multiindex/test_setitem.py | 2 +- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/io/excel/test_writers.py | 2 +- pandas/tests/io/formats/test_format.py | 2 +- pandas/tests/io/sas/test_byteswap.py | 4 +--- pandas/tests/io/test_parquet.py | 2 +- pandas/tests/io/test_sql.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 2 +- pandas/tests/test_algos.py | 6 +++--- pandas/tests/test_common.py | 2 +- pandas/tests/test_nanops.py | 4 ++-- pandas/tests/test_sorting.py | 10 +++++----- 22 files changed, 40 insertions(+), 44 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 347a4a1edacb3..6217ba4a57ac8 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -703,7 +703,7 @@ def test_setitem_ea_dtype_rhs_series(self): @td.skip_array_manager_not_yet_implemented def test_setitem_npmatrix_2d(self): # GH#42376 - # for use-case df["x"] = sparse.random(10, 10).mean(axis=1) + # for use-case df["x"] = sparse.random((10, 10)).mean(axis=1) expected = DataFrame( {"np-array": np.ones(10), "np-matrix": np.ones(10)}, index=np.arange(10) ) diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index 8110b86b35845..4c1e009b04639 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -162,7 +162,7 @@ def test_between_time_axis(self, frame_or_series): def test_between_time_axis_aliases(self, axis): # GH#8839 rng = date_range("1/1/2000", periods=100, freq="10min") - ts = DataFrame(np.random.default_rng(2).standard_normal(len(rng), len(rng))) + ts = DataFrame(np.random.default_rng(2).standard_normal((len(rng), len(rng)))) stime, etime = ("08:00:00", "09:00:00") exp_len = 7 @@ -180,7 +180,7 @@ def test_between_time_axis_raises(self, axis): # issue 8839 rng 
= date_range("1/1/2000", periods=100, freq="10min") mask = np.arange(0, len(rng)) - rand_data = np.random.default_rng(2).standard_normal(len(rng), len(rng)) + rand_data = np.random.default_rng(2).standard_normal((len(rng), len(rng))) ts = DataFrame(rand_data, index=rng, columns=rng) stime, etime = ("08:00:00", "09:00:00") diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 4f505e4e72a27..812150bb860e9 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -635,7 +635,7 @@ def test_fillna_invalid_value(self, float_frame): def test_fillna_col_reordering(self): cols = ["COL." + str(i) for i in range(5, 0, -1)] - data = np.random.default_rng(2).random(20, 5) + data = np.random.default_rng(2).random((20, 5)) df = DataFrame(index=range(20), columns=cols, data=data) msg = "DataFrame.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 609495a3d9fec..0a94f986c0d88 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -430,7 +430,7 @@ def test_interp_time_inplace_axis(self): # GH 9687 periods = 5 idx = date_range(start="2014-01-01", periods=periods) - data = np.random.default_rng(2).random(periods, periods) + data = np.random.default_rng(2).random((periods, periods)) data[data < 0.5] = np.nan expected = DataFrame(index=idx, columns=idx, data=data) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index d93684d9c0658..1a717e6d6cbf7 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -49,7 +49,9 @@ def test_dti_set_index_reindex_freq_with_tz(self): datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" ) df = DataFrame( - 
np.random.default_rng(2).standard_normal(24, 1), columns=["a"], index=index + np.random.default_rng(2).standard_normal((24, 1)), + columns=["a"], + index=index, ) new_index = date_range( datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" @@ -161,7 +163,7 @@ def test_reindex_tzaware_fill_value(self): def test_reindex_copies(self): # based on asv time_reindex_axis1 N = 10 - df = DataFrame(np.random.default_rng(2).standard_normal(N * 10, N)) + df = DataFrame(np.random.default_rng(2).standard_normal((N * 10, N))) cols = np.arange(N) np.random.default_rng(2).shuffle(cols) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index c5fe4336e22d4..114c59e318d09 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -180,7 +180,7 @@ def test_sample_random_state(self, func_str, arg, frame_or_series): def test_sample_generator(self, frame_or_series): # GH#38100 obj = frame_or_series(np.arange(100)) - rng = np.random.default_rng(2).default_rng() + rng = np.random.default_rng() # Consecutive calls should advance the seed result1 = obj.sample(n=50, random_state=rng) @@ -189,12 +189,8 @@ def test_sample_generator(self, frame_or_series): # Matching generator initialization must give same result # Consecutive calls should advance the seed - result1 = obj.sample( - n=50, random_state=np.random.default_rng(2).default_rng(11) - ) - result2 = obj.sample( - n=50, random_state=np.random.default_rng(2).default_rng(11) - ) + result1 = obj.sample(n=50, random_state=np.random.default_rng(11)) + result2 = obj.sample(n=50, random_state=np.random.default_rng(11)) tm.assert_equal(result1, result2) def test_sample_upsampling_without_replacement(self, frame_or_series): diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index e18d52a4cc547..883a6db9b1f62 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ 
b/pandas/tests/frame/methods/test_shift.py @@ -270,7 +270,7 @@ def test_shift_with_periodindex(self, frame_or_series): def test_shift_other_axis(self): # shift other axis # GH#6371 - df = DataFrame(np.random.default_rng(2).random(10, 5)) + df = DataFrame(np.random.default_rng(2).random((10, 5))) expected = pd.concat( [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, @@ -281,7 +281,7 @@ def test_shift_other_axis(self): def test_shift_named_axis(self): # shift named axis - df = DataFrame(np.random.default_rng(2).random(10, 5)) + df = DataFrame(np.random.default_rng(2).random((10, 5))) expected = pd.concat( [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], ignore_index=True, @@ -399,7 +399,7 @@ def test_shift_duplicate_columns(self): # GH#9092; verify that position-based shifting works # in the presence of duplicate columns column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] - data = np.random.default_rng(2).standard_normal(20, 5) + data = np.random.default_rng(2).standard_normal((20, 5)) shifted = [] for columns in column_lists: diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 1236cb135842e..8853d718270f4 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -78,7 +78,7 @@ def test_to_records_with_Mapping_type(self): all(x in frame for x in ["Type", "Subject", "From"]) def test_to_records_floats(self): - df = DataFrame(np.random.default_rng(2).random(10, 10)) + df = DataFrame(np.random.default_rng(2).random((10, 10))) df.to_records() def test_to_records_index_name(self): diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 3e084308642a7..99222ebe41e78 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1989,7 +1989,7 @@ def __init__(self, *args, **kwargs) -> None: with 
monkeypatch.context() as m: m.setattr(reshape_lib, "_Unstacker", MockUnstacker) df = DataFrame( - np.random.default_rng(2).standard_normal(2**16, 2), + np.random.default_rng(2).standard_normal((2**16, 2)), index=[np.arange(2**16), np.arange(2**16)], ) msg = "The following operation may generate" diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 5cfee9341a38c..33d7570a07d73 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -127,7 +127,7 @@ def test_slice_year(self): expected = s[s.index.year == 2005] tm.assert_series_equal(result, expected) - df = DataFrame(np.random.default_rng(2).random(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti) result = df.loc["2005"] expected = df[df.index.year == 2005] tm.assert_frame_equal(result, expected) @@ -158,7 +158,7 @@ def test_slice_quarter(self): s = Series(np.arange(len(dti)), index=dti) assert len(s["2001Q1"]) == 90 - df = DataFrame(np.random.default_rng(2).random(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti) assert len(df.loc["1Q01"]) == 90 def test_slice_month(self): @@ -166,7 +166,7 @@ def test_slice_month(self): s = Series(np.arange(len(dti)), index=dti) assert len(s["2005-11"]) == 30 - df = DataFrame(np.random.default_rng(2).random(len(dti), 5), index=dti) + df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti) assert len(df.loc["2005-11"]) == 30 tm.assert_series_equal(s["2005-11"], s["11-2005"]) @@ -361,7 +361,7 @@ def test_partial_slicing_with_multiindex_series(self): # GH 4294 # partial slice on a series mi ser = DataFrame( - np.random.default_rng(2).random(1000, 1000), + np.random.default_rng(2).random((1000, 1000)), index=date_range("2000-1-1", periods=1000), ).stack() diff --git a/pandas/tests/indexing/multiindex/test_setitem.py 
b/pandas/tests/indexing/multiindex/test_setitem.py index de69d78c0f392..7e1b3c7753419 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -389,7 +389,7 @@ def test_loc_getitem_setitem_slice_integers(self, frame_or_series): ) obj = DataFrame( - np.random.default_rng(2).standard_normal(len(index), 4), + np.random.default_rng(2).standard_normal((len(index), 4)), index=index, columns=["a", "b", "c", "d"], ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2e687a442c7b7..0de5f71336cfb 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2869,7 +2869,7 @@ def test_loc_datetimelike_mismatched_dtypes(): def test_loc_with_period_index_indexer(): # GH#4125 idx = pd.period_range("2002-01", "2003-12", freq="M") - df = DataFrame(np.random.default_rng(2).standard_normal(24, 10), index=idx) + df = DataFrame(np.random.default_rng(2).standard_normal((24, 10)), index=idx) tm.assert_frame_equal(df, df.loc[idx]) tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df, df.loc[list(idx)]) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 65f4856d1ecea..905c733ea5ef1 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -773,7 +773,7 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, path): { "A": [None, 2, 3], "B": [10, 20, 30], - "C": np.random.default_rng(2).sample(3), + "C": np.random.default_rng(2).random(3), } ) df = df.set_index(["A", "B"]) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 5e08f4d340604..42e0a8025274a 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2215,7 +2215,7 @@ def test_max_rows_fitted(self, length, min_rows, max_rows, expected): 
https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options """ formatter = fmt.DataFrameFormatter( - DataFrame(np.random.default_rng(2).random(length, 3)), + DataFrame(np.random.default_rng(2).random((length, 3))), max_rows=max_rows, min_rows=min_rows, ) diff --git a/pandas/tests/io/sas/test_byteswap.py b/pandas/tests/io/sas/test_byteswap.py index 5787dcecec320..6d7f2f05d1b00 100644 --- a/pandas/tests/io/sas/test_byteswap.py +++ b/pandas/tests/io/sas/test_byteswap.py @@ -39,9 +39,7 @@ def test_float_byteswap(read_offset, number, float_type, should_byteswap): def _test(number, number_type, read_offset, should_byteswap): number = number_type(number) - data = ( - np.random.default_rng(2).default_rng().integers(0, 256, size=20, dtype="uint8") - ) + data = np.random.default_rng(2).integers(0, 256, size=20, dtype="uint8") data[read_offset : read_offset + number.itemsize] = number[None].view("uint8") swap_func = { np.float32: read_float_with_byteswap, diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index a3a4a535a8af5..8ad9d64173f93 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -466,7 +466,7 @@ def test_multiindex_with_columns(self, pa): engine = pa dates = pd.date_range("01-Jan-2018", "01-Dec-2018", freq="MS") df = pd.DataFrame( - np.random.default_rng(2).standard_normal(2 * len(dates), 3), + np.random.default_rng(2).standard_normal((2 * len(dates), 3)), columns=list("ABC"), ) index1 = pd.MultiIndex.from_product( diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index c39217f08ae35..e02852f99bce1 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1544,7 +1544,7 @@ def test_get_schema_keys(self, test_frame1): def test_chunksize_read(self): df = DataFrame( - np.random.default_rng(2).standard_normal(22, 5), columns=list("abcde") + np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde") ) df.to_sql("test_chunksize", 
self.conn, index=False) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 0ba4ed85d56d4..555b9fd0c82c2 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -212,7 +212,7 @@ def test_boxplot_empty_column(self): def test_figsize(self): df = DataFrame( - np.random.default_rng(2).random(10, 5), columns=["A", "B", "C", "D", "E"] + np.random.default_rng(2).random((10, 5)), columns=["A", "B", "C", "D", "E"] ) result = df.boxplot(return_type="axes", figsize=(12, 8)) assert result.figure.bbox_inches.width == 12 diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 993f32f927bfb..fa5d12d5a722c 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1175,16 +1175,16 @@ def test_isin_unsigned_dtype(self): class TestValueCounts: def test_value_counts(self): - arr = np.random.default_rng(2).standard_normal(4) + arr = np.random.default_rng(1234).standard_normal(4) factor = cut(arr, 4) # assert isinstance(factor, n) msg = "pandas.value_counts is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): result = algos.value_counts(factor) - breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] + breaks = [-1.606, -1.018, -0.431, 0.155, 0.741] index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True)) - expected = Series([1, 1, 1, 1], index=index, name="count") + expected = Series([1, 0, 2, 1], index=index, name="count") tm.assert_series_equal(result.sort_index(), expected.sort_index()) def test_value_counts_bins(self): diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 89206640eb663..e8a1c961c8cb6 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -61,7 +61,7 @@ def test_random_state(): assert com.random_state(state2).uniform() == np.random.RandomState(10).uniform() # check with no arg random state - assert com.random_state() is np.random.default_rng(2) + 
assert com.random_state() is np.random # check array-like # GH32503 diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 5aec1c6d17513..a1e6ac0448d3f 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1082,11 +1082,11 @@ def test_constant_series(self, val): def test_all_finite(self): alpha, beta = 0.3, 0.1 left_tailed = self.prng.beta(alpha, beta, size=100) - assert nanops.nankurt(left_tailed) < 0 + assert nanops.nankurt(left_tailed) < 2 alpha, beta = 0.1, 0.3 right_tailed = self.prng.beta(alpha, beta, size=100) - assert nanops.nankurt(right_tailed) > 0 + assert nanops.nankurt(right_tailed) < 0 def test_ground_truth(self, samples, actual_kurt): kurt = nanops.nankurt(samples) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 51fd35fa43aa0..b8320625f0d79 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -205,7 +205,7 @@ def test_int64_overflow_outer_merge(self): columns=list("ABCDEF") + ["G1"], ) df2 = DataFrame( - np.random.default_rng(2).standard_normal((1000, 7)), + np.random.default_rng(3).standard_normal((1000, 7)), columns=list("ABCDEF") + ["G2"], ) result = merge(df1, df2, how="outer") @@ -262,23 +262,23 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort): left = concat([left, left], ignore_index=True) right = DataFrame( - np.random.default_rng(2).integers(low, high, (n // 2, 7)).astype("int64"), + np.random.default_rng(3).integers(low, high, (n // 2, 7)).astype("int64"), columns=list("ABCDEFG"), ) # add duplicates & overlap with left to the right frame - i = np.random.default_rng(2).choice(len(left), n) + i = np.random.default_rng(4).choice(len(left), n) right = concat([right, right, left.iloc[i]], ignore_index=True) left["left"] = np.random.default_rng(2).standard_normal(len(left)) right["right"] = np.random.default_rng(2).standard_normal(len(right)) # shuffle left & right frames - i = np.random.default_rng(2).permutation(len(left)) + 
i = np.random.default_rng(5).permutation(len(left)) left = left.iloc[i].copy() left.index = np.arange(len(left)) - i = np.random.default_rng(2).permutation(len(right)) + i = np.random.default_rng(6).permutation(len(right)) right = right.iloc[i].copy() right.index = np.arange(len(right)) From a8df4c2bb88aa0de0e285edd16f8882e6b8f4db4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Jul 2023 11:47:50 -0700 Subject: [PATCH 13/22] Fix more test --- pandas/_testing/__init__.py | 4 +- pandas/tests/frame/methods/test_reindex.py | 2 +- pandas/tests/frame/test_stack_unstack.py | 10 +- pandas/tests/groupby/test_function.py | 22 ++--- pandas/tests/groupby/test_pipe.py | 2 +- pandas/tests/groupby/test_quantile.py | 4 +- pandas/tests/groupby/test_skew.py | 2 +- .../tests/groupby/transform/test_transform.py | 2 +- pandas/tests/indexes/multi/test_sorting.py | 2 +- pandas/tests/indexing/test_loc.py | 2 +- pandas/tests/io/formats/test_format.py | 4 +- pandas/tests/plotting/conftest.py | 2 +- pandas/tests/plotting/frame/test_frame.py | 4 +- pandas/tests/plotting/test_datetimelike.py | 16 ++- pandas/tests/plotting/test_hist_method.py | 4 +- pandas/tests/plotting/test_misc.py | 4 +- pandas/tests/resample/test_datetime_index.py | 2 +- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/reshape/test_crosstab.py | 4 +- pandas/tests/window/test_pairwise.py | 2 +- pandas/tests/window/test_rolling.py | 99 ++----------------- 21 files changed, 58 insertions(+), 137 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index be00d371abe92..a790a02cbf324 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -510,8 +510,8 @@ def makeObjectSeries(name=None) -> Series: def getSeriesData() -> dict[str, Series]: index = makeStringIndex(_N) return { - c: Series(np.random.default_rng(2).standard_normal(_N), index=index) - for c in getCols(_K) + c: 
Series(np.random.default_rng(i).standard_normal(_N), index=index) + for i, c in enumerate(getCols(_K)) } diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 1a717e6d6cbf7..7f38def847c45 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -179,7 +179,7 @@ def test_reindex_copies_ea(self, using_copy_on_write): # also ensure to honor copy keyword for ExtensionDtypes N = 10 df = DataFrame( - np.random.default_rng(2).standard_normal(N * 10, N), dtype="Float64" + np.random.default_rng(2).standard_normal((N * 10, N)), dtype="Float64" ) cols = np.arange(N) np.random.default_rng(2).shuffle(cols) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 99222ebe41e78..3d11802694aef 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1947,11 +1947,11 @@ def test_unstack_sparse_keyspace(self): df = DataFrame( { "A": np.random.default_rng(2).integers(100, size=NUM_ROWS), - "B": np.random.default_rng(2).integers(300, size=NUM_ROWS), - "C": np.random.default_rng(2).integers(-7, 7, size=NUM_ROWS), - "D": np.random.default_rng(2).integers(-19, 19, size=NUM_ROWS), - "E": np.random.default_rng(2).integers(3000, size=NUM_ROWS), - "F": np.random.default_rng(2).standard_normal(NUM_ROWS), + "B": np.random.default_rng(3).integers(300, size=NUM_ROWS), + "C": np.random.default_rng(4).integers(-7, 7, size=NUM_ROWS), + "D": np.random.default_rng(5).integers(-19, 19, size=NUM_ROWS), + "E": np.random.default_rng(6).integers(3000, size=NUM_ROWS), + "F": np.random.default_rng(7).standard_normal(NUM_ROWS), } ) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 77d7a54ed8c79..d71a3998d5321 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -763,23 +763,23 @@ def test_nlargest_mi_grouper(): (dts[5], 
dts[5], "one"), (dts[6], dts[6], "one"), (dts[7], dts[7], "one"), - (dts[8], dts[8], "two"), + (dts[8], dts[8], "one"), (dts[9], dts[9], "one"), ], names=["first", "first", "second"], ) exp_values = [ - 2.2129019979039612, - 1.8417114045748335, - 0.858963679564603, - 1.3759151378258088, - 0.9430284594687134, - 0.5296914208183142, - 0.8318045593815487, - -0.8476703342910327, - 0.3804446884133735, - -0.8028845810770998, + 0.18905338179353307, + -0.41306354339189344, + 1.799707382720902, + 0.7738065867276614, + 0.28121066979764925, + 0.9775674511260357, + -0.3288239040579627, + 0.45495807124085547, + 0.5452887139646817, + 0.12682784711186987, ] expected = Series(exp_values, index=exp_idx) diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index ee13c37391065..7d5c1625b8ab4 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -36,7 +36,7 @@ def square(srs): result = df.groupby("A").pipe(f).pipe(square) index = Index(["bar", "foo"], dtype="object", name="A") - expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index) + expected = pd.Series([3.749306591013693, 6.717707873081384], name="B", index=index) tm.assert_series_equal(expected, result) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 8f3de4d8ff7cc..165d72bf3e878 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -98,8 +98,8 @@ def test_quantile_array2(): result = df.groupby("A").quantile([0.3, 0.7]) expected = DataFrame( { - "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0], - "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0], + "B": [2.0, 2.0, 2.3, 2.7, 0.3, 0.7, 3.2, 4.0, 0.3, 0.7], + "C": [1.0, 1.0, 1.9, 3.0999999999999996, 0.3, 0.7, 2.6, 3.0, 1.2, 2.8], }, index=pd.MultiIndex.from_product( [[0, 1, 2, 3, 4], [0.3, 0.7]], names=["A", None] diff --git a/pandas/tests/groupby/test_skew.py b/pandas/tests/groupby/test_skew.py 
index 89883c04519df..563da89b6ab24 100644 --- a/pandas/tests/groupby/test_skew.py +++ b/pandas/tests/groupby/test_skew.py @@ -12,7 +12,7 @@ def test_groupby_skew_equivalence(): ncols = 2 nan_frac = 0.05 - arr = np.random.default_rng(2).standard_normal(nrows, ncols) + arr = np.random.default_rng(2).standard_normal((nrows, ncols)) arr[np.random.default_rng(2).random(nrows) < nan_frac] = np.nan df = pd.DataFrame(arr) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 8823467032370..bd71b32603e68 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -225,7 +225,7 @@ def test_transform_axis_ts(tsframe): r = len(base.index) c = len(base.columns) tso = DataFrame( - np.random.default_rng(2).standard_normal(r, c), + np.random.default_rng(2).standard_normal((r, c)), index=base.index, columns=base.columns, dtype="float64", diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index b1de30ae4aa20..08c1a4092952c 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -241,7 +241,7 @@ def test_remove_unused_levels_large(first_type, second_type): # because tests should be deterministic (and this test in particular # checks that levels are removed, which is not the case for every # random input): - rng = np.random.default_rng(2) # seed is arbitrary value that works + rng = np.random.default_rng(10) # seed is arbitrary value that works size = 1 << 16 df = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0de5f71336cfb..d331a005f0e0c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1618,7 +1618,7 @@ def test_loc_setitem_single_column_mixed(self): def test_loc_setitem_cast2(self): # GH#7704 # dtype conversion on setting - df = 
DataFrame(np.random.default_rng(2).random(30, 3), columns=tuple("ABC")) + df = DataFrame(np.random.default_rng(2).random((30, 3)), columns=tuple("ABC")) df["event"] = np.nan df.loc[10, "event"] = "foo" result = df.dtypes diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 42e0a8025274a..0938e7fc6f28b 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1975,7 +1975,7 @@ def test_repr_html_long_multiindex(self): tuples = list(itertools.product(np.arange(max_L1), ["foo", "bar"])) idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.default_rng(2).standard_normal(max_L1 * 2, 2), + np.random.default_rng(2).standard_normal((max_L1 * 2, 2)), index=idx, columns=["A", "B"], ) @@ -1986,7 +1986,7 @@ def test_repr_html_long_multiindex(self): tuples = list(itertools.product(np.arange(max_L1 + 1), ["foo", "bar"])) idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) df = DataFrame( - np.random.default_rng(2).standard_normal((max_L1 + 1) * 2, 2), + np.random.default_rng(2).standard_normal(((max_L1 + 1) * 2, 2)), index=idx, columns=["A", "B"], ) diff --git a/pandas/tests/plotting/conftest.py b/pandas/tests/plotting/conftest.py index 92a2d2144f04d..d688bbd47595c 100644 --- a/pandas/tests/plotting/conftest.py +++ b/pandas/tests/plotting/conftest.py @@ -32,7 +32,7 @@ def mpl_cleanup(): @pytest.fixture def hist_df(): n = 50 - rng = np.random.default_rng(42) + rng = np.random.default_rng(10) gender = rng.choice(["Male", "Female"], size=n) classroom = rng.choice(["A", "B", "C"], size=n) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 9bcc0f9534ea0..12ae92bf84b9d 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -385,7 +385,7 @@ def test_period_compat(self): # GH 9012 # period-array conversions df = DataFrame( - 
np.random.default_rng(2).random(21, 2), + np.random.default_rng(2).random((21, 2)), index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)), columns=["a", "b"], ) @@ -2241,7 +2241,7 @@ def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( DataFrame( - np.random.default_rng(2).standard_normal(15, 2), columns=list("AB") + np.random.default_rng(2).standard_normal((15, 2)), columns=list("AB") ) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index b79b0adf8d664..f5a3e176efeec 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -928,10 +928,14 @@ def test_from_resampling_area_line_mixed(self, kind1, kind2): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") high = DataFrame( - np.random.default_rng(2).random(len(idxh), 3), index=idxh, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxh), 3)), + index=idxh, + columns=[0, 1, 2], ) low = DataFrame( - np.random.default_rng(2).random(len(idxl), 3), index=idxl, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxl), 3)), + index=idxl, + columns=[0, 1, 2], ) _, ax = mpl.pyplot.subplots() @@ -980,10 +984,14 @@ def test_from_resampling_area_line_mixed_high_to_low(self, kind1, kind2): idxh = date_range("1/1/1999", periods=52, freq="W") idxl = date_range("1/1/1999", periods=12, freq="M") high = DataFrame( - np.random.default_rng(2).random(len(idxh), 3), index=idxh, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxh), 3)), + index=idxh, + columns=[0, 1, 2], ) low = DataFrame( - np.random.default_rng(2).random(len(idxl), 3), index=idxl, columns=[0, 1, 2] + np.random.default_rng(2).random((len(idxl), 3)), + index=idxl, + columns=[0, 1, 2], ) _, ax = mpl.pyplot.subplots() high.plot(kind=kind1, stacked=True, ax=ax) diff --git 
a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index a9a3219b7b8f7..a2f68d587523b 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -661,7 +661,7 @@ class TestDataFrameGroupByPlots: def test_grouped_hist_legacy(self): from pandas.plotting._matplotlib.hist import _grouped_hist - rs = np.random.default_rng(2) + rs = np.random.default_rng(10) df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( @@ -678,7 +678,7 @@ def test_grouped_hist_legacy(self): _check_axes_shape(axes, axes_num=4, layout=(2, 2)) def test_grouped_hist_legacy_axes_shape_no_col(self): - rs = np.random.default_rng(2) + rs = np.random.default_rng(10) df = DataFrame(rs.standard_normal((10, 1)), columns=["A"]) df["B"] = to_datetime( rs.integers( diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index b6e6bbae76679..e8f0373c91361 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -131,7 +131,7 @@ def test_scatter_matrix_axis_smaller(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.default_rng(2).standard_normal((100, 3))) + df = DataFrame(np.random.default_rng(10).standard_normal((100, 3))) df[0] = (df[0] - 2) / 3 # we are plotting multiples on a sub-plot @@ -468,7 +468,7 @@ def test_get_standard_colors_no_appending(self): assert len(color_after) == len(color_before) df = DataFrame( - np.random.default_rng(2).standard_normal(48, 4), columns=list("ABCD") + np.random.default_rng(2).standard_normal((48, 4)), columns=list("ABCD") ) color_list = cm.gnuplot(np.linspace(0, 1, 16)) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index db188c6c96087..b531f816b0f31 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -94,7 +94,7 @@ def 
test_custom_grouper_df(index, unit): b = Grouper(freq=Minute(5), closed="right", label="right") dti = index.as_unit(unit) df = DataFrame( - np.random.default_rng(2).random(len(dti), 10), index=dti, dtype="float64" + np.random.default_rng(2).random((len(dti), 10)), index=dti, dtype="float64" ) r = df.groupby(b).agg("sum") diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index dab9fa5d7491c..cb6bdba4c7f54 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -605,7 +605,7 @@ def test_join_non_unique_period_index(self): def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame( - np.random.default_rng(2).standard_normal(20, 6), + np.random.default_rng(2).standard_normal((20, 6)), columns=["a", "b", "c", "d", "e", "f"], ) df.insert(0, "id", 0) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index bb7bc5a0b690e..2b6ebded3d325 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -873,7 +873,7 @@ def test_categoricals(a_dtype, b_dtype): result = crosstab(a, b, margins=True, dropna=False) columns = Index([0, 1, "All"], dtype="object", name="col_0") index = Index([0, 1, 2, "All"], dtype="object", name="row_0") - values = [[18, 16, 34], [18, 16, 34], [16, 16, 32], [52, 48, 100]] + values = [[10, 18, 28], [23, 16, 39], [17, 16, 33], [50, 50, 100]] expected = DataFrame(values, index, columns) tm.assert_frame_equal(result, expected) @@ -882,7 +882,7 @@ def test_categoricals(a_dtype, b_dtype): a_is_cat = isinstance(a.dtype, CategoricalDtype) assert not a_is_cat or a.value_counts().loc[1] == 0 result = crosstab(a, b, margins=True, dropna=False) - values = [[18, 16, 34], [0, 0, 0], [34, 32, 66], [52, 48, 100]] + values = [[10, 18, 28], [0, 0, 0], [40, 32, 72], [50, 50, 100]] expected = DataFrame(values, index, columns) if not a_is_cat: expected = expected.loc[[0, 2, "All"]] diff --git 
a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 890e2624194d7..ec794e818edf1 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -135,7 +135,7 @@ def test_corr_sanity(): res = df[0].rolling(5, center=True).corr(df[1]) assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) - df = DataFrame(np.random.default_rng(2).random(30, 2)) + df = DataFrame(np.random.default_rng(2).random((30, 2))) res = df[0].rolling(5, center=True).corr(df[1]) assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 6bb262b63f49b..4df20282bbfa6 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -599,103 +599,13 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "center, expected_data", - [ - ( - True, - ( - [88.0] * 7 - + [97.0] * 9 - + [98.0] - + [99.0] * 21 - + [95.0] * 16 - + [93.0] * 5 - + [89.0] * 5 - + [96.0] * 21 - + [94.0] * 14 - + [90.0] * 13 - + [88.0] * 2 - + [90.0] * 9 - + [96.0] * 21 - + [95.0] * 6 - + [91.0] - + [87.0] * 6 - + [92.0] * 21 - + [83.0] * 2 - + [86.0] * 10 - + [87.0] * 5 - + [98.0] * 21 - + [97.0] * 14 - + [93.0] * 7 - + [87.0] * 4 - + [86.0] * 4 - + [95.0] * 21 - + [85.0] * 14 - + [83.0] * 2 - + [76.0] * 5 - + [81.0] * 2 - + [98.0] * 21 - + [95.0] * 14 - + [91.0] * 7 - + [86.0] - + [93.0] * 3 - + [95.0] * 29 - + [77.0] * 2 - ), - ), - ( - False, - ( - [np.nan] * 2 - + [88.0] * 16 - + [97.0] * 9 - + [98.0] - + [99.0] * 21 - + [95.0] * 16 - + [93.0] * 5 - + [89.0] * 5 - + [96.0] * 21 - + [94.0] * 14 - + [90.0] * 13 - + [88.0] * 2 - + [90.0] * 9 - + [96.0] * 21 - + [95.0] * 6 - + [91.0] - + [87.0] * 6 - + [92.0] * 21 - + [83.0] * 2 - + [86.0] * 10 - + [87.0] * 5 - + [98.0] * 21 - + [97.0] * 14 - + [93.0] * 7 - + [87.0] * 4 - + [86.0] * 4 - + [95.0] * 21 - + [85.0] * 14 - + [83.0] * 
2 - + [76.0] * 5 - + [81.0] * 2 - + [98.0] * 21 - + [95.0] * 14 - + [91.0] * 7 - + [86.0] - + [93.0] * 3 - + [95.0] * 20 - ), - ), - ], -) -def test_rolling_window_as_string(center, expected_data): +@pytest.mark.parametrize("center", [True, False]) +def test_rolling_window_as_string(center): # see gh-22590 date_today = datetime.now() days = date_range(date_today, date_today + timedelta(365), freq="D") - npr = np.random.default_rng(2) - - data = npr.integers(1, high=100, size=len(days)) + data = np.ones(len(days)) df = DataFrame({"DateCol": days, "metric": data}) df.set_index("DateCol", inplace=True) @@ -705,6 +615,9 @@ def test_rolling_window_as_string(center, expected_data): index = days.rename("DateCol") index = index._with_freq(None) + expected_data = np.ones(len(days), dtype=np.float64) + if not center: + expected_data[:2] = np.nan expected = Series(expected_data, index=index, name="metric") tm.assert_series_equal(result, expected) From c4a9cbb1858c69cbf88e43779493574e9f852eef Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jul 2023 13:11:26 -0700 Subject: [PATCH 14/22] fix more tests --- .../tests/plotting/frame/test_hist_box_by.py | 62 +++---------------- pandas/tests/plotting/test_misc.py | 2 +- 2 files changed, 11 insertions(+), 53 deletions(-) diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 697aa3eefad03..a9250fa8347cc 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -38,48 +38,30 @@ class TestHistWithBy: "A", [ "(a, a)", - "(a, b)", - "(a, c)", - "(b, a)", "(b, b)", - "(b, c)", - "(c, a)", - "(c, b)", "(c, c)", ], - [["A"]] * 9, + [["A"]] * 3, ), ( ["C", "D"], ["A", "B"], [ "(a, a)", - "(a, b)", - "(a, c)", - "(b, a)", "(b, b)", - "(b, c)", - "(c, a)", - "(c, b)", "(c, c)", ], - [["A", "B"]] * 9, + [["A", "B"]] * 3, ), ( ["C", "D"], None, [ "(a, a)", - "(a, 
b)", - "(a, c)", - "(b, a)", "(b, b)", - "(b, c)", - "(c, a)", - "(c, b)", "(c, c)", ], - [["A", "B"]] * 9, + [["A", "B"]] * 3, ), ], ) @@ -106,16 +88,10 @@ def test_hist_plot_by_argument(self, by, column, titles, legends, hist_df): "A", [ "(a, a)", - "(a, b)", - "(a, c)", - "(b, a)", "(b, b)", - "(b, c)", - "(c, a)", - "(c, b)", "(c, c)", ], - [["A"]] * 9, + [["A"]] * 3, ), ], ) @@ -159,12 +135,12 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df): (["C"], ["A"], (1, 3), 3), ("C", None, (3, 1), 3), ("C", ["A", "B"], (3, 1), 3), - (["C", "D"], "A", (9, 1), 9), - (["C", "D"], "A", (3, 3), 9), - (["C", "D"], ["A"], (5, 2), 9), - (["C", "D"], ["A", "B"], (9, 1), 9), - (["C", "D"], None, (9, 1), 9), - (["C", "D"], ["A", "B"], (5, 2), 9), + (["C", "D"], "A", (9, 1), 3), + (["C", "D"], "A", (3, 3), 3), + (["C", "D"], ["A"], (5, 2), 3), + (["C", "D"], ["A", "B"], (9, 1), 3), + (["C", "D"], None, (9, 1), 3), + (["C", "D"], ["A", "B"], (5, 2), 3), ], ) def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): @@ -243,13 +219,7 @@ class TestBoxWithBy: [ [ "(a, a)", - "(a, b)", - "(a, c)", - "(b, a)", "(b, b)", - "(b, c)", - "(c, a)", - "(c, b)", "(c, c)", ] ], @@ -262,13 +232,7 @@ class TestBoxWithBy: [ [ "(a, a)", - "(a, b)", - "(a, c)", - "(b, a)", "(b, b)", - "(b, c)", - "(c, a)", - "(c, b)", "(c, c)", ] ] @@ -301,13 +265,7 @@ def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df): [ [ "(a, a)", - "(a, b)", - "(a, c)", - "(b, a)", "(b, b)", - "(b, c)", - "(c, a)", - "(c, b)", "(c, c)", ] ], diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index e8f0373c91361..ff2058a4c4f09 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -131,7 +131,7 @@ def test_scatter_matrix_axis_smaller(self, pass_axis): if pass_axis: _, ax = mpl.pyplot.subplots(3, 3) - df = DataFrame(np.random.default_rng(10).standard_normal((100, 3))) + df = 
DataFrame(np.random.default_rng(11).standard_normal((100, 3))) df[0] = (df[0] - 2) / 3 # we are plotting multiples on a sub-plot From 744e638e1e6e1cc65d27b913da81aff3d64bce52 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jul 2023 17:54:37 -0700 Subject: [PATCH 15/22] Try addressing windows tests --- pandas/tests/extension/test_sparse.py | 5 +++-- pandas/tests/frame/methods/test_shift.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index def46bf848682..e39fdc3f56709 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -27,10 +27,11 @@ def make_data(fill_value): + rng = np.random.default_rng(2) if np.isnan(fill_value): - data = np.random.default_rng(2).uniform(size=100) + data = rng.uniform(size=100) else: - data = np.random.default_rng(2).integers(1, 100, size=100) + data = rng.integers(1, 100, size=100, dtype=int) if data[0] == data[1]: data[0] += 1 diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 883a6db9b1f62..bdc32eb39082d 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -463,8 +463,9 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager): @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) axis=1 support def test_shift_axis1_multiple_blocks_with_int_fill(self): # GH#42719 - df1 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 3))) - df2 = DataFrame(np.random.default_rng(2).integers(1000, size=(5, 2))) + rng = np.random.default_rng(2) + df1 = DataFrame(rng.integers(1000, size=(5, 3), dtype=int)) + df2 = DataFrame(rng.integers(1000, size=(5, 2), dtype=int)) df3 = pd.concat([df1.iloc[:4, 1:3], df2.iloc[:4, :]], axis=1) result = df3.shift(2, axis=1, fill_value=np.int_(0)) assert len(df3._mgr.blocks) == 2 From 
9e9ce3e6bf35424cb28094a304ada41f8c6a4545 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 13:14:13 -0700 Subject: [PATCH 16/22] Address confest, ignore asv --- pandas/conftest.py | 12 +++++++----- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index d7e8fbeb9336b..d0eacc4de781a 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -557,7 +557,9 @@ def multiindex_dataframe_random_data( """DataFrame with 2 level MultiIndex with random data""" index = lexsorted_two_level_string_multiindex return DataFrame( - np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp") + np.random.default_rng(2).standard_normal((10, 3)), + index=index, + columns=Index(["A", "B", "C"], name="exp"), ) @@ -610,7 +612,7 @@ def _create_mi_with_dt64tz_level(): "float32": tm.makeFloatIndex(100, dtype="float32"), "float64": tm.makeFloatIndex(100, dtype="float64"), "bool-object": tm.makeBoolIndex(10).astype(object), - "bool-dtype": Index(np.random.randn(10) < 0), + "bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0), "complex64": tm.makeNumericIndex(100, dtype="float64").astype("complex64"), "complex128": tm.makeNumericIndex(100, dtype="float64").astype("complex128"), "categorical": tm.makeCategoricalIndex(100), @@ -740,7 +742,7 @@ def datetime_series() -> Series: def _create_series(index): """Helper for the _series dict""" size = len(index) - data = np.random.randn(size) + data = np.random.default_rng(2).standard_normal(size) return Series(data, index=index, name="a", copy=False) @@ -769,7 +771,7 @@ def series_with_multilevel_index() -> Series: ] tuples = zip(*arrays) index = MultiIndex.from_tuples(tuples) - data = np.random.randn(8) + data = np.random.default_rng(2).standard_normal(8) ser = Series(data, index=index) ser.iloc[3] = np.NaN return ser @@ -942,7 +944,7 @@ def 
rand_series_with_duplicate_datetimeindex() -> Series: datetime(2000, 1, 5), ] - return Series(np.random.randn(len(dates)), index=dates) + return Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates) # ---------------------------------------------------------------- diff --git a/pyproject.toml b/pyproject.toml index b5d39a999cab9..cef0b2bc23b70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -345,7 +345,7 @@ exclude = [ [tool.ruff.per-file-ignores] # relative imports allowed for asv_bench -"asv_bench/*" = ["TID"] +"asv_bench/*" = ["TID", "NPY002"] # to be enabled gradually "pandas/core/*" = ["PLR5501"] "pandas/tests/*" = ["B028"] From 7aa13a3366af7a128d175f055f56e07487051e7b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 16:50:41 -0700 Subject: [PATCH 17/22] adjust once more --- pandas/tests/groupby/test_filters.py | 4 +++- pandas/tests/groupby/test_rank.py | 4 ++-- pandas/tests/io/formats/test_info.py | 2 +- pandas/tests/io/formats/test_series_info.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 9f5e2c87349e4..0bb7ad4fd274d 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -213,7 +213,9 @@ def test_filter_against_workaround(): # Set up DataFrame of ints, floats, strings. 
letters = np.array(list(ascii_lowercase)) N = 1000 - random_letters = letters.take(np.random.default_rng(2).integers(0, 26, N)) + random_letters = letters.take( + np.random.default_rng(2).integers(0, 26, N, dtype=int) + ) df = DataFrame( { "ints": Series(np.random.default_rng(2).integers(0, 100, N)), diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index bd1deb58c6195..41bfa121624ea 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -33,8 +33,8 @@ def test_rank_unordered_categorical_typeerror(): def test_rank_apply(): lev1 = tm.rands_array(10, 100) lev2 = tm.rands_array(10, 130) - lab1 = np.random.default_rng(2).integers(0, 100, size=500) - lab2 = np.random.default_rng(2).integers(0, 130, size=500) + lab1 = np.random.default_rng(2).integers(0, 100, size=500, dtype=int) + lab2 = np.random.default_rng(2).integers(0, 130, size=500, dtype=int) df = DataFrame( { diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 71696ee1e8b1d..163faf79a9f94 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -49,7 +49,7 @@ def test_info_empty(): def test_info_categorical_column_smoke_test(): n = 2500 - df = DataFrame({"int64": np.random.default_rng(2).integers(100, size=n)}) + df = DataFrame({"int64": np.random.default_rng(2).integers(100, size=n, dtype=int)}) df["category"] = Series( np.array(list("abcdefghij")).take( np.random.default_rng(2).integers(0, 10, size=n) diff --git a/pandas/tests/io/formats/test_series_info.py b/pandas/tests/io/formats/test_series_info.py index bf20a5cb954c4..02827ee25042a 100644 --- a/pandas/tests/io/formats/test_series_info.py +++ b/pandas/tests/io/formats/test_series_info.py @@ -18,7 +18,7 @@ def test_info_categorical_column_just_works(): n = 2500 data = np.array(list("abcdefghij")).take( - np.random.default_rng(2).integers(0, 10, size=n) + np.random.default_rng(2).integers(0, 10, size=n, 
dtype=int) ) s = Series(data).astype("category") s.isna() From 71a17df865236d1706a8ff6bffc064a543062dcf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 19 Jul 2023 15:20:28 -0700 Subject: [PATCH 18/22] ANother dtype --- pandas/tests/io/formats/test_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 163faf79a9f94..73de2b068b699 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -52,7 +52,7 @@ def test_info_categorical_column_smoke_test(): df = DataFrame({"int64": np.random.default_rng(2).integers(100, size=n, dtype=int)}) df["category"] = Series( np.array(list("abcdefghij")).take( - np.random.default_rng(2).integers(0, 10, size=n) + np.random.default_rng(2).integers(0, 10, size=n, dtype=int) ) ).astype("category") df.isna() From 846955f84a11751afccd8b3f2f34304aa36c3ade Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 20 Jul 2023 09:21:43 -0700 Subject: [PATCH 19/22] fix another unseeded default_rng --- pandas/tests/frame/methods/test_sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 114c59e318d09..6b3459fbdc035 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -180,7 +180,7 @@ def test_sample_random_state(self, func_str, arg, frame_or_series): def test_sample_generator(self, frame_or_series): # GH#38100 obj = frame_or_series(np.arange(100)) - rng = np.random.default_rng() + rng = np.random.default_rng(2) # Consecutive calls should advance the seed result1 = obj.sample(n=50, random_state=rng) From 73bd560893c80f5efcf46ad1e19ff44f4d972f7f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: 
Thu, 20 Jul 2023 09:31:08 -0700 Subject: [PATCH 20/22] Add a rule for unseeded default_rng --- .pre-commit-config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 366db4337b0e1..fe02e5db697d9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -243,6 +243,9 @@ repos: # os.remove |os\.remove + + # Unseeded numpy default_rng + | default_rng\(\) files: ^pandas/tests/ types_or: [python, cython, rst] - id: unwanted-patterns-in-ea-tests From a90c4ca3c57451f153667362d7d6297f27cebd2a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 20 Jul 2023 10:05:45 -0700 Subject: [PATCH 21/22] Remove space --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fe02e5db697d9..186fab777c49e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -245,7 +245,7 @@ repos: |os\.remove # Unseeded numpy default_rng - | default_rng\(\) + |default_rng\(\) files: ^pandas/tests/ types_or: [python, cython, rst] - id: unwanted-patterns-in-ea-tests From 7f29304128cb56315774af4874f774ee5fed39e7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 28 Jul 2023 11:36:58 -0700 Subject: [PATCH 22/22] other fixes --- pandas/tests/resample/test_resampler_grouper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 6a3554caae5f6..7144468dfc44c 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -143,7 +143,7 @@ def test_groupby_with_origin(): # test origin on 1970-01-01 00:00:00 rng = date_range("1970-01-01 00:00:00", end, freq="1231min") # prime number - ts = Series(np.random.randn(len(rng)), index=rng) + ts = 
Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) middle_ts = rng[len(rng) // 2] ts2 = ts[middle_ts:end] @@ -170,7 +170,7 @@ def test_groupby_with_origin(): # test origin on 2049-10-18 20:00:00 rng = date_range(start, "2049-10-18 20:00:00", freq="1231min") # prime number - ts = Series(np.random.randn(len(rng)), index=rng) + ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng) middle_ts = rng[len(rng) // 2] ts2 = ts[middle_ts:end] origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000