Skip to content

BENCH: Remove unnecessary random seeds #41889

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions asv_bench/benchmarks/algos/isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ def setup(self, dtype):

elif dtype in ["category[object]", "category[int]"]:
# Note: the sizes used in this case differ from those used in the other cases
np.random.seed(1234)

n = 5 * 10 ** 5
sample_size = 100

Expand Down Expand Up @@ -99,7 +97,6 @@ class IsinAlmostFullWithRandomInt:
def setup(self, dtype, exponent, title):
M = 3 * 2 ** (exponent - 2)
# 0.77 is the maximal share of occupied buckets
np.random.seed(42)
self.series = Series(np.random.randint(0, M, M)).astype(dtype)

values = np.random.randint(0, M, M).astype(dtype)
Expand Down Expand Up @@ -132,7 +129,6 @@ class IsinWithRandomFloat:
param_names = ["dtype", "size", "title"]

def setup(self, dtype, size, title):
np.random.seed(42)
self.values = np.random.rand(size)
self.series = Series(self.values).astype(dtype)
np.random.shuffle(self.values)
Expand Down Expand Up @@ -179,7 +175,6 @@ class IsinWithArange:

def setup(self, dtype, M, offset_factor):
offset = int(M * offset_factor)
np.random.seed(42)
tmp = Series(np.random.randint(offset, M + offset, 10 ** 6))
self.series = tmp.astype(dtype)
self.values = np.arange(M).astype(dtype)
Expand Down Expand Up @@ -290,10 +285,8 @@ def setup(self, dtype, MaxNumber, series_type):
raise NotImplementedError

if series_type == "random_hits":
np.random.seed(42)
array = np.random.randint(0, MaxNumber, N)
if series_type == "random_misses":
np.random.seed(42)
array = np.random.randint(0, MaxNumber, N) + MaxNumber
if series_type == "monotone_hits":
array = np.repeat(np.arange(MaxNumber), N // MaxNumber)
Expand Down Expand Up @@ -322,7 +315,6 @@ def setup(self, dtype, series_type):
raise NotImplementedError

if series_type == "random":
np.random.seed(42)
vals = np.random.randint(0, 10 * N, N)
if series_type == "monotone":
vals = np.arange(N)
Expand Down
1 change: 0 additions & 1 deletion asv_bench/benchmarks/frame_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class FromDictwithTimestamp:

def setup(self, offset):
N = 10 ** 3
np.random.seed(1234)
idx = date_range(Timestamp("1/1/1900"), freq=offset, periods=N)
df = DataFrame(np.random.randn(N, 10), index=idx)
self.d = df.to_dict()
Expand Down
1 change: 0 additions & 1 deletion asv_bench/benchmarks/hash_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class NumericSeriesIndexingShuffled:

def setup(self, index, N):
vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)))
np.random.seed(42)
np.random.shuffle(vals)
indices = index(vals)
self.data = pd.Series(np.arange(N), index=indices)
Expand Down
3 changes: 0 additions & 3 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,17 +368,14 @@ def setup(self):
self.df = DataFrame(index=range(self.N))

def time_insert(self):
np.random.seed(1234)
for i in range(100):
self.df.insert(0, i, np.random.randn(self.N), allow_duplicates=True)

def time_assign_with_setitem(self):
np.random.seed(1234)
for i in range(100):
self.df[i] = np.random.randn(self.N)

def time_assign_list_like_with_setitem(self):
np.random.seed(1234)
self.df[list(range(100))] = np.random.randn(self.N, 100)

def time_assign_list_of_columns_concat(self):
Expand Down
1 change: 0 additions & 1 deletion asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ class Mode:
param_names = ["N", "dtype"]

def setup(self, N, dtype):
np.random.seed(42)
self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)

def time_mode(self, N, dtype):
Expand Down
4 changes: 4 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for unnecessary random seeds in asv benchmarks' ; echo $MSG
invgrep -R --exclude pandas_vb_common.py -E 'np.random.seed' asv_bench/benchmarks/
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi

### CODE ###
Expand Down