
Commit 964a267

Enable codefactor in master & cleaning code (#85)
* codefactor
* mypy
1 parent 972efac commit 964a267

File tree

* bench.py
* cuml_bench/df_clsf.py
* cuml_bench/df_regr.py
* daal4py_bench/pca.py
* modelbuilders_bench/mb_utils.py

5 files changed (+57, −67)


bench.py

Lines changed: 11 additions & 16 deletions
@@ -30,12 +30,11 @@ def get_dtype(data):
     '''
     if hasattr(data, 'dtype'):
         return data.dtype
-    elif hasattr(data, 'dtypes'):
+    if hasattr(data, 'dtypes'):
         return str(data.dtypes[0])
-    elif hasattr(data, 'values'):
+    if hasattr(data, 'values'):
         return data.values.dtype
-    else:
-        raise ValueError(f'Impossible to get data type of {type(data)}')
+    raise ValueError(f'Impossible to get data type of {type(data)}')


 def sklearn_disable_finiteness_check():
@@ -66,10 +65,7 @@ def _parse_size(string, dim=2):


 def float_or_int(string):
-    if '.' in string:
-        return float(string)
-    else:
-        return int(string)
+    return float(string) if '.' in string else int(string)


 def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):
@@ -90,10 +86,8 @@ def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):
     optimal_cache_size_bytes = byte_size * (n_rows ** 2)
     one_gb = 2 ** 30
     max_cache_bytes = max_cache * one_gb
-    if optimal_cache_size_bytes > max_cache_bytes:
-        return max_cache_bytes
-    else:
-        return optimal_cache_size_bytes
+    return max_cache_bytes \
+        if optimal_cache_size_bytes > max_cache_bytes else optimal_cache_size_bytes


 def parse_args(parser, size=None, loop_types=(),
@@ -175,9 +169,10 @@ def parse_args(parser, size=None, loop_types=(),
                         help='Seed to pass as random_state')
     parser.add_argument('--dataset-name', type=str, default=None,
                         help='Dataset name')
-    parser.add_argument('--no-intel-optimized', default=False, action='store_true',
+    parser.add_argument('--no-intel-optimized', default=False,
+                        action='store_true',
                         help='Use no intel optimized version. '
-                             'Now avalible for scikit-learn benchmarks'),
+                             'Now avalible for scikit-learn benchmarks')
     parser.add_argument('--device', default='None', type=str,
                         choices=('host', 'cpu', 'gpu', 'None'),
                         help='Execution context device')
@@ -519,8 +514,8 @@ def print_output(library, algorithm, stages, params, functions,
                  alg_params=None):
     if params.output_format == 'json':
         output = []
-        for i in range(len(stages)):
-            result = gen_basic_dict(library, algorithm, stages[i], params,
+        for i, stage in enumerate(stages):
+            result = gen_basic_dict(library, algorithm, stage, params,
                                     data[i], alg_instance, alg_params)
             result.update({'time[s]': times[i]})
             if metric_type is not None:
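Most of the bench.py edits are the flattening CodeFactor suggests (pylint calls it no-else-return, R1705): once every branch returns, the trailing elif/else arms are redundant and guard clauses behave identically. A minimal, self-contained version of the refactored get_dtype with a quick sanity check — assumes only numpy and pandas are available:

import numpy as np
import pandas as pd


def get_dtype(data):
    '''Return the dtype of an ndarray, DataFrame, or values-backed container.'''
    if hasattr(data, 'dtype'):    # numpy ndarray
        return data.dtype
    if hasattr(data, 'dtypes'):   # pandas DataFrame
        return str(data.dtypes[0])
    if hasattr(data, 'values'):   # other pandas-like containers
        return data.values.dtype
    raise ValueError(f'Impossible to get data type of {type(data)}')


print(get_dtype(np.zeros(3)))                     # float64
print(get_dtype(pd.DataFrame({'a': [1, 2, 3]})))  # int64

The get_optimal_cache_size rewrite compresses the same if/else into one conditional expression, and the cap it preserves is easy to motivate: at n_rows = 100_000 with np.double (8 bytes), an n_rows × n_rows cache would need 8 × (10⁵)² bytes ≈ 74.5 GB, beyond the default 64 GB ceiling, so max_cache_bytes is returned instead.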

cuml_bench/df_clsf.py

Lines changed: 19 additions & 20 deletions
@@ -15,7 +15,6 @@
 # ===============================================================================

 import argparse
-from typing import Any

 import bench
 import cuml
@@ -62,36 +61,36 @@
     params.split_algorithm = 1

 params.n_classes = y_train[y_train.columns[0]].nunique()
-clf: Any
-
-
-def fit(X, y):
-    global clf
-    clf = RandomForestClassifier(split_criterion=params.criterion,
-                                 split_algo=params.split_algorithm,
-                                 n_estimators=params.num_trees,
-                                 max_depth=params.max_depth,
-                                 max_features=params.max_features,
-                                 min_samples_split=params.min_samples_split,
-                                 max_leaves=params.max_leaf_nodes,
-                                 min_impurity_decrease=params.min_impurity_decrease,
-                                 bootstrap=params.bootstrap)
+
+clf = RandomForestClassifier(
+    split_criterion=params.criterion,
+    split_algo=params.split_algorithm,
+    n_estimators=params.num_trees,
+    max_depth=params.max_depth,
+    max_features=params.max_features,
+    min_samples_split=params.min_samples_split,
+    max_leaves=params.max_leaf_nodes,
+    min_impurity_decrease=params.min_impurity_decrease,
+    bootstrap=params.bootstrap,
+)
+
+
+def fit(clf, X, y):
     return clf.fit(X, y)


-def predict(X):
-    global clf
+def predict(clf, X):
     prediction_args = {'predict_model': 'GPU'}
     if int(cuml.__version__.split('.')[1]) <= 14:
         prediction_args.update({'num_classes': params.n_classes})
     return clf.predict(X, **prediction_args)


-fit_time, _ = bench.measure_function_time(fit, X_train, y_train, params=params)
-y_pred = predict(X_train)
+fit_time, _ = bench.measure_function_time(fit, clf, X_train, y_train, params=params)
+y_pred = predict(clf, X_train)
 train_acc = 100 * bench.accuracy_score(y_pred, y_train)

-predict_time, y_pred = bench.measure_function_time(predict, X_test, params=params)
+predict_time, y_pred = bench.measure_function_time(predict, clf, X_test, params=params)
 test_acc = 100 * bench.accuracy_score(y_pred, y_test)

 bench.print_output(library='cuml', algorithm='decision_forest_classification',
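The substantive change in both cuML benchmarks is the removal of module-level mutable state: the estimator is built once at script level and passed to fit/predict explicitly, which also lets the `from typing import Any` workaround disappear (the `clf: Any` annotation existed only to keep mypy happy about the `global`). The same refactor is applied to df_regr.py below. A sketch of why the call sites still work — assuming, as a simplification, that bench.measure_function_time essentially times func(*args); the real helper also consults `params` (repeat counts, time limits, ...), which this stub ignores:

from time import perf_counter


def measure_function_time(func, *args, params=None):
    # Simplified stand-in for bench.measure_function_time: time a single
    # call and return (elapsed_seconds, result).
    start = perf_counter()
    result = func(*args)
    return perf_counter() - start, result


def fit(clf, X, y):
    # The estimator arrives as a plain argument -- no `global clf` needed,
    # so the function has no hidden module state and any extra positional
    # arguments are forwarded by the timing helper unchanged:
    return clf.fit(X, y)


# fit_time, _ = measure_function_time(fit, clf, X_train, y_train, params=params)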

cuml_bench/df_regr.py

Lines changed: 18 additions & 19 deletions
@@ -15,7 +15,6 @@
 # ===============================================================================

 import argparse
-from typing import Any

 import bench
 from cuml.ensemble import RandomForestRegressor
@@ -59,35 +58,35 @@
     params.split_algorithm = 0
 else:
     params.split_algorithm = 1
-regr: Any
-

 # Create our random forest regressor
-def fit(X, y):
-    global regr
-    regr = RandomForestRegressor(split_criterion=params.criterion,
-                                 split_algo=params.split_algorithm,
-                                 n_estimators=params.num_trees,
-                                 max_depth=params.max_depth,
-                                 max_features=params.max_features,
-                                 min_samples_split=params.min_samples_split,
-                                 max_leaves=params.max_leaf_nodes,
-                                 min_impurity_decrease=params.min_impurity_decrease,
-                                 bootstrap=params.bootstrap)
+regr = RandomForestRegressor(
+    split_criterion=params.criterion,
+    split_algo=params.split_algorithm,
+    n_estimators=params.num_trees,
+    max_depth=params.max_depth,
+    max_features=params.max_features,
+    min_samples_split=params.min_samples_split,
+    max_leaves=params.max_leaf_nodes,
+    min_impurity_decrease=params.min_impurity_decrease,
+    bootstrap=params.bootstrap,
+)
+
+
+def fit(regr, X, y):
     return regr.fit(X, y)


-def predict(X):
-    global regr
+def predict(regr, X):
     return regr.predict(X, predict_model='GPU')


-fit_time, _ = bench.measure_function_time(fit, X_train, y_train, params=params)
+fit_time, _ = bench.measure_function_time(fit, regr, X_train, y_train, params=params)

-y_pred = predict(X_train)
+y_pred = predict(regr, X_train)
 train_rmse = bench.rmse_score(y_pred, y_train)

-predict_time, y_pred = bench.measure_function_time(predict, X_test, params=params)
+predict_time, y_pred = bench.measure_function_time(predict, regr, X_test, params=params)
 test_rmse = bench.rmse_score(y_pred, y_test)

 bench.print_output(library='cuml', algorithm='decision_forest_regression',

daal4py_bench/pca.py

Lines changed: 2 additions & 3 deletions
@@ -121,9 +121,8 @@ def pca_fit_full_daal(X, n_components):
 def test_fit(X):
     if params.svd_solver == 'full':
         return pca_fit_full_daal(X, params.n_components)
-    else:
-        method = 'correlationDense' if params.svd_solver == 'correlation' else 'svdDense'
-        return pca_fit_daal(X, params.n_components, method)
+    method = 'correlationDense' if params.svd_solver == 'correlation' else 'svdDense'
+    return pca_fit_daal(X, params.n_components, method)


 def test_transform(Xp, pca_result, eigenvalues, eigenvectors):
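Same no-else-return flattening as in bench.py, and the solver dispatch is untouched: 'full' takes the dedicated full-SVD path, 'correlation' maps to daal4py's correlationDense method, and any other value falls back to svdDense. A stubbed sketch of that mapping (the two pca_fit_* stubs and the simplified signature are placeholders for the real daal4py calls, which are not shown in this hunk):

def pca_fit_full_daal(X, n_components):      # stub for illustration
    return ('full-svd path', n_components)


def pca_fit_daal(X, n_components, method):   # stub for illustration
    return (method, n_components)


def test_fit(X, svd_solver, n_components=2):
    if svd_solver == 'full':
        return pca_fit_full_daal(X, n_components)
    method = 'correlationDense' if svd_solver == 'correlation' else 'svdDense'
    return pca_fit_daal(X, n_components, method)


print(test_fit(None, 'correlation'))  # ('correlationDense', 2)
print(test_fit(None, 'randomized'))   # ('svdDense', 2)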

modelbuilders_bench/mb_utils.py

Lines changed: 7 additions & 9 deletions
@@ -21,17 +21,15 @@

 def get_accuracy(true_labels, prediction):
     errors = 0
-    for i in range(len(true_labels)):
+    for i, true_label in enumerate(true_labels):
         pred_label = 0
-        if isinstance(prediction[i], float) or \
-                isinstance(prediction[i], np.single) or \
-                isinstance(prediction[i], np.float):
+        if isinstance(prediction[i], (float, np.single, np.float)):
             pred_label = prediction[i] > 0.5
         elif prediction[i].shape[0] == 1:
             pred_label = prediction[i][0]
         else:
             pred_label = np.argmax(prediction[i])
-        if true_labels[i] != pred_label:
+        if true_label != pred_label:
             errors += 1
     return 100 * (1 - errors / len(true_labels))

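Collapsing the chained isinstance calls into a tuple is the idiomatic form, but one caveat postdates this commit: np.float is merely an alias for Python's built-in float, deprecated in NumPy 1.20 and removed in 1.24, so on current NumPy this line raises AttributeError and would need something like (float, np.floating) instead. A small demonstration of the equivalence:

import numpy as np

x = np.float32(0.7)

# Chained form (as before this commit, with np.float64 standing in
# for the removed np.float alias):
old = isinstance(x, float) or isinstance(x, np.single) or isinstance(x, np.float64)

# Tuple form (after); np.floating additionally covers every NumPy
# float width, sidestepping the removed alias entirely:
new = isinstance(x, (float, np.floating))

assert old == new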

@@ -54,14 +52,14 @@ def print_output(library, algorithm, stages, params, functions,
         })
     if hasattr(params, 'n_classes'):
         output[-1]['input_data'].update({'classes': params.n_classes})
-    for i in range(len(stages)):
+    for i, stage in enumerate(stages):
         result = {
-            'stage': stages[i],
+            'stage': stage,
         }
-        if 'daal' in stages[i]:
+        if 'daal' in stage:
             result.update({'conversion_to_daal4py': times[2 * i],
                            'prediction_time': times[2 * i + 1]})
-        elif 'train' in stages[i]:
+        elif 'train' in stage:
             result.update({'matrix_creation_time': times[2 * i],
                            'training_time': times[2 * i + 1]})
         else:
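The indexing here relies on `times` holding exactly two measurements per stage, flattened in stage order, so stage i reads the pair times[2 * i] and times[2 * i + 1]; enumerate supplies both the stage name and that index. A toy illustration of the layout (the stage names and timings below are made up, chosen only so the 'train' and 'daal' substring checks each fire):

stages = ['lgbm_train', 'lgbm_predict', 'daal4py_predict']
# Two measurements per stage, flattened in stage order:
times = [0.08, 1.90,   # lgbm_train: matrix creation, training
         0.01, 0.30,   # lgbm_predict: matrix creation, prediction
         0.05, 0.12]   # daal4py_predict: conversion, prediction

for i, stage in enumerate(stages):
    first, second = times[2 * i], times[2 * i + 1]
    print(stage, first, second)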
