Skip to content

Commit 2139f5b

Browse files
authored
Revert "Extended output result, new metrics and minor fixes (#81)"
This reverts commit 972efac.
1 parent 972efac commit 2139f5b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+250
-461
lines changed

bench.py

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -338,47 +338,20 @@ def columnwise_score(y, yp, score_func):
338338
return score_func(y, yp)
339339

340340

341-
def accuracy_score(y_true, y_pred):
342-
return columnwise_score(y_true, y_pred, lambda y1, y2: np.mean(y1 == y2))
341+
def accuracy_score(y, yp):
342+
return columnwise_score(y, yp, lambda y1, y2: np.mean(y1 == y2))
343343

344344

345-
def log_loss(y_true, y_pred):
345+
def log_loss(y, yp):
346346
from sklearn.metrics import log_loss as sklearn_log_loss
347-
y_true = convert_to_numpy(y_true)
348-
y_pred = convert_to_numpy(y_pred)
349-
return sklearn_log_loss(y_true, y_pred)
350-
351-
352-
def roc_auc_score(y_true, y_pred, multi_class='ovr'):
353-
from sklearn.metrics import roc_auc_score as sklearn_roc_auc
354-
y_true = convert_to_numpy(y_true)
355-
y_pred = convert_to_numpy(y_pred)
356-
if y_pred.shape[1] == 2: # binary case
357-
y_pred = y_pred[:, 1]
358-
return sklearn_roc_auc(y_true, y_pred, multi_class=multi_class)
347+
y = convert_to_numpy(y)
348+
yp = convert_to_numpy(yp)
349+
return sklearn_log_loss(y, yp)
359350

360351

361-
def rmse_score(y_true, y_pred):
352+
def rmse_score(y, yp):
362353
return columnwise_score(
363-
y_true, y_pred, lambda y1, y2: float(np.sqrt(np.mean((y1 - y2)**2))))
364-
365-
366-
def r2_score(y_true, y_pred):
367-
from sklearn.metrics import r2_score as sklearn_r2_score
368-
y_true = convert_to_numpy(y_true)
369-
y_pred = convert_to_numpy(y_pred)
370-
return sklearn_r2_score(y_true, y_pred)
371-
372-
373-
def davies_bouldin_score(X, labels):
374-
from sklearn.metrics.cluster import davies_bouldin_score as sklearn_dbs
375-
X = convert_to_numpy(X)
376-
labels = convert_to_numpy(labels)
377-
try:
378-
res = sklearn_dbs(X, labels)
379-
except ValueError as ex:
380-
res = ex
381-
return res
354+
y, yp, lambda y1, y2: float(np.sqrt(np.mean((y1 - y2)**2))))
382355

383356

384357
def convert_data(data, dtype, data_order, data_format):
@@ -515,21 +488,16 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
515488

516489

517490
def print_output(library, algorithm, stages, params, functions,
518-
times, metric_type, metrics, data, alg_instance=None,
491+
times, accuracy_type, accuracies, data, alg_instance=None,
519492
alg_params=None):
520493
if params.output_format == 'json':
521494
output = []
522495
for i in range(len(stages)):
523496
result = gen_basic_dict(library, algorithm, stages[i], params,
524497
data[i], alg_instance, alg_params)
525498
result.update({'time[s]': times[i]})
526-
if metric_type is not None:
527-
if isinstance(metric_type, str):
528-
result.update({f'{metric_type}': metrics[i]})
529-
elif isinstance(metric_type, list):
530-
for ind, val in enumerate(metric_type):
531-
if metrics[ind][i] is not None:
532-
result.update({f'{val}': metrics[ind][i]})
499+
if accuracy_type is not None:
500+
result.update({f'{accuracy_type}': accuracies[i]})
533501
if hasattr(params, 'n_classes'):
534502
result['input_data'].update({'classes': params.n_classes})
535503
if hasattr(params, 'n_clusters'):

configs/blogs/skl_2021_3.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@
307307
}
308308
],
309309
"nu": [0.25],
310-
"kernel": ["poly"]
310+
"kernel": ["sigmoid"]
311311
},
312312
{
313313
"algorithm": "svr",

cuml_bench/dbscan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,5 @@
4848

4949
bench.print_output(library='cuml', algorithm='dbscan', stages=['training'],
5050
params=params, functions=['DBSCAN'], times=[time],
51-
metrics=[acc], metric_type='davies_bouldin_score', data=[X],
51+
accuracies=[acc], accuracy_type='davies_bouldin_score', data=[X],
5252
alg_instance=dbscan)

cuml_bench/df_clsf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,6 @@ def predict(X):
9797
bench.print_output(library='cuml', algorithm='decision_forest_classification',
9898
stages=['training', 'prediction'],
9999
params=params, functions=['df_clsf.fit', 'df_clsf.predict'],
100-
times=[fit_time, predict_time], metric_type='accuracy[%]',
101-
metrics=[train_acc, test_acc], data=[X_train, X_test],
100+
times=[fit_time, predict_time], accuracy_type='accuracy[%]',
101+
accuracies=[train_acc, test_acc], data=[X_train, X_test],
102102
alg_instance=clf)

cuml_bench/df_regr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,6 @@ def predict(X):
9393
bench.print_output(library='cuml', algorithm='decision_forest_regression',
9494
stages=['training', 'prediction'], params=params,
9595
functions=['df_regr.fit', 'df_regr.predict'],
96-
times=[fit_time, predict_time], metric_type='rmse',
97-
metrics=[train_rmse, test_rmse], data=[X_train, X_test],
96+
times=[fit_time, predict_time], accuracy_type='rmse',
97+
accuracies=[train_rmse, test_rmse], data=[X_train, X_test],
9898
alg_instance=regr)

cuml_bench/elasticnet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,6 @@
5656
bench.print_output(library='cuml', algorithm='elastic-net',
5757
stages=['training', 'prediction'], params=params,
5858
functions=['ElasticNet.fit', 'ElasticNet.predict'],
59-
times=[fit_time, predict_time], metric_type='rmse',
60-
metrics=[train_rmse, test_rmse], data=[X_train, X_train],
59+
times=[fit_time, predict_time], accuracy_type='rmse',
60+
accuracies=[train_rmse, test_rmse], data=[X_train, X_train],
6161
alg_instance=regr)

cuml_bench/kmeans.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,6 @@ def kmeans_fit(X):
8888
bench.print_output(library='cuml', algorithm='kmeans',
8989
stages=['training', 'prediction'], params=params,
9090
functions=['KMeans.fit', 'KMeans.predict'],
91-
times=[fit_time, predict_time], metric_type='davies_bouldin_score',
92-
metrics=[acc_train, acc_test], data=[X_train, X_test],
91+
times=[fit_time, predict_time], accuracy_type='davies_bouldin_score',
92+
accuracies=[acc_train, acc_test], data=[X_train, X_test],
9393
alg_instance=kmeans)

cuml_bench/knn_clsf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,13 @@
6868
stages=['training', 'prediction'], params=params,
6969
functions=['knn_clsf.fit', 'knn_clsf.predict'],
7070
times=[train_time, predict_time],
71-
metrics=[train_acc, test_acc], metric_type='accuracy[%]',
71+
accuracies=[train_acc, test_acc], accuracy_type='accuracy[%]',
7272
data=[X_train, X_test], alg_instance=knn_clsf)
7373
else:
7474
bench.print_output(library='cuml',
7575
algorithm=knn_clsf.algorithm + '_knn_search',
7676
stages=['training', 'search'], params=params,
7777
functions=['knn_clsf.fit', 'knn_clsf.kneighbors'],
7878
times=[train_time, predict_time],
79-
metrics=[], metric_type=None,
79+
accuracies=[], accuracy_type=None,
8080
data=[X_train, X_test], alg_instance=knn_clsf)

cuml_bench/lasso.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
bench.print_output(library='sklearn', algorithm='lasso',
5454
stages=['training', 'prediction'],
5555
params=params, functions=['Lasso.fit', 'Lasso.predict'],
56-
times=[fit_time, predict_time], metric_type='rmse',
57-
metrics=[train_rmse, test_rmse], data=[X_train, X_test],
56+
times=[fit_time, predict_time], accuracy_type='rmse',
57+
accuracies=[train_rmse, test_rmse], data=[X_train, X_test],
5858
alg_instance=regr)

cuml_bench/linear.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,6 @@
5050
bench.print_output(library='cuml', algorithm='linear_regression',
5151
stages=['training', 'prediction'], params=params,
5252
functions=['Linear.fit', 'Linear.predict'],
53-
times=[fit_time, predict_time], metric_type='rmse',
54-
metrics=[train_rmse, test_rmse], data=[X_train, X_test],
53+
times=[fit_time, predict_time], accuracy_type='rmse',
54+
accuracies=[train_rmse, test_rmse], data=[X_train, X_test],
5555
alg_instance=regr)

cuml_bench/log_reg.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,6 @@
6161
bench.print_output(library='cuml', algorithm='logistic_regression',
6262
stages=['training', 'prediction'], params=params,
6363
functions=['LogReg.fit', 'LogReg.predict'],
64-
times=[fit_time, predict_time], metric_type='accuracy[%]',
65-
metrics=[train_acc, test_acc], data=[X_train, X_test],
64+
times=[fit_time, predict_time], accuracy_type='accuracy[%]',
65+
accuracies=[train_acc, test_acc], data=[X_train, X_test],
6666
alg_instance=clf)

cuml_bench/pca.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,6 @@
5151
bench.print_output(library='cuml', algorithm='pca',
5252
stages=['training', 'transformation'],
5353
params=params, functions=['PCA.fit', 'PCA.transform'],
54-
times=[fit_time, transform_time], metric_type=None,
55-
metrics=[None, None], data=[X_train, X_test],
54+
times=[fit_time, transform_time], accuracy_type=None,
55+
accuracies=[None, None], data=[X_train, X_test],
5656
alg_instance=pca)

cuml_bench/ridge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,6 @@
5252
bench.print_output(library='cuml', algorithm='ridge_regression',
5353
stages=['training', 'prediction'], params=params,
5454
functions=['Ridge.fit', 'Ridge.predict'],
55-
times=[fit_time, predict_time], metric_type='rmse',
56-
metrics=[train_rmse, test_rmse], data=[X_train, X_test],
55+
times=[fit_time, predict_time], accuracy_type='rmse',
56+
accuracies=[train_rmse, test_rmse], data=[X_train, X_test],
5757
alg_instance=regr)

cuml_bench/svm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@
5757

5858
if params.probability:
5959
state_predict = 'predict_proba'
60-
metric_type = 'log_loss'
60+
accuracy_type = 'log_loss'
6161
clf_predict = clf.predict_proba
6262

6363
def metric_call(x, y):
6464
return bench.log_loss(x, y)
6565
else:
6666
state_predict = 'prediction'
67-
metric_type = 'accuracy[%]'
67+
accuracy_type = 'accuracy[%]'
6868
clf_predict = clf.predict
6969

7070
def metric_call(x, y):
@@ -82,6 +82,6 @@ def metric_call(x, y):
8282
bench.print_output(library='cuml', algorithm='svc',
8383
stages=['training', state_predict], params=params,
8484
functions=['SVM.fit', 'SVM.predict'],
85-
times=[fit_time, predict_train_time], metric_type=metric_type,
86-
metrics=[train_acc, test_acc], data=[X_train, X_train],
85+
times=[fit_time, predict_train_time], accuracy_type=accuracy_type,
86+
accuracies=[train_acc, test_acc], data=[X_train, X_train],
8787
alg_instance=clf)

cuml_bench/svr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,6 @@
6666
bench.print_output(library='cuml', algorithm='svr',
6767
stages=['training', 'prediction'], params=params,
6868
functions=['SVR.fit', 'SVR.predict'],
69-
times=[fit_time, predict_train_time], metric_type='rmse',
70-
metrics=[train_rmse, test_rmse], data=[X_train, X_train],
69+
times=[fit_time, predict_train_time], accuracy_type='rmse',
70+
accuracies=[train_rmse, test_rmse], data=[X_train, X_train],
7171
alg_instance=regr)

cuml_bench/train_test_split.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,5 @@
4444

4545
bench.print_output(library='cuml', algorithm='train_test_split',
4646
stages=['training'], params=params,
47-
functions=['train_test_split'], times=[time], metrics=[None],
48-
metric_type=None, data=[X], alg_params=tts_params)
47+
functions=['train_test_split'], times=[time], accuracies=[None],
48+
accuracy_type=None, data=[X], alg_params=tts_params)

daal4py_bench/dbscan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,4 @@ def test_dbscan(X):
5151

5252
bench.print_output(library='daal4py', algorithm='dbscan', stages=['training'],
5353
params=params, functions=['DBSCAN'], times=[time],
54-
metrics=[None], metric_type=None, data=[X])
54+
accuracies=[None], accuracy_type=None, data=[X])

daal4py_bench/df_clsf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,5 +126,5 @@ def df_clsf_predict(X, training_result, n_classes, verbose=False):
126126
bench.print_output(library='daal4py', algorithm='decision_forest_classification',
127127
stages=['training', 'prediction'], params=params,
128128
functions=['df_clsf.fit', 'df_clsf.predict'],
129-
times=[fit_time, predict_time], metric_type='accuracy[%]',
130-
metrics=[train_acc, test_acc], data=[X_train, X_test])
129+
times=[fit_time, predict_time], accuracy_type='accuracy[%]',
130+
accuracies=[train_acc, test_acc], data=[X_train, X_test])

daal4py_bench/df_regr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,5 +123,5 @@ def df_regr_predict(X, training_result):
123123
bench.print_output(library='daal4py', algorithm='decision_forest_regression',
124124
stages=['training', 'prediction'], params=params,
125125
functions=['df_regr.fit', 'df_regr.predict'],
126-
times=[fit_time, predict_time], metric_type='rmse',
127-
metrics=[train_rmse, test_rmse], data=[X_train, X_test])
126+
times=[fit_time, predict_time], accuracy_type='rmse',
127+
accuracies=[train_rmse, test_rmse], data=[X_train, X_test])

daal4py_bench/distances.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,5 @@ def compute_distances(pairwise_distances, X):
4343

4444
bench.print_output(library='daal4py', algorithm='distances', stages=['computation'],
4545
params=params, functions=[params.metric.capitalize()], times=[time],
46-
metric_type=None, metrics=[None], data=[X],
46+
accuracy_type=None, accuracies=[None], data=[X],
4747
alg_params={'metric': params.metric})

daal4py_bench/kmeans.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,5 +87,5 @@ def test_predict(X, X_init):
8787
bench.print_output(library='daal4py', algorithm='kmeans',
8888
stages=['training', 'prediction'],
8989
params=params, functions=['KMeans.fit', 'KMeans.predict'],
90-
times=[fit_time, predict_time], metric_type='inertia',
91-
metrics=[train_inertia, test_inertia], data=[X_train, X_test])
90+
times=[fit_time, predict_time], accuracy_type='inertia',
91+
accuracies=[train_inertia, test_inertia], data=[X_train, X_test])

daal4py_bench/linear.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,5 @@ def test_predict(Xp, model):
6868
bench.print_output(library='daal4py', algorithm='linear_regression',
6969
stages=['training', 'prediction'],
7070
params=params, functions=['Linear.fit', 'Linear.predict'],
71-
times=[fit_time, predict_time], metric_type='rmse',
72-
metrics=[train_rmse, test_rmse], data=[X_train, X_test])
71+
times=[fit_time, predict_time], accuracy_type='rmse',
72+
accuracies=[train_rmse, test_rmse], data=[X_train, X_test])

daal4py_bench/pca.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def test_transform(Xp, pca_result, eigenvalues, eigenvectors):
142142
bench.print_output(library='daal4py', algorithm='pca',
143143
stages=['training', 'transformation'],
144144
params=params, functions=['PCA.fit', 'PCA.transform'],
145-
times=[fit_time, transform_time], metric_type=None,
146-
metrics=[None, None], data=[X_train, X_test],
145+
times=[fit_time, transform_time], accuracy_type=None,
146+
accuracies=[None, None], data=[X_train, X_test],
147147
alg_params={'svd_solver': params.svd_solver,
148148
'n_components': params.n_components})

daal4py_bench/ridge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,5 +64,5 @@ def test_predict(Xp, model):
6464
bench.print_output(library='daal4py', algorithm='ridge_regression',
6565
stages=['training', 'prediction'], params=params,
6666
functions=['Ridge.fit', 'Ridge.predict'],
67-
times=[fit_time, predict_time], metric_type='rmse',
68-
metrics=[train_rmse, test_rmse], data=[X_train, X_test])
67+
times=[fit_time, predict_time], accuracy_type='rmse',
68+
accuracies=[train_rmse, test_rmse], data=[X_train, X_test])

datasets/loader_classification.py

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,9 @@ def airline(dataset_dir: Path) -> bool:
6868
Airline dataset
6969
http://kt.ijs.si/elena_ikonomovska/data.html
7070
71-
Classification task. n_classes = 2.
72-
airline X train dataset (92055213, 13)
73-
airline y train dataset (92055213, 1)
74-
airline X test dataset (23013804, 13)
75-
airline y test dataset (23013804, 1)
71+
TaskType:binclass
72+
NumberOfFeatures:13
73+
NumberOfInstances:115M
7674
"""
7775
dataset_name = 'airline'
7876
os.makedirs(dataset_dir, exist_ok=True)
@@ -128,12 +126,9 @@ def airline(dataset_dir: Path) -> bool:
128126
def airline_ohe(dataset_dir: Path) -> bool:
129127
"""
130128
Dataset from szilard benchmarks: https://github.com/szilard/GBM-perf
131-
132-
Classification task. n_classes = 2.
133-
airline-ohe X train dataset (1000000, 692)
134-
airline-ohe y train dataset (1000000, 1)
135-
airline-ohe X test dataset (100000, 692)
136-
airline-ohe y test dataset (100000, 1)
129+
TaskType:binclass
130+
NumberOfFeatures:700
131+
NumberOfInstances:10100000
137132
"""
138133
dataset_name = 'airline-ohe'
139134
os.makedirs(dataset_dir, exist_ok=True)
@@ -294,11 +289,9 @@ def epsilon(dataset_dir: Path) -> bool:
294289
Epsilon dataset
295290
https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html
296291
297-
Classification task. n_classes = 2.
298-
epsilon X train dataset (400000, 2000)
299-
epsilon y train dataset (400000, 1)
300-
epsilon X test dataset (100000, 2000)
301-
epsilon y test dataset (100000, 1)
292+
TaskType:binclass
293+
NumberOfFeatures:2000
294+
NumberOfInstances:500K
302295
"""
303296
dataset_name = 'epsilon'
304297
os.makedirs(dataset_dir, exist_ok=True)
@@ -451,11 +444,9 @@ def higgs(dataset_dir: Path) -> bool:
451444
Higgs dataset from UCI machine learning repository
452445
https://archive.ics.uci.edu/ml/datasets/HIGGS
453446
454-
Classification task. n_classes = 2.
455-
higgs X train dataset (8799999, 28)
456-
higgs y train dataset (8799999, 1)
457-
higgs X test dataset (2200000, 28)
458-
higgs y test dataset (2200000, 1)
447+
TaskType:binclass
448+
NumberOfFeatures:28
449+
NumberOfInstances:11M
459450
"""
460451
dataset_name = 'higgs'
461452
os.makedirs(dataset_dir, exist_ok=True)
@@ -488,11 +479,9 @@ def higgs_one_m(dataset_dir: Path) -> bool:
488479
489480
Only the first 1.5M samples are taken
490481
491-
Classification task. n_classes = 2.
492-
higgs1m X train dataset (1000000, 28)
493-
higgs1m y train dataset (1000000, 1)
494-
higgs1m X test dataset (500000, 28)
495-
higgs1m y test dataset (500000, 1)
482+
TaskType:binclass
483+
NumberOfFeatures:28
484+
NumberOfInstances:1.5M
496485
"""
497486
dataset_name = 'higgs1m'
498487
os.makedirs(dataset_dir, exist_ok=True)

0 commit comments

Comments
 (0)