Xgb datasets adding #60
Changes from 26 commits
The first changed file, the shared benchmarking utilities, adds an import of logging and switches the print diagnostics to logging.info calls:
@@ -16,6 +16,7 @@
 import argparse
 import json
+import logging
 import sys
 import timeit
@@ -200,15 +201,16 @@ def parse_args(parser, size=None, loop_types=(),
             from sklearnex import patch_sklearn
             patch_sklearn()
         except ImportError:
-            print('Failed to import sklearnex.patch_sklearn.'
-                  'Use stock version scikit-learn', file=sys.stderr)
+            logging.info('Failed to import sklearnex.patch_sklearn.'
+                         'Use stock version scikit-learn', file=sys.stderr)
             params.device = 'None'
     else:
         if params.device != 'None':
-            print('Device context is not supported for stock scikit-learn.'
-                  'Please use --no-intel-optimized=False with'
-                  f'--device={params.device} parameter. Fallback to --device=None.',
-                  file=sys.stderr)
+            logging.info(
+                'Device context is not supported for stock scikit-learn.'
+                'Please use --no-intel-optimized=False with'
+                f'--device={params.device} parameter. Fallback to --device=None.',
+                file=sys.stderr)
             params.device = 'None'

     # disable finiteness check (default)
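As context for the print-to-logging conversion above (this note and sketch are not part of the PR): logging.info() has no file= keyword, and with the default WARNING level INFO messages are simply dropped, so the usual pattern is a single basicConfig call at startup that routes all records to stderr.

```python
# Minimal sketch, assuming the goal is to keep diagnostics on stderr while
# switching from print() to the logging module; not taken from this PR.
import logging
import sys

# Configure once at program start: send INFO and above to stderr.
logging.basicConfig(stream=sys.stderr, level=logging.INFO,
                    format='%(levelname)s: %(message)s')

# Individual calls then need no file= argument.
logging.info('Failed to import sklearnex.patch_sklearn. '
             'Using stock scikit-learn')
```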
@@ -218,7 +220,7 @@ def parse_args(parser, size=None, loop_types=(),
     # Ask DAAL what it thinks about this number of threads
     num_threads = prepare_daal_threads(num_threads=params.threads)
     if params.verbose:
-        print(f'@ DAAL gave us {num_threads} threads')
+        logging.info(f'@ DAAL gave us {num_threads} threads')

     n_jobs = None
     if n_jobs_supported:
@@ -234,7 +236,7 @@ def parse_args(parser, size=None, loop_types=(),

     # Very verbose output
     if params.verbose:
-        print(f'@ params = {params.__dict__}')
+        logging.info(f'@ params = {params.__dict__}')

     return params
@@ -249,8 +251,8 @@ def set_daal_num_threads(num_threads):
        if num_threads:
            daal4py.daalinit(nthreads=num_threads)
    except ImportError:
-        print('@ Package "daal4py" was not found. Number of threads '
-              'is being ignored')
+        logging.info('@ Package "daal4py" was not found. Number of threads '
+                     'is being ignored')


def prepare_daal_threads(num_threads=-1):
@@ -484,7 +486,7 @@ def print_output(library, algorithm, stages, params, functions,
        output = []
        for i in range(len(stages)):
            result = gen_basic_dict(library, algorithm, stages[i], params,
-                                    data[i], alg_instance, alg_params)
+                                    data[i], alg_instance, alg_params if i == 0 else None)
            result.update({'time[s]': times[i]})
            if accuracy_type is not None:
                result.update({f'{accuracy_type}': accuracies[i]})

Review discussion on passing alg_params only for the first stage:

- Not clear why only the first stage has the parameters.
- It seems that in all benchmarks every stage of a case has the same parameters. Since the parameter list is usually quite long, we can reduce the length of the benchmark output by printing this section only once.
- @RukhovichIV, the Excel report generator filters benchmark cases by their parameters, so the output should not be shortened or the generator will not work correctly.
- Rolled back that change, but very upset about it :(
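To illustrate the reviewer's point about the report generator, here is a hypothetical sketch; the field names and the filter are illustrative only, not the benchmark's actual schema or tooling.

```python
# If only the first stage's entry carries the algorithm parameters, any
# downstream tool that filters entries by those parameters loses the
# remaining stages. Field names here are made up for illustration.
results = [
    {'stage': 'training', 'algorithm_parameters': {'max_depth': 8}, 'time[s]': 12.3},
    {'stage': 'prediction', 'algorithm_parameters': None, 'time[s]': 1.7},
]

def filter_by_param(entries, key, value):
    # Keep entries whose recorded parameters contain key == value.
    return [e for e in entries
            if (e.get('algorithm_parameters') or {}).get(key) == value]

print(filter_by_param(results, 'max_depth', 8))
# Only the training entry matches; the prediction timing disappears from the report.
```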
The second changed file, a cuml benchmark config, drops its omp_env entry:
@@ -1,5 +1,4 @@
 {
-    "omp_env": ["OMP_NUM_THREADS"],
     "common": {
         "lib": ["cuml"],
         "data-format": ["cudf"],
The third changed file, the LightGBM model-builders (lgbm_mb) config, moves the algorithm key into the common section, replaces single-element lists with plain values, reorders the cases, and switches most datasets from csv to npy files with explicit testing splits:
@@ -1,109 +1,120 @@
 {
-    "omp_env": ["OMP_NUM_THREADS", "OMP_PLACES"],
     "common": {
-        "lib": ["modelbuilders"],
-        "data-format": ["pandas"],
-        "data-order": ["F"],
-        "dtype": ["float32"]
+        "lib": "modelbuilders",
+        "data-format": "pandas",
+        "data-order": "F",
+        "dtype": "float32",
+        "algorithm": "lgbm_mb"
     },

Review comments on the switch to single-value parameters in the common block:

- Add a note to the README that parameters might be set with a single value or with a list of values.
- Done earlier.
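The note requested above concerns configs like this one, where a parameter may be given either as a single value or as a list of values to sweep. A minimal sketch of how such values are typically normalized before expanding benchmark cases (the helper below is hypothetical, not the repository's actual parser):

```python
# Hypothetical normalization: wrap scalars so that case expansion can always
# iterate, letting "max-depth": 8 and "max-depth": [8] mean the same thing.
from itertools import product

def as_list(value):
    return value if isinstance(value, list) else [value]

params = {'max-depth': 8, 'learning-rate': [0.1, 0.3]}  # mixed styles
keys = list(params)
cases = [dict(zip(keys, combo))
         for combo in product(*(as_list(params[k]) for k in keys))]
print(cases)
# [{'max-depth': 8, 'learning-rate': 0.1}, {'max-depth': 8, 'learning-rate': 0.3}]
```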
"cases": [ | ||
{ | ||
"algorithm": "lgbm_mb", | ||
"dataset": [ | ||
{ | ||
"source": "csv", | ||
"name": "mortgage1Q", | ||
"source": "npy", | ||
"name": "airline-ohe", | ||
"training": | ||
{ | ||
"x": "data/mortgage_x.csv", | ||
"y": "data/mortgage_y.csv" | ||
"x": "data/airline-ohe_x_train.npy", | ||
"y": "data/airline-ohe_y_train.npy" | ||
}, | ||
"testing": | ||
{ | ||
"x": "data/airline-ohe_x_test.npy", | ||
"y": "data/airline-ohe_y_test.npy" | ||
} | ||
} | ||
], | ||
"n-estimators": [100], | ||
"objective": ["regression"], | ||
"max-depth": [8], | ||
"scale-pos-weight": [2], | ||
"learning-rate": [0.1], | ||
"subsample": [1], | ||
"reg-alpha": [0.9], | ||
"reg-lambda": [1], | ||
"min-child-weight": [0], | ||
"max-leaves": [256] | ||
"reg-alpha": 0.9, | ||
"max-bin": 256, | ||
"scale-pos-weight": 2, | ||
"learning-rate": 0.1, | ||
"subsample": 1, | ||
"reg-lambda": 1, | ||
"min-child-weight": 0, | ||
"max-depth": 8, | ||
"max-leaves": 256, | ||
"n-estimators": 1000, | ||
"objective": "binary" | ||
}, | ||
         {
-            "algorithm": "lgbm_mb",
             "dataset": [
                 {
-                    "source": "csv",
-                    "name": "airline-ohe",
+                    "source": "npy",
+                    "name": "higgs1m",
                     "training":
                     {
-                        "x": "data/airline-ohe_x_train.csv",
-                        "y": "data/airline-ohe_y_train.csv"
+                        "x": "data/higgs1m_x_train.npy",
+                        "y": "data/higgs1m_y_train.npy"
+                    },
+                    "testing":
+                    {
+                        "x": "data/higgs1m_x_test.npy",
+                        "y": "data/higgs1m_y_test.npy"
                     }
                 }
             ],
-            "reg-alpha": [0.9],
-            "max-bin": [256],
-            "scale-pos-weight": [2],
-            "learning-rate": [0.1],
-            "subsample": [1],
-            "reg-lambda": [1],
-            "min-child-weight": [0],
-            "max-depth": [8],
-            "max-leaves": [256],
-            "n-estimators": [1000],
-            "objective": ["binary"]
+            "reg-alpha": 0.9,
+            "max-bin": 256,
+            "scale-pos-weight": 2,
+            "learning-rate": 0.1,
+            "subsample": 1,
+            "reg-lambda": 1,
+            "min-child-weight": 0,
+            "max-depth": 8,
+            "max-leaves": 256,
+            "n-estimators": 1000,
+            "objective": "binary"
         },
         {
-            "algorithm": "lgbm_mb",
             "dataset": [
                 {
-                    "source": "csv",
-                    "name": "higgs1m",
+                    "source": "csv",
+                    "name": "mortgage1Q",
                     "training":
                     {
-                        "x": "data/higgs1m_x_train.csv",
-                        "y": "data/higgs1m_y_train.csv"
+                        "x": "data/mortgage_x.csv",
+                        "y": "data/mortgage_y.csv"
                     }
                 }
             ],
-            "reg-alpha": [0.9],
-            "max-bin": [256],
-            "scale-pos-weight": [2],
-            "learning-rate": [0.1],
-            "subsample": [1],
-            "reg-lambda": [1],
-            "min-child-weight": [0],
-            "max-depth": [8],
-            "max-leaves": [256],
-            "n-estimators": [1000],
-            "objective": ["binary"]
+            "n-estimators": 100,
+            "objective": "regression",
+            "max-depth": 8,
+            "scale-pos-weight": 2,
+            "learning-rate": 0.1,
+            "subsample": 1,
+            "reg-alpha": 0.9,
+            "reg-lambda": 1,
+            "min-child-weight": 0,
+            "max-leaves": 256
         },
         {
-            "algorithm": "lgbm_mb",
             "dataset": [
                 {
-                    "source": "csv",
-                    "name": "msrank",
+                    "source": "npy",
+                    "name": "msrank",
                     "training":
                     {
-                        "x": "data/mlsr_x_train.csv",
-                        "y": "data/mlsr_y_train.csv"
+                        "x": "data/msrank_x_train.npy",
+                        "y": "data/msrank_y_train.npy"
+                    },
+                    "testing":
+                    {
+                        "x": "data/msrank_x_test.npy",
+                        "y": "data/msrank_y_test.npy"
                     }
                 }
             ],
-            "max-bin": [256],
-            "learning-rate": [0.3],
-            "subsample": [1],
-            "reg-lambda": [2],
-            "min-child-weight": [1],
-            "min-split-gain": [0.1],
-            "max-depth": [8],
-            "max-leaves": [256],
-            "n-estimators": [200],
-            "objective": ["multiclass"]
+            "max-bin": 256,
+            "learning-rate": 0.3,
+            "subsample": 1,
+            "reg-lambda": 2,
+            "min-child-weight": 1,
+            "min-split-loss": 0.1,
+            "max-depth": 8,
+            "max-leaves": 256,
+            "n-estimators": 200,
+            "objective": "multiclass"
         }
     ]
 }