-
Notifications
You must be signed in to change notification settings - Fork 73
Add device context parameter #57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -175,7 +175,11 @@ def parse_args(parser, size=None, loop_types=(), | |
help='Dataset name') | ||
parser.add_argument('--no-intel-optimized', default=False, action='store_true', | ||
help='Use no intel optimized version. ' | ||
'Now avalible for scikit-learn benchmarks'), | ||
'Now avalible for scikit-learn benchmarks') | ||
parser.add_argument('--device', default=None, type=str, | ||
choices=("host", "cpu", "gpu"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
help='Execution context device') | ||
|
||
for data in ['X', 'y']: | ||
for stage in ['train', 'test']: | ||
parser.add_argument(f'--file-{data}-{stage}', | ||
|
@@ -197,6 +201,8 @@ def parse_args(parser, size=None, loop_types=(), | |
except ImportError: | ||
print('Failed to import daal4py.sklearn.patch_sklearn.' | ||
'Use stock version scikit-learn', file=sys.stderr) | ||
else: | ||
params.device = None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I think we should check whether the device parameter was passed by the user and, for clarity, print a warning that it has no effect in that case |
||
|
||
# disable finiteness check (default) | ||
if not params.check_finiteness: | ||
|
@@ -492,3 +498,11 @@ def print_output(library, algorithm, stages, params, functions, | |
del result['algorithm_parameters']['handle'] | ||
output.append(result) | ||
print(json.dumps(output, indent=4)) | ||
|
||
def run_with_context(params, function): | ||
if params.device is not None: | ||
from daal4py.oneapi import sycl_context | ||
with sycl_context(params.device): | ||
function() | ||
else: | ||
function() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
{ | ||
"common": { | ||
"lib": ["sklearn"], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. How is the stock or Intel version of scikit-learn specified for this config? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Do we need to launch stock scikit-learn in this config? Maybe just add a flag There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. OK - it's probably better to skip There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Need to add support to skip these cases? |
||
"data-format": ["pandas"], | ||
"data-order": ["F"], | ||
"dtype": ["float64"], | ||
"device": ["host", "cpu", "gpu"] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. What happens if I run this config on a machine without a GPU driver? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Exactly the same as what happens if you try to run on a CPU without DPC++ support - an exception. What is your suggestion here? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. We should note somewhere that using this config file requires DPC++ support and a GPU device on board There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Ok, probably it's ok |
||
}, | ||
"cases": [ | ||
{ | ||
"algorithm": "kmeans", | ||
"dataset": [ | ||
{ | ||
"source": "synthetic", | ||
"type": "blobs", | ||
"n_clusters": 10, | ||
"n_features": 50, | ||
"training": { | ||
"n_samples": 1000000 | ||
} | ||
} | ||
], | ||
"n-clusters": [10] | ||
}, | ||
{ | ||
"algorithm": "dbscan", | ||
"dataset": [ | ||
{ | ||
"source": "synthetic", | ||
"type": "blobs", | ||
"n_clusters": 10, | ||
"n_features": 50, | ||
"training": { | ||
"n_samples": 10000 | ||
} | ||
} | ||
] | ||
}, | ||
{ | ||
"algorithm": "linear", | ||
"dataset": [ | ||
{ | ||
"source": "synthetic", | ||
"type": "regression", | ||
"n_features": 50, | ||
"training": { | ||
"n_samples": 1000000 | ||
} | ||
} | ||
] | ||
}, | ||
{ | ||
"algorithm": "log_reg", | ||
"solver":["lbfgs", "newton-cg"], | ||
"dataset": [ | ||
{ | ||
"source": "synthetic", | ||
"type": "classification", | ||
"n_classes": 2, | ||
"n_features": 100, | ||
"training": { | ||
"n_samples": 100000 | ||
} | ||
}, | ||
{ | ||
"source": "synthetic", | ||
"type": "classification", | ||
"n_classes": 5, | ||
"n_features": 100, | ||
"training": { | ||
"n_samples": 100000 | ||
} | ||
} | ||
] | ||
} | ||
] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,6 +70,9 @@ def generate_cases(params): | |
parser.add_argument('--report', default=False, action='store_true', | ||
help='Create an Excel report based on benchmarks results. ' | ||
'Need "openpyxl" library') | ||
parser.add_argument('--device', default=None, type=str, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Why is the parameter duplicated in |
||
choices=("host", "cpu", "gpu"), | ||
help='Execution context device') | ||
args = parser.parse_args() | ||
env = os.environ.copy() | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -19,38 +19,41 @@ | |||||
import os | ||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
import bench | ||||||
from sklearn.metrics.cluster import davies_bouldin_score | ||||||
|
||||||
parser = argparse.ArgumentParser(description='scikit-learn DBSCAN benchmark') | ||||||
parser.add_argument('-e', '--eps', '--epsilon', type=float, default=10., | ||||||
help='Radius of neighborhood of a point') | ||||||
parser.add_argument('-m', '--min-samples', default=5, type=int, | ||||||
help='The minimum number of samples required in a ' | ||||||
'neighborhood to consider a point a core point') | ||||||
params = bench.parse_args(parser) | ||||||
|
||||||
from sklearn.cluster import DBSCAN | ||||||
|
||||||
# Load generated data | ||||||
X, _, _, _ = bench.load_data(params, add_dtype=True) | ||||||
|
||||||
# Create our clustering object | ||||||
dbscan = DBSCAN(eps=params.eps, n_jobs=params.n_jobs, | ||||||
min_samples=params.min_samples, metric='euclidean', | ||||||
algorithm='auto') | ||||||
|
||||||
# N.B. algorithm='auto' will select DAAL's brute force method when running | ||||||
# daal4py-patched scikit-learn, and probably 'kdtree' when running unpatched | ||||||
# scikit-learn. | ||||||
|
||||||
# Time fit | ||||||
time, _ = bench.measure_function_time(dbscan.fit, X, params=params) | ||||||
labels = dbscan.labels_ | ||||||
|
||||||
params.n_clusters = len(set(labels)) - (1 if -1 in labels else 0) | ||||||
acc = davies_bouldin_score(X, labels) | ||||||
|
||||||
bench.print_output(library='sklearn', algorithm='dbscan', stages=['training'], | ||||||
params=params, functions=['DBSCAN'], times=[time], accuracies=[acc], | ||||||
accuracy_type='davies_bouldin_score', data=[X], | ||||||
alg_instance=dbscan) | ||||||
def main(): | ||||||
from sklearn.cluster import DBSCAN | ||||||
from sklearn.metrics.cluster import davies_bouldin_score | ||||||
|
||||||
# Load generated data | ||||||
X, _, _, _ = bench.load_data(params, add_dtype=True) | ||||||
|
||||||
# Create our clustering object | ||||||
dbscan = DBSCAN(eps=params.eps, n_jobs=params.n_jobs, | ||||||
min_samples=params.min_samples, metric='euclidean', | ||||||
algorithm='auto') | ||||||
|
||||||
# N.B. algorithm='auto' will select DAAL's brute force method when running | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. @PetrovKP what about other files that @vlad-nazarov did not touch? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I will fix them if any such places remain |
||||||
# daal4py-patched scikit-learn, and probably 'kdtree' when running unpatched | ||||||
# scikit-learn. | ||||||
|
||||||
# Time fit | ||||||
time, _ = bench.measure_function_time(dbscan.fit, X, params=params) | ||||||
labels = dbscan.labels_ | ||||||
|
||||||
params.n_clusters = len(set(labels)) - (1 if -1 in labels else 0) | ||||||
acc = davies_bouldin_score(X, labels) | ||||||
|
||||||
bench.print_output(library='sklearn', algorithm='dbscan', stages=['training'], | ||||||
params=params, functions=['DBSCAN'], times=[time], accuracies=[acc], | ||||||
accuracy_type='davies_bouldin_score', data=[X], | ||||||
alg_instance=dbscan) | ||||||
|
||||||
if __name__ == "__main__": | ||||||
parser = argparse.ArgumentParser(description='scikit-learn DBSCAN benchmark') | ||||||
parser.add_argument('-e', '--eps', '--epsilon', type=float, default=10., | ||||||
help='Radius of neighborhood of a point') | ||||||
parser.add_argument('-m', '--min-samples', default=5, type=int, | ||||||
help='The minimum number of samples required in a ' | ||||||
'neighborhood to consider a point a core point') | ||||||
params = bench.parse_args(parser) | ||||||
bench.run_with_context(params, main) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My understanding is that `None` is used to run without a context. Other values specify the device type for a context.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ohh, then I think the host is not needed at all