-
Notifications
You must be signed in to change notification settings - Fork 73
adding parameters for device context and patching of Scikit-Learn #23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
902bec1
607c832
2e08178
56cccee
8d38330
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -164,6 +164,12 @@ def parse_args(parser, size=None, loop_types=(), | |
help='Seed to pass as random_state') | ||
parser.add_argument('--dataset-name', type=str, default=None, | ||
help='Dataset name') | ||
parser.add_argument('--device', type=str, default='None', | ||
choices=('None', 'host', 'cpu', 'gpu'), | ||
help='Execution context device, "None" to run without context.') | ||
parser.add_argument('--patch_sklearn', type=str, default='None', | ||
choices=('None', 'True', 'False'), | ||
help='True for patch, False for unpatch, "None" to leave as is.') | ||
|
||
for data in ['X', 'y']: | ||
for stage in ['train', 'test']: | ||
|
@@ -618,3 +624,36 @@ def import_fptype_getter(): | |
except: | ||
from daal4py.sklearn.utils import getFPType | ||
return getFPType | ||
|
||
|
||
def patch_sklearn():
    """Apply or remove the daal4py scikit-learn patches per the CLI flag.

    Reads the tri-state '--patch_sklearn' option ('None'/'True'/'False')
    from sys.argv. 'None' (the default) leaves the current patching state
    untouched, which keeps existing callers backward compatible.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--patch_sklearn', type=str, default='None',
                        choices=('None', 'True', 'False'),
                        help='True for patch, False for unpatch, "None" to leave as is.')
    # parse_known_args: ignore every other benchmark-specific flag on argv.
    args, _ = parser.parse_known_args()

    requested = args.patch_sklearn
    if requested is None or requested == 'None':
        return  # leave patching state as is

    # Lazy import: daal4py is only required when (un)patching is requested.
    from daal4py.sklearn import patch_sklearn, unpatch_sklearn
    if requested == "True":
        patch_sklearn()
    elif requested == "False":
        unpatch_sklearn()
    else:
        raise ValueError('Parameter "patch_sklearn" must be '
                         '"None", "True" or "False", got {}.'.format(args.patch_sklearn))
|
||
|
||
def run_with_context(function):
    """Run *function*, optionally inside a daal4py SYCL device context.

    Reads the '--device' option ('None'/'host'/'cpu'/'gpu') from sys.argv.
    With the default 'None' the function is called directly; otherwise the
    call is wrapped in ``sycl_context(device)``.

    Returns whatever *function* returns (previously the result was
    discarded; returning it is backward compatible).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', type=str, default='None',
                        choices=('None', 'host', 'cpu', 'gpu'),
                        help='Execution context device, "None" to run without context.')
    # parse_known_args: ignore every other benchmark-specific flag on argv.
    args, _ = parser.parse_known_args()

    if args.device is not None and args.device != 'None':
        # Lazy import: daal4py is only required when a device is requested.
        from daal4py.oneapi import sycl_context
        with sycl_context(args.device):
            return function()
    return function()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,71 +1,78 @@ | ||
# Copyright (C) 2017-2020 Intel Corporation | ||
# Copyright (C) 2018-2020 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: MIT | ||
|
||
import argparse | ||
from bench import ( | ||
parse_args, measure_function_time, load_data, print_output | ||
) | ||
import numpy as np | ||
from sklearn.cluster import KMeans | ||
from sklearn.metrics.cluster import davies_bouldin_score | ||
|
||
parser = argparse.ArgumentParser(description='scikit-learn K-means benchmark') | ||
parser.add_argument('-i', '--filei', '--fileI', '--init', | ||
type=str, help='Initial clusters') | ||
parser.add_argument('-t', '--tol', type=float, default=0., | ||
help='Absolute threshold') | ||
parser.add_argument('--maxiter', type=int, default=100, | ||
help='Maximum number of iterations') | ||
parser.add_argument('--n-clusters', type=int, help='Number of clusters') | ||
params = parse_args(parser) | ||
|
||
# Load and convert generated data | ||
X_train, X_test, _, _ = load_data(params) | ||
|
||
if params.filei == 'k-means++': | ||
X_init = 'k-means++' | ||
# Load initial centroids from specified path | ||
elif params.filei is not None: | ||
X_init = np.load(params.filei).astype(params.dtype) | ||
params.n_clusters = X_init.shape[0] | ||
# or choose random centroids from training data | ||
else: | ||
np.random.seed(params.seed) | ||
centroids_idx = np.random.randint(0, X_train.shape[0], | ||
size=params.n_clusters) | ||
if hasattr(X_train, "iloc"): | ||
X_init = X_train.iloc[centroids_idx].values | ||
else: | ||
X_init = X_train[centroids_idx] | ||
from bench import (measure_function_time, parse_args, load_data, print_output, | ||
run_with_context, patch_sklearn) | ||
|
||
def main():
    """Run the scikit-learn K-means benchmark: time fit and predict,
    score both with davies_bouldin_score, and print the results."""
    import argparse
    import numpy as np
    from sklearn.cluster import KMeans
    from sklearn.metrics.cluster import davies_bouldin_score

    parser = argparse.ArgumentParser(description='scikit-learn K-means benchmark')
    parser.add_argument('-i', '--filei', '--fileI', '--init',
                        type=str, help='Initial clusters')
    parser.add_argument('-t', '--tol', type=float, default=0.,
                        help='Absolute threshold')
    parser.add_argument('--maxiter', type=int, default=100,
                        help='Maximum number of iterations')
    parser.add_argument('--n-clusters', type=int, help='Number of clusters')
    params = parse_args(parser)

    # Load and convert generated data
    X_train, X_test, _, _ = load_data(params)

    if params.filei == 'k-means++':
        X_init = 'k-means++'
    # Load initial centroids from specified path
    elif params.filei is not None:
        X_init = np.load(params.filei).astype(params.dtype)
        params.n_clusters = X_init.shape[0]
    # or choose random centroids from training data
    else:
        np.random.seed(params.seed)
        centroids_idx = np.random.randint(0, X_train.shape[0],
                                          size=params.n_clusters)
        if hasattr(X_train, "iloc"):
            X_init = X_train.iloc[centroids_idx].values
        else:
            X_init = X_train[centroids_idx]

    def fit_kmeans(X):
        # X_init and params are read from main()'s scope via closure.
        # BUGFIX: the original declared `global X_init, params`, but both
        # are locals of main(), so the global lookup would raise NameError.
        alg = KMeans(n_clusters=params.n_clusters, tol=params.tol,
                     max_iter=params.maxiter, init=X_init, n_init=1)
        alg.fit(X)
        return alg

    columns = ('batch', 'arch', 'prefix', 'function', 'threads', 'dtype',
               'size', 'n_clusters', 'time')

    # Time fit
    fit_time, kmeans = measure_function_time(fit_kmeans, X_train,
                                             params=params)

    train_predict = kmeans.predict(X_train)
    acc_train = davies_bouldin_score(X_train, train_predict)

    # Time predict
    predict_time, test_predict = measure_function_time(
        kmeans.predict, X_test, params=params)

    acc_test = davies_bouldin_score(X_test, test_predict)

    print_output(library='sklearn', algorithm='kmeans',
                 stages=['training', 'prediction'], columns=columns,
                 params=params, functions=['KMeans.fit', 'KMeans.predict'],
                 times=[fit_time, predict_time],
                 accuracy_type='davies_bouldin_score',
                 accuracies=[acc_train, acc_test], data=[X_train, X_test],
                 alg_instance=kmeans)


if __name__ == "__main__":
    patch_sklearn()
    run_with_context(main)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess these parameters should be enabled in the higher-level scripts that execute the benchmarks — but I don't see that wired up in make?
Another question: are we going to pass a single value to bench.py, or should this be a tuple so that bench.py iterates over the values?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So far I have only used the runner.py script. Additional changes may still be needed in make or elsewhere. Thanks.
Devices are provided as a list in config
"device": ["None", "host", "cpu", "gpu"],
. See the example above. Benchmarks are executed with every device in the list.