From d44dad987054fef319322fd4308a0cab172f1593 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 20 Jan 2025 22:02:02 -0800 Subject: [PATCH 1/2] minor update for kmeans out of preview --- configs/regular/kmeans.json | 2 +- configs/spmd/large_scale/full.json | 245 +++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 configs/spmd/large_scale/full.json diff --git a/configs/regular/kmeans.json b/configs/regular/kmeans.json index d4953615..bcb7026f 100644 --- a/configs/regular/kmeans.json +++ b/configs/regular/kmeans.json @@ -70,7 +70,7 @@ "TEMPLATES": { "sklearn kmeans": { "SETS": [ - "sklearn-ex[preview] implementations", + "sklearn-ex[cpu,gpu] implementations", "common kmeans parameters", "sklearn kmeans parameters", "kmeans datasets" diff --git a/configs/spmd/large_scale/full.json b/configs/spmd/large_scale/full.json new file mode 100644 index 00000000..02ddfe9b --- /dev/null +++ b/configs/spmd/large_scale/full.json @@ -0,0 +1,245 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd basicstats parameters": { + "algorithm": { + "estimator": "BasicStatistics" + } + }, + "spmd incremental basicstats parameters": { + "algorithm": { + "estimator": "IncrementalBasicStatistics", + "estimator_methods": {"training": "partial_fit"} + } + }, + "spmd covariance parameters": { + "algorithm": { + "estimator": "EmpiricalCovariance" + } + }, + "spmd incremental covariance parameters": { + "algorithm": { + "estimator": "IncrementalEmpiricalCovariance", + "estimator_methods": {"training": "partial_fit"} + } + }, + "spmd pca parameters": { + "algorithm": { + "estimator": "PCA" + } + }, + "spmd incremental pca parameters": { + "algorithm": { + "estimator": "IncrementalPCA", + "estimator_methods": {"training": "partial_fit"} + } + }, + "spmd dbscan parameters": { + "algorithm": { + "estimator": "DBSCAN" + } + }, + "spmd kmeans parameters": { + "algorithm": { + "estimator": "KMeans" + }, + "estimator_params": { + "algorithm": "lloyd" + } + }, + "spmd rfcls parameters": { + "algorithm": { + "estimator": "RandomForestClassifier" + } + }, + "spmd knncls parameters": { + "algorithm": { + "estimator": "KNeighborsClassifier" + } + }, + "spmd logreg parameters": { + "algorithm": { + "estimator": "LogisticRegression" + }, + "estimator_params": { + "solver": "newton-cg" + } + }, + "spmd rfreg parameters": { + "algorithm": { + "estimator": "RandomForestRegressor" + } + }, + "spmd knnreg parameters": { + "algorithm": { + "estimator": "KNeighborsRegressor" + } + }, + "spmd linreg parameters": { + "algorithm": { + "estimator": "LinearRegression" + } + }, + "spmd incremental linreg parameters": { + "algorithm": { + "estimator": "IncrementalLinearRegression", + "estimator_methods": {"training": "partial_fit"} + } + }, + "synthetic stat data": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000, "n_features": 10, "centers": 1 } } + ] + }, + "synthetic classification data": { + "data": [ + { "source": "make_classification", "split_kwargs": { "train_size": 1000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 2000, "n_features": 10, "n_classes": 2 } } + ] + }, + "synthetic regression data": { + "data": [ + { "source": "make_regression", "split_kwargs": { "train_size": 1000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 2000, "n_features": 10 } } + ] + }, + "universal": { + "sklearnex_context": { "use_raw_input": [ true, false ] } + } + }, + "TEMPLATES": { + "basicstats": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd basicstats parameters" + ] + }, + "incremental basicstats": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd incremental basicstats parameters" + ] + }, + "pca": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd pca parameters" + ] + }, + "incremental pca": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd incremental pca parameters" + ] + }, + "covariance": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd covariance parameters" + ] + }, + "incremental covariance": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd incremental covariance parameters" + ] + }, + "dbscan": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd dbscan parameters" + ] + }, + "kmeans": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic stat data", + "universal", + "spmd kmeans parameters" + ] + }, + "rfcls": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic classification data", + "universal", + "spmd rfcls parameters" + ] + }, + "knncls": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic classification data", + "universal", + "spmd knncls parameters" + ] + }, + "logreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic classification data", + "universal", + "spmd logreg parameters" + ] + }, + "rfreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic regression data", + "universal", + "spmd rfreg parameters" + ] + }, + "knnreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic regression data", + "universal", + "spmd knnreg parameters" + ] + }, + "linreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic regression data", + "universal", + "spmd linreg parameters" + ] + }, + "incremental linreg": { + "SETS": [ + "sklearnex spmd implementation", + "large scale default parameters", + "synthetic regression data", + "universal", + "spmd incremental linreg parameters" + ] + } + } +} From 283bcd562e2733986da6db26bb49e9ba175fa833 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 20 Jan 2025 23:42:48 -0800 Subject: [PATCH 2/2] oops indeed --- configs/spmd/large_scale/full.json | 245 ----------------------------- 1 file changed, 245 deletions(-) delete mode 100644 configs/spmd/large_scale/full.json diff --git a/configs/spmd/large_scale/full.json b/configs/spmd/large_scale/full.json deleted file mode 100644 index 02ddfe9b..00000000 --- a/configs/spmd/large_scale/full.json +++ /dev/null @@ -1,245 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd basicstats parameters": { - "algorithm": { - "estimator": "BasicStatistics" - } - }, - "spmd incremental basicstats parameters": { - "algorithm": { - "estimator": "IncrementalBasicStatistics", - "estimator_methods": {"training": "partial_fit"} - } - }, - "spmd covariance parameters": { - "algorithm": { - "estimator": "EmpiricalCovariance" - } - }, - "spmd incremental covariance parameters": { - "algorithm": { - "estimator": "IncrementalEmpiricalCovariance", - "estimator_methods": {"training": "partial_fit"} - } - }, - "spmd pca parameters": { - "algorithm": { - "estimator": "PCA" - } - }, - "spmd incremental pca parameters": { - "algorithm": { - "estimator": "IncrementalPCA", - "estimator_methods": {"training": "partial_fit"} - } - }, - "spmd dbscan parameters": { - "algorithm": { - "estimator": "DBSCAN" - } - }, - "spmd kmeans parameters": { - "algorithm": { - "estimator": "KMeans" - }, - "estimator_params": { - "algorithm": "lloyd" - } - }, - "spmd rfcls parameters": { - "algorithm": { - "estimator": "RandomForestClassifier" - } - }, - "spmd knncls parameters": { - "algorithm": { - "estimator": "KNeighborsClassifier" - } - }, - "spmd logreg parameters": { - "algorithm": { - "estimator": "LogisticRegression" - }, - "estimator_params": { - "solver": "newton-cg" - } - }, - "spmd rfreg parameters": { - "algorithm": { - "estimator": "RandomForestRegressor" - } - }, - "spmd knnreg parameters": { - "algorithm": { - "estimator": "KNeighborsRegressor" - } - }, - "spmd linreg parameters": { - "algorithm": { - "estimator": "LinearRegression" - } - }, - "spmd incremental linreg parameters": { - "algorithm": { - "estimator": "IncrementalLinearRegression", - "estimator_methods": {"training": "partial_fit"} - } - }, - "synthetic stat data": { - "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000, "n_features": 10, "centers": 1 } } - ] - }, - "synthetic classification data": { - "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 1000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 2000, "n_features": 10, "n_classes": 2 } } - ] - }, - "synthetic regression data": { - "data": [ - { "source": "make_regression", "split_kwargs": { "train_size": 1000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 2000, "n_features": 10 } } - ] - }, - "universal": { - "sklearnex_context": { "use_raw_input": [ true, false ] } - } - }, - "TEMPLATES": { - "basicstats": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd basicstats parameters" - ] - }, - "incremental basicstats": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd incremental basicstats parameters" - ] - }, - "pca": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd pca parameters" - ] - }, - "incremental pca": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd incremental pca parameters" - ] - }, - "covariance": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd covariance parameters" - ] - }, - "incremental covariance": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd incremental covariance parameters" - ] - }, - "dbscan": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd dbscan parameters" - ] - }, - "kmeans": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic stat data", - "universal", - "spmd kmeans parameters" - ] - }, - "rfcls": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic classification data", - "universal", - "spmd rfcls parameters" - ] - }, - "knncls": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic classification data", - "universal", - "spmd knncls parameters" - ] - }, - "logreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic classification data", - "universal", - "spmd logreg parameters" - ] - }, - "rfreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic regression data", - "universal", - "spmd rfreg parameters" - ] - }, - "knnreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic regression data", - "universal", - "spmd knnreg parameters" - ] - }, - "linreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic regression data", - "universal", - "spmd linreg parameters" - ] - }, - "incremental linreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale default parameters", - "synthetic regression data", - "universal", - "spmd incremental linreg parameters" - ] - } - } -}