From 80257199e245c0685664f541219ee97533f6a1cc Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Mon, 7 Oct 2024 21:45:55 +0000 Subject: [PATCH 1/4] dbscan large scale support and logreg details --- configs/spmd/large_scale/dbscan_strong.json | 32 +++++++++++++++++++++ configs/spmd/large_scale/large_scale.json | 27 +++++++++++++++++ configs/spmd/large_scale/logreg_strong.json | 2 +- sklbench/utils/measurement.py | 7 +++++ 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 configs/spmd/large_scale/dbscan_strong.json diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json new file mode 100644 index 00000000..1843cd8c --- /dev/null +++ b/configs/spmd/large_scale/dbscan_strong.json @@ -0,0 +1,32 @@ +{ + "INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"], + "PARAMETERS_SETS": { + "spmd dbscan parameters": { + "algorithm": { + "estimator": "DBSCAN", + "estimator_methods": { + "training": "fit" + } + }, + "data": { + "dtype": "float64" + } + }, + "synthetic dataset": { + "data": [ + { "source": "make_blobs", "generation_kwargs": { "n_samples": 400000, "n_features": 100, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } } + ] + } + }, + "TEMPLATES": { + "dbscan": { + "SETS": [ + "common dbscan parameters", + "synthetic dataset", + "sklearnex spmd implementation", + "large scale strong parameters", + "spmd dbscan parameters" + ] + } + } +} diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json index 72b808fe..06a8db16 100644 --- a/configs/spmd/large_scale/large_scale.json +++ b/configs/spmd/large_scale/large_scale.json @@ -27,6 +27,24 @@ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, + "large scale <64 parameters": { + "data": { + "dtype": "float64", + "distributed_split": "None" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, + "large scale >64 parameters": { + "data": { + "dtype": "float64", + "distributed_split": "None" + }, + "bench": { + "mpi_params": {"n": [768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, "large scale strong 2k parameters": { "data": { "dtype": "float64", @@ -36,6 +54,15 @@ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, + "large scale strong <64 parameters": { + "data": { + "dtype": "float64", + "distributed_split": "rank_based" + }, + "bench": { + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + } + }, "large scale impi parameters": { "data": { "dtype": "float64", diff --git a/configs/spmd/large_scale/logreg_strong.json b/configs/spmd/large_scale/logreg_strong.json index 2bf1c0f9..8787f6b6 100644 --- a/configs/spmd/large_scale/logreg_strong.json +++ b/configs/spmd/large_scale/logreg_strong.json @@ -5,7 +5,7 @@ "algorithm":{ "estimator": "LogisticRegression", "estimator_methods": { "inference": "predict" }, - "estimator_params": { "max_iter": 30 } + "estimator_params": { "max_iter": 16 } } }, "synthetic data": { diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py index df74e8da..ea86d29f 100644 --- a/sklbench/utils/measurement.py +++ b/sklbench/utils/measurement.py @@ -72,12 +72,16 @@ def measure_time( ) times = [] func_return_value = None + inners, iters = [], [] while len(times) < n_runs: if enable_itt and itt_is_available: itt.resume() t0 = timeit.default_timer() func_return_value = func(*args, **kwargs) t1 = timeit.default_timer() + if hasattr(func.__self__, "_n_inner_iter"): + inners.append(func.__self__._n_inner_iter) + iters.append(func.__self__.n_iter_) if enable_itt and itt_is_available: itt.pause() times.append(t1 - t0) @@ -88,6 +92,9 @@ def measure_time( f"exceeded time limit ({time_limit} seconds)" ) break + from mpi4py import MPI + if MPI.COMM_WORLD.Get_rank() == 0: + logger.debug("iters across n runs: " + str(iters) + ", inner iters across n runs: " + str(inners)) logger.debug(times) #mean, std = box_filter(times) #if std / mean > std_mean_ratio: From e68edd5389c2cb8302a126f6d41a326e7ab66d3b Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Tue, 15 Oct 2024 23:41:37 +0000 Subject: [PATCH 2/4] configs nearly finalized + minor job updates --- configs/spmd/large_scale/basic_stats.json | 2 +- .../spmd/large_scale/basic_stats_single.json | 30 ----- .../spmd/large_scale/basic_stats_strong.json | 2 +- configs/spmd/large_scale/covariance.json | 2 +- .../spmd/large_scale/covariance_strong.json | 2 +- configs/spmd/large_scale/dbscan.json | 7 +- configs/spmd/large_scale/dbscan_strong.json | 7 +- configs/spmd/large_scale/forest.json | 9 +- configs/spmd/large_scale/forest_reg.json | 27 ----- configs/spmd/large_scale/forest_strong.json | 7 +- .../spmd/large_scale/forest_strong_reg.json | 27 ----- configs/spmd/large_scale/kmeans.json | 6 +- configs/spmd/large_scale/kmeans_strong.json | 4 +- configs/spmd/large_scale/kmeans_strong_2.json | 31 ------ configs/spmd/large_scale/knn.json | 6 +- configs/spmd/large_scale/knn_strong.json | 8 +- configs/spmd/large_scale/large_scale.json | 105 +----------------- .../{linear_model.json => linreg.json} | 2 +- ...r_model_strong.json => linreg_strong.json} | 2 +- configs/spmd/large_scale/logreg.json | 6 +- configs/spmd/large_scale/logreg_2.json | 29 ----- configs/spmd/large_scale/logreg_strong.json | 7 +- configs/spmd/large_scale/logreg_strong_2.json | 28 ----- configs/spmd/large_scale/pca.json | 2 +- configs/spmd/large_scale/pca_single.json | 30 ----- configs/spmd/large_scale/pca_strong.json | 2 +- sklbench/benchmarks/sklearn_estimator.py | 12 +- sklbench/datasets/common.py | 4 +- 28 files changed, 59 insertions(+), 347 deletions(-) delete mode 100644 configs/spmd/large_scale/basic_stats_single.json delete mode 100644 configs/spmd/large_scale/forest_reg.json delete mode 100644 configs/spmd/large_scale/forest_strong_reg.json delete mode 100644 configs/spmd/large_scale/kmeans_strong_2.json rename configs/spmd/large_scale/{linear_model.json => linreg.json} (90%) rename configs/spmd/large_scale/{linear_model_strong.json => linreg_strong.json} (88%) delete mode 100644 configs/spmd/large_scale/logreg_2.json delete mode 100644 configs/spmd/large_scale/logreg_strong_2.json delete mode 100644 configs/spmd/large_scale/pca_single.json diff --git a/configs/spmd/large_scale/basic_stats.json b/configs/spmd/large_scale/basic_stats.json index 9ac4725f..d6c2c4d2 100644 --- a/configs/spmd/large_scale/basic_stats.json +++ b/configs/spmd/large_scale/basic_stats.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd basicstats parameters": { "algorithm": { diff --git a/configs/spmd/large_scale/basic_stats_single.json b/configs/spmd/large_scale/basic_stats_single.json deleted file mode 100644 index 832bd3b2..00000000 --- a/configs/spmd/large_scale/basic_stats_single.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd basicstats parameters": { - "algorithm": { - "estimator": "BasicStatistics", - "estimator_methods": { "training": "fit" } - }, - "data": { - "split_kwargs": { "test_size": 0.0001 } - } - }, - "synthetic data": { - "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } }, - { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } } - ] - } - }, - "TEMPLATES": { - "basicstats": { - "SETS": [ - "sklearnex spmd implementation", - "large scale one node parameters", - "synthetic data", - "spmd basicstats parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/basic_stats_strong.json b/configs/spmd/large_scale/basic_stats_strong.json index b7aa22cb..b5b0ef69 100644 --- a/configs/spmd/large_scale/basic_stats_strong.json +++ b/configs/spmd/large_scale/basic_stats_strong.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd basicstats parameters": { "algorithm": { diff --git a/configs/spmd/large_scale/covariance.json b/configs/spmd/large_scale/covariance.json index 260befd0..20da8d15 100644 --- a/configs/spmd/large_scale/covariance.json +++ b/configs/spmd/large_scale/covariance.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd basicstats parameters": { "algorithm": { diff --git a/configs/spmd/large_scale/covariance_strong.json b/configs/spmd/large_scale/covariance_strong.json index 568b4a8f..b8424d92 100644 --- a/configs/spmd/large_scale/covariance_strong.json +++ b/configs/spmd/large_scale/covariance_strong.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd basicstats parameters": { "algorithm": { diff --git a/configs/spmd/large_scale/dbscan.json b/configs/spmd/large_scale/dbscan.json index 0660e869..61b0521e 100644 --- a/configs/spmd/large_scale/dbscan.json +++ b/configs/spmd/large_scale/dbscan.json @@ -6,6 +6,9 @@ "estimator": "DBSCAN", "estimator_methods": { "training": "fit" + }, + "estimator_params" : { + "eps": 10, "min_samples": 5 } }, "data": { @@ -14,7 +17,7 @@ }, "synthetic dataset": { "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 100, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } } + { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 100, "centers": 10 } } ] } }, @@ -24,7 +27,7 @@ "common dbscan parameters", "synthetic dataset", "sklearnex spmd implementation", - "large scale default parameters", + "large scale <64 parameters", "spmd dbscan parameters" ] } diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json index e591316e..24ea7cfc 100644 --- a/configs/spmd/large_scale/dbscan_strong.json +++ b/configs/spmd/large_scale/dbscan_strong.json @@ -6,7 +6,10 @@ "estimator": "DBSCAN", "estimator_methods": { "training": "fit" - } + }, + "estimator_params" : { + "eps": 10, "min_samples": 5 + } }, "data": { "dtype": "float64" @@ -14,7 +17,7 @@ }, "synthetic dataset": { "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 500000, "n_features": 100, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } } + { "source": "make_blobs", "generation_kwargs": { "n_samples": 500000, "n_features": 100, "centers": 10 } } ] } }, diff --git a/configs/spmd/large_scale/forest.json b/configs/spmd/large_scale/forest.json index 2d9dfde9..b4402442 100644 --- a/configs/spmd/large_scale/forest.json +++ b/configs/spmd/large_scale/forest.json @@ -1,16 +1,17 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd forest classification parameters": { "algorithm": { "estimator": "RandomForestClassifier", - "estimator_methods": { "training": "fit" } + "estimator_methods": { "training": "fit" }, + "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }, "synthetic data": { "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }, - { "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 11000, "n_features": 1000, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } } + { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 10, "n_classes": 2 } }, + { "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 11000, "n_features": 1000, "n_classes": 2 } } ] } }, diff --git a/configs/spmd/large_scale/forest_reg.json b/configs/spmd/large_scale/forest_reg.json deleted file mode 100644 index a5ec73cd..00000000 --- a/configs/spmd/large_scale/forest_reg.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd forest regression parameters": { - "algorithm": { - "estimator": "RandomForestRegressor" - } - }, - "synthetic data": { - "data": [ - { "source": "make_regression", "generation_kwargs": { "n_samples": 501000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } }}, - { "source": "make_regression", "generation_kwargs": { "n_samples": 11000, "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } }} - - ] - } - }, - "TEMPLATES": { - "forestReg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale 2k parameters", - "synthetic data", - "spmd forest regression parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/forest_strong.json b/configs/spmd/large_scale/forest_strong.json index 17ca8c51..23b982f5 100644 --- a/configs/spmd/large_scale/forest_strong.json +++ b/configs/spmd/large_scale/forest_strong.json @@ -1,15 +1,16 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd forest classification parameters": { "algorithm": { "estimator": "RandomForestClassifier", - "estimator_methods": { "training": "fit" } + "estimator_methods": { "training": "fit" }, + "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }, "synthetic data": { "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } } + { "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 } } ] } }, diff --git a/configs/spmd/large_scale/forest_strong_reg.json b/configs/spmd/large_scale/forest_strong_reg.json deleted file mode 100644 index 305e729b..00000000 --- a/configs/spmd/large_scale/forest_strong_reg.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd forest regression parameters": { - "algorithm": { - "estimator": "RandomForestRegressor" - } - }, - "synthetic data": { - "data": [ - { "source": "make_regression", "generation_kwargs": { "n_samples": 1000000, "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 900000, "test_size": 10000 }, "algorithm": { "estimator_params": { "n_estimators": 5, "max_depth": 4 } }}, - { "source": "make_regression", "generation_kwargs": { "n_samples": 100000, "n_features": 100, "noise": 1.25 }, "split_kwargs": { "train_size": 90000, "test_size": 10000 }, "algorithm": { "estimator_params": { "n_estimators": 10, "max_depth": 4 } }} - - ] - } - }, - "TEMPLATES": { - "forestReg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale strong 32 parameters", - "synthetic data", - "spmd forest regression parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/kmeans.json b/configs/spmd/large_scale/kmeans.json index c77d22bc..1140823d 100644 --- a/configs/spmd/large_scale/kmeans.json +++ b/configs/spmd/large_scale/kmeans.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd kmeans parameters": { "algorithm": { @@ -12,8 +12,8 @@ }, "synthetic data": { "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 3750000, "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } }, - { "source": "make_blobs", "generation_kwargs": { "n_samples": 18750, "n_features": 1000, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } } + { "source": "make_blobs", "generation_kwargs": { "n_samples": 5000000, "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } }, + { "source": "make_blobs", "generation_kwargs": { "n_samples": 30000, "n_features": 1000, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } } ] } }, diff --git a/configs/spmd/large_scale/kmeans_strong.json b/configs/spmd/large_scale/kmeans_strong.json index 6f095af0..6277745b 100644 --- a/configs/spmd/large_scale/kmeans_strong.json +++ b/configs/spmd/large_scale/kmeans_strong.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd kmeans parameters": { "algorithm": { @@ -23,7 +23,7 @@ "SETS": [ "synthetic data", "sklearnex spmd implementation", - "large scale strong 32 parameters", + "large scale strong <64 parameters", "spmd kmeans parameters" ] } diff --git a/configs/spmd/large_scale/kmeans_strong_2.json b/configs/spmd/large_scale/kmeans_strong_2.json deleted file mode 100644 index 03f2bc59..00000000 --- a/configs/spmd/large_scale/kmeans_strong_2.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd kmeans parameters": { - "algorithm": { - "estimator": "KMeans", - "estimator_params": { - "algorithm": "lloyd" - }, - "estimator_methods": { "training": "fit", "inference": "predict" } - } - }, - "synthetic data": { - "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 5000000, "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } }, - { "source": "make_blobs", "generation_kwargs": { "n_samples": 30000, "n_features": 1000, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } }, - { "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000, "n_features": 100, "centers": 100 }, "algorithm": { "n_clusters": 100, "max_iter": 100 } } - ] - } - }, - "TEMPLATES": { - "kmeans": { - "SETS": [ - "synthetic data", - "sklearnex spmd implementation", - "large scale strong two nodes parameters", - "spmd kmeans parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/knn.json b/configs/spmd/large_scale/knn.json index f1e0678d..b68b94af 100644 --- a/configs/spmd/large_scale/knn.json +++ b/configs/spmd/large_scale/knn.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/knn.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd knn cls parameters": { "algorithm": { @@ -19,15 +19,13 @@ }, "synthetic classification data": { "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 5000 }, "generation_kwargs": { "n_samples": 55000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, - { "source": "make_classification", "split_kwargs": { "train_size": 5000, "test_size": 50000 }, "generation_kwargs": { "n_samples": 55000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } + { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 5000 }, "generation_kwargs": { "n_samples": 5005000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } ] } }, "TEMPLATES": { "knn classifier": { "SETS": [ - "common knn parameters", "synthetic classification data", "sklearnex spmd implementation", "large scale 2k parameters", diff --git a/configs/spmd/large_scale/knn_strong.json b/configs/spmd/large_scale/knn_strong.json index 67398123..7fe862dd 100644 --- a/configs/spmd/large_scale/knn_strong.json +++ b/configs/spmd/large_scale/knn_strong.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/knn.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd knn cls parameters": { "algorithm": { @@ -19,18 +19,16 @@ }, "synthetic classification data": { "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 5000 }, "generation_kwargs": { "n_samples": 505000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } }, - { "source": "make_classification", "split_kwargs": { "train_size": 5000, "test_size": 500000 }, "generation_kwargs": { "n_samples": 505000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } + { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000000 }, "generation_kwargs": { "n_samples": 1500000, "n_features": 100, "n_classes": 2, "n_informative": "[SPECIAL_VALUE]0.5" } } ] } }, "TEMPLATES": { "knn classifier": { "SETS": [ - "common knn parameters", "synthetic classification data", "sklearnex spmd implementation", - "large scale strong 32 parameters", + "large scale strong <64 parameters", "spmd knn cls parameters" ] } diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json index 832259a0..7e523984 100644 --- a/configs/spmd/large_scale/large_scale.json +++ b/configs/spmd/large_scale/large_scale.json @@ -18,49 +18,22 @@ "mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, - "large scale one node parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [1,2,3,4,5,6,7,8,9,10,11,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - "large scale strong one node parameters": { - "data": { - "dtype": "float64", - "distributed_split": "rank_based" - }, - "bench": { - "mpi_params": {"n": [1,2,3,4,5,6,7,8,9,10,11,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - "large scale full one node parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - "large scale strong full one node parameters": { + "large scale 2k parameters": { "data": { "dtype": "float64", - "distributed_split": "rank_based" + "distributed_split": "None" }, "bench": { - "mpi_params": {"n": [12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, - "large scale 2k parameters": { + "large scale 32 parameters": { "data": { "dtype": "float64", "distributed_split": "None" }, "bench": { - "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } + "mpi_params": {"n": [1,2,6,12,24,48,96,192,384], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, "large scale <64 parameters": { @@ -82,65 +55,6 @@ } }, - "large scale 128 parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [1536], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - - "large scale 256 parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [3072], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - - "large scale 512 parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [6144], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - - "large scale 1024 parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [12288], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - - "large scale 2048 parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - - "large scale two nodes parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [24], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, "large scale strong 2k parameters": { "data": { "dtype": "float64", @@ -159,15 +73,6 @@ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, - "large scale strong two nodes parameters": { - "data": { - "dtype": "float64", - "distributed_split": "rank_based" - }, - "bench": { - "mpi_params": {"n": [24], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, "large scale impi parameters": { "data": { "dtype": "float64", diff --git a/configs/spmd/large_scale/linear_model.json b/configs/spmd/large_scale/linreg.json similarity index 90% rename from configs/spmd/large_scale/linear_model.json rename to configs/spmd/large_scale/linreg.json index f9d17b5b..ea45a52c 100644 --- a/configs/spmd/large_scale/linear_model.json +++ b/configs/spmd/large_scale/linreg.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/linear_model.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd linear parameters": { "algorithm": { diff --git a/configs/spmd/large_scale/linear_model_strong.json b/configs/spmd/large_scale/linreg_strong.json similarity index 88% rename from configs/spmd/large_scale/linear_model_strong.json rename to configs/spmd/large_scale/linreg_strong.json index 77a9c79e..629bf544 100644 --- a/configs/spmd/large_scale/linear_model_strong.json +++ b/configs/spmd/large_scale/linreg_strong.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/linear_model.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd linear parameters": { "algorithm": { diff --git a/configs/spmd/large_scale/logreg.json b/configs/spmd/large_scale/logreg.json index c5ef6203..326f2580 100644 --- a/configs/spmd/large_scale/logreg.json +++ b/configs/spmd/large_scale/logreg.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd logreg2 parameters": { "algorithm":{ @@ -11,12 +11,12 @@ "synthetic data": { "data": [ { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 10, "n_classes": 2 } }, - { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 101000, "n_features": 1000, "n_classes": 2 } } + { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 501000, "n_features": 1000, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } } ] } }, "TEMPLATES": { - "linreg": { + "logreg": { "SETS": [ "sklearnex spmd implementation", "large scale 2k parameters", diff --git a/configs/spmd/large_scale/logreg_2.json b/configs/spmd/large_scale/logreg_2.json deleted file mode 100644 index 796eb8ad..00000000 --- a/configs/spmd/large_scale/logreg_2.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd logreg2 parameters": { - "algorithm":{ - "estimator": "LogisticRegression", - "estimator_methods": { "inference": "predict" }, - "estimator_params": { "max_iter": 20 } - } - }, - "synthetic data": { - "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 5001000, "n_features": 10, "n_classes": 2 } }, - { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 101000, "n_features": 1000, "n_classes": 2 } } - ] - } - }, - "TEMPLATES": { - "linreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale two nodes parameters", - "spmd logreg parameters", - "synthetic data", - "spmd logreg2 parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/logreg_strong.json b/configs/spmd/large_scale/logreg_strong.json index 8787f6b6..0b79ba9d 100644 --- a/configs/spmd/large_scale/logreg_strong.json +++ b/configs/spmd/large_scale/logreg_strong.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd logreg2 parameters": { "algorithm":{ @@ -10,12 +10,13 @@ }, "synthetic data": { "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 } } + { "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 } }, + { "source": "make_classification", "split_kwargs": { "train_size": 12000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 12001000, "n_features": 200, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } } ] } }, "TEMPLATES": { - "linreg": { + "logreg": { "SETS": [ "sklearnex spmd implementation", "large scale strong 2k parameters", diff --git a/configs/spmd/large_scale/logreg_strong_2.json b/configs/spmd/large_scale/logreg_strong_2.json deleted file mode 100644 index 998e3bb7..00000000 --- a/configs/spmd/large_scale/logreg_strong_2.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd logreg2 parameters": { - "algorithm":{ - "estimator": "LogisticRegression", - "estimator_methods": { "inference": "predict" }, - "estimator_params": { "max_iter": 30 } - } - }, - "synthetic data": { - "data": [ - { "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 }, "generation_kwargs": { "n_samples": 10001000, "n_features": 100, "n_classes": 2 } } - ] - } - }, - "TEMPLATES": { - "linreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale strong two nodes parameters", - "spmd logreg parameters", - "synthetic data", - "spmd logreg2 parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/pca.json b/configs/spmd/large_scale/pca.json index 9a6a6b02..d0ee879a 100644 --- a/configs/spmd/large_scale/pca.json +++ b/configs/spmd/large_scale/pca.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/pca.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd pca parameters": { "algorithm": { diff --git a/configs/spmd/large_scale/pca_single.json b/configs/spmd/large_scale/pca_single.json deleted file mode 100644 index 07775a6a..00000000 --- a/configs/spmd/large_scale/pca_single.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "INCLUDE": ["../../common/sklearn.json", "../../regular/pca.json", "large_scale.json"], - "PARAMETERS_SETS": { - "spmd pca parameters": { - "algorithm": { - "estimator": "PCA", - "estimator_methods": { "training": "fit", "inference": "" } - }, - "data": { - "split_kwargs": { "test_size": 0.0001 } - } - }, - "synthetic data": { - "data": [ - { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000, "n_features": 10, "centers": 1 } }, - { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000, "n_features": 1000, "centers": 1 } } - ] - } - }, - "TEMPLATES": { - "linreg": { - "SETS": [ - "sklearnex spmd implementation", - "large scale one node parameters", - "synthetic data", - "spmd pca parameters" - ] - } - } -} diff --git a/configs/spmd/large_scale/pca_strong.json b/configs/spmd/large_scale/pca_strong.json index 9063c22e..3cb33e72 100644 --- a/configs/spmd/large_scale/pca_strong.json +++ b/configs/spmd/large_scale/pca_strong.json @@ -1,5 +1,5 @@ { - "INCLUDE": ["../../common/sklearn.json", "../../regular/pca.json", "large_scale.json"], + "INCLUDE": ["../../common/sklearn.json", "large_scale.json"], "PARAMETERS_SETS": { "spmd pca parameters": { "algorithm": { diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index b4d4f3ee..36ec40b6 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -66,8 +66,8 @@ def get_estimator(library_name: str, estimator_name: str): f"Using first {classes_map[estimator_name][0]}." ) estimator = classes_map[estimator_name][0] - if not issubclass(estimator, BaseEstimator): - logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator") + #if not issubclass(estimator, BaseEstimator): + # logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator") return estimator @@ -515,7 +515,11 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): estimator_params = get_bench_case_value( bench_case, "algorithm:estimator_params", dict() ) - + #logger.debug("estimator params: " + str(estimator_params)) + if "DBSCAN" in str(estimator_name): + if "min_samples" in estimator_params: + from mpi4py import MPI + estimator_params["min_samples"] = MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"] # get estimator methods for measurement estimator_methods = get_estimator_methods(bench_case) @@ -551,7 +555,7 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): # note: "handle" is not JSON-serializable if "handle" in estimator_params: del estimator_params["handle"] - logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}") + #logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}") result_template.update(estimator_params) data_descs = { diff --git a/sklbench/datasets/common.py b/sklbench/datasets/common.py index e7ed0160..5c6bd27a 100644 --- a/sklbench/datasets/common.py +++ b/sklbench/datasets/common.py @@ -136,11 +136,11 @@ def cache_wrapper(**kwargs): data_name = kwargs["data_name"] data_cache = kwargs["data_cache"] if len(get_filenames_by_prefix(data_cache, data_name)) > 0: - logger.info(f'Loading "{data_name}" dataset from cache files') + #logger.info(f'Loading "{data_name}" dataset from cache files') data = load_data_from_cache(data_cache, data_name) data_desc = load_data_description(data_cache, data_name) else: - logger.info(f'Loading "{data_name}" dataset from scratch') + #logger.info(f'Loading "{data_name}" dataset from scratch') data, data_desc = function(**kwargs) save_data_to_cache(data, data_cache, data_name) save_data_description(data_desc, data_cache, data_name) From e8344932c33cf07f095c6a0de33ab9fdcbe18000 Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Wed, 16 Oct 2024 03:55:12 +0000 Subject: [PATCH 3/4] <= --- configs/spmd/large_scale/dbscan.json | 2 +- configs/spmd/large_scale/dbscan_strong.json | 2 +- configs/spmd/large_scale/kmeans_strong.json | 2 +- configs/spmd/large_scale/knn_strong.json | 2 +- configs/spmd/large_scale/large_scale.json | 14 ++------------ 5 files changed, 6 insertions(+), 16 deletions(-) diff --git a/configs/spmd/large_scale/dbscan.json b/configs/spmd/large_scale/dbscan.json index 61b0521e..e4996c9e 100644 --- a/configs/spmd/large_scale/dbscan.json +++ b/configs/spmd/large_scale/dbscan.json @@ -27,7 +27,7 @@ "common dbscan parameters", "synthetic dataset", "sklearnex spmd implementation", - "large scale <64 parameters", + "large scale <=64 parameters", "spmd dbscan parameters" ] } diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json index 24ea7cfc..04fb9016 100644 --- a/configs/spmd/large_scale/dbscan_strong.json +++ b/configs/spmd/large_scale/dbscan_strong.json @@ -27,7 +27,7 @@ "common dbscan parameters", "synthetic dataset", "sklearnex spmd implementation", - "large scale strong <64 parameters", + "large scale strong <=64 parameters", "spmd dbscan parameters" ] } diff --git a/configs/spmd/large_scale/kmeans_strong.json b/configs/spmd/large_scale/kmeans_strong.json index 6277745b..87fb7fac 100644 --- a/configs/spmd/large_scale/kmeans_strong.json +++ b/configs/spmd/large_scale/kmeans_strong.json @@ -23,7 +23,7 @@ "SETS": [ "synthetic data", "sklearnex spmd implementation", - "large scale strong <64 parameters", + "large scale strong <=64 parameters", "spmd kmeans parameters" ] } diff --git a/configs/spmd/large_scale/knn_strong.json b/configs/spmd/large_scale/knn_strong.json index 7fe862dd..d202f6e4 100644 --- a/configs/spmd/large_scale/knn_strong.json +++ b/configs/spmd/large_scale/knn_strong.json @@ -28,7 +28,7 @@ "SETS": [ "synthetic classification data", "sklearnex spmd implementation", - "large scale strong <64 parameters", + "large scale strong <=64 parameters", "spmd knn cls parameters" ] } diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json index 7e523984..4e4c9d0c 100644 --- a/configs/spmd/large_scale/large_scale.json +++ b/configs/spmd/large_scale/large_scale.json @@ -36,7 +36,7 @@ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, - "large scale <64 parameters": { + "large scale <=64 parameters": { "data": { "dtype": "float64", "distributed_split": "None" @@ -45,16 +45,6 @@ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, - "large scale >64 parameters": { - "data": { - "dtype": "float64", - "distributed_split": "None" - }, - "bench": { - "mpi_params": {"n": [768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } - } - }, - "large scale strong 2k parameters": { "data": { "dtype": "float64", @@ -64,7 +54,7 @@ "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" } } }, - "large scale strong <64 parameters": { + "large scale strong <=64 parameters": { "data": { "dtype": "float64", "distributed_split": "rank_based" From 75f2f10e42728437ec6a32b98f76d84546c68b8b Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Wed, 16 Oct 2024 03:59:40 +0000 Subject: [PATCH 4/4] lint --- sklbench/benchmarks/sklearn_estimator.py | 11 +++++++---- sklbench/datasets/common.py | 4 ++-- sklbench/utils/measurement.py | 8 +++++++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py index 36ec40b6..e57a9038 100644 --- a/sklbench/benchmarks/sklearn_estimator.py +++ b/sklbench/benchmarks/sklearn_estimator.py @@ -66,7 +66,7 @@ def get_estimator(library_name: str, estimator_name: str): f"Using first {classes_map[estimator_name][0]}." ) estimator = classes_map[estimator_name][0] - #if not issubclass(estimator, BaseEstimator): + # if not issubclass(estimator, BaseEstimator): # logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator") return estimator @@ -515,11 +515,14 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): estimator_params = get_bench_case_value( bench_case, "algorithm:estimator_params", dict() ) - #logger.debug("estimator params: " + str(estimator_params)) + # logger.debug("estimator params: " + str(estimator_params)) if "DBSCAN" in str(estimator_name): if "min_samples" in estimator_params: from mpi4py import MPI - estimator_params["min_samples"] = MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"] + + estimator_params["min_samples"] = ( + MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"] + ) # get estimator methods for measurement estimator_methods = get_estimator_methods(bench_case) @@ -555,7 +558,7 @@ def main(bench_case: BenchCase, filters: List[BenchCase]): # note: "handle" is not JSON-serializable if "handle" in estimator_params: del estimator_params["handle"] - #logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}") + # logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}") result_template.update(estimator_params) data_descs = { diff --git a/sklbench/datasets/common.py b/sklbench/datasets/common.py index 5c6bd27a..28b62fe6 100644 --- a/sklbench/datasets/common.py +++ b/sklbench/datasets/common.py @@ -136,11 +136,11 @@ def cache_wrapper(**kwargs): data_name = kwargs["data_name"] data_cache = kwargs["data_cache"] if len(get_filenames_by_prefix(data_cache, data_name)) > 0: - #logger.info(f'Loading "{data_name}" dataset from cache files') + # logger.info(f'Loading "{data_name}" dataset from cache files') data = load_data_from_cache(data_cache, data_name) data_desc = load_data_description(data_cache, data_name) else: - #logger.info(f'Loading "{data_name}" dataset from scratch') + # logger.info(f'Loading "{data_name}" dataset from scratch') data, data_desc = function(**kwargs) save_data_to_cache(data, data_cache, data_name) save_data_description(data_desc, data_cache, data_name) diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py index 3628813d..bfabbdc0 100644 --- a/sklbench/utils/measurement.py +++ b/sklbench/utils/measurement.py @@ -93,8 +93,14 @@ def measure_time( ) break from mpi4py import MPI + if MPI.COMM_WORLD.Get_rank() == 0: - logger.debug("iters across n runs: " + str(iters) + ", inner iters across n runs: " + str(inners)) + logger.debug( + "iters across n runs: " + + str(iters) + + ", inner iters across n runs: " + + str(inners) + ) logger.debug(times) # mean, std = box_filter(times) # if std / mean > std_mean_ratio: