From 80257199e245c0685664f541219ee97533f6a1cc Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Mon, 7 Oct 2024 21:45:55 +0000
Subject: [PATCH 1/4] dbscan large scale support and logreg details

---
 configs/spmd/large_scale/dbscan_strong.json | 32 +++++++++++++++++++++
 configs/spmd/large_scale/large_scale.json   | 27 +++++++++++++++++
 configs/spmd/large_scale/logreg_strong.json |  2 +-
 sklbench/utils/measurement.py               |  7 +++++
 4 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 configs/spmd/large_scale/dbscan_strong.json

diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json
new file mode 100644
index 00000000..1843cd8c
--- /dev/null
+++ b/configs/spmd/large_scale/dbscan_strong.json
@@ -0,0 +1,32 @@
+{
+    "INCLUDE": ["../../common/sklearn.json", "../../regular/dbscan.json", "large_scale.json"],
+    "PARAMETERS_SETS": {
+        "spmd dbscan parameters": {
+	    "algorithm": {
+		"estimator": "DBSCAN",
+		"estimator_methods": {
+		    "training": "fit"
+		}
+	    },
+	    "data": {
+		"dtype": "float64"
+	    }
+	},
+	"synthetic dataset": {
+            "data": [
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 400000,  "n_features": 100, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
+            ]
+	}
+    },
+    "TEMPLATES": {
+        "dbscan": {
+            "SETS": [
+                "common dbscan parameters",
+                "synthetic dataset",
+                "sklearnex spmd implementation",
+		"large scale strong parameters",
+                "spmd dbscan parameters"
+            ]
+        }
+    }
+}
diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json
index 72b808fe..06a8db16 100644
--- a/configs/spmd/large_scale/large_scale.json
+++ b/configs/spmd/large_scale/large_scale.json
@@ -27,6 +27,24 @@
                 "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
+        "large scale <64 parameters": {
+            "data": {
+                "dtype": "float64",
+                "distributed_split": "None"
+            },
+            "bench": {
+                "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+            }
+        },
+        "large scale >64 parameters": {
+            "data": {
+                "dtype": "float64",
+                "distributed_split": "None"
+            },
+            "bench": {
+                "mpi_params": {"n": [768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+            }
+        },
         "large scale strong 2k parameters": {
             "data": {
                 "dtype": "float64",
@@ -36,6 +54,15 @@
                 "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
+        "large scale strong <64 parameters": {
+            "data": {
+                "dtype": "float64",
+                "distributed_split": "rank_based"
+            },
+            "bench": {
+                "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+            }
+        },
 	"large scale impi parameters": {
 	    "data": {
 		"dtype": "float64",
diff --git a/configs/spmd/large_scale/logreg_strong.json b/configs/spmd/large_scale/logreg_strong.json
index 2bf1c0f9..8787f6b6 100644
--- a/configs/spmd/large_scale/logreg_strong.json
+++ b/configs/spmd/large_scale/logreg_strong.json
@@ -5,7 +5,7 @@
 	    "algorithm":{
 		"estimator": "LogisticRegression",
                 "estimator_methods": { "inference": "predict" },
-		"estimator_params": { "max_iter": 30 }
+		"estimator_params": { "max_iter": 16 }
             }
 	},
         "synthetic data": {
diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py
index df74e8da..ea86d29f 100644
--- a/sklbench/utils/measurement.py
+++ b/sklbench/utils/measurement.py
@@ -72,12 +72,16 @@ def measure_time(
         )
     times = []
     func_return_value = None
+    inners, iters = [], []
     while len(times) < n_runs:
         if enable_itt and itt_is_available:
             itt.resume()
         t0 = timeit.default_timer()
         func_return_value = func(*args, **kwargs)
         t1 = timeit.default_timer()
+        if hasattr(getattr(func, "__self__", None), "_n_inner_iter"):  # plain functions/partials have no __self__
+            inners.append(func.__self__._n_inner_iter)
+            iters.append(func.__self__.n_iter_)
         if enable_itt and itt_is_available:
             itt.pause()
         times.append(t1 - t0)
@@ -88,6 +92,9 @@ def measure_time(
                 f"exceeded time limit ({time_limit} seconds)"
             )
             break
+    from mpi4py import MPI
+    if MPI.COMM_WORLD.Get_rank() == 0:
+        logger.debug("iters across n runs: " + str(iters) + ", inner iters across n runs: " + str(inners))
     logger.debug(times)
     #mean, std = box_filter(times)
     #if std / mean > std_mean_ratio:

From e68edd5389c2cb8302a126f6d41a326e7ab66d3b Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Tue, 15 Oct 2024 23:41:37 +0000
Subject: [PATCH 2/4] configs nearly finalized + minor job updates

---
 configs/spmd/large_scale/basic_stats.json     |   2 +-
 .../spmd/large_scale/basic_stats_single.json  |  30 -----
 .../spmd/large_scale/basic_stats_strong.json  |   2 +-
 configs/spmd/large_scale/covariance.json      |   2 +-
 .../spmd/large_scale/covariance_strong.json   |   2 +-
 configs/spmd/large_scale/dbscan.json          |   7 +-
 configs/spmd/large_scale/dbscan_strong.json   |   7 +-
 configs/spmd/large_scale/forest.json          |   9 +-
 configs/spmd/large_scale/forest_reg.json      |  27 -----
 configs/spmd/large_scale/forest_strong.json   |   7 +-
 .../spmd/large_scale/forest_strong_reg.json   |  27 -----
 configs/spmd/large_scale/kmeans.json          |   6 +-
 configs/spmd/large_scale/kmeans_strong.json   |   4 +-
 configs/spmd/large_scale/kmeans_strong_2.json |  31 ------
 configs/spmd/large_scale/knn.json             |   6 +-
 configs/spmd/large_scale/knn_strong.json      |   8 +-
 configs/spmd/large_scale/large_scale.json     | 105 +-----------------
 .../{linear_model.json => linreg.json}        |   2 +-
 ...r_model_strong.json => linreg_strong.json} |   2 +-
 configs/spmd/large_scale/logreg.json          |   6 +-
 configs/spmd/large_scale/logreg_2.json        |  29 -----
 configs/spmd/large_scale/logreg_strong.json   |   7 +-
 configs/spmd/large_scale/logreg_strong_2.json |  28 -----
 configs/spmd/large_scale/pca.json             |   2 +-
 configs/spmd/large_scale/pca_single.json      |  30 -----
 configs/spmd/large_scale/pca_strong.json      |   2 +-
 sklbench/benchmarks/sklearn_estimator.py      |  12 +-
 sklbench/datasets/common.py                   |   4 +-
 28 files changed, 59 insertions(+), 347 deletions(-)
 delete mode 100644 configs/spmd/large_scale/basic_stats_single.json
 delete mode 100644 configs/spmd/large_scale/forest_reg.json
 delete mode 100644 configs/spmd/large_scale/forest_strong_reg.json
 delete mode 100644 configs/spmd/large_scale/kmeans_strong_2.json
 rename configs/spmd/large_scale/{linear_model.json => linreg.json} (90%)
 rename configs/spmd/large_scale/{linear_model_strong.json => linreg_strong.json} (88%)
 delete mode 100644 configs/spmd/large_scale/logreg_2.json
 delete mode 100644 configs/spmd/large_scale/logreg_strong_2.json
 delete mode 100644 configs/spmd/large_scale/pca_single.json

diff --git a/configs/spmd/large_scale/basic_stats.json b/configs/spmd/large_scale/basic_stats.json
index 9ac4725f..d6c2c4d2 100644
--- a/configs/spmd/large_scale/basic_stats.json
+++ b/configs/spmd/large_scale/basic_stats.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd basicstats parameters": {
             "algorithm": {
diff --git a/configs/spmd/large_scale/basic_stats_single.json b/configs/spmd/large_scale/basic_stats_single.json
deleted file mode 100644
index 832bd3b2..00000000
--- a/configs/spmd/large_scale/basic_stats_single.json
+++ /dev/null
@@ -1,30 +0,0 @@
-{
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
-    "PARAMETERS_SETS": {
-        "spmd basicstats parameters": {
-            "algorithm": {
-                "estimator": "BasicStatistics",
-                "estimator_methods": { "training": "fit" }
-            },
-	    "data": {
-		"split_kwargs": { "test_size": 0.0001 }
-	    }
-        },
-        "synthetic data": {
-            "data": [
-                { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000,  "n_features": 10, "centers": 1 } },
-		        { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000,  "n_features": 1000, "centers": 1 } }
-            ]
-        }
-    },
-    "TEMPLATES": {
-        "basicstats": {
-            "SETS": [
-                "sklearnex spmd implementation",
-                "large scale one node parameters",
-		"synthetic data",
-                "spmd basicstats parameters"
-            ]
-        }
-    }
-}
diff --git a/configs/spmd/large_scale/basic_stats_strong.json b/configs/spmd/large_scale/basic_stats_strong.json
index b7aa22cb..b5b0ef69 100644
--- a/configs/spmd/large_scale/basic_stats_strong.json
+++ b/configs/spmd/large_scale/basic_stats_strong.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd basicstats parameters": {
             "algorithm": {
diff --git a/configs/spmd/large_scale/covariance.json b/configs/spmd/large_scale/covariance.json
index 260befd0..20da8d15 100644
--- a/configs/spmd/large_scale/covariance.json
+++ b/configs/spmd/large_scale/covariance.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd basicstats parameters": {
             "algorithm": {
diff --git a/configs/spmd/large_scale/covariance_strong.json b/configs/spmd/large_scale/covariance_strong.json
index 568b4a8f..b8424d92 100644
--- a/configs/spmd/large_scale/covariance_strong.json
+++ b/configs/spmd/large_scale/covariance_strong.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd basicstats parameters": {
             "algorithm": {
diff --git a/configs/spmd/large_scale/dbscan.json b/configs/spmd/large_scale/dbscan.json
index 0660e869..61b0521e 100644
--- a/configs/spmd/large_scale/dbscan.json
+++ b/configs/spmd/large_scale/dbscan.json
@@ -6,6 +6,9 @@
 		"estimator": "DBSCAN",
 		"estimator_methods": {
 		    "training": "fit"
+		},
+		"estimator_params" : {
+			"eps": 10, "min_samples": 5
 		}
 	    },
 	    "data": {
@@ -14,7 +17,7 @@
 	},
 	"synthetic dataset": {
             "data": [
-                { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000,  "n_features": 100, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000,  "n_features": 100, "centers": 10 } }
             ]
 	}
     },
@@ -24,7 +27,7 @@
                 "common dbscan parameters",
                 "synthetic dataset",
                 "sklearnex spmd implementation",
-		"large scale default parameters",
+		"large scale <64 parameters",
                 "spmd dbscan parameters"
             ]
         }
diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json
index e591316e..24ea7cfc 100644
--- a/configs/spmd/large_scale/dbscan_strong.json
+++ b/configs/spmd/large_scale/dbscan_strong.json
@@ -6,7 +6,10 @@
 		"estimator": "DBSCAN",
 		"estimator_methods": {
 		    "training": "fit"
-		}
+		},
+                "estimator_params" : {
+                        "eps": 10, "min_samples": 5
+                }
 	    },
 	    "data": {
 		"dtype": "float64"
@@ -14,7 +17,7 @@
 	},
 	"synthetic dataset": {
             "data": [
-                { "source": "make_blobs", "generation_kwargs": { "n_samples": 500000,  "n_features": 100, "centers": 10 }, "algorithm": { "eps": 5, "min_samples": 5 } }
+                { "source": "make_blobs", "generation_kwargs": { "n_samples": 500000,  "n_features": 100, "centers": 10 } }
             ]
 	}
     },
diff --git a/configs/spmd/large_scale/forest.json b/configs/spmd/large_scale/forest.json
index 2d9dfde9..b4402442 100644
--- a/configs/spmd/large_scale/forest.json
+++ b/configs/spmd/large_scale/forest.json
@@ -1,16 +1,17 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd forest classification parameters": {
             "algorithm": {
                 "estimator": "RandomForestClassifier",
-		"estimator_methods": { "training": "fit" }
+		"estimator_methods": { "training": "fit" },
+		"estimator_params": { "n_estimators": 20, "max_depth": 4 }
             }
         },
         "synthetic data": {
             "data": [
-                { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 501000, "n_features": 10, "n_classes": 2 },  "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } },
-                { "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 11000, "n_features": 1000, "n_classes": 2 },  "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
+                { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 501000, "n_features": 10, "n_classes": 2 } },
+                { "source": "make_classification", "split_kwargs": { "train_size": 10000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 11000, "n_features": 1000, "n_classes": 2 } }
             ]
         }
     },
diff --git a/configs/spmd/large_scale/forest_reg.json b/configs/spmd/large_scale/forest_reg.json
deleted file mode 100644
index a5ec73cd..00000000
--- a/configs/spmd/large_scale/forest_reg.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
-    "PARAMETERS_SETS": {
-        "spmd forest regression parameters": {
-            "algorithm": {
-                "estimator": "RandomForestRegressor"
-            }
-        },
-        "synthetic data": {
-            "data": [
-                { "source": "make_regression", "generation_kwargs": { "n_samples": 501000,  "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 500000, "test_size": 1000 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } }},
-                { "source": "make_regression", "generation_kwargs": { "n_samples": 11000,  "n_features": 1000, "noise": 1.25 }, "split_kwargs": { "train_size": 10000, "test_size": 1000 }, "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } }}
-            
-            ]
-        }
-    },
-    "TEMPLATES": {
-        "forestReg": {
-            "SETS": [
-                "sklearnex spmd implementation",
-                "large scale 2k parameters",
-		"synthetic data",
-                "spmd forest regression parameters"
-            ]
-        }
-    }
-}
diff --git a/configs/spmd/large_scale/forest_strong.json b/configs/spmd/large_scale/forest_strong.json
index 17ca8c51..23b982f5 100644
--- a/configs/spmd/large_scale/forest_strong.json
+++ b/configs/spmd/large_scale/forest_strong.json
@@ -1,15 +1,16 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd forest classification parameters": {
             "algorithm": {
                 "estimator": "RandomForestClassifier",
-		"estimator_methods": { "training": "fit" }
+		"estimator_methods": { "training": "fit" },
+		"estimator_params": { "n_estimators": 20, "max_depth": 4 }
             }
         },
         "synthetic data": {
             "data": [
-                { "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 10001000, "n_features": 100, "n_classes": 2 },  "algorithm": { "estimator_params": { "n_estimators": 20, "max_depth": 4 } } }
+                { "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 10001000, "n_features": 100, "n_classes": 2 } }
             ]
         }
     },
diff --git a/configs/spmd/large_scale/forest_strong_reg.json b/configs/spmd/large_scale/forest_strong_reg.json
deleted file mode 100644
index 305e729b..00000000
--- a/configs/spmd/large_scale/forest_strong_reg.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-    "INCLUDE": ["../../common/sklearn.json", "../../spmd/stats_covariance.json", "large_scale.json"],
-    "PARAMETERS_SETS": {
-        "spmd forest regression parameters": {
-            "algorithm": {
-                "estimator": "RandomForestRegressor"
-            }
-        },
-        "synthetic data": {
-            "data": [
-                { "source": "make_regression", "generation_kwargs": { "n_samples": 1000000,  "n_features": 10, "noise": 1.25 }, "split_kwargs": { "train_size": 900000, "test_size": 10000 }, "algorithm": { "estimator_params": { "n_estimators": 5, "max_depth": 4 } }},
-                { "source": "make_regression", "generation_kwargs": { "n_samples": 100000,  "n_features": 100, "noise": 1.25 }, "split_kwargs": { "train_size": 90000, "test_size": 10000 }, "algorithm": { "estimator_params": { "n_estimators": 10, "max_depth": 4 } }}
-            
-            ]
-        }
-    },
-    "TEMPLATES": {
-        "forestReg": {
-            "SETS": [
-                "sklearnex spmd implementation",
-                "large scale strong 32 parameters",
-		"synthetic data",
-                "spmd forest regression parameters"
-            ]
-        }
-    }
-}
diff --git a/configs/spmd/large_scale/kmeans.json b/configs/spmd/large_scale/kmeans.json
index c77d22bc..1140823d 100644
--- a/configs/spmd/large_scale/kmeans.json
+++ b/configs/spmd/large_scale/kmeans.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd kmeans parameters": {
             "algorithm": {
@@ -12,8 +12,8 @@
 	},
 	"synthetic data": {
                 "data": [
-                    { "source": "make_blobs", "generation_kwargs": { "n_samples": 3750000,  "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } },
-	                { "source": "make_blobs", "generation_kwargs": { "n_samples": 18750,  "n_features": 1000, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } }
+                    { "source": "make_blobs", "generation_kwargs": { "n_samples": 5000000,  "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } },
+	                { "source": "make_blobs", "generation_kwargs": { "n_samples": 30000,  "n_features": 1000, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } }
                 ]
         }
     },
diff --git a/configs/spmd/large_scale/kmeans_strong.json b/configs/spmd/large_scale/kmeans_strong.json
index 6f095af0..6277745b 100644
--- a/configs/spmd/large_scale/kmeans_strong.json
+++ b/configs/spmd/large_scale/kmeans_strong.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd kmeans parameters": {
             "algorithm": {
@@ -23,7 +23,7 @@
             "SETS": [
                 "synthetic data",
                 "sklearnex spmd implementation",
-                "large scale strong 32 parameters",
+                "large scale strong <64 parameters",
                 "spmd kmeans parameters"
             ]
         }
diff --git a/configs/spmd/large_scale/kmeans_strong_2.json b/configs/spmd/large_scale/kmeans_strong_2.json
deleted file mode 100644
index 03f2bc59..00000000
--- a/configs/spmd/large_scale/kmeans_strong_2.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/kmeans.json", "large_scale.json"],
-    "PARAMETERS_SETS": {
-        "spmd kmeans parameters": {
-            "algorithm": {
-                "estimator": "KMeans",
-                "estimator_params": {
-                    "algorithm": "lloyd"
-                },
-                "estimator_methods": { "training": "fit", "inference": "predict" }
-            }
-	},
-	"synthetic data": {
-                "data": [
-                        { "source": "make_blobs", "generation_kwargs": { "n_samples": 5000000,  "n_features": 10, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } },
-	                { "source": "make_blobs", "generation_kwargs": { "n_samples": 30000,  "n_features": 1000, "centers": 10 }, "algorithm": { "n_clusters": 10, "max_iter": 10 } },
-			{ "source": "make_blobs", "generation_kwargs": { "n_samples": 1000000,  "n_features": 100, "centers": 100 }, "algorithm": { "n_clusters": 100, "max_iter": 100 } }
-                ]
-        }
-    },
-    "TEMPLATES": {
-        "kmeans": {
-            "SETS": [
-                "synthetic data",
-                "sklearnex spmd implementation",
-                "large scale strong two nodes parameters",
-                "spmd kmeans parameters"
-            ]
-        }
-    }
-}
diff --git a/configs/spmd/large_scale/knn.json b/configs/spmd/large_scale/knn.json
index f1e0678d..b68b94af 100644
--- a/configs/spmd/large_scale/knn.json
+++ b/configs/spmd/large_scale/knn.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/knn.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd knn cls parameters": {
             "algorithm": {
@@ -19,15 +19,13 @@
         },
         "synthetic classification data": {
             "data": [
-		        { "source": "make_classification", "split_kwargs": { "train_size": 50000, "test_size": 5000 },   "generation_kwargs": {  "n_samples": 55000,  "n_features": 100, "n_classes": 2,  "n_informative": "[SPECIAL_VALUE]0.5" } },
-		        { "source": "make_classification", "split_kwargs": { "train_size": 5000, "test_size": 50000 },   "generation_kwargs": {  "n_samples": 55000,  "n_features": 100, "n_classes": 2,  "n_informative": "[SPECIAL_VALUE]0.5" } }
+		{ "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 5000 },   "generation_kwargs": {  "n_samples": 5005000,  "n_features": 100, "n_classes": 2,  "n_informative": "[SPECIAL_VALUE]0.5" } }
             ]
         }	
     },
     "TEMPLATES": {
         "knn classifier": {
             "SETS": [
-                "common knn parameters",
                 "synthetic classification data",
                 "sklearnex spmd implementation",
 		"large scale 2k parameters",
diff --git a/configs/spmd/large_scale/knn_strong.json b/configs/spmd/large_scale/knn_strong.json
index 67398123..7fe862dd 100644
--- a/configs/spmd/large_scale/knn_strong.json
+++ b/configs/spmd/large_scale/knn_strong.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/knn.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd knn cls parameters": {
             "algorithm": {
@@ -19,18 +19,16 @@
         },
         "synthetic classification data": {
             "data": [
-		        { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 5000 },   "generation_kwargs": {  "n_samples": 505000,  "n_features": 100, "n_classes": 2,  "n_informative": "[SPECIAL_VALUE]0.5" } },
-		        { "source": "make_classification", "split_kwargs": { "train_size": 5000, "test_size": 500000 },   "generation_kwargs": {  "n_samples": 505000,  "n_features": 100, "n_classes": 2,  "n_informative": "[SPECIAL_VALUE]0.5" } }
+		{ "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000000 },   "generation_kwargs": {  "n_samples": 1500000,  "n_features": 100, "n_classes": 2,  "n_informative": "[SPECIAL_VALUE]0.5" } }
             ]
         }	
     },
     "TEMPLATES": {
         "knn classifier": {
             "SETS": [
-                "common knn parameters",
                 "synthetic classification data",
                 "sklearnex spmd implementation",
-		"large scale strong 32 parameters",
+		"large scale strong <64 parameters",
                 "spmd knn cls parameters"
             ]
         }
diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json
index 832259a0..7e523984 100644
--- a/configs/spmd/large_scale/large_scale.json
+++ b/configs/spmd/large_scale/large_scale.json
@@ -18,49 +18,22 @@
                 "mpi_params": {"n": [1,2,6,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
-        "large scale one node parameters": {
-            "data": {
-                "dtype": "float64",
-		        "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [1,2,3,4,5,6,7,8,9,10,11,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-        "large scale strong one node parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "rank_based"
-            },
-            "bench": {
-                "mpi_params": {"n": [1,2,3,4,5,6,7,8,9,10,11,12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-        "large scale full one node parameters": {
-            "data": {
-                "dtype": "float64",
-		        "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-        "large scale strong full one node parameters": {
+        "large scale 2k parameters": {
             "data": {
                 "dtype": "float64",
-                "distributed_split": "rank_based"
+                "distributed_split": "None"
             },
             "bench": {
-                "mpi_params": {"n": [12], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+                "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
-        "large scale 2k parameters": {
+        "large scale 32 parameters": {
             "data": {
                 "dtype": "float64",
                 "distributed_split": "None"
             },
             "bench": {
-                "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
+                "mpi_params": {"n": [1,2,6,12,24,48,96,192,384], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
         "large scale <64 parameters": {
@@ -82,65 +55,6 @@
             }
         },
 
-        "large scale 128 parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [1536], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-
-        "large scale 256 parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [3072], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-
-        "large scale 512 parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [6144], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-
-        "large scale 1024 parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [12288], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-
-        "large scale 2048 parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-
-        "large scale two nodes parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [24], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
         "large scale strong 2k parameters": {
             "data": {
                 "dtype": "float64",
@@ -159,15 +73,6 @@
                "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
-        "large scale strong two nodes parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "rank_based"
-            },
-            "bench": {
-                "mpi_params": {"n": [24], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
         "large scale impi parameters": {
             "data": {
                 "dtype": "float64",
diff --git a/configs/spmd/large_scale/linear_model.json b/configs/spmd/large_scale/linreg.json
similarity index 90%
rename from configs/spmd/large_scale/linear_model.json
rename to configs/spmd/large_scale/linreg.json
index f9d17b5b..ea45a52c 100644
--- a/configs/spmd/large_scale/linear_model.json
+++ b/configs/spmd/large_scale/linreg.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/linear_model.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd linear parameters": {
             "algorithm": {
diff --git a/configs/spmd/large_scale/linear_model_strong.json b/configs/spmd/large_scale/linreg_strong.json
similarity index 88%
rename from configs/spmd/large_scale/linear_model_strong.json
rename to configs/spmd/large_scale/linreg_strong.json
index 77a9c79e..629bf544 100644
--- a/configs/spmd/large_scale/linear_model_strong.json
+++ b/configs/spmd/large_scale/linreg_strong.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/linear_model.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd linear parameters": {
             "algorithm": {
diff --git a/configs/spmd/large_scale/logreg.json b/configs/spmd/large_scale/logreg.json
index c5ef6203..326f2580 100644
--- a/configs/spmd/large_scale/logreg.json
+++ b/configs/spmd/large_scale/logreg.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd logreg2 parameters": {
 	    "algorithm":{
@@ -11,12 +11,12 @@
         "synthetic data": {
             "data": [
 		{ "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 5001000, "n_features": 10, "n_classes": 2 } },
-                { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 101000, "n_features": 1000, "n_classes": 2 } }
+                { "source": "make_classification", "split_kwargs": { "train_size": 500000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 501000, "n_features": 1000, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } }
             ]
         }
     },
     "TEMPLATES": {
-        "linreg": {
+        "logreg": {
             "SETS": [
                 "sklearnex spmd implementation",
                 "large scale 2k parameters",
diff --git a/configs/spmd/large_scale/logreg_2.json b/configs/spmd/large_scale/logreg_2.json
deleted file mode 100644
index 796eb8ad..00000000
--- a/configs/spmd/large_scale/logreg_2.json
+++ /dev/null
@@ -1,29 +0,0 @@
-{
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"],
-    "PARAMETERS_SETS": {
-        "spmd logreg2 parameters": {
-	    "algorithm":{
-		"estimator": "LogisticRegression",
-                "estimator_methods": { "inference": "predict" },
-		"estimator_params": { "max_iter": 20 }
-            }
-	},
-        "synthetic data": {
-            "data": [
-		{ "source": "make_classification", "split_kwargs": { "train_size": 5000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 5001000, "n_features": 10, "n_classes": 2 } },
-                { "source": "make_classification", "split_kwargs": { "train_size": 100000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 101000, "n_features": 1000, "n_classes": 2 } }
-            ]
-        }
-    },
-    "TEMPLATES": {
-        "linreg": {
-            "SETS": [
-                "sklearnex spmd implementation",
-                "large scale two nodes parameters",
-                "spmd logreg parameters",
-		"synthetic data",
-		"spmd logreg2 parameters"
-            ]
-        }
-    }
-}
diff --git a/configs/spmd/large_scale/logreg_strong.json b/configs/spmd/large_scale/logreg_strong.json
index 8787f6b6..0b79ba9d 100644
--- a/configs/spmd/large_scale/logreg_strong.json
+++ b/configs/spmd/large_scale/logreg_strong.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "../logreg.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd logreg2 parameters": {
 	    "algorithm":{
@@ -10,12 +10,13 @@
 	},
         "synthetic data": {
             "data": [
-		{ "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 10001000, "n_features": 100, "n_classes": 2 } }
+		{ "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 10001000, "n_features": 100, "n_classes": 2 } },
+		{ "source": "make_classification", "split_kwargs": { "train_size": 12000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 12001000, "n_features": 200, "n_classes": 2, "n_informative": 40, "n_clusters_per_class": 3, "flip_y": 0.05 } }
             ]
         }
     },
     "TEMPLATES": {
-        "linreg": {
+        "logreg": {
             "SETS": [
                 "sklearnex spmd implementation",
                 "large scale strong 2k parameters",
diff --git a/configs/spmd/large_scale/logreg_strong_2.json b/configs/spmd/large_scale/logreg_strong_2.json
deleted file mode 100644
index 998e3bb7..00000000
--- a/configs/spmd/large_scale/logreg_strong_2.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/logreg.json", "../logreg.json", "large_scale.json"],
-    "PARAMETERS_SETS": {
-        "spmd logreg2 parameters": {
-	    "algorithm":{
-		"estimator": "LogisticRegression",
-                "estimator_methods": { "inference": "predict" },
-		"estimator_params": { "max_iter": 30 }
-            }
-	},
-        "synthetic data": {
-            "data": [
-		{ "source": "make_classification", "split_kwargs": { "train_size": 10000000, "test_size": 1000 },    "generation_kwargs": {  "n_samples": 10001000, "n_features": 100, "n_classes": 2 } }
-            ]
-        }
-    },
-    "TEMPLATES": {
-        "linreg": {
-            "SETS": [
-                "sklearnex spmd implementation",
-                "large scale strong two nodes parameters",
-                "spmd logreg parameters",
-		"synthetic data",
-		"spmd logreg2 parameters"
-            ]
-        }
-    }
-}
diff --git a/configs/spmd/large_scale/pca.json b/configs/spmd/large_scale/pca.json
index 9a6a6b02..d0ee879a 100644
--- a/configs/spmd/large_scale/pca.json
+++ b/configs/spmd/large_scale/pca.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/pca.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd pca parameters": {
             "algorithm": {
diff --git a/configs/spmd/large_scale/pca_single.json b/configs/spmd/large_scale/pca_single.json
deleted file mode 100644
index 07775a6a..00000000
--- a/configs/spmd/large_scale/pca_single.json
+++ /dev/null
@@ -1,30 +0,0 @@
-{
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/pca.json", "large_scale.json"],
-    "PARAMETERS_SETS": {
-        "spmd pca parameters": {
-            "algorithm": {
-                "estimator": "PCA",
-                "estimator_methods": { "training": "fit", "inference": "" }
-            },
-            "data": {
-                "split_kwargs": { "test_size": 0.0001 }
-            }
-        },
-        "synthetic data": {
-            "data": [
-                { "source": "make_blobs", "generation_kwargs": { "n_samples": 10000000,  "n_features": 10, "centers": 1 } },
-                { "source": "make_blobs", "generation_kwargs": { "n_samples": 100000,  "n_features": 1000, "centers": 1 } }
-            ]
-        }
-    },
-    "TEMPLATES": {
-        "linreg": {
-            "SETS": [
-	        "sklearnex spmd implementation",
-                "large scale one node parameters",
-                "synthetic data",
-		"spmd pca parameters"
-            ]
-        }
-    }
-}
diff --git a/configs/spmd/large_scale/pca_strong.json b/configs/spmd/large_scale/pca_strong.json
index 9063c22e..3cb33e72 100644
--- a/configs/spmd/large_scale/pca_strong.json
+++ b/configs/spmd/large_scale/pca_strong.json
@@ -1,5 +1,5 @@
 {
-    "INCLUDE": ["../../common/sklearn.json", "../../regular/pca.json", "large_scale.json"],
+    "INCLUDE": ["../../common/sklearn.json", "large_scale.json"],
     "PARAMETERS_SETS": {
         "spmd pca parameters": {
             "algorithm": {
diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py
index b4d4f3ee..36ec40b6 100644
--- a/sklbench/benchmarks/sklearn_estimator.py
+++ b/sklbench/benchmarks/sklearn_estimator.py
@@ -66,8 +66,8 @@ def get_estimator(library_name: str, estimator_name: str):
             f"Using first {classes_map[estimator_name][0]}."
         )
     estimator = classes_map[estimator_name][0]
-    if not issubclass(estimator, BaseEstimator):
-        logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator")
+    #if not issubclass(estimator, BaseEstimator):
+    #    logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator")
     return estimator
 
 
@@ -515,7 +515,11 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
     estimator_params = get_bench_case_value(
         bench_case, "algorithm:estimator_params", dict()
     )
-
+    #logger.debug("estimator params: " + str(estimator_params))
+    if "DBSCAN" in str(estimator_name):
+        if "min_samples" in estimator_params:
+            from mpi4py import MPI
+            estimator_params["min_samples"] = MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"]
     # get estimator methods for measurement
     estimator_methods = get_estimator_methods(bench_case)
 
@@ -551,7 +555,7 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
     # note: "handle" is not JSON-serializable
     if "handle" in estimator_params:
         del estimator_params["handle"]
-    logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}")
+    #logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}")
     result_template.update(estimator_params)
 
     data_descs = {
diff --git a/sklbench/datasets/common.py b/sklbench/datasets/common.py
index e7ed0160..5c6bd27a 100644
--- a/sklbench/datasets/common.py
+++ b/sklbench/datasets/common.py
@@ -136,11 +136,11 @@ def cache_wrapper(**kwargs):
         data_name = kwargs["data_name"]
         data_cache = kwargs["data_cache"]
         if len(get_filenames_by_prefix(data_cache, data_name)) > 0:
-            logger.info(f'Loading "{data_name}" dataset from cache files')
+            #logger.info(f'Loading "{data_name}" dataset from cache files')
             data = load_data_from_cache(data_cache, data_name)
             data_desc = load_data_description(data_cache, data_name)
         else:
-            logger.info(f'Loading "{data_name}" dataset from scratch')
+            #logger.info(f'Loading "{data_name}" dataset from scratch')
             data, data_desc = function(**kwargs)
             save_data_to_cache(data, data_cache, data_name)
             save_data_description(data_desc, data_cache, data_name)

From e8344932c33cf07f095c6a0de33ab9fdcbe18000 Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Wed, 16 Oct 2024 03:55:12 +0000
Subject: [PATCH 3/4] Rename "<64" parameter sets to "<=64" and drop the ">64" set

---
 configs/spmd/large_scale/dbscan.json        |  2 +-
 configs/spmd/large_scale/dbscan_strong.json |  2 +-
 configs/spmd/large_scale/kmeans_strong.json |  2 +-
 configs/spmd/large_scale/knn_strong.json    |  2 +-
 configs/spmd/large_scale/large_scale.json   | 14 ++------------
 5 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/configs/spmd/large_scale/dbscan.json b/configs/spmd/large_scale/dbscan.json
index 61b0521e..e4996c9e 100644
--- a/configs/spmd/large_scale/dbscan.json
+++ b/configs/spmd/large_scale/dbscan.json
@@ -27,7 +27,7 @@
                 "common dbscan parameters",
                 "synthetic dataset",
                 "sklearnex spmd implementation",
-		"large scale <64 parameters",
+		"large scale <=64 parameters",
                 "spmd dbscan parameters"
             ]
         }
diff --git a/configs/spmd/large_scale/dbscan_strong.json b/configs/spmd/large_scale/dbscan_strong.json
index 24ea7cfc..04fb9016 100644
--- a/configs/spmd/large_scale/dbscan_strong.json
+++ b/configs/spmd/large_scale/dbscan_strong.json
@@ -27,7 +27,7 @@
                 "common dbscan parameters",
                 "synthetic dataset",
                 "sklearnex spmd implementation",
-		"large scale strong <64 parameters",
+		"large scale strong <=64 parameters",
                 "spmd dbscan parameters"
             ]
         }
diff --git a/configs/spmd/large_scale/kmeans_strong.json b/configs/spmd/large_scale/kmeans_strong.json
index 6277745b..87fb7fac 100644
--- a/configs/spmd/large_scale/kmeans_strong.json
+++ b/configs/spmd/large_scale/kmeans_strong.json
@@ -23,7 +23,7 @@
             "SETS": [
                 "synthetic data",
                 "sklearnex spmd implementation",
-                "large scale strong <64 parameters",
+                "large scale strong <=64 parameters",
                 "spmd kmeans parameters"
             ]
         }
diff --git a/configs/spmd/large_scale/knn_strong.json b/configs/spmd/large_scale/knn_strong.json
index 7fe862dd..d202f6e4 100644
--- a/configs/spmd/large_scale/knn_strong.json
+++ b/configs/spmd/large_scale/knn_strong.json
@@ -28,7 +28,7 @@
             "SETS": [
                 "synthetic classification data",
                 "sklearnex spmd implementation",
-		"large scale strong <64 parameters",
+		"large scale strong <=64 parameters",
                 "spmd knn cls parameters"
             ]
         }
diff --git a/configs/spmd/large_scale/large_scale.json b/configs/spmd/large_scale/large_scale.json
index 7e523984..4e4c9d0c 100644
--- a/configs/spmd/large_scale/large_scale.json
+++ b/configs/spmd/large_scale/large_scale.json
@@ -36,7 +36,7 @@
                 "mpi_params": {"n": [1,2,6,12,24,48,96,192,384], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
-        "large scale <64 parameters": {
+        "large scale <=64 parameters": {
             "data": {
                 "dtype": "float64",
                 "distributed_split": "None"
@@ -45,16 +45,6 @@
                 "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
-        "large scale >64 parameters": {
-            "data": {
-                "dtype": "float64",
-                "distributed_split": "None"
-            },
-            "bench": {
-                "mpi_params": {"n": [768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
-            }
-        },
-
         "large scale strong 2k parameters": {
             "data": {
                 "dtype": "float64",
@@ -64,7 +54,7 @@
                 "mpi_params": {"n": [1,2,6,12,24,48,96,192,384,768,1536,3072,6144,12288,24576], "ppn": 12, "-hostfile": "", "-cpu-bind=list:0-7,104-111:8-15,112-119:16-23,120-127:24-31,128-135:32-39,136-143:40-47,144-151:52-59,156-163:60-67,164-171:68-75,172-179:76-83,180-187:84-91,188-195:92-99,196-203": "--envall gpu_tile_compact.sh" }
             }
         },
-        "large scale strong <64 parameters": {
+        "large scale strong <=64 parameters": {
             "data": {
                 "dtype": "float64",
                 "distributed_split": "rank_based"

From 75f2f10e42728437ec6a32b98f76d84546c68b8b Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Wed, 16 Oct 2024 03:59:40 +0000
Subject: [PATCH 4/4] lint

---
 sklbench/benchmarks/sklearn_estimator.py | 11 +++++++----
 sklbench/datasets/common.py              |  4 ++--
 sklbench/utils/measurement.py            |  8 +++++++-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/sklbench/benchmarks/sklearn_estimator.py b/sklbench/benchmarks/sklearn_estimator.py
index 36ec40b6..e57a9038 100644
--- a/sklbench/benchmarks/sklearn_estimator.py
+++ b/sklbench/benchmarks/sklearn_estimator.py
@@ -66,7 +66,7 @@ def get_estimator(library_name: str, estimator_name: str):
             f"Using first {classes_map[estimator_name][0]}."
         )
     estimator = classes_map[estimator_name][0]
-    #if not issubclass(estimator, BaseEstimator):
+    # if not issubclass(estimator, BaseEstimator):
     #    logger.info(f"{estimator} estimator is not derived from sklearn's BaseEstimator")
     return estimator
 
@@ -515,11 +515,14 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
     estimator_params = get_bench_case_value(
         bench_case, "algorithm:estimator_params", dict()
     )
-    #logger.debug("estimator params: " + str(estimator_params))
+    # logger.debug("estimator params: " + str(estimator_params))
     if "DBSCAN" in str(estimator_name):
         if "min_samples" in estimator_params:
             from mpi4py import MPI
-            estimator_params["min_samples"] = MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"]
+
+            estimator_params["min_samples"] = (
+                MPI.COMM_WORLD.Get_size() * estimator_params["min_samples"]
+            )
     # get estimator methods for measurement
     estimator_methods = get_estimator_methods(bench_case)
 
@@ -555,7 +558,7 @@ def main(bench_case: BenchCase, filters: List[BenchCase]):
     # note: "handle" is not JSON-serializable
     if "handle" in estimator_params:
         del estimator_params["handle"]
-    #logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}")
+    # logger.debug(f"Estimator parameters:\n{custom_format(estimator_params)}")
     result_template.update(estimator_params)
 
     data_descs = {
diff --git a/sklbench/datasets/common.py b/sklbench/datasets/common.py
index 5c6bd27a..28b62fe6 100644
--- a/sklbench/datasets/common.py
+++ b/sklbench/datasets/common.py
@@ -136,11 +136,11 @@ def cache_wrapper(**kwargs):
         data_name = kwargs["data_name"]
         data_cache = kwargs["data_cache"]
         if len(get_filenames_by_prefix(data_cache, data_name)) > 0:
-            #logger.info(f'Loading "{data_name}" dataset from cache files')
+            # logger.info(f'Loading "{data_name}" dataset from cache files')
             data = load_data_from_cache(data_cache, data_name)
             data_desc = load_data_description(data_cache, data_name)
         else:
-            #logger.info(f'Loading "{data_name}" dataset from scratch')
+            # logger.info(f'Loading "{data_name}" dataset from scratch')
             data, data_desc = function(**kwargs)
             save_data_to_cache(data, data_cache, data_name)
             save_data_description(data_desc, data_cache, data_name)
diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py
index 3628813d..bfabbdc0 100644
--- a/sklbench/utils/measurement.py
+++ b/sklbench/utils/measurement.py
@@ -93,8 +93,14 @@ def measure_time(
             )
             break
     from mpi4py import MPI
+
     if MPI.COMM_WORLD.Get_rank() == 0:
-        logger.debug("iters across n runs: " + str(iters) + ", inner iters across n runs: " + str(inners))
+        logger.debug(
+            "iters across n runs: "
+            + str(iters)
+            + ", inner iters across n runs: "
+            + str(inners)
+        )
     logger.debug(times)
     # mean, std = box_filter(times)
     # if std / mean > std_mean_ratio: