Skip to content

Commit 9d62d5d

Browse files
committed
SPMD support
1 parent 06ae1ab commit 9d62d5d

24 files changed

+535
-79
lines changed

configs/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ Configs have the three highest parameter keys:
8888
| `bench`:`vtune_results_directory` | `vtune_results` | | Directory path to store Intel(R) VTune* Profiler results. |
8989
| `bench`:`n_runs` | `10` | | Number of runs for measured entity. |
9090
| `bench`:`time_limit` | `3600` | | Time limit in seconds after which the benchmark stops early. |
91+
| `bench`:`distributor` | None | None, `mpi` | Library used to handle distributed algorithms. |
92+
| `bench`:`mpi_params` | Empty dict | | Parameters for the `mpirun` command of the MPI library. |
9193
|<h3>Data parameters</h3>||||
9294
| `data`:`cache_directory` | `data_cache` | | Directory path to store cached datasets for fast loading. |
9395
| `data`:`raw_cache_directory` | `data`:`cache_directory` + "raw" | | Directory path to store downloaded raw datasets. |
@@ -102,6 +104,7 @@ Configs have the three highest parameter keys:
102104
| `data`:`format` | `pandas` | `pandas`, `numpy`, `cudf` | Data format to use in benchmark. |
103105
| `data`:`order` | `F` | `C`, `F` | Data order to use in benchmark: C-contiguous (`C`) or Fortran-contiguous (`F`). |
104106
| `data`:`dtype` | `float64` | | Data type to use in benchmark. |
107+
| `data`:`distributed_split` | None | None, `rank_based` | Split type used to distribute data between machines in a distributed algorithm. `None` means that all data is used on every machine without splitting. `rank_based` splits the data equally between machines, with the order of the splits determined by the MPI rank id. |
105108
|<h3>Algorithm parameters</h3>||||
106109
| `algorithm`:`library` | None | | Python module containing measured entity (class or function). |
107110
| `algorithm`:`device` | `default` | `default`, `cpu`, `gpu` | Device selected for computation. |
@@ -160,5 +163,13 @@ Supported ranges:
160163
- `mul:current{int}:end{int}:step{int}` - Geometric progression (Sequence: current * step <= end)
161164
- `pow:base{int}:start{int}:end{int}[:step{int}=1]` - Powers of base number
162165

166+
## Removal of Values
167+
168+
You can remove a specific parameter from a subset of cases when stacking parameter sets by using the `[REMOVE]` parameter value:
169+
170+
```json
171+
... "estimator_params": { "n_jobs": "[REMOVE]" } ...
172+
```
173+
163174
---
164175
[Documentation tree](../README.md#-documentation-tree)

configs/common/sklearn.json

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,35 @@
1919
{ "library": "sklearnex.preview", "device": ["cpu", "gpu"] }
2020
]
2121
},
22+
"sklearnex spmd implementation": {
23+
"algorithm": {
24+
"library": "sklearnex.spmd",
25+
"device": "gpu",
26+
"estimator_params": { "n_jobs": "[REMOVE]" }
27+
},
28+
"data": {
29+
"format": "dpctl",
30+
"order": "C",
31+
"distributed_split": "rank_based"
32+
},
33+
"bench": {
34+
"distributor": "mpi"
35+
}
36+
},
37+
"spmd default parameters": {
38+
"algorithm": {
39+
"estimator_methods": {
40+
"training": "fit",
41+
"inference": ""
42+
}
43+
},
44+
"data": {
45+
"dtype": "float32"
46+
},
47+
"bench": {
48+
"mpi_params": { "n": [1, 2] }
49+
}
50+
},
2251
"cuml implementation": {
2352
"algorithm": { "library": "cuml" },
2453
"data": { "format": "cudf" }

configs/spmd/dbscan.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json", "../regular/dbscan.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd dbscan parameters": {}
5+
},
6+
"TEMPLATES": {
7+
"dbscan": {
8+
"SETS": [
9+
"common dbscan parameters",
10+
"sklearn dbscan parameters",
11+
"dbscan datasets",
12+
"sklearnex spmd implementation",
13+
"spmd default parameters",
14+
"spmd dbscan parameters"
15+
]
16+
}
17+
}
18+
}

configs/spmd/ensemble.json

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json", "../regular/ensemble.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd ensemble classifier params": {
5+
"algorithm": {
6+
"estimator": "RandomForestClassifier"
7+
}
8+
},
9+
"spmd ensemble regressor params": {
10+
"algorithm": {
11+
"estimator": "RandomForestRegressor"
12+
}
13+
},
14+
"ensemble classification data": {
15+
"data": [
16+
{ "dataset": "skin_segmentation", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } },
17+
{ "dataset": "creditcard", "split_kwargs": { "train_size": 100000, "test_size": null } },
18+
{ "dataset": "a9a", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } },
19+
{ "dataset": "mnist", "split_kwargs": { "train_size": 20000, "test_size": null } }
20+
]
21+
},
22+
"ensemble regression data": {
23+
"data": [
24+
{
25+
"dataset": "road_network",
26+
"split_kwargs": {
27+
"train_size": 200000, "test_size": null,
28+
"shuffle": true, "random_state": 42
29+
}
30+
},
31+
{ "dataset": "creditcard", "split_kwargs": { "train_size": 100000, "test_size": null } },
32+
{ "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 50000, "test_size": null } },
33+
{ "dataset": "a9a", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } }
34+
]
35+
}
36+
},
37+
"TEMPLATES": {
38+
"ensemble classification": {
39+
"SETS": [
40+
"common ensemble params",
41+
"sklearn ensemble classifier params",
42+
"ensemble classification data",
43+
"sklearnex spmd implementation",
44+
"spmd default parameters",
45+
"spmd ensemble classifier params"
46+
]
47+
},
48+
"ensemble regression": {
49+
"SETS": [
50+
"common ensemble params",
51+
"sklearn ensemble regressor params",
52+
"ensemble regression data",
53+
"sklearnex spmd implementation",
54+
"spmd default parameters",
55+
"spmd ensemble regressor params"
56+
]
57+
}
58+
}
59+
}

configs/spmd/kmeans.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json", "../regular/kmeans.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd kmeans parameters": {}
5+
},
6+
"TEMPLATES": {
7+
"kmeans": {
8+
"SETS": [
9+
"common kmeans parameters",
10+
"sklearn kmeans parameters",
11+
"kmeans datasets",
12+
"sklearnex spmd implementation",
13+
"spmd default parameters",
14+
"spmd kmeans parameters"
15+
]
16+
}
17+
}
18+
}

configs/spmd/knn.json

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json", "../regular/knn.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd knn parameters": {
5+
"algorithm": {
6+
"estimator_params": {
7+
"algorithm": "brute",
8+
"metric": "minkowski",
9+
"p": 2,
10+
"weights": "uniform"
11+
}
12+
}
13+
}
14+
},
15+
"TEMPLATES": {
16+
"knn regressor": {
17+
"SETS": [
18+
"common knn parameters",
19+
"sklearn knn parameters",
20+
"brute knn algorithm - regression data",
21+
"sklearnex spmd implementation",
22+
"spmd default parameters",
23+
"spmd knn parameters"
24+
]
25+
}
26+
}
27+
}

configs/spmd/linear_model.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json", "../regular/linear_model.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd linear parameters": {}
5+
},
6+
"TEMPLATES": {
7+
"linreg": {
8+
"SETS": [
9+
"common linear parameters",
10+
"sklearn linear parameters",
11+
"regression datasets",
12+
"sklearnex spmd implementation",
13+
"spmd default parameters",
14+
"spmd linear parameters"
15+
]
16+
}
17+
}
18+
}

configs/spmd/logreg.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json", "../regular/logreg.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd logreg parameters": {
5+
"algorithm": {
6+
"estimator_params": { "solver": "newton-cg" }
7+
}
8+
}
9+
},
10+
"TEMPLATES": {
11+
"logreg": {
12+
"SETS": [
13+
"common logreg parameters",
14+
"sklearn logreg parameters",
15+
"logreg datasets",
16+
"sklearnex spmd implementation",
17+
"spmd default parameters",
18+
"spmd logreg parameters"
19+
]
20+
}
21+
}
22+
}

configs/spmd/pca.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json", "../regular/pca.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd pca parameters": {
5+
"algorithm": {
6+
"estimator_params": {
7+
"copy": "[REMOVE]",
8+
"svd_solver": "[REMOVE]",
9+
"tol": "[REMOVE]",
10+
"iterated_power": "[REMOVE]",
11+
"random_state": "[REMOVE]",
12+
"method": "cov"
13+
}
14+
}
15+
}
16+
},
17+
"TEMPLATES": {
18+
"pca": {
19+
"SETS": [
20+
"pca parameters",
21+
"pca datasets",
22+
"sklearnex spmd implementation",
23+
"spmd default parameters",
24+
"spmd pca parameters"
25+
]
26+
}
27+
}
28+
}

configs/spmd/stats_covariance.json

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json"],
3+
"PARAMETERS_SETS": {
4+
"spmd basic statistics parameters": {
5+
"algorithm": {
6+
"estimator": "BasicStatistics",
7+
"estimator_methods": {
8+
"training": "compute"
9+
}
10+
}
11+
},
12+
"spmd covariance parameters": {
13+
"algorithm": {
14+
"estimator": "EmpiricalCovariance",
15+
"estimator_params": {
16+
"bias": true
17+
}
18+
}
19+
},
20+
"datasets": {
21+
"data": [
22+
{
23+
"dataset": ["susy", "higgs"]
24+
},
25+
{
26+
"source": "make_blobs",
27+
"generation_kwargs": [
28+
{ "n_samples": 400000, "n_features": 1000, "centers": 1 },
29+
{ "n_samples": 40000000, "n_features": 10, "centers": 1 }
30+
],
31+
"split_kwargs": { "ignore": true }
32+
}
33+
]
34+
}
35+
},
36+
"TEMPLATES": {
37+
"basic statistics": {
38+
"SETS": [
39+
"sklearnex spmd implementation",
40+
"spmd default parameters",
41+
"spmd basic statistics parameters",
42+
"datasets"
43+
]
44+
},
45+
"covariance": {
46+
"SETS": [
47+
"sklearnex spmd implementation",
48+
"spmd default parameters",
49+
"spmd covariance parameters",
50+
"datasets"
51+
]
52+
}
53+
}
54+
}

configs/spmd_example.json

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
{
2+
"PARAMETERS_SETS": {
3+
"implementations": [
4+
{
5+
"algorithm": {
6+
"library": "sklearnex.spmd",
7+
"device": "gpu"
8+
},
9+
"data": { "distributed_split": "rank_based" },
10+
"bench": {
11+
"distributor": "mpi",
12+
"mpi_params": { "n": 2, "ppn": 2 }
13+
}
14+
},
15+
{
16+
"algorithm": {
17+
"library": "sklearnex",
18+
"device": "cpu"
19+
}
20+
}
21+
],
22+
"datasets": {
23+
"data": [
24+
{
25+
"dataset": "higgs",
26+
"split_kwargs": { "train_size": 10000, "test_size": 10000 }
27+
},
28+
{
29+
"source": "make_regression",
30+
"generation_kwargs": {
31+
"n_samples": 20000,
32+
"n_features": 100,
33+
"noise": 1.0
34+
},
35+
"split_kwargs": { "train_size": 0.5, "test_size": 0.5 }
36+
}
37+
]
38+
},
39+
"linear regression": {
40+
"algorithm": { "estimator": "LinearRegression" }
41+
},
42+
"knn regression": {
43+
"algorithm": {
44+
"estimator": "KNeighborsRegressor",
45+
"estimator_params": { "algorithm": "brute", "n_neighbors": 5 }
46+
}
47+
},
48+
"random forest regression": {
49+
"algorithm": {
50+
"estimator": "RandomForestRegressor",
51+
"estimator_params": {
52+
"criterion": "squared_error",
53+
"max_features": 1.0,
54+
"n_estimators": 10,
55+
"max_depth": 4
56+
}
57+
}
58+
}
59+
},
60+
"TEMPLATES": {
61+
"linear regression": {
62+
"SETS": ["implementations", "datasets", "linear regression"]
63+
},
64+
"knn regression": {
65+
"SETS": ["implementations", "datasets", "knn regression"]
66+
},
67+
"random forest regression": {
68+
"SETS": ["implementations", "datasets", "random forest regression"]
69+
}
70+
}
71+
}

envs/requirements-sklearn.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
scikit-learn
1+
# essentials
2+
scikit-learn==1.4.*
23
pandas
34
tabulate
45
fastparquet

0 commit comments

Comments
 (0)