Skip to content

Commit 1c6bd66

Browse files
committed
Update CI and minor code rework
1 parent 22ce12e commit 1c6bd66

File tree

20 files changed

+186
-65
lines changed

20 files changed

+186
-65
lines changed

configs/regular/pca.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
"TEMPLATES": {
4747
"sklearn pca": {
4848
"SETS": [
49-
"sklearn-ex[preview] implementations",
49+
"sklearn-ex[cpu,gpu] implementations",
5050
"pca parameters",
5151
"pca datasets"
5252
]

configs/testing/ci.json

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
{
2+
"INCLUDE": ["../common/sklearn.json"],
3+
"PARAMETERS_SETS": {
4+
"common parameters": {
5+
"data": {
6+
"format": ["numpy", "pandas"],
7+
"dtype": ["float32", "float64"],
8+
"order": ["C", "F"],
9+
"split_kwargs": {
10+
"train_size": 400,
11+
"test_size": 100,
12+
"shuffle": true,
13+
"random_state": 42
14+
},
15+
"preprocessing_kwargs": {
16+
"normalize": true
17+
}
18+
},
19+
"bench": { "n_runs": 5 },
20+
"algorithm": { "device": "default" }
21+
},
22+
"datasets": {
23+
"data":
24+
[
25+
{
26+
"source": "fetch_openml",
27+
"id": 1430
28+
},
29+
{
30+
"source": "make_classification",
31+
"generation_kwargs": {
32+
"n_classes": 2,
33+
"n_samples": 500,
34+
"n_features": 16,
35+
"n_informative": "[SPECIAL_VALUE]0.5"
36+
}
37+
}
38+
]
39+
},
40+
"algorithms": [
41+
{
42+
"algorithm": {
43+
"estimator": "DBSCAN",
44+
"estimator_params": { "algorithm": "brute" }
45+
}
46+
},
47+
{
48+
"algorithm": {
49+
"estimator": "KMeans",
50+
"estimator_params": { "init": "random", "algorithm": "lloyd" }
51+
}
52+
},
53+
{
54+
"algorithm": {
55+
"estimator": "PCA",
56+
"estimator_params": { "svd_solver": "full" }
57+
}
58+
},
59+
{
60+
"algorithm": {
61+
"estimator": "TSNE"
62+
}
63+
},
64+
{
65+
"algorithm": {
66+
"estimator": [
67+
"RandomForestClassifier", "ExtraTreesClassifier",
68+
"RandomForestRegressor", "ExtraTreesRegressor"
69+
],
70+
"estimator_params": { "n_estimators": 20 }
71+
}
72+
},
73+
{
74+
"algorithm": {
75+
"estimator": [
76+
"KNeighborsClassifier", "KNeighborsRegressor"
77+
],
78+
"estimator_params": { "algorithm": ["brute", "kd_tree"] }
79+
}
80+
},
81+
{
82+
"algorithm": {
83+
"estimator": ["LinearRegression", "Ridge", "Lasso", "ElasticNet"]
84+
}
85+
},
86+
{
87+
"algorithm": {
88+
"estimator": ["SVC", "SVR"]
89+
}
90+
},
91+
{
92+
"algorithm": {
93+
"estimator": ["NuSVC", "NuSVR"],
94+
"estimator_params": { "nu": 0.1 }
95+
}
96+
},
97+
{
98+
"algorithm": {
99+
"function": "train_test_split",
100+
"args_order": "x_train|y_train",
101+
"kwargs": {
102+
"random_state": 42,
103+
"shuffle": true
104+
}
105+
}
106+
}
107+
]
108+
},
109+
"TEMPLATES": {
110+
"test": {
111+
"SETS": [
112+
"common parameters",
113+
"datasets",
114+
"sklearn-ex[cpu] implementations",
115+
"algorithms"
116+
]
117+
}
118+
}
119+
}

envs/requirements-sklearn.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@ xgboost
1212
catboost
1313
lightgbm
1414
scikit-learn-intelex
15+
# oneapi components
16+
dpctl
17+
dpnp

sklbench/benchmarks/sklearn_estimator.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
from ..datasets import load_data
4141
from ..datasets.transformer import split_and_transform_data
4242
from ..utils.bench_case import get_bench_case_value, get_data_name
43-
from ..utils.common import convert_to_ndarray, custom_format, get_module_members
43+
from ..utils.common import convert_to_numpy, custom_format, get_module_members
4444
from ..utils.config import bench_case_filter
4545
from ..utils.custom_types import BenchCase, Numeric, NumpyNumeric
4646
from ..utils.logger import logger
@@ -121,7 +121,7 @@ def get_subset_metrics_of_estimator(
121121
metrics = dict()
122122
# Note: use data[0, 1] when calling estimator methods,
123123
# x, y are numpy ndarrays for compatibility with sklearn metrics
124-
x, y = list(map(convert_to_ndarray, data))
124+
x, y = list(map(convert_to_numpy, data))
125125
if stage == "training":
126126
if hasattr(estimator_instance, "n_iter_"):
127127
iterations = estimator_instance.n_iter_
@@ -134,7 +134,7 @@ def get_subset_metrics_of_estimator(
134134
):
135135
metrics.update({"iterations": int(iterations[0])})
136136
if task == "classification":
137-
y_pred = convert_to_ndarray(estimator_instance.predict(x))
137+
y_pred = convert_to_numpy(estimator_instance.predict(x))
138138
metrics.update(
139139
{
140140
"accuracy": float(accuracy_score(y, y_pred)),
@@ -145,7 +145,7 @@ def get_subset_metrics_of_estimator(
145145
hasattr(estimator_instance, "probability")
146146
and getattr(estimator_instance, "probability") == False
147147
):
148-
y_pred_proba = convert_to_ndarray(estimator_instance.predict_proba(x))
148+
y_pred_proba = convert_to_numpy(estimator_instance.predict_proba(x))
149149
metrics.update(
150150
{
151151
"ROC AUC": float(
@@ -163,7 +163,7 @@ def get_subset_metrics_of_estimator(
163163
}
164164
)
165165
elif task == "regression":
166-
y_pred = convert_to_ndarray(estimator_instance.predict(x))
166+
y_pred = convert_to_numpy(estimator_instance.predict(x))
167167
metrics.update(
168168
{
169169
"RMSE": float(mean_squared_error(y, y_pred) ** 0.5),
@@ -194,16 +194,14 @@ def get_subset_metrics_of_estimator(
194194
{
195195
"inertia": float(
196196
np.power(
197-
convert_to_ndarray(estimator_instance.transform(x)).min(
198-
axis=1
199-
),
197+
convert_to_numpy(estimator_instance.transform(x)).min(axis=1),
200198
2,
201199
).sum()
202200
)
203201
}
204202
)
205203
if hasattr(estimator_instance, "predict"):
206-
y_pred = convert_to_ndarray(estimator_instance.predict(x))
204+
y_pred = convert_to_numpy(estimator_instance.predict(x))
207205
metrics.update(
208206
{
209207
"Davies-Bouldin score": float(davies_bouldin_score(x, y_pred)),
@@ -212,7 +210,7 @@ def get_subset_metrics_of_estimator(
212210
}
213211
)
214212
if "DBSCAN" in str(estimator_instance) and stage == "training":
215-
labels = convert_to_ndarray(estimator_instance.labels_)
213+
labels = convert_to_numpy(estimator_instance.labels_)
216214
clusters = len(np.unique(labels[labels != -1]))
217215
metrics.update({"clusters": clusters})
218216
if clusters > 1:
@@ -245,7 +243,7 @@ def get_subset_metrics_of_estimator(
245243
ground_truth_neighbors = _brute_knn.kneighbors(
246244
x, recall_degree, return_distance=False
247245
)
248-
predicted_neighbors = convert_to_ndarray(
246+
predicted_neighbors = convert_to_numpy(
249247
estimator_instance.kneighbors(
250248
data[0], recall_degree, return_distance=False
251249
)

sklbench/datasets/transformer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# ===============================================================================
1616

1717
import os
18+
1819
import numpy as np
1920
import pandas as pd
2021
from scipy.sparse import csr_matrix

sklbench/emulators/common/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,4 @@
1616

1717
from .neighbors import NearestNeighborsBase
1818

19-
2019
__all__ = ["NearestNeighborsBase"]

sklbench/emulators/common/neighbors.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717

1818
from warnings import warn
19+
1920
import numpy as np
2021

2122

sklbench/emulators/faiss/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,4 @@
1616

1717
from .neighbors import NearestNeighbors
1818

19-
2019
__all__ = ["NearestNeighbors"]

sklbench/emulators/faiss/neighbors.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717

1818
import faiss
19+
1920
from ..common import NearestNeighborsBase
2021

2122

sklbench/emulators/raft/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,4 @@
1616

1717
from .neighbors import NearestNeighbors
1818

19-
2019
__all__ = ["NearestNeighbors"]

sklbench/emulators/raft/neighbors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616

1717
import cupy as cp
1818
from pylibraft.common import DeviceResources
19-
from pylibraft.neighbors import brute_force, ivf_flat, ivf_pq, cagra
19+
from pylibraft.neighbors import brute_force, cagra, ivf_flat, ivf_pq
20+
2021
from ..common import NearestNeighborsBase
2122

2223

sklbench/emulators/svs/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,4 @@
1616

1717
from .neighbors import NearestNeighbors
1818

19-
2019
__all__ = ["NearestNeighbors"]

sklbench/emulators/svs/neighbors.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import pysvs
1818
from psutil import cpu_count
19+
1920
from ..common.neighbors import NearestNeighborsBase
2021

2122

sklbench/report/implementation.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,3 +360,4 @@ def generate_report(args: argparse.Namespace):
360360
# remove default sheet
361361
wb.remove(wb["Sheet"])
362362
wb.save(args.report_file)
363+
return 0

sklbench/runner/arguments.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,32 +22,31 @@
2222
from ..report import add_report_generator_arguments
2323

2424

25-
def get_argument_actions(parser):
26-
arg_actions = []
27-
28-
for action in parser._actions:
29-
if isinstance(action, argparse._ArgumentGroup):
30-
for subaction in action._group_actions:
31-
arg_actions.append(subaction)
32-
else:
33-
arg_actions.append(action)
34-
return arg_actions
25+
def get_parser_description(parser: argparse.ArgumentParser):
26+
"""Convert parser description to Markdown-style table."""
3527

28+
def get_argument_actions(parser):
29+
arg_actions = []
3630

37-
def parse_action(action: argparse.Action) -> Dict:
38-
return {
39-
"Name": "</br>".join(map(lambda x: f"`{x}`", action.option_strings)),
40-
"Type": action.type.__name__ if action.type is not None else None,
41-
"Default value": (
42-
action.default if action.default is not argparse.SUPPRESS else None
43-
),
44-
"Choices": action.choices,
45-
"Description": action.help,
46-
}
31+
for action in parser._actions:
32+
if isinstance(action, argparse._ArgumentGroup):
33+
for subaction in action._group_actions:
34+
arg_actions.append(subaction)
35+
else:
36+
arg_actions.append(action)
37+
return arg_actions
4738

39+
def parse_action(action: argparse.Action) -> Dict:
40+
return {
41+
"Name": "</br>".join(map(lambda x: f"`{x}`", action.option_strings)),
42+
"Type": action.type.__name__ if action.type is not None else None,
43+
"Default value": (
44+
action.default if action.default is not argparse.SUPPRESS else None
45+
),
46+
"Choices": action.choices,
47+
"Description": action.help,
48+
}
4849

49-
def get_parser_description(parser: argparse.ArgumentParser):
50-
"""Convert parser description to Markdown-style table."""
5150
return pd.DataFrame(map(parse_action, get_argument_actions(parser))).to_markdown(
5251
index=False
5352
)
@@ -76,7 +75,7 @@ def add_runner_arguments(parser: argparse.ArgumentParser) -> argparse.ArgumentPa
7675
type=str,
7776
choices=("ERROR", "WARNING", "INFO", "DEBUG"),
7877
help="Global logging level for benchmarks: "
79-
"overwrites runner, bench and report log levels.",
78+
"overwrites runner, bench and report logging levels.",
8079
)
8180
# benchmarking cases finding, overwriting and filtering
8281
parser.add_argument(
@@ -106,7 +105,7 @@ def add_runner_arguments(parser: argparse.ArgumentParser) -> argparse.ArgumentPa
106105
default="",
107106
type=str,
108107
nargs="+",
109-
help="Filters benchmark bench_cases by config parameters. "
108+
help="Filters benchmarking cases by parameter values. "
110109
"For example: `-f data:dtype=float32 data:order=F`.",
111110
)
112111

@@ -120,6 +119,7 @@ def add_runner_arguments(parser: argparse.ArgumentParser) -> argparse.ArgumentPa
120119
parser.add_argument(
121120
"--environment-alias",
122121
"--env-alias",
122+
"-e",
123123
type=str,
124124
default=None,
125125
help="Environment alias to use instead of its configuration hash.",

sklbench/runner/benchmark_commands.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,16 @@ def run_benchmark_from_case(
8787
bench_case: BenchCase, filters: List[BenchCase], log_level: str
8888
) -> Tuple[int, List[Dict]]:
8989
command = generate_benchmark_command(bench_case, filters, log_level)
90-
logger.debug(f"Benchmark wrapper call command: {command}")
90+
logger.debug(f"Benchmark wrapper call command:\n{command}")
9191
return_code, stdout, stderr = read_output_from_command(command)
9292

9393
# filter stdout warnings
94+
prefixes_to_skip = ["[W]", "[I]"]
9495
stdout = "\n".join(
9596
[
9697
line
9798
for line in stdout.split("\n")
98-
if not (line.startswith("[W]") or line.startswith("[I]"))
99+
if not any(map(lambda x: line.startswith(x), prefixes_to_skip))
99100
]
100101
)
101102

0 commit comments

Comments
 (0)