diff --git a/README.md b/README.md index e478a229a..8214e4022 100755 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Run `python runner.py --configs configs/config_example.json [--output-file resul Options: -- ``--configs``: specify the path to a configuration file. +- ``--configs``: specify the path to a configuration file or a folder that contains configuration files. - ``--no-intel-optimized``: use Scikit-learn without [Intel(R) Extension for Scikit-learn*](#intelr-extension-for-scikit-learn-support). Now available for [scikit-learn benchmarks](https://github.com/IntelPython/scikit-learn_bench/tree/master/sklearn_bench). By default, the runner uses Intel(R) Extension for Scikit-learn. - ``--output-file``: specify the name of the output file for the benchmark result. The default name is `result.json` - ``--report``: create an Excel report based on benchmark results. The `openpyxl` library is required. diff --git a/configs/blogs/skl_2021_3.json b/configs/blogs/skl_2021_3.json index c3e2f409b..0035ae288 100644 --- a/configs/blogs/skl_2021_3.json +++ b/configs/blogs/skl_2021_3.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,11 +19,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -38,12 +38,12 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -58,11 +58,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "pca", @@ -107,12 +107,12 @@ } } ], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -129,10 +129,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], - "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -146,7 +146,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -201,8 +201,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -222,8 +222,8 @@ } } ], - "C": [500.0], - "kernel": ["rbf"] + "C": 500.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -243,8 +243,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -264,8 +264,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -285,8 +285,8 @@ } } ], - "C": [50.0], - "kernel": ["rbf"] + "C": 50.0, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -306,8 +306,8 @@ } } ], - "nu": [0.25], - "kernel": ["poly"] + "nu": 0.25, + "kernel": "poly" }, { "algorithm": "svr", @@ -327,8 +327,8 @@ } } ], - "C": [0.1], - "kernel": ["poly"] + "C": 0.1, + "kernel": "poly" }, { "algorithm": "nusvr", @@ -348,9 +348,9 @@ } } ], - "nu": [0.8], - "C": [2.0], - "kernel": ["rbf"] + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -386,7 +386,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -437,11 +437,11 @@ } } ], - "method": ["brute"] + "method": "brute" }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -468,7 +468,7 @@ } } ], - "method": ["kd_tree"] + "method": "kd_tree" }, { "algorithm": "train_test_split", @@ -483,9 +483,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 } ] } diff --git a/configs/blogs/skl_conda_config.json b/configs/blogs/skl_conda_config.json index 07557d2bf..3f413a617 100755 --- a/configs/blogs/skl_conda_config.json +++ b/configs/blogs/skl_conda_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,11 +19,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -38,12 +38,12 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -58,11 +58,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "pca", @@ -107,12 +107,12 @@ } } ], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -129,10 +129,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], - "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -146,7 +146,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -201,8 +201,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -222,8 +222,8 @@ } } ], - "C": [500.0], - "kernel": ["rbf"] + "C": 500.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -243,8 +243,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -264,8 +264,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -285,8 +285,8 @@ } } ], - "C": [50.0], - "kernel": ["rbf"] + "C": 50.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -322,7 +322,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -373,11 +373,11 @@ } } ], - "method": ["brute"] + "method": "brute" }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -404,7 +404,7 @@ } } ], - "method": ["kd_tree"] + "method": "kd_tree" }, { "algorithm": "train_test_split", @@ -419,9 +419,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 } ] } diff --git a/configs/config_example.json b/configs/config_example.json index 0a5be54ac..fa615cf29 100644 --- a/configs/config_example.json +++ b/configs/config_example.json @@ -1,12 +1,12 @@ { "common": { - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "lib": ["daal4py"], + "lib": "daal4py", "algorithm": "linear", "dataset": [ { @@ -20,7 +20,7 @@ ] }, { - "lib": ["sklearn"], + "lib": "sklearn", "algorithm": "svm", "dataset": [ { @@ -33,11 +33,11 @@ } } ], - "max-cache-size": [4], - "kernel": ["rbf"] + "max-cache-size": 4, + "kernel": "rbf" }, { - "lib": ["xgboost"], + "lib": "xgboost", "algorithm": "gbt", "dataset": [ { @@ -49,12 +49,12 @@ } } ], - "n-estimators": [1000], - "objective": ["reg:squarederror"], - "tree-method": ["hist"], - "max-depth": [1], - "subsample": [0.5], - "eta": [0.1] + "n-estimators": 1000, + "objective": "reg:squarederror", + "tree-method": "hist", + "max-depth": 1, + "subsample": 0.5, + "eta": 0.1 } ] } diff --git a/configs/cuml_config.json b/configs/cuml_config.json index f17797c70..1a60da90a 100755 --- a/configs/cuml_config.json +++ b/configs/cuml_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["cuml"], - "data-format": ["cudf"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "cuml", + "data-format": "cudf", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,10 +19,10 @@ } } ], - "time-method": ["box_filter"], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -37,11 +37,11 @@ } } ], - "time-method": ["box_filter"], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -56,10 +56,10 @@ } } ], - "time-method": ["box_filter"], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "pca", @@ -95,12 +95,12 @@ } } ], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -131,10 +131,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], - "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -156,7 +156,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -219,8 +219,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -240,8 +240,8 @@ } } ], - "C": [1000.0], - "kernel": ["linear"] + "C": 1000.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -261,8 +261,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -282,8 +282,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -303,8 +303,8 @@ } } ], - "C": [500.0], - "kernel": ["linear"] + "C": 500.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -324,8 +324,8 @@ } } ], - "C": [1.0], - "kernel": ["rbf"] + "C": 1.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -345,8 +345,8 @@ } } ], - "C": [100.0], - "kernel": ["rbf"] + "C": 100.0, + "kernel": "rbf" }, { "algorithm": "svr", @@ -366,8 +366,8 @@ } } ], - "C": [0.1], - "kernel": ["poly"] + "C": 0.1, + "kernel": "poly" }, { "algorithm": "svr", @@ -387,8 +387,8 @@ } } ], - "C": [10.0], - "kernel": ["rbf"] + "C": 10.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -424,7 +424,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -475,7 +475,7 @@ } } ], - "method": ["brute"] + "method": "brute" }, { "algorithm": "train_test_split", @@ -508,8 +508,8 @@ } } ], - "train-size": [0.75], - "test-size": [0.25] + "train-size": 0.75, + "test-size": 0.25 }, { "algorithm": "train_test_split", @@ -524,8 +524,8 @@ } } ], - "train-size": [0.9], - "test-size": [0.1] + "train-size": 0.9, + "test-size": 0.1 }, { "algorithm": "lasso", @@ -540,8 +540,8 @@ } } ], - "alpha": [1.0], - "tol": [1e-4] + "alpha": 1.0, + "tol": 1e-4 }, { "algorithm": "elasticnet", @@ -561,9 +561,9 @@ } } ], - "alpha": [2.0], - "l1_ratio": [0.5], - "tol": [1e-4] + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 } ] } diff --git a/configs/skl_config.json b/configs/skl_config.json index 4d8be6792..ca74489fe 100755 --- a/configs/skl_config.json +++ b/configs/skl_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,11 +19,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -38,12 +38,12 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -58,11 +58,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "pca", @@ -107,12 +107,12 @@ } } ], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -143,10 +143,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], - "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -168,7 +168,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -231,8 +231,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -252,8 +252,8 @@ } } ], - "C": [1000.0], - "kernel": ["linear"] + "C": 1000.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -273,8 +273,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -294,8 +294,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -315,8 +315,8 @@ } } ], - "C": [500.0], - "kernel": ["linear"] + "C": 500.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -336,8 +336,8 @@ } } ], - "C": [1.0], - "kernel": ["rbf"] + "C": 1.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -357,8 +357,8 @@ } } ], - "C": [100.0], - "kernel": ["rbf"] + "C": 100.0, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -378,8 +378,8 @@ } } ], - "nu": [0.25], - "kernel": ["sigmoid"] + "nu": 0.25, + "kernel": "sigmoid" }, { "algorithm": "nusvc", @@ -399,8 +399,8 @@ } } ], - "nu": [0.7], - "kernel": ["rbf"] + "nu": 0.7, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -420,8 +420,8 @@ } } ], - "nu": [0.5], - "kernel": ["rbf"] + "nu": 0.5, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -441,8 +441,8 @@ } } ], - "nu": [0.15], - "kernel": ["poly"] + "nu": 0.15, + "kernel": "poly" }, { "algorithm": "svr", @@ -462,8 +462,8 @@ } } ], - "C": [0.1], - "kernel": ["poly"] + "C": 0.1, + "kernel": "poly" }, { "algorithm": "svr", @@ -483,8 +483,8 @@ } } ], - "C": [10.0], - "kernel": ["rbf"] + "C": 10.0, + "kernel": "rbf" }, { "algorithm": "nusvr", @@ -504,9 +504,9 @@ } } ], - "nu": [0.8], - "C": [2.0], - "kernel": ["rbf"] + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" }, { "algorithm": "nusvr", @@ -526,10 +526,10 @@ } } ], - "nu": [0.5], - "C": [10.0], - "kernel": ["poly"], - "degree": [2] + "nu": 0.5, + "C": 10.0, + "kernel": "poly", + "degree": 2 }, { "algorithm": "nusvr", @@ -549,9 +549,9 @@ } } ], - "nu": [0.8], - "C": [2.0], - "kernel": ["rbf"] + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -587,7 +587,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -671,9 +671,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 }, { "algorithm": "train_test_split", @@ -688,11 +688,11 @@ } } ], - "data-format": ["numpy"], - "data-order": ["C"], - "include-y": [""], - "train-size": [0.9], - "test-size": [0.1] + "data-format": "numpy", + "data-order": "C", + "include-y": "", + "train-size": 0.9, + "test-size": 0.1 }, { "algorithm": "lasso", @@ -707,8 +707,8 @@ } } ], - "alpha": [1.0], - "tol": [1e-4] + "alpha": 1.0, + "tol": 1e-4 }, { "algorithm": "elasticnet", @@ -728,9 +728,9 @@ } } ], - "alpha": [2.0], - "l1_ratio": [0.5], - "tol": [1e-4] + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 } ] } diff --git a/configs/skl_xpu_config.json b/configs/skl_xpu_config.json index 023850c38..06849287e 100644 --- a/configs/skl_xpu_config.json +++ b/configs/skl_xpu_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"], + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", "device": ["host", "cpu", "gpu", "None"] }, "cases": [ @@ -20,7 +20,7 @@ } } ], - "n-clusters": [10] + "n-clusters": 10 }, { "algorithm": "dbscan", diff --git a/configs/testing/daal4py.json b/configs/testing/daal4py.json index 2f0250ccc..9af747f16 100755 --- a/configs/testing/daal4py.json +++ b/configs/testing/daal4py.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["daal4py"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "daal4py", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,7 +19,7 @@ } } ], - "n-clusters": [10] + "n-clusters": 10 }, { "algorithm": "df_clsf", @@ -37,7 +37,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { "algorithm": "df_regr", @@ -55,7 +55,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { "algorithm": "ridge", @@ -72,7 +72,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", diff --git a/configs/testing/metrics/dbscan.json b/configs/testing/metrics/dbscan.json new file mode 100755 index 000000000..8a35d5bcd --- /dev/null +++ b/configs/testing/metrics/dbscan.json @@ -0,0 +1,159 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "dbscan", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "eps": 18800 + }, + { + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "eps": 2 + }, + { + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "eps": 0.5 + }, + { + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "eps": 0.5 + }, + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "eps": 0.5 + }, + { + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + } + ], + "eps": 0.5 + } + ] +} diff --git a/configs/testing/metrics/elasticnet.json b/configs/testing/metrics/elasticnet.json new file mode 100755 index 000000000..c64044f89 --- /dev/null +++ b/configs/testing/metrics/elasticnet.json @@ -0,0 +1,116 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "elasticnet", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "alpha": 0.005, + "tol": 1e-4, + "l1_ratio": 0.85 + }, + { + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": 0.01, + "tol": 1e-4, + "l1_ratio": 0.7 + }, + { + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": 0.0625, + "tol": 1e-4, + "l1_ratio": 0.75 + }, + { + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": 0.006, + "tol": 1e-4, + "l1_ratio": 0.25 + }, + { + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": 0.15, + "tol": 1e-4, + "l1_ratio": 0.4 + } + ] +} diff --git a/configs/testing/metrics/kmeans.json b/configs/testing/metrics/kmeans.json new file mode 100755 index 000000000..427844912 --- /dev/null +++ b/configs/testing/metrics/kmeans.json @@ -0,0 +1,273 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "kmeans", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "time-method": "box_filter", + "time-limit": 50, + "n_init": 10, + "maxiter": 300, + "tol": 1e-4, + "init": "k-means++" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + } + ], + "n-clusters": 10 + }, + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "n-clusters": 45 + }, + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "n-clusters": 70 + }, + { + "dataset": [ + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + } + ], + "n-clusters": 60 + }, + { + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "n-clusters": 25 + }, + { + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "n-clusters": 40 + }, + { + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "n-clusters": 5 + }, + { + "dataset": [ + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + }, + "testing": + { + "x": "data/covertype_x_test.npy", + "y": "data/covertype_y_test.npy" + } + } + ], + "n-clusters": 8 + }, + { + "dataset": [ + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + } + ], + "n-clusters": 12 + }, + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "n-clusters": 3 + }, + { + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + ], + "n-clusters": 35 + }, + { + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + } + ], + "n-clusters": 9 + }, + { + "dataset": [ + { + "source": "npy", + "name": "covtype", + "training": { + "x": "data/covtype_x_train.npy", + "y": "data/covtype_y_train.npy" + }, + "testing": { + "x": "data/covtype_x_test.npy", + "y": "data/covtype_y_test.npy" + } + } + ], + "n-clusters": 10 + }, + { + "dataset": [ + { + "source": "npy", + "name": "higgs", + "training": { + "x": "data/higgs_x_train.npy", + "y": "data/higgs_y_train.npy" + }, + "testing": { + "x": "data/higgs_x_test.npy", + "y": "data/higgs_y_test.npy" + } + } + ], + "n-clusters": 20 + } + ] +} diff --git a/configs/testing/metrics/knn_brute_clsf.json b/configs/testing/metrics/knn_brute_clsf.json new file mode 100755 index 000000000..8903055b8 --- /dev/null +++ b/configs/testing/metrics/knn_brute_clsf.json @@ -0,0 +1,84 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "knn_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "method": "brute" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/knn_brute_regr.json b/configs/testing/metrics/knn_brute_regr.json new file mode 100755 index 000000000..46edbb2fc --- /dev/null +++ b/configs/testing/metrics/knn_brute_regr.json @@ -0,0 +1,71 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "knn_regr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + }, + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + }, + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/knn_kdtree_clsf.json b/configs/testing/metrics/knn_kdtree_clsf.json new file mode 100755 index 000000000..ea15e071a --- /dev/null +++ b/configs/testing/metrics/knn_kdtree_clsf.json @@ -0,0 +1,70 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "knn_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "method": "kd_tree" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/knn_kdtree_regr.json b/configs/testing/metrics/knn_kdtree_regr.json new file mode 100755 index 000000000..21a8dd3b1 --- /dev/null +++ b/configs/testing/metrics/knn_kdtree_regr.json @@ -0,0 +1,86 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "knn_regr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "method": "kd_tree" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + }, + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + }, + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + }, + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/lasso.json b/configs/testing/metrics/lasso.json new file mode 100755 index 000000000..53daa8daa --- /dev/null +++ b/configs/testing/metrics/lasso.json @@ -0,0 +1,107 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "lasso", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "tol": 1e-4 + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "alpha": -0.0025 + }, + { + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": 0.015625 + }, + { + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": 0.0625 + }, + { + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": -0.0625 + }, + { + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": 0.03125 + } + ] +} diff --git a/configs/testing/metrics/linreg.json b/configs/testing/metrics/linreg.json new file mode 100644 index 000000000..704aa7c04 --- /dev/null +++ b/configs/testing/metrics/linreg.json @@ -0,0 +1,143 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "linear", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/logreg.json b/configs/testing/metrics/logreg.json new file mode 100755 index 000000000..1f4e3bab9 --- /dev/null +++ b/configs/testing/metrics/logreg.json @@ -0,0 +1,171 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "log_reg", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "maxiter": 5000, + "tol": 0 + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/nusvc.json b/configs/testing/metrics/nusvc.json new file mode 100755 index 000000000..83755f2a6 --- /dev/null +++ b/configs/testing/metrics/nusvc.json @@ -0,0 +1,91 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "nusvc", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "nu": 0.07, + "kernel": "linear" + }, + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "nu": 0.25, + "kernel": "sigmoid" + }, + { + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "nu": 0.25, + "kernel": "linear" + }, + { + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "nu": 0.01, + "kernel": "rbf" + } + ] +} diff --git a/configs/testing/metrics/nusvr.json b/configs/testing/metrics/nusvr.json new file mode 100755 index 000000000..1f3b2981c --- /dev/null +++ b/configs/testing/metrics/nusvr.json @@ -0,0 +1,71 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "nusvr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "C": 0.1, + "kernel": "poly", + "nu": 0.17 + }, + { + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "C": 2.0, + "kernel": "rbf", + "nu": 0.8 + } + ] +} diff --git a/configs/testing/metrics/pca.json b/configs/testing/metrics/pca.json new file mode 100755 index 000000000..7479666bc --- /dev/null +++ b/configs/testing/metrics/pca.json @@ -0,0 +1,153 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "pca", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "svd-solver": "full" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + } + } + ], + "n-components": 0.8 + }, + { + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": + { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + } + } + ], + "n-components": 0.6 + }, + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + } + } + ], + "n-components": 0.8 + }, + { + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + } + } + ], + "n-components": 0.6 + }, + { + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + } + } + ], + "n-components": 0.8 + }, + { + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + } + } + ], + "n-components": 0.6 + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + } + } + ], + "n-components": 0.8 + }, + { + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + } + } + ], + "n-components": 0.6 + }, + { + "dataset": [ + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + } + } + ], + "n-components": 0.8 + }, + { + "dataset": [ + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + } + } + ], + "n-components": 0.8 + } + ] +} diff --git a/configs/testing/metrics/rf_clsf.json b/configs/testing/metrics/rf_clsf.json new file mode 100755 index 000000000..bfdaaf730 --- /dev/null +++ b/configs/testing/metrics/rf_clsf.json @@ -0,0 +1,188 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "df_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "num-trees": 500 + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + }, + "testing": + { + "x": "data/covertype_x_test.npy", + "y": "data/covertype_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "covtype", + "training": { + "x": "data/covtype_x_train.npy", + "y": "data/covtype_y_train.npy" + }, + "testing": { + "x": "data/covtype_x_test.npy", + "y": "data/covtype_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/rf_regr.json b/configs/testing/metrics/rf_regr.json new file mode 100644 index 000000000..399dc1add --- /dev/null +++ b/configs/testing/metrics/rf_regr.json @@ -0,0 +1,137 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "df_regr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/ridge.json b/configs/testing/metrics/ridge.json new file mode 100755 index 000000000..271e677fa --- /dev/null +++ b/configs/testing/metrics/ridge.json @@ -0,0 +1,144 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "ridge", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "alpha": 1.0 + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + } + ] +} diff --git a/configs/testing/metrics/svc.json b/configs/testing/metrics/svc.json new file mode 100755 index 000000000..60a5cd96c --- /dev/null +++ b/configs/testing/metrics/svc.json @@ -0,0 +1,91 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "svm", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "C": 0.0015, + "kernel": "linear" + }, + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "C": 500, + "kernel": "sigmoid" + }, + { + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "C": 100, + "kernel": "linear" + }, + { + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "C": 1.0, + "kernel": "rbf" + } + ] +} diff --git a/configs/testing/metrics/svr.json b/configs/testing/metrics/svr.json new file mode 100755 index 000000000..7884e1edb --- /dev/null +++ b/configs/testing/metrics/svr.json @@ -0,0 +1,69 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "svr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "C": 0.1, + "kernel": "poly" + }, + { + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "C": 2.0, + "kernel": "rbf" + } + ] +} diff --git a/configs/testing/sklearn.json b/configs/testing/sklearn.json index be566823d..c2045f289 100755 --- a/configs/testing/sklearn.json +++ b/configs/testing/sklearn.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,8 +19,8 @@ } } ], - "init": ["k-means++"], - "n-clusters": [10] + "init": "k-means++", + "n-clusters": 10 }, { "algorithm": "df_clsf", @@ -38,7 +38,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { "algorithm": "df_regr", @@ -56,7 +56,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { "algorithm": "ridge", @@ -73,7 +73,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -107,7 +107,7 @@ } } ], - "tol": [0.01] + "tol": 0.01 }, { "algorithm": "svm", @@ -125,8 +125,8 @@ } } ], - "C": [10.0], - "kernel": ["linear"] + "C": 10.0, + "kernel": "linear" }, { "algorithm": "nusvc", @@ -144,8 +144,8 @@ } } ], - "nu": [0.1], - "kernel": ["poly"] + "nu": 0.1, + "kernel": "poly" }, { "algorithm": "svr", @@ -162,8 +162,8 @@ } } ], - "C": [10.0], - "kernel": ["rbf"] + "C": 10.0, + "kernel": "rbf" }, { "algorithm": "nusvr", @@ -180,10 +180,10 @@ } } ], - "nu": [0.1], - "C": [1.0], - "kernel": ["poly"], - "degree": [2] + "nu": 0.1, + "C": 1.0, + "kernel": "poly", + "degree": 2 }, { "algorithm": "dbscan", @@ -217,6 +217,23 @@ ], "method": ["brute", "kd_tree"] }, + { + "algorithm": "knn_regr", + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "training": { + "n_samples": 1000 + }, + "testing": { + "n_samples": 200 + } + } + ], + "method": ["brute", "kd_tree"] + }, { "algorithm": "train_test_split", "dataset": [ @@ -230,9 +247,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 }, { "algorithm": "lasso", @@ -250,8 +267,8 @@ } } ], - "alpha": [1.0], - "tol": [1e-4] + "alpha": 1.0, + "tol": 1e-4 }, { "algorithm": "elasticnet", @@ -269,9 +286,9 @@ } } ], - "alpha": [2.0], - "l1_ratio": [0.5], - "tol": [1e-4] + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 }, { "algorithm": "pca", diff --git a/cuml_bench/df_clsf.py b/cuml_bench/df_clsf.py index 848e97d7e..80f659638 100755 --- a/cuml_bench/df_clsf.py +++ b/cuml_bench/df_clsf.py @@ -93,7 +93,7 @@ def predict(clf, X): predict_time, y_pred = bench.measure_function_time(predict, clf, X_test, params=params) test_acc = 100 * bench.accuracy_score(y_pred, y_test) -bench.print_output(library='cuml', algorithm='decision_forest_classification', +bench.print_output(library='cuml', algorithm='df_clsf', stages=['training', 'prediction'], params=params, functions=['df_clsf.fit', 'df_clsf.predict'], times=[fit_time, predict_time], metric_type='accuracy[%]', diff --git a/cuml_bench/df_regr.py b/cuml_bench/df_regr.py index 61e08ce7b..088760533 100644 --- a/cuml_bench/df_regr.py +++ b/cuml_bench/df_regr.py @@ -89,7 +89,7 @@ def predict(regr, X): predict_time, y_pred = bench.measure_function_time(predict, regr, X_test, params=params) test_rmse = bench.rmse_score(y_pred, y_test) -bench.print_output(library='cuml', algorithm='decision_forest_regression', +bench.print_output(library='cuml', algorithm='df_regr', stages=['training', 'prediction'], params=params, functions=['df_regr.fit', 'df_regr.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/elasticnet.py b/cuml_bench/elasticnet.py index 2f4e3dd5e..2d969a886 100755 --- a/cuml_bench/elasticnet.py +++ b/cuml_bench/elasticnet.py @@ -53,7 +53,7 @@ pred_test = regr.predict(X_test) test_rmse = bench.rmse_score(pred_test, y_test) -bench.print_output(library='cuml', algorithm='elastic-net', +bench.print_output(library='cuml', algorithm='elasticnet', stages=['training', 'prediction'], params=params, functions=['ElasticNet.fit', 'ElasticNet.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/knn_clsf.py b/cuml_bench/knn_clsf.py index 6ccf3aa47..ec7d21490 100755 --- a/cuml_bench/knn_clsf.py +++ b/cuml_bench/knn_clsf.py @@ -64,7 +64,7 @@ if params.task == 'classification': bench.print_output(library='cuml', - algorithm=knn_clsf.algorithm + '_knn_classification', + algorithm=knn_clsf.algorithm + '_knn_clsf', stages=['training', 'prediction'], params=params, functions=['knn_clsf.fit', 'knn_clsf.predict'], times=[train_time, predict_time], diff --git a/cuml_bench/linear.py b/cuml_bench/linear.py index bfe81991f..714454cfc 100644 --- a/cuml_bench/linear.py +++ b/cuml_bench/linear.py @@ -47,7 +47,7 @@ yp = regr.predict(X_train) train_rmse = bench.rmse_score(yp, y_train) -bench.print_output(library='cuml', algorithm='linear_regression', +bench.print_output(library='cuml', algorithm='lin_reg', stages=['training', 'prediction'], params=params, functions=['Linear.fit', 'Linear.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/log_reg.py b/cuml_bench/log_reg.py index 599b1bfdf..5dda0611a 100644 --- a/cuml_bench/log_reg.py +++ b/cuml_bench/log_reg.py @@ -58,7 +58,7 @@ clf.predict, X_test, params=params) test_acc = 100 * bench.accuracy_score(y_pred, y_test) -bench.print_output(library='cuml', algorithm='logistic_regression', +bench.print_output(library='cuml', algorithm='log_reg', stages=['training', 'prediction'], params=params, functions=['LogReg.fit', 'LogReg.predict'], times=[fit_time, predict_time], metric_type='accuracy[%]', diff --git a/cuml_bench/pca.py b/cuml_bench/pca.py index 35f20f3b6..bf9b9a878 100644 --- a/cuml_bench/pca.py +++ b/cuml_bench/pca.py @@ -48,7 +48,7 @@ transform_time, _ = bench.measure_function_time( pca.transform, X_train, params=params) -bench.print_output(library='cuml', algorithm='pca', +bench.print_output(library='cuml', algorithm='PCA', stages=['training', 'transformation'], params=params, functions=['PCA.fit', 'PCA.transform'], times=[fit_time, transform_time], metric_type=None, diff --git a/cuml_bench/ridge.py b/cuml_bench/ridge.py index d6d488673..caf80392b 100644 --- a/cuml_bench/ridge.py +++ b/cuml_bench/ridge.py @@ -49,7 +49,7 @@ yp = regr.predict(X_train) train_rmse = bench.rmse_score(yp, y_train) -bench.print_output(library='cuml', algorithm='ridge_regression', +bench.print_output(library='cuml', algorithm='ridge_regr', stages=['training', 'prediction'], params=params, functions=['Ridge.fit', 'Ridge.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/svm.py b/cuml_bench/svm.py index 112427397..0b2c0020a 100644 --- a/cuml_bench/svm.py +++ b/cuml_bench/svm.py @@ -79,7 +79,7 @@ def metric_call(x, y): clf_predict, X_test, params=params) test_acc = metric_call(y_test, y_pred) -bench.print_output(library='cuml', algorithm='svc', +bench.print_output(library='cuml', algorithm='SVC', stages=['training', state_predict], params=params, functions=['SVM.fit', 'SVM.predict'], times=[fit_time, predict_train_time], metric_type=metric_type, diff --git a/cuml_bench/svr.py b/cuml_bench/svr.py index 7560ff103..616b5bcbd 100644 --- a/cuml_bench/svr.py +++ b/cuml_bench/svr.py @@ -63,7 +63,7 @@ regr.predict, X_test, params=params) test_rmse = bench.rmse_score(y_test, y_pred) -bench.print_output(library='cuml', algorithm='svr', +bench.print_output(library='cuml', algorithm='SVR', stages=['training', 'prediction'], params=params, functions=['SVR.fit', 'SVR.predict'], times=[fit_time, predict_train_time], metric_type='rmse', diff --git a/report_generator/sklearn_metrics_report_gen_config.json b/report_generator/sklearn_metrics_report_gen_config.json new file mode 100644 index 000000000..66c993953 --- /dev/null +++ b/report_generator/sklearn_metrics_report_gen_config.json @@ -0,0 +1,35 @@ +{ + "header": [ + "stage", + "input_data:data_order", + "input_data:data_type", + "input_data:dataset_name", + "input_data:rows", + "input_data:columns", + "input_data:classes", + "input_data:n_clusters", + "n_clusters", + "algorithm_parameters:algorithm", + "algorithm_parameters:tol", + "algorithm_parameters:max_iter", + "algorithm_parameters:init", + "algorithm_parameters:n_init", + "algorithm_parameters:alpha", + "algorithm_parameters:l1_ratio", + "algorithm_parameters:solver", + "algorithm_parameters:C", + "algorithm_parameters:cache_size", + "algorithm_parameters:kernel", + "algorithm_parameters:nu", + "algorithm_parameters:eps", + "algorithm_parameters:n_neighbors", + "algorithm_parameters:metric", + "algorithm_parameters:n_estimators" + ], + "comparison_method": { + "default": "2 / 1" + }, + "aggregation_metrics": [ + "geomean" + ] +} diff --git a/runner.py b/runner.py index c4cba2449..99d992704 100755 --- a/runner.py +++ b/runner.py @@ -24,18 +24,31 @@ import datasets.make_datasets as make_datasets import utils +from pathlib import Path + + +def get_configs(path: Path) -> List[str]: + result = list() + for dir_or_file in os.listdir(path): + new_path = Path(path, dir_or_file) + if dir_or_file.endswith('.json'): + result.append(str(new_path)) + elif os.path.isdir(new_path): + result += get_configs(new_path) + return result + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--configs', metavar='ConfigPath', type=str, default='configs/config_example.json', - help='Path to configuration files') + help='The path to a configuration file or ' + 'a directory that contains configuration files') parser.add_argument('--dummy-run', default=False, action='store_true', help='Run configuration parser and datasets generation ' 'without benchmarks running') parser.add_argument('--no-intel-optimized', default=False, action='store_true', - help='Use no intel optimized version. ' - 'Now avalible for scikit-learn benchmarks'), + help='Use Scikit-learn without Intel optimizations') parser.add_argument('--output-file', default='results.json', type=argparse.FileType('w'), help='Output file of benchmarks to use with their runner') @@ -60,6 +73,15 @@ 'results': [] } is_successful = True + # getting jsons from folders + paths_to_configs: List[str] = list() + for config_name in args.configs.split(','): + if os.path.isdir(config_name): + config_name = get_configs(Path(config_name)) + else: + config_name = [config_name] + paths_to_configs += config_name + args.configs = ','.join(paths_to_configs) for config_name in args.configs.split(','): logging.info(f'Config: {config_name}') diff --git a/sklearn_bench/df_clsf.py b/sklearn_bench/df_clsf.py index 95709340c..d99ffa898 100644 --- a/sklearn_bench/df_clsf.py +++ b/sklearn_bench/df_clsf.py @@ -56,7 +56,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='decision_forest_classification', + algorithm='df_clsf', stages=['training', 'prediction'], params=params, functions=['df_clsf.fit', 'df_clsf.predict'], diff --git a/sklearn_bench/df_regr.py b/sklearn_bench/df_regr.py index 2d12c65f7..f21eaee9c 100644 --- a/sklearn_bench/df_regr.py +++ b/sklearn_bench/df_regr.py @@ -50,7 +50,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='decision_forest_regression', + algorithm='df_regr', stages=['training', 'prediction'], params=params, functions=['df_regr.fit', 'df_regr.predict'], diff --git a/sklearn_bench/knn_clsf.py b/sklearn_bench/knn_clsf.py index ef581f537..f58be1650 100755 --- a/sklearn_bench/knn_clsf.py +++ b/sklearn_bench/knn_clsf.py @@ -59,7 +59,7 @@ def main(): if params.task == 'classification': bench.print_output( library='sklearn', - algorithm=knn_clsf._fit_method + '_knn_classification', + algorithm=knn_clsf._fit_method + '_knn_clsf', stages=['training', 'prediction'], params=params, functions=['knn_clsf.fit', 'knn_clsf.predict'], diff --git a/sklearn_bench/knn_regr.py b/sklearn_bench/knn_regr.py new file mode 100644 index 000000000..c2048e3f4 --- /dev/null +++ b/sklearn_bench/knn_regr.py @@ -0,0 +1,100 @@ +# =============================================================================== +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== + +import argparse + +import bench +import numpy as np + + +def main(): + from sklearn.neighbors import KNeighborsRegressor + + # Load generated data + X_train, X_test, y_train, y_test = bench.load_data(params) + params.n_classes = len(np.unique(y_train)) + + # Create a regression object + knn_regr = KNeighborsRegressor(n_neighbors=params.n_neighbors, + weights=params.weights, + algorithm=params.method, + metric=params.metric, + n_jobs=params.n_jobs) + + # Measure time and accuracy on fitting + train_time, _ = bench.measure_function_time( + knn_regr.fit, X_train, y_train, params=params) + if params.task == 'regression': + y_pred = knn_regr.predict(X_train) + train_rmse = bench.rmse_score(y_train, y_pred) + train_r2 = bench.r2_score(y_train, y_pred) + + # Measure time and accuracy on prediction + if params.task == 'regression': + predict_time, yp = bench.measure_function_time(knn_regr.predict, X_test, + params=params) + test_rmse = bench.rmse_score(y_test, yp) + test_r2 = bench.r2_score(y_test, yp) + else: + predict_time, _ = bench.measure_function_time(knn_regr.kneighbors, X_test, + params=params) + + if params.task == 'regression': + bench.print_output( + library='sklearn', + algorithm=knn_regr._fit_method + '_knn_regr', + stages=['training', 'prediction'], + params=params, + functions=['knn_regr.fit', 'knn_regr.predict'], + times=[train_time, predict_time], + metric_type=['rmse', 'r2_score'], + metrics=[[train_rmse, test_rmse], [train_r2, test_r2]], + data=[X_train, X_test], + alg_instance=knn_regr, + ) + else: + bench.print_output( + library='sklearn', + algorithm=knn_regr._fit_method + '_knn_search', + stages=['training', 'search'], + params=params, + functions=['knn_regr.fit', 'knn_regr.kneighbors'], + times=[train_time, predict_time], + metric_type=None, + metrics=[], + data=[X_train, X_test], + alg_instance=knn_regr, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='scikit-learn kNN classifier benchmark') + + parser.add_argument('--task', default='regression', type=str, + choices=('search', 'regression'), + help='The type of kNN task: search or regression') + parser.add_argument('--n-neighbors', default=5, type=int, + help='The number of neighbors to use') + parser.add_argument('--weights', type=str, default='uniform', + help='The weight function to be used in prediction') + parser.add_argument('--method', type=str, default='brute', + choices=('brute', 'kd_tree', 'ball_tree', 'auto'), + help='The method to find the nearest neighbors') + parser.add_argument('--metric', type=str, default='euclidean', + help='The metric to calculate distances') + params = bench.parse_args(parser) + bench.run_with_context(params, main) diff --git a/sklearn_bench/linear.py b/sklearn_bench/linear.py index c7390efbe..b97d49a6e 100644 --- a/sklearn_bench/linear.py +++ b/sklearn_bench/linear.py @@ -42,7 +42,7 @@ def main(): train_r2 = bench.r2_score(y_train, yp) bench.print_output( - library='sklearn', algorithm='linear_regression', + library='sklearn', algorithm='lin_reg', stages=['training', 'prediction'], params=params, functions=['Linear.fit', 'Linear.predict'], times=[fit_time, predict_time], diff --git a/sklearn_bench/log_reg.py b/sklearn_bench/log_reg.py index 1053d3819..733ee5765 100644 --- a/sklearn_bench/log_reg.py +++ b/sklearn_bench/log_reg.py @@ -58,7 +58,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='logistic_regression', + algorithm='log_reg', stages=['training', 'prediction'], params=params, functions=['LogReg.fit', 'LogReg.predict'], diff --git a/sklearn_bench/nusvc.py b/sklearn_bench/nusvc.py index d98b184df..d3e6eeece 100644 --- a/sklearn_bench/nusvc.py +++ b/sklearn_bench/nusvc.py @@ -68,7 +68,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='nusvc', + algorithm='nuSVC', stages=['training', state_predict], params=params, functions=['NuSVC.fit', f'NuSVC.{state_predict}'], times=[fit_time, predict_train_time], diff --git a/sklearn_bench/nusvr.py b/sklearn_bench/nusvr.py index d31b7d26e..ccfe519ba 100644 --- a/sklearn_bench/nusvr.py +++ b/sklearn_bench/nusvr.py @@ -53,7 +53,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='nusvr', + algorithm='nuSVR', stages=['training', 'prediction'], params=params, functions=['NuSVR.fit', 'NuSVR.predict'], diff --git a/sklearn_bench/pca.py b/sklearn_bench/pca.py index 7e4fcf366..ef71f4cfc 100644 --- a/sklearn_bench/pca.py +++ b/sklearn_bench/pca.py @@ -42,7 +42,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='pca', + algorithm='PCA', stages=['training', 'transformation'], params=params, functions=['PCA.fit', 'PCA.transform'], diff --git a/sklearn_bench/ridge.py b/sklearn_bench/ridge.py index 3b8f138d2..19718a4e7 100644 --- a/sklearn_bench/ridge.py +++ b/sklearn_bench/ridge.py @@ -44,7 +44,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='ridge_regression', + algorithm='ridge_regr', stages=['training', 'prediction'], params=params, functions=['Ridge.fit', 'Ridge.predict'], diff --git a/sklearn_bench/svm.py b/sklearn_bench/svm.py index 6e17ea00a..88160283e 100644 --- a/sklearn_bench/svm.py +++ b/sklearn_bench/svm.py @@ -68,7 +68,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='svc', + algorithm='SVC', stages=['training', state_predict], params=params, functions=['SVM.fit', f'SVM.{state_predict}'], diff --git a/sklearn_bench/svr.py b/sklearn_bench/svr.py index a3447332b..7e9dc2c8d 100644 --- a/sklearn_bench/svr.py +++ b/sklearn_bench/svr.py @@ -53,7 +53,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='svr', + algorithm='SVR', stages=['training', 'prediction'], params=params, functions=['SVR.fit', 'SVR.predict'],