From facd58f47509d3a67ed91c86256d1a68c69d45cf Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 14:18:52 +0300 Subject: [PATCH 01/22] add configs --- configs/testing/metrics/dbscan.json | 174 +++++++++++++ configs/testing/metrics/elasticnet.json | 120 +++++++++ configs/testing/metrics/kmeans.json | 244 ++++++++++++++++++ configs/testing/metrics/knn_brute_clsf.json | 85 ++++++ configs/testing/metrics/knn_brute_regr.json | 72 ++++++ configs/testing/metrics/knn_kdtree_clsf.json | 71 +++++ configs/testing/metrics/knn_kdtree_regr.json | 87 +++++++ configs/testing/metrics/lasso.json | 115 +++++++++ configs/testing/metrics/linreg.json | 143 ++++++++++ configs/testing/metrics/logreg.json | 140 ++++++++++ configs/testing/metrics/nusvc.json | 94 +++++++ configs/testing/metrics/nusvr.json | 73 ++++++ configs/testing/metrics/pca.json | 170 ++++++++++++ configs/testing/metrics/rf_clsf.json | 154 +++++++++++ configs/testing/metrics/rf_regr.json | 150 +++++++++++ configs/testing/metrics/ridge.json | 150 +++++++++++ configs/testing/metrics/svc.json | 94 +++++++ configs/testing/metrics/svr.json | 71 +++++ .../default_report_gen_config.json | 2 +- 19 files changed, 2208 insertions(+), 1 deletion(-) create mode 100755 configs/testing/metrics/dbscan.json create mode 100755 configs/testing/metrics/elasticnet.json create mode 100755 configs/testing/metrics/kmeans.json create mode 100755 configs/testing/metrics/knn_brute_clsf.json create mode 100755 configs/testing/metrics/knn_brute_regr.json create mode 100755 configs/testing/metrics/knn_kdtree_clsf.json create mode 100755 configs/testing/metrics/knn_kdtree_regr.json create mode 100755 configs/testing/metrics/lasso.json create mode 100644 configs/testing/metrics/linreg.json create mode 100755 configs/testing/metrics/logreg.json create mode 100755 configs/testing/metrics/nusvc.json create mode 100755 configs/testing/metrics/nusvr.json create mode 100755 configs/testing/metrics/pca.json create mode 100755 
configs/testing/metrics/rf_clsf.json create mode 100644 configs/testing/metrics/rf_regr.json create mode 100755 configs/testing/metrics/ridge.json create mode 100755 configs/testing/metrics/svc.json create mode 100755 configs/testing/metrics/svr.json diff --git a/configs/testing/metrics/dbscan.json b/configs/testing/metrics/dbscan.json new file mode 100755 index 000000000..6c2f5f9ed --- /dev/null +++ b/configs/testing/metrics/dbscan.json @@ -0,0 +1,174 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "eps": [18800] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "eps": [2] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", 
+ "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + } + ], + "eps": [0.5] + } + ] +} diff --git a/configs/testing/metrics/elasticnet.json b/configs/testing/metrics/elasticnet.json new file mode 100755 index 000000000..ac1aac1ce --- /dev/null +++ b/configs/testing/metrics/elasticnet.json @@ -0,0 +1,120 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "alpha": [0.005], + "tol": [1e-4], + "l1_ratio": [0.85] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": 
"data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": [0.01], + "tol": [1e-4], + "l1_ratio": [0.7] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": [0.0625], + "tol": [1e-4], + "l1_ratio": [0.75] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": [0.006], + "tol": [1e-4], + "l1_ratio": [0.25] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": [0.15], + "tol": [1e-4], + "l1_ratio": [0.4] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/kmeans.json b/configs/testing/metrics/kmeans.json new file mode 100755 index 000000000..0ab273a61 --- /dev/null +++ b/configs/testing/metrics/kmeans.json @@ -0,0 +1,244 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": 
"data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + }, + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + }, + "testing": + { + "x": "data/covertype_x_test.npy", + "y": "data/covertype_y_test.npy" + } + }, + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + }, + { + "source": "npy", + "name": "abalone", + 
"training": { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "airline-ohe", + "training": { + "x": "data/airline-ohe_x_train.npy", + "y": "data/airline-ohe_y_train.npy" + }, + "testing": { + "x": "data/airline-ohe_x_test.npy", + "y": "data/airline-ohe_y_test.npy" + } + }, + { + "source": "npy", + "name": "higgs1m", + "training": { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "airline", + "training": { + "x": "data/airline_x_train.npy", + "y": "data/airline_y_train.npy" + }, + "testing": { + "x": "data/airline_x_test.npy", + "y": "data/airline_y_test.npy" + } + }, + { + "source": "npy", + "name": "covtype", + "training": { + "x": "data/covtype_x_train.npy", + "y": "data/covtype_y_train.npy" + }, + "testing": { + "x": "data/covtype_x_test.npy", + "y": "data/covtype_y_test.npy" + } + }, + { + "source": "npy", + "name": "epsilon", + "training": { + "x": "data/epsilon_x_train.npy", + "y": "data/epsilon_y_train.npy" + }, + "testing": { + "x": "data/epsilon_x_test.npy", + "y": "data/epsilon_y_test.npy" + } + }, + { + "source": "npy", + "name": "higgs", + "training": { + "x": "data/higgs_x_train.npy", + "y": "data/higgs_y_train.npy" + }, + "testing": { + "x": "data/higgs_x_test.npy", + "y": "data/higgs_y_test.npy" + } + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [20], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + } + ] +} diff --git a/configs/testing/metrics/knn_brute_clsf.json 
b/configs/testing/metrics/knn_brute_clsf.json new file mode 100755 index 000000000..549cb1e95 --- /dev/null +++ b/configs/testing/metrics/knn_brute_clsf.json @@ -0,0 +1,85 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_clsf", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "method": ["brute"] + } + ] +} diff --git a/configs/testing/metrics/knn_brute_regr.json b/configs/testing/metrics/knn_brute_regr.json new file mode 100755 index 000000000..ac629fa0c --- /dev/null +++ b/configs/testing/metrics/knn_brute_regr.json @@ -0,0 +1,72 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + 
"training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + }, + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + }, + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/knn_kdtree_clsf.json b/configs/testing/metrics/knn_kdtree_clsf.json new file mode 100755 index 000000000..927dc4466 --- /dev/null +++ b/configs/testing/metrics/knn_kdtree_clsf.json @@ -0,0 +1,71 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_clsf", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": 
"npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "method": ["kd_tree"] + } + ] +} diff --git a/configs/testing/metrics/knn_kdtree_regr.json b/configs/testing/metrics/knn_kdtree_regr.json new file mode 100755 index 000000000..5b20c9ea3 --- /dev/null +++ b/configs/testing/metrics/knn_kdtree_regr.json @@ -0,0 +1,87 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + }, + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + }, + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + }, + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", 
+ "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "method": ["kd_tree"] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/lasso.json b/configs/testing/metrics/lasso.json new file mode 100755 index 000000000..adbfa012a --- /dev/null +++ b/configs/testing/metrics/lasso.json @@ -0,0 +1,115 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "alpha": [-0.0025], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": [0.015625], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": [0.0625], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": [-0.0625], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": 
"medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": [0.03125], + "tol": [1e-4] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/linreg.json b/configs/testing/metrics/linreg.json new file mode 100644 index 000000000..7de63fd72 --- /dev/null +++ b/configs/testing/metrics/linreg.json @@ -0,0 +1,143 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": 
+ { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/logreg.json b/configs/testing/metrics/logreg.json new file mode 100755 index 000000000..ff078f47a --- /dev/null +++ b/configs/testing/metrics/logreg.json @@ -0,0 +1,140 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "log_reg", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + 
"training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + }, + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + }, + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + }, + { + "source": "npy", + "name": "higgs1m", + "training": { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + + ], + "maxiter": [5000], + "tol": [0] + } + ] +} diff --git a/configs/testing/metrics/nusvc.json b/configs/testing/metrics/nusvc.json new file mode 100755 index 000000000..0021ecf4e --- /dev/null +++ b/configs/testing/metrics/nusvc.json @@ -0,0 +1,94 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": 
"data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "nu": [0.07], + "kernel": ["linear"] + }, + { + "algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "nu": [0.25], + "kernel": ["sigmoid"] + }, + { + "algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "nu": [0.25], + "kernel": ["linear"] + }, + { + "algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "nu": [0.01], + "kernel": ["rbf"] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/nusvr.json b/configs/testing/metrics/nusvr.json new file mode 100755 index 000000000..36ac54d34 --- /dev/null +++ b/configs/testing/metrics/nusvr.json @@ -0,0 +1,73 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "nusvr", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "nusvr", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": 
"data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "C": [0.1], + "kernel": ["poly"], + "nu": [0.17] + }, + { + "algorithm": "nusvr", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "C": [2.0], + "kernel": ["rbf"], + "nu": [0.8] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/pca.json b/configs/testing/metrics/pca.json new file mode 100755 index 000000000..8e3dc1624 --- /dev/null +++ b/configs/testing/metrics/pca.json @@ -0,0 +1,170 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": + { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + } + } + ], + "svd-solver": ["full"], + 
"n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + } + ] +} diff --git a/configs/testing/metrics/rf_clsf.json b/configs/testing/metrics/rf_clsf.json new file mode 100755 index 000000000..144fef33c --- /dev/null +++ b/configs/testing/metrics/rf_clsf.json @@ -0,0 +1,154 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "df_clsf", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": 
"data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + }, + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + }, + + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + }, + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + }, + "testing": + { + "x": "data/covertype_x_test.npy", + "y": "data/covertype_y_test.npy" + } + }, + { + "source": "npy", + "name": "covtype", + "training": { + "x": "data/covtype_x_train.npy", + "y": "data/covtype_y_train.npy" + }, + "testing": { + "x": "data/covtype_x_test.npy", + "y": 
"data/covtype_y_test.npy" + } + } + ], + "num-trees": [500] + } + ] +} diff --git a/configs/testing/metrics/rf_regr.json b/configs/testing/metrics/rf_regr.json new file mode 100644 index 000000000..8c2709b5a --- /dev/null +++ b/configs/testing/metrics/rf_regr.json @@ -0,0 +1,150 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": 
"data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/ridge.json b/configs/testing/metrics/ridge.json new file mode 100755 index 000000000..4b676ae8a --- /dev/null +++ b/configs/testing/metrics/ridge.json @@ -0,0 +1,150 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + 
"x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "alpha": [1.0] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/svc.json b/configs/testing/metrics/svc.json new file mode 100755 index 000000000..a566469ff --- /dev/null +++ b/configs/testing/metrics/svc.json @@ -0,0 +1,94 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + 
{ + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "C": [0.0015], + "kernel": ["linear"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "C": [500], + "kernel": ["sigmoid"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "C": [100], + "kernel": ["linear"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "C": [1.0], + "kernel": ["rbf"] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/svr.json b/configs/testing/metrics/svr.json new file mode 100755 index 000000000..9b2f6adf2 --- /dev/null +++ b/configs/testing/metrics/svr.json @@ -0,0 +1,71 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "svr", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "svr", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": 
"data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "C": [0.1], + "kernel": ["poly"] + }, + { + "algorithm": "svr", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "C": [2.0], + "kernel": ["rbf"] + } + ] +} \ No newline at end of file diff --git a/report_generator/default_report_gen_config.json b/report_generator/default_report_gen_config.json index 677d12f11..866e901f1 100755 --- a/report_generator/default_report_gen_config.json +++ b/report_generator/default_report_gen_config.json @@ -15,4 +15,4 @@ "hardware_hash", "measurement_time" ] -} +} \ No newline at end of file From 7078ca58c4deac8bad97ce7e5daef828c5cf4b37 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 14:20:36 +0300 Subject: [PATCH 02/22] add report generator metrics config --- .../metrics_report_gen_config.json | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 report_generator/metrics_report_gen_config.json diff --git a/report_generator/metrics_report_gen_config.json b/report_generator/metrics_report_gen_config.json new file mode 100644 index 000000000..7efe1fcf7 --- /dev/null +++ b/report_generator/metrics_report_gen_config.json @@ -0,0 +1,34 @@ +{ + "header": [ + "stage", + "input_data:data_order", + "input_data:data_type", + "input_data:dataset_name", + "input_data:rows", + "input_data:columns", + "input_data:classes", + "input_data:n_clusters", + "n_clusters", + "algorithm_parameters:algorithm", + "algorithm_parameters:tol", + "algorithm_parameters:max_iter", + "algorithm_parameters:n_init", + "algorithm_parameters:alpha", + "algorithm_parameters:l1_ratio", + "algorithm_parameters:solver", + "algorithm_parameters:C", + "algorithm_parameters:cache_size", + "algorithm_parameters:kernel", + "algorithm_parameters:nu", + 
"algorithm_parameters:eps", + "algorithm_parameters:n_neighbors", + "algorithm_parameters:metric", + "algorithm_parameters:n_estimators" + ], + "comparison_method": { + "default": "2 / 1" + }, + "aggregation_metrics": [ + "geomean" + ] +} From 202a11dc955ba92ae93a593cccfc30a173e06772 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 14:18:52 +0300 Subject: [PATCH 03/22] add configs --- configs/testing/metrics/dbscan.json | 174 +++++++++++++ configs/testing/metrics/elasticnet.json | 120 +++++++++ configs/testing/metrics/kmeans.json | 244 ++++++++++++++++++ configs/testing/metrics/knn_brute_clsf.json | 85 ++++++ configs/testing/metrics/knn_brute_regr.json | 72 ++++++ configs/testing/metrics/knn_kdtree_clsf.json | 71 +++++ configs/testing/metrics/knn_kdtree_regr.json | 87 +++++++ configs/testing/metrics/lasso.json | 115 +++++++++ configs/testing/metrics/linreg.json | 143 ++++++++++ configs/testing/metrics/logreg.json | 140 ++++++++++ configs/testing/metrics/nusvc.json | 94 +++++++ configs/testing/metrics/nusvr.json | 73 ++++++ configs/testing/metrics/pca.json | 170 ++++++++++++ configs/testing/metrics/rf_clsf.json | 154 +++++++++++ configs/testing/metrics/rf_regr.json | 150 +++++++++++ configs/testing/metrics/ridge.json | 150 +++++++++++ configs/testing/metrics/svc.json | 94 +++++++ configs/testing/metrics/svr.json | 71 +++++ .../default_report_gen_config.json | 2 +- 19 files changed, 2208 insertions(+), 1 deletion(-) create mode 100755 configs/testing/metrics/dbscan.json create mode 100755 configs/testing/metrics/elasticnet.json create mode 100755 configs/testing/metrics/kmeans.json create mode 100755 configs/testing/metrics/knn_brute_clsf.json create mode 100755 configs/testing/metrics/knn_brute_regr.json create mode 100755 configs/testing/metrics/knn_kdtree_clsf.json create mode 100755 configs/testing/metrics/knn_kdtree_regr.json create mode 100755 configs/testing/metrics/lasso.json create mode 100644 configs/testing/metrics/linreg.json create mode 
100755 configs/testing/metrics/logreg.json create mode 100755 configs/testing/metrics/nusvc.json create mode 100755 configs/testing/metrics/nusvr.json create mode 100755 configs/testing/metrics/pca.json create mode 100755 configs/testing/metrics/rf_clsf.json create mode 100644 configs/testing/metrics/rf_regr.json create mode 100755 configs/testing/metrics/ridge.json create mode 100755 configs/testing/metrics/svc.json create mode 100755 configs/testing/metrics/svr.json diff --git a/configs/testing/metrics/dbscan.json b/configs/testing/metrics/dbscan.json new file mode 100755 index 000000000..6c2f5f9ed --- /dev/null +++ b/configs/testing/metrics/dbscan.json @@ -0,0 +1,174 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "eps": [18800] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + 
"y": "data/mnist_y_test.npy" + } + } + ], + "eps": [2] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "eps": [0.5] + }, + { + "algorithm": "dbscan", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + } + ], + "eps": [0.5] + } + ] +} diff --git a/configs/testing/metrics/elasticnet.json b/configs/testing/metrics/elasticnet.json new file mode 100755 index 000000000..ac1aac1ce --- /dev/null +++ b/configs/testing/metrics/elasticnet.json @@ -0,0 +1,120 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": 
"data/abalone_y_test.npy" + } + } + ], + "alpha": [0.005], + "tol": [1e-4], + "l1_ratio": [0.85] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": [0.01], + "tol": [1e-4], + "l1_ratio": [0.7] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": [0.0625], + "tol": [1e-4], + "l1_ratio": [0.75] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": [0.006], + "tol": [1e-4], + "l1_ratio": [0.25] + }, + { + "algorithm": "elasticnet", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": [0.15], + "tol": [1e-4], + "l1_ratio": [0.4] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/kmeans.json b/configs/testing/metrics/kmeans.json new file mode 100755 index 000000000..0ab273a61 --- /dev/null +++ b/configs/testing/metrics/kmeans.json @@ -0,0 +1,244 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + 
"algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + }, + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + }, + "testing": + { + "x": "data/covertype_x_test.npy", + "y": "data/covertype_y_test.npy" + } + }, + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": 
"data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + }, + { + "source": "npy", + "name": "abalone", + "training": { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "airline-ohe", + "training": { + "x": "data/airline-ohe_x_train.npy", + "y": "data/airline-ohe_y_train.npy" + }, + "testing": { + "x": "data/airline-ohe_x_test.npy", + "y": "data/airline-ohe_y_test.npy" + } + }, + { + "source": "npy", + "name": "higgs1m", + "training": { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "airline", + "training": { + "x": "data/airline_x_train.npy", + "y": "data/airline_y_train.npy" + }, + "testing": { + "x": "data/airline_x_test.npy", + "y": "data/airline_y_test.npy" + } + }, + { + "source": "npy", + "name": "covtype", + "training": { + "x": "data/covtype_x_train.npy", + "y": "data/covtype_y_train.npy" + }, + "testing": { + "x": "data/covtype_x_test.npy", + "y": "data/covtype_y_test.npy" + } + }, + { + "source": "npy", + "name": "epsilon", + "training": { + "x": "data/epsilon_x_train.npy", + "y": "data/epsilon_y_train.npy" + }, + "testing": { + "x": "data/epsilon_x_test.npy", + "y": "data/epsilon_y_test.npy" + } + }, + { + "source": "npy", + "name": "higgs", + "training": { + "x": "data/higgs_x_train.npy", + "y": "data/higgs_y_train.npy" + }, + "testing": { + "x": "data/higgs_x_test.npy", + "y": "data/higgs_y_test.npy" + } + } + ], + 
"time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [20], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + } + ] +} diff --git a/configs/testing/metrics/knn_brute_clsf.json b/configs/testing/metrics/knn_brute_clsf.json new file mode 100755 index 000000000..549cb1e95 --- /dev/null +++ b/configs/testing/metrics/knn_brute_clsf.json @@ -0,0 +1,85 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_clsf", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "method": ["brute"] + } + ] +} diff --git a/configs/testing/metrics/knn_brute_regr.json b/configs/testing/metrics/knn_brute_regr.json new file mode 100755 index 000000000..ac629fa0c --- /dev/null +++ b/configs/testing/metrics/knn_brute_regr.json @@ -0,0 +1,72 @@ +{ + "common": { + "lib": ["sklearn"], + 
"data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + }, + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + }, + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/knn_kdtree_clsf.json b/configs/testing/metrics/knn_kdtree_clsf.json new file mode 100755 index 000000000..927dc4466 --- /dev/null +++ b/configs/testing/metrics/knn_kdtree_clsf.json @@ -0,0 +1,71 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_clsf", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": 
"data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "method": ["kd_tree"] + } + ] +} diff --git a/configs/testing/metrics/knn_kdtree_regr.json b/configs/testing/metrics/knn_kdtree_regr.json new file mode 100755 index 000000000..5b20c9ea3 --- /dev/null +++ b/configs/testing/metrics/knn_kdtree_regr.json @@ -0,0 +1,87 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "knn_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + }, + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + }, + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + }, + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": 
+ { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + }, + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "method": ["kd_tree"] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/lasso.json b/configs/testing/metrics/lasso.json new file mode 100755 index 000000000..adbfa012a --- /dev/null +++ b/configs/testing/metrics/lasso.json @@ -0,0 +1,115 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "alpha": [-0.0025], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": [0.015625], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": [0.0625], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + 
"testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": [-0.0625], + "tol": [1e-4] + }, + { + "algorithm": "lasso", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": [0.03125], + "tol": [1e-4] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/linreg.json b/configs/testing/metrics/linreg.json new file mode 100644 index 000000000..7de63fd72 --- /dev/null +++ b/configs/testing/metrics/linreg.json @@ -0,0 +1,143 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + 
"testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/logreg.json b/configs/testing/metrics/logreg.json new file mode 100755 index 000000000..ff078f47a --- /dev/null +++ b/configs/testing/metrics/logreg.json @@ -0,0 +1,140 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "log_reg", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + "testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": 
"a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + }, + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + }, + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + }, + { + "source": "npy", + "name": "higgs1m", + "training": { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + + ], + "maxiter": [5000], + "tol": [0] + } + ] +} diff --git a/configs/testing/metrics/nusvc.json b/configs/testing/metrics/nusvc.json new file mode 100755 index 000000000..0021ecf4e --- /dev/null +++ b/configs/testing/metrics/nusvc.json @@ -0,0 +1,94 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + 
"algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "nu": [0.07], + "kernel": ["linear"] + }, + { + "algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "nu": [0.25], + "kernel": ["sigmoid"] + }, + { + "algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "nu": [0.25], + "kernel": ["linear"] + }, + { + "algorithm": "nusvc", + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "nu": [0.01], + "kernel": ["rbf"] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/nusvr.json b/configs/testing/metrics/nusvr.json new file mode 100755 index 000000000..36ac54d34 --- /dev/null +++ b/configs/testing/metrics/nusvr.json @@ -0,0 +1,73 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "nusvr", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "nusvr", + "dataset": [ + { 
+ "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "C": [0.1], + "kernel": ["poly"], + "nu": [0.17] + }, + { + "algorithm": "nusvr", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "C": [2.0], + "kernel": ["rbf"], + "nu": [0.8] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/pca.json b/configs/testing/metrics/pca.json new file mode 100755 index 000000000..8e3dc1624 --- /dev/null +++ b/configs/testing/metrics/pca.json @@ -0,0 +1,170 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": + { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + 
"dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.6] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + }, + { + "algorithm": "pca", + "dataset": [ + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + } + } + ], + "svd-solver": ["full"], + "n-components": [0.8] + } + ] +} diff --git a/configs/testing/metrics/rf_clsf.json b/configs/testing/metrics/rf_clsf.json new file mode 100755 index 000000000..144fef33c --- /dev/null +++ b/configs/testing/metrics/rf_clsf.json @@ -0,0 +1,154 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "df_clsf", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "letters", + "training": { + "x": "data/letters_x_train.npy", + "y": "data/letters_y_train.npy" + }, + 
"testing": { + "x": "data/letters_x_test.npy", + "y": "data/letters_y_test.npy" + } + }, + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + }, + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + }, + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + }, + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + }, + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + }, + + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + }, + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + }, + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + }, + "testing": + { + "x": "data/covertype_x_test.npy", + "y": "data/covertype_y_test.npy" + } + }, + { + "source": "npy", + 
"name": "covtype", + "training": { + "x": "data/covtype_x_train.npy", + "y": "data/covtype_y_train.npy" + }, + "testing": { + "x": "data/covtype_x_test.npy", + "y": "data/covtype_y_test.npy" + } + } + ], + "num-trees": [500] + } + ] +} diff --git a/configs/testing/metrics/rf_regr.json b/configs/testing/metrics/rf_regr.json new file mode 100644 index 000000000..8c2709b5a --- /dev/null +++ b/configs/testing/metrics/rf_regr.json @@ -0,0 +1,150 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + 
"training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ] + }, + { + "algorithm": "df_regr", + "dtype": ["float64"], + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/ridge.json b/configs/testing/metrics/ridge.json new file mode 100755 index 000000000..4b676ae8a --- /dev/null +++ b/configs/testing/metrics/ridge.json @@ -0,0 +1,150 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + 
"source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ], + "alpha": [1.0] + }, + { + "algorithm": "ridge", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "alpha": [1.0] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/svc.json b/configs/testing/metrics/svc.json new file mode 100755 index 000000000..a566469ff --- /dev/null +++ b/configs/testing/metrics/svc.json @@ -0,0 +1,94 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "svm", + "dataset": [ + { + 
"source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "C": [0.0015], + "kernel": ["linear"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "C": [500], + "kernel": ["sigmoid"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "C": [100], + "kernel": ["linear"] + }, + { + "algorithm": "svm", + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "C": [1.0], + "kernel": ["rbf"] + } + ] +} \ No newline at end of file diff --git a/configs/testing/metrics/svr.json b/configs/testing/metrics/svr.json new file mode 100755 index 000000000..9b2f6adf2 --- /dev/null +++ b/configs/testing/metrics/svr.json @@ -0,0 +1,71 @@ +{ + "common": { + "lib": ["sklearn"], + "data-format": ["pandas"], + "data-order": ["F"], + "dtype": ["float64"] + }, + "cases": [ + { + "algorithm": "svr", + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": + { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": + { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ] + }, + { + "algorithm": "svr", + "dataset": [ + { + "source": "npy", + "name": "california_housing", + 
"training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "C": [0.1], + "kernel": ["poly"] + }, + { + "algorithm": "svr", + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "C": [2.0], + "kernel": ["rbf"] + } + ] +} \ No newline at end of file diff --git a/report_generator/default_report_gen_config.json b/report_generator/default_report_gen_config.json index a4f75a5ec..f45502fea 100755 --- a/report_generator/default_report_gen_config.json +++ b/report_generator/default_report_gen_config.json @@ -16,4 +16,4 @@ "aggregation_metrics": [ "geomean" ] -} +} \ No newline at end of file From 14e85b2dde1ae58023e6987180feacd38c9866c9 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 14:20:36 +0300 Subject: [PATCH 04/22] add report generator metrics config --- .../metrics_report_gen_config.json | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 report_generator/metrics_report_gen_config.json diff --git a/report_generator/metrics_report_gen_config.json b/report_generator/metrics_report_gen_config.json new file mode 100644 index 000000000..7efe1fcf7 --- /dev/null +++ b/report_generator/metrics_report_gen_config.json @@ -0,0 +1,34 @@ +{ + "header": [ + "stage", + "input_data:data_order", + "input_data:data_type", + "input_data:dataset_name", + "input_data:rows", + "input_data:columns", + "input_data:classes", + "input_data:n_clusters", + "n_clusters", + "algorithm_parameters:algorithm", + "algorithm_parameters:tol", + "algorithm_parameters:max_iter", + "algorithm_parameters:n_init", + "algorithm_parameters:alpha", + "algorithm_parameters:l1_ratio", + "algorithm_parameters:solver", 
+ "algorithm_parameters:C", + "algorithm_parameters:cache_size", + "algorithm_parameters:kernel", + "algorithm_parameters:nu", + "algorithm_parameters:eps", + "algorithm_parameters:n_neighbors", + "algorithm_parameters:metric", + "algorithm_parameters:n_estimators" + ], + "comparison_method": { + "default": "2 / 1" + }, + "aggregation_metrics": [ + "geomean" + ] +} From 2a901962346a99ece6cebc79e62467f3ba4a9240 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 14:41:59 +0300 Subject: [PATCH 05/22] add knn_regr --- sklearn_bench/knn_regr.py | 100 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 sklearn_bench/knn_regr.py diff --git a/sklearn_bench/knn_regr.py b/sklearn_bench/knn_regr.py new file mode 100644 index 000000000..3c8efa9ab --- /dev/null +++ b/sklearn_bench/knn_regr.py @@ -0,0 +1,100 @@ +# =============================================================================== +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================== + +import argparse + +import bench +import numpy as np + + +def main(): + from sklearn.neighbors import KNeighborsRegressor + + # Load generated data + X_train, X_test, y_train, y_test = bench.load_data(params) + params.n_classes = len(np.unique(y_train)) + + # Create regression object + knn_regr = KNeighborsRegressor(n_neighbors=params.n_neighbors, + weights=params.weights, + algorithm=params.method, + metric=params.metric, + n_jobs=params.n_jobs) + + # Measure time and accuracy on fitting + train_time, _ = bench.measure_function_time( + knn_regr.fit, X_train, y_train, params=params) + if params.task == 'regression': + y_pred = knn_regr.predict(X_train) + train_rmse = bench.rmse_score(y_train, y_pred) + train_r2 = bench.r2_score(y_train, y_pred) + + # Measure time and accuracy on prediction + if params.task == 'regression': + predict_time, yp = bench.measure_function_time(knn_regr.predict, X_test, + params=params) + test_rmse = bench.rmse_score(y_test, yp) + test_r2 = bench.r2_score(y_test, yp) + else: + predict_time, _ = bench.measure_function_time(knn_regr.kneighbors, X_test, + params=params) + + if params.task == 'regression': + bench.print_output( + library='sklearn', + algorithm=knn_regr._fit_method + '_knn_regression', + stages=['training', 'prediction'], + params=params, + functions=['knn_regr.fit', 'knn_regr.predict'], + times=[train_time, predict_time], + metric_type=['rmse', 'r2_score'], + metrics=[[train_rmse, test_rmse], [train_r2, test_r2]], + data=[X_train, X_test], + alg_instance=knn_regr, + ) + else: + bench.print_output( + library='sklearn', + algorithm=knn_regr._fit_method + '_knn_search', + stages=['training', 'search'], + params=params, + functions=['knn_regr.fit', 'knn_regr.kneighbors'], + times=[train_time, predict_time], + metric_type=None, + metrics=[], + data=[X_train, X_test], + alg_instance=knn_regr, + ) + + +if __name__ == "__main__": + parser = 
argparse.ArgumentParser( + description='scikit-learn kNN regressor benchmark') + + parser.add_argument('--task', default='regression', type=str, + choices=('search', 'regression'), + help='The type of kNN task: search or regression') + parser.add_argument('--n-neighbors', default=5, type=int, + help='The number of neighbors to use') + parser.add_argument('--weights', type=str, default='uniform', + help='The weight function to be used in prediction') + parser.add_argument('--method', type=str, default='brute', + choices=('brute', 'kd_tree', 'ball_tree', 'auto'), + help='The method to find the nearest neighbors') + parser.add_argument('--metric', type=str, default='euclidean', + help='The metric to calculate distances') + params = bench.parse_args(parser) + bench.run_with_context(params, main) From 98dbf287a17b18e934574010719d5dcc9ffbd7a5 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 14:46:34 +0300 Subject: [PATCH 06/22] add ci testing for knn_regr --- configs/testing/sklearn.json | 17 +++++++++++++++++ report_generator/default_report_gen_config.json | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/configs/testing/sklearn.json b/configs/testing/sklearn.json index be566823d..f3e7ca340 100755 --- a/configs/testing/sklearn.json +++ b/configs/testing/sklearn.json @@ -217,6 +217,23 @@ ], "method": ["brute", "kd_tree"] }, + { + "algorithm": "knn_regr", + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "training": { + "n_samples": 1000 + }, + "testing": { + "n_samples": 200 + } + } + ], + "method": ["brute", "kd_tree"] + }, { "algorithm": "train_test_split", "dataset": [ diff --git a/report_generator/default_report_gen_config.json b/report_generator/default_report_gen_config.json index f45502fea..a4f75a5ec 100755 --- a/report_generator/default_report_gen_config.json +++ b/report_generator/default_report_gen_config.json @@ -16,4 +16,4 @@ "aggregation_metrics": [ "geomean" ] -} \ No newline at end
of file +} From 8d6d76e2dab1739b2cad5bdcd919796334e3775a Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 21:13:44 +0300 Subject: [PATCH 07/22] rename algos in cuml --- cuml_bench/df_clsf.py | 2 +- cuml_bench/df_regr.py | 2 +- cuml_bench/elasticnet.py | 2 +- cuml_bench/knn_clsf.py | 2 +- cuml_bench/linear.py | 2 +- cuml_bench/log_reg.py | 2 +- cuml_bench/pca.py | 2 +- cuml_bench/ridge.py | 2 +- cuml_bench/svm.py | 2 +- cuml_bench/svr.py | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cuml_bench/df_clsf.py b/cuml_bench/df_clsf.py index 848e97d7e..80f659638 100755 --- a/cuml_bench/df_clsf.py +++ b/cuml_bench/df_clsf.py @@ -93,7 +93,7 @@ def predict(clf, X): predict_time, y_pred = bench.measure_function_time(predict, clf, X_test, params=params) test_acc = 100 * bench.accuracy_score(y_pred, y_test) -bench.print_output(library='cuml', algorithm='decision_forest_classification', +bench.print_output(library='cuml', algorithm='df_clsf', stages=['training', 'prediction'], params=params, functions=['df_clsf.fit', 'df_clsf.predict'], times=[fit_time, predict_time], metric_type='accuracy[%]', diff --git a/cuml_bench/df_regr.py b/cuml_bench/df_regr.py index 61e08ce7b..088760533 100644 --- a/cuml_bench/df_regr.py +++ b/cuml_bench/df_regr.py @@ -89,7 +89,7 @@ def predict(regr, X): predict_time, y_pred = bench.measure_function_time(predict, regr, X_test, params=params) test_rmse = bench.rmse_score(y_pred, y_test) -bench.print_output(library='cuml', algorithm='decision_forest_regression', +bench.print_output(library='cuml', algorithm='df_regr', stages=['training', 'prediction'], params=params, functions=['df_regr.fit', 'df_regr.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/elasticnet.py b/cuml_bench/elasticnet.py index 2f4e3dd5e..2d969a886 100755 --- a/cuml_bench/elasticnet.py +++ b/cuml_bench/elasticnet.py @@ -53,7 +53,7 @@ pred_test = regr.predict(X_test) test_rmse = bench.rmse_score(pred_test, 
y_test) -bench.print_output(library='cuml', algorithm='elastic-net', +bench.print_output(library='cuml', algorithm='elasticnet', stages=['training', 'prediction'], params=params, functions=['ElasticNet.fit', 'ElasticNet.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/knn_clsf.py b/cuml_bench/knn_clsf.py index 6ccf3aa47..ec7d21490 100755 --- a/cuml_bench/knn_clsf.py +++ b/cuml_bench/knn_clsf.py @@ -64,7 +64,7 @@ if params.task == 'classification': bench.print_output(library='cuml', - algorithm=knn_clsf.algorithm + '_knn_classification', + algorithm=knn_clsf.algorithm + '_knn_clsf', stages=['training', 'prediction'], params=params, functions=['knn_clsf.fit', 'knn_clsf.predict'], times=[train_time, predict_time], diff --git a/cuml_bench/linear.py b/cuml_bench/linear.py index bfe81991f..714454cfc 100644 --- a/cuml_bench/linear.py +++ b/cuml_bench/linear.py @@ -47,7 +47,7 @@ yp = regr.predict(X_train) train_rmse = bench.rmse_score(yp, y_train) -bench.print_output(library='cuml', algorithm='linear_regression', +bench.print_output(library='cuml', algorithm='lin_reg', stages=['training', 'prediction'], params=params, functions=['Linear.fit', 'Linear.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/log_reg.py b/cuml_bench/log_reg.py index 599b1bfdf..5dda0611a 100644 --- a/cuml_bench/log_reg.py +++ b/cuml_bench/log_reg.py @@ -58,7 +58,7 @@ clf.predict, X_test, params=params) test_acc = 100 * bench.accuracy_score(y_pred, y_test) -bench.print_output(library='cuml', algorithm='logistic_regression', +bench.print_output(library='cuml', algorithm='log_reg', stages=['training', 'prediction'], params=params, functions=['LogReg.fit', 'LogReg.predict'], times=[fit_time, predict_time], metric_type='accuracy[%]', diff --git a/cuml_bench/pca.py b/cuml_bench/pca.py index 35f20f3b6..bf9b9a878 100644 --- a/cuml_bench/pca.py +++ b/cuml_bench/pca.py @@ -48,7 +48,7 @@ transform_time, _ = bench.measure_function_time( 
pca.transform, X_train, params=params) -bench.print_output(library='cuml', algorithm='pca', +bench.print_output(library='cuml', algorithm='PCA', stages=['training', 'transformation'], params=params, functions=['PCA.fit', 'PCA.transform'], times=[fit_time, transform_time], metric_type=None, diff --git a/cuml_bench/ridge.py b/cuml_bench/ridge.py index d6d488673..caf80392b 100644 --- a/cuml_bench/ridge.py +++ b/cuml_bench/ridge.py @@ -49,7 +49,7 @@ yp = regr.predict(X_train) train_rmse = bench.rmse_score(yp, y_train) -bench.print_output(library='cuml', algorithm='ridge_regression', +bench.print_output(library='cuml', algorithm='ridge_regr', stages=['training', 'prediction'], params=params, functions=['Ridge.fit', 'Ridge.predict'], times=[fit_time, predict_time], metric_type='rmse', diff --git a/cuml_bench/svm.py b/cuml_bench/svm.py index 112427397..0b2c0020a 100644 --- a/cuml_bench/svm.py +++ b/cuml_bench/svm.py @@ -79,7 +79,7 @@ def metric_call(x, y): clf_predict, X_test, params=params) test_acc = metric_call(y_test, y_pred) -bench.print_output(library='cuml', algorithm='svc', +bench.print_output(library='cuml', algorithm='SVC', stages=['training', state_predict], params=params, functions=['SVM.fit', 'SVM.predict'], times=[fit_time, predict_train_time], metric_type=metric_type, diff --git a/cuml_bench/svr.py b/cuml_bench/svr.py index 7560ff103..616b5bcbd 100644 --- a/cuml_bench/svr.py +++ b/cuml_bench/svr.py @@ -63,7 +63,7 @@ regr.predict, X_test, params=params) test_rmse = bench.rmse_score(y_test, y_pred) -bench.print_output(library='cuml', algorithm='svr', +bench.print_output(library='cuml', algorithm='SVR', stages=['training', 'prediction'], params=params, functions=['SVR.fit', 'SVR.predict'], times=[fit_time, predict_train_time], metric_type='rmse', From b4cdd6cf1592d76d8023f6b271b2795a48442fea Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 21:14:02 +0300 Subject: [PATCH 08/22] rename algos in sklearn --- sklearn_bench/df_clsf.py | 2 +- 
sklearn_bench/df_regr.py | 2 +- sklearn_bench/knn_clsf.py | 2 +- sklearn_bench/knn_regr.py | 2 +- sklearn_bench/linear.py | 2 +- sklearn_bench/log_reg.py | 2 +- sklearn_bench/nusvc.py | 2 +- sklearn_bench/nusvr.py | 2 +- sklearn_bench/pca.py | 2 +- sklearn_bench/ridge.py | 2 +- sklearn_bench/svm.py | 2 +- sklearn_bench/svr.py | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sklearn_bench/df_clsf.py b/sklearn_bench/df_clsf.py index 95709340c..d99ffa898 100644 --- a/sklearn_bench/df_clsf.py +++ b/sklearn_bench/df_clsf.py @@ -56,7 +56,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='decision_forest_classification', + algorithm='df_clsf', stages=['training', 'prediction'], params=params, functions=['df_clsf.fit', 'df_clsf.predict'], diff --git a/sklearn_bench/df_regr.py b/sklearn_bench/df_regr.py index 2d12c65f7..f21eaee9c 100644 --- a/sklearn_bench/df_regr.py +++ b/sklearn_bench/df_regr.py @@ -50,7 +50,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='decision_forest_regression', + algorithm='df_regr', stages=['training', 'prediction'], params=params, functions=['df_regr.fit', 'df_regr.predict'], diff --git a/sklearn_bench/knn_clsf.py b/sklearn_bench/knn_clsf.py index ef581f537..f58be1650 100755 --- a/sklearn_bench/knn_clsf.py +++ b/sklearn_bench/knn_clsf.py @@ -59,7 +59,7 @@ def main(): if params.task == 'classification': bench.print_output( library='sklearn', - algorithm=knn_clsf._fit_method + '_knn_classification', + algorithm=knn_clsf._fit_method + '_knn_clsf', stages=['training', 'prediction'], params=params, functions=['knn_clsf.fit', 'knn_clsf.predict'], diff --git a/sklearn_bench/knn_regr.py b/sklearn_bench/knn_regr.py index 3c8efa9ab..97de91c10 100644 --- a/sklearn_bench/knn_regr.py +++ b/sklearn_bench/knn_regr.py @@ -55,7 +55,7 @@ def main(): if params.task == 'regression': bench.print_output( library='sklearn', - algorithm=knn_regr._fit_method + '_knn_regression', + algorithm=knn_regr._fit_method 
+ '_knn_regr', stages=['training', 'prediction'], params=params, functions=['knn_regr.fit', 'knn_regr.predict'], diff --git a/sklearn_bench/linear.py b/sklearn_bench/linear.py index c7390efbe..b97d49a6e 100644 --- a/sklearn_bench/linear.py +++ b/sklearn_bench/linear.py @@ -42,7 +42,7 @@ def main(): train_r2 = bench.r2_score(y_train, yp) bench.print_output( - library='sklearn', algorithm='linear_regression', + library='sklearn', algorithm='lin_reg', stages=['training', 'prediction'], params=params, functions=['Linear.fit', 'Linear.predict'], times=[fit_time, predict_time], diff --git a/sklearn_bench/log_reg.py b/sklearn_bench/log_reg.py index 1053d3819..733ee5765 100644 --- a/sklearn_bench/log_reg.py +++ b/sklearn_bench/log_reg.py @@ -58,7 +58,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='logistic_regression', + algorithm='log_reg', stages=['training', 'prediction'], params=params, functions=['LogReg.fit', 'LogReg.predict'], diff --git a/sklearn_bench/nusvc.py b/sklearn_bench/nusvc.py index d98b184df..d3e6eeece 100644 --- a/sklearn_bench/nusvc.py +++ b/sklearn_bench/nusvc.py @@ -68,7 +68,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='nusvc', + algorithm='nuSVC', stages=['training', state_predict], params=params, functions=['NuSVC.fit', f'NuSVC.{state_predict}'], times=[fit_time, predict_train_time], diff --git a/sklearn_bench/nusvr.py b/sklearn_bench/nusvr.py index d31b7d26e..ccfe519ba 100644 --- a/sklearn_bench/nusvr.py +++ b/sklearn_bench/nusvr.py @@ -53,7 +53,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='nusvr', + algorithm='nuSVR', stages=['training', 'prediction'], params=params, functions=['NuSVR.fit', 'NuSVR.predict'], diff --git a/sklearn_bench/pca.py b/sklearn_bench/pca.py index 7e4fcf366..ef71f4cfc 100644 --- a/sklearn_bench/pca.py +++ b/sklearn_bench/pca.py @@ -42,7 +42,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='pca', + algorithm='PCA', stages=['training', 
'transformation'], params=params, functions=['PCA.fit', 'PCA.transform'], diff --git a/sklearn_bench/ridge.py b/sklearn_bench/ridge.py index 3b8f138d2..19718a4e7 100644 --- a/sklearn_bench/ridge.py +++ b/sklearn_bench/ridge.py @@ -44,7 +44,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='ridge_regression', + algorithm='ridge_regr', stages=['training', 'prediction'], params=params, functions=['Ridge.fit', 'Ridge.predict'], diff --git a/sklearn_bench/svm.py b/sklearn_bench/svm.py index 6e17ea00a..88160283e 100644 --- a/sklearn_bench/svm.py +++ b/sklearn_bench/svm.py @@ -68,7 +68,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='svc', + algorithm='SVC', stages=['training', state_predict], params=params, functions=['SVM.fit', f'SVM.{state_predict}'], diff --git a/sklearn_bench/svr.py b/sklearn_bench/svr.py index a3447332b..7e9dc2c8d 100644 --- a/sklearn_bench/svr.py +++ b/sklearn_bench/svr.py @@ -53,7 +53,7 @@ def main(): bench.print_output( library='sklearn', - algorithm='svr', + algorithm='SVR', stages=['training', 'prediction'], params=params, functions=['SVR.fit', 'SVR.predict'], From e9a51c7189be35854b1e7e33a5f7b4a653263d10 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 21:14:45 +0300 Subject: [PATCH 09/22] reworked kmeans config for test metrics --- configs/testing/metrics/kmeans.json | 232 ++++++++++++++++++++++------ 1 file changed, 183 insertions(+), 49 deletions(-) diff --git a/configs/testing/metrics/kmeans.json b/configs/testing/metrics/kmeans.json index 0ab273a61..3c755ec9b 100755 --- a/configs/testing/metrics/kmeans.json +++ b/configs/testing/metrics/kmeans.json @@ -23,7 +23,20 @@ "x": "data/ijcnn_x_test.npy", "y": "data/ijcnn_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [10], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", 
"name": "a9a", @@ -37,7 +50,20 @@ "x": "data/a9a_x_test.npy", "y": "data/a9a_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [45], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "gisette", @@ -51,7 +77,20 @@ "x": "data/gisette_x_test.npy", "y": "data/gisette_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [70], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "klaverjas", @@ -65,7 +104,20 @@ "x": "data/klaverjas_x_test.npy", "y": "data/klaverjas_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [60], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "mnist", @@ -79,7 +131,20 @@ "x": "data/mnist_x_test.npy", "y": "data/mnist_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [25], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "sensit", @@ -93,7 +158,20 @@ "x": "data/sensit_x_test.npy", "y": "data/sensit_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [40], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "skin_segmentation", @@ -107,7 +185,20 @@ "x": "data/skin_segmentation_x_test.npy", "y": "data/skin_segmentation_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + 
"time-limit": [50], + "n-clusters": [5], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "covertype", @@ -121,7 +212,20 @@ "x": "data/covertype_x_test.npy", "y": "data/covertype_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [8], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "codrnanorm", @@ -135,7 +239,20 @@ "x": "data/codrnanorm_x_test.npy", "y": "data/codrnanorm_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [12], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "abalone", @@ -147,19 +264,20 @@ "x": "data/abalone_x_test.npy", "y": "data/abalone_y_test.npy" } - }, - { - "source": "npy", - "name": "airline-ohe", - "training": { - "x": "data/airline-ohe_x_train.npy", - "y": "data/airline-ohe_y_train.npy" - }, - "testing": { - "x": "data/airline-ohe_x_test.npy", - "y": "data/airline-ohe_y_test.npy" - } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [3], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "higgs1m", @@ -171,7 +289,20 @@ "x": "data/higgs1m_x_test.npy", "y": "data/higgs1m_y_test.npy" } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [35], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "letters", @@ -183,19 +314,20 @@ "x": 
"data/letters_x_test.npy", "y": "data/letters_y_test.npy" } - }, - { - "source": "npy", - "name": "airline", - "training": { - "x": "data/airline_x_train.npy", - "y": "data/airline_y_train.npy" - }, - "testing": { - "x": "data/airline_x_test.npy", - "y": "data/airline_y_test.npy" - } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [9], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "covtype", @@ -207,19 +339,20 @@ "x": "data/covtype_x_test.npy", "y": "data/covtype_y_test.npy" } - }, - { - "source": "npy", - "name": "epsilon", - "training": { - "x": "data/epsilon_x_train.npy", - "y": "data/epsilon_y_train.npy" - }, - "testing": { - "x": "data/epsilon_x_test.npy", - "y": "data/epsilon_y_test.npy" - } - }, + } + ], + "time-method": ["box_filter"], + "time-limit": [50], + "n-clusters": [10], + "n_init": [10], + "maxiter": [300], + "init": ["k-means++"], + "tol": [1e-4] + }, + { + "algorithm": "kmeans", + "dtype": ["float64"], + "dataset": [ { "source": "npy", "name": "higgs", @@ -236,6 +369,7 @@ "time-method": ["box_filter"], "time-limit": [50], "n-clusters": [20], + "n_init": [10], "maxiter": [300], "init": ["k-means++"], "tol": [1e-4] From f96893d13c62a560bcefe9a9e2c59395c13648f7 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 21:15:24 +0300 Subject: [PATCH 10/22] add init to report config --- report_generator/metrics_report_gen_config.json | 1 + 1 file changed, 1 insertion(+) diff --git a/report_generator/metrics_report_gen_config.json b/report_generator/metrics_report_gen_config.json index 7efe1fcf7..66c993953 100644 --- a/report_generator/metrics_report_gen_config.json +++ b/report_generator/metrics_report_gen_config.json @@ -12,6 +12,7 @@ "algorithm_parameters:algorithm", "algorithm_parameters:tol", "algorithm_parameters:max_iter", + "algorithm_parameters:init", 
"algorithm_parameters:n_init", "algorithm_parameters:alpha", "algorithm_parameters:l1_ratio", From 2ca81908c63f4905597f17b81d652bd6cb443e1b Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 21:17:50 +0300 Subject: [PATCH 11/22] add opportunity to write path to folder in configs --- runner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/runner.py b/runner.py index c4cba2449..a41fff392 100755 --- a/runner.py +++ b/runner.py @@ -29,7 +29,7 @@ parser = argparse.ArgumentParser() parser.add_argument('--configs', metavar='ConfigPath', type=str, default='configs/config_example.json', - help='Path to configuration files') + help='Path to configuration files or Path to directory which contains configuration files') parser.add_argument('--dummy-run', default=False, action='store_true', help='Run configuration parser and datasets generation ' 'without benchmarks running') @@ -60,7 +60,9 @@ 'results': [] } is_successful = True - + if os.path.isdir(args.configs): + files = [(args.configs + f) for f in os.listdir(args.configs) if f.endswith('.json')] + args.configs = ','.join(files) for config_name in args.configs.split(','): logging.info(f'Config: {config_name}') with open(config_name, 'r') as config_file: From d8689a750420a89faa06be0114dc822b542e8d97 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Wed, 11 Aug 2021 21:23:31 +0300 Subject: [PATCH 12/22] pep8 --- runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/runner.py b/runner.py index a41fff392..bb275b17b 100755 --- a/runner.py +++ b/runner.py @@ -29,13 +29,14 @@ parser = argparse.ArgumentParser() parser.add_argument('--configs', metavar='ConfigPath', type=str, default='configs/config_example.json', - help='Path to configuration files or Path to directory which contains configuration files') + help='Path to configuration files or ' + 'Path to directory which contains configuration files') parser.add_argument('--dummy-run', default=False, 
action='store_true', help='Run configuration parser and datasets generation ' 'without benchmarks running') parser.add_argument('--no-intel-optimized', default=False, action='store_true', help='Use no intel optimized version. ' - 'Now avalible for scikit-learn benchmarks'), + 'Now avalible for scikit-learn benchmarks') parser.add_argument('--output-file', default='results.json', type=argparse.FileType('w'), help='Output file of benchmarks to use with their runner') From 5d11996f78d3be75476c7a0a61ef795c919b780f Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Thu, 12 Aug 2021 12:25:49 +0300 Subject: [PATCH 13/22] change readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e478a229a..1f96a7ffa 100755 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Run `python runner.py --configs configs/config_example.json [--output-file resul Options: -- ``--configs``: specify the path to a configuration file. +- ``--configs``: specify the path to a configuration file or the path to folder which contains configuration files. - ``--no-intel-optimized``: use Scikit-learn without [Intel(R) Extension for Scikit-learn*](#intelr-extension-for-scikit-learn-support). Now available for [scikit-learn benchmarks](https://github.com/IntelPython/scikit-learn_bench/tree/master/sklearn_bench). By default, the runner uses Intel(R) Extension for Scikit-learn. - ``--output-file``: specify the name of the output file for the benchmark result. The default name is `result.json` - ``--report``: create an Excel report based on benchmark results. The `openpyxl` library is required. 
From b9babf4676ed3345e54673b53f2f564e02d2b58e Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Thu, 12 Aug 2021 14:22:45 +0300 Subject: [PATCH 14/22] apply comments, part1 --- configs/blogs/skl_2021_3.json | 104 ++++++------- configs/blogs/skl_conda_config.json | 90 +++++------ configs/testing/daal4py.json | 16 +- configs/testing/metrics/dbscan.json | 37 ++--- configs/testing/metrics/elasticnet.json | 44 +++--- configs/testing/metrics/kmeans.json | 155 +++---------------- configs/testing/metrics/knn_brute_clsf.json | 15 +- configs/testing/metrics/knn_brute_regr.json | 11 +- configs/testing/metrics/knn_kdtree_clsf.json | 15 +- configs/testing/metrics/knn_kdtree_regr.json | 15 +- configs/testing/metrics/lasso.json | 30 ++-- configs/testing/metrics/linreg.json | 10 +- configs/testing/metrics/logreg.json | 65 ++++++-- configs/testing/metrics/nusvc.json | 29 ++-- configs/testing/metrics/nusvr.json | 24 ++- configs/testing/metrics/pca.json | 49 ++---- configs/testing/metrics/rf_clsf.json | 72 ++++++--- configs/testing/metrics/rf_regr.json | 23 +-- configs/testing/metrics/ridge.json | 32 ++-- configs/testing/metrics/svc.json | 29 ++-- configs/testing/metrics/svr.json | 20 ++- configs/testing/sklearn.json | 56 +++---- 22 files changed, 412 insertions(+), 529 deletions(-) diff --git a/configs/blogs/skl_2021_3.json b/configs/blogs/skl_2021_3.json index c3e2f409b..0035ae288 100644 --- a/configs/blogs/skl_2021_3.json +++ b/configs/blogs/skl_2021_3.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,11 +19,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -38,12 +38,12 @@ } } ], - 
"time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -58,11 +58,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "pca", @@ -107,12 +107,12 @@ } } ], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -129,10 +129,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], - "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -146,7 +146,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -201,8 +201,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -222,8 +222,8 @@ } } ], - "C": [500.0], - "kernel": ["rbf"] + "C": 500.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -243,8 +243,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -264,8 +264,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -285,8 +285,8 @@ } } ], - "C": [50.0], - "kernel": ["rbf"] + "C": 50.0, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -306,8 +306,8 @@ } } ], - "nu": [0.25], - "kernel": ["poly"] + "nu": 0.25, + "kernel": "poly" }, { "algorithm": "svr", @@ -327,8 +327,8 @@ } } ], - "C": [0.1], - "kernel": ["poly"] + "C": 0.1, + "kernel": "poly" }, { "algorithm": "nusvr", @@ -348,9 +348,9 @@ } } ], - "nu": [0.8], - "C": 
[2.0], - "kernel": ["rbf"] + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -386,7 +386,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -437,11 +437,11 @@ } } ], - "method": ["brute"] + "method": "brute" }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -468,7 +468,7 @@ } } ], - "method": ["kd_tree"] + "method": "kd_tree" }, { "algorithm": "train_test_split", @@ -483,9 +483,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 } ] } diff --git a/configs/blogs/skl_conda_config.json b/configs/blogs/skl_conda_config.json index 07557d2bf..3f413a617 100755 --- a/configs/blogs/skl_conda_config.json +++ b/configs/blogs/skl_conda_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,11 +19,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -38,12 +38,12 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -58,11 +58,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "pca", @@ -107,12 +107,12 @@ } } 
], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -129,10 +129,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], - "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -146,7 +146,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -201,8 +201,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -222,8 +222,8 @@ } } ], - "C": [500.0], - "kernel": ["rbf"] + "C": 500.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -243,8 +243,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -264,8 +264,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -285,8 +285,8 @@ } } ], - "C": [50.0], - "kernel": ["rbf"] + "C": 50.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -322,7 +322,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -373,11 +373,11 @@ } } ], - "method": ["brute"] + "method": "brute" }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -404,7 +404,7 @@ } } ], - "method": ["kd_tree"] + "method": "kd_tree" }, { "algorithm": "train_test_split", @@ -419,9 +419,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 } ] } diff --git a/configs/testing/daal4py.json b/configs/testing/daal4py.json index 2f0250ccc..9af747f16 100755 --- a/configs/testing/daal4py.json +++ b/configs/testing/daal4py.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["daal4py"], - "data-format": ["pandas"], - 
"data-order": ["F"], - "dtype": ["float64"] + "lib": "daal4py", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,7 +19,7 @@ } } ], - "n-clusters": [10] + "n-clusters": 10 }, { "algorithm": "df_clsf", @@ -37,7 +37,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { "algorithm": "df_regr", @@ -55,7 +55,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { "algorithm": "ridge", @@ -72,7 +72,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", diff --git a/configs/testing/metrics/dbscan.json b/configs/testing/metrics/dbscan.json index 6c2f5f9ed..8a35d5bcd 100755 --- a/configs/testing/metrics/dbscan.json +++ b/configs/testing/metrics/dbscan.json @@ -1,14 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "dbscan", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -28,8 +27,6 @@ "eps": [0.5] }, { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -49,8 +46,6 @@ "eps": [0.5] }, { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -67,11 +62,9 @@ } } ], - "eps": [18800] + "eps": 18800 }, { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -88,11 +81,9 @@ } } ], - "eps": [2] + "eps": 2 }, { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -109,11 +100,9 @@ } } ], - "eps": [0.5] + "eps": 0.5 }, { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -130,11 +119,9 @@ } } ], - "eps": [0.5] + "eps": 0.5 }, { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -149,11 +136,9 @@ } } ], - "eps": [0.5] + "eps": 0.5 }, { - "algorithm": "dbscan", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ 
-168,7 +153,7 @@ } } ], - "eps": [0.5] + "eps": 0.5 } ] } diff --git a/configs/testing/metrics/elasticnet.json b/configs/testing/metrics/elasticnet.json index ac1aac1ce..3959d5047 100755 --- a/configs/testing/metrics/elasticnet.json +++ b/configs/testing/metrics/elasticnet.json @@ -1,13 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "elasticnet", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "elasticnet", "dataset": [ { "source": "npy", @@ -24,12 +24,11 @@ } } ], - "alpha": [0.005], - "tol": [1e-4], - "l1_ratio": [0.85] + "alpha": 0.005, + "tol": 1e-4, + "l1_ratio": 0.85 }, { - "algorithm": "elasticnet", "dataset": [ { "source": "npy", @@ -46,12 +45,11 @@ } } ], - "alpha": [0.01], - "tol": [1e-4], - "l1_ratio": [0.7] + "alpha": 0.01, + "tol": 1e-4, + "l1_ratio": 0.7 }, { - "algorithm": "elasticnet", "dataset": [ { "source": "npy", @@ -68,12 +66,11 @@ } } ], - "alpha": [0.0625], - "tol": [1e-4], - "l1_ratio": [0.75] + "alpha": 0.0625, + "tol": 1e-4, + "l1_ratio": 0.75 }, { - "algorithm": "elasticnet", "dataset": [ { "source": "npy", @@ -90,12 +87,11 @@ } } ], - "alpha": [0.006], - "tol": [1e-4], - "l1_ratio": [0.25] + "alpha": 0.006, + "tol": 1e-4, + "l1_ratio": 0.25 }, { - "algorithm": "elasticnet", "dataset": [ { "source": "npy", @@ -112,9 +108,9 @@ } } ], - "alpha": [0.15], - "tol": [1e-4], - "l1_ratio": [0.4] + "alpha": 0.15, + "tol": 1e-4, + "l1_ratio": 0.4 } ] } \ No newline at end of file diff --git a/configs/testing/metrics/kmeans.json b/configs/testing/metrics/kmeans.json index 3c755ec9b..427844912 100755 --- a/configs/testing/metrics/kmeans.json +++ b/configs/testing/metrics/kmeans.json @@ -1,14 +1,19 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "kmeans", + "data-format": "pandas", + 
"data-order": "F", + "dtype": "float64", + "time-method": "box_filter", + "time-limit": 50, + "n_init": 10, + "maxiter": 300, + "tol": 1e-4, + "init": "k-means++" }, "cases": [ { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -25,17 +30,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [10], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 10 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -52,17 +49,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [45], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 45 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -79,17 +68,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [70], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 70 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -106,17 +87,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [60], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 60 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -133,17 +106,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [25], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 25 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -160,17 +125,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [40], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 40 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -187,17 
+144,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [5], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 5 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -214,17 +163,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [8], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 8 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -241,17 +182,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [12], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 12 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -266,17 +199,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [3], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 3 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -291,17 +216,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [35], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 35 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -316,17 +233,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [9], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 9 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -341,17 +250,9 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [10], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 10 }, { - "algorithm": "kmeans", - "dtype": ["float64"], "dataset": [ { "source": 
"npy", @@ -366,13 +267,7 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [20], - "n_init": [10], - "maxiter": [300], - "init": ["k-means++"], - "tol": [1e-4] + "n-clusters": 20 } ] } diff --git a/configs/testing/metrics/knn_brute_clsf.json b/configs/testing/metrics/knn_brute_clsf.json index 549cb1e95..8903055b8 100755 --- a/configs/testing/metrics/knn_brute_clsf.json +++ b/configs/testing/metrics/knn_brute_clsf.json @@ -1,14 +1,14 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "knn_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "method": "brute" }, "cases": [ { - "algorithm": "knn_clsf", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -78,8 +78,7 @@ "y": "data/sensit_y_test.npy" } } - ], - "method": ["brute"] + ] } ] } diff --git a/configs/testing/metrics/knn_brute_regr.json b/configs/testing/metrics/knn_brute_regr.json index ac629fa0c..8c7b15c96 100755 --- a/configs/testing/metrics/knn_brute_regr.json +++ b/configs/testing/metrics/knn_brute_regr.json @@ -1,14 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "knn_regr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "knn_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", diff --git a/configs/testing/metrics/knn_kdtree_clsf.json b/configs/testing/metrics/knn_kdtree_clsf.json index 927dc4466..ea15e071a 100755 --- a/configs/testing/metrics/knn_kdtree_clsf.json +++ b/configs/testing/metrics/knn_kdtree_clsf.json @@ -1,14 +1,14 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "knn_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "method": "kd_tree" }, 
"cases": [ { - "algorithm": "knn_clsf", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -64,8 +64,7 @@ "y": "data/a9a_y_test.npy" } } - ], - "method": ["kd_tree"] + ] } ] } diff --git a/configs/testing/metrics/knn_kdtree_regr.json b/configs/testing/metrics/knn_kdtree_regr.json index 5b20c9ea3..f2f300930 100755 --- a/configs/testing/metrics/knn_kdtree_regr.json +++ b/configs/testing/metrics/knn_kdtree_regr.json @@ -1,14 +1,14 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "knn_regr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "method": "kd_tree" }, "cases": [ { - "algorithm": "knn_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -80,8 +80,7 @@ "y": "data/medical_charges_nominal_y_test.npy" } } - ], - "method": ["kd_tree"] + ] } ] } \ No newline at end of file diff --git a/configs/testing/metrics/lasso.json b/configs/testing/metrics/lasso.json index adbfa012a..c036b5402 100755 --- a/configs/testing/metrics/lasso.json +++ b/configs/testing/metrics/lasso.json @@ -1,13 +1,14 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "lasso", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "tol": 1e-4 }, "cases": [ { - "algorithm": "lasso", "dataset": [ { "source": "npy", @@ -24,11 +25,9 @@ } } ], - "alpha": [-0.0025], - "tol": [1e-4] + "alpha": -0.0025 }, { - "algorithm": "lasso", "dataset": [ { "source": "npy", @@ -45,11 +44,9 @@ } } ], - "alpha": [0.015625], - "tol": [1e-4] + "alpha": 0.015625 }, { - "algorithm": "lasso", "dataset": [ { "source": "npy", @@ -66,11 +63,9 @@ } } ], - "alpha": [0.0625], - "tol": [1e-4] + "alpha": 0.0625 }, { - "algorithm": "lasso", "dataset": [ { "source": "npy", @@ -87,11 +82,9 @@ } } ], - "alpha": [-0.0625], - "tol": [1e-4] + "alpha": -0.0625 }, { - "algorithm": 
"lasso", "dataset": [ { "source": "npy", @@ -108,8 +101,7 @@ } } ], - "alpha": [0.03125], - "tol": [1e-4] + "alpha": 0.03125 } ] } \ No newline at end of file diff --git a/configs/testing/metrics/linreg.json b/configs/testing/metrics/linreg.json index 7de63fd72..e96b1fe51 100644 --- a/configs/testing/metrics/linreg.json +++ b/configs/testing/metrics/linreg.json @@ -1,13 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "linear", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "linear", "dataset": [ { "source": "npy", diff --git a/configs/testing/metrics/logreg.json b/configs/testing/metrics/logreg.json index ff078f47a..1f4e3bab9 100755 --- a/configs/testing/metrics/logreg.json +++ b/configs/testing/metrics/logreg.json @@ -1,13 +1,15 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "log_reg", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "maxiter": 5000, + "tol": 0 }, "cases": [ { - "algorithm": "log_reg", "dataset": [ { "source": "npy", @@ -22,7 +24,11 @@ "x": "data/gisette_x_test.npy", "y": "data/gisette_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "letters", @@ -34,7 +40,11 @@ "x": "data/letters_x_test.npy", "y": "data/letters_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "a9a", @@ -48,7 +58,11 @@ "x": "data/a9a_x_test.npy", "y": "data/a9a_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "sensit", @@ -62,7 +76,11 @@ "x": "data/sensit_x_test.npy", "y": "data/sensit_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "ijcnn", @@ -76,7 +94,11 @@ "x": "data/ijcnn_x_test.npy", "y": "data/ijcnn_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": 
"skin_segmentation", @@ -90,7 +112,11 @@ "x": "data/skin_segmentation_x_test.npy", "y": "data/skin_segmentation_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "klaverjas", @@ -104,7 +130,11 @@ "x": "data/klaverjas_x_test.npy", "y": "data/klaverjas_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "codrnanorm", @@ -118,7 +148,11 @@ "x": "data/codrnanorm_x_test.npy", "y": "data/codrnanorm_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "higgs1m", @@ -131,10 +165,7 @@ "y": "data/higgs1m_y_test.npy" } } - - ], - "maxiter": [5000], - "tol": [0] + ] } ] } diff --git a/configs/testing/metrics/nusvc.json b/configs/testing/metrics/nusvc.json index 0021ecf4e..053fa52f9 100755 --- a/configs/testing/metrics/nusvc.json +++ b/configs/testing/metrics/nusvc.json @@ -1,13 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "nusvc", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "nusvc", "dataset": [ { "source": "npy", @@ -24,11 +24,10 @@ } } ], - "nu": [0.07], - "kernel": ["linear"] + "nu": 0.07, + "kernel": "linear" }, { - "algorithm": "nusvc", "dataset": [ { "source": "npy", @@ -45,11 +44,10 @@ } } ], - "nu": [0.25], - "kernel": ["sigmoid"] + "nu": 0.25, + "kernel": "sigmoid" }, { - "algorithm": "nusvc", "dataset": [ { "source": "npy", @@ -66,11 +64,10 @@ } } ], - "nu": [0.25], - "kernel": ["linear"] + "nu": 0.25, + "kernel": "linear" }, { - "algorithm": "nusvc", "dataset": [ { "source": "npy", @@ -87,8 +84,8 @@ } } ], - "nu": [0.01], - "kernel": ["rbf"] + "nu": 0.01, + "kernel": "rbf" } ] } \ No newline at end of file diff --git a/configs/testing/metrics/nusvr.json b/configs/testing/metrics/nusvr.json index 36ac54d34..35b93096a 100755 --- a/configs/testing/metrics/nusvr.json +++ b/configs/testing/metrics/nusvr.json @@ -1,13 +1,13 @@ { 
"common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "nusvr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "nusvr", "dataset": [ { "source": "npy", @@ -26,7 +26,6 @@ ] }, { - "algorithm": "nusvr", "dataset": [ { "source": "npy", @@ -43,12 +42,11 @@ } } ], - "C": [0.1], - "kernel": ["poly"], - "nu": [0.17] + "C": 0.1, + "kernel": "poly", + "nu": 0.17 }, { - "algorithm": "nusvr", "dataset": [ { "source": "npy", @@ -65,9 +63,9 @@ } } ], - "C": [2.0], - "kernel": ["rbf"], - "nu": [0.8] + "C": 2.0, + "kernel": "rbf", + "nu": 0.8 } ] } \ No newline at end of file diff --git a/configs/testing/metrics/pca.json b/configs/testing/metrics/pca.json index 8e3dc1624..7479666bc 100755 --- a/configs/testing/metrics/pca.json +++ b/configs/testing/metrics/pca.json @@ -1,13 +1,14 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "pca", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "svd-solver": "full" }, "cases": [ { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -19,11 +20,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.8] + "n-components": 0.8 }, { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -35,11 +34,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.6] + "n-components": 0.6 }, { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -51,11 +48,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.8] + "n-components": 0.8 }, { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -67,11 +62,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.6] + "n-components": 0.6 }, { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -83,11 +76,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.8] + "n-components": 0.8 }, { - "algorithm": "pca", 
"dataset": [ { "source": "npy", @@ -99,8 +90,7 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.6] + "n-components": 0.6 }, { "algorithm": "pca", @@ -115,11 +105,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.8] + "n-components": 0.8 }, { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -131,11 +119,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.6] + "n-components": 0.6 }, { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -147,11 +133,9 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.8] + "n-components": 0.8 }, { - "algorithm": "pca", "dataset": [ { "source": "npy", @@ -163,8 +147,7 @@ } } ], - "svd-solver": ["full"], - "n-components": [0.8] + "n-components": 0.8 } ] } diff --git a/configs/testing/metrics/rf_clsf.json b/configs/testing/metrics/rf_clsf.json index 144fef33c..bfdaaf730 100755 --- a/configs/testing/metrics/rf_clsf.json +++ b/configs/testing/metrics/rf_clsf.json @@ -1,14 +1,14 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "df_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "num-trees": 500 }, "cases": [ - { - "algorithm": "df_clsf", - "dtype": ["float64"], + { "dataset": [ { "source": "npy", @@ -21,7 +21,11 @@ "x": "data/letters_x_test.npy", "y": "data/letters_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "skin_segmentation", @@ -35,7 +39,11 @@ "x": "data/skin_segmentation_x_test.npy", "y": "data/skin_segmentation_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "codrnanorm", @@ -49,7 +57,11 @@ "x": "data/codrnanorm_x_test.npy", "y": "data/codrnanorm_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "ijcnn", @@ -63,7 +75,11 @@ "x": "data/ijcnn_x_test.npy", "y": "data/ijcnn_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "a9a", @@ -77,7 +93,11 @@ 
"x": "data/a9a_x_test.npy", "y": "data/a9a_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "klaverjas", @@ -91,8 +111,11 @@ "x": "data/klaverjas_x_test.npy", "y": "data/klaverjas_y_test.npy" } - }, - + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "gisette", @@ -106,7 +129,11 @@ "x": "data/gisette_x_test.npy", "y": "data/gisette_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "sensit", @@ -120,7 +147,11 @@ "x": "data/sensit_x_test.npy", "y": "data/sensit_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "covertype", @@ -134,7 +165,11 @@ "x": "data/covertype_x_test.npy", "y": "data/covertype_y_test.npy" } - }, + } + ] + }, + { + "dataset": [ { "source": "npy", "name": "covtype", @@ -147,8 +182,7 @@ "y": "data/covtype_y_test.npy" } } - ], - "num-trees": [500] + ] } ] } diff --git a/configs/testing/metrics/rf_regr.json b/configs/testing/metrics/rf_regr.json index 8c2709b5a..b62f9a938 100644 --- a/configs/testing/metrics/rf_regr.json +++ b/configs/testing/metrics/rf_regr.json @@ -1,14 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "df_regr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "df_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -27,8 +26,6 @@ ] }, { - "algorithm": "df_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -47,8 +44,6 @@ ] }, { - "algorithm": "df_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -67,8 +62,6 @@ ] }, { - "algorithm": "df_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -87,8 +80,6 @@ ] }, { - "algorithm": "df_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -107,8 +98,6 @@ ] }, { - "algorithm": "df_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", @@ -127,8 +116,6 @@ ] }, { - 
"algorithm": "df_regr", - "dtype": ["float64"], "dataset": [ { "source": "npy", diff --git a/configs/testing/metrics/ridge.json b/configs/testing/metrics/ridge.json index 4b676ae8a..fe9fb591a 100755 --- a/configs/testing/metrics/ridge.json +++ b/configs/testing/metrics/ridge.json @@ -1,13 +1,14 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "ridge", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "alpha": 1.0 }, "cases": [ { - "algorithm": "ridge", "dataset": [ { "source": "npy", @@ -23,8 +24,7 @@ "y": "data/abalone_y_test.npy" } } - ], - "alpha": [1.0] + ] }, { "algorithm": "ridge", @@ -43,8 +43,7 @@ "y": "data/california_housing_y_test.npy" } } - ], - "alpha": [1.0] + ] }, { "algorithm": "ridge", @@ -63,8 +62,7 @@ "y": "data/fried_y_test.npy" } } - ], - "alpha": [1.0] + ] }, { "algorithm": "ridge", @@ -83,8 +81,7 @@ "y": "data/twodplanes_y_test.npy" } } - ], - "alpha": [1.0] + ] }, { "algorithm": "ridge", @@ -103,8 +100,7 @@ "y": "data/medical_charges_nominal_y_test.npy" } } - ], - "alpha": [1.0] + ] }, { "algorithm": "ridge", @@ -123,8 +119,7 @@ "y": "data/yolanda_y_test.npy" } } - ], - "alpha": [1.0] + ] }, { "algorithm": "ridge", @@ -143,8 +138,7 @@ "y": "data/year_prediction_msd_y_test.npy" } } - ], - "alpha": [1.0] + ] } ] } \ No newline at end of file diff --git a/configs/testing/metrics/svc.json b/configs/testing/metrics/svc.json index a566469ff..2f6997fcc 100755 --- a/configs/testing/metrics/svc.json +++ b/configs/testing/metrics/svc.json @@ -1,13 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "svm", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "svm", "dataset": [ { "source": "npy", @@ -24,11 +24,10 @@ } } ], - "C": [0.0015], - "kernel": ["linear"] + "C": 0.0015, + 
"kernel": "linear" }, { - "algorithm": "svm", "dataset": [ { "source": "npy", @@ -45,11 +44,10 @@ } } ], - "C": [500], - "kernel": ["sigmoid"] + "C": 500, + "kernel": "sigmoid" }, { - "algorithm": "svm", "dataset": [ { "source": "npy", @@ -66,11 +64,10 @@ } } ], - "C": [100], - "kernel": ["linear"] + "C": 100, + "kernel": "linear" }, { - "algorithm": "svm", "dataset": [ { "source": "npy", @@ -87,8 +84,8 @@ } } ], - "C": [1.0], - "kernel": ["rbf"] + "C": 1.0, + "kernel": "rbf" } ] } \ No newline at end of file diff --git a/configs/testing/metrics/svr.json b/configs/testing/metrics/svr.json index 9b2f6adf2..c5b8d6306 100755 --- a/configs/testing/metrics/svr.json +++ b/configs/testing/metrics/svr.json @@ -1,13 +1,13 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "algorithm": "svr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "algorithm": "svr", "dataset": [ { "source": "npy", @@ -26,7 +26,6 @@ ] }, { - "algorithm": "svr", "dataset": [ { "source": "npy", @@ -43,11 +42,10 @@ } } ], - "C": [0.1], - "kernel": ["poly"] + "C": 0.1, + "kernel": "poly" }, { - "algorithm": "svr", "dataset": [ { "source": "npy", @@ -64,8 +62,8 @@ } } ], - "C": [2.0], - "kernel": ["rbf"] + "C": 2.0, + "kernel": "rbf" } ] } \ No newline at end of file diff --git a/configs/testing/sklearn.json b/configs/testing/sklearn.json index f3e7ca340..c2045f289 100755 --- a/configs/testing/sklearn.json +++ b/configs/testing/sklearn.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,8 +19,8 @@ } } ], - "init": ["k-means++"], - "n-clusters": [10] + "init": "k-means++", + "n-clusters": 10 }, { "algorithm": "df_clsf", @@ -38,7 +38,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { 
"algorithm": "df_regr", @@ -56,7 +56,7 @@ } } ], - "num-trees": [10] + "num-trees": 10 }, { "algorithm": "ridge", @@ -73,7 +73,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -107,7 +107,7 @@ } } ], - "tol": [0.01] + "tol": 0.01 }, { "algorithm": "svm", @@ -125,8 +125,8 @@ } } ], - "C": [10.0], - "kernel": ["linear"] + "C": 10.0, + "kernel": "linear" }, { "algorithm": "nusvc", @@ -144,8 +144,8 @@ } } ], - "nu": [0.1], - "kernel": ["poly"] + "nu": 0.1, + "kernel": "poly" }, { "algorithm": "svr", @@ -162,8 +162,8 @@ } } ], - "C": [10.0], - "kernel": ["rbf"] + "C": 10.0, + "kernel": "rbf" }, { "algorithm": "nusvr", @@ -180,10 +180,10 @@ } } ], - "nu": [0.1], - "C": [1.0], - "kernel": ["poly"], - "degree": [2] + "nu": 0.1, + "C": 1.0, + "kernel": "poly", + "degree": 2 }, { "algorithm": "dbscan", @@ -247,9 +247,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 }, { "algorithm": "lasso", @@ -267,8 +267,8 @@ } } ], - "alpha": [1.0], - "tol": [1e-4] + "alpha": 1.0, + "tol": 1e-4 }, { "algorithm": "elasticnet", @@ -286,9 +286,9 @@ } } ], - "alpha": [2.0], - "l1_ratio": [0.5], - "tol": [1e-4] + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 }, { "algorithm": "pca", From 618e7c44ba7ade8099ec633edd7492b54eb7b691 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Thu, 12 Aug 2021 15:02:54 +0300 Subject: [PATCH 15/22] apply comments, part2 --- configs/config_example.json | 28 ++-- configs/cuml_config.json | 108 ++++++------ configs/skl_config.json | 156 +++++++++--------- configs/skl_xpu_config.json | 10 +- ...=> sklearn_metrics_report_gen_config.json} | 0 5 files changed, 151 insertions(+), 151 deletions(-) rename report_generator/{metrics_report_gen_config.json => sklearn_metrics_report_gen_config.json} (100%) diff --git a/configs/config_example.json b/configs/config_example.json index 0a5be54ac..fa615cf29 100644 --- a/configs/config_example.json +++ 
b/configs/config_example.json @@ -1,12 +1,12 @@ { "common": { - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { - "lib": ["daal4py"], + "lib": "daal4py", "algorithm": "linear", "dataset": [ { @@ -20,7 +20,7 @@ ] }, { - "lib": ["sklearn"], + "lib": "sklearn", "algorithm": "svm", "dataset": [ { @@ -33,11 +33,11 @@ } } ], - "max-cache-size": [4], - "kernel": ["rbf"] + "max-cache-size": 4, + "kernel": "rbf" }, { - "lib": ["xgboost"], + "lib": "xgboost", "algorithm": "gbt", "dataset": [ { @@ -49,12 +49,12 @@ } } ], - "n-estimators": [1000], - "objective": ["reg:squarederror"], - "tree-method": ["hist"], - "max-depth": [1], - "subsample": [0.5], - "eta": [0.1] + "n-estimators": 1000, + "objective": "reg:squarederror", + "tree-method": "hist", + "max-depth": 1, + "subsample": 0.5, + "eta": 0.1 } ] } diff --git a/configs/cuml_config.json b/configs/cuml_config.json index f17797c70..1a60da90a 100755 --- a/configs/cuml_config.json +++ b/configs/cuml_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["cuml"], - "data-format": ["cudf"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "cuml", + "data-format": "cudf", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,10 +19,10 @@ } } ], - "time-method": ["box_filter"], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -37,11 +37,11 @@ } } ], - "time-method": ["box_filter"], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -56,10 +56,10 @@ } } ], - "time-method": ["box_filter"], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { 
"algorithm": "pca", @@ -95,12 +95,12 @@ } } ], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -131,10 +131,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], - "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -156,7 +156,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -219,8 +219,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -240,8 +240,8 @@ } } ], - "C": [1000.0], - "kernel": ["linear"] + "C": 1000.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -261,8 +261,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -282,8 +282,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -303,8 +303,8 @@ } } ], - "C": [500.0], - "kernel": ["linear"] + "C": 500.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -324,8 +324,8 @@ } } ], - "C": [1.0], - "kernel": ["rbf"] + "C": 1.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -345,8 +345,8 @@ } } ], - "C": [100.0], - "kernel": ["rbf"] + "C": 100.0, + "kernel": "rbf" }, { "algorithm": "svr", @@ -366,8 +366,8 @@ } } ], - "C": [0.1], - "kernel": ["poly"] + "C": 0.1, + "kernel": "poly" }, { "algorithm": "svr", @@ -387,8 +387,8 @@ } } ], - "C": [10.0], - "kernel": ["rbf"] + "C": 10.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -424,7 +424,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -475,7 +475,7 @@ } } ], - "method": ["brute"] + "method": "brute" }, { "algorithm": "train_test_split", @@ -508,8 +508,8 @@ } } ], - "train-size": [0.75], - "test-size": [0.25] + "train-size": 
0.75, + "test-size": 0.25 }, { "algorithm": "train_test_split", @@ -524,8 +524,8 @@ } } ], - "train-size": [0.9], - "test-size": [0.1] + "train-size": 0.9, + "test-size": 0.1 }, { "algorithm": "lasso", @@ -540,8 +540,8 @@ } } ], - "alpha": [1.0], - "tol": [1e-4] + "alpha": 1.0, + "tol": 1e-4 }, { "algorithm": "elasticnet", @@ -561,9 +561,9 @@ } } ], - "alpha": [2.0], - "l1_ratio": [0.5], - "tol": [1e-4] + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 } ] } diff --git a/configs/skl_config.json b/configs/skl_config.json index 4d8be6792..ca74489fe 100755 --- a/configs/skl_config.json +++ b/configs/skl_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"] + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64" }, "cases": [ { @@ -19,11 +19,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [1000], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "kmeans", @@ -38,12 +38,12 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [5], - "maxiter": [50], - "init": ["k-means++"], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 }, { "algorithm": "kmeans", @@ -58,11 +58,11 @@ } } ], - "time-method": ["box_filter"], - "time-limit": [50], - "n-clusters": [20], - "maxiter": [50], - "tol": [0.0] + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 }, { "algorithm": "pca", @@ -107,12 +107,12 @@ } } ], - "svd-solver": ["full"], - "n-components": [10] + "svd-solver": "full", + "n-components": 10 }, { "algorithm": "df_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "npy", @@ -143,10 +143,10 @@ } } ], - "num-trees": [50], - "max-depth": [16], 
- "max-leaf-nodes": [131072], - "max-features": [0.2] + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 }, { "algorithm": "ridge", @@ -168,7 +168,7 @@ } } ], - "alpha": [5] + "alpha": 5 }, { "algorithm": "linear", @@ -231,8 +231,8 @@ } } ], - "maxiter": [100], - "tol": [0] + "maxiter": 100, + "tol": 0 }, { "algorithm": "svm", @@ -252,8 +252,8 @@ } } ], - "C": [1000.0], - "kernel": ["linear"] + "C": 1000.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -273,8 +273,8 @@ } } ], - "C": [1.5e-3], - "kernel": ["linear"] + "C": 1.5e-3, + "kernel": "linear" }, { "algorithm": "svm", @@ -294,8 +294,8 @@ } } ], - "C": [100.0], - "kernel": ["linear"] + "C": 100.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -315,8 +315,8 @@ } } ], - "C": [500.0], - "kernel": ["linear"] + "C": 500.0, + "kernel": "linear" }, { "algorithm": "svm", @@ -336,8 +336,8 @@ } } ], - "C": [1.0], - "kernel": ["rbf"] + "C": 1.0, + "kernel": "rbf" }, { "algorithm": "svm", @@ -357,8 +357,8 @@ } } ], - "C": [100.0], - "kernel": ["rbf"] + "C": 100.0, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -378,8 +378,8 @@ } } ], - "nu": [0.25], - "kernel": ["sigmoid"] + "nu": 0.25, + "kernel": "sigmoid" }, { "algorithm": "nusvc", @@ -399,8 +399,8 @@ } } ], - "nu": [0.7], - "kernel": ["rbf"] + "nu": 0.7, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -420,8 +420,8 @@ } } ], - "nu": [0.5], - "kernel": ["rbf"] + "nu": 0.5, + "kernel": "rbf" }, { "algorithm": "nusvc", @@ -441,8 +441,8 @@ } } ], - "nu": [0.15], - "kernel": ["poly"] + "nu": 0.15, + "kernel": "poly" }, { "algorithm": "svr", @@ -462,8 +462,8 @@ } } ], - "C": [0.1], - "kernel": ["poly"] + "C": 0.1, + "kernel": "poly" }, { "algorithm": "svr", @@ -483,8 +483,8 @@ } } ], - "C": [10.0], - "kernel": ["rbf"] + "C": 10.0, + "kernel": "rbf" }, { "algorithm": "nusvr", @@ -504,9 +504,9 @@ } } ], - "nu": [0.8], - "C": [2.0], - "kernel": ["rbf"] + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" }, { "algorithm": "nusvr", @@ -526,10 
+526,10 @@ } } ], - "nu": [0.5], - "C": [10.0], - "kernel": ["poly"], - "degree": [2] + "nu": 0.5, + "C": 10.0, + "kernel": "poly", + "degree": 2 }, { "algorithm": "nusvr", @@ -549,9 +549,9 @@ } } ], - "nu": [0.8], - "C": [2.0], - "kernel": ["rbf"] + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" }, { "algorithm": "dbscan", @@ -587,7 +587,7 @@ }, { "algorithm": "knn_clsf", - "dtype": ["float32"], + "dtype": "float32", "dataset": [ { "source": "synthetic", @@ -671,9 +671,9 @@ } } ], - "include-y": [""], - "train-size": [0.75], - "test-size": [0.25] + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 }, { "algorithm": "train_test_split", @@ -688,11 +688,11 @@ } } ], - "data-format": ["numpy"], - "data-order": ["C"], - "include-y": [""], - "train-size": [0.9], - "test-size": [0.1] + "data-format": "numpy", + "data-order": "C", + "include-y": "", + "train-size": 0.9, + "test-size": 0.1 }, { "algorithm": "lasso", @@ -707,8 +707,8 @@ } } ], - "alpha": [1.0], - "tol": [1e-4] + "alpha": 1.0, + "tol": 1e-4 }, { "algorithm": "elasticnet", @@ -728,9 +728,9 @@ } } ], - "alpha": [2.0], - "l1_ratio": [0.5], - "tol": [1e-4] + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 } ] } diff --git a/configs/skl_xpu_config.json b/configs/skl_xpu_config.json index 023850c38..06849287e 100644 --- a/configs/skl_xpu_config.json +++ b/configs/skl_xpu_config.json @@ -1,9 +1,9 @@ { "common": { - "lib": ["sklearn"], - "data-format": ["pandas"], - "data-order": ["F"], - "dtype": ["float64"], + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", "device": ["host", "cpu", "gpu", "None"] }, "cases": [ @@ -20,7 +20,7 @@ } } ], - "n-clusters": [10] + "n-clusters": 10 }, { "algorithm": "dbscan", diff --git a/report_generator/metrics_report_gen_config.json b/report_generator/sklearn_metrics_report_gen_config.json similarity index 100% rename from report_generator/metrics_report_gen_config.json rename to report_generator/sklearn_metrics_report_gen_config.json From 
57dd110e42abc7840ce5b8152788a78dbf5558cc Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Thu, 12 Aug 2021 15:24:20 +0300 Subject: [PATCH 16/22] dfs strategy to take jsons from folders --- runner.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/runner.py b/runner.py index bb275b17b..6ef683cd5 100755 --- a/runner.py +++ b/runner.py @@ -24,6 +24,19 @@ import datasets.make_datasets as make_datasets import utils +from pathlib import Path + + +def get_configs(path: Path) -> List[str]: + result = list() + for dir_or_file in os.listdir(path): + new_path = Path(path, dir_or_file) + if dir_or_file.endswith('.json'): + result.append(str(new_path)) + elif os.path.isdir(new_path): + result += get_configs(new_path) + return result + if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -62,8 +75,7 @@ } is_successful = True if os.path.isdir(args.configs): - files = [(args.configs + f) for f in os.listdir(args.configs) if f.endswith('.json')] - args.configs = ','.join(files) + args.configs = ','.join(get_configs(args.configs)) for config_name in args.configs.split(','): logging.info(f'Config: {config_name}') with open(config_name, 'r') as config_file: From 4a21cd80d2b6fc6b864177f78de3260d6fe4a3aa Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Thu, 12 Aug 2021 15:31:34 +0300 Subject: [PATCH 17/22] now you can --configs=folder,.json,folder --- runner.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/runner.py b/runner.py index 6ef683cd5..f979ca95d 100755 --- a/runner.py +++ b/runner.py @@ -74,8 +74,16 @@ def get_configs(path: Path) -> List[str]: 'results': [] } is_successful = True - if os.path.isdir(args.configs): - args.configs = ','.join(get_configs(args.configs)) + # getting jsons from folders + paths_to_configs: List[str] = list() + for config_name in args.configs.split(','): + if os.path.isdir(config_name): + config_name = get_configs(Path(config_name)) + else: + config_name = [config_name] + 
paths_to_configs += config_name + args.configs = ','.join(paths_to_configs) + for config_name in args.configs.split(','): logging.info(f'Config: {config_name}') with open(config_name, 'r') as config_file: From e4d6f9413f73170327599fecf0aafa423f7fcbdd Mon Sep 17 00:00:00 2001 From: Kulandin Denis Date: Thu, 12 Aug 2021 15:37:36 +0300 Subject: [PATCH 18/22] Update README.md Co-authored-by: Ekaterina Mekhnetsova --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f96a7ffa..8214e4022 100755 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Run `python runner.py --configs configs/config_example.json [--output-file resul Options: -- ``--configs``: specify the path to a configuration file or the path to folder which contains configuration files. +- ``--configs``: specify the path to a configuration file or a folder that contains configuration files. - ``--no-intel-optimized``: use Scikit-learn without [Intel(R) Extension for Scikit-learn*](#intelr-extension-for-scikit-learn-support). Now available for [scikit-learn benchmarks](https://github.com/IntelPython/scikit-learn_bench/tree/master/sklearn_bench). By default, the runner uses Intel(R) Extension for Scikit-learn. - ``--output-file``: specify the name of the output file for the benchmark result. The default name is `result.json` - ``--report``: create an Excel report based on benchmark results. The `openpyxl` library is required. 
From 6a126d88863bf3f943124414016c3f2059094b58 Mon Sep 17 00:00:00 2001 From: Kulandin Denis Date: Thu, 12 Aug 2021 15:37:45 +0300 Subject: [PATCH 19/22] Update sklearn_bench/knn_regr.py Co-authored-by: Ekaterina Mekhnetsova --- sklearn_bench/knn_regr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn_bench/knn_regr.py b/sklearn_bench/knn_regr.py index 97de91c10..c2048e3f4 100644 --- a/sklearn_bench/knn_regr.py +++ b/sklearn_bench/knn_regr.py @@ -27,7 +27,7 @@ def main(): X_train, X_test, y_train, y_test = bench.load_data(params) params.n_classes = len(np.unique(y_train)) - # Create regression object + # Create a regression object knn_regr = KNeighborsRegressor(n_neighbors=params.n_neighbors, weights=params.weights, algorithm=params.method, From ecb8938c412ff6d48945642f29da9aa7f6e4e76c Mon Sep 17 00:00:00 2001 From: Kulandin Denis Date: Thu, 12 Aug 2021 15:38:20 +0300 Subject: [PATCH 20/22] Update runner.py Co-authored-by: Ekaterina Mekhnetsova --- runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runner.py b/runner.py index f979ca95d..e0d85ebb7 100755 --- a/runner.py +++ b/runner.py @@ -42,8 +42,8 @@ def get_configs(path: Path) -> List[str]: parser = argparse.ArgumentParser() parser.add_argument('--configs', metavar='ConfigPath', type=str, default='configs/config_example.json', - help='Path to configuration files or ' - 'Path to directory which contains configuration files') + help='The path to a configuration file or ' + 'a directory that contains configuration files') parser.add_argument('--dummy-run', default=False, action='store_true', help='Run configuration parser and datasets generation ' 'without benchmarks running') From 67cc9419c83292f98b8dbc076b95bc82570e5438 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Fri, 13 Aug 2021 16:58:15 +0300 Subject: [PATCH 21/22] EOF --- configs/testing/metrics/elasticnet.json | 2 +- configs/testing/metrics/knn_brute_regr.json | 2 +- 
configs/testing/metrics/knn_kdtree_regr.json | 2 +- configs/testing/metrics/lasso.json | 2 +- configs/testing/metrics/linreg.json | 2 +- configs/testing/metrics/nusvc.json | 2 +- configs/testing/metrics/nusvr.json | 2 +- configs/testing/metrics/rf_regr.json | 2 +- configs/testing/metrics/ridge.json | 2 +- configs/testing/metrics/svc.json | 2 +- configs/testing/metrics/svr.json | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/configs/testing/metrics/elasticnet.json b/configs/testing/metrics/elasticnet.json index 3959d5047..c64044f89 100755 --- a/configs/testing/metrics/elasticnet.json +++ b/configs/testing/metrics/elasticnet.json @@ -113,4 +113,4 @@ "l1_ratio": 0.4 } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/knn_brute_regr.json b/configs/testing/metrics/knn_brute_regr.json index 8c7b15c96..46edbb2fc 100755 --- a/configs/testing/metrics/knn_brute_regr.json +++ b/configs/testing/metrics/knn_brute_regr.json @@ -68,4 +68,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/knn_kdtree_regr.json b/configs/testing/metrics/knn_kdtree_regr.json index f2f300930..21a8dd3b1 100755 --- a/configs/testing/metrics/knn_kdtree_regr.json +++ b/configs/testing/metrics/knn_kdtree_regr.json @@ -83,4 +83,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/lasso.json b/configs/testing/metrics/lasso.json index c036b5402..53daa8daa 100755 --- a/configs/testing/metrics/lasso.json +++ b/configs/testing/metrics/lasso.json @@ -104,4 +104,4 @@ "alpha": 0.03125 } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/linreg.json b/configs/testing/metrics/linreg.json index e96b1fe51..704aa7c04 100644 --- a/configs/testing/metrics/linreg.json +++ b/configs/testing/metrics/linreg.json @@ -140,4 +140,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/nusvc.json b/configs/testing/metrics/nusvc.json index 053fa52f9..83755f2a6 100755 --- 
a/configs/testing/metrics/nusvc.json +++ b/configs/testing/metrics/nusvc.json @@ -88,4 +88,4 @@ "kernel": "rbf" } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/nusvr.json b/configs/testing/metrics/nusvr.json index 35b93096a..1f3b2981c 100755 --- a/configs/testing/metrics/nusvr.json +++ b/configs/testing/metrics/nusvr.json @@ -68,4 +68,4 @@ "nu": 0.8 } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/rf_regr.json b/configs/testing/metrics/rf_regr.json index b62f9a938..399dc1add 100644 --- a/configs/testing/metrics/rf_regr.json +++ b/configs/testing/metrics/rf_regr.json @@ -134,4 +134,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/ridge.json b/configs/testing/metrics/ridge.json index fe9fb591a..271e677fa 100755 --- a/configs/testing/metrics/ridge.json +++ b/configs/testing/metrics/ridge.json @@ -141,4 +141,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/svc.json b/configs/testing/metrics/svc.json index 2f6997fcc..60a5cd96c 100755 --- a/configs/testing/metrics/svc.json +++ b/configs/testing/metrics/svc.json @@ -88,4 +88,4 @@ "kernel": "rbf" } ] -} \ No newline at end of file +} diff --git a/configs/testing/metrics/svr.json b/configs/testing/metrics/svr.json index c5b8d6306..7884e1edb 100755 --- a/configs/testing/metrics/svr.json +++ b/configs/testing/metrics/svr.json @@ -66,4 +66,4 @@ "kernel": "rbf" } ] -} \ No newline at end of file +} From ec05b8634064cf2cc58fd994cf5b6f885e94adf2 Mon Sep 17 00:00:00 2001 From: OnlyDeniko Date: Mon, 16 Aug 2021 18:22:46 +0300 Subject: [PATCH 22/22] apply comment --- runner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runner.py b/runner.py index e0d85ebb7..99d992704 100755 --- a/runner.py +++ b/runner.py @@ -48,8 +48,7 @@ def get_configs(path: Path) -> List[str]: help='Run configuration parser and datasets generation ' 'without benchmarks running') 
parser.add_argument('--no-intel-optimized', default=False, action='store_true', - help='Use no intel optimized version. ' - 'Now avalible for scikit-learn benchmarks') + help='Use Scikit-learn without Intel optimizations') parser.add_argument('--output-file', default='results.json', type=argparse.FileType('w'), help='Output file of benchmarks to use with their runner')