diff --git a/configs/xpu/dbscan.json b/configs/sklearn/performance/dbscan.json similarity index 59% rename from configs/xpu/dbscan.json rename to configs/sklearn/performance/dbscan.json index 68ced8668..64dacc40a 100644 --- a/configs/xpu/dbscan.json +++ b/configs/sklearn/performance/dbscan.json @@ -4,10 +4,42 @@ "algorithm": "dbscan", "data-format": "pandas", "data-order": "F", - "dtype": ["float64"], + "dtype": ["float32", "float64"], "device": ["host", "cpu", "gpu", "none"] }, "cases": [ + { + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 50, + "n_features": 3, + "training": { + "n_samples": 500000 + } + }, + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 50, + "n_features": 10, + "training": { + "n_samples": 500000 + } + }, + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 100, + "n_features": 50, + "training": { + "n_samples": 500000 + } + } + ], + "workload-size": "medium" + }, { "dataset": [ { diff --git a/configs/xpu/df_clsf.json b/configs/sklearn/performance/df_clsf.json similarity index 70% rename from configs/xpu/df_clsf.json rename to configs/sklearn/performance/df_clsf.json index 0c7c25d70..3c0bc477c 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/sklearn/performance/df_clsf.json @@ -9,6 +9,53 @@ "device": ["host", "cpu", "gpu", "none"] }, "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": + { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": + { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + ], + "workload-size": "medium", + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 + }, + { + "device": "none", + "dataset": [ + { + "source": "npy", + "name": "airline-ohe", + "training": + { + "x": "data/airline-ohe_x_train.npy", + "y": "data/airline-ohe_y_train.npy" + }, + "testing": + { + "x": "data/airline-ohe_x_test.npy", + "y": "data/airline-ohe_y_test.npy" + } + } + ], + "workload-size": "medium", + "num-trees": 50, + "max-depth": 16, + "max-leaf-nodes": 131072, + "max-features": 0.2 + }, { "dataset": [ { diff --git a/configs/xpu/df_regr.json b/configs/sklearn/performance/df_regr.json similarity index 78% rename from configs/xpu/df_regr.json rename to configs/sklearn/performance/df_regr.json index 3c6c05bfd..c757f1f02 100644 --- a/configs/xpu/df_regr.json +++ b/configs/sklearn/performance/df_regr.json @@ -5,11 +5,52 @@ "data-format": "pandas", "data-order": "F", "dtype": ["float32", "float64"], - "max-features": 0.33, "device": ["host", "cpu", "gpu", "none"] }, "cases": [ + { + "device": "none", + "dataset": [ + { + "source": "npy", + "name": "airline_regression", + "training": + { + "x": "data/airline_regression_x_train.npy", + "y": "data/airline_regression_y_train.npy" + }, + "testing": + { + "x": "data/airline_regression_x_test.npy", + "y": "data/airline_regression_y_test.npy" + } + } + ], + "workload-size": "large", + "num-trees": 100 + }, { + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "workload-size": "large", + "num-trees": 100 + }, + { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -31,6 +72,7 @@ "max-depth": 5 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -48,10 +90,11 @@ } ], "workload-size": "large", - "num-trees": 10, + "num-trees": 100, "max-depth": 5 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -73,6 +116,7 @@ "max-depth": 8 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -94,6 +138,7 @@ "max-depth": 8 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -115,6 +160,7 @@ "max-depth": 16 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -136,6 +182,7 @@ "max-depth": 8 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -157,6 +204,7 @@ "max-depth": 8 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", @@ -178,6 +226,7 @@ "max-depth": 8 }, { + "max-features": 0.33, "dataset": [ { "source": "npy", diff --git a/configs/sklearn/performance/elasticnet.json b/configs/sklearn/performance/elasticnet.json new file mode 100644 index 000000000..896076a08 --- /dev/null +++ b/configs/sklearn/performance/elasticnet.json @@ -0,0 +1,34 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "elasticnet", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "workload-size": "medium", + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 + } + ] +} diff --git a/configs/xpu/kmeans.json b/configs/sklearn/performance/kmeans.json similarity index 71% rename from configs/xpu/kmeans.json rename to configs/sklearn/performance/kmeans.json index 680df88ca..b466ea5d9 100644 --- a/configs/xpu/kmeans.json +++ b/configs/sklearn/performance/kmeans.json @@ -3,11 +3,70 @@ "lib": "sklearn", "algorithm": "kmeans", "data-format": "pandas", - "data-order": "C", + "data-order": "F", "dtype": ["float32", "float64"], "device": ["host", "cpu", "gpu", "none"] }, "cases": [ + { + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 1000, + "n_features": 20, + "training": { + "n_samples": 1000000 + } + } + ], + "workload-size": "medium", + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 1000, + "maxiter": 50, + "tol": 0.0 + }, + { + "device": "none", + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 5, + "n_features": 50, + "training": { + "n_samples": 10000000 + } + } + ], + "workload-size": "medium", + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 5, + "maxiter": 50, + "init": "k-means++", + "tol": 0.0 + }, + { + "dataset": [ + { + "source": "synthetic", + "type": "blobs", + "n_clusters": 20, + "n_features": 50, + "training": { + "n_samples": 3000000 + } + } + ], + "workload-size": "medium", + "time-method": "box_filter", + "time-limit": 50, + "n-clusters": 20, + "maxiter": 50, + "tol": 0.0 + }, { "dataset": [ { diff --git a/configs/sklearn/performance/knn_clsf.json b/configs/sklearn/performance/knn_clsf.json new file mode 100644 index 000000000..ac556c407 --- /dev/null +++ b/configs/sklearn/performance/knn_clsf.json @@ -0,0 +1,347 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "knn_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": ["host", "cpu", "gpu", "none"] + }, + "cases": [ + { + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 3, + "training": { + "n_samples": 100000 + }, + "testing": { + "n_samples": 100000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 10, + "training": { + "n_samples": 100000 + }, + "testing": { + "n_samples": 100000 + } + } + ], + "workload-size": "medium", + "method": "brute" + }, + { + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 50, + "training": { + "n_samples": 20000 + }, + "testing": { + "n_samples": 20000 + } + } + ], + "workload-size": "small", + "method": "brute" + }, + { + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 10, + "n_features": 16, + "training": { + "n_samples": 250000 + }, + "testing": { + "n_samples": 250000 + } + } + ], + "workload-size": "large", + "method": "brute" + }, + { + "device": "none", + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 3, + "training": { + "n_samples": 100000 + }, + "testing": { + "n_samples": 100000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 10, + "training": { + "n_samples": 100000 + }, + "testing": { + "n_samples": 100000 + } + } + ], + "workload-size": "medium", + "method": "kd_tree" + }, + { + "device": "none", + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 50, + "training": { + "n_samples": 20000 + }, + "testing": { + "n_samples": 20000 + } + } + ], + "workload-size": "small", + "method": "kd_tree" + }, + { + "device": "none", + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 10, + "n_features": 16, + "training": { + "n_samples": 250000 + }, + "testing": { + "n_samples": 250000 + } + } + ], + "workload-size": "large", + "method": "kd_tree" + }, + { + "dataset": [ + { + "source": "npy", + "name": "epsilon_100K", + "training": + { + "x": "data/epsilon_100K_x_train.npy", + "y": "data/epsilon_100K_y_train.npy" + }, + "testing": + { + "x": "data/epsilon_100K_x_train.npy", + "y": "data/epsilon_100K_y_train.npy" + } + } + ], + "workload-size": "medium", + "n-neighbors": [2, 100] + }, + { + "dataset": [ + { + "source": "npy", + "name": "higgs_150K", + "training": + { + "x": "data/higgs_150K_x_train.npy", + "y": "data/higgs_150K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_150K_x_test.npy", + "y": "data/higgs_150K_y_test.npy" + } + }, + { + "source": "npy", + "name": "hepmass_150K", + "training": + { + "x": "data/hepmass_150K_x_train.npy", + "y": "data/hepmass_150K_y_train.npy" + }, + "testing": + { + "x": "data/hepmass_150K_x_test.npy", + "y": "data/hepmass_150K_y_test.npy" + } + } + ], + "workload-size": "medium", + "n-neighbors": [5, 100] + }, + { + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + }, + "testing": + { + "x": "data/cifar_binary_x_test.npy", + "y": "data/cifar_binary_y_test.npy" + } + } + ], + "workload-size": "medium", + "n-neighbors": 7 + }, + { + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "workload-size": "medium", + "n-neighbors": 5 + }, + { + "dataset": [ + { + "source": "npy", + "name": "epsilon_100K", + "training": + { + "x": "data/epsilon_100K_x_train.npy", + "y": "data/epsilon_100K_y_train.npy" + }, + "testing": + { + "x": "data/epsilon_100K_x_train.npy", + "y": "data/epsilon_100K_y_train.npy" + } + } + ], + "workload-size": "medium", + "task": "search", + "n-neighbors": 2 + }, + { + "dataset": [ + { + "source": "npy", + "name": "higgs_150K", + "training": + { + "x": "data/higgs_150K_x_train.npy", + "y": "data/higgs_150K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_150K_x_test.npy", + "y": "data/higgs_150K_y_test.npy" + } + } + ], + "workload-size": "medium", + "task": "search", + "n-neighbors": 5 + }, + { + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + }, + "testing": + { + "x": "data/cifar_binary_x_test.npy", + "y": "data/cifar_binary_y_test.npy" + } + } + ], + "workload-size": "medium", + "task": "search", + "n-neighbors": 7 + }, + { + "device": "none", + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + }, + "testing": + { + "x": "data/cifar_binary_x_test.npy", + "y": "data/cifar_binary_y_test.npy" + } + } + ], + "method": "kd_tree", + "n-neighbors": 7 + }, + { + "algorithm": "knn_clsf", + "device": "none", + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "method": "kd_tree", + "n-neighbors": 5 + } + ] +} diff --git a/configs/xpu/knn_regr.json b/configs/sklearn/performance/knn_regr.json similarity index 62% rename from configs/xpu/knn_regr.json rename to configs/sklearn/performance/knn_regr.json index 0861a855d..38c9629b9 100644 --- a/configs/xpu/knn_regr.json +++ b/configs/sklearn/performance/knn_regr.json @@ -67,6 +67,48 @@ ], "workload-size": "medium", "n-neighbors": 7 + }, + { + "device": "none", + "dataset": [ + { + "source": "npy", + "name": "higgs_150K", + "training": + { + "x": "data/higgs_150K_x_train.npy", + "y": "data/higgs_150K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_150K_x_test.npy", + "y": "data/higgs_150K_y_test.npy" + } + } + ], + "method": "kd_tree", + "n-neighbors": 5 + }, + { + "device": "none", + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + }, + "testing": + { + "x": "data/cifar_binary_x_test.npy", + "y": "data/cifar_binary_y_test.npy" + } + } + ], + "method": "kd_tree", + "n-neighbors": 7 } ] } diff --git a/configs/sklearn/performance/lasso.json b/configs/sklearn/performance/lasso.json new file mode 100644 index 000000000..7acaffef5 --- /dev/null +++ b/configs/sklearn/performance/lasso.json @@ -0,0 +1,33 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "lasso", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "workload-size": "medium", + "alpha": 1.0, + "tol": 1e-4 + } + ] +} diff --git a/configs/xpu/linear.json b/configs/sklearn/performance/linear.json similarity index 69% rename from configs/xpu/linear.json rename to configs/sklearn/performance/linear.json index 281f97135..1acc165cf 100644 --- a/configs/xpu/linear.json +++ b/configs/sklearn/performance/linear.json @@ -8,6 +8,28 @@ "device": ["host", "cpu", "gpu", "none"] }, "cases": [ + { + "algorithm": "linear", + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "training": { + "n_samples": 10000000 + } + }, + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "training": { + "n_samples": 2000000 + } + } + ], + "workload-size": "medium" + }, { "dataset": [ { diff --git a/configs/xpu/log_reg.json b/configs/sklearn/performance/log_reg.json similarity index 66% rename from configs/xpu/log_reg.json rename to configs/sklearn/performance/log_reg.json index b65ac0316..09abc1e02 100644 --- a/configs/xpu/log_reg.json +++ b/configs/sklearn/performance/log_reg.json @@ -8,6 +8,49 @@ "device": ["host", "cpu", "gpu", "none"] }, "cases": [ + { + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 20, + "training": { + "n_samples": 10000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 100, + "training": { + "n_samples": 2000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 5, + "n_features": 20, + "training": { + "n_samples": 10000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 5, + "n_features": 100, + "training": { + "n_samples": 2000000 + } + } + ], + "workload-size": "medium", + "maxiter": 100, + "tol": 0 + }, { "dataset": [ { diff --git a/configs/sklearn/performance/nusvc.json b/configs/sklearn/performance/nusvc.json new file mode 100644 index 000000000..9c82f68f1 --- /dev/null +++ b/configs/sklearn/performance/nusvc.json @@ -0,0 +1,96 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "nusvc", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "workload-size": "small", + "nu": 0.25, + "kernel": "sigmoid" + }, + { + "dataset": [ + { + "source": "npy", + "name": "klaverjas", + "training": + { + "x": "data/klaverjas_x_train.npy", + "y": "data/klaverjas_y_train.npy" + }, + "testing": + { + "x": "data/klaverjas_x_test.npy", + "y": "data/klaverjas_y_test.npy" + } + } + ], + "workload-size": "large", + "nu": 0.7, + "kernel": "rbf" + }, + { + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "workload-size": "medium", + "nu": 0.5, + "kernel": "rbf" + }, + { + "dataset": [ + { + "source": "npy", + "name": "codrnanorm", + "training": + { + "x": "data/codrnanorm_x_train.npy", + "y": "data/codrnanorm_y_train.npy" + }, + "testing": + { + "x": "data/codrnanorm_x_test.npy", + "y": "data/codrnanorm_y_test.npy" + } + } + ], + "workload-size": "medium", + "nu": 0.15, + "kernel": "poly" + } + ] +} diff --git a/configs/sklearn/performance/nusvr.json b/configs/sklearn/performance/nusvr.json new file mode 100644 index 000000000..702303db0 --- /dev/null +++ b/configs/sklearn/performance/nusvr.json @@ -0,0 +1,79 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "nusvr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "workload-size": "small", + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" + }, + { + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "workload-size": "medium", + "nu": 0.5, + "C": 10.0, + "kernel": "poly", + "degree": 2 + }, + { + "dataset": [ + { + "source": "npy", + "name": "yolanda", + "training": + { + "x": "data/yolanda_x_train.npy", + "y": "data/yolanda_y_train.npy" + }, + "testing": + { + "x": "data/yolanda_x_test.npy", + "y": "data/yolanda_y_test.npy" + } + } + ], + "workload-size": "large", + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" + } + ] +} diff --git a/configs/sklearn/performance/pca.json b/configs/sklearn/performance/pca.json new file mode 100644 index 000000000..294db5076 --- /dev/null +++ b/configs/sklearn/performance/pca.json @@ -0,0 +1,107 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "pca", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": ["host", "cpu", "gpu", "none"] + }, + "cases": [ + { + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 100, + "training": { + "n_samples": 1000000 + }, + "testing": { + "n_samples": 100000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 2000, + "training": { + "n_samples": 10000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 1000, + "training": { + "n_samples": 30000 + } + } + ], + "workload-size": "small", + "svd-solver": "full", + "n-components": 10 + }, + { + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 4000, + "training": { + "n_samples": 6000 + } + } + ], + "workload-size": "medium", + "svd-solver": "full", + "n-components": 10 + }, + { + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": + { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + } + } + ], + "workload-size": "small" + }, + { + "dataset": [ + { + "source": "npy", + "name": "epsilon_30K", + "training": + { + "x": "data/epsilon_30K_x_train.npy", + "y": "data/epsilon_30K_y_train.npy" + } + } + ], + "workload-size": "small" + }, + { + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + } + } + ], + "workload-size": "medium" + } + ] +} diff --git a/configs/sklearn/performance/ridge.json b/configs/sklearn/performance/ridge.json new file mode 100644 index 000000000..3792589ff --- /dev/null +++ b/configs/sklearn/performance/ridge.json @@ -0,0 +1,34 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "ridge", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "training": { + "n_samples": 10000000 + } + }, + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "training": { + "n_samples": 2000000 + } + } + ], + "workload-size": "small", + "alpha": 5 + } + ] +} diff --git a/configs/xpu/svm.json b/configs/sklearn/performance/svm.json similarity index 52% rename from configs/xpu/svm.json rename to configs/sklearn/performance/svm.json index 74d7713fe..c213a195c 100644 --- a/configs/xpu/svm.json +++ b/configs/sklearn/performance/svm.json @@ -8,6 +8,150 @@ "device": ["host", "cpu", "gpu", "none"] }, "cases": [ + { + "device": "none", + "data-order": "F", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "ijcnn", + "training": + { + "x": "data/ijcnn_x_train.npy", + "y": "data/ijcnn_y_train.npy" + }, + "testing": + { + "x": "data/ijcnn_x_test.npy", + "y": "data/ijcnn_y_test.npy" + } + } + ], + "workload-size": "medium", + "C": 1000.0, + "kernel": "linear" + }, + { + "device": "none", + "data-order": "F", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "connect", + "training": + { + "x": "data/connect_x_train.npy", + "y": "data/connect_y_train.npy" + }, + "testing": + { + "x": "data/connect_x_test.npy", + "y": "data/connect_y_test.npy" + } + } + ], + "workload-size": "medium", + "C": 100.0, + "kernel": "linear" + }, + { + "device": "none", + "data-order": "F", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "workload-size": "large", + "C": 500.0, + "kernel": "linear" + }, + { + "device": "none", + "data-order": "F", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "skin_segmentation", + "training": + { + "x": "data/skin_segmentation_x_train.npy", + "y": "data/skin_segmentation_y_train.npy" + }, + "testing": + { + "x": "data/skin_segmentation_x_test.npy", + "y": "data/skin_segmentation_y_test.npy" + } + } + ], + "workload-size": "medium", + "C": 1.0, + "kernel": "rbf" + }, + { + "device": "none", + "data-order": "F", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "covertype", + "training": + { + "x": "data/covertype_x_train.npy", + "y": "data/covertype_y_train.npy" + }, + "testing": + { + "x": "data/covertype_x_test.npy", + "y": "data/covertype_y_test.npy" + } + } + ], + "workload-size": "large", + "C": 100.0, + "kernel": "rbf" + }, + { + "device": "none", + "data-order": "F", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "workload-size": "small", + "C": 1.5e-3, + "kernel": "linear" + }, { "dataset": [ { diff --git a/configs/sklearn/performance/svr.json b/configs/sklearn/performance/svr.json new file mode 100644 index 000000000..0bcdaf2ce --- /dev/null +++ b/configs/sklearn/performance/svr.json @@ -0,0 +1,54 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "svr", + "data-format": "pandas", + "data-order": "F", + "dtype": "float64", + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "workload-size": "small", + "C": 0.1, + "kernel": "poly" + }, + { + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "workload-size": "medium", + "C": 10.0, + "kernel": "rbf" + } + ] +} diff --git a/configs/sklearn/performance/train_test_split.json b/configs/sklearn/performance/train_test_split.json new file mode 100644 index 000000000..1e9aaafd0 --- /dev/null +++ b/configs/sklearn/performance/train_test_split.json @@ -0,0 +1,66 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "train_test_split", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 20, + "training": { + "n_samples": 5000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 100, + "training": { + "n_samples": 1000000 + } + }, + { + "source": "synthetic", + "type": "classification", + "n_classes": 2, + "n_features": 10000, + "training": { + "n_samples": 10000 + } + } + ], + "workload-size": "small", + "include-y": "", + "train-size": 0.75, + "test-size": 0.25 + }, + { + "dataset": [ + { + "source": "npy", + "name": "higgs_10500K", + "training": + { + "x": "data/higgs_10500K_x_train.npy", + "y": "data/higgs_10500K_y_train.npy" + } + } + ], + "workload-size": "medium", + "data-format": "numpy", + "data-order": "C", + "include-y": "", + "train-size": 0.9, + "test-size": 0.1 + } + ] +} diff --git a/configs/sklearn/performance/tsne.json b/configs/sklearn/performance/tsne.json new file mode 100644 index 000000000..07909d49c --- /dev/null +++ b/configs/sklearn/performance/tsne.json @@ -0,0 +1,31 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "tsne", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": "none" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "workload-size": "medium" + } + ] +} diff --git a/configs/xpu/knn_clsf.json b/configs/xpu/knn_clsf.json deleted file mode 100644 index 0e352f262..000000000 --- a/configs/xpu/knn_clsf.json +++ /dev/null @@ -1,169 +0,0 @@ -{ - "common": { - "lib": "sklearn", - "algorithm": "knn_clsf", - "data-format": "pandas", - "data-order": "F", - "dtype": ["float32", "float64"], - "device": ["host", "cpu", "gpu", "none"] - }, - "cases": [ - { - "dataset": [ - { - "source": "npy", - "name": "epsilon_100K", - "training": - { - "x": "data/epsilon_100K_x_train.npy", - "y": "data/epsilon_100K_y_train.npy" - }, - "testing": - { - "x": "data/epsilon_100K_x_train.npy", - "y": "data/epsilon_100K_y_train.npy" - } - } - ], - "workload-size": "medium", - "n-neighbors": [2, 100] - }, - { - "dataset": [ - { - "source": "npy", - "name": "higgs_150K", - "training": - { - "x": "data/higgs_150K_x_train.npy", - "y": "data/higgs_150K_y_train.npy" - }, - "testing": - { - "x": "data/higgs_150K_x_test.npy", - "y": "data/higgs_150K_y_test.npy" - } - }, - { - "source": "npy", - "name": "hepmass_150K", - "training": - { - "x": "data/hepmass_150K_x_train.npy", - "y": "data/hepmass_150K_y_train.npy" - }, - "testing": - { - "x": "data/hepmass_150K_x_test.npy", - "y": "data/hepmass_150K_y_test.npy" - } - } - ], - "workload-size": "medium", - "n-neighbors": [5, 100] - }, - { - "dataset": [ - { - "source": "npy", - "name": "cifar_binary", - "training": - { - "x": "data/cifar_binary_x_train.npy", - "y": "data/cifar_binary_y_train.npy" - }, - "testing": - { - "x": "data/cifar_binary_x_test.npy", - "y": "data/cifar_binary_y_test.npy" - } - } - ], - "workload-size": "medium", - "n-neighbors": 7 - }, - { - "dataset": [ - { - "source": "npy", - "name": "mnist", - "training": - { - "x": "data/mnist_x_train.npy", - "y": "data/mnist_y_train.npy" - }, - "testing": - { - "x": "data/mnist_x_test.npy", - "y": "data/mnist_y_test.npy" - } - } - ], - "workload-size": "medium", - "n-neighbors": 5 - }, - { - "dataset": [ - { - "source": "npy", - "name": "epsilon_100K", - "training": - { - "x": "data/epsilon_100K_x_train.npy", - "y": "data/epsilon_100K_y_train.npy" - }, - "testing": - { - "x": "data/epsilon_100K_x_train.npy", - "y": "data/epsilon_100K_y_train.npy" - } - } - ], - "workload-size": "medium", - "task": "search", - "n-neighbors": 2 - }, - { - "dataset": [ - { - "source": "npy", - "name": "higgs_150K", - "training": - { - "x": "data/higgs_150K_x_train.npy", - "y": "data/higgs_150K_y_train.npy" - }, - "testing": - { - "x": "data/higgs_150K_x_test.npy", - "y": "data/higgs_150K_y_test.npy" - } - } - ], - "workload-size": "medium", - "task": "search", - "n-neighbors": 5 - }, - { - "dataset": [ - { - "source": "npy", - "name": "cifar_binary", - "training": - { - "x": "data/cifar_binary_x_train.npy", - "y": "data/cifar_binary_y_train.npy" - }, - "testing": - { - "x": "data/cifar_binary_x_test.npy", - "y": "data/cifar_binary_y_test.npy" - } - } - ], - "workload-size": "medium", - "task": "search", - "n-neighbors": 7 - } - ] -} diff --git a/configs/xpu/pca.json b/configs/xpu/pca.json deleted file mode 100644 index 5c0845c2f..000000000 --- a/configs/xpu/pca.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "common": { - "lib": "sklearn", - "algorithm": "pca", - "data-format": "pandas", - "data-order": "F", - "dtype": ["float32", "float64"], - "device": ["host", "cpu", "none"] - }, - "cases": [ - { - "dataset": [ - { - "source": "npy", - "name": "higgs1m", - "training": - { - "x": "data/higgs1m_x_train.npy", - "y": "data/higgs1m_y_train.npy" - } - } - ], - "workload-size": "small" - }, - { - "device": ["host", "cpu", "gpu", "none"], - "dataset": [ - { - "source": "npy", - "name": "epsilon_30K", - "training": - { - "x": "data/epsilon_30K_x_train.npy", - "y": "data/epsilon_30K_y_train.npy" - } - } - ], - "workload-size": "small" - }, - { - "dataset": [ - { - "source": "npy", - "name": "cifar_binary", - "training": - { - "x": "data/cifar_binary_x_train.npy", - "y": "data/cifar_binary_y_train.npy" - } - } - ], - "workload-size": "medium" - } - ] -} diff --git a/report_generator/report_generator.py b/report_generator/report_generator.py index d486e88a3..bad2f3c76 100755 --- a/report_generator/report_generator.py +++ b/report_generator/report_generator.py @@ -33,6 +33,8 @@ def get_property(entry: Dict[str, Any], prop: str): if key not in value: return None value = value[key] + if (not value): + return "null" return value diff --git a/report_generator/xpu_report_gen_config.json b/report_generator/sklearn_performance_report_gen_config.json similarity index 94% rename from report_generator/xpu_report_gen_config.json rename to report_generator/sklearn_performance_report_gen_config.json index f717f107b..23c30c243 100644 --- a/report_generator/xpu_report_gen_config.json +++ b/report_generator/sklearn_performance_report_gen_config.json @@ -6,6 +6,8 @@ "input_data:data_order", "input_data:data_type", "input_data:dataset_name", + "input_data:rows", + "input_data:columns", "input_data:classes", "algorithm_parameters:tol", "algorithm_parameters:max_iter", diff --git a/runner.py b/runner.py index 5bf84e453..c7b460385 100755 --- a/runner.py +++ b/runner.py @@ -149,6 +149,9 @@ def get_configs(path: Path) -> List[str]: logging.info(f'{algorithm} algorithm: {len(libs) * len(cases)} case(s),' f' {len(params_set["dataset"])} dataset(s)\n') + if (len(libs) * len(cases) == 0): + continue + for dataset in params_set['dataset']: if dataset['source'] in ['csv', 'npy']: dataset_name = dataset['name'] if 'name' in dataset else 'unknown' @@ -207,26 +210,31 @@ class GenerationArgs: file_prefix = f'data/synthetic-{gen_args.type}{cls_num_for_file}-' file_postfix = f'-{gen_args.samples}x{gen_args.features}.npy' + isfiles = True gen_args.filex = f'{file_prefix}X-train{file_postfix}' paths += f' --file-X-train {gen_args.filex}' + isfiles = isfiles and os.path.isfile(gen_args.filex) if gen_args.type not in ['blobs']: gen_args.filey = f'{file_prefix}y-train{file_postfix}' paths += f' --file-y-train {gen_args.filey}' + isfiles = isfiles and os.path.isfile(gen_args.filey) if 'testing' in dataset: gen_args.test_samples = dataset['testing']['n_samples'] gen_args.filextest = f'{file_prefix}X-test{file_postfix}' paths += f' --file-X-test {gen_args.filextest}' + isfiles = isfiles and os.path.isfile(gen_args.filextest) if gen_args.type not in ['blobs']: gen_args.fileytest = f'{file_prefix}y-test{file_postfix}' paths += f' --file-y-test {gen_args.fileytest}' + isfiles = isfiles and os.path.isfile(gen_args.fileytest) else: gen_args.test_samples = 0 gen_args.filextest = gen_args.filex if gen_args.type not in ['blobs']: gen_args.fileytest = gen_args.filey - if not args.dummy_run and not os.path.isfile(gen_args.filex): + if not args.dummy_run and not isfiles: if gen_args.type == 'regression': make_datasets.gen_regression(gen_args) elif gen_args.type == 'classification':