diff --git a/configs/skl_public_config.json b/configs/skl_public_config.json new file mode 100644 index 000000000..4aa6feaad --- /dev/null +++ b/configs/skl_public_config.json @@ -0,0 +1,1007 @@ +{ + "common": { + "lib": "sklearn", + "data-format": "pandas", + "data-order": "F", + "device": ["host", "cpu", "gpu", "none"] + }, + "cases": [ + { + "algorithm": "knn_clsf", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "higgs_150K", + "training": + { + "x": "data/higgs_150K_x_train.npy", + "y": "data/higgs_150K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_150K_x_test.npy", + "y": "data/higgs_150K_y_test.npy" + } + } + ], + "method": ["brute", "kd_tree"], + "n-neighbors": [5, 100] + }, + { + "algorithm": "knn_clsf", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + }, + "testing": + { + "x": "data/cifar_binary_x_test.npy", + "y": "data/cifar_binary_y_test.npy" + } + } + ], + "method": ["brute", "kd_tree"], + "n-neighbors": 7 + }, + { + "algorithm": "knn_clsf", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "method": ["brute", "kd_tree"], + "n-neighbors": 5 + }, + { + "algorithm": "knn_regr", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "higgs_150K", + "training": + { + "x": "data/higgs_150K_x_train.npy", + "y": "data/higgs_150K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_150K_x_test.npy", + "y": "data/higgs_150K_y_test.npy" + } + } + ], + "method": ["brute", "kd_tree"], + "n-neighbors": 5 + }, + { + "algorithm": "knn_regr", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + }, + "testing": + { + "x": "data/cifar_binary_x_test.npy", + "y": "data/cifar_binary_y_test.npy" + } + } + ], + "method": ["brute", "kd_tree"], + "n-neighbors": 7 + }, + { + "algorithm": "knn_regr", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + }, + "testing": + { + "x": "data/cifar_binary_x_test.npy", + "y": "data/cifar_binary_y_test.npy" + } + } + ], + "task": "search", + "n-neighbors": 7 + }, + { + "algorithm": "knn_regr", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "higgs_150K", + "training": + { + "x": "data/higgs_150K_x_train.npy", + "y": "data/higgs_150K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_150K_x_test.npy", + "y": "data/higgs_150K_y_test.npy" + } + } + ], + "task": "search", + "n-neighbors": 5 + }, + { + "algorithm": "pca", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": + { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + } + } + ] + }, + { + "algorithm": "pca", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "epsilon_30K", + "training": + { + "x": "data/epsilon_30K_x_train.npy", + "y": "data/epsilon_30K_y_train.npy" + } + } + ] + }, + { + "algorithm": "pca", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "cifar_binary", + "training": + { + "x": "data/cifar_binary_x_train.npy", + "y": "data/cifar_binary_y_train.npy" + } + } + ] + }, + { + "algorithm": "dbscan", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "hepmass_10K_cluster", + "training": + { + "x": "data/hepmass_10K_cluster.npy" + } + } + ], + "eps": 5, + "min-samples": 3 + }, + { + "algorithm": "dbscan", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "mnist_10K_cluster", + "training": + { + "x": "data/mnist_10K_cluster.npy" + } + } + ], + "eps": 1.7e3, + "min-samples": 3 + }, + { + "algorithm": "dbscan", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "road_network_20K_cluster", + "training": + { + "x": "data/road_network_20K_cluster.npy" + } + } + ], + "eps": 1.0e3, + "min-samples": 220 + }, + { + "algorithm": "log_reg", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "susy", + "training": + { + "x": "data/susy_x_train.npy", + "y": "data/susy_y_train.npy" + }, + "testing": + { + "x": "data/susy_x_test.npy", + "y": "data/susy_y_test.npy" + } + } + ], + "solver": "saga", + "maxiter": "20", + "tol": 1e-3 + }, + { + "algorithm": "log_reg", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "C": 0.2, + "maxiter": "500", + "tol": 1e-2 + }, + { + "algorithm": "df_clsf", + "dtype": "float32", + "max-features": "sqrt", + "dataset": [ + { + "source": "npy", + "name": "susy", + "training": + { + "x": "data/susy_x_train.npy", + "y": "data/susy_y_train.npy" + }, + "testing": + { + "x": "data/susy_x_test.npy", + "y": "data/susy_y_test.npy" + } + } + ], + "num-trees": 10, + "max-depth": 5 + }, + { + "algorithm": "df_clsf", + "dtype": "float32", + "max-features": "sqrt", + "dataset": [ + { + "source": "npy", + "name": "susy", + "training": + { + "x": "data/susy_x_train.npy", + "y": "data/susy_y_train.npy" + }, + "testing": + { + "x": "data/susy_x_test.npy", + "y": "data/susy_y_test.npy" + } + } + ], + "num-trees": 100, + "max-depth": 8 + }, + { + "algorithm": "df_clsf", + "dtype": "float32", + "max-features": "sqrt", + "dataset": [ + { + "source": "npy", + "name": "susy", + "training": + { + "x": "data/susy_x_train.npy", + "y": "data/susy_y_train.npy" + }, + "testing": + { + "x": "data/susy_x_test.npy", + "y": "data/susy_y_test.npy" + } + } + ], + "num-trees": 20, + "max-depth": 16 + }, + { + "algorithm": "df_clsf", + "dtype": "float32", + "max-features": "sqrt", + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "num-trees": 100, + "max-depth": 10 + }, + { + "algorithm": "df_clsf", + "dtype": ["float32", "float64"], + "max-features": "sqrt", + "dataset": [ + { + "source": "npy", + "name": "hepmass_150K", + "training": + { + "x": "data/hepmass_150K_x_train.npy", + "y": "data/hepmass_150K_y_train.npy" + }, + "testing": + { + "x": "data/hepmass_150K_x_test.npy", + "y": "data/hepmass_150K_y_test.npy" + } + } + ], + "num-trees": 50, + "max-depth": 15 + }, + { + "algorithm": "df_regr", + "dtype": ["float32", "float64"], + "max-features": 0.33, + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "num-trees": 10, + "max-depth": 5 + }, + { + "algorithm": "df_regr", + "dtype": "float32", + "max-features": 0.33, + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": + { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": + { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + ], + "num-trees": 20, + "max-depth": 8 + }, + { + "algorithm": "df_regr", + "dtype": "float32", + "max-features": 0.33, + "dataset": [ + { + "source": "npy", + "name": "higgs_10500K", + "training": + { + "x": "data/higgs_10500K_x_train.npy", + "y": "data/higgs_10500K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_10500K_x_test.npy", + "y": "data/higgs_10500K_y_test.npy" + } + } + ], + "num-trees": 100, + "max-depth": 8 + }, + { + "algorithm": "df_regr", + "dtype": "float32", + "max-features": 0.33, + "dataset": [ + { + "source": "npy", + "name": "higgs_10500K", + "training": + { + "x": "data/higgs_10500K_x_train.npy", + "y": "data/higgs_10500K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_10500K_x_test.npy", + "y": "data/higgs_10500K_y_test.npy" + } + } + ], + "num-trees": 20, + "max-depth": 16 + }, + { + "algorithm": "svm", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "gisette", + "training": + { + "x": "data/gisette_x_train.npy", + "y": "data/gisette_y_train.npy" + }, + "testing": + { + "x": "data/gisette_x_test.npy", + "y": "data/gisette_y_test.npy" + } + } + ], + "C": 1.5e-3, + "kernel": "linear" + }, + { + "algorithm": "svm", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "higgs_150K", + "training": + { + "x": "data/higgs_150K_x_train.npy", + "y": "data/higgs_150K_y_train.npy" + }, + "testing": + { + "x": "data/higgs_150K_x_test.npy", + "y": "data/higgs_150K_y_test.npy" + } + } + ], + "C": 1.0, + "kernel": "linear" + }, + { + "algorithm": "svm", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "sensit", + "training": + { + "x": "data/sensit_x_train.npy", + "y": "data/sensit_y_train.npy" + }, + "testing": + { + "x": "data/sensit_x_test.npy", + "y": "data/sensit_y_test.npy" + } + } + ], + "C": 500.0, + "kernel": "linear" + }, + { + "algorithm": "svm", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "epsilon_16K", + "training": + { + "x": "data/epsilon_16K_x_train.npy", + "y": "data/epsilon_16K_y_train.npy" + }, + "testing": + { + "x": "data/epsilon_16K_x_test.npy", + "y": "data/epsilon_16K_y_test.npy" + } + } + ], + "C": 9.0e2, + "kernel": "rbf" + }, + { + "algorithm": "nusvc", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "a9a", + "training": + { + "x": "data/a9a_x_train.npy", + "y": "data/a9a_y_train.npy" + }, + "testing": + { + "x": "data/a9a_x_test.npy", + "y": "data/a9a_y_test.npy" + } + } + ], + "nu": 0.25, + "kernel": "sigmoid" + }, + { + "algorithm": "nusvc", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "nu": 0.5, + "kernel": "rbf" + }, + { + "algorithm": "svr", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "california_housing", + "training": + { + "x": "data/california_housing_x_train.npy", + "y": "data/california_housing_y_train.npy" + }, + "testing": + { + "x": "data/california_housing_x_test.npy", + "y": "data/california_housing_y_test.npy" + } + } + ], + "C": 0.1, + "kernel": "poly" + }, + { + "algorithm": "svr", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "twodplanes", + "training": + { + "x": "data/twodplanes_x_train.npy", + "y": "data/twodplanes_y_train.npy" + }, + "testing": + { + "x": "data/twodplanes_x_test.npy", + "y": "data/twodplanes_y_test.npy" + } + } + ], + "C": 10.0, + "kernel": "rbf" + }, + { + "algorithm": "nusvr", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "fried", + "training": + { + "x": "data/fried_x_train.npy", + "y": "data/fried_y_train.npy" + }, + "testing": + { + "x": "data/fried_x_test.npy", + "y": "data/fried_y_test.npy" + } + } + ], + "nu": 0.8, + "C": 2.0, + "kernel": "rbf" + }, + { + "algorithm": "nusvr", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "medical_charges_nominal", + "training": + { + "x": "data/medical_charges_nominal_x_train.npy", + "y": "data/medical_charges_nominal_y_train.npy" + }, + "testing": + { + "x": "data/medical_charges_nominal_x_test.npy", + "y": "data/medical_charges_nominal_y_test.npy" + } + } + ], + "nu": 0.5, + "C": 10.0, + "kernel": "poly", + "degree": 2 + }, + { + "algorithm": "linear", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": + { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": + { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + ] + }, + { + "algorithm": "linear", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + }, + { + "algorithm": "ridge", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "higgs1m", + "training": + { + "x": "data/higgs1m_x_train.npy", + "y": "data/higgs1m_y_train.npy" + }, + "testing": + { + "x": "data/higgs1m_x_test.npy", + "y": "data/higgs1m_y_test.npy" + } + } + ], + "alpha": 5 + }, + { + "algorithm": "ridge", + "dtype": "float32", + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "higgs_one_m_clustering", + "training": + { + "x": "data/higgs_one_m_clustering.npy" + } + } + ], + "n-clusters": 10, + "maxiter": 100 + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "higgs_one_m_clustering", + "training": + { + "x": "data/higgs_one_m_clustering.npy" + } + } + ], + "n-clusters": [100, 250], + "maxiter": 10 + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "epsilon_50K_cluster", + "training": + { + "x": "data/epsilon_50K_cluster.npy" + } + } + ], + "n-clusters": [512, 1024], + "maxiter": 10 + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "hepmass_1M_cluster", + "training": + { + "x": "data/hepmass_1M_cluster.npy" + } + } + ], + "n-clusters": 100, + "maxiter": 10 + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "hepmass_1M_cluster", + "training": + { + "x": "data/hepmass_1M_cluster.npy" + } + } + ], + "n-clusters": 10, + "maxiter": 100 + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "susy_cluster", + "training": + { + "x": "data/susy_cluster.npy" + } + } + ], + "n-clusters": 10, + "maxiter": 100 + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "susy_cluster", + "training": + { + "x": "data/susy_cluster.npy" + } + } + ], + "n-clusters": [100 , 250], + "maxiter": 10 + }, + { + "algorithm": "kmeans", + "dtype": "float64", + "dataset": [ + { + "source": "npy", + "name": "cifar_cluster", + "training": + { + "x": "data/cifar_cluster.npy" + } + } + ], + "n-clusters": [512, 1024, 2048], + "maxiter": 10 + }, + { + "algorithm": "elasticnet", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "alpha": 2.0, + "l1_ratio": 0.5, + "tol": 1e-4 + }, + { + "algorithm": "lasso", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": + { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": + { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ], + "alpha": 1.0, + "tol": 1e-4 + }, + { + "algorithm": "tsne", + "dtype": ["float32", "float64"], + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ] + } + ] +}