From 2c0d098ed23d9b009a13362c71be1fd11252fb1b Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Tue, 7 Dec 2021 10:55:19 +0000 Subject: [PATCH 01/16] df_clsf --- configs/xpu/df_clsf.json | 112 ++++++++++++++++++++++++++++++ datasets/load_datasets.py | 6 +- datasets/loader_classification.py | 90 ++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 configs/xpu/df_clsf.json diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json new file mode 100644 index 000000000..65594ed30 --- /dev/null +++ b/configs/xpu/df_clsf.json @@ -0,0 +1,112 @@ +{ + "common": { + "lib": "sklearn", + "algorithm": "df_clsf", + "data-format": "pandas", + "data-order": "F", + "dtype": ["float32", "float64"], + "device": "gpu" + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "susy", + "training": + { + "x": "data/susy_x_train.npy", + "y": "data/susy_y_train.npy" + }, + "testing": + { + "x": "data/susy_x_test.npy", + "y": "data/susy_y_test.npy" + } + } + ], + "num-trees": 10, + "max-depth": 5 + }, + { + "dataset": [ + { + "source": "npy", + "name": "susy", + "training": + { + "x": "data/susy_x_train.npy", + "y": "data/susy_y_train.npy" + }, + "testing": + { + "x": "data/susy_x_test.npy", + "y": "data/susy_y_test.npy" + } + } + ], + "num-trees": 100, + "max-depth": 8 + }, + { + "dataset": [ + { + "source": "npy", + "name": "susy", + "training": + { + "x": "data/susy_x_train.npy", + "y": "data/susy_y_train.npy" + }, + "testing": + { + "x": "data/susy_x_test.npy", + "y": "data/susy_y_test.npy" + } + } + ], + "num-trees": 20, + "max-depth": 16 + }, + { + "dataset": [ + { + "source": "npy", + "name": "mnist", + "training": + { + "x": "data/mnist_x_train.npy", + "y": "data/mnist_y_train.npy" + }, + "testing": + { + "x": "data/mnist_x_test.npy", + "y": "data/mnist_y_test.npy" + } + } + ], + "num-trees": 100, + "max-depth": 10 + }, + { + "dataset": [ + { + "source": "npy", + "name": "hepmass_150K", + "training": + { + "x": "data/hepmass_150K_x_train.npy", + "y": "data/hepmass_150K_y_train.npy" + }, + "testing": + { + "x": "data/hepmass_150K_x_test.npy", + "y": "data/hepmass_150K_y_test.npy" + } + } + ], + "num-trees": 50, + "max-depth": 15 + } + ] +} \ No newline at end of file diff --git a/datasets/load_datasets.py b/datasets/load_datasets.py index a3bc15187..0a7874d92 100644 --- a/datasets/load_datasets.py +++ b/datasets/load_datasets.py @@ -23,8 +23,8 @@ from .loader_classification import (a_nine_a, airline, airline_ohe, bosch, census, codrnanorm, creditcard, epsilon, fraud, - gisette, higgs, higgs_one_m, ijcnn, - klaverjas, santander, skin_segmentation) + gisette, hepmass_150K, higgs, higgs_one_m, ijcnn, + klaverjas, santander, skin_segmentation, susy) from .loader_multiclass import (connect, covertype, covtype, letters, mlsr, mnist, msrank, plasticc, sensit) from .loader_regression import (abalone, california_housing, fried, @@ -49,6 +49,7 @@ "fraud": fraud, "fried": fried, "gisette": gisette, + "hepmass_150K": hepmass_150K, "higgs": higgs, "higgs1m": higgs_one_m, "ijcnn": ijcnn, @@ -63,6 +64,7 @@ "santander": santander, "sensit": sensit, "skin_segmentation": skin_segmentation, + "susy": susy, "twodplanes": twodplanes, "year_prediction_msd": year_prediction_msd, "yolanda": yolanda, diff --git a/datasets/loader_classification.py b/datasets/loader_classification.py index 5a5d9df74..c5383b972 100644 --- a/datasets/loader_classification.py +++ b/datasets/loader_classification.py @@ -446,6 +446,56 @@ def gisette(dataset_dir: Path) -> bool: return True +def hepmass_150K(dataset_dir: Path) -> bool: + """ + HEPMASS dataset from UCI machine learning repository ( + https://archive.ics.uci.edu/ml/datasets/HEPMASS). + + Classification task. n_classes = 2. + hepmass_150K X train dataset (100000, 28) + hepmass_150K y train dataset (100000, 1) + hepmass_150K X test dataset (50000, 28) + hepmass_150K y test dataset (50000, 1) + """ + dataset_name = 'hepmass_150K' + os.makedirs(dataset_dir, exist_ok=True) + + url_test = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00347/all_test.csv.gz' + url_train = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00347/all_train.csv.gz' + + local_url_test = os.path.join(dataset_dir, os.path.basename(url_test)) + local_url_train = os.path.join(dataset_dir, os.path.basename(url_train)) + + if not os.path.isfile(local_url_test): + logging.info(f'Started loading {dataset_name}, test') + retrieve(url_test, local_url_test) + if not os.path.isfile(local_url_train): + logging.info(f'Started loading {dataset_name}, train') + retrieve(url_train, local_url_train) + logging.info(f'{dataset_name} is loaded, started parsing...') + + nrows_train, nrows_test, dtype = 100000, 50000, np.float32 + data_test: Any = pd.read_csv(local_url_test, delimiter=",", + compression="gzip", dtype=dtype, + nrows=nrows_test) + data_train: Any = pd.read_csv(local_url_train, delimiter=",", + compression="gzip", dtype=dtype, + nrows=nrows_train) + + x_test = np.ascontiguousarray(data_test.values[:nrows_test, 1:], dtype=dtype) + y_test = np.ascontiguousarray(data_test.values[:nrows_test, 0], dtype=dtype) + x_train = np.ascontiguousarray(data_train.values[:nrows_train, 1:], dtype=dtype) + y_train = np.ascontiguousarray(data_train.values[:nrows_train, 0], dtype=dtype) + + for data, name in zip((x_train, x_test, y_train, y_test), + ('x_train', 'x_test', 'y_train', 'y_test')): + filename = f'{dataset_name}_{name}.npy' + np.save(os.path.join(dataset_dir, filename), data) + logging.info(f'dataset {dataset_name} is ready.') + return True + + + def higgs(dataset_dir: Path) -> bool: """ Higgs dataset from UCI machine learning repository @@ -637,3 +687,43 @@ def skin_segmentation(dataset_dir: Path) -> bool: np.save(os.path.join(dataset_dir, filename), data) logging.info(f'dataset {dataset_name} is ready.') return True + + +def susy(dataset_dir: Path) -> bool: + """ + SUSY dataset from UCI machine learning repository ( + https://archive.ics.uci.edu/ml/datasets/SUSY). + + Classification task. n_classes = 2. + susy X train dataset (4500000, 28) + susy y train dataset (4500000, 1) + susy X test dataset (500000, 28) + susy y test dataset (500000, 1) + """ + dataset_name = 'susy' + os.makedirs(dataset_dir, exist_ok=True) + + url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00279/SUSY.csv.gz' + local_url = os.path.join(dataset_dir, os.path.basename(url)) + if not os.path.isfile(local_url): + logging.info(f'Started loading {dataset_name}') + retrieve(url, local_url) + logging.info(f'{dataset_name} is loaded, started parsing...') + + nrows_train, nrows_test, dtype = 4500000, 500000, np.float32 + data: Any = pd.read_csv(local_url, delimiter=",", header=None, + compression="gzip", dtype=dtype, + nrows=nrows_train + nrows_test) + + X = data[data.columns[1:]] + y = data[data.columns[0:1]] + + x_train, x_test, y_train, y_test = train_test_split( + X, y, train_size=nrows_train, test_size=nrows_test, shuffle=False) + + for data, name in zip((x_train, x_test, y_train, y_test), + ('x_train', 'x_test', 'y_train', 'y_test')): + filename = f'{dataset_name}_{name}.npy' + np.save(os.path.join(dataset_dir, filename), data) + logging.info(f'dataset {dataset_name} is ready.') + return True From c2227ab31fb58dfa15381e8b4cfe4057747caf4c Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Tue, 7 Dec 2021 11:08:50 +0000 Subject: [PATCH 02/16] newline --- configs/xpu/df_clsf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index 65594ed30..055f05b1e 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -109,4 +109,4 @@ "max-depth": 15 } ] -} \ No newline at end of file +} From b1691ef81abdf8072dca029dcdc84291a9b26f90 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Tue, 7 Dec 2021 11:17:06 +0000 Subject: [PATCH 03/16] disable float64 --- configs/xpu/df_clsf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index 055f05b1e..96344b45a 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -4,7 +4,7 @@ "algorithm": "df_clsf", "data-format": "pandas", "data-order": "F", - "dtype": ["float32", "float64"], + "dtype": "float32", "device": "gpu" }, "cases": [ From 2576c15892afe82b4974b4a4a57b6081836c826a Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Tue, 7 Dec 2021 11:40:53 +0000 Subject: [PATCH 04/16] pep8 --- datasets/loader_classification.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/datasets/loader_classification.py b/datasets/loader_classification.py index c5383b972..fc3cb892d 100644 --- a/datasets/loader_classification.py +++ b/datasets/loader_classification.py @@ -476,11 +476,11 @@ def hepmass_150K(dataset_dir: Path) -> bool: nrows_train, nrows_test, dtype = 100000, 50000, np.float32 data_test: Any = pd.read_csv(local_url_test, delimiter=",", - compression="gzip", dtype=dtype, - nrows=nrows_test) + compression="gzip", dtype=dtype, + nrows=nrows_test) data_train: Any = pd.read_csv(local_url_train, delimiter=",", - compression="gzip", dtype=dtype, - nrows=nrows_train) + compression="gzip", dtype=dtype, + nrows=nrows_train) x_test = np.ascontiguousarray(data_test.values[:nrows_test, 1:], dtype=dtype) y_test = np.ascontiguousarray(data_test.values[:nrows_test, 0], dtype=dtype) @@ -495,7 +495,6 @@ def hepmass_150K(dataset_dir: Path) -> bool: return True - def higgs(dataset_dir: Path) -> bool: """ Higgs dataset from UCI machine learning repository @@ -716,7 +715,7 @@ def susy(dataset_dir: Path) -> bool: nrows=nrows_train + nrows_test) X = data[data.columns[1:]] - y = data[data.columns[0:1]] + y = data[data.columns[0:1]] x_train, x_test, y_train, y_test = train_test_split( X, y, train_size=nrows_train, test_size=nrows_test, shuffle=False) From a85389f653af9116c479dfef2bf97d6e7706c713 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Wed, 8 Dec 2021 12:52:10 +0000 Subject: [PATCH 05/16] sqrt and log2 parse --- bench.py | 4 ++++ configs/xpu/df_clsf.json | 1 + sklearn_bench/df_clsf.py | 2 +- sklearn_bench/df_regr.py | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/bench.py b/bench.py index c68e909a5..4ff857a04 100644 --- a/bench.py +++ b/bench.py @@ -68,6 +68,10 @@ def float_or_int(string): return float(string) if '.' in string else int(string) +def float_or_int_or_str(string): + return float(string) if '.' in string else int(string) if string.isdigit() else string + + def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64): ''' Get an optimal cache size for sklearn.svm.SVC. diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index 96344b45a..e140c0744 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -5,6 +5,7 @@ "data-format": "pandas", "data-order": "F", "dtype": "float32", + "max-features": "sqrt", "device": "gpu" }, "cases": [ diff --git a/sklearn_bench/df_clsf.py b/sklearn_bench/df_clsf.py index d99ffa898..5d35ef02d 100644 --- a/sklearn_bench/df_clsf.py +++ b/sklearn_bench/df_clsf.py @@ -81,7 +81,7 @@ def main(): help='The function to measure the quality of a split') parser.add_argument('--num-trees', type=int, default=100, help='Number of trees in the forest') - parser.add_argument('--max-features', type=bench.float_or_int, default=None, + parser.add_argument('--max-features', type=bench.float_or_int_or_str, default=None, help='Upper bound on features used at each split') parser.add_argument('--max-depth', type=int, default=None, help='Upper bound on depth of constructed trees') diff --git a/sklearn_bench/df_regr.py b/sklearn_bench/df_regr.py index 460a28804..4c7491af3 100644 --- a/sklearn_bench/df_regr.py +++ b/sklearn_bench/df_regr.py @@ -72,7 +72,7 @@ def main(): help='The function to measure the quality of a split') parser.add_argument('--num-trees', type=int, default=100, help='Number of trees in the forest') - parser.add_argument('--max-features', type=bench.float_or_int, default=None, + parser.add_argument('--max-features', type=bench.float_or_int_or_str, default=None, help='Upper bound on features used at each split') parser.add_argument('--max-depth', type=int, default=None, help='Upper bound on depth of constructed trees') From bdb372219a6b75eed2d1e3aee64d1ad4e831a049 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Fri, 10 Dec 2021 08:03:26 +0000 Subject: [PATCH 06/16] fix codefactor --- bench.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bench.py b/bench.py index 4ff857a04..dbc2191cd 100644 --- a/bench.py +++ b/bench.py @@ -69,7 +69,12 @@ def float_or_int(string): def float_or_int_or_str(string): - return float(string) if '.' in string else int(string) if string.isdigit() else string + if '.' in string: + return float(string) + elif string.isdigit(): + return int(string) + else + return string def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64): From dded1c0fa1b6160e622adbe0e1f90e4890a80eb5 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Fri, 10 Dec 2021 08:04:36 +0000 Subject: [PATCH 07/16] fix --- bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench.py b/bench.py index dbc2191cd..3663fa476 100644 --- a/bench.py +++ b/bench.py @@ -73,7 +73,7 @@ def float_or_int_or_str(string): return float(string) elif string.isdigit(): return int(string) - else + else: return string From 54981ec2c01f85f7df2f692d5d02c831e46d6411 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Fri, 10 Dec 2021 08:12:52 +0000 Subject: [PATCH 08/16] return ternary --- bench.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/bench.py b/bench.py index 3663fa476..4ff857a04 100644 --- a/bench.py +++ b/bench.py @@ -69,12 +69,7 @@ def float_or_int(string): def float_or_int_or_str(string): - if '.' in string: - return float(string) - elif string.isdigit(): - return int(string) - else: - return string + return float(string) if '.' in string else int(string) if string.isdigit() else string def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64): From 78400e613ce5f252d3e5dfc81ed1f1f781a3a712 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Mon, 27 Dec 2021 15:39:07 +0300 Subject: [PATCH 09/16] enable all devices --- configs/xpu/df_clsf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index e140c0744..a6069b3fa 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -6,7 +6,7 @@ "data-order": "F", "dtype": "float32", "max-features": "sqrt", - "device": "gpu" + "device": ["host", "cpu", "gpu", "None"] }, "cases": [ { From f2b57f2be5308080471c1a458a76b06d9bd62860 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Tue, 28 Dec 2021 14:21:58 +0300 Subject: [PATCH 10/16] enable float64 --- configs/xpu/df_clsf.json | 2 +- result_DeviceType_GEN9.json | 336 ++++++++++++++++++++++++++++++++++++ 2 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 result_DeviceType_GEN9.json diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index a6069b3fa..0c504de59 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -4,7 +4,7 @@ "algorithm": "df_clsf", "data-format": "pandas", "data-order": "F", - "dtype": "float32", + "dtype": ["float32", "float64"], "max-features": "sqrt", "device": ["host", "cpu", "gpu", "None"] }, diff --git a/result_DeviceType_GEN9.json b/result_DeviceType_GEN9.json new file mode 100644 index 000000000..560525cc3 --- /dev/null +++ b/result_DeviceType_GEN9.json @@ -0,0 +1,336 @@ +{ + "hardware": { + "CPU": { + "Architecture": "x86_64 CPU op-mode(s)" + }, + "RAM size[GB]": 31.154407501220703 + }, + "software": { + "_libgcc_mutex": { + "version": "0.1", + "build_string": "conda_forge", + "channel": "conda-forge" + }, + "_openmp_mutex": { + "version": "4.5", + "build_string": "1_gnu", + "channel": "conda-forge" + }, + "blas": { + "version": "1.0", + "build_string": "mkl", + "channel": "pkgs/main" + }, + "bottleneck": { + "version": "1.3.2", + "build_string": "py38heb32a55_1", + "channel": "pkgs/main" + }, + "brotlipy": { + "version": "0.7.0", + "build_string": "py38h27cfd23_1003", + "channel": "pkgs/main" + }, + "ca-certificates": { + "version": "2021.10.26", + "build_string": "h06a4308_2", + "channel": "pkgs/main" + }, + "certifi": { + "version": "2021.10.8", + "build_string": "py38h06a4308_0", + "channel": "pkgs/main" + }, + "cffi": { + "version": "1.14.6", + "build_string": "py38h400218f_0", + "channel": "pkgs/main" + }, + "charset-normalizer": { + "version": "2.0.4", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "clang-format": { + "version": "12.0.1", + "build_string": "default_ha53f305_4", + "channel": "conda-forge" + }, + "clang-format-12": { + "version": "12.0.1", + "build_string": "default_ha53f305_4", + "channel": "conda-forge" + }, + "clang-tools": { + "version": "12.0.1", + "build_string": "default_ha53f305_4", + "channel": "conda-forge" + }, + "cmake": { + "version": "3.21.3", + "build_string": "pypi_0", + "channel": "pypi" + }, + "cryptography": { + "version": "35.0.0", + "build_string": "py38hd23ed53_0", + "channel": "pkgs/main" + }, + "cython": { + "version": "0.29.24", + "build_string": "pypi_0", + "channel": "pypi" + }, + "et_xmlfile": { + "version": "1.1.0", + "build_string": "py38h06a4308_0", + "channel": "pkgs/main" + }, + "idna": { + "version": "3.3", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "intel-openmp": { + "version": "2021.4.0", + "build_string": "h06a4308_3561", + "channel": "pkgs/main" + }, + "jinja2": { + "version": "3.0.2", + "build_string": "pypi_0", + "channel": "pypi" + }, + "joblib": { + "version": "1.1.0", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "ld_impl_linux-64": { + "version": "2.35.1", + "build_string": "h7274673_9", + "channel": "pkgs/main" + }, + "libclang": { + "version": "12.0.1", + "build_string": "default_ha53f305_4", + "channel": "conda-forge" + }, + "libclang-cpp12": { + "version": "12.0.1", + "build_string": "default_ha53f305_4", + "channel": "conda-forge" + }, + "libffi": { + "version": "3.3", + "build_string": "he6710b0_2", + "channel": "pkgs/main" + }, + "libgcc-ng": { + "version": "11.2.0", + "build_string": "h1d223b6_11", + "channel": "conda-forge" + }, + "libgfortran-ng": { + "version": "7.5.0", + "build_string": "ha8ba4b0_17", + "channel": "pkgs/main" + }, + "libgfortran4": { + "version": "7.5.0", + "build_string": "ha8ba4b0_17", + "channel": "pkgs/main" + }, + "libgomp": { + "version": "11.2.0", + "build_string": "h1d223b6_11", + "channel": "conda-forge" + }, + "libllvm12": { + "version": "12.0.1", + "build_string": "hf817b99_2", + "channel": "conda-forge" + }, + "libstdcxx-ng": { + "version": "11.2.0", + "build_string": "he4da1e4_11", + "channel": "conda-forge" + }, + "markupsafe": { + "version": "2.0.1", + "build_string": "pypi_0", + "channel": "pypi" + }, + "mkl": { + "version": "2021.4.0", + "build_string": "h06a4308_640", + "channel": "pkgs/main" + }, + "mkl-service": { + "version": "2.4.0", + "build_string": "py38h7f8727e_0", + "channel": "pkgs/main" + }, + "mkl_fft": { + "version": "1.3.1", + "build_string": "py38hd3c417c_0", + "channel": "pkgs/main" + }, + "mkl_random": { + "version": "1.2.2", + "build_string": "py38h51133e4_0", + "channel": "pkgs/main" + }, + "ncurses": { + "version": "6.3", + "build_string": "h7f8727e_2", + "channel": "pkgs/main" + }, + "numexpr": { + "version": "2.7.3", + "build_string": "py38h22e1b3c_1", + "channel": "pkgs/main" + }, + "numpy": { + "version": "1.19.2", + "build_string": "pypi_0", + "channel": "pypi" + }, + "numpy-base": { + "version": "1.21.2", + "build_string": "py38h79a1101_0", + "channel": "pkgs/main" + }, + "openpyxl": { + "version": "3.0.9", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "openssl": { + "version": "1.1.1l", + "build_string": "h7f8727e_0", + "channel": "pkgs/main" + }, + "pandas": { + "version": "1.3.4", + "build_string": "py38h8c16a72_0", + "channel": "pkgs/main" + }, + "pip": { + "version": "21.2.4", + "build_string": "py38h06a4308_0", + "channel": "pkgs/main" + }, + "pybind11": { + "version": "2.8.0", + "build_string": "pypi_0", + "channel": "pypi" + }, + "pycparser": { + "version": "2.21", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "pyopenssl": { + "version": "21.0.0", + "build_string": "pyhd3eb1b0_1", + "channel": "pkgs/main" + }, + "pysocks": { + "version": "1.7.1", + "build_string": "py38h06a4308_0", + "channel": "pkgs/main" + }, + "python": { + "version": "3.8.12", + "build_string": "h12debd9_0", + "channel": "pkgs/main" + }, + "python-dateutil": { + "version": "2.8.2", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "pytz": { + "version": "2021.3", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "readline": { + "version": "8.1", + "build_string": "h27cfd23_0", + "channel": "pkgs/main" + }, + "requests": { + "version": "2.26.0", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "scikit-learn": { + "version": "1.0.1", + "build_string": "py38h51133e4_0", + "channel": "pkgs/main" + }, + "scikit-learn-intelex": { + "version": "2021.20211111.130037", + "build_string": "dev_0", + "channel": "" + }, + "scipy": { + "version": "1.7.1", + "build_string": "py38h292c36d_2", + "channel": "pkgs/main" + }, + "setuptools": { + "version": "58.0.4", + "build_string": "py38h06a4308_0", + "channel": "pkgs/main" + }, + "six": { + "version": "1.16.0", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "sqlite": { + "version": "3.36.0", + "build_string": "hc218d9a_0", + "channel": "pkgs/main" + }, + "threadpoolctl": { + "version": "2.2.0", + "build_string": "pyh0d69192_0", + "channel": "pkgs/main" + }, + "tk": { + "version": "8.6.11", + "build_string": "h1ccaba5_0", + "channel": "pkgs/main" + }, + "tqdm": { + "version": "4.62.3", + "build_string": "pyhd3eb1b0_1", + "channel": "pkgs/main" + }, + "urllib3": { + "version": "1.26.7", + "build_string": "pyhd3eb1b0_0", + "channel": "pkgs/main" + }, + "wheel": { + "version": "0.37.0", + "build_string": "pyhd3eb1b0_1", + "channel": "pkgs/main" + }, + "xz": { + "version": "5.2.5", + "build_string": "h7b6447c_0", + "channel": "pkgs/main" + }, + "zlib": { + "version": "1.2.11", + "build_string": "h7b6447c_3", + "channel": "pkgs/main" + } + }, + "results": [] +} \ No newline at end of file From 8cadd83fe3096fb95f3eb1a06415f860f3eb9052 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Tue, 28 Dec 2021 14:26:03 +0300 Subject: [PATCH 11/16] delete extra files --- result_DeviceType_GEN9.json | 336 ------------------------------------ 1 file changed, 336 deletions(-) delete mode 100644 result_DeviceType_GEN9.json diff --git a/result_DeviceType_GEN9.json b/result_DeviceType_GEN9.json deleted file mode 100644 index 560525cc3..000000000 --- a/result_DeviceType_GEN9.json +++ /dev/null @@ -1,336 +0,0 @@ -{ - "hardware": { - "CPU": { - "Architecture": "x86_64 CPU op-mode(s)" - }, - "RAM size[GB]": 31.154407501220703 - }, - "software": { - "_libgcc_mutex": { - "version": "0.1", - "build_string": "conda_forge", - "channel": "conda-forge" - }, - "_openmp_mutex": { - "version": "4.5", - "build_string": "1_gnu", - "channel": "conda-forge" - }, - "blas": { - "version": "1.0", - "build_string": "mkl", - "channel": "pkgs/main" - }, - "bottleneck": { - "version": "1.3.2", - "build_string": "py38heb32a55_1", - "channel": "pkgs/main" - }, - "brotlipy": { - "version": "0.7.0", - "build_string": "py38h27cfd23_1003", - "channel": "pkgs/main" - }, - "ca-certificates": { - "version": "2021.10.26", - "build_string": "h06a4308_2", - "channel": "pkgs/main" - }, - "certifi": { - "version": "2021.10.8", - "build_string": "py38h06a4308_0", - "channel": "pkgs/main" - }, - "cffi": { - "version": "1.14.6", - "build_string": "py38h400218f_0", - "channel": "pkgs/main" - }, - "charset-normalizer": { - "version": "2.0.4", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "clang-format": { - "version": "12.0.1", - "build_string": "default_ha53f305_4", - "channel": "conda-forge" - }, - "clang-format-12": { - "version": "12.0.1", - "build_string": "default_ha53f305_4", - "channel": "conda-forge" - }, - "clang-tools": { - "version": "12.0.1", - "build_string": "default_ha53f305_4", - "channel": "conda-forge" - }, - "cmake": { - "version": "3.21.3", - "build_string": "pypi_0", - "channel": "pypi" - }, - "cryptography": { - "version": "35.0.0", - "build_string": "py38hd23ed53_0", - "channel": "pkgs/main" - }, - "cython": { - "version": "0.29.24", - "build_string": "pypi_0", - "channel": "pypi" - }, - "et_xmlfile": { - "version": "1.1.0", - "build_string": "py38h06a4308_0", - "channel": "pkgs/main" - }, - "idna": { - "version": "3.3", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "intel-openmp": { - "version": "2021.4.0", - "build_string": "h06a4308_3561", - "channel": "pkgs/main" - }, - "jinja2": { - "version": "3.0.2", - "build_string": "pypi_0", - "channel": "pypi" - }, - "joblib": { - "version": "1.1.0", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "ld_impl_linux-64": { - "version": "2.35.1", - "build_string": "h7274673_9", - "channel": "pkgs/main" - }, - "libclang": { - "version": "12.0.1", - "build_string": "default_ha53f305_4", - "channel": "conda-forge" - }, - "libclang-cpp12": { - "version": "12.0.1", - "build_string": "default_ha53f305_4", - "channel": "conda-forge" - }, - "libffi": { - "version": "3.3", - "build_string": "he6710b0_2", - "channel": "pkgs/main" - }, - "libgcc-ng": { - "version": "11.2.0", - "build_string": "h1d223b6_11", - "channel": "conda-forge" - }, - "libgfortran-ng": { - "version": "7.5.0", - "build_string": "ha8ba4b0_17", - "channel": "pkgs/main" - }, - "libgfortran4": { - "version": "7.5.0", - "build_string": "ha8ba4b0_17", - "channel": "pkgs/main" - }, - "libgomp": { - "version": "11.2.0", - "build_string": "h1d223b6_11", - "channel": "conda-forge" - }, - "libllvm12": { - "version": "12.0.1", - "build_string": "hf817b99_2", - "channel": "conda-forge" - }, - "libstdcxx-ng": { - "version": "11.2.0", - "build_string": "he4da1e4_11", - "channel": "conda-forge" - }, - "markupsafe": { - "version": "2.0.1", - "build_string": "pypi_0", - "channel": "pypi" - }, - "mkl": { - "version": "2021.4.0", - "build_string": "h06a4308_640", - "channel": "pkgs/main" - }, - "mkl-service": { - "version": "2.4.0", - "build_string": "py38h7f8727e_0", - "channel": "pkgs/main" - }, - "mkl_fft": { - "version": "1.3.1", - "build_string": "py38hd3c417c_0", - "channel": "pkgs/main" - }, - "mkl_random": { - "version": "1.2.2", - "build_string": "py38h51133e4_0", - "channel": "pkgs/main" - }, - "ncurses": { - "version": "6.3", - "build_string": "h7f8727e_2", - "channel": "pkgs/main" - }, - "numexpr": { - "version": "2.7.3", - "build_string": "py38h22e1b3c_1", - "channel": "pkgs/main" - }, - "numpy": { - "version": "1.19.2", - "build_string": "pypi_0", - "channel": "pypi" - }, - "numpy-base": { - "version": "1.21.2", - "build_string": "py38h79a1101_0", - "channel": "pkgs/main" - }, - "openpyxl": { - "version": "3.0.9", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "openssl": { - "version": "1.1.1l", - "build_string": "h7f8727e_0", - "channel": "pkgs/main" - }, - "pandas": { - "version": "1.3.4", - "build_string": "py38h8c16a72_0", - "channel": "pkgs/main" - }, - "pip": { - "version": "21.2.4", - "build_string": "py38h06a4308_0", - "channel": "pkgs/main" - }, - "pybind11": { - "version": "2.8.0", - "build_string": "pypi_0", - "channel": "pypi" - }, - "pycparser": { - "version": "2.21", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "pyopenssl": { - "version": "21.0.0", - "build_string": "pyhd3eb1b0_1", - "channel": "pkgs/main" - }, - "pysocks": { - "version": "1.7.1", - "build_string": "py38h06a4308_0", - "channel": "pkgs/main" - }, - "python": { - "version": "3.8.12", - "build_string": "h12debd9_0", - "channel": "pkgs/main" - }, - "python-dateutil": { - "version": "2.8.2", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "pytz": { - "version": "2021.3", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "readline": { - "version": "8.1", - "build_string": "h27cfd23_0", - "channel": "pkgs/main" - }, - "requests": { - "version": "2.26.0", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "scikit-learn": { - "version": "1.0.1", - "build_string": "py38h51133e4_0", - "channel": "pkgs/main" - }, - "scikit-learn-intelex": { - "version": "2021.20211111.130037", - "build_string": "dev_0", - "channel": "" - }, - "scipy": { - "version": "1.7.1", - "build_string": "py38h292c36d_2", - "channel": "pkgs/main" - }, - "setuptools": { - "version": "58.0.4", - "build_string": "py38h06a4308_0", - "channel": "pkgs/main" - }, - "six": { - "version": "1.16.0", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "sqlite": { - "version": "3.36.0", - "build_string": "hc218d9a_0", - "channel": "pkgs/main" - }, - "threadpoolctl": { - "version": "2.2.0", - "build_string": "pyh0d69192_0", - "channel": "pkgs/main" - }, - "tk": { - "version": "8.6.11", - "build_string": "h1ccaba5_0", - "channel": "pkgs/main" - }, - "tqdm": { - "version": "4.62.3", - "build_string": "pyhd3eb1b0_1", - "channel": "pkgs/main" - }, - "urllib3": { - "version": "1.26.7", - "build_string": "pyhd3eb1b0_0", - "channel": "pkgs/main" - }, - "wheel": { - "version": "0.37.0", - "build_string": "pyhd3eb1b0_1", - "channel": "pkgs/main" - }, - "xz": { - "version": "5.2.5", - "build_string": "h7b6447c_0", - "channel": "pkgs/main" - }, - "zlib": { - "version": "1.2.11", - "build_string": "h7b6447c_3", - "channel": "pkgs/main" - } - }, - "results": [] -} \ No newline at end of file From b8eb13d45b1c01bc9a8b96747244de70fc3d36e1 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Tue, 28 Dec 2021 16:32:23 +0300 Subject: [PATCH 12/16] replace None with none --- configs/xpu/df_clsf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index 0c504de59..4d14763b8 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -6,7 +6,7 @@ "data-order": "F", "dtype": ["float32", "float64"], "max-features": "sqrt", - "device": ["host", "cpu", "gpu", "None"] + "device": ["host", "cpu", "gpu", "none"] }, "cases": [ { From b2e4ff4fad33f3dddbf5b325274f90ce8ff17980 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Wed, 12 Jan 2022 12:44:43 +0300 Subject: [PATCH 13/16] column major --- configs/xpu/df_clsf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index 4d14763b8..802543d32 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -3,7 +3,7 @@ "lib": "sklearn", "algorithm": "df_clsf", "data-format": "pandas", - "data-order": "F", + "data-order": "C", "dtype": ["float32", "float64"], "max-features": "sqrt", "device": ["host", "cpu", "gpu", "none"] From 780f4f91213a8bfd9979b9273123f245006f4da1 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Wed, 12 Jan 2022 15:45:18 +0300 Subject: [PATCH 14/16] data-order F back --- configs/xpu/df_clsf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json index 802543d32..4d14763b8 100644 --- a/configs/xpu/df_clsf.json +++ b/configs/xpu/df_clsf.json @@ -3,7 +3,7 @@ "lib": "sklearn", "algorithm": "df_clsf", "data-format": "pandas", - "data-order": "C", + "data-order": "F", "dtype": ["float32", "float64"], "max-features": "sqrt", "device": ["host", "cpu", "gpu", "none"] From 937913f21ad98f321e40b6b3d286481d69e99e9b Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Wed, 12 Jan 2022 19:26:08 +0300 Subject: [PATCH 15/16] float scientific notation handling --- bench.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bench.py b/bench.py index d22c2abb9..b30540c58 100644 --- a/bench.py +++ b/bench.py @@ -19,6 +19,7 @@ import logging import sys import timeit +import re import numpy as np import sklearn @@ -63,13 +64,16 @@ def _parse_size(string, dim=2): return tup +def is_float(string): + return bool(re.match(r"^[-+]?(?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)(?:[eE][-+]?[0-9]+\b)?$", string)) + def float_or_int(string): - return float(string) if '.' in string else int(string) + return int(string) if string.isdigit() else float(string) def float_or_int_or_str(string): - return float(string) if '.' in string else int(string) if string.isdigit() else string + return int(string) if string.isdigit() else float(string) if is_float(string) else string def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64): From bf546f8a624db124cec443c0716c9adc1d4ea6d8 Mon Sep 17 00:00:00 2001 From: dmitrii-kriukov Date: Thu, 13 Jan 2022 10:11:16 +0300 Subject: [PATCH 16/16] pep8 --- bench.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bench.py b/bench.py index b30540c58..cbc969bb4 100644 --- a/bench.py +++ b/bench.py @@ -64,8 +64,10 @@ def _parse_size(string, dim=2): return tup + def is_float(string): - return bool(re.match(r"^[-+]?(?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)(?:[eE][-+]?[0-9]+\b)?$", string)) + return bool(re.match(r"^[-+]?(?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)(?:[eE][-+]?[0-9]+\b)?$", + string)) def float_or_int(string):