From 2c0d098ed23d9b009a13362c71be1fd11252fb1b Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Tue, 7 Dec 2021 10:55:19 +0000
Subject: [PATCH 01/16] Add df_clsf xpu config and susy/hepmass_150K dataset loaders

---
 configs/xpu/df_clsf.json          | 112 ++++++++++++++++++++++++++++++
 datasets/load_datasets.py         |   6 +-
 datasets/loader_classification.py |  90 ++++++++++++++++++++++++
 3 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100644 configs/xpu/df_clsf.json

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
new file mode 100644
index 000000000..65594ed30
--- /dev/null
+++ b/configs/xpu/df_clsf.json
@@ -0,0 +1,112 @@
+{
+    "common": {
+        "lib": "sklearn",
+        "algorithm": "df_clsf",
+        "data-format": "pandas",
+        "data-order": "F",
+        "dtype": ["float32", "float64"],
+        "device": "gpu"
+    },
+    "cases": [
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "susy",
+                    "training":
+                    {
+                        "x": "data/susy_x_train.npy",
+                        "y": "data/susy_y_train.npy"
+                    },
+                    "testing":
+                    {
+                        "x": "data/susy_x_test.npy",
+                        "y": "data/susy_y_test.npy"
+                    }
+                }
+            ],
+            "num-trees": 10,
+            "max-depth": 5
+        },
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "susy",
+                    "training":
+                    {
+                        "x": "data/susy_x_train.npy",
+                        "y": "data/susy_y_train.npy"
+                    },
+                    "testing":
+                    {
+                        "x": "data/susy_x_test.npy",
+                        "y": "data/susy_y_test.npy"
+                    }
+                }
+            ],
+            "num-trees": 100,
+            "max-depth": 8
+        },
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "susy",
+                    "training":
+                    {
+                        "x": "data/susy_x_train.npy",
+                        "y": "data/susy_y_train.npy"
+                    },
+                    "testing":
+                    {
+                        "x": "data/susy_x_test.npy",
+                        "y": "data/susy_y_test.npy"
+                    }
+                }
+            ],
+            "num-trees": 20,
+            "max-depth": 16
+        },
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "mnist",
+                    "training":
+                    {
+                        "x": "data/mnist_x_train.npy",
+                        "y": "data/mnist_y_train.npy"
+                    },
+                    "testing":
+                    {
+                        "x": "data/mnist_x_test.npy",
+                        "y": "data/mnist_y_test.npy"
+                    }
+                }
+            ],
+            "num-trees": 100,
+            "max-depth": 10
+        },
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "hepmass_150K",
+                    "training":
+                    {
+                        "x": "data/hepmass_150K_x_train.npy",
+                        "y": "data/hepmass_150K_y_train.npy"
+                    },
+                    "testing":
+                    {
+                        "x": "data/hepmass_150K_x_test.npy",
+                        "y": "data/hepmass_150K_y_test.npy"
+                    }
+                }
+            ],
+            "num-trees": 50,
+            "max-depth": 15
+        }
+    ]
+}
\ No newline at end of file
diff --git a/datasets/load_datasets.py b/datasets/load_datasets.py
index a3bc15187..0a7874d92 100644
--- a/datasets/load_datasets.py
+++ b/datasets/load_datasets.py
@@ -23,8 +23,8 @@
 
 from .loader_classification import (a_nine_a, airline, airline_ohe, bosch,
                                     census, codrnanorm, creditcard, epsilon, fraud,
-                                    gisette, higgs, higgs_one_m, ijcnn,
-                                    klaverjas, santander, skin_segmentation)
+                                    gisette, hepmass_150K, higgs, higgs_one_m, ijcnn,
+                                    klaverjas, santander, skin_segmentation, susy)
 from .loader_multiclass import (connect, covertype, covtype, letters, mlsr,
                                 mnist, msrank, plasticc, sensit)
 from .loader_regression import (abalone, california_housing, fried,
@@ -49,6 +49,7 @@
     "fraud": fraud,
     "fried": fried,
     "gisette": gisette,
+    "hepmass_150K": hepmass_150K,
     "higgs": higgs,
     "higgs1m": higgs_one_m,
     "ijcnn": ijcnn,
@@ -63,6 +64,7 @@
     "santander": santander,
     "sensit": sensit,
     "skin_segmentation": skin_segmentation,
+    "susy": susy,
     "twodplanes": twodplanes,
     "year_prediction_msd": year_prediction_msd,
     "yolanda": yolanda,
diff --git a/datasets/loader_classification.py b/datasets/loader_classification.py
index 5a5d9df74..c5383b972 100644
--- a/datasets/loader_classification.py
+++ b/datasets/loader_classification.py
@@ -446,6 +446,56 @@ def gisette(dataset_dir: Path) -> bool:
     return True
 
 
+def hepmass_150K(dataset_dir: Path) -> bool:
+    """
+    HEPMASS dataset from UCI machine learning repository (
+    https://archive.ics.uci.edu/ml/datasets/HEPMASS).
+
+    Classification task. n_classes = 2.
+    hepmass_150K X train dataset (100000, 28)
+    hepmass_150K y train dataset (100000, 1)
+    hepmass_150K X test dataset  (50000,  28)
+    hepmass_150K y test dataset  (50000,  1)
+    """
+    dataset_name = 'hepmass_150K'
+    os.makedirs(dataset_dir, exist_ok=True)
+
+    url_test = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00347/all_test.csv.gz'
+    url_train = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00347/all_train.csv.gz'
+
+    local_url_test = os.path.join(dataset_dir, os.path.basename(url_test))
+    local_url_train = os.path.join(dataset_dir, os.path.basename(url_train))
+
+    if not os.path.isfile(local_url_test):
+        logging.info(f'Started loading {dataset_name}, test')
+        retrieve(url_test, local_url_test)
+    if not os.path.isfile(local_url_train):
+        logging.info(f'Started loading {dataset_name}, train')
+        retrieve(url_train, local_url_train)
+    logging.info(f'{dataset_name} is loaded, started parsing...')
+
+    nrows_train, nrows_test, dtype = 100000, 50000, np.float32
+    data_test: Any = pd.read_csv(local_url_test, delimiter=",",
+                            compression="gzip", dtype=dtype,
+                            nrows=nrows_test)
+    data_train: Any = pd.read_csv(local_url_train, delimiter=",",
+                            compression="gzip", dtype=dtype,
+                            nrows=nrows_train)
+
+    x_test = np.ascontiguousarray(data_test.values[:nrows_test, 1:], dtype=dtype)
+    y_test = np.ascontiguousarray(data_test.values[:nrows_test, 0], dtype=dtype)
+    x_train = np.ascontiguousarray(data_train.values[:nrows_train, 1:], dtype=dtype)
+    y_train = np.ascontiguousarray(data_train.values[:nrows_train, 0], dtype=dtype)
+
+    for data, name in zip((x_train, x_test, y_train, y_test),
+                          ('x_train', 'x_test', 'y_train', 'y_test')):
+        filename = f'{dataset_name}_{name}.npy'
+        np.save(os.path.join(dataset_dir, filename), data)
+    logging.info(f'dataset {dataset_name} is ready.')
+    return True
+
+
+
 def higgs(dataset_dir: Path) -> bool:
     """
     Higgs dataset from UCI machine learning repository
@@ -637,3 +687,43 @@ def skin_segmentation(dataset_dir: Path) -> bool:
         np.save(os.path.join(dataset_dir, filename), data)
     logging.info(f'dataset {dataset_name} is ready.')
     return True
+
+
+def susy(dataset_dir: Path) -> bool:
+    """
+    SUSY dataset from UCI machine learning repository (
+    https://archive.ics.uci.edu/ml/datasets/SUSY).
+
+    Classification task. n_classes = 2.
+    susy X train dataset (4500000, 18)
+    susy y train dataset (4500000, 1)
+    susy X test dataset  (500000,  18)
+    susy y test dataset  (500000,  1)
+    """
+    dataset_name = 'susy'
+    os.makedirs(dataset_dir, exist_ok=True)
+
+    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00279/SUSY.csv.gz'
+    local_url = os.path.join(dataset_dir, os.path.basename(url))
+    if not os.path.isfile(local_url):
+        logging.info(f'Started loading {dataset_name}')
+        retrieve(url, local_url)
+    logging.info(f'{dataset_name} is loaded, started parsing...')
+
+    nrows_train, nrows_test, dtype = 4500000, 500000, np.float32
+    data: Any = pd.read_csv(local_url, delimiter=",", header=None,
+                            compression="gzip", dtype=dtype,
+                            nrows=nrows_train + nrows_test)
+
+    X = data[data.columns[1:]]
+    y =  data[data.columns[0:1]]
+
+    x_train, x_test, y_train, y_test = train_test_split(
+        X, y, train_size=nrows_train, test_size=nrows_test, shuffle=False)
+
+    for data, name in zip((x_train, x_test, y_train, y_test),
+                          ('x_train', 'x_test', 'y_train', 'y_test')):
+        filename = f'{dataset_name}_{name}.npy'
+        np.save(os.path.join(dataset_dir, filename), data)
+    logging.info(f'dataset {dataset_name} is ready.')
+    return True

From c2227ab31fb58dfa15381e8b4cfe4057747caf4c Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Tue, 7 Dec 2021 11:08:50 +0000
Subject: [PATCH 02/16] Add trailing newline to configs/xpu/df_clsf.json

---
 configs/xpu/df_clsf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index 65594ed30..055f05b1e 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -109,4 +109,4 @@
             "max-depth": 15
         }
     ]
-}
\ No newline at end of file
+}

From b1691ef81abdf8072dca029dcdc84291a9b26f90 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Tue, 7 Dec 2021 11:17:06 +0000
Subject: [PATCH 03/16] disable float64

---
 configs/xpu/df_clsf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index 055f05b1e..96344b45a 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -4,7 +4,7 @@
         "algorithm": "df_clsf",
         "data-format": "pandas",
         "data-order": "F",
-        "dtype": ["float32", "float64"],
+        "dtype": "float32",
         "device": "gpu"
     },
     "cases": [

From 2576c15892afe82b4974b4a4a57b6081836c826a Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Tue, 7 Dec 2021 11:40:53 +0000
Subject: [PATCH 04/16] pep8

---
 datasets/loader_classification.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/datasets/loader_classification.py b/datasets/loader_classification.py
index c5383b972..fc3cb892d 100644
--- a/datasets/loader_classification.py
+++ b/datasets/loader_classification.py
@@ -476,11 +476,11 @@ def hepmass_150K(dataset_dir: Path) -> bool:
 
     nrows_train, nrows_test, dtype = 100000, 50000, np.float32
     data_test: Any = pd.read_csv(local_url_test, delimiter=",",
-                            compression="gzip", dtype=dtype,
-                            nrows=nrows_test)
+                                 compression="gzip", dtype=dtype,
+                                 nrows=nrows_test)
     data_train: Any = pd.read_csv(local_url_train, delimiter=",",
-                            compression="gzip", dtype=dtype,
-                            nrows=nrows_train)
+                                  compression="gzip", dtype=dtype,
+                                  nrows=nrows_train)
 
     x_test = np.ascontiguousarray(data_test.values[:nrows_test, 1:], dtype=dtype)
     y_test = np.ascontiguousarray(data_test.values[:nrows_test, 0], dtype=dtype)
@@ -495,7 +495,6 @@ def hepmass_150K(dataset_dir: Path) -> bool:
     return True
 
 
-
 def higgs(dataset_dir: Path) -> bool:
     """
     Higgs dataset from UCI machine learning repository
@@ -716,7 +715,7 @@ def susy(dataset_dir: Path) -> bool:
                             nrows=nrows_train + nrows_test)
 
     X = data[data.columns[1:]]
-    y =  data[data.columns[0:1]]
+    y = data[data.columns[0:1]]
 
     x_train, x_test, y_train, y_test = train_test_split(
         X, y, train_size=nrows_train, test_size=nrows_test, shuffle=False)

From a85389f653af9116c479dfef2bf97d6e7706c713 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Wed, 8 Dec 2021 12:52:10 +0000
Subject: [PATCH 05/16] sqrt and log2 parse

---
 bench.py                 | 4 ++++
 configs/xpu/df_clsf.json | 1 +
 sklearn_bench/df_clsf.py | 2 +-
 sklearn_bench/df_regr.py | 2 +-
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/bench.py b/bench.py
index c68e909a5..4ff857a04 100644
--- a/bench.py
+++ b/bench.py
@@ -68,6 +68,10 @@ def float_or_int(string):
     return float(string) if '.' in string else int(string)
 
 
+def float_or_int_or_str(string):
+    return float(string) if '.' in string else int(string) if string.isdigit() else string
+
+
 def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):
     '''
     Get an optimal cache size for sklearn.svm.SVC.
diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index 96344b45a..e140c0744 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -5,6 +5,7 @@
         "data-format": "pandas",
         "data-order": "F",
         "dtype": "float32",
+        "max-features": "sqrt",
         "device": "gpu"
     },
     "cases": [
diff --git a/sklearn_bench/df_clsf.py b/sklearn_bench/df_clsf.py
index d99ffa898..5d35ef02d 100644
--- a/sklearn_bench/df_clsf.py
+++ b/sklearn_bench/df_clsf.py
@@ -81,7 +81,7 @@ def main():
                         help='The function to measure the quality of a split')
     parser.add_argument('--num-trees', type=int, default=100,
                         help='Number of trees in the forest')
-    parser.add_argument('--max-features', type=bench.float_or_int, default=None,
+    parser.add_argument('--max-features', type=bench.float_or_int_or_str, default=None,
                         help='Upper bound on features used at each split')
     parser.add_argument('--max-depth', type=int, default=None,
                         help='Upper bound on depth of constructed trees')
diff --git a/sklearn_bench/df_regr.py b/sklearn_bench/df_regr.py
index 460a28804..4c7491af3 100644
--- a/sklearn_bench/df_regr.py
+++ b/sklearn_bench/df_regr.py
@@ -72,7 +72,7 @@ def main():
                         help='The function to measure the quality of a split')
     parser.add_argument('--num-trees', type=int, default=100,
                         help='Number of trees in the forest')
-    parser.add_argument('--max-features', type=bench.float_or_int, default=None,
+    parser.add_argument('--max-features', type=bench.float_or_int_or_str, default=None,
                         help='Upper bound on features used at each split')
     parser.add_argument('--max-depth', type=int, default=None,
                         help='Upper bound on depth of constructed trees')

From bdb372219a6b75eed2d1e3aee64d1ad4e831a049 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Fri, 10 Dec 2021 08:03:26 +0000
Subject: [PATCH 06/16] fix codefactor

---
 bench.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/bench.py b/bench.py
index 4ff857a04..dbc2191cd 100644
--- a/bench.py
+++ b/bench.py
@@ -69,7 +69,12 @@ def float_or_int(string):
 
 
 def float_or_int_or_str(string):
-    return float(string) if '.' in string else int(string) if string.isdigit() else string
+    if '.' in string:
+        return float(string)
+    elif string.isdigit():
+        return int(string)
+    else
+        return string
 
 
 def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):

From dded1c0fa1b6160e622adbe0e1f90e4890a80eb5 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Fri, 10 Dec 2021 08:04:36 +0000
Subject: [PATCH 07/16] Fix missing colon after else in float_or_int_or_str

---
 bench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench.py b/bench.py
index dbc2191cd..3663fa476 100644
--- a/bench.py
+++ b/bench.py
@@ -73,7 +73,7 @@ def float_or_int_or_str(string):
         return float(string)
     elif string.isdigit():
         return int(string)
-    else
+    else:
         return string
 
 

From 54981ec2c01f85f7df2f692d5d02c831e46d6411 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Fri, 10 Dec 2021 08:12:52 +0000
Subject: [PATCH 08/16] Revert float_or_int_or_str to a single ternary return

---
 bench.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/bench.py b/bench.py
index 3663fa476..4ff857a04 100644
--- a/bench.py
+++ b/bench.py
@@ -69,12 +69,7 @@ def float_or_int(string):
 
 
 def float_or_int_or_str(string):
-    if '.' in string:
-        return float(string)
-    elif string.isdigit():
-        return int(string)
-    else:
-        return string
+    return float(string) if '.' in string else int(string) if string.isdigit() else string
 
 
 def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):

From 78400e613ce5f252d3e5dfc81ed1f1f781a3a712 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Mon, 27 Dec 2021 15:39:07 +0300
Subject: [PATCH 09/16] enable all devices

---
 configs/xpu/df_clsf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index e140c0744..a6069b3fa 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -6,7 +6,7 @@
         "data-order": "F",
         "dtype": "float32",
         "max-features": "sqrt",
-        "device": "gpu"
+        "device": ["host", "cpu", "gpu", "None"]
     },
     "cases": [
         {

From f2b57f2be5308080471c1a458a76b06d9bd62860 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Tue, 28 Dec 2021 14:21:58 +0300
Subject: [PATCH 10/16] enable float64

---
 configs/xpu/df_clsf.json    |   2 +-
 result_DeviceType_GEN9.json | 336 ++++++++++++++++++++++++++++++++++++
 2 files changed, 337 insertions(+), 1 deletion(-)
 create mode 100644 result_DeviceType_GEN9.json

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index a6069b3fa..0c504de59 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -4,7 +4,7 @@
         "algorithm": "df_clsf",
         "data-format": "pandas",
         "data-order": "F",
-        "dtype": "float32",
+        "dtype": ["float32", "float64"],
         "max-features": "sqrt",
         "device": ["host", "cpu", "gpu", "None"]
     },
diff --git a/result_DeviceType_GEN9.json b/result_DeviceType_GEN9.json
new file mode 100644
index 000000000..560525cc3
--- /dev/null
+++ b/result_DeviceType_GEN9.json
@@ -0,0 +1,336 @@
+{
+    "hardware": {
+        "CPU": {
+            "Architecture": "x86_64 CPU op-mode(s)"
+        },
+        "RAM size[GB]": 31.154407501220703
+    },
+    "software": {
+        "_libgcc_mutex": {
+            "version": "0.1",
+            "build_string": "conda_forge",
+            "channel": "conda-forge"
+        },
+        "_openmp_mutex": {
+            "version": "4.5",
+            "build_string": "1_gnu",
+            "channel": "conda-forge"
+        },
+        "blas": {
+            "version": "1.0",
+            "build_string": "mkl",
+            "channel": "pkgs/main"
+        },
+        "bottleneck": {
+            "version": "1.3.2",
+            "build_string": "py38heb32a55_1",
+            "channel": "pkgs/main"
+        },
+        "brotlipy": {
+            "version": "0.7.0",
+            "build_string": "py38h27cfd23_1003",
+            "channel": "pkgs/main"
+        },
+        "ca-certificates": {
+            "version": "2021.10.26",
+            "build_string": "h06a4308_2",
+            "channel": "pkgs/main"
+        },
+        "certifi": {
+            "version": "2021.10.8",
+            "build_string": "py38h06a4308_0",
+            "channel": "pkgs/main"
+        },
+        "cffi": {
+            "version": "1.14.6",
+            "build_string": "py38h400218f_0",
+            "channel": "pkgs/main"
+        },
+        "charset-normalizer": {
+            "version": "2.0.4",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "clang-format": {
+            "version": "12.0.1",
+            "build_string": "default_ha53f305_4",
+            "channel": "conda-forge"
+        },
+        "clang-format-12": {
+            "version": "12.0.1",
+            "build_string": "default_ha53f305_4",
+            "channel": "conda-forge"
+        },
+        "clang-tools": {
+            "version": "12.0.1",
+            "build_string": "default_ha53f305_4",
+            "channel": "conda-forge"
+        },
+        "cmake": {
+            "version": "3.21.3",
+            "build_string": "pypi_0",
+            "channel": "pypi"
+        },
+        "cryptography": {
+            "version": "35.0.0",
+            "build_string": "py38hd23ed53_0",
+            "channel": "pkgs/main"
+        },
+        "cython": {
+            "version": "0.29.24",
+            "build_string": "pypi_0",
+            "channel": "pypi"
+        },
+        "et_xmlfile": {
+            "version": "1.1.0",
+            "build_string": "py38h06a4308_0",
+            "channel": "pkgs/main"
+        },
+        "idna": {
+            "version": "3.3",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "intel-openmp": {
+            "version": "2021.4.0",
+            "build_string": "h06a4308_3561",
+            "channel": "pkgs/main"
+        },
+        "jinja2": {
+            "version": "3.0.2",
+            "build_string": "pypi_0",
+            "channel": "pypi"
+        },
+        "joblib": {
+            "version": "1.1.0",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "ld_impl_linux-64": {
+            "version": "2.35.1",
+            "build_string": "h7274673_9",
+            "channel": "pkgs/main"
+        },
+        "libclang": {
+            "version": "12.0.1",
+            "build_string": "default_ha53f305_4",
+            "channel": "conda-forge"
+        },
+        "libclang-cpp12": {
+            "version": "12.0.1",
+            "build_string": "default_ha53f305_4",
+            "channel": "conda-forge"
+        },
+        "libffi": {
+            "version": "3.3",
+            "build_string": "he6710b0_2",
+            "channel": "pkgs/main"
+        },
+        "libgcc-ng": {
+            "version": "11.2.0",
+            "build_string": "h1d223b6_11",
+            "channel": "conda-forge"
+        },
+        "libgfortran-ng": {
+            "version": "7.5.0",
+            "build_string": "ha8ba4b0_17",
+            "channel": "pkgs/main"
+        },
+        "libgfortran4": {
+            "version": "7.5.0",
+            "build_string": "ha8ba4b0_17",
+            "channel": "pkgs/main"
+        },
+        "libgomp": {
+            "version": "11.2.0",
+            "build_string": "h1d223b6_11",
+            "channel": "conda-forge"
+        },
+        "libllvm12": {
+            "version": "12.0.1",
+            "build_string": "hf817b99_2",
+            "channel": "conda-forge"
+        },
+        "libstdcxx-ng": {
+            "version": "11.2.0",
+            "build_string": "he4da1e4_11",
+            "channel": "conda-forge"
+        },
+        "markupsafe": {
+            "version": "2.0.1",
+            "build_string": "pypi_0",
+            "channel": "pypi"
+        },
+        "mkl": {
+            "version": "2021.4.0",
+            "build_string": "h06a4308_640",
+            "channel": "pkgs/main"
+        },
+        "mkl-service": {
+            "version": "2.4.0",
+            "build_string": "py38h7f8727e_0",
+            "channel": "pkgs/main"
+        },
+        "mkl_fft": {
+            "version": "1.3.1",
+            "build_string": "py38hd3c417c_0",
+            "channel": "pkgs/main"
+        },
+        "mkl_random": {
+            "version": "1.2.2",
+            "build_string": "py38h51133e4_0",
+            "channel": "pkgs/main"
+        },
+        "ncurses": {
+            "version": "6.3",
+            "build_string": "h7f8727e_2",
+            "channel": "pkgs/main"
+        },
+        "numexpr": {
+            "version": "2.7.3",
+            "build_string": "py38h22e1b3c_1",
+            "channel": "pkgs/main"
+        },
+        "numpy": {
+            "version": "1.19.2",
+            "build_string": "pypi_0",
+            "channel": "pypi"
+        },
+        "numpy-base": {
+            "version": "1.21.2",
+            "build_string": "py38h79a1101_0",
+            "channel": "pkgs/main"
+        },
+        "openpyxl": {
+            "version": "3.0.9",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "openssl": {
+            "version": "1.1.1l",
+            "build_string": "h7f8727e_0",
+            "channel": "pkgs/main"
+        },
+        "pandas": {
+            "version": "1.3.4",
+            "build_string": "py38h8c16a72_0",
+            "channel": "pkgs/main"
+        },
+        "pip": {
+            "version": "21.2.4",
+            "build_string": "py38h06a4308_0",
+            "channel": "pkgs/main"
+        },
+        "pybind11": {
+            "version": "2.8.0",
+            "build_string": "pypi_0",
+            "channel": "pypi"
+        },
+        "pycparser": {
+            "version": "2.21",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "pyopenssl": {
+            "version": "21.0.0",
+            "build_string": "pyhd3eb1b0_1",
+            "channel": "pkgs/main"
+        },
+        "pysocks": {
+            "version": "1.7.1",
+            "build_string": "py38h06a4308_0",
+            "channel": "pkgs/main"
+        },
+        "python": {
+            "version": "3.8.12",
+            "build_string": "h12debd9_0",
+            "channel": "pkgs/main"
+        },
+        "python-dateutil": {
+            "version": "2.8.2",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "pytz": {
+            "version": "2021.3",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "readline": {
+            "version": "8.1",
+            "build_string": "h27cfd23_0",
+            "channel": "pkgs/main"
+        },
+        "requests": {
+            "version": "2.26.0",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "scikit-learn": {
+            "version": "1.0.1",
+            "build_string": "py38h51133e4_0",
+            "channel": "pkgs/main"
+        },
+        "scikit-learn-intelex": {
+            "version": "2021.20211111.130037",
+            "build_string": "dev_0",
+            "channel": "<develop>"
+        },
+        "scipy": {
+            "version": "1.7.1",
+            "build_string": "py38h292c36d_2",
+            "channel": "pkgs/main"
+        },
+        "setuptools": {
+            "version": "58.0.4",
+            "build_string": "py38h06a4308_0",
+            "channel": "pkgs/main"
+        },
+        "six": {
+            "version": "1.16.0",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "sqlite": {
+            "version": "3.36.0",
+            "build_string": "hc218d9a_0",
+            "channel": "pkgs/main"
+        },
+        "threadpoolctl": {
+            "version": "2.2.0",
+            "build_string": "pyh0d69192_0",
+            "channel": "pkgs/main"
+        },
+        "tk": {
+            "version": "8.6.11",
+            "build_string": "h1ccaba5_0",
+            "channel": "pkgs/main"
+        },
+        "tqdm": {
+            "version": "4.62.3",
+            "build_string": "pyhd3eb1b0_1",
+            "channel": "pkgs/main"
+        },
+        "urllib3": {
+            "version": "1.26.7",
+            "build_string": "pyhd3eb1b0_0",
+            "channel": "pkgs/main"
+        },
+        "wheel": {
+            "version": "0.37.0",
+            "build_string": "pyhd3eb1b0_1",
+            "channel": "pkgs/main"
+        },
+        "xz": {
+            "version": "5.2.5",
+            "build_string": "h7b6447c_0",
+            "channel": "pkgs/main"
+        },
+        "zlib": {
+            "version": "1.2.11",
+            "build_string": "h7b6447c_3",
+            "channel": "pkgs/main"
+        }
+    },
+    "results": []
+}
\ No newline at end of file

From 8cadd83fe3096fb95f3eb1a06415f860f3eb9052 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Tue, 28 Dec 2021 14:26:03 +0300
Subject: [PATCH 11/16] delete extra files

---
 result_DeviceType_GEN9.json | 336 ------------------------------------
 1 file changed, 336 deletions(-)
 delete mode 100644 result_DeviceType_GEN9.json

diff --git a/result_DeviceType_GEN9.json b/result_DeviceType_GEN9.json
deleted file mode 100644
index 560525cc3..000000000
--- a/result_DeviceType_GEN9.json
+++ /dev/null
@@ -1,336 +0,0 @@
-{
-    "hardware": {
-        "CPU": {
-            "Architecture": "x86_64 CPU op-mode(s)"
-        },
-        "RAM size[GB]": 31.154407501220703
-    },
-    "software": {
-        "_libgcc_mutex": {
-            "version": "0.1",
-            "build_string": "conda_forge",
-            "channel": "conda-forge"
-        },
-        "_openmp_mutex": {
-            "version": "4.5",
-            "build_string": "1_gnu",
-            "channel": "conda-forge"
-        },
-        "blas": {
-            "version": "1.0",
-            "build_string": "mkl",
-            "channel": "pkgs/main"
-        },
-        "bottleneck": {
-            "version": "1.3.2",
-            "build_string": "py38heb32a55_1",
-            "channel": "pkgs/main"
-        },
-        "brotlipy": {
-            "version": "0.7.0",
-            "build_string": "py38h27cfd23_1003",
-            "channel": "pkgs/main"
-        },
-        "ca-certificates": {
-            "version": "2021.10.26",
-            "build_string": "h06a4308_2",
-            "channel": "pkgs/main"
-        },
-        "certifi": {
-            "version": "2021.10.8",
-            "build_string": "py38h06a4308_0",
-            "channel": "pkgs/main"
-        },
-        "cffi": {
-            "version": "1.14.6",
-            "build_string": "py38h400218f_0",
-            "channel": "pkgs/main"
-        },
-        "charset-normalizer": {
-            "version": "2.0.4",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "clang-format": {
-            "version": "12.0.1",
-            "build_string": "default_ha53f305_4",
-            "channel": "conda-forge"
-        },
-        "clang-format-12": {
-            "version": "12.0.1",
-            "build_string": "default_ha53f305_4",
-            "channel": "conda-forge"
-        },
-        "clang-tools": {
-            "version": "12.0.1",
-            "build_string": "default_ha53f305_4",
-            "channel": "conda-forge"
-        },
-        "cmake": {
-            "version": "3.21.3",
-            "build_string": "pypi_0",
-            "channel": "pypi"
-        },
-        "cryptography": {
-            "version": "35.0.0",
-            "build_string": "py38hd23ed53_0",
-            "channel": "pkgs/main"
-        },
-        "cython": {
-            "version": "0.29.24",
-            "build_string": "pypi_0",
-            "channel": "pypi"
-        },
-        "et_xmlfile": {
-            "version": "1.1.0",
-            "build_string": "py38h06a4308_0",
-            "channel": "pkgs/main"
-        },
-        "idna": {
-            "version": "3.3",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "intel-openmp": {
-            "version": "2021.4.0",
-            "build_string": "h06a4308_3561",
-            "channel": "pkgs/main"
-        },
-        "jinja2": {
-            "version": "3.0.2",
-            "build_string": "pypi_0",
-            "channel": "pypi"
-        },
-        "joblib": {
-            "version": "1.1.0",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "ld_impl_linux-64": {
-            "version": "2.35.1",
-            "build_string": "h7274673_9",
-            "channel": "pkgs/main"
-        },
-        "libclang": {
-            "version": "12.0.1",
-            "build_string": "default_ha53f305_4",
-            "channel": "conda-forge"
-        },
-        "libclang-cpp12": {
-            "version": "12.0.1",
-            "build_string": "default_ha53f305_4",
-            "channel": "conda-forge"
-        },
-        "libffi": {
-            "version": "3.3",
-            "build_string": "he6710b0_2",
-            "channel": "pkgs/main"
-        },
-        "libgcc-ng": {
-            "version": "11.2.0",
-            "build_string": "h1d223b6_11",
-            "channel": "conda-forge"
-        },
-        "libgfortran-ng": {
-            "version": "7.5.0",
-            "build_string": "ha8ba4b0_17",
-            "channel": "pkgs/main"
-        },
-        "libgfortran4": {
-            "version": "7.5.0",
-            "build_string": "ha8ba4b0_17",
-            "channel": "pkgs/main"
-        },
-        "libgomp": {
-            "version": "11.2.0",
-            "build_string": "h1d223b6_11",
-            "channel": "conda-forge"
-        },
-        "libllvm12": {
-            "version": "12.0.1",
-            "build_string": "hf817b99_2",
-            "channel": "conda-forge"
-        },
-        "libstdcxx-ng": {
-            "version": "11.2.0",
-            "build_string": "he4da1e4_11",
-            "channel": "conda-forge"
-        },
-        "markupsafe": {
-            "version": "2.0.1",
-            "build_string": "pypi_0",
-            "channel": "pypi"
-        },
-        "mkl": {
-            "version": "2021.4.0",
-            "build_string": "h06a4308_640",
-            "channel": "pkgs/main"
-        },
-        "mkl-service": {
-            "version": "2.4.0",
-            "build_string": "py38h7f8727e_0",
-            "channel": "pkgs/main"
-        },
-        "mkl_fft": {
-            "version": "1.3.1",
-            "build_string": "py38hd3c417c_0",
-            "channel": "pkgs/main"
-        },
-        "mkl_random": {
-            "version": "1.2.2",
-            "build_string": "py38h51133e4_0",
-            "channel": "pkgs/main"
-        },
-        "ncurses": {
-            "version": "6.3",
-            "build_string": "h7f8727e_2",
-            "channel": "pkgs/main"
-        },
-        "numexpr": {
-            "version": "2.7.3",
-            "build_string": "py38h22e1b3c_1",
-            "channel": "pkgs/main"
-        },
-        "numpy": {
-            "version": "1.19.2",
-            "build_string": "pypi_0",
-            "channel": "pypi"
-        },
-        "numpy-base": {
-            "version": "1.21.2",
-            "build_string": "py38h79a1101_0",
-            "channel": "pkgs/main"
-        },
-        "openpyxl": {
-            "version": "3.0.9",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "openssl": {
-            "version": "1.1.1l",
-            "build_string": "h7f8727e_0",
-            "channel": "pkgs/main"
-        },
-        "pandas": {
-            "version": "1.3.4",
-            "build_string": "py38h8c16a72_0",
-            "channel": "pkgs/main"
-        },
-        "pip": {
-            "version": "21.2.4",
-            "build_string": "py38h06a4308_0",
-            "channel": "pkgs/main"
-        },
-        "pybind11": {
-            "version": "2.8.0",
-            "build_string": "pypi_0",
-            "channel": "pypi"
-        },
-        "pycparser": {
-            "version": "2.21",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "pyopenssl": {
-            "version": "21.0.0",
-            "build_string": "pyhd3eb1b0_1",
-            "channel": "pkgs/main"
-        },
-        "pysocks": {
-            "version": "1.7.1",
-            "build_string": "py38h06a4308_0",
-            "channel": "pkgs/main"
-        },
-        "python": {
-            "version": "3.8.12",
-            "build_string": "h12debd9_0",
-            "channel": "pkgs/main"
-        },
-        "python-dateutil": {
-            "version": "2.8.2",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "pytz": {
-            "version": "2021.3",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "readline": {
-            "version": "8.1",
-            "build_string": "h27cfd23_0",
-            "channel": "pkgs/main"
-        },
-        "requests": {
-            "version": "2.26.0",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "scikit-learn": {
-            "version": "1.0.1",
-            "build_string": "py38h51133e4_0",
-            "channel": "pkgs/main"
-        },
-        "scikit-learn-intelex": {
-            "version": "2021.20211111.130037",
-            "build_string": "dev_0",
-            "channel": "<develop>"
-        },
-        "scipy": {
-            "version": "1.7.1",
-            "build_string": "py38h292c36d_2",
-            "channel": "pkgs/main"
-        },
-        "setuptools": {
-            "version": "58.0.4",
-            "build_string": "py38h06a4308_0",
-            "channel": "pkgs/main"
-        },
-        "six": {
-            "version": "1.16.0",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "sqlite": {
-            "version": "3.36.0",
-            "build_string": "hc218d9a_0",
-            "channel": "pkgs/main"
-        },
-        "threadpoolctl": {
-            "version": "2.2.0",
-            "build_string": "pyh0d69192_0",
-            "channel": "pkgs/main"
-        },
-        "tk": {
-            "version": "8.6.11",
-            "build_string": "h1ccaba5_0",
-            "channel": "pkgs/main"
-        },
-        "tqdm": {
-            "version": "4.62.3",
-            "build_string": "pyhd3eb1b0_1",
-            "channel": "pkgs/main"
-        },
-        "urllib3": {
-            "version": "1.26.7",
-            "build_string": "pyhd3eb1b0_0",
-            "channel": "pkgs/main"
-        },
-        "wheel": {
-            "version": "0.37.0",
-            "build_string": "pyhd3eb1b0_1",
-            "channel": "pkgs/main"
-        },
-        "xz": {
-            "version": "5.2.5",
-            "build_string": "h7b6447c_0",
-            "channel": "pkgs/main"
-        },
-        "zlib": {
-            "version": "1.2.11",
-            "build_string": "h7b6447c_3",
-            "channel": "pkgs/main"
-        }
-    },
-    "results": []
-}
\ No newline at end of file

From b8eb13d45b1c01bc9a8b96747244de70fc3d36e1 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Tue, 28 Dec 2021 16:32:23 +0300
Subject: [PATCH 12/16] replace None with none

---
 configs/xpu/df_clsf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index 0c504de59..4d14763b8 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -6,7 +6,7 @@
         "data-order": "F",
         "dtype": ["float32", "float64"],
         "max-features": "sqrt",
-        "device": ["host", "cpu", "gpu", "None"]
+        "device": ["host", "cpu", "gpu", "none"]
     },
     "cases": [
         {

From b2e4ff4fad33f3dddbf5b325274f90ce8ff17980 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Wed, 12 Jan 2022 12:44:43 +0300
Subject: [PATCH 13/16] switch data-order from F (column-major) to C (row-major)

---
 configs/xpu/df_clsf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index 4d14763b8..802543d32 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -3,7 +3,7 @@
         "lib": "sklearn",
         "algorithm": "df_clsf",
         "data-format": "pandas",
-        "data-order": "F",
+        "data-order": "C",
         "dtype": ["float32", "float64"],
         "max-features": "sqrt",
         "device": ["host", "cpu", "gpu", "none"]

From 780f4f91213a8bfd9979b9273123f245006f4da1 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Wed, 12 Jan 2022 15:45:18 +0300
Subject: [PATCH 14/16] data-order F back

---
 configs/xpu/df_clsf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/xpu/df_clsf.json b/configs/xpu/df_clsf.json
index 802543d32..4d14763b8 100644
--- a/configs/xpu/df_clsf.json
+++ b/configs/xpu/df_clsf.json
@@ -3,7 +3,7 @@
         "lib": "sklearn",
         "algorithm": "df_clsf",
         "data-format": "pandas",
-        "data-order": "C",
+        "data-order": "F",
         "dtype": ["float32", "float64"],
         "max-features": "sqrt",
         "device": ["host", "cpu", "gpu", "none"]

From 937913f21ad98f321e40b6b3d286481d69e99e9b Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Wed, 12 Jan 2022 19:26:08 +0300
Subject: [PATCH 15/16] float scientific notation handling

---
 bench.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/bench.py b/bench.py
index d22c2abb9..b30540c58 100644
--- a/bench.py
+++ b/bench.py
@@ -19,6 +19,7 @@
 import logging
 import sys
 import timeit
+import re
 
 import numpy as np
 import sklearn
@@ -63,13 +64,16 @@ def _parse_size(string, dim=2):
 
     return tup
 
+def is_float(string):
+    return bool(re.match(r"^[-+]?(?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)(?:[eE][-+]?[0-9]+\b)?$", string))
+
 
 def float_or_int(string):
-    return float(string) if '.' in string else int(string)
+    return int(string) if string.isdigit() else float(string)
 
 
 def float_or_int_or_str(string):
-    return float(string) if '.' in string else int(string) if string.isdigit() else string
+    return int(string) if string.isdigit() else float(string) if is_float(string) else string
 
 
 def get_optimal_cache_size(n_rows, dtype=np.double, max_cache=64):

From bf546f8a624db124cec443c0716c9adc1d4ea6d8 Mon Sep 17 00:00:00 2001
From: dmitrii-kriukov <dmitrii.kriukov@intel.com>
Date: Thu, 13 Jan 2022 10:11:16 +0300
Subject: [PATCH 16/16] pep8

---
 bench.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bench.py b/bench.py
index b30540c58..cbc969bb4 100644
--- a/bench.py
+++ b/bench.py
@@ -64,8 +64,10 @@ def _parse_size(string, dim=2):
 
     return tup
 
+
 def is_float(string):
-    return bool(re.match(r"^[-+]?(?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)(?:[eE][-+]?[0-9]+\b)?$", string))
+    return bool(re.match(r"^[-+]?(?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)(?:[eE][-+]?[0-9]+\b)?$",
+                string))
 
 
 def float_or_int(string):