Xgb datasets adding #60


Merged · 39 commits · Apr 26, 2021
(Diff shown is from the first 26 of the 39 commits.)

Commits
62f87c3
Applied mypy + flake8 for all files
Mar 22, 2021
132d73f
Sorted imports with ISort
Mar 22, 2021
4aa4898
Moved env change to runner
Mar 22, 2021
5a8db33
fixed all mypy errors and added mypy check to CI
Mar 22, 2021
5594efd
Yet another mypy fixes
Mar 22, 2021
35b55b8
Small runner refactoring
Mar 23, 2021
56de8f7
First attempt of adding nvidia datasets
Mar 29, 2021
0ee5f05
Merge branch 'master' into mypy-applying
Mar 29, 2021
04e7a64
removed E265 ignoring for flake8 job
Mar 29, 2021
8268747
Merge remote-tracking branch 'my/mypy-applying' into xgb-nvidia-datasets
Mar 30, 2021
b6a7eb0
NVidia benchmarks are working now
Mar 30, 2021
7e780bb
Added higgs, msrank and airline fetching
Mar 30, 2021
670c289
small fixes of env
Mar 30, 2021
dc0e9c9
Applying comments
Apr 1, 2021
f64ae68
Merge branch 'mypy-applying' into xgb-nvidia-datasets
Apr 1, 2021
873754b
Split dataset loading to different files
Apr 1, 2021
93ea32d
Merge remote-tracking branch 'origin/master' into xgb-nvidia-datasets
Apr 1, 2021
dcfc5b9
Why doesnt mypy work?
Apr 1, 2021
340402e
Added abalone + letters, updated all GB configs
Apr 15, 2021
6e47423
Added links and descriptions for new datasets
Apr 15, 2021
340a628
Merge remote-tracking branch 'origin/master' into xgb-nvidia-datasets
Apr 15, 2021
4be3720
handling mypy
Apr 15, 2021
8184016
Handled skex fake message throwing
Apr 15, 2021
cf5ee76
Trying to handle mypy, at. 3
Apr 15, 2021
9db3177
Trying to handle mypy, at. 4
Apr 15, 2021
5e76a0b
Trying to handle mypy, at. 5
Apr 15, 2021
13fcd20
Changed configs readme and made small fixes in GB testing configs
Apr 20, 2021
0873f97
Merge branch 'master' of https://github.com/IntelPython/scikit-learn_…
Apr 20, 2021
877e0fd
Applying more comments, updating readme's
Apr 20, 2021
8bdc7f2
Applying comments: renamed configs
Apr 20, 2021
f9cf09b
Changed all datasets to npy, applied Kirill's comments
Apr 23, 2021
41e003f
Merge branch 'master' of https://github.com/IntelPython/scikit-learn_…
Apr 23, 2021
523df30
Cleanup after someone's commit
Apr 23, 2021
59303fa
Applying mypy
Apr 23, 2021
b56e42c
Applied Ekaterina's suggestions
Apr 23, 2021
ad176e5
Applied other Ekaterina's comments
Apr 23, 2021
b92a27f
Merge branch 'xgb-nvidia-datasets' of https://github.com/RukhovichIV/…
Apr 23, 2021
11a8ffc
Final commits applying
Apr 26, 2021
37d5461
Alexander's final comments
Apr 26, 2021
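Several of the commits above convert the benchmark datasets from CSV to NumPy's binary .npy format and add fetching for the higgs, msrank, and airline datasets. As a rough sketch of the file layout the updated configs expect (the helper below is hypothetical, not the repository's actual loader):

import os
import numpy as np

def load_npy_dataset(name, data_dir='data'):
    # Hypothetical helper: the naming scheme (<name>_x_train.npy, ...)
    # follows the paths in the updated configs below, but this function
    # is an illustration, not part of the benchmark code.
    arrays = {}
    for split in ('train', 'test'):
        for axis in ('x', 'y'):
            path = os.path.join(data_dir, f'{name}_{axis}_{split}.npy')
            # np.load reads the binary .npy format written by np.save
            arrays[f'{axis}_{split}'] = np.load(path)
    return arrays

# e.g. load_npy_dataset('higgs1m') reads data/higgs1m_x_train.npy and friends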
10 changes: 5 additions & 5 deletions azure-pipelines.yml
@@ -6,7 +6,7 @@ jobs:
- script: |
conda update -y -q conda
export FORCE_DAAL4PY_SKLEARN=yes
conda create -q -y -n bench -c conda-forge python=3.7 pandas scikit-learn scikit-learn-intelex
conda create -q -y -n bench -c conda-forge python=3.7 pandas scikit-learn scikit-learn-intelex tqdm
displayName: Create Anaconda environment
- script: |
. /usr/share/miniconda/etc/profile.d/conda.sh
@@ -19,7 +19,7 @@ jobs:
steps:
- script: |
conda update -y -q conda
conda create -n bench -q -y -c conda-forge python=3.7 pandas xgboost scikit-learn
conda create -n bench -q -y -c conda-forge python=3.7 pandas xgboost scikit-learn tqdm
displayName: Create Anaconda environment
- script: |
. /usr/share/miniconda/etc/profile.d/conda.sh
@@ -32,7 +32,7 @@ jobs:
steps:
- script: |
conda update -y -q conda
conda create -n bench -q -y -c conda-forge python=3.7 pandas scikit-learn daal4py
conda create -n bench -q -y -c conda-forge python=3.7 pandas scikit-learn daal4py tqdm
displayName: Create Anaconda environment
- script: |
. /usr/share/miniconda/etc/profile.d/conda.sh
@@ -45,7 +45,7 @@ jobs:
steps:
- script: |
conda update -y -q conda
conda create -n bench -q -y -c conda-forge python=3.7 pandas xgboost scikit-learn daal4py
conda create -n bench -q -y -c conda-forge python=3.7 pandas xgboost scikit-learn daal4py tqdm
displayName: Create Anaconda environment
- script: |
. /usr/share/miniconda/etc/profile.d/conda.sh
@@ -71,7 +71,7 @@ jobs:
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7'
versionSpec: '3.8'
addToPath: true
- script: |
python -m pip install --upgrade pip setuptools
24 changes: 13 additions & 11 deletions bench.py
@@ -16,6 +16,7 @@

import argparse
import json
import logging
import sys
import timeit

@@ -200,15 +201,16 @@ def parse_args(parser, size=None, loop_types=(),
from sklearnex import patch_sklearn
patch_sklearn()
except ImportError:
print('Failed to import sklearnex.patch_sklearn.'
'Use stock version scikit-learn', file=sys.stderr)
logging.info('Failed to import sklearnex.patch_sklearn. '
'Use stock version of scikit-learn')
params.device = 'None'
else:
if params.device != 'None':
print('Device context is not supported for stock scikit-learn.'
'Please use --no-intel-optimized=False with'
f'--device={params.device} parameter. Fallback to --device=None.',
file=sys.stderr)
logging.info(
'Device context is not supported for stock scikit-learn. '
'Please use --no-intel-optimized=False with the '
f'--device={params.device} parameter. Falling back to --device=None.')
params.device = 'None'

# disable finiteness check (default)
@@ -218,7 +220,7 @@ def parse_args(parser, size=None, loop_types=(),
# Ask DAAL what it thinks about this number of threads
num_threads = prepare_daal_threads(num_threads=params.threads)
if params.verbose:
print(f'@ DAAL gave us {num_threads} threads')
logging.info(f'@ DAAL gave us {num_threads} threads')

n_jobs = None
if n_jobs_supported:
Expand All @@ -234,7 +236,7 @@ def parse_args(parser, size=None, loop_types=(),

# Very verbose output
if params.verbose:
print(f'@ params = {params.__dict__}')
logging.info(f'@ params = {params.__dict__}')

return params

@@ -249,8 +251,8 @@ def set_daal_num_threads(num_threads):
if num_threads:
daal4py.daalinit(nthreads=num_threads)
except ImportError:
print('@ Package "daal4py" was not found. Number of threads '
'is being ignored')
logging.info('@ Package "daal4py" was not found. Number of threads '
'is being ignored')


def prepare_daal_threads(num_threads=-1):
@@ -484,7 +486,7 @@ def print_output(library, algorithm, stages, params, functions,
output = []
for i in range(len(stages)):
result = gen_basic_dict(library, algorithm, stages[i], params,
data[i], alg_instance, alg_params)
data[i], alg_instance, alg_params if i == 0 else None)
Contributor:

Not clear why only the first stage has alg_params.

Author:

It seems that in all benchmarks, all stages of a case have the same parameters. Since the parameter list is usually quite long, we can reduce the length of the benchmark output by printing this section only once.

Contributor (@Alexsandruss, Apr 26, 2021):

@RukhovichIV, the Excel report generator filters benchmark cases based on their parameters, so the output must not be shortened or the generator will not work correctly.

Author:

Rolled back that change, but very upset about it :(
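To make the concern concrete, here is a minimal sketch (with hypothetical field names; the real gen_basic_dict output may differ) of why dropping alg_params from later stages breaks parameter-based filtering:

# Hypothetical per-stage records mimicking the change above.
stages = ['training', 'prediction']
alg_params = {'max-depth': 8, 'n-estimators': 1000}
results = [
    {'stage': stage, 'algorithm_parameters': alg_params if i == 0 else None}
    for i, stage in enumerate(stages)
]

# A report generator that filters cases by their parameters now sees
# None for every stage after the first, so prediction rows are lost:
matched = [r for r in results
           if r['algorithm_parameters'] is not None
           and r['algorithm_parameters'].get('max-depth') == 8]
assert len(matched) == 1  # only the training stage survives the filter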

result.update({'time[s]': times[i]})
if accuracy_type is not None:
result.update({f'{accuracy_type}': accuracies[i]})
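The diff above replaces print(..., file=sys.stderr) calls with logging.info. Note that the root logger drops INFO messages until it is configured; a minimal sketch of a setup that restores the old stderr behaviour (the exact configuration the benchmarks use is an assumption here):

import logging
import sys

# One-time setup: without it, the root logger only emits WARNING and
# above, and all of the logging.info messages would be silently dropped.
logging.basicConfig(stream=sys.stderr, level=logging.INFO,
                    format='%(levelname)s: %(message)s')

logging.info('Failed to import sklearnex.patch_sklearn. '
             'Use stock version of scikit-learn')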
1 change: 0 additions & 1 deletion configs/cuml_config.json
@@ -1,5 +1,4 @@
{
"omp_env": ["OMP_NUM_THREADS"],
"common": {
"lib": ["cuml"],
"data-format": ["cudf"],
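The omp_env list disappears from this config because, per the "Moved env change to runner" commit above, OpenMP variables are now exported by the runner itself. A minimal sketch of that idea (the helper below is hypothetical, not the runner's actual code):

import os

def set_omp_env(num_threads, places='cores'):
    # Hypothetical runner-side helper: export OpenMP settings once for
    # all child benchmark processes instead of listing them per config.
    os.environ['OMP_NUM_THREADS'] = str(num_threads)
    os.environ['OMP_PLACES'] = places

set_omp_env(8)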
145 changes: 78 additions & 67 deletions configs/lgbm_mb_cpu_config.json
@@ -1,109 +1,120 @@
{
"omp_env": ["OMP_NUM_THREADS", "OMP_PLACES"],
"common": {
"lib": ["modelbuilders"],
"data-format": ["pandas"],
"data-order": ["F"],
"dtype": ["float32"]
"lib": "modelbuilders",
Contributor:

Add a note to the README that parameters can be set either as a single value or as a list of values.

Author:

Done earlier.

"data-format": "pandas",
"data-order": "F",
"dtype": "float32",
"algorithm": "lgbm_mb"
},
"cases": [
{
"algorithm": "lgbm_mb",
"dataset": [
{
"source": "csv",
"name": "mortgage1Q",
"source": "npy",
"name": "airline-ohe",
"training":
{
"x": "data/mortgage_x.csv",
"y": "data/mortgage_y.csv"
"x": "data/airline-ohe_x_train.npy",
"y": "data/airline-ohe_y_train.npy"
},
"testing":
{
"x": "data/airline-ohe_x_test.npy",
"y": "data/airline-ohe_y_test.npy"
}
}
],
"n-estimators": [100],
"objective": ["regression"],
"max-depth": [8],
"scale-pos-weight": [2],
"learning-rate": [0.1],
"subsample": [1],
"reg-alpha": [0.9],
"reg-lambda": [1],
"min-child-weight": [0],
"max-leaves": [256]
"reg-alpha": 0.9,
"max-bin": 256,
"scale-pos-weight": 2,
"learning-rate": 0.1,
"subsample": 1,
"reg-lambda": 1,
"min-child-weight": 0,
"max-depth": 8,
"max-leaves": 256,
"n-estimators": 1000,
"objective": "binary"
},
{
"algorithm": "lgbm_mb",
"dataset": [
{
"source": "csv",
"name": "airline-ohe",
"source": "npy",
"name": "higgs1m",
"training":
{
"x": "data/airline-ohe_x_train.csv",
"y": "data/airline-ohe_y_train.csv"
"x": "data/higgs1m_x_train.npy",
"y": "data/higgs1m_y_train.npy"
},
"testing":
{
"x": "data/higgs1m_x_test.npy",
"y": "data/higgs1m_y_test.npy"
}
}
],
"reg-alpha": [0.9],
"max-bin": [256],
"scale-pos-weight": [2],
"learning-rate": [0.1],
"subsample": [1],
"reg-lambda": [1],
"min-child-weight": [0],
"max-depth": [8],
"max-leaves": [256],
"n-estimators": [1000],
"objective": ["binary"]
"reg-alpha": 0.9,
"max-bin": 256,
"scale-pos-weight": 2,
"learning-rate": 0.1,
"subsample": 1,
"reg-lambda": 1,
"min-child-weight": 0,
"max-depth": 8,
"max-leaves": 256,
"n-estimators": 1000,
"objective": "binary"
},
{
"algorithm": "lgbm_mb",
"dataset": [
{
"source": "csv",
"name": "higgs1m",
"source": "csv",
"name": "mortgage1Q",
"training":
{
"x": "data/higgs1m_x_train.csv",
"y": "data/higgs1m_y_train.csv"
"x": "data/mortgage_x.csv",
"y": "data/mortgage_y.csv"
}
}
],
"reg-alpha": [0.9],
"max-bin": [256],
"scale-pos-weight": [2],
"learning-rate": [0.1],
"subsample": [1],
"reg-lambda": [1],
"min-child-weight": [0],
"max-depth": [8],
"max-leaves": [256],
"n-estimators": [1000],
"objective": ["binary"]
"n-estimators": 100,
"objective": "regression",
"max-depth": 8,
"scale-pos-weight": 2,
"learning-rate": 0.1,
"subsample": 1,
"reg-alpha": 0.9,
"reg-lambda": 1,
"min-child-weight": 0,
"max-leaves": 256
},
{
"algorithm": "lgbm_mb",
"dataset": [
{
"source": "csv",
"name": "msrank",
"source": "npy",
"name": "msrank",
"training":
{
"x": "data/mlsr_x_train.csv",
"y": "data/mlsr_y_train.csv"
"x": "data/msrank_x_train.npy",
"y": "data/msrank_y_train.npy"
},
"testing":
{
"x": "data/msrank_x_test.npy",
"y": "data/msrank_y_test.npy"
}
}
],
"max-bin": [256],
"learning-rate": [0.3],
"subsample": [1],
"reg-lambda": [2],
"min-child-weight": [1],
"min-split-gain": [0.1],
"max-depth": [8],
"max-leaves": [256],
"n-estimators": [200],
"objective": ["multiclass"]
"max-bin": 256,
"learning-rate": 0.3,
"subsample": 1,
"reg-lambda": 2,
"min-child-weight": 1,
"min-split-loss": 0.1,
"max-depth": 8,
"max-leaves": 256,
"n-estimators": 200,
"objective": "multiclass"
}
]
}
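As the review thread in this file notes, parameters can now be written either as a single value ("dtype": "float32") or as a list of values. A sketch of how a config reader could normalize both spellings before expanding benchmark cases (a hypothetical helper, not the repository's actual parser):

def as_list(value):
    # Scalars become one-element lists; lists pass through unchanged,
    # so every parameter can be swept uniformly afterwards.
    return value if isinstance(value, list) else [value]

assert as_list('pandas') == ['pandas']       # single value
assert as_list(['pandas']) == ['pandas']     # equivalent list form
assert as_list([0.1, 0.3]) == [0.1, 0.3]     # real sweep over two cases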