From 3b67cc41cec46bda091f8e4f086675eee4949f62 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 12:07:24 +0100
Subject: [PATCH 01/41] refactoring

---
 bench.py | 67 ++++++++++++++++++++++++++++----------------------------
 utils.py |  8 +++----
 2 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/bench.py b/bench.py
index f3f9ce5c3..560d96d50 100644
--- a/bench.py
+++ b/bench.py
@@ -389,14 +389,13 @@ def convert_data(data, dtype, data_order, data_format):
     # Secondly, change format of data
     if data_format == 'numpy':
         return data
-    elif data_format == 'pandas':
+    if data_format == 'pandas':
         import pandas as pd
 
         if data.ndim == 1:
             return pd.Series(data)
-        else:
-            return pd.DataFrame(data)
-    elif data_format == 'cudf':
+        return pd.DataFrame(data)
+    if data_format == 'cudf':
         import cudf
         import pandas as pd
 
@@ -516,36 +515,36 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
 def print_output(library, algorithm, stages, params, functions,
                  times, metric_type, metrics, data, alg_instance=None,
                  alg_params=None):
-    if params.output_format == 'json':
-        output = []
-        for i, stage in enumerate(stages):
-            result = gen_basic_dict(library, algorithm, stage, params,
-                                    data[i], alg_instance, alg_params)
-            result.update({'time[s]': times[i]})
-            if metric_type is not None:
-                if isinstance(metric_type, str):
-                    result.update({f'{metric_type}': metrics[i]})
-                elif isinstance(metric_type, list):
-                    for ind, val in enumerate(metric_type):
-                        if metrics[ind][i] is not None:
-                            result.update({f'{val}': metrics[ind][i]})
-            if hasattr(params, 'n_classes'):
-                result['input_data'].update({'classes': params.n_classes})
-            if hasattr(params, 'n_clusters'):
-                if algorithm == 'kmeans':
-                    result['input_data'].update(
-                        {'n_clusters': params.n_clusters})
-                elif algorithm == 'dbscan':
-                    result.update({'n_clusters': params.n_clusters})
-            # replace non-string init with string for kmeans benchmarks
-            if alg_instance is not None:
-                if 'init' in result['algorithm_parameters'].keys():
-                    if not isinstance(result['algorithm_parameters']['init'], str):
-                        result['algorithm_parameters']['init'] = 'random'
-                if 'handle' in result['algorithm_parameters'].keys():
-                    del result['algorithm_parameters']['handle']
-            output.append(result)
-        print(json.dumps(output, indent=4))
+    if params.output_format != 'json': return
+    output = []
+    for i, stage in enumerate(stages):
+        result = gen_basic_dict(library, algorithm, stage, params,
+                                data[i], alg_instance, alg_params)
+        result.update({'time[s]': times[i]})
+        if metric_type is not None:
+            if isinstance(metric_type, str):
+                result.update({f'{metric_type}': metrics[i]})
+            elif isinstance(metric_type, list):
+                for ind, val in enumerate(metric_type):
+                    if metrics[ind][i] is not None:
+                        result.update({f'{val}': metrics[ind][i]})
+        if hasattr(params, 'n_classes'):
+            result['input_data'].update({'classes': params.n_classes})
+        if hasattr(params, 'n_clusters'):
+            if algorithm == 'kmeans':
+                result['input_data'].update(
+                    {'n_clusters': params.n_clusters})
+            elif algorithm == 'dbscan':
+                result.update({'n_clusters': params.n_clusters})
+        # replace non-string init with string for kmeans benchmarks
+        if alg_instance is not None:
+            if 'init' in result['algorithm_parameters'].keys() and \
+                not isinstance(result['algorithm_parameters']['init'], str):
+                    result['algorithm_parameters']['init'] = 'random'
+            if 'handle' in result['algorithm_parameters'].keys():
+                del result['algorithm_parameters']['handle']
+        output.append(result)
+    print(json.dumps(output, indent=4))
 
 
 def run_with_context(params, function):
diff --git a/utils.py b/utils.py
index 8c1720dcb..0696eb6a7 100755
--- a/utils.py
+++ b/utils.py
@@ -175,11 +175,11 @@ def generate_cases(params: Dict[str, Union[List[Any], Any]]) -> List[str]:
             commands *= len(values)
             dashes = '-' if len(param) == 1 else '--'
             for command_num in range(prev_len):
-                for value_num in range(len(values)):
-                    commands[prev_len * value_num + command_num] += ' ' + \
-                        dashes + param + ' ' + str(values[value_num])
+                for idx, val in enumerate(values):
+                    commands[prev_len * idx + command_num] += ' ' + \
+                        dashes + param + ' ' + str(val)
         else:
             dashes = '-' if len(param) == 1 else '--'
-            for command_num in range(len(commands)):
+            for command_num,_ in enumerate(commands):
                 commands[command_num] += ' ' + dashes + param + ' ' + str(values)
     return commands

From 29074714a579712704523bf4c57b07511308c3dc Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 12:17:33 +0100
Subject: [PATCH 02/41] remove global

---
 datasets/loader_utils.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index 29366eccb..b65148441 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -20,13 +20,9 @@
 import numpy as np
 import tqdm
 
-pbar: tqdm.tqdm = None
-
 
 def _show_progress(block_num: int, block_size: int, total_size: int) -> None:
-    global pbar
-    if pbar is None:
-        pbar = tqdm.tqdm(total=total_size / 1024, unit='kB')
+    pbar: tqdm.tqdm = tqdm.tqdm(total=total_size / 1024, unit='kB')
 
     downloaded = block_num * block_size
     if downloaded < total_size:

From 7c6db21518a9a4b8b6ec531f89c87a4a836a4e9e Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 12:18:58 +0100
Subject: [PATCH 03/41] refactor

---
 bench.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bench.py b/bench.py
index 560d96d50..f6374ae68 100644
--- a/bench.py
+++ b/bench.py
@@ -515,7 +515,8 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
 def print_output(library, algorithm, stages, params, functions,
                  times, metric_type, metrics, data, alg_instance=None,
                  alg_params=None):
-    if params.output_format != 'json': return
+    if params.output_format != 'json':
+        return
     output = []
     for i, stage in enumerate(stages):
         result = gen_basic_dict(library, algorithm, stage, params,
@@ -538,8 +539,8 @@ def print_output(library, algorithm, stages, params, functions,
                 result.update({'n_clusters': params.n_clusters})
         # replace non-string init with string for kmeans benchmarks
         if alg_instance is not None:
-            if 'init' in result['algorithm_parameters'].keys() and \
-                not isinstance(result['algorithm_parameters']['init'], str):
+            if 'init' in result['algorithm_parameters'].keys():
+                if isinstance(result['algorithm_parameters']['init'], str):
                     result['algorithm_parameters']['init'] = 'random'
             if 'handle' in result['algorithm_parameters'].keys():
                 del result['algorithm_parameters']['handle']

From 3cbc5a6889afe7cda07a45bed56c6854628e4799 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 12:23:34 +0100
Subject: [PATCH 04/41] fix typo

---
 bench.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bench.py b/bench.py
index f6374ae68..1f7e89c02 100644
--- a/bench.py
+++ b/bench.py
@@ -539,9 +539,10 @@ def print_output(library, algorithm, stages, params, functions,
                 result.update({'n_clusters': params.n_clusters})
         # replace non-string init with string for kmeans benchmarks
         if alg_instance is not None:
-            if 'init' in result['algorithm_parameters'].keys():
-                if isinstance(result['algorithm_parameters']['init'], str):
-                    result['algorithm_parameters']['init'] = 'random'
+            condition = 'init' in result['algorithm_parameters'].keys() and\
+            not isinstance(result['algorithm_parameters']['init'], str)
+            if condition:
+                result['algorithm_parameters']['init'] = 'random'
             if 'handle' in result['algorithm_parameters'].keys():
                 del result['algorithm_parameters']['handle']
         output.append(result)

From 0b44688f4160b13d31324650631696e9a66c36be Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 12:26:11 +0100
Subject: [PATCH 05/41] refactor?

---
 bench.py | 73 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 24 deletions(-)

diff --git a/bench.py b/bench.py
index 1f7e89c02..1053be7a2 100644
--- a/bench.py
+++ b/bench.py
@@ -512,6 +512,31 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
     return result
 
 
+def update_result_dict(result) -> None:
+    result.update({'time[s]': times[i]})
+    if metric_type is not None:
+        if isinstance(metric_type, str):
+            result.update({f'{metric_type}': metrics[i]})
+        elif isinstance(metric_type, list):
+            for ind, val in enumerate(metric_type):
+                if metrics[ind][i] is not None:
+                    result.update({f'{val}': metrics[ind][i]})
+    if hasattr(params, 'n_classes'):
+        result['input_data'].update({'classes': params.n_classes})
+    if hasattr(params, 'n_clusters'):
+        if algorithm == 'kmeans':
+            result['input_data'].update(
+                {'n_clusters': params.n_clusters})
+        elif algorithm == 'dbscan':
+            result.update({'n_clusters': params.n_clusters})
+    # replace non-string init with string for kmeans benchmarks
+    if alg_instance is not None:
+        if 'init' in result['algorithm_parameters'].keys():
+            if not isinstance(result['algorithm_parameters']['init'], str):
+                result['algorithm_parameters']['init'] = 'random'
+        if 'handle' in result['algorithm_parameters'].keys():
+            del result['algorithm_parameters']['handle']
+
 def print_output(library, algorithm, stages, params, functions,
                  times, metric_type, metrics, data, alg_instance=None,
                  alg_params=None):
@@ -521,30 +546,30 @@ def print_output(library, algorithm, stages, params, functions,
     for i, stage in enumerate(stages):
         result = gen_basic_dict(library, algorithm, stage, params,
                                 data[i], alg_instance, alg_params)
-        result.update({'time[s]': times[i]})
-        if metric_type is not None:
-            if isinstance(metric_type, str):
-                result.update({f'{metric_type}': metrics[i]})
-            elif isinstance(metric_type, list):
-                for ind, val in enumerate(metric_type):
-                    if metrics[ind][i] is not None:
-                        result.update({f'{val}': metrics[ind][i]})
-        if hasattr(params, 'n_classes'):
-            result['input_data'].update({'classes': params.n_classes})
-        if hasattr(params, 'n_clusters'):
-            if algorithm == 'kmeans':
-                result['input_data'].update(
-                    {'n_clusters': params.n_clusters})
-            elif algorithm == 'dbscan':
-                result.update({'n_clusters': params.n_clusters})
-        # replace non-string init with string for kmeans benchmarks
-        if alg_instance is not None:
-            condition = 'init' in result['algorithm_parameters'].keys() and\
-            not isinstance(result['algorithm_parameters']['init'], str)
-            if condition:
-                result['algorithm_parameters']['init'] = 'random'
-            if 'handle' in result['algorithm_parameters'].keys():
-                del result['algorithm_parameters']['handle']
+        update_result_dict(result)
+        # result.update({'time[s]': times[i]})
+        # if metric_type is not None:
+        #     if isinstance(metric_type, str):
+        #         result.update({f'{metric_type}': metrics[i]})
+        #     elif isinstance(metric_type, list):
+        #         for ind, val in enumerate(metric_type):
+        #             if metrics[ind][i] is not None:
+        #                 result.update({f'{val}': metrics[ind][i]})
+        # if hasattr(params, 'n_classes'):
+        #     result['input_data'].update({'classes': params.n_classes})
+        # if hasattr(params, 'n_clusters'):
+        #     if algorithm == 'kmeans':
+        #         result['input_data'].update(
+        #             {'n_clusters': params.n_clusters})
+        #     elif algorithm == 'dbscan':
+        #         result.update({'n_clusters': params.n_clusters})
+        # # replace non-string init with string for kmeans benchmarks
+        # if alg_instance is not None:
+        #     if 'init' in result['algorithm_parameters'].keys():
+        #         if not isinstance(result['algorithm_parameters']['init'], str):
+        #             result['algorithm_parameters']['init'] = 'random'
+        #     if 'handle' in result['algorithm_parameters'].keys():
+        #         del result['algorithm_parameters']['handle']
         output.append(result)
     print(json.dumps(output, indent=4))
 

From 786acb43e018010b413ebd623abc3c8283984c1b Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 12:31:50 +0100
Subject: [PATCH 06/41] refactor

---
 bench.py | 70 +++++++++++++++++---------------------------------------
 1 file changed, 21 insertions(+), 49 deletions(-)

diff --git a/bench.py b/bench.py
index 1053be7a2..c438b7ec9 100644
--- a/bench.py
+++ b/bench.py
@@ -512,31 +512,6 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
     return result
 
 
-def update_result_dict(result) -> None:
-    result.update({'time[s]': times[i]})
-    if metric_type is not None:
-        if isinstance(metric_type, str):
-            result.update({f'{metric_type}': metrics[i]})
-        elif isinstance(metric_type, list):
-            for ind, val in enumerate(metric_type):
-                if metrics[ind][i] is not None:
-                    result.update({f'{val}': metrics[ind][i]})
-    if hasattr(params, 'n_classes'):
-        result['input_data'].update({'classes': params.n_classes})
-    if hasattr(params, 'n_clusters'):
-        if algorithm == 'kmeans':
-            result['input_data'].update(
-                {'n_clusters': params.n_clusters})
-        elif algorithm == 'dbscan':
-            result.update({'n_clusters': params.n_clusters})
-    # replace non-string init with string for kmeans benchmarks
-    if alg_instance is not None:
-        if 'init' in result['algorithm_parameters'].keys():
-            if not isinstance(result['algorithm_parameters']['init'], str):
-                result['algorithm_parameters']['init'] = 'random'
-        if 'handle' in result['algorithm_parameters'].keys():
-            del result['algorithm_parameters']['handle']
-
 def print_output(library, algorithm, stages, params, functions,
                  times, metric_type, metrics, data, alg_instance=None,
                  alg_params=None):
@@ -546,30 +521,27 @@ def print_output(library, algorithm, stages, params, functions,
     for i, stage in enumerate(stages):
         result = gen_basic_dict(library, algorithm, stage, params,
                                 data[i], alg_instance, alg_params)
-        update_result_dict(result)
-        # result.update({'time[s]': times[i]})
-        # if metric_type is not None:
-        #     if isinstance(metric_type, str):
-        #         result.update({f'{metric_type}': metrics[i]})
-        #     elif isinstance(metric_type, list):
-        #         for ind, val in enumerate(metric_type):
-        #             if metrics[ind][i] is not None:
-        #                 result.update({f'{val}': metrics[ind][i]})
-        # if hasattr(params, 'n_classes'):
-        #     result['input_data'].update({'classes': params.n_classes})
-        # if hasattr(params, 'n_clusters'):
-        #     if algorithm == 'kmeans':
-        #         result['input_data'].update(
-        #             {'n_clusters': params.n_clusters})
-        #     elif algorithm == 'dbscan':
-        #         result.update({'n_clusters': params.n_clusters})
-        # # replace non-string init with string for kmeans benchmarks
-        # if alg_instance is not None:
-        #     if 'init' in result['algorithm_parameters'].keys():
-        #         if not isinstance(result['algorithm_parameters']['init'], str):
-        #             result['algorithm_parameters']['init'] = 'random'
-        #     if 'handle' in result['algorithm_parameters'].keys():
-        #         del result['algorithm_parameters']['handle']
+        result.update({'time[s]': times[i]})
+        if isinstance(metric_type, str):
+            result.update({f'{metric_type}': metrics[i]})
+        elif isinstance(metric_type, list):
+            for ind, val in enumerate(metric_type):
+                if metrics[ind][i] is not None:
+                    result.update({f'{val}': metrics[ind][i]})
+        if hasattr(params, 'n_classes'):
+            result['input_data'].update({'classes': params.n_classes})
+        if hasattr(params, 'n_clusters'):
+            if algorithm == 'kmeans':
+                result['input_data'].update(
+                    {'n_clusters': params.n_clusters})
+            elif algorithm == 'dbscan':
+                result.update({'n_clusters': params.n_clusters})
+        # replace non-string init with string for kmeans benchmarks
+        if alg_instance is not None:
+            if 'init' in result['algorithm_parameters'].keys():
+                if not isinstance(result['algorithm_parameters']['init'], str):
+                    result['algorithm_parameters']['init'] = 'random'
+            result['algorithm_parameters'].pop('handle',None)
         output.append(result)
     print(json.dumps(output, indent=4))
 

From 6fc17ef090360dabb5675ecff39187f8e785ae1b Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 12:38:05 +0100
Subject: [PATCH 07/41] refactor

---
 bench.py | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/bench.py b/bench.py
index c438b7ec9..23251c6ee 100644
--- a/bench.py
+++ b/bench.py
@@ -448,25 +448,25 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
                 int_dtype if 'y' in element and int_label else params.dtype,
                 params.data_order, params.data_format
             )
-        # generate and convert data if it's marked and path isn't specified
-        if full_data[element] is None and element in generated_data:
-            full_data[element] = convert_data(
-                np.random.rand(*params.shape),
-                int_dtype if 'y' in element and int_label else params.dtype,
-                params.data_order, params.data_format)
-        # convert existing labels from 1- to 2-dimensional
-        # if it's forced and possible
-        if full_data[element] is not None and 'y' in element \
-                and label_2d and hasattr(full_data[element], 'reshape'):
-            full_data[element] = full_data[element].reshape(
-                (full_data[element].shape[0], 1))
-        # add dtype property to data if it's needed and doesn't exist
-        if full_data[element] is not None and add_dtype and \
-                not hasattr(full_data[element], 'dtype'):
-            if hasattr(full_data[element], 'values'):
-                full_data[element].dtype = full_data[element].values.dtype
-            elif hasattr(full_data[element], 'dtypes'):
-                full_data[element].dtype = full_data[element].dtypes[0].type
+        if full_data[element] is None:
+            # generate and convert data if it's marked and path isn't specified
+            if element in generated_data:
+                full_data[element] = convert_data(
+                    np.random.rand(*params.shape),
+                    int_dtype if 'y' in element and int_label else params.dtype,
+                    params.data_order, params.data_format)
+        else:
+            # convert existing labels from 1- to 2-dimensional
+            # if it's forced and possible
+            if 'y' in element and label_2d and hasattr(full_data[element], 'reshape'):
+                full_data[element] = full_data[element].reshape(
+                    (full_data[element].shape[0], 1))
+            # add dtype property to data if it's needed and doesn't exist
+            if add_dtype and not hasattr(full_data[element], 'dtype'):
+                if hasattr(full_data[element], 'values'):
+                    full_data[element].dtype = full_data[element].values.dtype
+                elif hasattr(full_data[element], 'dtypes'):
+                    full_data[element].dtype = full_data[element].dtypes[0].type
 
     params.dtype = get_dtype(full_data['X_train'])
     # add size to parameters which is need for some cases

From 3509b1de53e440601a9fbdb1acc12861fd8e5342 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 14:50:21 +0100
Subject: [PATCH 08/41] refactor?

---
 bench.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bench.py b/bench.py
index 23251c6ee..3b27c0615 100644
--- a/bench.py
+++ b/bench.py
@@ -458,7 +458,9 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
         else:
             # convert existing labels from 1- to 2-dimensional
             # if it's forced and possible
-            if 'y' in element and label_2d and hasattr(full_data[element], 'reshape'):
+            condition1: bool = 'y' in element and label_2d
+            condition1 = condition1 and hasattr(full_data[element], 'reshape')
+            if condition1:
                 full_data[element] = full_data[element].reshape(
                     (full_data[element].shape[0], 1))
             # add dtype property to data if it's needed and doesn't exist

From 469336298ab21d4d705f1012ec68bf8a6fffd56e Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 15:03:28 +0100
Subject: [PATCH 09/41] refactor?

---
 bench.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/bench.py b/bench.py
index 3b27c0615..dfb980903 100644
--- a/bench.py
+++ b/bench.py
@@ -458,9 +458,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
         else:
             # convert existing labels from 1- to 2-dimensional
             # if it's forced and possible
-            condition1: bool = 'y' in element and label_2d
-            condition1 = condition1 and hasattr(full_data[element], 'reshape')
-            if condition1:
+            if 'y' in element and label_2d and hasattr(full_data[element], 'reshape'):
                 full_data[element] = full_data[element].reshape(
                     (full_data[element].shape[0], 1))
             # add dtype property to data if it's needed and doesn't exist
@@ -482,8 +480,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
     return tuple(full_data.values())
 
 
-def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
-                   alg_params=None):
+def gen_basic_dict(library, algorithm, stage, params, data):
     result = {
         'library': library,
         'algorithm': algorithm,
@@ -498,6 +495,9 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
             'columns': data.shape[1]
         }
     }
+    return result
+
+def update_algorithm_parameters(result, alg_instance=None, alg_params=None):
     result['algorithm_parameters'] = {}
     if alg_instance is not None:
         if 'Booster' in str(type(alg_instance)):
@@ -509,8 +509,15 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
                 alg_instance_params['dtype'] = str(
                     alg_instance_params['dtype'])
         result['algorithm_parameters'].update(alg_instance_params)
+        if 'init' in result['algorithm_parameters']:
+            if not isinstance(result['algorithm_parameters']['init'], str):
+                result['algorithm_parameters']['init'] = 'random'
     if alg_params is not None:
         result['algorithm_parameters'].update(alg_params)
+        if 'init' in result['algorithm_parameters'].keys():
+            if not isinstance(result['algorithm_parameters']['init'], str):
+                result['algorithm_parameters']['init'] = 'random'
+    result['algorithm_parameters'].pop('handle',None)
     return result
 
 
@@ -521,8 +528,7 @@ def print_output(library, algorithm, stages, params, functions,
         return
     output = []
     for i, stage in enumerate(stages):
-        result = gen_basic_dict(library, algorithm, stage, params,
-                                data[i], alg_instance, alg_params)
+        result = gen_basic_dict(library, algorithm, stage, params, data[i])
         result.update({'time[s]': times[i]})
         if isinstance(metric_type, str):
             result.update({f'{metric_type}': metrics[i]})
@@ -539,11 +545,7 @@ def print_output(library, algorithm, stages, params, functions,
             elif algorithm == 'dbscan':
                 result.update({'n_clusters': params.n_clusters})
         # replace non-string init with string for kmeans benchmarks
-        if alg_instance is not None:
-            if 'init' in result['algorithm_parameters'].keys():
-                if not isinstance(result['algorithm_parameters']['init'], str):
-                    result['algorithm_parameters']['init'] = 'random'
-            result['algorithm_parameters'].pop('handle',None)
+        result = update_algorithm_parameters(result, alg_instance, alg_params)
         output.append(result)
     print(json.dumps(output, indent=4))
 

From 0da84144944cebae2a259e3d70d930f5a0a3332d Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 15:09:36 +0100
Subject: [PATCH 10/41] Revert "refactor?"

This reverts commit 3509b1de53e440601a9fbdb1acc12861fd8e5342.

From 5194413dd3047baac0d2e8fbe78b5947070ba81e Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 15:13:25 +0100
Subject: [PATCH 11/41] undo unnecessary change

---
 bench.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/bench.py b/bench.py
index dfb980903..315f89a43 100644
--- a/bench.py
+++ b/bench.py
@@ -480,7 +480,8 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
     return tuple(full_data.values())
 
 
-def gen_basic_dict(library, algorithm, stage, params, data):
+def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
+                   alg_params=None):
     result = {
         'library': library,
         'algorithm': algorithm,
@@ -495,9 +496,6 @@ def gen_basic_dict(library, algorithm, stage, params, data):
             'columns': data.shape[1]
         }
     }
-    return result
-
-def update_algorithm_parameters(result, alg_instance=None, alg_params=None):
     result['algorithm_parameters'] = {}
     if alg_instance is not None:
         if 'Booster' in str(type(alg_instance)):
@@ -509,15 +507,8 @@ def update_algorithm_parameters(result, alg_instance=None, alg_params=None):
                 alg_instance_params['dtype'] = str(
                     alg_instance_params['dtype'])
         result['algorithm_parameters'].update(alg_instance_params)
-        if 'init' in result['algorithm_parameters']:
-            if not isinstance(result['algorithm_parameters']['init'], str):
-                result['algorithm_parameters']['init'] = 'random'
     if alg_params is not None:
         result['algorithm_parameters'].update(alg_params)
-        if 'init' in result['algorithm_parameters'].keys():
-            if not isinstance(result['algorithm_parameters']['init'], str):
-                result['algorithm_parameters']['init'] = 'random'
-    result['algorithm_parameters'].pop('handle',None)
     return result
 
 
@@ -545,7 +536,11 @@ def print_output(library, algorithm, stages, params, functions,
             elif algorithm == 'dbscan':
                 result.update({'n_clusters': params.n_clusters})
         # replace non-string init with string for kmeans benchmarks
-        result = update_algorithm_parameters(result, alg_instance, alg_params)
+        if alg_instance is not None:
+            if 'init' in result['algorithm_parameters'].keys():
+                if not isinstance(result['algorithm_parameters']['init'], str):
+                    result['algorithm_parameters']['init'] = 'random'
+        result['algorithm_parameters'].pop('handle',None)
         output.append(result)
     print(json.dumps(output, indent=4))
 

From 778f3c086ec91b6ae58fb2f30c52923340e2df0a Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 15:21:06 +0100
Subject: [PATCH 12/41] refactor load_data?

---
 bench.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bench.py b/bench.py
index 315f89a43..fdc30ffd6 100644
--- a/bench.py
+++ b/bench.py
@@ -438,6 +438,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
     for element in full_data:
         file_arg = f'file_{element}'
         # load and convert data from npy/csv file if path is specified
+        new_dtype = int_dtype if 'y' in element and int_label else params.dtype
         if param_vars[file_arg] is not None:
             if param_vars[file_arg].name.endswith('.npy'):
                 data = np.load(param_vars[file_arg].name, allow_pickle=True)
@@ -445,7 +446,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
                 data = read_csv(param_vars[file_arg].name, params)
             full_data[element] = convert_data(
                 data,
-                int_dtype if 'y' in element and int_label else params.dtype,
+                new_dtype,
                 params.data_order, params.data_format
             )
         if full_data[element] is None:
@@ -453,7 +454,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
             if element in generated_data:
                 full_data[element] = convert_data(
                     np.random.rand(*params.shape),
-                    int_dtype if 'y' in element and int_label else params.dtype,
+                    new_dtype,
                     params.data_order, params.data_format)
         else:
             # convert existing labels from 1- to 2-dimensional

From a88cac07fde1bc3b4d6d62817922a86919001f12 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 15:39:50 +0100
Subject: [PATCH 13/41] refactor load_data?

---
 bench.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/bench.py b/bench.py
index fdc30ffd6..a4152d7fb 100644
--- a/bench.py
+++ b/bench.py
@@ -459,11 +459,15 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
         else:
             # convert existing labels from 1- to 2-dimensional
             # if it's forced and possible
-            if 'y' in element and label_2d and hasattr(full_data[element], 'reshape'):
+            condition = 'y' in element
+            condition = condition and label_2d
+            condition = condition and hasattr(full_data[element], 'reshape')
+            if condition:
                 full_data[element] = full_data[element].reshape(
                     (full_data[element].shape[0], 1))
+            add_dtype = add_dtype and not hasattr(full_data[element], 'dtype')
             # add dtype property to data if it's needed and doesn't exist
-            if add_dtype and not hasattr(full_data[element], 'dtype'):
+            if add_dtype:
                 if hasattr(full_data[element], 'values'):
                     full_data[element].dtype = full_data[element].values.dtype
                 elif hasattr(full_data[element], 'dtypes'):

From 982c0f6d73e3c54f6597f9f3006f23039c7daca4 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 15:44:03 +0100
Subject: [PATCH 14/41] undo mistake

---
 bench.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bench.py b/bench.py
index a4152d7fb..cc18decb4 100644
--- a/bench.py
+++ b/bench.py
@@ -465,9 +465,8 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
             if condition:
                 full_data[element] = full_data[element].reshape(
                     (full_data[element].shape[0], 1))
-            add_dtype = add_dtype and not hasattr(full_data[element], 'dtype')
             # add dtype property to data if it's needed and doesn't exist
-            if add_dtype:
+            if add_dtype and not hasattr(full_data[element], 'dtype'):
                 if hasattr(full_data[element], 'values'):
                     full_data[element].dtype = full_data[element].values.dtype
                 elif hasattr(full_data[element], 'dtypes'):

From 53011f3e706438e0feeee98545d72a1aa0e54360 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Fri, 1 Oct 2021 16:08:59 +0100
Subject: [PATCH 15/41] undo pbar

---
 datasets/loader_utils.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index b65148441..4a378e6ad 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -15,14 +15,17 @@
 # ===============================================================================
 
 import re
-from urllib.request import urlretrieve
-
+from urllib.request import urlretrieve, Request
+import os
 import numpy as np
 import tqdm
 
+pbar: tqdm.tqdm = None
 
 def _show_progress(block_num: int, block_size: int, total_size: int) -> None:
-    pbar: tqdm.tqdm = tqdm.tqdm(total=total_size / 1024, unit='kB')
+    global pbar
+    if pbar is None:
+        pbar = tqdm.tqdm(total=total_size / 1024, unit='kB')
 
     downloaded = block_num * block_size
     if downloaded < total_size:
@@ -33,7 +36,11 @@ def _show_progress(block_num: int, block_size: int, total_size: int) -> None:
 
 
 def retrieve(url: str, filename: str) -> None:
-    urlretrieve(url, filename, reporthook=_show_progress)
+    if url.lower().startswith('http'):
+        req = Request(url)
+    elif not os.path.isfile(url):
+        raise ValueError, None
+    urlretrieve(url, filename, reporthook=_show_progress) #nosec
 
 
 def read_libsvm_msrank(file_obj, n_samples, n_features, dtype):

From 9b041394a96c2fe27a8bb63e8a795a8749e3ecc6 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 15:45:49 +0100
Subject: [PATCH 16/41] rewrite urlretrieve w/o urllib

---
 datasets/loader_utils.py | 44 +++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index 4a378e6ad..77f93a839 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -15,32 +15,34 @@
 # ===============================================================================
 
 import re
-from urllib.request import urlretrieve, Request
+import requests
 import os
+from urllib.request import urlretrieve
+from shutil import copyfile
 import numpy as np
-import tqdm
-
-pbar: tqdm.tqdm = None
-
-def _show_progress(block_num: int, block_size: int, total_size: int) -> None:
-    global pbar
-    if pbar is None:
-        pbar = tqdm.tqdm(total=total_size / 1024, unit='kB')
-
-    downloaded = block_num * block_size
-    if downloaded < total_size:
-        pbar.update(block_size / 1024)
-    else:
-        pbar.close()
-        pbar = None
+from tqdm import tqdm
 
 
 def retrieve(url: str, filename: str) -> None:
-    if url.lower().startswith('http'):
-        req = Request(url)
-    elif not os.path.isfile(url):
-        raise ValueError, None
-    urlretrieve(url, filename, reporthook=_show_progress) #nosec
+    # rewritting urlretrieve without using urllib library,
+    # otherwise it would fail codefactor test due to security issues.
+    if os.path.isfile(url):
+        # reporthook is ignored for local urls
+        copyfile(url, filename)
+    elif url.startswith('http'):
+        response = requests.get(url,stream=True)
+        if response.status_code != 200:
+            raise AssertionError(f"Failed to download from {url},\nResponse returned status code {response.status_code}")
+        total_size = int(response.headers.get('content-length', 0))
+        block_size = 8192
+        pbar = tqdm(total=total_size/1024, unit='kB')
+        with open(filename, 'wb+') as file:
+            for data in response.iter_content(block_size):
+                pbar.update(len(data)/1024)
+                file.write(data)
+        pbar.close()
+        if total_size != 0 and pbar.n != total_size/1024:
+            raise AssertionError("Some content was present but not downloaded/written")
 
 
 def read_libsvm_msrank(file_obj, n_samples, n_features, dtype):

From 3d4f9455df51d55a6f65e2ee1e98e7e6e96f4f2b Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:22:52 +0100
Subject: [PATCH 17/41] Revert "rewrite urlretrieve w/o urllib"

9b041394a96c2fe27a8bb63e8a795a8749e3ecc6
---
 datasets/loader_utils.py | 44 +++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index 77f93a839..4a378e6ad 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -15,34 +15,32 @@
 # ===============================================================================
 
 import re
-import requests
+from urllib.request import urlretrieve, Request
 import os
-from urllib.request import urlretrieve
-from shutil import copyfile
 import numpy as np
-from tqdm import tqdm
+import tqdm
 
+pbar: tqdm.tqdm = None
 
-def retrieve(url: str, filename: str) -> None:
-    # rewritting urlretrieve without using urllib library,
-    # otherwise it would fail codefactor test due to security issues.
-    if os.path.isfile(url):
-        # reporthook is ignored for local urls
-        copyfile(url, filename)
-    elif url.startswith('http'):
-        response = requests.get(url,stream=True)
-        if response.status_code != 200:
-            raise AssertionError(f"Failed to download from {url},\nResponse returned status code {response.status_code}")
-        total_size = int(response.headers.get('content-length', 0))
-        block_size = 8192
-        pbar = tqdm(total=total_size/1024, unit='kB')
-        with open(filename, 'wb+') as file:
-            for data in response.iter_content(block_size):
-                pbar.update(len(data)/1024)
-                file.write(data)
+def _show_progress(block_num: int, block_size: int, total_size: int) -> None:
+    global pbar
+    if pbar is None:
+        pbar = tqdm.tqdm(total=total_size / 1024, unit='kB')
+
+    downloaded = block_num * block_size
+    if downloaded < total_size:
+        pbar.update(block_size / 1024)
+    else:
         pbar.close()
-        if total_size != 0 and pbar.n != total_size/1024:
-            raise AssertionError("Some content was present but not downloaded/written")
+        pbar = None
+
+
+def retrieve(url: str, filename: str) -> None:
+    if url.lower().startswith('http'):
+        req = Request(url)
+    elif not os.path.isfile(url):
+        raise ValueError, None
+    urlretrieve(url, filename, reporthook=_show_progress) #nosec
 
 
 def read_libsvm_msrank(file_obj, n_samples, n_features, dtype):

From f07e97e93b5db75b5b484b5795e5c2faea53d616 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:25:58 +0100
Subject: [PATCH 18/41] Reapply "rewrite urlretrieve w/o urllib"

9b041394a96c2fe27a8bb63e8a795a8749e3ecc6

From 58e93a0c7a4d0ed49a378eb8ab051372a424ec46 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:30:50 +0100
Subject: [PATCH 19/41] fix bug

---
 bench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench.py b/bench.py
index cc18decb4..7922f1e5a 100644
--- a/bench.py
+++ b/bench.py
@@ -456,7 +456,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
                     np.random.rand(*params.shape),
                     new_dtype,
                     params.data_order, params.data_format)
-        else:
+        if full_data[element] is not None:
             # convert existing labels from 1- to 2-dimensional
             # if it's forced and possible
             condition = 'y' in element

From d258536ca328d2594b7700a9933d1cd48ac1e0d8 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:22:52 +0100
Subject: [PATCH 20/41] Revert "rewrite urlretrieve w/o urllib"

9b041394a96c2fe27a8bb63e8a795a8749e3ecc6

From f10e26140d9a9f077a4c1ee996438cd6e3759a79 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:30:50 +0100
Subject: [PATCH 21/41] fix bug

---
 bench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench.py b/bench.py
index cc18decb4..7922f1e5a 100644
--- a/bench.py
+++ b/bench.py
@@ -456,7 +456,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
                     np.random.rand(*params.shape),
                     new_dtype,
                     params.data_order, params.data_format)
-        else:
+        if full_data[element] is not None:
             # convert existing labels from 1- to 2-dimensional
             # if it's forced and possible
             condition = 'y' in element

From 95a34c1e323eb50e70301a6cfa07bf4eef04a232 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:25:58 +0100
Subject: [PATCH 22/41] Reapply "rewrite urlretrieve w/o urllib"

9b041394a96c2fe27a8bb63e8a795a8749e3ecc6

From 52c39cf664945af25734a16cee62357ae2ebbf10 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 15:45:49 +0100
Subject: [PATCH 23/41] rewrite urlretrieve w/o urllib

---
 datasets/loader_utils.py | 44 +++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index 4a378e6ad..77f93a839 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -15,32 +15,34 @@
 # ===============================================================================
 
 import re
-from urllib.request import urlretrieve, Request
+import requests
 import os
+from urllib.request import urlretrieve
+from shutil import copyfile
 import numpy as np
-import tqdm
-
-pbar: tqdm.tqdm = None
-
-def _show_progress(block_num: int, block_size: int, total_size: int) -> None:
-    global pbar
-    if pbar is None:
-        pbar = tqdm.tqdm(total=total_size / 1024, unit='kB')
-
-    downloaded = block_num * block_size
-    if downloaded < total_size:
-        pbar.update(block_size / 1024)
-    else:
-        pbar.close()
-        pbar = None
+from tqdm import tqdm
 
 
 def retrieve(url: str, filename: str) -> None:
-    if url.lower().startswith('http'):
-        req = Request(url)
-    elif not os.path.isfile(url):
-        raise ValueError, None
-    urlretrieve(url, filename, reporthook=_show_progress) #nosec
+    # rewritting urlretrieve without using urllib library,
+    # otherwise it would fail codefactor test due to security issues.
+    if os.path.isfile(url):
+        # reporthook is ignored for local urls
+        copyfile(url, filename)
+    elif url.startswith('http'):
+        response = requests.get(url,stream=True)
+        if response.status_code != 200:
+            raise AssertionError(f"Failed to download from {url},\nResponse returned status code {response.status_code}")
+        total_size = int(response.headers.get('content-length', 0))
+        block_size = 8192
+        pbar = tqdm(total=total_size/1024, unit='kB')
+        with open(filename, 'wb+') as file:
+            for data in response.iter_content(block_size):
+                pbar.update(len(data)/1024)
+                file.write(data)
+        pbar.close()
+        if total_size != 0 and pbar.n != total_size/1024:
+            raise AssertionError("Some content was present but not downloaded/written")
 
 
 def read_libsvm_msrank(file_obj, n_samples, n_features, dtype):

From 5f8f8af505df10c464c0cb2bf93faffa9b9b2d53 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:53:24 +0100
Subject: [PATCH 24/41] undo refactoring

---
 bench.py | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/bench.py b/bench.py
index 7922f1e5a..c080b05eb 100644
--- a/bench.py
+++ b/bench.py
@@ -456,21 +456,25 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
                     np.random.rand(*params.shape),
                     new_dtype,
                     params.data_order, params.data_format)
-        if full_data[element] is not None:
-            # convert existing labels from 1- to 2-dimensional
-            # if it's forced and possible
-            condition = 'y' in element
-            condition = condition and label_2d
-            condition = condition and hasattr(full_data[element], 'reshape')
-            if condition:
-                full_data[element] = full_data[element].reshape(
-                    (full_data[element].shape[0], 1))
-            # add dtype property to data if it's needed and doesn't exist
-            if add_dtype and not hasattr(full_data[element], 'dtype'):
-                if hasattr(full_data[element], 'values'):
-                    full_data[element].dtype = full_data[element].values.dtype
-                elif hasattr(full_data[element], 'dtypes'):
-                    full_data[element].dtype = full_data[element].dtypes[0].type
+       # generate and convert data if it's marked and path isn't specified
+        if full_data[element] is None and element in generated_data:
+            full_data[element] = convert_data(
+                np.random.rand(*params.shape),
+                int_dtype if 'y' in element and int_label else params.dtype,
+                params.data_order, params.data_format)
+        # convert existing labels from 1- to 2-dimensional
+        # if it's forced and possible
+        if full_data[element] is not None and 'y' in element \
+                and label_2d and hasattr(full_data[element], 'reshape'):
+            full_data[element] = full_data[element].reshape(
+                (full_data[element].shape[0], 1))
+        # add dtype property to data if it's needed and doesn't exist
+        if full_data[element] is not None and add_dtype and \
+                not hasattr(full_data[element], 'dtype'):
+            if hasattr(full_data[element], 'values'):
+                full_data[element].dtype = full_data[element].values.dtype
+            elif hasattr(full_data[element], 'dtypes'):
+                full_data[element].dtype = full_data[element].dtypes[0].type
 
     params.dtype = get_dtype(full_data['X_train'])
     # add size to parameters which is need for some cases

From 4f33ef727f03a0d722da065548601edaceeceb0c Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 17:57:17 +0100
Subject: [PATCH 25/41] add requests to requirements

---
 sklearn_bench/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn_bench/requirements.txt b/sklearn_bench/requirements.txt
index 28c7de80d..fa269e6cb 100755
--- a/sklearn_bench/requirements.txt
+++ b/sklearn_bench/requirements.txt
@@ -3,3 +3,4 @@ pandas
 scikit-learn-intelex
 openpyxl
 tqdm
+requests
\ No newline at end of file

From 4f3db1c8d06a92b0fff7b5d0b966e9f61b7caade Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sat, 2 Oct 2021 18:02:16 +0100
Subject: [PATCH 26/41] add requests as requirement

---
 azure-pipelines.yml            | 152 ++++++++++++++++-----------------
 daal4py_bench/requirements.txt |   1 +
 xgboost_bench/requirements.txt |   1 +
 3 files changed, 78 insertions(+), 76 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 4e9cc4b5f..300a50f33 100755
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -1,80 +1,80 @@
 variables:
   - name: python.version
-    value: '3.8'
+    value: "3.8"
 
 jobs:
-- job: Linux_Sklearn
-  pool:
-    vmImage: 'ubuntu-20.04'
-  steps:
-  - task: UsePythonVersion@0
-    displayName: 'Use Python $(python.version)'
-    inputs:
-      versionSpec: '$(python.version)'
-  - script: |
-      pip install -r sklearn_bench/requirements.txt
-      python runner.py --configs configs/testing/sklearn.json
-    displayName: Run bench
-- job: Linux_XGBoost
-  pool:
-    vmImage: 'ubuntu-20.04'
-  steps:
-  - task: UsePythonVersion@0
-    displayName: 'Use Python $(python.version)'
-    inputs:
-      versionSpec: '$(python.version)'
-  - script: |
-      pip install -r xgboost_bench/requirements.txt
-      python runner.py --configs configs/testing/xgboost.json --no-intel-optimized
-    displayName: Run bench
-- job: Linux_daal4py
-  pool:
-    vmImage: 'ubuntu-20.04'
-  steps:
-  - task: UsePythonVersion@0
-    displayName: 'Use Python $(python.version)'
-    inputs:
-      versionSpec: '$(python.version)'
-  - script: |
-      pip install -r daal4py_bench/requirements.txt
-      python runner.py --configs configs/testing/daal4py.json --no-intel-optimized
-    displayName: Run bench
-- job: Linux_XGBoost_and_daal4py
-  pool:
-    vmImage: 'ubuntu-20.04'
-  steps:
-  - script: |
-      conda update -y -q conda
-      conda create -n bench -q -y -c conda-forge python=3.7 pandas xgboost scikit-learn daal4py tqdm
-    displayName: Create Anaconda environment
-  - script: |
-      . /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate bench
-      python runner.py --configs configs/testing/daal4py_xgboost.json --no-intel-optimized
-    displayName: Run bench
-- job: Pep8
-  pool:
-    vmImage: 'ubuntu-20.04'
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(python.version)'
-      addToPath: true
-  - script: |
-      python -m pip install --upgrade pip setuptools
-      pip install flake8
-      flake8 --max-line-length=100 --count
-    displayName: 'PEP 8 check'
-- job: Mypy
-  pool:
-    vmImage: 'ubuntu-20.04'
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(python.version)'
-      addToPath: true
-  - script: |
-      python -m pip install --upgrade pip setuptools
-      pip install mypy data-science-types
-      mypy . --ignore-missing-imports
-    displayName: 'mypy check'
+  - job: Linux_Sklearn
+    pool:
+      vmImage: "ubuntu-20.04"
+    steps:
+      - task: UsePythonVersion@0
+        displayName: "Use Python $(python.version)"
+        inputs:
+          versionSpec: "$(python.version)"
+      - script: |
+          pip install -r sklearn_bench/requirements.txt
+          python runner.py --configs configs/testing/sklearn.json
+        displayName: Run bench
+  - job: Linux_XGBoost
+    pool:
+      vmImage: "ubuntu-20.04"
+    steps:
+      - task: UsePythonVersion@0
+        displayName: "Use Python $(python.version)"
+        inputs:
+          versionSpec: "$(python.version)"
+      - script: |
+          pip install -r xgboost_bench/requirements.txt
+          python runner.py --configs configs/testing/xgboost.json --no-intel-optimized
+        displayName: Run bench
+  - job: Linux_daal4py
+    pool:
+      vmImage: "ubuntu-20.04"
+    steps:
+      - task: UsePythonVersion@0
+        displayName: "Use Python $(python.version)"
+        inputs:
+          versionSpec: "$(python.version)"
+      - script: |
+          pip install -r daal4py_bench/requirements.txt
+          python runner.py --configs configs/testing/daal4py.json --no-intel-optimized
+        displayName: Run bench
+  - job: Linux_XGBoost_and_daal4py
+    pool:
+      vmImage: "ubuntu-20.04"
+    steps:
+      - script: |
+          conda update -y -q conda
+          conda create -n bench -q -y -c conda-forge python=3.7 pandas xgboost scikit-learn daal4py tqdm requests
+        displayName: Create Anaconda environment
+      - script: |
+          . /usr/share/miniconda/etc/profile.d/conda.sh
+          conda activate bench
+          python runner.py --configs configs/testing/daal4py_xgboost.json --no-intel-optimized
+        displayName: Run bench
+  - job: Pep8
+    pool:
+      vmImage: "ubuntu-20.04"
+    steps:
+      - task: UsePythonVersion@0
+        inputs:
+          versionSpec: "$(python.version)"
+          addToPath: true
+      - script: |
+          python -m pip install --upgrade pip setuptools
+          pip install flake8 requests
+          flake8 --max-line-length=100 --count
+        displayName: "PEP 8 check"
+  - job: Mypy
+    pool:
+      vmImage: "ubuntu-20.04"
+    steps:
+      - task: UsePythonVersion@0
+        inputs:
+          versionSpec: "$(python.version)"
+          addToPath: true
+      - script: |
+          python -m pip install --upgrade pip setuptools
+          pip install mypy data-science-types requests
+          mypy . --ignore-missing-imports
+        displayName: "mypy check"
diff --git a/daal4py_bench/requirements.txt b/daal4py_bench/requirements.txt
index 1051f78ca..400c1ab7c 100644
--- a/daal4py_bench/requirements.txt
+++ b/daal4py_bench/requirements.txt
@@ -3,3 +3,4 @@ pandas < 1.3.0
 daal4py
 openpyxl
 tqdm
+requests
\ No newline at end of file
diff --git a/xgboost_bench/requirements.txt b/xgboost_bench/requirements.txt
index 79bc07cc5..3be916066 100755
--- a/xgboost_bench/requirements.txt
+++ b/xgboost_bench/requirements.txt
@@ -3,3 +3,4 @@ pandas
 xgboost
 openpyxl
 tqdm
+requests
\ No newline at end of file

From e6846439c8c4e23c270c6ac75705ac29a372006e Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 08:52:36 +0100
Subject: [PATCH 27/41] fix line too long

---
 datasets/loader_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index 77f93a839..c7c922358 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -32,7 +32,8 @@ def retrieve(url: str, filename: str) -> None:
     elif url.startswith('http'):
         response = requests.get(url,stream=True)
         if response.status_code != 200:
-            raise AssertionError(f"Failed to download from {url},\nResponse returned status code {response.status_code}")
+            raise AssertionError(f"Failed to download from {url},\n"+\
+                "Response returned status code {response.status_code}")
         total_size = int(response.headers.get('content-length', 0))
         block_size = 8192
         pbar = tqdm(total=total_size/1024, unit='kB')

From bad15adc46920c49fc051d5c2d6c7d0bee5bd544 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 09:03:21 +0100
Subject: [PATCH 28/41] attempt to fix mypy error

---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 300a50f33..037052a9a 100755
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -75,6 +75,6 @@ jobs:
           addToPath: true
       - script: |
           python -m pip install --upgrade pip setuptools
-          pip install mypy data-science-types requests
+          pip install mypy data-science-types requests types-requests
           mypy . --ignore-missing-imports
         displayName: "mypy check"

From c3d70e0e9e15cc8d62322cade6de363b4a9cc706 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 09:09:26 +0100
Subject: [PATCH 29/41] add missing params

---
 bench.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bench.py b/bench.py
index c080b05eb..a43ed1fa6 100644
--- a/bench.py
+++ b/bench.py
@@ -527,7 +527,8 @@ def print_output(library, algorithm, stages, params, functions,
         return
     output = []
     for i, stage in enumerate(stages):
-        result = gen_basic_dict(library, algorithm, stage, params, data[i])
+        result = gen_basic_dict(library, algorithm, stage, params,
+                                data[i], alg_instance, alg_params)
         result.update({'time[s]': times[i]})
         if isinstance(metric_type, str):
             result.update({f'{metric_type}': metrics[i]})
@@ -548,7 +549,7 @@ def print_output(library, algorithm, stages, params, functions,
             if 'init' in result['algorithm_parameters'].keys():
                 if not isinstance(result['algorithm_parameters']['init'], str):
                     result['algorithm_parameters']['init'] = 'random'
-        result['algorithm_parameters'].pop('handle',None)
+        result['algorithm_parameters'].pop('handle', None)
         output.append(result)
     print(json.dumps(output, indent=4))
 

From f081a0c7a3364b15f412c649106e9b96aa4676a5 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 09:15:26 +0100
Subject: [PATCH 30/41] autopep8 fix

---
 datasets/loader_utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index c7c922358..de172b7e8 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -30,10 +30,10 @@ def retrieve(url: str, filename: str) -> None:
         # reporthook is ignored for local urls
         copyfile(url, filename)
     elif url.startswith('http'):
-        response = requests.get(url,stream=True)
+        response = requests.get(url, stream=True)
         if response.status_code != 200:
-            raise AssertionError(f"Failed to download from {url},\n"+\
-                "Response returned status code {response.status_code}")
+            raise AssertionError(f"Failed to download from {url},\n" +
+                                 "Response returned status code {response.status_code}")
         total_size = int(response.headers.get('content-length', 0))
         block_size = 8192
         pbar = tqdm(total=total_size/1024, unit='kB')
@@ -43,7 +43,8 @@ def retrieve(url: str, filename: str) -> None:
                 file.write(data)
         pbar.close()
         if total_size != 0 and pbar.n != total_size/1024:
-            raise AssertionError("Some content was present but not downloaded/written")
+            raise AssertionError(
+                "Some content was present but not downloaded/written")
 
 
 def read_libsvm_msrank(file_obj, n_samples, n_features, dtype):

From f0f7dac73df264816711b95870cc11ab8ddbe1f8 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 09:15:36 +0100
Subject: [PATCH 31/41] fix wrong indentation lvl

---
 bench.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench.py b/bench.py
index a43ed1fa6..3b4700a32 100644
--- a/bench.py
+++ b/bench.py
@@ -549,7 +549,7 @@ def print_output(library, algorithm, stages, params, functions,
             if 'init' in result['algorithm_parameters'].keys():
                 if not isinstance(result['algorithm_parameters']['init'], str):
                     result['algorithm_parameters']['init'] = 'random'
-        result['algorithm_parameters'].pop('handle', None)
+            result['algorithm_parameters'].pop('handle', None)
         output.append(result)
     print(json.dumps(output, indent=4))
 

From 51ed719e96fe2f1b2a7a9b63fe7faab989cea6d1 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 09:22:37 +0100
Subject: [PATCH 32/41] pep8 fixes?

---
 bench.py                 | 2 +-
 datasets/loader_utils.py | 1 -
 utils.py                 | 5 +++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/bench.py b/bench.py
index 3b4700a32..18501a807 100644
--- a/bench.py
+++ b/bench.py
@@ -456,7 +456,7 @@ def load_data(params, generated_data=[], add_dtype=False, label_2d=False,
                     np.random.rand(*params.shape),
                     new_dtype,
                     params.data_order, params.data_format)
-       # generate and convert data if it's marked and path isn't specified
+        # generate and convert data if it's marked and path isn't specified
         if full_data[element] is None and element in generated_data:
             full_data[element] = convert_data(
                 np.random.rand(*params.shape),
diff --git a/datasets/loader_utils.py b/datasets/loader_utils.py
index de172b7e8..4385e3dda 100755
--- a/datasets/loader_utils.py
+++ b/datasets/loader_utils.py
@@ -17,7 +17,6 @@
 import re
 import requests
 import os
-from urllib.request import urlretrieve
 from shutil import copyfile
 import numpy as np
 from tqdm import tqdm
diff --git a/utils.py b/utils.py
index 0696eb6a7..6e025b804 100755
--- a/utils.py
+++ b/utils.py
@@ -180,6 +180,7 @@ def generate_cases(params: Dict[str, Union[List[Any], Any]]) -> List[str]:
                         dashes + param + ' ' + str(val)
         else:
             dashes = '-' if len(param) == 1 else '--'
-            for command_num,_ in enumerate(commands):
-                commands[command_num] += ' ' + dashes + param + ' ' + str(values)
+            for command_num, _ in enumerate(commands):
+                commands[command_num] += ' ' + \
+                    dashes + param + ' ' + str(values)
     return commands

From 38a5355b04aac5b6b84c9578415500f95cc6b303 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 09:35:17 +0100
Subject: [PATCH 33/41] undo if return None change

---
 bench.py | 57 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/bench.py b/bench.py
index 18501a807..c68e909a5 100644
--- a/bench.py
+++ b/bench.py
@@ -523,35 +523,34 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
 def print_output(library, algorithm, stages, params, functions,
                  times, metric_type, metrics, data, alg_instance=None,
                  alg_params=None):
-    if params.output_format != 'json':
-        return
-    output = []
-    for i, stage in enumerate(stages):
-        result = gen_basic_dict(library, algorithm, stage, params,
-                                data[i], alg_instance, alg_params)
-        result.update({'time[s]': times[i]})
-        if isinstance(metric_type, str):
-            result.update({f'{metric_type}': metrics[i]})
-        elif isinstance(metric_type, list):
-            for ind, val in enumerate(metric_type):
-                if metrics[ind][i] is not None:
-                    result.update({f'{val}': metrics[ind][i]})
-        if hasattr(params, 'n_classes'):
-            result['input_data'].update({'classes': params.n_classes})
-        if hasattr(params, 'n_clusters'):
-            if algorithm == 'kmeans':
-                result['input_data'].update(
-                    {'n_clusters': params.n_clusters})
-            elif algorithm == 'dbscan':
-                result.update({'n_clusters': params.n_clusters})
-        # replace non-string init with string for kmeans benchmarks
-        if alg_instance is not None:
-            if 'init' in result['algorithm_parameters'].keys():
-                if not isinstance(result['algorithm_parameters']['init'], str):
-                    result['algorithm_parameters']['init'] = 'random'
-            result['algorithm_parameters'].pop('handle', None)
-        output.append(result)
-    print(json.dumps(output, indent=4))
+    if params.output_format == 'json':
+        output = []
+        for i, stage in enumerate(stages):
+            result = gen_basic_dict(library, algorithm, stage, params,
+                                    data[i], alg_instance, alg_params)
+            result.update({'time[s]': times[i]})
+            if isinstance(metric_type, str):
+                result.update({f'{metric_type}': metrics[i]})
+            elif isinstance(metric_type, list):
+                for ind, val in enumerate(metric_type):
+                    if metrics[ind][i] is not None:
+                        result.update({f'{val}': metrics[ind][i]})
+            if hasattr(params, 'n_classes'):
+                result['input_data'].update({'classes': params.n_classes})
+            if hasattr(params, 'n_clusters'):
+                if algorithm == 'kmeans':
+                    result['input_data'].update(
+                        {'n_clusters': params.n_clusters})
+                elif algorithm == 'dbscan':
+                    result.update({'n_clusters': params.n_clusters})
+            # replace non-string init with string for kmeans benchmarks
+            if alg_instance is not None:
+                if 'init' in result['algorithm_parameters'].keys():
+                    if not isinstance(result['algorithm_parameters']['init'], str):
+                        result['algorithm_parameters']['init'] = 'random'
+                result['algorithm_parameters'].pop('handle', None)
+            output.append(result)
+        print(json.dumps(output, indent=4))
 
 
 def run_with_context(params, function):

From 3e875d1d8857f6a718ab40e9b6cc20d92d920044 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:14:03 +0100
Subject: [PATCH 34/41] do not use getattr for daal4py

---
 daal4py_bench/distances.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/daal4py_bench/distances.py b/daal4py_bench/distances.py
index 70408856d..82d2c5ec8 100644
--- a/daal4py_bench/distances.py
+++ b/daal4py_bench/distances.py
@@ -17,7 +17,7 @@
 import argparse
 
 import bench
-import daal4py
+from daal4py import cosine_distance, correlation_distance
 from daal4py.sklearn._utils import getFPType
 
 
@@ -34,9 +34,10 @@ def compute_distances(pairwise_distances, X):
 params = bench.parse_args(parser)
 
 # Load data
-X, _, _, _ = bench.load_data(params, generated_data=['X_train'], add_dtype=True)
+X, _, _, _ = bench.load_data(params, generated_data=[
+                             'X_train'], add_dtype=True)
 
-pairwise_distances = getattr(daal4py, f'{params.metric}_distance')
+pairwise_distances = cosine_distance if params.metric == 'cosine' else correlation_distance
 
 time, _ = bench.measure_function_time(
     compute_distances, pairwise_distances, X, params=params)

From 0d744d4bf33f6d9c2d403c8006e4c53d8f7f87b0 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:14:09 +0100
Subject: [PATCH 35/41] debugging for tsne

---
 sklearn_bench/tsne.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn_bench/tsne.py b/sklearn_bench/tsne.py
index 0083972a3..eb3fa8b91 100644
--- a/sklearn_bench/tsne.py
+++ b/sklearn_bench/tsne.py
@@ -29,9 +29,13 @@ def main():
                 learning_rate=params.learning_rate, angle=params.angle,
                 min_grad_norm=params.min_grad_norm, random_state=params.random_state)
 
+    print("Created TSNE model")
+
     fit_time, _ = bench.measure_function_time(tsne.fit, X, params=params)
     divergence = tsne.kl_divergence_
 
+    print("Ready to print output")
+
     bench.print_output(
         library='sklearn',
         algorithm='TSNE',

From c2139cbad50b62a1f7fc78698f57a04270050de9 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:24:31 +0100
Subject: [PATCH 36/41] undo logging for tsne

---
 sklearn_bench/tsne.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn_bench/tsne.py b/sklearn_bench/tsne.py
index eb3fa8b91..0083972a3 100644
--- a/sklearn_bench/tsne.py
+++ b/sklearn_bench/tsne.py
@@ -29,13 +29,9 @@ def main():
                 learning_rate=params.learning_rate, angle=params.angle,
                 min_grad_norm=params.min_grad_norm, random_state=params.random_state)
 
-    print("Created TSNE model")
-
     fit_time, _ = bench.measure_function_time(tsne.fit, X, params=params)
     divergence = tsne.kl_divergence_
 
-    print("Ready to print output")
-
     bench.print_output(
         library='sklearn',
         algorithm='TSNE',

From b8a162cfc63a06bdd254952159cd60f6a82af548 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:25:41 +0100
Subject: [PATCH 37/41] ignore daal4py warning

---
 runner.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/runner.py b/runner.py
index 99d992704..685d5fd62 100755
--- a/runner.py
+++ b/runner.py
@@ -120,7 +120,8 @@ def get_configs(path: Path) -> List[str]:
                     if 'testing' in dataset:
                         paths += ' --file-X-test ' + dataset["testing"]["x"]
                         if 'y' in dataset['testing']:
-                            paths += ' --file-y-test ' + dataset["testing"]["y"]
+                            paths += ' --file-y-test ' + \
+                                dataset["testing"]["y"]
                 elif dataset['source'] == 'synthetic':
                     class GenerationArgs:
                         classes: int
@@ -214,14 +215,18 @@ class GenerationArgs:
                                     + f'{extra_stdout}\n'
                             try:
                                 if isinstance(json_result['results'], list):
-                                    json_result['results'].extend(json.loads(stdout))
+                                    json_result['results'].extend(
+                                        json.loads(stdout))
                             except json.JSONDecodeError as decoding_exception:
                                 stderr += f'CASE {case} JSON DECODING ERROR:\n' \
                                     + f'{decoding_exception}\n{stdout}\n'
 
                             if stderr != '':
-                                is_successful = False
-                                logging.warning('Error in benchmark: \n' + stderr)
+                                if stderr != 'root:Device support is limited in daal4py patching. '
+                                'Use Intel(R) Extension for Scikit-learn * for full experience.':
+                                    is_successful = False
+                                    logging.warning(
+                                        'Error in benchmark: \n' + stderr)
 
     json.dump(json_result, args.output_file, indent=4)
     name_result_file = args.output_file.name

From f265af323e85953cacbb4f102b2c29dbf878d326 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:27:20 +0100
Subject: [PATCH 38/41] fix missing line continuation in daal4py warning check

---
 runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runner.py b/runner.py
index 685d5fd62..76316bd38 100755
--- a/runner.py
+++ b/runner.py
@@ -222,8 +222,8 @@ class GenerationArgs:
                                     + f'{decoding_exception}\n{stdout}\n'
 
                             if stderr != '':
-                                if stderr != 'root:Device support is limited in daal4py patching. '
-                                'Use Intel(R) Extension for Scikit-learn * for full experience.':
+                                if stderr != 'root:Device support is limited in daal4py patching. ' \
+                                        + 'Use Intel(R) Extension for Scikit-learn * for full experience.':
                                     is_successful = False
                                     logging.warning(
                                         'Error in benchmark: \n' + stderr)

From 182b256440c705a34aedd51a1aac9cdf562b3a87 Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:45:51 +0100
Subject: [PATCH 39/41] suppress FutureWarning

---
 sklearn_bench/tsne.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn_bench/tsne.py b/sklearn_bench/tsne.py
index 0083972a3..2d9f2d0aa 100644
--- a/sklearn_bench/tsne.py
+++ b/sklearn_bench/tsne.py
@@ -14,8 +14,10 @@
 # limitations under the License.
 # ===============================================================================
 
-import argparse
 import bench
+import argparse
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
 
 
 def main():

From 6fb61f2e6f23ac86a6060a46c6f8ae0ec875211a Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:46:05 +0100
Subject: [PATCH 40/41] ignore daal4py warning

---
 runner.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/runner.py b/runner.py
index 76316bd38..ece6bf07c 100755
--- a/runner.py
+++ b/runner.py
@@ -222,8 +222,7 @@ class GenerationArgs:
                                     + f'{decoding_exception}\n{stdout}\n'
 
                             if stderr != '':
-                                if stderr != 'root:Device support is limited in daal4py patching. ' \
-                                        + 'Use Intel(R) Extension for Scikit-learn * for full experience.':
+                                if not 'daal4py' in stderr:
                                     is_successful = False
                                     logging.warning(
                                         'Error in benchmark: \n' + stderr)

From f442be85491c69e8240e9d847321bd7cf1f0f4bc Mon Sep 17 00:00:00 2001
From: LyndonFan <lyndon0808@gmail.com>
Date: Sun, 3 Oct 2021 10:49:54 +0100
Subject: [PATCH 41/41] pep8 fix

---
 runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner.py b/runner.py
index ece6bf07c..a8ab77e5d 100755
--- a/runner.py
+++ b/runner.py
@@ -222,7 +222,7 @@ class GenerationArgs:
                                     + f'{decoding_exception}\n{stdout}\n'
 
                             if stderr != '':
-                                if not 'daal4py' in stderr:
+                                if 'daal4py' not in stderr:
                                     is_successful = False
                                     logging.warning(
                                         'Error in benchmark: \n' + stderr)