From 78f3576aaa94c5b643c3c6bcdb5922789923c758 Mon Sep 17 00:00:00 2001
From: Andreas Huber <andreas.huber@intel.com>
Date: Sun, 22 Oct 2023 23:36:57 -0700
Subject: [PATCH 1/8] Fix breaking kwarg

---
 modelbuilders_bench/lgbm_mb.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modelbuilders_bench/lgbm_mb.py b/modelbuilders_bench/lgbm_mb.py
index f263d419c..7ddc6a68c 100644
--- a/modelbuilders_bench/lgbm_mb.py
+++ b/modelbuilders_bench/lgbm_mb.py
@@ -118,8 +118,7 @@
 t_train, model_lgbm = bench.measure_function_time(lgbm.train, lgbm_params, lgbm_train,
                                                   params=params,
                                                   num_boost_round=params.n_estimators,
-                                                  valid_sets=lgbm_train,
-                                                  verbose_eval=False)
+                                                  valid_sets=lgbm_train)
 train_metric = None
 if not X_train.equals(X_test):
     y_train_pred = model_lgbm.predict(X_train)

From b3b3a538ad6e49b7221aca15ff49456de59f16df Mon Sep 17 00:00:00 2001
From: Andreas Huber <andreas.huber@intel.com>
Date: Sun, 22 Oct 2023 23:37:13 -0700
Subject: [PATCH 2/8] Add SHAP calculation measurements

---
 modelbuilders_bench/xgb_mb.py | 391 +++++++++++++++++++++++-----------
 1 file changed, 269 insertions(+), 122 deletions(-)

diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py
index 75da615b8..b3199901e 100644
--- a/modelbuilders_bench/xgb_mb.py
+++ b/modelbuilders_bench/xgb_mb.py
@@ -1,5 +1,5 @@
-# ===============================================================================
-# Copyright 2020-2021 Intel Corporation
+# ==============================================================================
+# Copyright 2020-2023 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ===============================================================================
+# ==============================================================================
 
 import argparse
 
@@ -27,120 +27,189 @@ def convert_probs_to_classes(y_prob):
 
 
 def convert_xgb_predictions(y_pred, objective):
-    if objective == 'multi:softprob':
+    if objective == "multi:softprob":
         y_pred = convert_probs_to_classes(y_pred)
-    elif objective == 'binary:logistic':
+    elif objective == "binary:logistic":
         y_pred = (y_pred >= 0.5).astype(np.int32)
     return y_pred
 
 
+def shap_accuracy(new, ref, threshold=1e-5):
+    new_sh = new.reshape(-1, )
+    ref_sh = ref.reshape(-1, )
+    diff = np.abs(new_sh - ref_sh)
+    return (diff < threshold).sum() / float(len(ref_sh))
+
+
 parser = argparse.ArgumentParser(
-    description='xgboost gbt + model transform + daal predict benchmark')
-
-parser.add_argument('--colsample-bytree', type=float, default=1,
-                    help='Subsample ratio of columns '
-                         'when constructing each tree')
-parser.add_argument('--count-dmatrix', default=False, action='store_true',
-                    help='Count DMatrix creation in time measurements')
-parser.add_argument('--enable-experimental-json-serialization', default=True,
-                    choices=('True', 'False'), help='Use JSON to store memory snapshots')
-parser.add_argument('--grow-policy', type=str, default='depthwise',
-                    help='Controls a way new nodes are added to the tree')
-parser.add_argument('--inplace-predict', default=False, action='store_true',
-                    help='Perform inplace_predict instead of default')
-parser.add_argument('--learning-rate', '--eta', type=float, default=0.3,
-                    help='Step size shrinkage used in update '
-                         'to prevents overfitting')
-parser.add_argument('--max-bin', type=int, default=256,
-                    help='Maximum number of discrete bins to '
-                         'bucket continuous features')
-parser.add_argument('--max-delta-step', type=float, default=0,
-                    help='Maximum delta step we allow each leaf output to be')
-parser.add_argument('--max-depth', type=int, default=6,
-                    help='Maximum depth of a tree')
-parser.add_argument('--max-leaves', type=int, default=0,
-                    help='Maximum number of nodes to be added')
-parser.add_argument('--min-child-weight', type=float, default=1,
-                    help='Minimum sum of instance weight needed in a child')
-parser.add_argument('--min-split-loss', '--gamma', type=float, default=0,
-                    help='Minimum loss reduction required to make'
-                         ' partition on a leaf node')
-parser.add_argument('--n-estimators', type=int, default=100,
-                    help='Number of gradient boosted trees')
-parser.add_argument('--objective', type=str, required=True,
-                    choices=('reg:squarederror', 'binary:logistic',
-                             'multi:softmax', 'multi:softprob'),
-                    help='Control a balance of positive and negative weights')
-parser.add_argument('--reg-alpha', type=float, default=0,
-                    help='L1 regularization term on weights')
-parser.add_argument('--reg-lambda', type=float, default=1,
-                    help='L2 regularization term on weights')
-parser.add_argument('--scale-pos-weight', type=float, default=1,
-                    help='Controls a balance of positive and negative weights')
-parser.add_argument('--single-precision-histogram', default=False, action='store_true',
-                    help='Build histograms instead of double precision')
-parser.add_argument('--subsample', type=float, default=1,
-                    help='Subsample ratio of the training instances')
-parser.add_argument('--tree-method', type=str, required=True,
-                    help='The tree construction algorithm used in XGBoost')
+    description="xgboost gbt + model transform + daal predict benchmark"
+)
+
+parser.add_argument(
+    "--colsample-bytree",
+    type=float,
+    default=1,
+    help="Subsample ratio of columns " "when constructing each tree",
+)
+parser.add_argument(
+    "--count-dmatrix",
+    default=False,
+    action="store_true",
+    help="Count DMatrix creation in time measurements",
+)
+parser.add_argument(
+    "--enable-experimental-json-serialization",
+    default=True,
+    choices=("True", "False"),
+    help="Use JSON to store memory snapshots",
+)
+parser.add_argument(
+    "--grow-policy",
+    type=str,
+    default="depthwise",
+    help="Controls a way new nodes are added to the tree",
+)
+parser.add_argument(
+    "--inplace-predict",
+    default=False,
+    action="store_true",
+    help="Perform inplace_predict instead of default",
+)
+parser.add_argument(
+    "--learning-rate",
+    "--eta",
+    type=float,
+    default=0.3,
+    help="Step size shrinkage used in update to prevent overfitting",
+)
+parser.add_argument(
+    "--max-bin",
+    type=int,
+    default=256,
+    help="Maximum number of discrete bins to " "bucket continuous features",
+)
+parser.add_argument(
+    "--max-delta-step",
+    type=float,
+    default=0,
+    help="Maximum delta step we allow each leaf output to be",
+)
+parser.add_argument("--max-depth", type=int, default=6, help="Maximum depth of a tree")
+parser.add_argument(
+    "--max-leaves", type=int, default=0, help="Maximum number of nodes to be added"
+)
+parser.add_argument(
+    "--min-child-weight",
+    type=float,
+    default=1,
+    help="Minimum sum of instance weight needed in a child",
+)
+parser.add_argument(
+    "--min-split-loss",
+    "--gamma",
+    type=float,
+    default=0,
+    help="Minimum loss reduction required to make" " partition on a leaf node",
+)
+parser.add_argument(
+    "--n-estimators", type=int, default=100, help="Number of gradient boosted trees"
+)
+parser.add_argument(
+    "--objective",
+    type=str,
+    required=True,
+    choices=("reg:squarederror", "binary:logistic", "multi:softmax", "multi:softprob"),
+    help="Learning task and the corresponding objective function",
+)
+parser.add_argument(
+    "--reg-alpha", type=float, default=0, help="L1 regularization term on weights"
+)
+parser.add_argument(
+    "--reg-lambda", type=float, default=1, help="L2 regularization term on weights"
+)
+parser.add_argument(
+    "--scale-pos-weight",
+    type=float,
+    default=1,
+    help="Controls a balance of positive and negative weights",
+)
+parser.add_argument(
+    "--single-precision-histogram",
+    default=False,
+    action="store_true",
+    help="Build histograms in single precision instead of double precision",
+)
+parser.add_argument(
+    "--subsample",
+    type=float,
+    default=1,
+    help="Subsample ratio of the training instances",
+)
+parser.add_argument(
+    "--tree-method",
+    type=str,
+    required=True,
+    help="The tree construction algorithm used in XGBoost",
+)
 
 params = bench.parse_args(parser)
 
 X_train, X_test, y_train, y_test = bench.load_data(params)
 
 xgb_params = {
-    'booster': 'gbtree',
-    'verbosity': 0,
-    'learning_rate': params.learning_rate,
-    'min_split_loss': params.min_split_loss,
-    'max_depth': params.max_depth,
-    'min_child_weight': params.min_child_weight,
-    'max_delta_step': params.max_delta_step,
-    'subsample': params.subsample,
-    'sampling_method': 'uniform',
-    'colsample_bytree': params.colsample_bytree,
-    'colsample_bylevel': 1,
-    'colsample_bynode': 1,
-    'reg_lambda': params.reg_lambda,
-    'reg_alpha': params.reg_alpha,
-    'tree_method': params.tree_method,
-    'scale_pos_weight': params.scale_pos_weight,
-    'grow_policy': params.grow_policy,
-    'max_leaves': params.max_leaves,
-    'max_bin': params.max_bin,
-    'objective': params.objective,
-    'seed': params.seed,
-    'single_precision_histogram': params.single_precision_histogram,
-    'enable_experimental_json_serialization':
-        params.enable_experimental_json_serialization
+    "booster": "gbtree",
+    "verbosity": 0,
+    "learning_rate": params.learning_rate,
+    "min_split_loss": params.min_split_loss,
+    "max_depth": params.max_depth,
+    "min_child_weight": params.min_child_weight,
+    "max_delta_step": params.max_delta_step,
+    "subsample": params.subsample,
+    "sampling_method": "uniform",
+    "colsample_bytree": params.colsample_bytree,
+    "colsample_bylevel": 1,
+    "colsample_bynode": 1,
+    "reg_lambda": params.reg_lambda,
+    "reg_alpha": params.reg_alpha,
+    "tree_method": params.tree_method,
+    "scale_pos_weight": params.scale_pos_weight,
+    "grow_policy": params.grow_policy,
+    "max_leaves": params.max_leaves,
+    "max_bin": params.max_bin,
+    "objective": params.objective,
+    "seed": params.seed,
+    "single_precision_histogram": params.single_precision_histogram,
+    "enable_experimental_json_serialization": params.enable_experimental_json_serialization,
 }
 
 if params.threads != -1:
-    xgb_params.update({'nthread': params.threads})
+    xgb_params.update({"nthread": params.threads})
 
-if params.objective.startswith('reg'):
-    task = 'regression'
-    metric_name, metric_func = 'rmse', bench.rmse_score
+if params.objective.startswith("reg"):
+    task = "regression"
+    metric_name, metric_func = "rmse", bench.rmse_score
 else:
-    task = 'classification'
-    metric_name = 'accuracy'
+    task = "classification"
+    metric_name = "accuracy"
     metric_func = bench.accuracy_score
-    if 'cudf' in str(type(y_train)):
+    if "cudf" in str(type(y_train)):
         params.n_classes = y_train[y_train.columns[0]].nunique()
     else:
         params.n_classes = len(np.unique(y_train))
 
     # Covtype has one class more than there is in train
-    if params.dataset_name == 'covtype':
+    if params.dataset_name == "covtype":
         params.n_classes += 1
 
     if params.n_classes > 2:
-        xgb_params['num_class'] = params.n_classes
+        xgb_params["num_class"] = params.n_classes
 
-t_creat_train, dtrain = bench.measure_function_time(xgb.DMatrix, X_train,
-                                                    params=params, label=y_train)
+t_creat_train, dtrain = bench.measure_function_time(
+    xgb.DMatrix, X_train, params=params, label=y_train
+)
 t_creat_test, dtest = bench.measure_function_time(
-    xgb.DMatrix, X_test, params=params, label=y_test)
+    xgb.DMatrix, X_test, params=params, label=y_test
+)
 
 
 def fit(dmatrix):
@@ -150,52 +219,130 @@ def fit(dmatrix):
 
 
 if params.inplace_predict:
+
     def predict(*args):
-        return booster.inplace_predict(np.ascontiguousarray(X_test.values,
-                                                            dtype=np.float32))
+        return booster.inplace_predict(
+            np.ascontiguousarray(X_test.values, dtype=np.float32)
+        )
+
 else:
-    def predict(dmatrix):  # type: ignore
+
+    def predict(dmatrix, **kwargs):  # type: ignore
         if dmatrix is None:
             dmatrix = xgb.DMatrix(X_test, y_test)
-        return booster.predict(dmatrix)
+        return booster.predict(dmatrix, **kwargs)
 
 
 fit_time, booster = bench.measure_function_time(
-    fit, None if params.count_dmatrix else dtrain, params=params)
+    fit, None if params.count_dmatrix else dtrain, params=params
+)
 train_metric = metric_func(
-    convert_xgb_predictions(
-        booster.predict(dtrain),
-        params.objective),
-    y_train)
+    convert_xgb_predictions(booster.predict(dtrain), params.objective), y_train
+)
 
 predict_time, y_pred = bench.measure_function_time(
-    predict, None if params.inplace_predict or params.count_dmatrix else dtest, params=params)
+    predict,
+    None if params.inplace_predict or params.count_dmatrix else dtest,
+    params=params,
+)
 test_metric = metric_func(convert_xgb_predictions(y_pred, params.objective), y_test)
 
+# Call booster.predict directly; the --inplace-predict predict() takes no kwargs.
+shap_contrib_time, shap_contribs = bench.measure_function_time(
+    booster.predict, dtest, pred_contribs=True, params=params
+)
+shap_interaction_time, shap_interactions = bench.measure_function_time(
+    booster.predict, dtest, pred_interactions=True, params=params
+)
+
 transform_time, model_daal = bench.measure_function_time(
-    daal4py.get_gbt_model_from_xgboost, booster, params=params)
-
-if hasattr(params, 'n_classes'):
-    predict_algo = daal4py.gbt_classification_prediction(
-        nClasses=params.n_classes, resultsToEvaluate='computeClassLabels', fptype='float')
-    predict_time_daal, daal_pred = bench.measure_function_time(
-        predict_algo.compute, X_test, model_daal, params=params)
-    test_metric_daal = metric_func(y_test, daal_pred.prediction)
-else:
-    predict_algo = daal4py.gbt_regression_prediction()
-    predict_time_daal, daal_pred = bench.measure_function_time(
-        predict_algo.compute, X_test, model_daal, params=params)
-    test_metric_daal = metric_func(y_test, daal_pred.prediction)
+    daal4py.mb.convert_model, booster, params=params
+)
+
+predict_time_daal, daal_pred = bench.measure_function_time(
+    model_daal.predict, X_test, params=params
+)
+test_metric_daal = metric_func(y_test, daal_pred)
+
+shap_contrib_time_daal, daal_contribs = bench.measure_function_time(
+    model_daal.predict, X_test, pred_contribs=True, params=params
+)
+
+shap_interaction_time_daal, daal_interactions = bench.measure_function_time(
+    model_daal.predict, X_test, pred_interactions=True, params=params
+)
+
+contrib_accuracy = shap_accuracy(shap_contribs, daal_contribs)
+
+interaction_accuracy = shap_accuracy(shap_interactions, daal_interactions)
+
 
 bench.print_output(
-    library='modelbuilders', algorithm=f'xgboost_{task}_and_modelbuilder',
-    stages=['training_preparation', 'training', 'prediction_preparation', 'prediction',
-            'transformation', 'alternative_prediction'],
+    library="modelbuilders",
+    algorithm=f"xgboost_{task}_and_modelbuilder",
+    stages=[
+        "training_preparation",
+        "training",
+        "prediction_preparation",
+        "prediction",
+        "transformation",
+        "alternative_prediction",
+        "shap_contrib_prediction",
+        "alternative_shap_contrib_prediction",
+        "shap_interaction_prediction",
+        "alternative_shap_interaction_prediction",
+    ],
     params=params,
-    functions=['xgb.dmatrix.train', 'xgb.train', 'xgb.dmatrix.test', 'xgb.predict',
-               'daal4py.get_gbt_model_from_xgboost', 'daal4py.compute'],
-    times=[t_creat_train, fit_time, t_creat_test, predict_time, transform_time,
-           predict_time_daal],
-    metric_type=metric_name,
-    metrics=[None, train_metric, None, test_metric, None, test_metric_daal],
-    data=[X_train, X_train, X_test, X_test, X_test, X_test])
+    functions=[
+        "xgb.dmatrix.train",
+        "xgb.train",
+        "xgb.dmatrix.test",
+        "xgb.predict",
+        "daal4py.get_gbt_model_from_xgboost",
+        "daal4py.predict",
+        "xgb.predict(pred_contribs=True)",
+        "daal4py.predict(pred_contribs=True)",
+        "xgb.predict(pred_interactions=True)",
+        "daal4py.predict(pred_interactions=True)",
+    ],
+    times=[
+        t_creat_train,
+        fit_time,
+        t_creat_test,
+        predict_time,
+        transform_time,
+        predict_time_daal,
+        shap_contrib_time,
+        shap_contrib_time_daal,
+        shap_interaction_time,
+        shap_interaction_time_daal,
+    ],
+    metric_type=[metric_name, "accuracy"],
+    metrics=[
+        [
+            None,
+            train_metric,
+            None,
+            test_metric,
+            None,
+            test_metric_daal,
+            None,
+            None,
+            None,
+            None,
+        ],
+        [
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            contrib_accuracy,
+            None,
+            interaction_accuracy,
+        ],
+    ],
+    data=[X_train] * 2 + [X_test] * 8,
+)

From ca03b57cbd1ecafc451246f023c427a5e5797bba Mon Sep 17 00:00:00 2001
From: Andreas Huber <andreas.huber@intel.com>
Date: Sun, 22 Oct 2023 23:46:16 -0700
Subject: [PATCH 3/8] provide lgbm_mb converter script to fix result files

---
 report_generator/fix-lgbm-mb-results.py | 93 +++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 report_generator/fix-lgbm-mb-results.py

diff --git a/report_generator/fix-lgbm-mb-results.py b/report_generator/fix-lgbm-mb-results.py
new file mode 100644
index 000000000..35b1689cf
--- /dev/null
+++ b/report_generator/fix-lgbm-mb-results.py
@@ -0,0 +1,93 @@
+# ==============================================================================
+# Copyright 2020-2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""
+Temporary solution to fix the .json result files created from lgbm_mb.py.
+The result files are in an incompatible format for report_generator.py.
+Attempts to produce xlsx reports fail and create empty files.
+
+After running this script on my-file.json, a new file my-file-fixed.json will be
+produced, containing a JSON version of the results in a compatible format.
+
+Usage:
+
+  python fix-lgbm-mb-results.py my-file.json [another-file.json ...]
+
+
+Note: This is just a quick and dirty hack that does not fix the underlying
+      issue. Rather than changing this file (if something breaks again), the
+      original script lgbm_mb.py should be updated such that it produces valid
+      JSON dumps again.
+"""
+
+from argparse import ArgumentParser
+import json
+from pathlib import Path
+
+def fix_file(fname: Path):
+    with open(fname) as fp:
+        data = json.load(fp)
+
+    # copy all data (aux info etc)
+    fixed = {}
+    for key, val in data.items():
+        fixed[key] = val
+
+    # reset the results - we'll fix them
+    fixed["results"] = []
+
+    current_result = {}
+    for result in data["results"]:
+        if "algorithm" in result:
+            # found a new algo / measurement
+            current_result = result
+            continue
+
+        if "stage" in result:
+            comb = current_result | result
+            if "device" not in comb:
+                comb["device"] = "none"
+
+            if "time[s]" not in comb:
+                comb["time[s]"] = result.get("training_time") or result["prediction_time"]
+
+            if "algorithm_parameters" not in comb:
+                comb["algorithm_paramters"] = {}
+
+            if "accuracy[%]" in comb:
+                comb["accuracy"] = comb["accuracy[%]"]
+
+            replace_pairs = (
+                ("lgbm_train", "training"),
+                ("lgbm_predict", "prediction"),
+                ("daal4py_predict", "alternative_prediction"),
+            )
+            for s, r in replace_pairs:
+                comb["stage"] = comb["stage"].replace(s, r)
+
+            fixed["results"].append(comb)
+
+    out_fname = fname.stem + "-fixed.json"
+    with open(out_fname, "w") as fp:
+        json.dump(fixed, fp, indent=4)
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("filenames", nargs="+")
+    args = parser.parse_args()
+    for fname in args.filenames:
+        fix_file(Path(fname))

From f9d6257b46a0b9a7657c2ff9d7cc3cfe52512540 Mon Sep 17 00:00:00 2001
From: Andreas Huber <andreas.huber@intel.com>
Date: Mon, 23 Oct 2023 01:02:48 -0700
Subject: [PATCH 4/8] Use RMSE for SHAP accuracy

---
 modelbuilders_bench/xgb_mb.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py
index b3199901e..749f9ee16 100644
--- a/modelbuilders_bench/xgb_mb.py
+++ b/modelbuilders_bench/xgb_mb.py
@@ -34,11 +34,9 @@ def convert_xgb_predictions(y_pred, objective):
     return y_pred
 
 
-def shap_accuracy(new, ref, threshold=1e-5):
-    new_sh = new.reshape(-1, )
-    ref_sh = ref.reshape(-1, )
-    diff = np.abs(new_sh - ref_sh)
-    return (diff < threshold).sum() / float(len(ref_sh))
+def shap_accuracy(new, ref):
+    # broadcast all values into single column and calculate RMSE
+    return bench.rmse_score(new.reshape(-1, ), ref.reshape(-1, ))
 
 
 parser = argparse.ArgumentParser(
@@ -317,7 +315,7 @@ def predict(dmatrix, **kwargs):  # type: ignore
         shap_interaction_time,
         shap_interaction_time_daal,
     ],
-    metric_type=[metric_name, "accuracy"],
+    metric_type=[metric_name, "RMSE"],
     metrics=[
         [
             None,

From 2e354927ad699602b6ad279d9a4773000e079ae9 Mon Sep 17 00:00:00 2001
From: Andreas Huber <andreas.huber@intel.com>
Date: Fri, 27 Oct 2023 01:55:20 -0700
Subject: [PATCH 5/8] auto-format

---
 report_generator/fix-lgbm-mb-results.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/report_generator/fix-lgbm-mb-results.py b/report_generator/fix-lgbm-mb-results.py
index 35b1689cf..c5dbc9702 100644
--- a/report_generator/fix-lgbm-mb-results.py
+++ b/report_generator/fix-lgbm-mb-results.py
@@ -37,6 +37,7 @@
 import json
 from pathlib import Path
 
+
 def fix_file(fname: Path):
     with open(fname) as fp:
         data = json.load(fp)
@@ -62,7 +63,9 @@ def fix_file(fname: Path):
                 comb["device"] = "none"
 
             if "time[s]" not in comb:
-                comb["time[s]"] = result.get("training_time") or result["prediction_time"]
+                comb["time[s]"] = (
+                    result.get("training_time") or result["prediction_time"]
+                )
 
             if "algorithm_parameters" not in comb:
                 comb["algorithm_paramters"] = {}

From a16531741372187df32480d8c40323d88e4ee3ba Mon Sep 17 00:00:00 2001
From: Andreas Huber <andreas.huber@intel.com>
Date: Fri, 27 Oct 2023 03:19:49 -0700
Subject: [PATCH 6/8] Revert "Fix breaking kwarg"

This reverts commit 78f3576aaa94c5b643c3c6bcdb5922789923c758.
---
 modelbuilders_bench/lgbm_mb.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modelbuilders_bench/lgbm_mb.py b/modelbuilders_bench/lgbm_mb.py
index 7ddc6a68c..f263d419c 100644
--- a/modelbuilders_bench/lgbm_mb.py
+++ b/modelbuilders_bench/lgbm_mb.py
@@ -118,7 +118,8 @@
 t_train, model_lgbm = bench.measure_function_time(lgbm.train, lgbm_params, lgbm_train,
                                                   params=params,
                                                   num_boost_round=params.n_estimators,
-                                                  valid_sets=lgbm_train)
+                                                  valid_sets=lgbm_train,
+                                                  verbose_eval=False)
 train_metric = None
 if not X_train.equals(X_test):
     y_train_pred = model_lgbm.predict(X_train)

From 99b4e93580602da5aa3e6a13ac6cb9f55fce8008 Mon Sep 17 00:00:00 2001
From: Andreas Huber <andreas.huber@intel.com>
Date: Fri, 27 Oct 2023 03:21:42 -0700
Subject: [PATCH 7/8] Revert "provide lgbm_mb converter script to fix result
 files"

This reverts commit ca03b57cbd1ecafc451246f023c427a5e5797bba.
---
 report_generator/fix-lgbm-mb-results.py | 96 -------------------------
 1 file changed, 96 deletions(-)
 delete mode 100644 report_generator/fix-lgbm-mb-results.py

diff --git a/report_generator/fix-lgbm-mb-results.py b/report_generator/fix-lgbm-mb-results.py
deleted file mode 100644
index c5dbc9702..000000000
--- a/report_generator/fix-lgbm-mb-results.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# ==============================================================================
-# Copyright 2020-2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""
-Temporary solution to fix the .json result files created from lgbm_mb.py.
-The result files are in an incompatible format for report_generator.py.
-Attempts to produce xlsx reports fail and create empty files.
-
-After running this script on my-file.json, a new file my-file-fixed.json will be
-produced, containing a JSON version of the results in a compatible format.
-
-Usage:
-
-  python fix-lgbm-mb-results.py my-file.json [another-file.json ...]
-
-
-Note: This is just a quick and dirty hack that does not fix the underlying
-      issue. Rather than changing this file (if something breaks again), the
-      original script lgbm_mb.py should be updated such that it produces valid
-      JSON dumps again.
-"""
-
-from argparse import ArgumentParser
-import json
-from pathlib import Path
-
-
-def fix_file(fname: Path):
-    with open(fname) as fp:
-        data = json.load(fp)
-
-    # copy all data (aux info etc)
-    fixed = {}
-    for key, val in data.items():
-        fixed[key] = val
-
-    # reset the results - we'll fix them
-    fixed["results"] = []
-
-    current_result = {}
-    for result in data["results"]:
-        if "algorithm" in result:
-            # found a new algo / measurement
-            current_result = result
-            continue
-
-        if "stage" in result:
-            comb = current_result | result
-            if "device" not in comb:
-                comb["device"] = "none"
-
-            if "time[s]" not in comb:
-                comb["time[s]"] = (
-                    result.get("training_time") or result["prediction_time"]
-                )
-
-            if "algorithm_parameters" not in comb:
-                comb["algorithm_paramters"] = {}
-
-            if "accuracy[%]" in comb:
-                comb["accuracy"] = comb["accuracy[%]"]
-
-            replace_pairs = (
-                ("lgbm_train", "training"),
-                ("lgbm_predict", "prediction"),
-                ("daal4py_predict", "alternative_prediction"),
-            )
-            for s, r in replace_pairs:
-                comb["stage"] = comb["stage"].replace(s, r)
-
-            fixed["results"].append(comb)
-
-    out_fname = fname.stem + "-fixed.json"
-    with open(out_fname, "w") as fp:
-        json.dump(fixed, fp, indent=4)
-
-
-if __name__ == "__main__":
-    parser = ArgumentParser()
-    parser.add_argument("filenames", nargs="+")
-    args = parser.parse_args()
-    for fname in args.filenames:
-        fix_file(Path(fname))

From 8deeebd9185ced373101f2305237ff88bc95157e Mon Sep 17 00:00:00 2001
From: Alexander Andreev <alexander.andreev@intel.com>
Date: Mon, 30 Oct 2023 16:36:34 +0000
Subject: [PATCH 8/8] Update modelbuilders_bench/xgb_mb.py

Co-authored-by: Nikolay Petrov <nikolay.a.petrov@intel.com>
---
 modelbuilders_bench/xgb_mb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py
index 749f9ee16..67b35b0a3 100644
--- a/modelbuilders_bench/xgb_mb.py
+++ b/modelbuilders_bench/xgb_mb.py
@@ -1,5 +1,5 @@
 # ==============================================================================
-# Copyright 2020-2023 Intel Corporation
+# Copyright 2020 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.