From 4c35dbf2c23c80eaa8e940f7e1453f440ebc91c1 Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Mon, 4 Dec 2023 09:52:58 -0800
Subject: [PATCH 1/5] Add SHAP value benchmarks

---
 .../modelbuilders/xgb_mb_cpu_config_shap.json | 309 ++++++++++++++++++
 modelbuilders_bench/xgb_mb.py                 |  43 ++-
 2 files changed, 339 insertions(+), 13 deletions(-)
 create mode 100644 configs/modelbuilders/xgb_mb_cpu_config_shap.json

diff --git a/configs/modelbuilders/xgb_mb_cpu_config_shap.json b/configs/modelbuilders/xgb_mb_cpu_config_shap.json
new file mode 100644
index 000000000..0e98976c2
--- /dev/null
+++ b/configs/modelbuilders/xgb_mb_cpu_config_shap.json
@@ -0,0 +1,309 @@
+{
+    "common": {
+        "lib": "modelbuilders",
+        "data-format": "pandas",
+        "data-order": "F",
+        "dtype": "float32",
+        "algorithm": "xgb_mb",
+        "tree-method": "hist",
+        "count-dmatrix": "",
+        "num-threads": -1,
+        "n-estimators": 50
+    },
+    "cases": [
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "abalone",
+                    "training": {
+                        "x": "data/abalone_x_train.npy",
+                        "y": "data/abalone_y_train.npy"
+                    },
+                    "testing": {
+                        "x": "data/abalone_x_test.npy",
+                        "y": "data/abalone_y_test.npy"
+                    }
+                }
+            ],
+            "learning-rate": 0.03,
+            "max-depth": 6,
+            "n-estimators": 1000,
+            "objective": "reg:squarederror"
+        },
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "mortgage1Q",
+                    "training": {
+                        "x": "data/mortgage1Q_x_train.npy",
+                        "y": "data/mortgage1Q_y_train.npy"
+                    }
+                }
+            ],
+            "n-estimators": 100,
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "scale-pos-weight": 2,
+            "learning-rate": 0.1,
+            "subsample": 1,
+            "reg-alpha": 0.9,
+            "reg-lambda": 1,
+            "min-child-weight": 0,
+            "max-leaves": 256
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "year_prediction_msd",
+                    "training": {
+                        "x": "data/year_prediction_msd_x_train.npy",
+                        "y": "data/year_prediction_msd_y_train.npy"
+                    },
+                    "testing": {
+                        "x": "data/year_prediction_msd_x_test.npy",
+                        "y": "data/year_prediction_msd_y_test.npy"
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 6,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 10,
+                    "n_informative": 10,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 8000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 10,
+                    "n_informative": 10,
+                    "training": {
+                        "n_samples": 3000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 20,
+                    "n_informative": 20,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 20,
+                    "n_informative": 20,
+                    "training": {
+                        "n_samples": 4000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 14,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 20,
+                    "n_informative": 20,
+                    "training": {
+                        "n_samples": 10000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 50,
+                    "n_informative": 50,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 50,
+                    "n_informative": 50,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 14,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 50,
+                    "n_informative": 50,
+                    "training": {
+                        "n_samples": 4000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 100,
+                    "n_informative": 100,
+                    "training": {
+                        "n_samples": 1000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 100,
+                    "n_informative": 100,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 14,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 100,
+                    "n_informative": 100,
+                    "training": {
+                        "n_samples": 3000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        }
+    ]
+}
\ No newline at end of file
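Each synthetic case above amounts to generating a regression dataset of the listed shape and training a booster with the listed parameters. A minimal self-contained sketch of the first synthetic case, assuming scikit-learn's make_regression as a stand-in for the harness's actual data generator:

import xgboost as xgb
from sklearn.datasets import make_regression

# Mirrors the first synthetic case: 10 features, all informative, 2000 training
# rows, "hist" tree method, and the common default of 50 boosting rounds.
X, y = make_regression(n_samples=2000, n_features=10, n_informative=10, random_state=0)
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train(
    {
        "objective": "reg:squarederror",
        "max_depth": 6,
        "learning_rate": 0.1,
        "reg_lambda": 1,
        "max_leaves": 1024,
        "tree_method": "hist",
    },
    dtrain,
    num_boost_round=50,
)
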
"synthetic", + "type": "regression", + "n_features": 20, + "n_informative": 20, + "training": { + "n_samples": 10000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 8, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 256, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 50, + "n_informative": 50, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 10, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 1024, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 50, + "n_informative": 50, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 14, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 4096, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 50, + "n_informative": 50, + "training": { + "n_samples": 4000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 8, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 256, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "n_informative": 100, + "training": { + "n_samples": 1000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 10, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 1024, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "n_informative": 100, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 14, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 4096, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "n_informative": 100, + "training": { + "n_samples": 3000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + } + ] +} \ No newline at end of file diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py index 67b35b0a3..0b9be2c9d 100644 --- a/modelbuilders_bench/xgb_mb.py +++ b/modelbuilders_bench/xgb_mb.py @@ -21,7 +21,6 @@ import numpy as np import xgboost as xgb - def convert_probs_to_classes(y_prob): return np.array([np.argmax(y_prob[i]) for i in range(y_prob.shape[0])]) @@ -152,6 +151,7 @@ def shap_accuracy(new, ref): params = bench.parse_args(parser) + X_train, X_test, y_train, y_test = bench.load_data(params) xgb_params = { @@ -180,8 +180,13 @@ def shap_accuracy(new, ref): "enable_experimental_json_serialization": params.enable_experimental_json_serialization, } -if params.threads != -1: +if params.threads == -1: + # SHAP value calculation is faster with using logical cores as number of threads + import psutil + daal4py.daalinit(psutil.cpu_count()) +else: xgb_params.update({"nthread": params.threads}) + daal4py.daalinit(params.threads) if params.objective.startswith("reg"): task = "regression" @@ -209,6 +214,12 @@ def shap_accuracy(new, ref): xgb.DMatrix, X_test, params=params, label=y_test ) +# SHAP interactions are very expensive - cap the number of rows +interaction_n_rows = max(2_000, 200_000 // (X_test.shape[0] * X_test.shape[1])) + +# not benchmarked, but required for SHAP interactions +dtest_interactions = xgb.DMatrix(X_test[:interaction_n_rows]) + def 
From ef4201b14da94348cd6f5a13824747619a01e6b1 Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Mon, 4 Dec 2023 09:54:55 -0800
Subject: [PATCH 2/5] Add report generator config for MB reports

---
 .../model_builder_report_gen_config.json      | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100755 report_generator/model_builder_report_gen_config.json

diff --git a/report_generator/model_builder_report_gen_config.json b/report_generator/model_builder_report_gen_config.json
new file mode 100755
index 000000000..36f9ed496
--- /dev/null
+++ b/report_generator/model_builder_report_gen_config.json
@@ -0,0 +1,22 @@
+{
+    "header": [
+        "algorithm",
+        "stage",
+        "device",
+        "input_data:data_order",
+        "input_data:data_type",
+        "input_data:dataset_name",
+        "input_data:rows",
+        "input_data:columns",
+        "input_data:classes",
+        "input_data:n_clusters",
+        "algorithm_parameters:max-depth",
+        "algorithm_parameters:objective"
+    ],
+    "comparison_method": {
+        "default": "2 / 1"
+    },
+    "aggregation_metrics": [
+        "geomean"
+    ]
+}
\ No newline at end of file
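In this config, "2 / 1" divides each measurement in the second result column by its counterpart in the first, and "geomean" aggregates those per-case ratios into a single figure. A sketch of the arithmetic with made-up timings (not the report generator's actual code):

import numpy as np

# Hypothetical per-case times from two benchmark runs (column 1 and column 2):
run_1 = np.array([1.20, 0.45, 3.10])
run_2 = np.array([0.60, 0.30, 1.55])

ratios = run_2 / run_1                           # the "2 / 1" comparison per case
geomean = float(np.exp(np.log(ratios).mean()))   # the "geomean" aggregation
print(f"geomean of ratios: {geomean:.3f}")
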
From f60266a53a2017033e8c9db4ee5931d1fe8d824c Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Mon, 4 Dec 2023 09:59:34 -0800
Subject: [PATCH 3/5] black format

---
 modelbuilders_bench/xgb_mb.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py
index 0b9be2c9d..7d57f6837 100644
--- a/modelbuilders_bench/xgb_mb.py
+++ b/modelbuilders_bench/xgb_mb.py
@@ -21,6 +21,7 @@
 import numpy as np
 import xgboost as xgb
 
+
 def convert_probs_to_classes(y_prob):
     return np.array([np.argmax(y_prob[i]) for i in range(y_prob.shape[0])])
 
@@ -35,7 +36,14 @@ def convert_xgb_predictions(y_pred, objective):
 
 def shap_accuracy(new, ref):
     # flatten all values into a single column and calculate RMSE
-    return bench.rmse_score(new.reshape(-1, ), ref.reshape(-1, ))
+    return bench.rmse_score(
+        new.reshape(
+            -1,
+        ),
+        ref.reshape(
+            -1,
+        ),
+    )
 
 
 parser = argparse.ArgumentParser(
@@ -183,6 +191,7 @@ def shap_accuracy(new, ref):
 if params.threads == -1:
     # SHAP value calculation is faster when using all logical cores as the number of threads
     import psutil
+
     daal4py.daalinit(psutil.cpu_count())
 else:
     xgb_params.update({"nthread": params.threads})
     daal4py.daalinit(params.threads)
@@ -279,7 +288,10 @@ def predict(dmatrix, **kwargs):  # type: ignore
     )
 
     shap_interaction_time_daal, daal_interactions = bench.measure_function_time(
-        model_daal.predict, X_test[:interaction_n_rows], pred_interactions=True, params=params
+        model_daal.predict,
+        X_test[:interaction_n_rows],
+        pred_interactions=True,
+        params=params,
    )
 
     contrib_accuracy = shap_accuracy(shap_contribs, daal_contribs)
@@ -288,13 +300,21 @@ def predict(dmatrix, **kwargs):  # type: ignore
 
 else:
     # classification currently does not support SHAP values
-    shap_contrib_time_daal, shap_interaction_time_daal, contrib_accuracy, interaction_accuracy = [0]*4
+    (
+        shap_contrib_time_daal,
+        shap_interaction_time_daal,
+        contrib_accuracy,
+        interaction_accuracy,
+    ) = [0] * 4
 
 bench.print_output(
     library="modelbuilders",
     algorithm=f"xgboost_{task}_and_modelbuilder",
     alg_instance=booster,
-    alg_params={"max-depth": getattr(params, "max_depth", None), "objective": getattr(params, "objective", None)},
+    alg_params={
+        "max-depth": getattr(params, "max_depth", None),
+        "objective": getattr(params, "objective", None),
+    },
     stages=[
         "training_preparation",
         "training",
@@ -307,7 +327,7 @@ def predict(dmatrix, **kwargs):  # type: ignore
         "shap_interaction_prediction",
         "alternative_shap_interaction_prediction",
     ],
-    data=[X_train]*2 + [X_test]*2 + [X_train] + [X_test]*5,
+    data=[X_train] * 2 + [X_test] * 2 + [X_train] + [X_test] * 5,
     params=params,
     functions=[
         "xgb.dmatrix.train",

From b9d2f756013f1f043365516ad83bb550a4145ae9 Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Tue, 12 Dec 2023 01:27:37 -0800
Subject: [PATCH 4/5] review comments

---
 .../modelbuilders/xgb_mb_cpu_config_shap.json |  2 +-
 modelbuilders_bench/xgb_mb.py                 |  2 +-
 .../model_builder_report_gen_config.json      | 44 +++++++++----------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/configs/modelbuilders/xgb_mb_cpu_config_shap.json b/configs/modelbuilders/xgb_mb_cpu_config_shap.json
index 0e98976c2..c91c7fd77 100644
--- a/configs/modelbuilders/xgb_mb_cpu_config_shap.json
+++ b/configs/modelbuilders/xgb_mb_cpu_config_shap.json
@@ -306,4 +306,4 @@
             ]
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py
index 7d57f6837..db1af1704 100644
--- a/modelbuilders_bench/xgb_mb.py
+++ b/modelbuilders_bench/xgb_mb.py
@@ -192,7 +192,7 @@ def shap_accuracy(new, ref):
     # SHAP value calculation is faster when using all logical cores as the number of threads
     import psutil
 
-    daal4py.daalinit(psutil.cpu_count())
+    daal4py.daalinit(psutil.cpu_count(logical=True))
 else:
     xgb_params.update({"nthread": params.threads})
     daal4py.daalinit(params.threads)
diff --git a/report_generator/model_builder_report_gen_config.json b/report_generator/model_builder_report_gen_config.json
index 36f9ed496..ef2768ffc 100755
--- a/report_generator/model_builder_report_gen_config.json
+++ b/report_generator/model_builder_report_gen_config.json
@@ -1,22 +1,22 @@
-{
-    "header": [
-        "algorithm",
-        "stage",
-        "device",
-        "input_data:data_order",
-        "input_data:data_type",
-        "input_data:dataset_name",
-        "input_data:rows",
-        "input_data:columns",
-        "input_data:classes",
-        "input_data:n_clusters",
-        "algorithm_parameters:max-depth",
-        "algorithm_parameters:objective"
-    ],
-    "comparison_method": {
-        "default": "2 / 1"
-    },
-    "aggregation_metrics": [
-        "geomean"
-    ]
-}
\ No newline at end of file
+{
+    "header": [
+        "algorithm",
+        "stage",
+        "device",
+        "input_data:data_order",
+        "input_data:data_type",
+        "input_data:dataset_name",
+        "input_data:rows",
+        "input_data:columns",
+        "input_data:classes",
+        "input_data:n_clusters",
+        "algorithm_parameters:max-depth",
+        "algorithm_parameters:objective"
+    ],
+    "comparison_method": {
+        "default": "2 / 1"
+    },
+    "aggregation_metrics": [
+        "geomean"
+    ]
+}
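A note on the psutil change in PATCH 4: psutil.cpu_count() already defaults to logical=True, so the new argument only makes the existing behavior explicit. A quick self-contained illustration (the core counts in the comments are example values):

import psutil

# Hardware threads, e.g. 16 on an 8-core machine with hyper-threading:
print(psutil.cpu_count(logical=True))
# Physical cores only, e.g. 8 on the same machine:
print(psutil.cpu_count(logical=False))
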
From 6c4eb5fad2b543f437ed0822248a291cfbfbde9d Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Tue, 12 Dec 2023 03:08:32 -0800
Subject: [PATCH 5/5] Remove nthreads -1 logic from xgb_mb runner

---
 modelbuilders_bench/xgb_mb.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py
index db1af1704..3c7caaec0 100644
--- a/modelbuilders_bench/xgb_mb.py
+++ b/modelbuilders_bench/xgb_mb.py
@@ -188,14 +188,8 @@ def shap_accuracy(new, ref):
     "enable_experimental_json_serialization": params.enable_experimental_json_serialization,
 }
 
-if params.threads == -1:
-    # SHAP value calculation is faster when using all logical cores as the number of threads
-    import psutil
-
-    daal4py.daalinit(psutil.cpu_count(logical=True))
-else:
-    xgb_params.update({"nthread": params.threads})
-    daal4py.daalinit(params.threads)
+xgb_params.update({"nthread": params.threads})
+daal4py.daalinit(params.threads)
 
 if params.objective.startswith("reg"):
     task = "regression"
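Reduced to its core, the flow this series benchmarks looks roughly like the sketch below. It assumes a daal4py release with model-builder SHAP support and the daal4py.mb.convert_model entry point; the benchmark's own conversion call is not shown in these patches, so treat the convert step as an assumption:

import daal4py.mb
import numpy as np
import xgboost as xgb

# Train a small regressor, convert it to a oneDAL model, and compare SHAP outputs.
rng = np.random.default_rng(0)
X = rng.random((1000, 10), dtype=np.float32)
y = X.sum(axis=1)
booster = xgb.train({"objective": "reg:squarederror", "tree_method": "hist"},
                    xgb.DMatrix(X, label=y), 50)

model_daal = daal4py.mb.convert_model(booster)  # XGBoost booster -> oneDAL model

xgb_contribs = booster.predict(xgb.DMatrix(X), pred_contribs=True)
daal_contribs = model_daal.predict(X, pred_contribs=True)

# Same accuracy check as shap_accuracy() in the benchmark: RMSE over flattened values.
rmse = np.sqrt(np.mean((xgb_contribs.reshape(-1) - daal_contribs.reshape(-1)) ** 2))
print(f"contrib RMSE vs. XGBoost: {rmse:.2e}")
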