diff --git a/configs/modelbuilders/xgb_mb_cpu_config_shap.json b/configs/modelbuilders/xgb_mb_cpu_config_shap.json
new file mode 100644
index 000000000..c91c7fd77
--- /dev/null
+++ b/configs/modelbuilders/xgb_mb_cpu_config_shap.json
@@ -0,0 +1,309 @@
+{
+    "common": {
+        "lib": "modelbuilders",
+        "data-format": "pandas",
+        "data-order": "F",
+        "dtype": "float32",
+        "algorithm": "xgb_mb",
+        "tree-method": "hist",
+        "count-dmatrix": "",
+        "num-threads": -1,
+        "n-estimators": 50
+    },
+    "cases": [
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "abalone",
+                    "training": {
+                        "x": "data/abalone_x_train.npy",
+                        "y": "data/abalone_y_train.npy"
+                    },
+                    "testing": {
+                        "x": "data/abalone_x_test.npy",
+                        "y": "data/abalone_y_test.npy"
+                    }
+                }
+            ],
+            "learning-rate": 0.03,
+            "max-depth": 6,
+            "n-estimators": 1000,
+            "objective": "reg:squarederror"
+        },
+        {
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "mortgage1Q",
+                    "training": {
+                        "x": "data/mortgage1Q_x_train.npy",
+                        "y": "data/mortgage1Q_y_train.npy"
+                    }
+                }
+            ],
+            "n-estimators": 100,
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "scale-pos-weight": 2,
+            "learning-rate": 0.1,
+            "subsample": 1,
+            "reg-alpha": 0.9,
+            "reg-lambda": 1,
+            "min-child-weight": 0,
+            "max-leaves": 256
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "npy",
+                    "name": "year_prediction_msd",
+                    "training": {
+                        "x": "data/year_prediction_msd_x_train.npy",
+                        "y": "data/year_prediction_msd_y_train.npy"
+                    },
+                    "testing": {
+                        "x": "data/year_prediction_msd_x_test.npy",
+                        "y": "data/year_prediction_msd_y_test.npy"
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 6,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 10,
+                    "n_informative": 10,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 8000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 10,
+                    "n_informative": 10,
+                    "training": {
+                        "n_samples": 3000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 20,
+                    "n_informative": 20,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 20,
+                    "n_informative": 20,
+                    "training": {
+                        "n_samples": 4000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 14,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 20,
+                    "n_informative": 20,
+                    "training": {
+                        "n_samples": 10000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 50,
+                    "n_informative": 50,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 50,
+                    "n_informative": 50,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 14,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 50,
+                    "n_informative": 50,
+                    "training": {
+                        "n_samples": 4000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 8,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 256,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 100,
+                    "n_informative": 100,
+                    "training": {
+                        "n_samples": 1000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 10,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 1024,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 100,
+                    "n_informative": 100,
+                    "training": {
+                        "n_samples": 2000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        },
+        {
+            "objective": "reg:squarederror",
+            "max-depth": 14,
+            "learning-rate": 0.1,
+            "reg-lambda": 1,
+            "max-leaves": 4096,
+            "dataset": [
+                {
+                    "source": "synthetic",
+                    "type": "regression",
+                    "n_features": 100,
+                    "n_informative": 100,
+                    "training": {
+                        "n_samples": 3000
+                    },
+                    "testing": {
+                        "n_samples": 80000
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py
index 67b35b0a3..3c7caaec0 100644
--- a/modelbuilders_bench/xgb_mb.py
+++ b/modelbuilders_bench/xgb_mb.py
@@ -36,7 +36,14 @@ def convert_xgb_predictions(y_pred, objective):
 
 def shap_accuracy(new, ref):
     # broadcast all values into single column and calculate RMSE
-    return bench.rmse_score(new.reshape(-1, ), ref.reshape(-1, ))
+    return bench.rmse_score(
+        new.reshape(
+            -1,
+        ),
+        ref.reshape(
+            -1,
+        ),
+    )
 
 
 parser = argparse.ArgumentParser(
@@ -152,6 +159,7 @@ def shap_accuracy(new, ref):
 
 params = bench.parse_args(parser)
 
+
 X_train, X_test, y_train, y_test = bench.load_data(params)
 
 xgb_params = {
@@ -180,8 +188,8 @@ def shap_accuracy(new, ref):
     "enable_experimental_json_serialization": params.enable_experimental_json_serialization,
 }
 
-if params.threads != -1:
-    xgb_params.update({"nthread": params.threads})
+xgb_params.update({"nthread": params.threads})
+daal4py.daalinit(params.threads)
 
 if params.objective.startswith("reg"):
     task = "regression"
@@ -209,6 +217,12 @@ def shap_accuracy(new, ref):
     xgb.DMatrix, X_test, params=params, label=y_test
 )
 
+# SHAP interactions are very expensive - use at most the first 2,000 test rows
+interaction_n_rows = min(2_000, X_test.shape[0])
+
+# not benchmarked, but required for SHAP interactions
+dtest_interactions = xgb.DMatrix(X_test[:interaction_n_rows])
+
 
 def fit(dmatrix):
     if dmatrix is None:
@@ -250,7 +264,7 @@ def predict(dmatrix, **kwargs):  # type: ignore
 )
 
 shap_interaction_time, shap_interactions = bench.measure_function_time(
-    predict, dtest, pred_interactions=True, params=params
+    predict, dtest_interactions, pred_interactions=True, params=params
 )
 
 transform_time, model_daal = bench.measure_function_time(
@@ -262,22 +276,39 @@ def predict(dmatrix, **kwargs):  # type: ignore
 )
 test_metric_daal = metric_func(y_test, daal_pred)
 
-shap_contrib_time_daal, daal_contribs = bench.measure_function_time(
-    model_daal.predict, X_test, pred_contribs=True, params=params
-)
+if model_daal._is_regression:
+    shap_contrib_time_daal, daal_contribs = bench.measure_function_time(
+        model_daal.predict, X_test, pred_contribs=True, params=params
+    )
 
-shap_interaction_time_daal, daal_interactions = bench.measure_function_time(
-    model_daal.predict, X_test, pred_interactions=True, params=params
-)
+    shap_interaction_time_daal, daal_interactions = bench.measure_function_time(
+        model_daal.predict,
+        X_test[:interaction_n_rows],
+        pred_interactions=True,
+        params=params,
+    )
 
-contrib_accuracy = shap_accuracy(shap_contribs, daal_contribs)
+    contrib_accuracy = shap_accuracy(shap_contribs, daal_contribs)
 
-interaction_accuracy = shap_accuracy(shap_interactions, daal_interactions)
+    interaction_accuracy = shap_accuracy(shap_interactions, daal_interactions)
 
+else:
+    # model_daal cannot compute SHAP values for classification yet; report zeros instead
+    (
+        shap_contrib_time_daal,
+        shap_interaction_time_daal,
+        contrib_accuracy,
+        interaction_accuracy,
+    ) = [0] * 4
 
 bench.print_output(
     library="modelbuilders",
     algorithm=f"xgboost_{task}_and_modelbuilder",
+    alg_instance=booster,
+    alg_params={
+        "max-depth": getattr(params, "max_depth", None),
+        "objective": getattr(params, "objective", None),
+    },
     stages=[
         "training_preparation",
         "training",
@@ -290,6 +321,7 @@ def predict(dmatrix, **kwargs):  # type: ignore
         "shap_interaction_prediction",
         "alternative_shap_interaction_prediction",
     ],
+    data=[X_train] * 2 + [X_test] * 2 + [X_train] + [X_test] * 3 + [X_test[:interaction_n_rows]] * 2,
     params=params,
     functions=[
         "xgb.dmatrix.train",
@@ -315,7 +347,7 @@ def predict(dmatrix, **kwargs):  # type: ignore
         shap_interaction_time,
         shap_interaction_time_daal,
     ],
-    metric_type=[metric_name, "RMSE"],
+    metric_type=[metric_name, "rmse"],
     metrics=[
         [
             None,
@@ -342,5 +374,4 @@ def predict(dmatrix, **kwargs):  # type: ignore
             interaction_accuracy,
         ],
     ],
-    data=[X_train] * 2 + [X_test] * 8,
 )
diff --git a/report_generator/model_builder_report_gen_config.json b/report_generator/model_builder_report_gen_config.json
new file mode 100755
index 000000000..ef2768ffc
--- /dev/null
+++ b/report_generator/model_builder_report_gen_config.json
@@ -0,0 +1,22 @@
+{
+    "header": [
+        "algorithm",
+        "stage",
+        "device",
+        "input_data:data_order",
+        "input_data:data_type",
+        "input_data:dataset_name",
+        "input_data:rows",
+        "input_data:columns",
+        "input_data:classes",
+        "input_data:n_clusters",
+        "algorithm_parameters:max-depth",
+        "algorithm_parameters:objective"
+    ],
+    "comparison_method": {
+        "default": "2 / 1"
+    },
+    "aggregation_metrics": [
+        "geomean"
+    ]
+}