diff --git a/configs/modelbuilders/xgb_mb_cpu_config_shap.json b/configs/modelbuilders/xgb_mb_cpu_config_shap.json new file mode 100644 index 000000000..c91c7fd77 --- /dev/null +++ b/configs/modelbuilders/xgb_mb_cpu_config_shap.json @@ -0,0 +1,309 @@ +{ + "common": { + "lib": "modelbuilders", + "data-format": "pandas", + "data-order": "F", + "dtype": "float32", + "algorithm": "xgb_mb", + "tree-method": "hist", + "count-dmatrix": "", + "num-threads": -1, + "n-estimators": 50 + }, + "cases": [ + { + "dataset": [ + { + "source": "npy", + "name": "abalone", + "training": { + "x": "data/abalone_x_train.npy", + "y": "data/abalone_y_train.npy" + }, + "testing": { + "x": "data/abalone_x_test.npy", + "y": "data/abalone_y_test.npy" + } + } + ], + "learning-rate": 0.03, + "max-depth": 6, + "n-estimators": 1000, + "objective": "reg:squarederror" + }, + { + "dataset": [ + { + "source": "npy", + "name": "mortgage1Q", + "training": { + "x": "data/mortgage1Q_x_train.npy", + "y": "data/mortgage1Q_y_train.npy" + } + } + ], + "n-estimators": 100, + "objective": "reg:squarederror", + "max-depth": 8, + "scale-pos-weight": 2, + "learning-rate": 0.1, + "subsample": 1, + "reg-alpha": 0.9, + "reg-lambda": 1, + "min-child-weight": 0, + "max-leaves": 256 + }, + { + "objective": "reg:squarederror", + "max-depth": 8, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 256, + "dataset": [ + { + "source": "npy", + "name": "year_prediction_msd", + "training": { + "x": "data/year_prediction_msd_x_train.npy", + "y": "data/year_prediction_msd_y_train.npy" + }, + "testing": { + "x": "data/year_prediction_msd_x_test.npy", + "y": "data/year_prediction_msd_y_test.npy" + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 6, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 1024, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 10, + "n_informative": 10, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 8000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 10, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 4096, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 10, + "n_informative": 10, + "training": { + "n_samples": 3000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 8, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 256, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "n_informative": 20, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 10, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 1024, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "n_informative": 20, + "training": { + "n_samples": 4000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 14, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 4096, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 20, + "n_informative": 20, + "training": { + "n_samples": 10000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 8, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 256, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 50, + "n_informative": 50, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 10, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 1024, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 50, + "n_informative": 50, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 14, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 4096, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 50, + "n_informative": 50, + "training": { + "n_samples": 4000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 8, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 256, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "n_informative": 100, + "training": { + "n_samples": 1000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 10, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 1024, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "n_informative": 100, + "training": { + "n_samples": 2000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + }, + { + "objective": "reg:squarederror", + "max-depth": 14, + "learning-rate": 0.1, + "reg-lambda": 1, + "max-leaves": 4096, + "dataset": [ + { + "source": "synthetic", + "type": "regression", + "n_features": 100, + "n_informative": 100, + "training": { + "n_samples": 3000 + }, + "testing": { + "n_samples": 80000 + } + } + ] + } + ] +} diff --git a/modelbuilders_bench/xgb_mb.py b/modelbuilders_bench/xgb_mb.py index 67b35b0a3..3c7caaec0 100644 --- a/modelbuilders_bench/xgb_mb.py +++ b/modelbuilders_bench/xgb_mb.py @@ -36,7 +36,14 @@ def convert_xgb_predictions(y_pred, objective): def shap_accuracy(new, ref): # broadcast all values into single column and calculate RMSE - return bench.rmse_score(new.reshape(-1, ), ref.reshape(-1, )) + return bench.rmse_score( + new.reshape( + -1, + ), + ref.reshape( + -1, + ), + ) parser = argparse.ArgumentParser( @@ -152,6 +159,7 @@ def shap_accuracy(new, ref): params = bench.parse_args(parser) + X_train, X_test, y_train, y_test = bench.load_data(params) xgb_params = { @@ -180,8 +188,8 @@ def shap_accuracy(new, ref): "enable_experimental_json_serialization": params.enable_experimental_json_serialization, } -if params.threads != -1: - xgb_params.update({"nthread": params.threads}) +xgb_params.update({"nthread": params.threads}) +daal4py.daalinit(params.threads) if params.objective.startswith("reg"): task = "regression" @@ -209,6 +217,12 @@ def shap_accuracy(new, ref): xgb.DMatrix, X_test, params=params, label=y_test ) +# SHAP interactions are very expensive - cap the number of rows +interaction_n_rows = max(2_000, 200_000 // (X_test.shape[0] * X_test.shape[1])) + +# not benchmarked, but required for SHAP interactions +dtest_interactions = xgb.DMatrix(X_test[:interaction_n_rows]) + def fit(dmatrix): if dmatrix is None: @@ -250,7 +264,7 @@ def predict(dmatrix, **kwargs): # type: ignore ) shap_interaction_time, shap_interactions = bench.measure_function_time( - predict, dtest, pred_interactions=True, params=params + predict, dtest_interactions, pred_interactions=True, params=params ) transform_time, model_daal = bench.measure_function_time( @@ -262,22 +276,39 @@ def predict(dmatrix, **kwargs): # type: ignore ) test_metric_daal = metric_func(y_test, daal_pred) -shap_contrib_time_daal, daal_contribs = bench.measure_function_time( - model_daal.predict, X_test, pred_contribs=True, params=params -) +if model_daal._is_regression: + shap_contrib_time_daal, daal_contribs = bench.measure_function_time( + model_daal.predict, X_test, pred_contribs=True, params=params + ) -shap_interaction_time_daal, daal_interactions = bench.measure_function_time( - model_daal.predict, X_test, pred_interactions=True, params=params -) + shap_interaction_time_daal, daal_interactions = bench.measure_function_time( + model_daal.predict, + X_test[:interaction_n_rows], + pred_interactions=True, + params=params, + ) -contrib_accuracy = shap_accuracy(shap_contribs, daal_contribs) + contrib_accuracy = shap_accuracy(shap_contribs, daal_contribs) -interaction_accuracy = shap_accuracy(shap_interactions, daal_interactions) + interaction_accuracy = shap_accuracy(shap_interactions, daal_interactions) +else: + # classification currently does not support SHAP values + ( + shap_contrib_time_daal, + shap_interaction_time_daal, + contrib_accuracy, + interaction_accuracy, + ) = [0] * 4 bench.print_output( library="modelbuilders", algorithm=f"xgboost_{task}_and_modelbuilder", + alg_instance=booster, + alg_params={ + "max-depth": getattr(params, "max_depth", None), + "objective": getattr(params, "objective", None), + }, stages=[ "training_preparation", "training", @@ -290,6 +321,7 @@ def predict(dmatrix, **kwargs): # type: ignore "shap_interaction_prediction", "alternative_shap_interaction_prediction", ], + data=[X_train] * 2 + [X_test] * 2 + [X_train] + [X_test] * 5, params=params, functions=[ "xgb.dmatrix.train", @@ -315,7 +347,7 @@ def predict(dmatrix, **kwargs): # type: ignore shap_interaction_time, shap_interaction_time_daal, ], - metric_type=[metric_name, "RMSE"], + metric_type=[metric_name, "rmse"], metrics=[ [ None, @@ -342,5 +374,4 @@ def predict(dmatrix, **kwargs): # type: ignore interaction_accuracy, ], ], - data=[X_train] * 2 + [X_test] * 8, ) diff --git a/report_generator/model_builder_report_gen_config.json b/report_generator/model_builder_report_gen_config.json new file mode 100755 index 000000000..ef2768ffc --- /dev/null +++ b/report_generator/model_builder_report_gen_config.json @@ -0,0 +1,22 @@ +{ + "header": [ + "algorithm", + "stage", + "device", + "input_data:data_order", + "input_data:data_type", + "input_data:dataset_name", + "input_data:rows", + "input_data:columns", + "input_data:classes", + "input_data:n_clusters", + "algorithm_parameters:max-depth", + "algorithm_parameters:objective" + ], + "comparison_method": { + "default": "2 / 1" + }, + "aggregation_metrics": [ + "geomean" + ] +}