From 4d3e4a30ef73df90c8d047843656c2abb6b2ac35 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Mon, 3 Mar 2025 22:06:19 +0000
Subject: [PATCH 01/15] feat: add LinearRegression.global_explain()

---
 bigframes/ml/core.py                          | 11 +++++
 bigframes/ml/linear_model.py                  | 45 +++++++++++++++++++
 bigframes/ml/sql.py                           |  8 ++++
 .../linear_regression_tutorial_test.py        |  4 ++
 4 files changed, 68 insertions(+)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index ad00ed3f2c..682fb6fcd7 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -134,6 +134,17 @@ def explain_predict(
             ),
         )
 
+    def global_explain(
+        self, input_data: bpd.DataFrame, options: Mapping[str, bool]
+    ) -> bpd.DataFrame:
+        return self._apply_ml_tvf(
+            input_data,
+            lambda source_sql: self._model_manipulation_sql_generator.ml_global_explain(
+                source_sql=source_sql,
+                struct_options=options,
+            ),
+        )
+
     def transform(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
         return self._apply_ml_tvf(
             input_data,
diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py
index 46c5744a42..085ad89437 100644
--- a/bigframes/ml/linear_model.py
+++ b/bigframes/ml/linear_model.py
@@ -203,6 +203,51 @@ def predict_explain(
             X, options={"top_k_features": top_k_features}
         )
 
+    def global_explain(
+        self,
+        X: utils.ArrayType,
+        *,
+        class_level_explain: bool = False,
+    ) -> bpd.DataFrame:
+        """
+        Provide explanations for an entire linear regression model.
+
+        .. note::
+            Output matches that of the BigQuery ML.GLOBAL_PREDICT function.
+            See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-global-explain
+
+        Args:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series or
+            pandas.core.frame.DataFrame or pandas.core.series.Series):
+                Series or a DataFrame to explain its predictions.
+            class_level_explain (bool, default False):
+                a BOOL value that specifies whether global feature importances
+                are returned for each class. Applies only to non-AutoML Tables
+                classification models. When set to FALSE, the global feature
+                importance of the entire model is returned rather than that of
+                each class. The default value is FALSE.
+
+                Regression models and AutoML Tables classification models only
+                have model-level global feature importance.
+
+        Returns:
+            bigframes.pandas.DataFrame:
+                The predicted DataFrames with feature and attribution columns.
+        """
+        if class_level_explain is not True or False:
+            raise ValueError(
+                f"`class_level_explain` must be set to `True` or `False` but is currently {class_level_explain}"
+            )
+
+        if not self._bqml_model:
+            raise RuntimeError("A model must be fitted before predict")
+
+        (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session)
+
+        return self._bqml_model.global_explain(
+            X, options={"class_level_explain": class_level_explain}
+        )
+
     def score(
         self,
         X: utils.ArrayType,
diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py
index b662d4c22c..570c65d46c 100644
--- a/bigframes/ml/sql.py
+++ b/bigframes/ml/sql.py
@@ -312,6 +312,14 @@ def ml_explain_predict(
         return f"""SELECT * FROM ML.EXPLAIN_PREDICT(MODEL {self._model_ref_sql()},
   ({source_sql}), {struct_options_sql})"""
 
+    def ml_global_explain(
+        self, source_sql: str, struct_options: Mapping[str, bool]
+    ) -> str:
+        """Encode ML.GLOBAL_EXPLAIN for BQML"""
+        struct_options_sql = self.struct_options(**struct_options)
+        return f"""SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL {self._model_ref_sql()},
+  ({source_sql}), {struct_options_sql})"""
+
     def ml_forecast(self, struct_options: Mapping[str, Union[int, float]]) -> str:
         """Encode ML.FORECAST for BQML"""
         struct_options_sql = self.struct_options(**struct_options)
diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py
index e4ace53a5c..03bce4ad93 100644
--- a/samples/snippets/linear_regression_tutorial_test.py
+++ b/samples/snippets/linear_regression_tutorial_test.py
@@ -92,6 +92,10 @@ def test_linear_regression(random_model_id: str) -> None:
     # 3	 5349.603734	        [{'feature': 'island', 'attribution': 7348.877...	-5320.222128	          5349.603734	            0.0	         Gentoo penguin (Pygoscelis papua)	Biscoe	    46.4	              15.6	        221.0	           5000.0	    MALE
     # 4	 4637.165037	        [{'feature': 'island', 'attribution': 7348.877...	-5320.222128	          4637.165037	            0.0	         Gentoo penguin (Pygoscelis papua)	Biscoe	    46.1	              13.2	        211.0	           4500.0	   FEMALE
     # [END bigquery_dataframes_bqml_linear_predict_explain]
+    # [START bigquery_dataframes_bqml_linear_global_explain]
+    explain_model = model.global_explain(biscoe_data, class_level_explain=True)
+    # [END bigquery_dataframes_bqml_linear_global_explain]
+    assert explain_model is not None
     assert feature_columns is not None
     assert label_columns is not None
     assert model is not None

From 87db2b72b505eb5729f7a6fd5c7f92e3c4b15877 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Tue, 4 Mar 2025 21:52:31 +0000
Subject: [PATCH 02/15] remove class_level_explain param

---
 bigframes/ml/core.py                          | 10 ++------
 bigframes/ml/linear_model.py                  | 24 ++-----------------
 bigframes/ml/sql.py                           |  7 ++----
 .../linear_regression_tutorial_test.py        |  2 +-
 4 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index 682fb6fcd7..cc61554c29 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -134,15 +134,9 @@ def explain_predict(
             ),
         )
 
-    def global_explain(
-        self, input_data: bpd.DataFrame, options: Mapping[str, bool]
-    ) -> bpd.DataFrame:
+    def global_explain(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
         return self._apply_ml_tvf(
-            input_data,
-            lambda source_sql: self._model_manipulation_sql_generator.ml_global_explain(
-                source_sql=source_sql,
-                struct_options=options,
-            ),
+            input_data, self._model_manipulation_sql_generator.ml_global_explain
         )
 
     def transform(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py
index 085ad89437..bcf92f8a45 100644
--- a/bigframes/ml/linear_model.py
+++ b/bigframes/ml/linear_model.py
@@ -203,12 +203,7 @@ def predict_explain(
             X, options={"top_k_features": top_k_features}
         )
 
-    def global_explain(
-        self,
-        X: utils.ArrayType,
-        *,
-        class_level_explain: bool = False,
-    ) -> bpd.DataFrame:
+    def global_explain(self, X: utils.ArrayType) -> bpd.DataFrame:
         """
         Provide explanations for an entire linear regression model.
 
@@ -220,33 +215,18 @@ def global_explain(
             X (bigframes.dataframe.DataFrame or bigframes.series.Series or
             pandas.core.frame.DataFrame or pandas.core.series.Series):
                 Series or a DataFrame to explain its predictions.
-            class_level_explain (bool, default False):
-                a BOOL value that specifies whether global feature importances
-                are returned for each class. Applies only to non-AutoML Tables
-                classification models. When set to FALSE, the global feature
-                importance of the entire model is returned rather than that of
-                each class. The default value is FALSE.
-
-                Regression models and AutoML Tables classification models only
-                have model-level global feature importance.
 
         Returns:
             bigframes.pandas.DataFrame:
                 The predicted DataFrames with feature and attribution columns.
         """
-        if class_level_explain is not True or False:
-            raise ValueError(
-                f"`class_level_explain` must be set to `True` or `False` but is currently {class_level_explain}"
-            )
 
         if not self._bqml_model:
             raise RuntimeError("A model must be fitted before predict")
 
         (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session)
 
-        return self._bqml_model.global_explain(
-            X, options={"class_level_explain": class_level_explain}
-        )
+        return self._bqml_model.global_explain(X)
 
     def score(
         self,
diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py
index 570c65d46c..a750d22173 100644
--- a/bigframes/ml/sql.py
+++ b/bigframes/ml/sql.py
@@ -312,13 +312,10 @@ def ml_explain_predict(
         return f"""SELECT * FROM ML.EXPLAIN_PREDICT(MODEL {self._model_ref_sql()},
   ({source_sql}), {struct_options_sql})"""
 
-    def ml_global_explain(
-        self, source_sql: str, struct_options: Mapping[str, bool]
-    ) -> str:
+    def ml_global_explain(self, source_sql: str) -> str:
         """Encode ML.GLOBAL_EXPLAIN for BQML"""
-        struct_options_sql = self.struct_options(**struct_options)
         return f"""SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL {self._model_ref_sql()},
-  ({source_sql}), {struct_options_sql})"""
+  ({source_sql}))"""
 
     def ml_forecast(self, struct_options: Mapping[str, Union[int, float]]) -> str:
         """Encode ML.FORECAST for BQML"""
diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py
index 03bce4ad93..9e21c33046 100644
--- a/samples/snippets/linear_regression_tutorial_test.py
+++ b/samples/snippets/linear_regression_tutorial_test.py
@@ -93,7 +93,7 @@ def test_linear_regression(random_model_id: str) -> None:
     # 4	 4637.165037	        [{'feature': 'island', 'attribution': 7348.877...	-5320.222128	          4637.165037	            0.0	         Gentoo penguin (Pygoscelis papua)	Biscoe	    46.1	              13.2	        211.0	           4500.0	   FEMALE
     # [END bigquery_dataframes_bqml_linear_predict_explain]
     # [START bigquery_dataframes_bqml_linear_global_explain]
-    explain_model = model.global_explain(biscoe_data, class_level_explain=True)
+    explain_model = model.global_explain(label_columns["body_mass_g"])
     # [END bigquery_dataframes_bqml_linear_global_explain]
     assert explain_model is not None
     assert feature_columns is not None

From 82a234a8e99b33cd547c54ff82263d0ff5a46d09 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Tue, 11 Mar 2025 19:42:00 +0000
Subject: [PATCH 03/15] working global_explain()

---
 bigframes/ml/core.py         | 9 ++++++---
 bigframes/ml/linear_model.py | 8 ++++----
 bigframes/ml/sql.py          | 5 +++--
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index cc61554c29..5cfb457eb2 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -134,10 +134,13 @@ def explain_predict(
             ),
         )
 
-    def global_explain(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
-        return self._apply_ml_tvf(
-            input_data, self._model_manipulation_sql_generator.ml_global_explain
+    def global_explain(self, options: Mapping[str, bool]) -> bpd.DataFrame:
+        sql = self._model_manipulation_sql_generator.ml_global_explain(
+            struct_options=options
         )
+        return self._session.read_gbq(
+            sql,
+        ).reset_index()
 
     def transform(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
         return self._apply_ml_tvf(
diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py
index bcf92f8a45..984b333d1c 100644
--- a/bigframes/ml/linear_model.py
+++ b/bigframes/ml/linear_model.py
@@ -203,7 +203,9 @@ def predict_explain(
             X, options={"top_k_features": top_k_features}
         )
 
-    def global_explain(self, X: utils.ArrayType) -> bpd.DataFrame:
+    def global_explain(
+        self,
+    ) -> bpd.DataFrame:
         """
         Provide explanations for an entire linear regression model.
 
@@ -224,9 +226,7 @@ def global_explain(self, X: utils.ArrayType) -> bpd.DataFrame:
         if not self._bqml_model:
             raise RuntimeError("A model must be fitted before predict")
 
-        (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session)
-
-        return self._bqml_model.global_explain(X)
+        return self._bqml_model.global_explain({})
 
     def score(
         self,
diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py
index a750d22173..e89f17bcaa 100644
--- a/bigframes/ml/sql.py
+++ b/bigframes/ml/sql.py
@@ -312,10 +312,11 @@ def ml_explain_predict(
         return f"""SELECT * FROM ML.EXPLAIN_PREDICT(MODEL {self._model_ref_sql()},
   ({source_sql}), {struct_options_sql})"""
 
-    def ml_global_explain(self, source_sql: str) -> str:
+    def ml_global_explain(self, struct_options) -> str:
         """Encode ML.GLOBAL_EXPLAIN for BQML"""
+        struct_options_sql = self.struct_options(**struct_options)
         return f"""SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL {self._model_ref_sql()},
-  ({source_sql}))"""
+  {struct_options_sql})"""
 
     def ml_forecast(self, struct_options: Mapping[str, Union[int, float]]) -> str:
         """Encode ML.FORECAST for BQML"""

From ed73f88066f0b260496ba63a8b4a928acf632f47 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Tue, 11 Mar 2025 20:44:21 +0000
Subject: [PATCH 04/15] begin adding tests

---
 tests/system/small/ml/conftest.py          |  9 +++++++++
 tests/system/small/ml/test_linear_model.py | 21 +++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py
index 0e8489c513..dd3fbbf37f 100644
--- a/tests/system/small/ml/conftest.py
+++ b/tests/system/small/ml/conftest.py
@@ -84,6 +84,15 @@ def ephemera_penguins_linear_model(
     return bf_model
 
 
+@pytest.fixture(scope="function")
+def global_penguins_linear_model(
+    penguins_bqml_linear_model: core.BqmlModel,
+) -> linear_model.LinearRegression:
+    bf_model = linear_model.LinearRegression(enable_global_explain=True)
+    bf_model._bqml_model = penguins_bqml_linear_model
+    return bf_model
+
+
 @pytest.fixture(scope="session")
 def penguins_logistic_model(
     session, penguins_logistic_model_name
diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py
index da9fc8e14f..c51935b7eb 100644
--- a/tests/system/small/ml/test_linear_model.py
+++ b/tests/system/small/ml/test_linear_model.py
@@ -228,6 +228,27 @@ def test_to_gbq_saved_linear_reg_model_scores(
     )
 
 
+def test_linear_reg_model_global_explain(global_penguins_linear_model, new_penguins_df):
+    training_data = new_penguins_df.dropna(subset=["body_mass_g"])
+    X = training_data.drop(columns=["body_mass_g"])
+    y = training_data[["body_mass_g"]]
+    global_penguins_linear_model.fit(X, y)
+    global_ex = global_penguins_linear_model.global_explain()
+    assert global_ex.shape == (6, 3)
+    # result = predictions[["predicted_body_mass_g"]]
+    # expected = pandas.DataFrame(
+    #     {"predicted_body_mass_g": [4030.1, 3280.8, 3177.9]},
+    #     dtype="Float64",
+    #     index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
+    # )
+    # pandas.testing.assert_frame_equal(
+    #     result.sort_index(),
+    #     expected,
+    #     check_exact=False,
+    #     rtol=0.1,
+    # )
+
+
 def test_to_gbq_replace(penguins_linear_model, table_id_unique):
     penguins_linear_model.to_gbq(table_id_unique, replace=True)
     with pytest.raises(google.api_core.exceptions.Conflict):

From 47b98627527099a8bcd02a1e783eea8b74bdc531 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Wed, 12 Mar 2025 12:59:41 +0000
Subject: [PATCH 05/15] update snippet

---
 samples/snippets/linear_regression_tutorial_test.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py
index 9e21c33046..501dc8f446 100644
--- a/samples/snippets/linear_regression_tutorial_test.py
+++ b/samples/snippets/linear_regression_tutorial_test.py
@@ -93,7 +93,13 @@ def test_linear_regression(random_model_id: str) -> None:
     # 4	 4637.165037	        [{'feature': 'island', 'attribution': 7348.877...	-5320.222128	          4637.165037	            0.0	         Gentoo penguin (Pygoscelis papua)	Biscoe	    46.1	              13.2	        211.0	           4500.0	   FEMALE
     # [END bigquery_dataframes_bqml_linear_predict_explain]
     # [START bigquery_dataframes_bqml_linear_global_explain]
-    explain_model = model.global_explain(label_columns["body_mass_g"])
+    model = LinearRegression(enable_global_explain=True)
+    training_data = bq_df.dropna(subset=["body_mass_g"])
+    X = training_data.drop(columns=["body_mass_g"])
+    y = training_data[["body_mass_g"]]
+    model.fit(X, y)
+    model.to_gbq("bqml_tutorial.penguins_model", replace=True)
+    explain_model = model.global_explain()
     # [END bigquery_dataframes_bqml_linear_global_explain]
     assert explain_model is not None
     assert feature_columns is not None

From 7046dc3fc7b90bfeeae3cde42a10bc00f217ec3e Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Wed, 12 Mar 2025 21:14:30 +0000
Subject: [PATCH 06/15] complete snippet

---
 .../snippets/linear_regression_tutorial_test.py   | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py
index 501dc8f446..4cc385e97e 100644
--- a/samples/snippets/linear_regression_tutorial_test.py
+++ b/samples/snippets/linear_regression_tutorial_test.py
@@ -93,13 +93,28 @@ def test_linear_regression(random_model_id: str) -> None:
     # 4	 4637.165037	        [{'feature': 'island', 'attribution': 7348.877...	-5320.222128	          4637.165037	            0.0	         Gentoo penguin (Pygoscelis papua)	Biscoe	    46.1	              13.2	        211.0	           4500.0	   FEMALE
     # [END bigquery_dataframes_bqml_linear_predict_explain]
     # [START bigquery_dataframes_bqml_linear_global_explain]
+    # To use the `global_explain()` function, the model must be recreated with `enable_global_explain` set to `True`.
     model = LinearRegression(enable_global_explain=True)
+
+    # The model must then be fitted before it can be saved to BigQuery and explained.
     training_data = bq_df.dropna(subset=["body_mass_g"])
     X = training_data.drop(columns=["body_mass_g"])
     y = training_data[["body_mass_g"]]
     model.fit(X, y)
     model.to_gbq("bqml_tutorial.penguins_model", replace=True)
+
+    # Explain the model
     explain_model = model.global_explain()
+
+    # Expected results:
+    #    index	feature	            attribution
+    # 0	   0	flipper_length_mm	193.612051
+    # 1	   1	sex	                5139.35423
+    # 2	   2	culmen_depth_mm	    117.084944
+    # 3	   3	species	            4259.554372
+    # 4	   4	island	            7330.53279
+    # 5	   5	culmen_length_mm	94.366793
+
     # [END bigquery_dataframes_bqml_linear_global_explain]
     assert explain_model is not None
     assert feature_columns is not None

From b0b9552c63b49d734062b4af7f49c267be2991d7 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Wed, 12 Mar 2025 22:54:40 +0000
Subject: [PATCH 07/15] failing, near complete linear model test

---
 tests/system/small/ml/test_linear_model.py | 34 ++++++++++++++--------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py
index c51935b7eb..dea1a38c74 100644
--- a/tests/system/small/ml/test_linear_model.py
+++ b/tests/system/small/ml/test_linear_model.py
@@ -235,18 +235,28 @@ def test_linear_reg_model_global_explain(global_penguins_linear_model, new_pengu
     global_penguins_linear_model.fit(X, y)
     global_ex = global_penguins_linear_model.global_explain()
     assert global_ex.shape == (6, 3)
-    # result = predictions[["predicted_body_mass_g"]]
-    # expected = pandas.DataFrame(
-    #     {"predicted_body_mass_g": [4030.1, 3280.8, 3177.9]},
-    #     dtype="Float64",
-    #     index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
-    # )
-    # pandas.testing.assert_frame_equal(
-    #     result.sort_index(),
-    #     expected,
-    #     check_exact=False,
-    #     rtol=0.1,
-    # )
+    global_columns = set(global_ex.columns)
+    expected_columns = {"index", "feature", "attribution"}
+    assert expected_columns <= global_columns
+    result = global_ex["attribution"].to_pandas()
+    expected = pandas.DataFrame(
+        {
+            "attribution": [
+                193.612051,
+                5139.35423,
+                117.084944,
+                4259.554372,
+                7330.53279,
+                94.366793,
+            ]
+        },
+        dtype="Float64",
+    )
+    pandas.testing.assert_frame_equal(
+        result,
+        expected,
+        check_exact=False,
+    )
 
 
 def test_to_gbq_replace(penguins_linear_model, table_id_unique):

From 1ad520833c1daf9ab4234c8b29f3242031d9a6cf Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Fri, 14 Mar 2025 17:34:11 +0000
Subject: [PATCH 08/15] passing system test

---
 tests/system/small/ml/test_linear_model.py | 41 +++++++++++++---------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py
index dea1a38c74..a3add631d4 100644
--- a/tests/system/small/ml/test_linear_model.py
+++ b/tests/system/small/ml/test_linear_model.py
@@ -235,27 +235,34 @@ def test_linear_reg_model_global_explain(global_penguins_linear_model, new_pengu
     global_penguins_linear_model.fit(X, y)
     global_ex = global_penguins_linear_model.global_explain()
     assert global_ex.shape == (6, 3)
-    global_columns = set(global_ex.columns)
-    expected_columns = {"index", "feature", "attribution"}
-    assert expected_columns <= global_columns
-    result = global_ex["attribution"].to_pandas()
-    expected = pandas.DataFrame(
-        {
-            "attribution": [
-                193.612051,
-                5139.35423,
-                117.084944,
-                4259.554372,
-                7330.53279,
-                94.366793,
-            ]
-        },
-        dtype="Float64",
+    expected_columns = pandas.Index(["index", "feature", "attribution"])
+    pandas.testing.assert_index_equal(global_ex.columns, expected_columns)
+    result = global_ex[["feature"]].to_pandas().set_index("feature").sort_index()
+    features = pandas.Series(
+        [
+            "flipper_length_mm",
+            "species",
+            "sex",
+            "culmen_depth_mm",
+            "culmen_length_mm",
+            "island",
+        ],
+        dtype=pandas.StringDtype(storage="pyarrow"),
+    )
+    expected_feature = (
+        pandas.DataFrame(
+            {
+                "feature": features,
+            }
+        )
+        .set_index("feature")
+        .sort_index()
     )
     pandas.testing.assert_frame_equal(
         result,
-        expected,
+        expected_feature,
         check_exact=False,
+        check_index_type=False,
     )
 
 

From a600539353bc3a60d6b3b8d6fba39e767cee15b0 Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Mon, 17 Mar 2025 12:30:09 -0500
Subject: [PATCH 09/15] Update core.py - set index to have sorted by feature

---
 bigframes/ml/core.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index 5cfb457eb2..fd77f09282 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -138,9 +138,7 @@ def global_explain(self, options: Mapping[str, bool]) -> bpd.DataFrame:
         sql = self._model_manipulation_sql_generator.ml_global_explain(
             struct_options=options
         )
-        return self._session.read_gbq(
-            sql,
-        ).reset_index()
+        return self._session.read_gbq(sql).sort_values(by='attribution', ascending=False).set_index("feature")
 
     def transform(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
         return self._apply_ml_tvf(

From 7fc0cc6d29c99a1d42ff5634f3730687c4791240 Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Mon, 17 Mar 2025 15:13:04 -0500
Subject: [PATCH 10/15] Update test_linear_model.py - remove set/set index

---
 tests/system/small/ml/test_linear_model.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py
index a3add631d4..ba0968c9bd 100644
--- a/tests/system/small/ml/test_linear_model.py
+++ b/tests/system/small/ml/test_linear_model.py
@@ -237,7 +237,7 @@ def test_linear_reg_model_global_explain(global_penguins_linear_model, new_pengu
     assert global_ex.shape == (6, 3)
     expected_columns = pandas.Index(["index", "feature", "attribution"])
     pandas.testing.assert_index_equal(global_ex.columns, expected_columns)
-    result = global_ex[["feature"]].to_pandas().set_index("feature").sort_index()
+    result = global_ex[["feature"]].to_pandas()
     features = pandas.Series(
         [
             "flipper_length_mm",
@@ -255,8 +255,6 @@ def test_linear_reg_model_global_explain(global_penguins_linear_model, new_pengu
                 "feature": features,
             }
         )
-        .set_index("feature")
-        .sort_index()
     )
     pandas.testing.assert_frame_equal(
         result,

From 57c3d4a0e4fa854e9883be2c4e177f9f74ac58ba Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Mon, 17 Mar 2025 15:42:44 -0500
Subject: [PATCH 11/15] Update linear_model.py - fix doc section

---
 bigframes/ml/linear_model.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py
index 984b333d1c..ae9fd8db13 100644
--- a/bigframes/ml/linear_model.py
+++ b/bigframes/ml/linear_model.py
@@ -210,14 +210,9 @@ def global_explain(
         Provide explanations for an entire linear regression model.
 
         .. note::
-            Output matches that of the BigQuery ML.GLOBAL_PREDICT function.
+            Output matches that of the BigQuery ML.GLOBAL_EXPLAIN function.
             See: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-global-explain
 
-        Args:
-            X (bigframes.dataframe.DataFrame or bigframes.series.Series or
-            pandas.core.frame.DataFrame or pandas.core.series.Series):
-                Series or a DataFrame to explain its predictions.
-
         Returns:
             bigframes.pandas.DataFrame:
                 The predicted DataFrames with feature and attribution columns.

From c2c08377ede5dba1e10c2a8d2b8e1085fd6b3871 Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Mon, 17 Mar 2025 15:44:23 -0500
Subject: [PATCH 12/15] Update conftest.py - rename penguins w global explain

---
 tests/system/small/ml/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py
index dd3fbbf37f..2b9392f523 100644
--- a/tests/system/small/ml/conftest.py
+++ b/tests/system/small/ml/conftest.py
@@ -85,7 +85,7 @@ def ephemera_penguins_linear_model(
 
 
 @pytest.fixture(scope="function")
-def global_penguins_linear_model(
+def penguins_linear_model_w_global_explain(
     penguins_bqml_linear_model: core.BqmlModel,
 ) -> linear_model.LinearRegression:
     bf_model = linear_model.LinearRegression(enable_global_explain=True)

From 3a0c6b97c0e622b52d8787f604e60ac688810dbd Mon Sep 17 00:00:00 2001
From: rey-esp <drespana@google.com>
Date: Mon, 17 Mar 2025 16:39:01 -0500
Subject: [PATCH 13/15] Update linear_model.py - complete doc

---
 bigframes/ml/linear_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py
index ae9fd8db13..3774a62c0c 100644
--- a/bigframes/ml/linear_model.py
+++ b/bigframes/ml/linear_model.py
@@ -215,7 +215,7 @@ def global_explain(
 
         Returns:
             bigframes.pandas.DataFrame:
-                The predicted DataFrames with feature and attribution columns.
+                DataFrame indexed by feature with an attribution column, sorted by attribution in descending order, providing a global explanation of each feature's influence.
         """
 
         if not self._bqml_model:

From 5dac41d21464d37571ad40f16c437f27cd5c2f6b Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Mon, 17 Mar 2025 21:49:01 +0000
Subject: [PATCH 14/15] lint

---
 bigframes/ml/core.py                       |  6 +++++-
 tests/system/small/ml/test_linear_model.py | 10 ++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index fd77f09282..01917fd6d8 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -138,7 +138,11 @@ def global_explain(self, options: Mapping[str, bool]) -> bpd.DataFrame:
         sql = self._model_manipulation_sql_generator.ml_global_explain(
             struct_options=options
         )
-        return self._session.read_gbq(sql).sort_values(by='attribution', ascending=False).set_index("feature")
+        return (
+            self._session.read_gbq(sql)
+            .sort_values(by="attribution", ascending=False)
+            .set_index("feature")
+        )
 
     def transform(self, input_data: bpd.DataFrame) -> bpd.DataFrame:
         return self._apply_ml_tvf(
diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py
index ba0968c9bd..efb4c3c807 100644
--- a/tests/system/small/ml/test_linear_model.py
+++ b/tests/system/small/ml/test_linear_model.py
@@ -249,12 +249,10 @@ def test_linear_reg_model_global_explain(global_penguins_linear_model, new_pengu
         ],
         dtype=pandas.StringDtype(storage="pyarrow"),
     )
-    expected_feature = (
-        pandas.DataFrame(
-            {
-                "feature": features,
-            }
-        )
+    expected_feature = pandas.DataFrame(
+        {
+            "feature": features,
+        }
     )
     pandas.testing.assert_frame_equal(
         result,

From e5f4aad904106841cf9b72a25d10d4aa105e7d22 Mon Sep 17 00:00:00 2001
From: Daniela <drespana@google.com>
Date: Tue, 18 Mar 2025 15:20:13 +0000
Subject: [PATCH 15/15] passing test and fixed expected results

---
 .../linear_regression_tutorial_test.py        | 16 +++----
 tests/system/small/ml/test_linear_model.py    | 44 ++++++++++---------
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py
index 4cc385e97e..8fc1c5ad61 100644
--- a/samples/snippets/linear_regression_tutorial_test.py
+++ b/samples/snippets/linear_regression_tutorial_test.py
@@ -107,14 +107,14 @@ def test_linear_regression(random_model_id: str) -> None:
     explain_model = model.global_explain()
 
     # Expected results:
-    #    index	feature	            attribution
-    # 0	   0	flipper_length_mm	193.612051
-    # 1	   1	sex	                5139.35423
-    # 2	   2	culmen_depth_mm	    117.084944
-    # 3	   3	species	            4259.554372
-    # 4	   4	island	            7330.53279
-    # 5	   5	culmen_length_mm	94.366793
-
+    #                       attribution
+    # feature
+    # island	            5737.315921
+    # species	            4073.280549
+    # sex	                622.070896
+    # flipper_length_mm	    193.612051
+    # culmen_depth_mm	    117.084944
+    # culmen_length_mm	    94.366793
     # [END bigquery_dataframes_bqml_linear_global_explain]
     assert explain_model is not None
     assert feature_columns is not None
diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py
index efb4c3c807..8b04d55e61 100644
--- a/tests/system/small/ml/test_linear_model.py
+++ b/tests/system/small/ml/test_linear_model.py
@@ -228,31 +228,33 @@ def test_to_gbq_saved_linear_reg_model_scores(
     )
 
 
-def test_linear_reg_model_global_explain(global_penguins_linear_model, new_penguins_df):
+def test_linear_reg_model_global_explain(
+    penguins_linear_model_w_global_explain, new_penguins_df
+):
     training_data = new_penguins_df.dropna(subset=["body_mass_g"])
     X = training_data.drop(columns=["body_mass_g"])
     y = training_data[["body_mass_g"]]
-    global_penguins_linear_model.fit(X, y)
-    global_ex = global_penguins_linear_model.global_explain()
-    assert global_ex.shape == (6, 3)
-    expected_columns = pandas.Index(["index", "feature", "attribution"])
+    penguins_linear_model_w_global_explain.fit(X, y)
+    global_ex = penguins_linear_model_w_global_explain.global_explain()
+    assert global_ex.shape == (6, 1)
+    expected_columns = pandas.Index(["attribution"])
     pandas.testing.assert_index_equal(global_ex.columns, expected_columns)
-    result = global_ex[["feature"]].to_pandas()
-    features = pandas.Series(
-        [
-            "flipper_length_mm",
-            "species",
-            "sex",
-            "culmen_depth_mm",
-            "culmen_length_mm",
-            "island",
-        ],
-        dtype=pandas.StringDtype(storage="pyarrow"),
-    )
-    expected_feature = pandas.DataFrame(
-        {
-            "feature": features,
-        }
+    result = global_ex.to_pandas().drop(["attribution"], axis=1).sort_index()
+    expected_feature = (
+        pandas.DataFrame(
+            {
+                "feature": [
+                    "island",
+                    "species",
+                    "sex",
+                    "flipper_length_mm",
+                    "culmen_depth_mm",
+                    "culmen_length_mm",
+                ]
+            },
+        )
+        .set_index("feature")
+        .sort_index()
     )
     pandas.testing.assert_frame_equal(
         result,