From 88c53fece6d4b2ff3a867f08f8523ad3e4067144 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 5 Jun 2025 23:19:17 +0000 Subject: [PATCH 1/4] Regarding the commit to refactor the system test for `cloud_build_service_account`: This commit refactors the system test `test_remote_function_via_session_custom_build_sa` in `tests/system/large/functions/test_remote_function.py` to align with the structure and validation approach of `test_remote_function_via_session_custom_sa`. The test now: - Uses the project "bigframes-dev-perf". - Sets `cloud_build_service_account` to "bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com". - Sets `cloud_function_service_account` to the same value for simplicity in this test. - Uses `cloud_function_ingress_settings="all"`. - Validates that `gcf.build_config.service_account` matches the provided `cloud_build_service_account`. - Employs a dedicated session for the test and ensures proper cleanup. --- bigframes/functions/_function_client.py | 3 + bigframes/functions/_function_session.py | 8 ++ bigframes/pandas/__init__.py | 31 +++++++ bigframes/session/__init__.py | 8 ++ .../large/functions/test_remote_function.py | 82 +++++++++++++++++++ 5 files changed, 132 insertions(+) diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index d03021dd23..e1320aaf48 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -77,6 +77,7 @@ def __init__( cloud_function_service_account=None, cloud_function_kms_key_name=None, cloud_function_docker_repository=None, + cloud_build_service_account=None, *, session: Session, ): @@ -94,6 +95,7 @@ def __init__( self._cloud_function_service_account = cloud_function_service_account self._cloud_function_kms_key_name = cloud_function_kms_key_name self._cloud_function_docker_repository = cloud_function_docker_repository + self._cloud_build_service_account = cloud_build_service_account def _create_bq_connection(self) -> None: if self._bq_connection_manager: @@ -452,6 +454,7 @@ def create_cloud_function( function.build_config.docker_repository = ( self._cloud_function_docker_repository ) + function.build_config.service_account = self._cloud_build_service_account function.service_config = functions_v2.ServiceConfig() if memory_mib is not None: function.service_config.available_memory = f"{memory_mib}Mi" diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index e18f7084db..e9a0a8fe3c 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -263,6 +263,7 @@ def remote_function( cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" ] = "internal-only", + cloud_build_service_account: Optional[str] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. @@ -453,6 +454,12 @@ def remote_function( If no setting is provided, `internal-only` will be used by default. See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. + cloud_build_service_account (str, Optional): + Service account to be used by Cloud Build to build the function + source code into a deployable artifact. If not provided, the + default Cloud Build service account is used. See + https://cloud.google.com/build/docs/cloud-build-service-account + for more details. 
""" # Some defaults may be used from the session if not provided otherwise. session = self._resolve_session(session) @@ -599,6 +606,7 @@ def wrapper(func): else cloud_function_service_account, cloud_function_kms_key_name, cloud_function_docker_repository, + cloud_build_service_account=cloud_build_service_account, session=session, # type: ignore ) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index d08ef4e91d..dfab626cf1 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -89,6 +89,7 @@ def remote_function( cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" ] = "internal-only", + cloud_build_service_account: Optional[str] = None, ): return global_session.with_default_session( bigframes.session.Session.remote_function, @@ -108,10 +109,40 @@ def remote_function( cloud_function_vpc_connector=cloud_function_vpc_connector, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, + cloud_build_service_account=cloud_build_service_account, ) remote_function.__doc__ = inspect.getdoc(bigframes.session.Session.remote_function) +if remote_function.__doc__ is not None: + # Manually append to the docstring as inspect.getdoc doesn't pick up the new param easily from the wrapper + doc_lines = remote_function.__doc__.splitlines() + # Find the end of the Args section + args_end_index = -1 + for i, line in enumerate(doc_lines): + if line.strip() == "" and i > 0 and "Args:" in doc_lines[i-2]: # Heuristic: blank line after Args' last param + # Check if previous non-blank lines were param descriptions + if any(doc_lines[j].strip().startswith(tuple(param.split(":")[0].strip() for param in doc_lines if ":" in param)[-4:])) : # check last few known params + args_end_index = i + break + if args_end_index == -1: # Fallback if heuristic fails, append before Returns or at end + try: + args_end_index = doc_lines.index(" Returns:") -1 + except ValueError: + args_end_index = len(doc_lines) + + + new_param_doc = [ + " cloud_build_service_account (str, Optional):", + " Service account to be used by Cloud Build to build the function", + " source code into a deployable artifact. If not provided, the", + " default Cloud Build service account is used. See", + " https://cloud.google.com/build/docs/cloud-build-service-account", + " for more details. This parameter is passed to the underlying session's remote_function.", + + ] + doc_lines[args_end_index:args_end_index] = new_param_doc + remote_function.__doc__ = "\n".join(doc_lines) def udf( diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 92708a7f93..b924af5d8e 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1374,6 +1374,7 @@ def remote_function( cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" ] = "internal-only", + cloud_build_service_account: Optional[str] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. Check out the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes. @@ -1549,6 +1550,12 @@ def remote_function( If no setting is provided, `internal-only` will be used by default. See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. + cloud_build_service_account (str, Optional): + Service account to be used by Cloud Build to build the function + source code into a deployable artifact. 
If not provided, the + default Cloud Build service account is used. See + https://cloud.google.com/build/docs/cloud-build-service-account + for more details. Returns: collections.abc.Callable: A remote function object pointing to the cloud assets created @@ -1577,6 +1584,7 @@ def remote_function( cloud_function_vpc_connector=cloud_function_vpc_connector, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, + cloud_build_service_account=cloud_build_service_account, ) def udf( diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 426813b0ff..8da5c60b6c 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -2114,6 +2114,88 @@ def foo(x: int) -> int: cleanup_function_assets(foo, session.bqclient, session.cloudfunctionsclient) +@pytest.mark.flaky(retries=2, delay=120) # Added flaky marker +def test_remote_function_via_session_custom_build_sa( + scalars_dfs, # Use existing fixture + bq_cf_connection: str, # Still need this for the specific connection +): + """ + Tests deploying and invoking a remote function specifying a cloud_build_service_account + through a custom session, using the bigframes-dev-perf project. + """ + project = "bigframes-dev-perf" + # This SA must exist in bigframes-dev-perf and have necessary permissions: + # - roles/cloudbuild.builds.builder + # - roles/iam.serviceAccountUser (on itself) + # - roles/storage.objectAdmin (on GCF source bucket) + # - roles/artifactregistry.writer (if using default AR repo) + custom_build_sa = f"bigframes-dev-perf-1@{project}.iam.gserviceaccount.com" + + # For simplicity, using the same SA for running the function. + # This SA also needs roles/run.invoker if the BQ connection SA is different. + # The bq_cf_connection's associated SA must be able to invoke functions run by custom_build_sa. + # If bq_cf_connection uses a default SA, then custom_build_sa needs run.invoker for that default SA. + # Or, grant the bq_cf_connection's SA the Service Account User role on custom_build_sa. + cf_runner_sa = custom_build_sa + + rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project)) + square_num_remote = None + + try: + @rf_session.remote_function( + input_types=[int], + output_type=int, # Changed from float to int to match square_num logic + reuse=False, + cloud_build_service_account=custom_build_sa, + cloud_function_service_account=cf_runner_sa, + cloud_function_ingress_settings="all", # Explicitly set for test environments + bigquery_connection=bq_cf_connection, # Use the provided connection + packages=["pandas==2.0.3"], # Ensure a build step + ) + def square_num(x): + if x is None: + return x # Or handle appropriately, e.g. 
raise error or return specific value + return x * x + + square_num_remote = square_num # Assign for cleanup + + # Assert that the GCF is created with the intended build SA + gcf = rf_session.cloudfunctionsclient.get_function( + name=square_num_remote.bigframes_cloud_function + ) + assert gcf.build_config.service_account == custom_build_sa + + # Test the function execution + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_col"].dropna() # Drop NA to avoid type errors in UDF + bf_result_col = bf_int64_col.apply(square_num_remote) + bf_result = ( + bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() + ) + + pd_int64_col = scalars_pandas_df["int64_col"].dropna() + pd_result_col = pd_int64_col.apply(lambda x: x * x) + pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) # Match expected output type + pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) + + assert_pandas_df_equal(bf_result, pd_result) + + print(f"Successfully deployed and tested remote function with build SA: {custom_build_sa}") + print(f"Function was run by SA: {cf_runner_sa}") + if hasattr(square_num_remote, "bigframes_cloud_function"): + print(f"Deployed GCF: {square_num_remote.bigframes_cloud_function}") + if hasattr(square_num_remote, "bigframes_remote_function"): + print(f"Created BQ Routine: {square_num_remote.bigframes_remote_function}") + + finally: + if square_num_remote is not None: + cleanup_function_assets( + square_num_remote, rf_session.bqclient, rf_session.cloudfunctionsclient + ) + rf_session.close() + + @pytest.mark.flaky(retries=2, delay=120) def test_remote_function_clean_up_by_session_id(): # Use a brand new session to avoid conflict with other tests From ed00cbaa832ce3fc6aa058b1e8cd4013e9588b8f Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 5 Jun 2025 23:23:17 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/pandas/__init__.py | 28 ++++++++++----- bigframes/session/__init__.py | 4 +-- .../large/functions/test_remote_function.py | 35 +++++++++++-------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index dfab626cf1..1162b46766 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -120,18 +120,31 @@ def remote_function( # Find the end of the Args section args_end_index = -1 for i, line in enumerate(doc_lines): - if line.strip() == "" and i > 0 and "Args:" in doc_lines[i-2]: # Heuristic: blank line after Args' last param + if ( + line.strip() == "" and i > 0 and "Args:" in doc_lines[i - 2] + ): # Heuristic: blank line after Args' last param # Check if previous non-blank lines were param descriptions - if any(doc_lines[j].strip().startswith(tuple(param.split(":")[0].strip() for param in doc_lines if ":" in param)[-4:])) : # check last few known params - args_end_index = i - break - if args_end_index == -1: # Fallback if heuristic fails, append before Returns or at end + if any( + doc_lines[j] + .strip() + .startswith( + tuple( + param.split(":")[0].strip() + for param in doc_lines + if ":" in param + )[-4:] + ) + ): # check last few known params + args_end_index = i + break + if ( + args_end_index == -1 + ): # Fallback if heuristic fails, append before Returns or at end try: - args_end_index = 
doc_lines.index(" Returns:") -1 + args_end_index = doc_lines.index(" Returns:") - 1 except ValueError: args_end_index = len(doc_lines) - new_param_doc = [ " cloud_build_service_account (str, Optional):", " Service account to be used by Cloud Build to build the function", @@ -139,7 +152,6 @@ def remote_function( " default Cloud Build service account is used. See", " https://cloud.google.com/build/docs/cloud-build-service-account", " for more details. This parameter is passed to the underlying session's remote_function.", - ] doc_lines[args_end_index:args_end_index] = new_param_doc remote_function.__doc__ = "\n".join(doc_lines) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index b924af5d8e..4f6ef5d20d 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1374,7 +1374,7 @@ def remote_function( cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" ] = "internal-only", - cloud_build_service_account: Optional[str] = None, + cloud_build_service_account: Optional[str] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. Check out the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes. @@ -1584,7 +1584,7 @@ def remote_function( cloud_function_vpc_connector=cloud_function_vpc_connector, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, - cloud_build_service_account=cloud_build_service_account, + cloud_build_service_account=cloud_build_service_account, ) def udf( diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 8da5c60b6c..3e2332ad00 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -2114,10 +2114,10 @@ def foo(x: int) -> int: cleanup_function_assets(foo, session.bqclient, session.cloudfunctionsclient) -@pytest.mark.flaky(retries=2, delay=120) # Added flaky marker +@pytest.mark.flaky(retries=2, delay=120) # Added flaky marker def test_remote_function_via_session_custom_build_sa( - scalars_dfs, # Use existing fixture - bq_cf_connection: str, # Still need this for the specific connection + scalars_dfs, # Use existing fixture + bq_cf_connection: str, # Still need this for the specific connection ): """ Tests deploying and invoking a remote function specifying a cloud_build_service_account @@ -2142,22 +2142,23 @@ def test_remote_function_via_session_custom_build_sa( square_num_remote = None try: + @rf_session.remote_function( input_types=[int], - output_type=int, # Changed from float to int to match square_num logic + output_type=int, # Changed from float to int to match square_num logic reuse=False, cloud_build_service_account=custom_build_sa, cloud_function_service_account=cf_runner_sa, - cloud_function_ingress_settings="all", # Explicitly set for test environments - bigquery_connection=bq_cf_connection, # Use the provided connection - packages=["pandas==2.0.3"], # Ensure a build step + cloud_function_ingress_settings="all", # Explicitly set for test environments + bigquery_connection=bq_cf_connection, # Use the provided connection + packages=["pandas==2.0.3"], # Ensure a build step ) def square_num(x): if x is None: - return x # Or handle appropriately, e.g. raise error or return specific value + return x # Or handle appropriately, e.g. 
raise error or return specific value return x * x - square_num_remote = square_num # Assign for cleanup + square_num_remote = square_num # Assign for cleanup # Assert that the GCF is created with the intended build SA gcf = rf_session.cloudfunctionsclient.get_function( @@ -2168,20 +2169,24 @@ def square_num(x): # Test the function execution scalars_df, scalars_pandas_df = scalars_dfs - bf_int64_col = scalars_df["int64_col"].dropna() # Drop NA to avoid type errors in UDF + bf_int64_col = scalars_df[ + "int64_col" + ].dropna() # Drop NA to avoid type errors in UDF bf_result_col = bf_int64_col.apply(square_num_remote) - bf_result = ( - bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() - ) + bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() pd_int64_col = scalars_pandas_df["int64_col"].dropna() pd_result_col = pd_int64_col.apply(lambda x: x * x) - pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) # Match expected output type + pd_result_col = pd_result_col.astype( + pandas.Int64Dtype() + ) # Match expected output type pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) assert_pandas_df_equal(bf_result, pd_result) - print(f"Successfully deployed and tested remote function with build SA: {custom_build_sa}") + print( + f"Successfully deployed and tested remote function with build SA: {custom_build_sa}" + ) print(f"Function was run by SA: {cf_runner_sa}") if hasattr(square_num_remote, "bigframes_cloud_function"): print(f"Deployed GCF: {square_num_remote.bigframes_cloud_function}") From e9fd9b7c97575dae2d4f1935b1d82fa2e0905ef3 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 11 Jun 2025 01:41:28 +0000 Subject: [PATCH 3/4] add proper test, improve documentation --- bigframes/functions/_function_client.py | 12 +- bigframes/functions/_function_session.py | 10 +- bigframes/pandas/__init__.py | 41 ----- bigframes/session/__init__.py | 10 +- .../large/functions/test_remote_function.py | 163 ++++++++---------- 5 files changed, 100 insertions(+), 136 deletions(-) diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index e1320aaf48..fc8c7769b6 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -454,7 +454,17 @@ def create_cloud_function( function.build_config.docker_repository = ( self._cloud_function_docker_repository ) - function.build_config.service_account = self._cloud_build_service_account + + if self._cloud_build_service_account: + canonical_cloud_build_service_account = ( + self._cloud_build_service_account + ) + if "/" not in canonical_cloud_build_service_account: + canonical_cloud_build_service_account = f"projects/{self._gcp_project_id}/serviceAccounts/{canonical_cloud_build_service_account}" + function.build_config.service_account = ( + canonical_cloud_build_service_account + ) + function.service_config = functions_v2.ServiceConfig() if memory_mib is not None: function.service_config.available_memory = f"{memory_mib}Mi" diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index e9a0a8fe3c..2fb3480d6c 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -455,9 +455,13 @@ def remote_function( See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. 
cloud_build_service_account (str, Optional): - Service account to be used by Cloud Build to build the function - source code into a deployable artifact. If not provided, the - default Cloud Build service account is used. See + Service account in the fully qualified format + `projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or + just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted + as belonging to the BigQuery DataFrames session project. This is + to be used by Cloud Build to build the function source code into + a deployable artifact. If not provided, the default Cloud Build + service account is used. See https://cloud.google.com/build/docs/cloud-build-service-account for more details. """ diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 1162b46766..e8253769be 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -114,47 +114,6 @@ def remote_function( remote_function.__doc__ = inspect.getdoc(bigframes.session.Session.remote_function) -if remote_function.__doc__ is not None: - # Manually append to the docstring as inspect.getdoc doesn't pick up the new param easily from the wrapper - doc_lines = remote_function.__doc__.splitlines() - # Find the end of the Args section - args_end_index = -1 - for i, line in enumerate(doc_lines): - if ( - line.strip() == "" and i > 0 and "Args:" in doc_lines[i - 2] - ): # Heuristic: blank line after Args' last param - # Check if previous non-blank lines were param descriptions - if any( - doc_lines[j] - .strip() - .startswith( - tuple( - param.split(":")[0].strip() - for param in doc_lines - if ":" in param - )[-4:] - ) - ): # check last few known params - args_end_index = i - break - if ( - args_end_index == -1 - ): # Fallback if heuristic fails, append before Returns or at end - try: - args_end_index = doc_lines.index(" Returns:") - 1 - except ValueError: - args_end_index = len(doc_lines) - - new_param_doc = [ - " cloud_build_service_account (str, Optional):", - " Service account to be used by Cloud Build to build the function", - " source code into a deployable artifact. If not provided, the", - " default Cloud Build service account is used. See", - " https://cloud.google.com/build/docs/cloud-build-service-account", - " for more details. This parameter is passed to the underlying session's remote_function.", - ] - doc_lines[args_end_index:args_end_index] = new_param_doc - remote_function.__doc__ = "\n".join(doc_lines) def udf( diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index d350fc9e8a..b6066daed3 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1555,9 +1555,13 @@ def remote_function( See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. cloud_build_service_account (str, Optional): - Service account to be used by Cloud Build to build the function - source code into a deployable artifact. If not provided, the - default Cloud Build service account is used. See + Service account in the fully qualified format + `projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or + just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted + as belonging to the BigQuery DataFrames session project. This is + to be used by Cloud Build to build the function source code into + a deployable artifact. If not provided, the default Cloud Build + service account is used. See https://cloud.google.com/build/docs/cloud-build-service-account for more details. 
Returns: diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 3e2332ad00..7ff3ba67df 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -1342,7 +1342,7 @@ def test_remote_function_via_session_custom_sa(scalars_dfs): # For upfront convenience, the following set up has been statically created # in the project bigfrmames-dev-perf via cloud console: # - # 1. Create a service account as per + # 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per # https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console # 2. Give necessary roles as per # https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration @@ -1395,6 +1395,80 @@ def square_num(x): ) +@pytest.mark.parametrize( + ("set_build_service_account"), + [ + pytest.param( + "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com", + id="fully-qualified-sa", + ), + pytest.param( + "bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com", + id="just-sa-email", + ), + ], +) +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_via_session_custom_build_sa( + scalars_dfs, set_build_service_account +): + # TODO(shobs): Automate the following set-up during testing in the test project. + # + # For upfront convenience, the following set up has been statically created + # in the project bigfrmames-dev-perf via cloud console: + # + # 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per + # https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console + # 2. 
Give "Cloud Build Service Account (roles/cloudbuild.builds.builder)" role as per + # https://cloud.google.com/build/docs/cloud-build-service-account#default_permissions_of_the_legacy_service_account + # + project = "bigframes-dev-perf" + expected_build_service_account = "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com" + + rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project)) + + try: + + # TODO(shobs): Figure out why the default ingress setting + # (internal-only) does not work here + @rf_session.remote_function( + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + cloud_build_service_account=set_build_service_account, + cloud_function_ingress_settings="all", + ) + def square_num(x): + if x is None: + return x + return x * x + + # assert that the GCF is created with the intended SA + gcf = rf_session.cloudfunctionsclient.get_function( + name=square_num.bigframes_cloud_function + ) + assert gcf.build_config.service_account == expected_build_service_account + + # assert that the function works as expected on data + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_col"] + bf_result_col = bf_int64_col.apply(square_num) + bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_col"] + pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x) + pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) + + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + finally: + # clean up the gcp assets created for the remote function + cleanup_function_assets( + square_num, rf_session.bqclient, rf_session.cloudfunctionsclient + ) + + def test_remote_function_throws_none_cloud_function_service_account(session): with pytest.raises( ValueError, @@ -2114,93 +2188,6 @@ def foo(x: int) -> int: cleanup_function_assets(foo, session.bqclient, session.cloudfunctionsclient) -@pytest.mark.flaky(retries=2, delay=120) # Added flaky marker -def test_remote_function_via_session_custom_build_sa( - scalars_dfs, # Use existing fixture - bq_cf_connection: str, # Still need this for the specific connection -): - """ - Tests deploying and invoking a remote function specifying a cloud_build_service_account - through a custom session, using the bigframes-dev-perf project. - """ - project = "bigframes-dev-perf" - # This SA must exist in bigframes-dev-perf and have necessary permissions: - # - roles/cloudbuild.builds.builder - # - roles/iam.serviceAccountUser (on itself) - # - roles/storage.objectAdmin (on GCF source bucket) - # - roles/artifactregistry.writer (if using default AR repo) - custom_build_sa = f"bigframes-dev-perf-1@{project}.iam.gserviceaccount.com" - - # For simplicity, using the same SA for running the function. - # This SA also needs roles/run.invoker if the BQ connection SA is different. - # The bq_cf_connection's associated SA must be able to invoke functions run by custom_build_sa. - # If bq_cf_connection uses a default SA, then custom_build_sa needs run.invoker for that default SA. - # Or, grant the bq_cf_connection's SA the Service Account User role on custom_build_sa. 
- cf_runner_sa = custom_build_sa - - rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project)) - square_num_remote = None - - try: - - @rf_session.remote_function( - input_types=[int], - output_type=int, # Changed from float to int to match square_num logic - reuse=False, - cloud_build_service_account=custom_build_sa, - cloud_function_service_account=cf_runner_sa, - cloud_function_ingress_settings="all", # Explicitly set for test environments - bigquery_connection=bq_cf_connection, # Use the provided connection - packages=["pandas==2.0.3"], # Ensure a build step - ) - def square_num(x): - if x is None: - return x # Or handle appropriately, e.g. raise error or return specific value - return x * x - - square_num_remote = square_num # Assign for cleanup - - # Assert that the GCF is created with the intended build SA - gcf = rf_session.cloudfunctionsclient.get_function( - name=square_num_remote.bigframes_cloud_function - ) - assert gcf.build_config.service_account == custom_build_sa - - # Test the function execution - scalars_df, scalars_pandas_df = scalars_dfs - - bf_int64_col = scalars_df[ - "int64_col" - ].dropna() # Drop NA to avoid type errors in UDF - bf_result_col = bf_int64_col.apply(square_num_remote) - bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() - - pd_int64_col = scalars_pandas_df["int64_col"].dropna() - pd_result_col = pd_int64_col.apply(lambda x: x * x) - pd_result_col = pd_result_col.astype( - pandas.Int64Dtype() - ) # Match expected output type - pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) - - assert_pandas_df_equal(bf_result, pd_result) - - print( - f"Successfully deployed and tested remote function with build SA: {custom_build_sa}" - ) - print(f"Function was run by SA: {cf_runner_sa}") - if hasattr(square_num_remote, "bigframes_cloud_function"): - print(f"Deployed GCF: {square_num_remote.bigframes_cloud_function}") - if hasattr(square_num_remote, "bigframes_remote_function"): - print(f"Created BQ Routine: {square_num_remote.bigframes_remote_function}") - - finally: - if square_num_remote is not None: - cleanup_function_assets( - square_num_remote, rf_session.bqclient, rf_session.cloudfunctionsclient - ) - rf_session.close() - - @pytest.mark.flaky(retries=2, delay=120) def test_remote_function_clean_up_by_session_id(): # Use a brand new session to avoid conflict with other tests From a7f497800bf1e050531d298704dc86d69e03531f Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 11 Jun 2025 19:14:06 +0000 Subject: [PATCH 4/4] nit rewording for readability --- bigframes/functions/_function_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index fc8c7769b6..e818015a9b 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -458,9 +458,9 @@ def create_cloud_function( if self._cloud_build_service_account: canonical_cloud_build_service_account = ( self._cloud_build_service_account + if "/" in self._cloud_build_service_account + else f"projects/{self._gcp_project_id}/serviceAccounts/{self._cloud_build_service_account}" ) - if "/" not in canonical_cloud_build_service_account: - canonical_cloud_build_service_account = f"projects/{self._gcp_project_id}/serviceAccounts/{canonical_cloud_build_service_account}" function.build_config.service_account = ( canonical_cloud_build_service_account )
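
Taken together, the series threads a single new `cloud_build_service_account` argument from `bigframes.pandas.remote_function` through `Session.remote_function` and `_function_session` into `_function_client.create_cloud_function`, where it is canonicalized before being written to `function.build_config.service_account`. The sketch below restates that canonicalization rule from the patch 4 hunk as a standalone, runnable helper; the helper name and the `__main__` block are illustrative only and are not part of the library surface (the example project and email values are the ones used by the system test).

    # Minimal sketch of the canonicalization rule applied in
    # create_cloud_function (patch 4). Helper name is illustrative only.

    def canonicalize_build_service_account(account: str, project_id: str) -> str:
        """Expand a bare service-account email to the fully qualified
        projects/<project>/serviceAccounts/<email> form; values that already
        contain a slash are passed through unchanged.
        """
        return (
            account
            if "/" in account
            else f"projects/{project_id}/serviceAccounts/{account}"
        )

    if __name__ == "__main__":
        project = "bigframes-dev-perf"
        email = "bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com"
        qualified = f"projects/{project}/serviceAccounts/{email}"

        # Both parametrizations of test_remote_function_via_session_custom_build_sa
        # ("fully-qualified-sa" and "just-sa-email") resolve to the same value that
        # the test asserts against gcf.build_config.service_account.
        assert canonicalize_build_service_account(email, project) == qualified
        assert canonicalize_build_service_account(qualified, project) == qualified
        print(qualified)

This is also why the parametrized test added in patch 3 can feed either input form to the decorator and compare the resulting `gcf.build_config.service_account` against a single `expected_build_service_account` string.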