diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index d03021dd23..e818015a9b 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -77,6 +77,7 @@ def __init__( cloud_function_service_account=None, cloud_function_kms_key_name=None, cloud_function_docker_repository=None, + cloud_build_service_account=None, *, session: Session, ): @@ -94,6 +95,7 @@ def __init__( self._cloud_function_service_account = cloud_function_service_account self._cloud_function_kms_key_name = cloud_function_kms_key_name self._cloud_function_docker_repository = cloud_function_docker_repository + self._cloud_build_service_account = cloud_build_service_account def _create_bq_connection(self) -> None: if self._bq_connection_manager: @@ -452,6 +454,17 @@ def create_cloud_function( function.build_config.docker_repository = ( self._cloud_function_docker_repository ) + + if self._cloud_build_service_account: + canonical_cloud_build_service_account = ( + self._cloud_build_service_account + if "/" in self._cloud_build_service_account + else f"projects/{self._gcp_project_id}/serviceAccounts/{self._cloud_build_service_account}" + ) + function.build_config.service_account = ( + canonical_cloud_build_service_account + ) + function.service_config = functions_v2.ServiceConfig() if memory_mib is not None: function.service_config.available_memory = f"{memory_mib}Mi" diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index e18f7084db..2fb3480d6c 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -263,6 +263,7 @@ def remote_function( cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" ] = "internal-only", + cloud_build_service_account: Optional[str] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. 
@@ -453,6 +454,16 @@ def remote_function( If no setting is provided, `internal-only` will be used by default. See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. + cloud_build_service_account (str, Optional): + Service account in the fully qualified format + `projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or + just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted + as belonging to the BigQuery DataFrames session project. This is + to be used by Cloud Build to build the function source code into + a deployable artifact. If not provided, the default Cloud Build + service account is used. See + https://cloud.google.com/build/docs/cloud-build-service-account + for more details. """ # Some defaults may be used from the session if not provided otherwise. session = self._resolve_session(session) @@ -599,6 +610,7 @@ def wrapper(func): else cloud_function_service_account, cloud_function_kms_key_name, cloud_function_docker_repository, + cloud_build_service_account=cloud_build_service_account, session=session, # type: ignore ) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index d08ef4e91d..e8253769be 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -89,6 +89,7 @@ def remote_function( cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" ] = "internal-only", + cloud_build_service_account: Optional[str] = None, ): return global_session.with_default_session( bigframes.session.Session.remote_function, @@ -108,6 +109,7 @@ def remote_function( cloud_function_vpc_connector=cloud_function_vpc_connector, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, + cloud_build_service_account=cloud_build_service_account, ) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index ab09230c99..b6066daed3 100644 --- 
a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1378,6 +1378,7 @@ def remote_function( cloud_function_ingress_settings: Literal[ "all", "internal-only", "internal-and-gclb" ] = "internal-only", + cloud_build_service_account: Optional[str] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. Check out the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes. @@ -1553,6 +1554,16 @@ def remote_function( If no setting is provided, `internal-only` will be used by default. See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. + cloud_build_service_account (str, Optional): + Service account in the fully qualified format + `projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or + just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted + as belonging to the BigQuery DataFrames session project. This is + to be used by Cloud Build to build the function source code into + a deployable artifact. If not provided, the default Cloud Build + service account is used. See + https://cloud.google.com/build/docs/cloud-build-service-account + for more details. 
Returns: collections.abc.Callable: A remote function object pointing to the cloud assets created @@ -1581,6 +1592,7 @@ def remote_function( cloud_function_vpc_connector=cloud_function_vpc_connector, cloud_function_memory_mib=cloud_function_memory_mib, cloud_function_ingress_settings=cloud_function_ingress_settings, + cloud_build_service_account=cloud_build_service_account, ) def udf( diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 9e0dcfe4d7..172fff3010 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -1342,7 +1342,7 @@ def test_remote_function_via_session_custom_sa(scalars_dfs): # For upfront convenience, the following set up has been statically created # in the project bigfrmames-dev-perf via cloud console: # - # 1. Create a service account as per + # 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per # https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console # 2. Give necessary roles as per # https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration @@ -1395,6 +1395,80 @@ def square_num(x): ) +@pytest.mark.parametrize( + ("set_build_service_account"), + [ + pytest.param( + "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com", + id="fully-qualified-sa", + ), + pytest.param( + "bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com", + id="just-sa-email", + ), + ], +) +@pytest.mark.flaky(retries=2, delay=120) +def test_remote_function_via_session_custom_build_sa( + scalars_dfs, set_build_service_account +): + # TODO(shobs): Automate the following set-up during testing in the test project. 
+ # + # For upfront convenience, the following set up has been statically created + # in the project bigframes-dev-perf via cloud console: + # + # 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per + # https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console + # 2. Give "Cloud Build Service Account (roles/cloudbuild.builds.builder)" role as per + # https://cloud.google.com/build/docs/cloud-build-service-account#default_permissions_of_the_legacy_service_account + # + project = "bigframes-dev-perf" + expected_build_service_account = "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com" + + rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project)) + + try: + + # TODO(shobs): Figure out why the default ingress setting + # (internal-only) does not work here + @rf_session.remote_function( + input_types=[int], + output_type=int, + reuse=False, + cloud_function_service_account="default", + cloud_build_service_account=set_build_service_account, + cloud_function_ingress_settings="all", + ) + def square_num(x): + if x is None: + return x + return x * x + + # assert that the GCF is created with the intended SA + gcf = rf_session.cloudfunctionsclient.get_function( + name=square_num.bigframes_cloud_function + ) + assert gcf.build_config.service_account == expected_build_service_account + + # assert that the function works as expected on data + scalars_df, scalars_pandas_df = scalars_dfs + + bf_int64_col = scalars_df["int64_col"] + bf_result_col = bf_int64_col.apply(square_num) + bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas() + + pd_int64_col = scalars_pandas_df["int64_col"] + pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x) + pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) + + assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) + 
finally: + # clean up the gcp assets created for the remote function + cleanup_function_assets( + square_num, rf_session.bqclient, rf_session.cloudfunctionsclient + ) + + def test_remote_function_throws_none_cloud_function_service_account(session): with pytest.raises( ValueError,