Skip to content

feat: support custom build service account in remote_function #1796

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions bigframes/functions/_function_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(
cloud_function_service_account=None,
cloud_function_kms_key_name=None,
cloud_function_docker_repository=None,
cloud_build_service_account=None,
*,
session: Session,
):
Expand All @@ -94,6 +95,7 @@ def __init__(
self._cloud_function_service_account = cloud_function_service_account
self._cloud_function_kms_key_name = cloud_function_kms_key_name
self._cloud_function_docker_repository = cloud_function_docker_repository
self._cloud_build_service_account = cloud_build_service_account

def _create_bq_connection(self) -> None:
if self._bq_connection_manager:
Expand Down Expand Up @@ -452,6 +454,17 @@ def create_cloud_function(
function.build_config.docker_repository = (
self._cloud_function_docker_repository
)

if self._cloud_build_service_account:
canonical_cloud_build_service_account = (
self._cloud_build_service_account
if "/" in self._cloud_build_service_account
else f"projects/{self._gcp_project_id}/serviceAccounts/{self._cloud_build_service_account}"
)
function.build_config.service_account = (
canonical_cloud_build_service_account
)

function.service_config = functions_v2.ServiceConfig()
if memory_mib is not None:
function.service_config.available_memory = f"{memory_mib}Mi"
Expand Down
12 changes: 12 additions & 0 deletions bigframes/functions/_function_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ def remote_function(
cloud_function_ingress_settings: Literal[
"all", "internal-only", "internal-and-gclb"
] = "internal-only",
cloud_build_service_account: Optional[str] = None,
):
"""Decorator to turn a user defined function into a BigQuery remote function.

Expand Down Expand Up @@ -453,6 +454,16 @@ def remote_function(
If no setting is provided, `internal-only` will be used by default.
See
https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings
for more details.
cloud_build_service_account (str, Optional):
Service account in the fully qualified format
`projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or
just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted
as belonging to the BigQuery DataFrames session project. This is
to be used by Cloud Build to build the function source code into
a deployable artifact. If not provided, the default Cloud Build
service account is used. See
https://cloud.google.com/build/docs/cloud-build-service-account
for more details.
"""
# Some defaults may be used from the session if not provided otherwise.
session = self._resolve_session(session)
Expand Down Expand Up @@ -599,6 +610,7 @@ def wrapper(func):
else cloud_function_service_account,
cloud_function_kms_key_name,
cloud_function_docker_repository,
cloud_build_service_account=cloud_build_service_account,
session=session, # type: ignore
)

Expand Down
2 changes: 2 additions & 0 deletions bigframes/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def remote_function(
cloud_function_ingress_settings: Literal[
"all", "internal-only", "internal-and-gclb"
] = "internal-only",
cloud_build_service_account: Optional[str] = None,
):
return global_session.with_default_session(
bigframes.session.Session.remote_function,
Expand All @@ -108,6 +109,7 @@ def remote_function(
cloud_function_vpc_connector=cloud_function_vpc_connector,
cloud_function_memory_mib=cloud_function_memory_mib,
cloud_function_ingress_settings=cloud_function_ingress_settings,
cloud_build_service_account=cloud_build_service_account,
)


Expand Down
12 changes: 12 additions & 0 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,7 @@ def remote_function(
cloud_function_ingress_settings: Literal[
"all", "internal-only", "internal-and-gclb"
] = "internal-only",
cloud_build_service_account: Optional[str] = None,
):
"""Decorator to turn a user defined function into a BigQuery remote function. Check out
the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
Expand Down Expand Up @@ -1553,6 +1554,16 @@ def remote_function(
If no setting is provided, `internal-only` will be used by default.
See
https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings
for more details.
cloud_build_service_account (str, Optional):
Service account in the fully qualified format
`projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or
just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted
as belonging to the BigQuery DataFrames session project. This is
to be used by Cloud Build to build the function source code into
a deployable artifact. If not provided, the default Cloud Build
service account is used. See
https://cloud.google.com/build/docs/cloud-build-service-account
for more details.
Returns:
collections.abc.Callable:
A remote function object pointing to the cloud assets created
Expand Down Expand Up @@ -1581,6 +1592,7 @@ def remote_function(
cloud_function_vpc_connector=cloud_function_vpc_connector,
cloud_function_memory_mib=cloud_function_memory_mib,
cloud_function_ingress_settings=cloud_function_ingress_settings,
cloud_build_service_account=cloud_build_service_account,
)

def udf(
Expand Down
76 changes: 75 additions & 1 deletion tests/system/large/functions/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,7 +1342,7 @@ def test_remote_function_via_session_custom_sa(scalars_dfs):
# For upfront convenience, the following set up has been statically created
# in the project bigframes-dev-perf via cloud console:
#
# 1. Create a service account as per
# 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per
# https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
# 2. Give necessary roles as per
# https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration
Expand Down Expand Up @@ -1395,6 +1395,80 @@ def square_num(x):
)


@pytest.mark.parametrize(
    ("set_build_service_account"),
    [
        pytest.param(
            "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com",
            id="fully-qualified-sa",
        ),
        pytest.param(
            "bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com",
            id="just-sa-email",
        ),
    ],
)
@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_via_session_custom_build_sa(
    scalars_dfs, set_build_service_account
):
    """Check that a custom Cloud Build service account reaches the cloud function.

    The test is parametrized over the two accepted spellings of the build
    service account (fully qualified resource name, and bare email). In both
    cases the deployed cloud function is expected to report the fully
    qualified name in its build config, and the remote function must produce
    the same results as the equivalent pandas ``apply``.

    Args:
        scalars_dfs: Fixture yielding a ``(bigframes_df, pandas_df)`` pair;
            only the ``int64_col`` column is used here.
        set_build_service_account: The value passed to
            ``cloud_build_service_account`` for this parametrization.
    """
    # TODO(shobs): Automate the following set-up during testing in the test project.
    #
    # For upfront convenience, the following set up has been statically created
    # in the project bigframes-dev-perf via cloud console:
    #
    # 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per
    #    https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
    # 2. Give "Cloud Build Service Account (roles/cloudbuild.builds.builder)" role as per
    #    https://cloud.google.com/build/docs/cloud-build-service-account#default_permissions_of_the_legacy_service_account
    #
    project = "bigframes-dev-perf"
    expected_build_service_account = "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com"

    rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project))

    # Remember what (if anything) got deployed, so that a failure during
    # deployment is reported as-is rather than being replaced by an
    # UnboundLocalError raised from the clean-up code below.
    deployed_function = None

    try:

        # TODO(shobs): Figure out why the default ingress setting
        # (internal-only) does not work here
        @rf_session.remote_function(
            input_types=[int],
            output_type=int,
            reuse=False,
            cloud_function_service_account="default",
            cloud_build_service_account=set_build_service_account,
            cloud_function_ingress_settings="all",
        )
        def square_num(x):
            if x is None:
                return x
            return x * x

        deployed_function = square_num

        # assert that the GCF is created with the intended build SA
        gcf = rf_session.cloudfunctionsclient.get_function(
            name=square_num.bigframes_cloud_function
        )
        assert gcf.build_config.service_account == expected_build_service_account

        # assert that the function works as expected on data
        scalars_df, scalars_pandas_df = scalars_dfs

        bf_int64_col = scalars_df["int64_col"]
        bf_result_col = bf_int64_col.apply(square_num)
        bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas()

        pd_int64_col = scalars_pandas_df["int64_col"]
        pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
        pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)

        assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
    finally:
        # clean up the gcp assets created for the remote function, if the
        # deployment got far enough to hand back a function object
        if deployed_function is not None:
            cleanup_function_assets(
                deployed_function,
                rf_session.bqclient,
                rf_session.cloudfunctionsclient,
            )


def test_remote_function_throws_none_cloud_function_service_account(session):
with pytest.raises(
ValueError,
Expand Down