Skip to content

Commit e586151

Browse files
shobsigoogle-labs-jules[bot]gcf-owl-bot[bot]
authored
feat: support custom build service account in remote_function (#1796)
* Regarding the commit to refactor the system test for `cloud_build_service_account`: This commit refactors the system test `test_remote_function_via_session_custom_build_sa` in `tests/system/large/functions/test_remote_function.py` to align with the structure and validation approach of `test_remote_function_via_session_custom_sa`. The test now: - Uses the project "bigframes-dev-perf". - Sets `cloud_build_service_account` to "bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com". - Sets `cloud_function_service_account` to the same value for simplicity in this test. - Uses `cloud_function_ingress_settings="all"`. - Validates that `gcf.build_config.service_account` matches the provided `cloud_build_service_account`. - Employs a dedicated session for the test and ensures proper cleanup. * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add proper test, improve documentation * nit rewording for readability --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 855031a commit e586151

File tree

5 files changed

+114
-1
lines changed

5 files changed

+114
-1
lines changed

bigframes/functions/_function_client.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def __init__(
7777
cloud_function_service_account=None,
7878
cloud_function_kms_key_name=None,
7979
cloud_function_docker_repository=None,
80+
cloud_build_service_account=None,
8081
*,
8182
session: Session,
8283
):
@@ -94,6 +95,7 @@ def __init__(
9495
self._cloud_function_service_account = cloud_function_service_account
9596
self._cloud_function_kms_key_name = cloud_function_kms_key_name
9697
self._cloud_function_docker_repository = cloud_function_docker_repository
98+
self._cloud_build_service_account = cloud_build_service_account
9799

98100
def _create_bq_connection(self) -> None:
99101
if self._bq_connection_manager:
@@ -452,6 +454,17 @@ def create_cloud_function(
452454
function.build_config.docker_repository = (
453455
self._cloud_function_docker_repository
454456
)
457+
458+
if self._cloud_build_service_account:
459+
canonical_cloud_build_service_account = (
460+
self._cloud_build_service_account
461+
if "/" in self._cloud_build_service_account
462+
else f"projects/{self._gcp_project_id}/serviceAccounts/{self._cloud_build_service_account}"
463+
)
464+
function.build_config.service_account = (
465+
canonical_cloud_build_service_account
466+
)
467+
455468
function.service_config = functions_v2.ServiceConfig()
456469
if memory_mib is not None:
457470
function.service_config.available_memory = f"{memory_mib}Mi"

bigframes/functions/_function_session.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ def remote_function(
263263
cloud_function_ingress_settings: Literal[
264264
"all", "internal-only", "internal-and-gclb"
265265
] = "internal-only",
266+
cloud_build_service_account: Optional[str] = None,
266267
):
267268
"""Decorator to turn a user defined function into a BigQuery remote function.
268269
@@ -453,6 +454,16 @@ def remote_function(
453454
If no setting is provided, `internal-only` will be used by default.
454455
For more details, see
455456
https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings.
457+
cloud_build_service_account (str, Optional):
458+
Service account in the fully qualified format
459+
`projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or
460+
just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted
461+
as belonging to the BigQuery DataFrames session project. This is
462+
to be used by Cloud Build to build the function source code into
463+
a deployable artifact. If not provided, the default Cloud Build
464+
service account is used. See
465+
https://cloud.google.com/build/docs/cloud-build-service-account
466+
for more details.
456467
"""
457468
# Some defaults may be used from the session if not provided otherwise.
458469
session = self._resolve_session(session)
@@ -599,6 +610,7 @@ def wrapper(func):
599610
else cloud_function_service_account,
600611
cloud_function_kms_key_name,
601612
cloud_function_docker_repository,
613+
cloud_build_service_account=cloud_build_service_account,
602614
session=session, # type: ignore
603615
)
604616

bigframes/pandas/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def remote_function(
8989
cloud_function_ingress_settings: Literal[
9090
"all", "internal-only", "internal-and-gclb"
9191
] = "internal-only",
92+
cloud_build_service_account: Optional[str] = None,
9293
):
9394
return global_session.with_default_session(
9495
bigframes.session.Session.remote_function,
@@ -108,6 +109,7 @@ def remote_function(
108109
cloud_function_vpc_connector=cloud_function_vpc_connector,
109110
cloud_function_memory_mib=cloud_function_memory_mib,
110111
cloud_function_ingress_settings=cloud_function_ingress_settings,
112+
cloud_build_service_account=cloud_build_service_account,
111113
)
112114

113115

bigframes/session/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,6 +1378,7 @@ def remote_function(
13781378
cloud_function_ingress_settings: Literal[
13791379
"all", "internal-only", "internal-and-gclb"
13801380
] = "internal-only",
1381+
cloud_build_service_account: Optional[str] = None,
13811382
):
13821383
"""Decorator to turn a user defined function into a BigQuery remote function. Check out
13831384
the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1553,6 +1554,16 @@ def remote_function(
15531554
If no setting is provided, `internal-only` will be used by default.
15541555
For more details, see
15551556
https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings.
1557+
cloud_build_service_account (str, Optional):
1558+
Service account in the fully qualified format
1559+
`projects/PROJECT_ID/serviceAccounts/SERVICE_ACCOUNT_EMAIL`, or
1560+
just the SERVICE_ACCOUNT_EMAIL. The latter would be interpreted
1561+
as belonging to the BigQuery DataFrames session project. This is
1562+
to be used by Cloud Build to build the function source code into
1563+
a deployable artifact. If not provided, the default Cloud Build
1564+
service account is used. See
1565+
https://cloud.google.com/build/docs/cloud-build-service-account
1566+
for more details.
15561567
Returns:
15571568
collections.abc.Callable:
15581569
A remote function object pointing to the cloud assets created
@@ -1581,6 +1592,7 @@ def remote_function(
15811592
cloud_function_vpc_connector=cloud_function_vpc_connector,
15821593
cloud_function_memory_mib=cloud_function_memory_mib,
15831594
cloud_function_ingress_settings=cloud_function_ingress_settings,
1595+
cloud_build_service_account=cloud_build_service_account,
15841596
)
15851597

15861598
def udf(

tests/system/large/functions/test_remote_function.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1342,7 +1342,7 @@ def test_remote_function_via_session_custom_sa(scalars_dfs):
13421342
# For upfront convenience, the following set up has been statically created
13431343
# in the project bigframes-dev-perf via cloud console:
13441344
#
1345-
# 1. Create a service account as per
1345+
# 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per
13461346
# https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
13471347
# 2. Give necessary roles as per
13481348
# https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration
@@ -1395,6 +1395,80 @@ def square_num(x):
13951395
)
13961396

13971397

1398+
@pytest.mark.parametrize(
    ("set_build_service_account"),
    [
        pytest.param(
            "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com",
            id="fully-qualified-sa",
        ),
        pytest.param(
            "bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com",
            id="just-sa-email",
        ),
    ],
)
@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_via_session_custom_build_sa(
    scalars_dfs, set_build_service_account
):
    """Deploy a remote function with a custom Cloud Build service account.

    The service account is passed either in the fully qualified
    ``projects/PROJECT_ID/serviceAccounts/EMAIL`` form or as a bare email.
    In both cases the test asserts that the deployed cloud function's
    ``build_config.service_account`` equals the fully qualified name, and
    that applying the function to ``int64_col`` matches the pandas result.
    """
    # TODO(shobs): Automate the following set-up during testing in the test project.
    #
    # For upfront convenience, the following set up has been statically created
    # in the project bigframes-dev-perf via cloud console:
    #
    # 1. Create a service account bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com as per
    #    https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
    # 2. Give "Cloud Build Service Account (roles/cloudbuild.builds.builder)" role as per
    #    https://cloud.google.com/build/docs/cloud-build-service-account#default_permissions_of_the_legacy_service_account
    #
    project = "bigframes-dev-perf"
    expected_build_service_account = "projects/bigframes-dev-perf/serviceAccounts/bigframes-dev-perf-1@bigframes-dev-perf.iam.gserviceaccount.com"

    rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project))

    # Pre-bind the name so the `finally` clause can tell whether deployment
    # got far enough to create anything. Without this, a failure inside the
    # decorator would surface as an UnboundLocalError raised from `finally`
    # instead of the actual deployment error.
    square_num = None

    try:

        # TODO(shobs): Figure out why the default ingress setting
        # (internal-only) does not work here
        @rf_session.remote_function(
            input_types=[int],
            output_type=int,
            reuse=False,
            cloud_function_service_account="default",
            cloud_build_service_account=set_build_service_account,
            cloud_function_ingress_settings="all",
        )
        def square_num(x):
            if x is None:
                return x
            return x * x

        # assert that the GCF is created with the intended SA
        gcf = rf_session.cloudfunctionsclient.get_function(
            name=square_num.bigframes_cloud_function
        )
        assert gcf.build_config.service_account == expected_build_service_account

        # assert that the function works as expected on data
        scalars_df, scalars_pandas_df = scalars_dfs

        bf_int64_col = scalars_df["int64_col"]
        bf_result_col = bf_int64_col.apply(square_num)
        bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas()

        pd_int64_col = scalars_pandas_df["int64_col"]
        pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
        pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)

        assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
    finally:
        # clean up the gcp assets created for the remote function, if the
        # deployment got far enough to produce one
        if square_num is not None:
            cleanup_function_assets(
                square_num, rf_session.bqclient, rf_session.cloudfunctionsclient
            )
1470+
1471+
13981472
def test_remote_function_throws_none_cloud_function_service_account(session):
13991473
with pytest.raises(
14001474
ValueError,

0 commit comments

Comments
 (0)