Skip to content

Commit a1c8e9a

Browse files
authored
feat: support BigLakeConfiguration (managed Iceberg tables) (#2162)
* feat: support BigLakeConfiguration (managed Iceberg tables) This PR adds the BigLakeConfiguration class to tables, and the necessary property mappings from Table. It also adds some utility enums (BigLakeFileFormat, BigLakeTableFormat) to more easily communicate available values for configuration.
1 parent ca1798a commit a1c8e9a

File tree

3 files changed

+326
-0
lines changed

3 files changed

+326
-0
lines changed

google/cloud/bigquery/enums.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,3 +387,19 @@ def _generate_next_value_(name, start, count, last_values):
387387
ROUNDING_MODE_UNSPECIFIED = enum.auto()
388388
ROUND_HALF_AWAY_FROM_ZERO = enum.auto()
389389
ROUND_HALF_EVEN = enum.auto()
390+
391+
392+
class BigLakeFileFormat(object):
    """Allowed values for the file format of managed Iceberg table data."""

    FILE_FORMAT_UNSPECIFIED = "FILE_FORMAT_UNSPECIFIED"
    """The default unspecified value."""

    PARQUET = "PARQUET"
    """Apache Parquet format."""
398+
399+
400+
class BigLakeTableFormat(object):
    """Allowed values for the table format of metadata-only snapshots."""

    TABLE_FORMAT_UNSPECIFIED = "TABLE_FORMAT_UNSPECIFIED"
    """The default unspecified value."""

    ICEBERG = "ICEBERG"
    """Apache Iceberg format."""

google/cloud/bigquery/table.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ class Table(_TableBase):
380380

381381
_PROPERTY_TO_API_FIELD: Dict[str, Any] = {
382382
**_TableBase._PROPERTY_TO_API_FIELD,
383+
"biglake_configuration": "biglakeConfiguration",
383384
"clustering_fields": "clustering",
384385
"created": "creationTime",
385386
"description": "description",
@@ -431,6 +432,29 @@ def __init__(self, table_ref, schema=None) -> None:
431432

432433
reference = property(_reference_getter)
433434

435+
@property
def biglake_configuration(self):
    """google.cloud.bigquery.table.BigLakeConfiguration: Configuration
    for managed tables for Apache Iceberg, or :data:`None` if not set.

    See https://cloud.google.com/bigquery/docs/iceberg-tables for more information.
    """
    key = self._PROPERTY_TO_API_FIELD["biglake_configuration"]
    resource = self._properties.get(key)
    return None if resource is None else BigLakeConfiguration.from_api_repr(resource)

@biglake_configuration.setter
def biglake_configuration(self, value):
    # Store the raw API representation; ``None`` clears the configuration.
    key = self._PROPERTY_TO_API_FIELD["biglake_configuration"]
    self._properties[key] = None if value is None else value.to_api_repr()
434458
@property
435459
def require_partition_filter(self):
436460
"""bool: If set to true, queries over the partitioned table require a
@@ -3501,6 +3525,132 @@ def to_api_repr(self) -> Dict[str, Any]:
35013525
return resource
35023526

35033527

3528+
class BigLakeConfiguration(object):
    """Configuration for managed tables for Apache Iceberg, formerly
    known as BigLake.

    Args:
        connection_id (Optional[str]):
            The connection specifying the credentials to be used to read and write to external
            storage, such as Cloud Storage. The connection_id can have the form
            ``{project}.{location}.{connection_id}`` or
            ``projects/{project}/locations/{location}/connections/{connection_id}``.
        storage_uri (Optional[str]):
            The fully qualified location prefix of the external folder where table data is
            stored. The '*' wildcard character is not allowed. The URI should be in the
            format ``gs://bucket/path_to_table/``.
        file_format (Optional[str]):
            The file format the table data is stored in. See BigLakeFileFormat for available
            values.
        table_format (Optional[str]):
            The table format the metadata only snapshots are stored in. See BigLakeTableFormat
            for available values.
        _properties (Optional[dict]):
            Private. Used to construct object from API resource.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        storage_uri: Optional[str] = None,
        file_format: Optional[str] = None,
        table_format: Optional[str] = None,
        _properties: Optional[dict] = None,
    ) -> None:
        self._properties = {} if _properties is None else _properties
        # Only assign arguments that were explicitly provided, so the backing
        # resource dict does not pick up spurious ``None`` entries.
        for attribute, argument in (
            ("connection_id", connection_id),
            ("storage_uri", storage_uri),
            ("file_format", file_format),
            ("table_format", table_format),
        ):
            if argument is not None:
                setattr(self, attribute, argument)

    @property
    def connection_id(self) -> Optional[str]:
        """str: The connection specifying the credentials to be used to read and write to external
        storage, such as Cloud Storage."""
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        self._properties["connectionId"] = value

    @property
    def storage_uri(self) -> Optional[str]:
        """str: The fully qualified location prefix of the external folder where table data is
        stored."""
        return self._properties.get("storageUri")

    @storage_uri.setter
    def storage_uri(self, value: Optional[str]):
        self._properties["storageUri"] = value

    @property
    def file_format(self) -> Optional[str]:
        """str: The file format the table data is stored in. See BigLakeFileFormat for available
        values."""
        return self._properties.get("fileFormat")

    @file_format.setter
    def file_format(self, value: Optional[str]):
        self._properties["fileFormat"] = value

    @property
    def table_format(self) -> Optional[str]:
        """str: The table format the metadata only snapshots are stored in. See BigLakeTableFormat
        for available values."""
        return self._properties.get("tableFormat")

    @table_format.setter
    def table_format(self, value: Optional[str]):
        self._properties["tableFormat"] = value

    def _key(self):
        # Canonical, order-independent view of the properties, used for
        # equality comparison and hashing.
        return tuple(sorted(self._properties.items()))

    def __eq__(self, other):
        if isinstance(other, BigLakeConfiguration):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        pairs = ",".join(f"{key}={val}" for key, val in self._key())
        return f"BigLakeConfiguration({pairs})"

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration":
        """Factory: construct a BigLakeConfiguration given its API representation.

        Args:
            resource:
                BigLakeConfiguration representation returned from the API

        Returns:
            BigLakeConfiguration parsed from ``resource``.
        """
        config = cls()
        config._properties = resource
        return config

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this BigLakeConfiguration.

        Returns:
            BigLakeConfiguration represented as an API resource.
        """
        return copy.deepcopy(self._properties)
3652+
3653+
35043654
def _item_to_row(iterator, resource):
35053655
"""Convert a JSON row to the native object.
35063656

tests/unit/test_table.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,12 @@ def _make_resource(self):
435435
"sourceFormat": "CSV",
436436
"csvOptions": {"allowJaggedRows": True, "encoding": "encoding"},
437437
},
438+
"biglakeConfiguration": {
439+
"connectionId": "connection",
440+
"storageUri": "uri",
441+
"fileFormat": "PARQUET",
442+
"tableFormat": "ICEBERG",
443+
},
438444
"labels": {"x": "y"},
439445
}
440446

@@ -521,6 +527,15 @@ def _verifyResourceProperties(self, table, resource):
521527
else:
522528
self.assertIsNone(table.encryption_configuration)
523529

530+
if "biglakeConfiguration" in resource:
531+
self.assertIsNotNone(table.biglake_configuration)
532+
self.assertEqual(table.biglake_configuration.connection_id, "connection")
533+
self.assertEqual(table.biglake_configuration.storage_uri, "uri")
534+
self.assertEqual(table.biglake_configuration.file_format, "PARQUET")
535+
self.assertEqual(table.biglake_configuration.table_format, "ICEBERG")
536+
else:
537+
self.assertIsNone(table.biglake_configuration)
538+
524539
def test_ctor(self):
525540
dataset = DatasetReference(self.PROJECT, self.DS_ID)
526541
table_ref = dataset.table(self.TABLE_NAME)
@@ -893,6 +908,60 @@ def test_table_constraints_property_getter(self):
893908
assert isinstance(table_constraints, TableConstraints)
894909
assert table_constraints.primary_key == PrimaryKey(columns=["id"])
895910

911+
def test_biglake_configuration_not_set(self):
    """A freshly constructed table reports no BigLake configuration."""
    table_ref = DatasetReference(self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
    table = self._make_one(table_ref)

    self.assertIsNone(table.biglake_configuration)
917+
918+
def test_biglake_configuration_set(self):
    """The property getter parses the raw resource into a BigLakeConfiguration."""
    from google.cloud.bigquery.table import BigLakeConfiguration

    table_ref = DatasetReference(self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
    table = self._make_one(table_ref)
    table._properties["biglakeConfiguration"] = {
        "connectionId": "connection",
        "storageUri": "uri",
        "fileFormat": "PARQUET",
        "tableFormat": "ICEBERG",
    }

    config = table.biglake_configuration

    self.assertIsInstance(config, BigLakeConfiguration)
    self.assertEqual(config.connection_id, "connection")
    self.assertEqual(config.storage_uri, "uri")
    self.assertEqual(config.file_format, "PARQUET")
    self.assertEqual(config.table_format, "ICEBERG")
939+
940+
def test_biglake_configuration_property_setter(self):
    """The property setter stores the API representation; ``None`` clears it."""
    from google.cloud.bigquery.table import BigLakeConfiguration

    table_ref = DatasetReference(self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
    table = self._make_one(table_ref)
    config = BigLakeConfiguration(
        connection_id="connection",
        storage_uri="uri",
        file_format="PARQUET",
        table_format="ICEBERG",
    )

    table.biglake_configuration = config

    self.assertEqual(
        table._properties["biglakeConfiguration"],
        {
            "connectionId": "connection",
            "storageUri": "uri",
            "fileFormat": "PARQUET",
            "tableFormat": "ICEBERG",
        },
    )

    # Assigning None removes the configuration again.
    table.biglake_configuration = None
    self.assertIsNone(table.biglake_configuration)
964+
896965
def test_table_constraints_property_setter(self):
897966
from google.cloud.bigquery.table import (
898967
ColumnReference,
@@ -2166,6 +2235,97 @@ def test_ctor_full_resource(self):
21662235
assert instance.snapshot_time == expected_time
21672236

21682237

2238+
class TestBigLakeConfiguration(unittest.TestCase):
    """Unit tests for ``google.cloud.bigquery.table.BigLakeConfiguration``."""

    # Fully populated API resource; tests copy it before mutating.
    FULL_RESOURCE = {
        "connectionId": "conn",
        "storageUri": "uri",
        "fileFormat": "FILE",
        "tableFormat": "TABLE",
    }

    @staticmethod
    def _get_target_class():
        from google.cloud.bigquery.table import BigLakeConfiguration

        return BigLakeConfiguration

    @classmethod
    def _make_one(cls, *args, **kwargs):
        return cls._get_target_class()(*args, **kwargs)

    def test_ctor_empty_resource(self):
        instance = self._make_one()
        for attribute in ("connection_id", "storage_uri", "file_format", "table_format"):
            self.assertIsNone(getattr(instance, attribute))

    def test_ctor_kwargs(self):
        instance = self._make_one(
            connection_id="conn",
            storage_uri="uri",
            file_format="FILE",
            table_format="TABLE",
        )
        self.assertEqual(instance.connection_id, "conn")
        self.assertEqual(instance.storage_uri, "uri")
        self.assertEqual(instance.file_format, "FILE")
        self.assertEqual(instance.table_format, "TABLE")

    def test_ctor_full_resource(self):
        instance = self._make_one(_properties=dict(self.FULL_RESOURCE))
        self.assertEqual(instance.connection_id, "conn")
        self.assertEqual(instance.storage_uri, "uri")
        self.assertEqual(instance.file_format, "FILE")
        self.assertEqual(instance.table_format, "TABLE")

    def test_to_api_repr(self):
        resource = dict(self.FULL_RESOURCE)
        instance = self._make_one(_properties=resource)
        self.assertEqual(instance.to_api_repr(), resource)

    def test_from_api_repr_partial(self):
        instance = self._get_target_class().from_api_repr({"fileFormat": "FILE"})

        self.assertEqual(instance.file_format, "FILE")
        self.assertIsNone(instance.connection_id)
        self.assertIsNone(instance.storage_uri)
        self.assertIsNone(instance.table_format)

    def test_comparisons(self):
        first = self._make_one(_properties=dict(self.FULL_RESOURCE))
        second = self._make_one(_properties=dict(self.FULL_RESOURCE))

        # A plain dict is never equal, even with identical contents.
        self.assertNotEqual(first, self.FULL_RESOURCE)

        # Equivalent configurations compare and hash equal.
        self.assertEqual(first, second)
        self.assertEqual(hash(first), hash(second))

        # Diverging one field breaks equivalence.
        second.connection_id = "foo"
        self.assertNotEqual(first, second)
        self.assertNotEqual(hash(first), hash(second))

        # Applying the same change restores equivalence.
        first.connection_id = "foo"
        self.assertEqual(first, second)
        self.assertEqual(hash(first), hash(second))
2327+
2328+
21692329
class TestCloneDefinition:
21702330
@staticmethod
21712331
def _get_target_class():

0 commit comments

Comments
 (0)