Skip to content

feat: support BigLakeConfiguration (managed Iceberg tables) #2162

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 25, 2025
Merged
Next Next commit
feat: support BigLakeConfiguration (managed Iceberg tables)
This PR adds the BigLakeConfiguration class to tables, and the necessary
property mappings from Table.  It also adds some utility enums
(BigLakeFileFormat, BigLakeTableFormat) to more easily communicate
available values for configuration.

TODO: testing
  • Loading branch information
shollyman committed Apr 10, 2025
commit a251181907653d3b90a21e43c3d40f6ea8b9f4a8
16 changes: 16 additions & 0 deletions google/cloud/bigquery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,19 @@ def _generate_next_value_(name, start, count, last_values):
ROUNDING_MODE_UNSPECIFIED = enum.auto()
ROUND_HALF_AWAY_FROM_ZERO = enum.auto()
ROUND_HALF_EVEN = enum.auto()


class BigLakeFileFormat(object):
    """Possible file formats for data stored in BigLake-managed tables."""

    FILE_FORMAT_UNSPECIFIED = "FILE_FORMAT_UNSPECIFIED"
    """The default unspecified value."""

    PARQUET = "PARQUET"
    """Apache Parquet format."""

class BigLakeTableFormat(object):
    """Possible table formats for metadata-only snapshots of BigLake-managed tables."""

    TABLE_FORMAT_UNSPECIFIED = "TABLE_FORMAT_UNSPECIFIED"
    """The default unspecified value."""

    ICEBERG = "ICEBERG"
    """Apache Iceberg format."""
137 changes: 137 additions & 0 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@
from google.cloud.bigquery._tqdm_helpers import get_progress_bar
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.enums import DefaultPandasDTypes
from google.cloud.bigquery.enums import BigLakeFileFormat
from google.cloud.bigquery.enums import BigLakeTableFormat
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery import schema as _schema
from google.cloud.bigquery.schema import _build_schema_resource
Expand Down Expand Up @@ -380,6 +382,7 @@ class Table(_TableBase):

_PROPERTY_TO_API_FIELD: Dict[str, Any] = {
**_TableBase._PROPERTY_TO_API_FIELD,
"biglake_configuration": "biglakeConfiguration",
"clustering_fields": "clustering",
"created": "creationTime",
"description": "description",
Expand Down Expand Up @@ -431,6 +434,29 @@ def __init__(self, table_ref, schema=None) -> None:

reference = property(_reference_getter)

@property
def biglake_configuration(self):
    """google.cloud.bigquery.table.BigLakeConfiguration: Configuration
    for managed tables for Apache Iceberg.

    See https://cloud.google.com/bigquery/docs/iceberg-tables for more information.
    """
    prop = self._properties.get(self._PROPERTY_TO_API_FIELD["biglake_configuration"])
    if prop is not None:
        # Wrap the raw API resource dict in the rich configuration type.
        prop = BigLakeConfiguration.from_api_repr(prop)
    return prop

@biglake_configuration.setter
def biglake_configuration(self, value):
    # The setter must share the property's name: the original definition
    # was mistakenly named ``encryption_configuration``, which left this
    # property read-only and shadowed an unrelated property.
    api_repr = value
    if value is not None:
        api_repr = value.to_api_repr()
    self._properties[self._PROPERTY_TO_API_FIELD["biglake_configuration"]] = api_repr

@property
def require_partition_filter(self):
"""bool: If set to true, queries over the partitioned table require a
Expand Down Expand Up @@ -3500,6 +3526,117 @@ def to_api_repr(self) -> Dict[str, Any]:
]
return resource

class BigLakeConfiguration(object):
    """Configuration for Managed Tables for Apache Iceberg, formerly
    known as BigLake.

    Args:
        connection_id (Optional[str]):
            The connection specifying the credentials to be used to read and write to external
            storage, such as Cloud Storage. The connection_id can have the form
            ``{project}.{location}.{connection_id}`` or
            ``projects/{project}/locations/{location}/connections/{connection_id}``.
        storage_uri (Optional[str]):
            The fully qualified location prefix of the external folder where table data is
            stored. The '*' wildcard character is not allowed. The URI should be in the
            format ``gs://bucket/path_to_table/``.
        file_format (Optional[str]):
            The file format the table data is stored in. See BigLakeFileFormat for available
            values.
        table_format (Optional[str]):
            The table format the metadata only snapshots are stored in. See BigLakeTableFormat
            for available values.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        storage_uri: Optional[str] = None,
        file_format: Optional[str] = None,
        table_format: Optional[str] = None,
    ) -> None:
        # Defaults of None allow the no-argument construction that
        # ``from_api_repr`` relies on (``cls()``).
        self._properties: Dict[str, Any] = {}
        if connection_id is not None:
            self.connection_id = connection_id
        if storage_uri is not None:
            self.storage_uri = storage_uri
        if file_format is not None:
            self.file_format = file_format
        if table_format is not None:
            self.table_format = table_format

    @property
    def connection_id(self) -> Optional[str]:
        """str: The connection specifying the credentials to be used to
        read and write to external storage."""
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        self._properties["connectionId"] = value

    @property
    def storage_uri(self) -> Optional[str]:
        """str: The fully qualified location prefix of the external folder
        where table data is stored."""
        return self._properties.get("storageUri")

    @storage_uri.setter
    def storage_uri(self, value: Optional[str]):
        self._properties["storageUri"] = value

    @property
    def file_format(self) -> str:
        """str: The file format the table data is stored in. See
        BigLakeFileFormat for available values."""
        return self._properties.get(
            "fileFormat", BigLakeFileFormat.FILE_FORMAT_UNSPECIFIED
        )

    @file_format.setter
    def file_format(self, value: str):
        self._properties["fileFormat"] = value

    @property
    def table_format(self) -> str:
        """str: The table format the metadata only snapshots are stored in.
        See BigLakeTableFormat for available values."""
        return self._properties.get(
            "tableFormat", BigLakeTableFormat.TABLE_FORMAT_UNSPECIFIED
        )

    @table_format.setter
    def table_format(self, value: str):
        self._properties["tableFormat"] = value

    def _key(self):
        # Canonical, order-independent view of the properties used for
        # equality, hashing and repr.
        return tuple(sorted(self._properties.items()))

    def __eq__(self, other):
        if not isinstance(other, BigLakeConfiguration):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        key_vals = ["{}={}".format(key, val) for key, val in self._key()]
        return "BigLakeConfiguration({})".format(",".join(key_vals))

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration":
        """Factory: construct a BigLakeConfiguration given its API representation.

        Args:
            resource:
                BigLakeConfiguration representation returned from the API

        Returns:
            BigLakeConfiguration parsed from ``resource``.
        """
        ref = cls()
        ref._properties = resource
        return ref

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this BigLakeConfiguration.

        Returns:
            BigLakeConfiguration represented as an API resource.
        """
        return copy.deepcopy(self._properties)


def _item_to_row(iterator, resource):
"""Convert a JSON row to the native object.
Expand Down