Skip to content
This repository was archived by the owner on Nov 12, 2025. It is now read-only.

Commit 1c91a27

Browse files
feat: add Arrow compression options (only LZ4 for now) (#166)
Also: * feat: Return schema on first ReadRowsResponse. * doc: clarify limit on filter string. This PR was generated using Autosynth. 🌈 Synth log will be available here: https://source.cloud.google.com/results/invocations/72a2a14b-0135-4939-ae4b-93b118a2b3e8/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) PiperOrigin-RevId: 365759522 Source-Link: googleapis/googleapis@c539b9b
1 parent 8a97763 commit 1c91a27

File tree

10 files changed

+102
-22
lines changed

10 files changed

+102
-22
lines changed

google/cloud/bigquery_storage/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from google.cloud.bigquery_storage_v1 import __version__
2121
from google.cloud.bigquery_storage_v1.types.arrow import ArrowRecordBatch
2222
from google.cloud.bigquery_storage_v1.types.arrow import ArrowSchema
23+
from google.cloud.bigquery_storage_v1.types.arrow import ArrowSerializationOptions
2324
from google.cloud.bigquery_storage_v1.types.avro import AvroRows
2425
from google.cloud.bigquery_storage_v1.types.avro import AvroSchema
2526
from google.cloud.bigquery_storage_v1.types.storage import CreateReadSessionRequest
@@ -38,6 +39,7 @@
3839
"types",
3940
"ArrowRecordBatch",
4041
"ArrowSchema",
42+
"ArrowSerializationOptions",
4143
"AvroRows",
4244
"AvroSchema",
4345
"BigQueryReadClient",

google/cloud/bigquery_storage_v1/proto/arrow.proto

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

@@ -43,3 +42,19 @@ message ArrowRecordBatch {
4342
// The count of rows in `serialized_record_batch`.
4443
int64 row_count = 2;
4544
}
45+
46+
// Contains options specific to Arrow Serialization.
47+
message ArrowSerializationOptions {
48+
// Compression codecs supported by Arrow.
49+
enum CompressionCodec {
50+
// If unspecified, no compression will be used.
51+
COMPRESSION_UNSPECIFIED = 0;
52+
53+
// LZ4 Frame (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)
54+
LZ4_FRAME = 1;
55+
}
56+
57+
// The compression codec to use for Arrow buffers in serialized record
58+
// batches.
59+
CompressionCodec buffer_compression = 2;
60+
}

google/cloud/bigquery_storage_v1/proto/avro.proto

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

google/cloud/bigquery_storage_v1/proto/storage.proto

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

@@ -70,7 +69,8 @@ service BigQueryRead {
7069
post: "/v1/{read_session.table=projects/*/datasets/*/tables/*}"
7170
body: "*"
7271
};
73-
option (google.api.method_signature) = "parent,read_session,max_stream_count";
72+
option (google.api.method_signature) =
73+
"parent,read_session,max_stream_count";
7474
}
7575

7676
// Reads rows from the stream in the format prescribed by the ReadSession.
@@ -99,7 +99,8 @@ service BigQueryRead {
9999
// original, primary, and residual, that original[0-j] = primary[0-j] and
100100
// original[j-n] = residual[0-m] once the streams have been read to
101101
// completion.
102-
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
102+
rpc SplitReadStream(SplitReadStreamRequest)
103+
returns (SplitReadStreamResponse) {
103104
option (google.api.http) = {
104105
get: "/v1/{name=projects/*/locations/*/sessions/*/streams/*}"
105106
};
@@ -201,6 +202,19 @@ message ReadRowsResponse {
201202
// Throttling state. If unset, the latest response still describes
202203
// the current throttling status.
203204
ThrottleState throttle_state = 5;
205+
206+
// The schema for the read. If read_options.selected_fields is set, the
207+
// schema may be different from the table schema as it will only contain
208+
// the selected fields. This schema is equivalent to the one returned by
209+
// CreateSession. This field is only populated in the first ReadRowsResponse
210+
// RPC.
211+
oneof schema {
212+
// Output only. Avro schema.
213+
AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
214+
215+
// Output only. Arrow schema.
216+
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
217+
}
204218
}
205219

206220
// Request message for `SplitReadStream`.

google/cloud/bigquery_storage_v1/proto/stream.proto

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

@@ -72,17 +71,27 @@ message ReadSession {
7271
// "nullable_field is not NULL"
7372
// "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
7473
// "numeric_field BETWEEN 1.0 AND 5.0"
74+
//
75+
// Restricted to a maximum length of 1 MB.
7576
string row_restriction = 2;
77+
78+
// Optional. Options specific to the Apache Arrow output format.
79+
oneof output_format_serialization_options {
80+
ArrowSerializationOptions arrow_serialization_options = 3
81+
[(google.api.field_behavior) = OPTIONAL];
82+
}
7683
}
7784

7885
// Output only. Unique identifier for the session, in the form
7986
// `projects/{project_id}/locations/{location}/sessions/{session_id}`.
8087
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
8188

82-
// Output only. Time at which the session becomes invalid. After this time, subsequent
83-
// requests to read this Session will return errors. The expire_time is
84-
// automatically assigned and currently cannot be specified or updated.
85-
google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
89+
// Output only. Time at which the session becomes invalid. After this time,
90+
// subsequent requests to read this Session will return errors. The
91+
// expire_time is automatically assigned and currently cannot be specified or
92+
// updated.
93+
google.protobuf.Timestamp expire_time = 2
94+
[(google.api.field_behavior) = OUTPUT_ONLY];
8695

8796
// Immutable. Data format of the output data.
8897
DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE];
@@ -102,12 +111,11 @@ message ReadSession {
102111
// `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`
103112
string table = 6 [
104113
(google.api.field_behavior) = IMMUTABLE,
105-
(google.api.resource_reference) = {
106-
type: "bigquery.googleapis.com/Table"
107-
}
114+
(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
108115
];
109116

110-
// Optional. Any modifiers which are applied when reading from the specified table.
117+
// Optional. Any modifiers which are applied when reading from the specified
118+
// table.
111119
TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL];
112120

113121
// Optional. Read options for this session (e.g. column selection, filters).

google/cloud/bigquery_storage_v1/types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from .arrow import (
1919
ArrowRecordBatch,
2020
ArrowSchema,
21+
ArrowSerializationOptions,
2122
)
2223
from .avro import (
2324
AvroRows,
@@ -42,6 +43,7 @@
4243
__all__ = (
4344
"ArrowRecordBatch",
4445
"ArrowSchema",
46+
"ArrowSerializationOptions",
4547
"AvroRows",
4648
"AvroSchema",
4749
"CreateReadSessionRequest",

google/cloud/bigquery_storage_v1/types/arrow.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
__protobuf__ = proto.module(
2222
package="google.cloud.bigquery.storage.v1",
23-
manifest={"ArrowSchema", "ArrowRecordBatch",},
23+
manifest={"ArrowSchema", "ArrowRecordBatch", "ArrowSerializationOptions",},
2424
)
2525

2626

@@ -55,4 +55,21 @@ class ArrowRecordBatch(proto.Message):
5555
row_count = proto.Field(proto.INT64, number=2)
5656

5757

58+
class ArrowSerializationOptions(proto.Message):
59+
r"""Contains options specific to Arrow Serialization.
60+
61+
Attributes:
62+
buffer_compression (google.cloud.bigquery_storage_v1.types.ArrowSerializationOptions.CompressionCodec):
63+
The compression codec to use for Arrow
64+
buffers in serialized record batches.
65+
"""
66+
67+
class CompressionCodec(proto.Enum):
68+
r"""Compression codecs supported by Arrow."""
69+
COMPRESSION_UNSPECIFIED = 0
70+
LZ4_FRAME = 1
71+
72+
buffer_compression = proto.Field(proto.ENUM, number=2, enum=CompressionCodec,)
73+
74+
5875
__all__ = tuple(sorted(__protobuf__.manifest))

google/cloud/bigquery_storage_v1/types/storage.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ class ReadRowsResponse(proto.Message):
154154
Throttling state. If unset, the latest
155155
response still describes the current throttling
156156
status.
157+
avro_schema (google.cloud.bigquery_storage_v1.types.AvroSchema):
158+
Output only. Avro schema.
159+
arrow_schema (google.cloud.bigquery_storage_v1.types.ArrowSchema):
160+
Output only. Arrow schema.
157161
"""
158162

159163
avro_rows = proto.Field(
@@ -170,6 +174,14 @@ class ReadRowsResponse(proto.Message):
170174

171175
throttle_state = proto.Field(proto.MESSAGE, number=5, message="ThrottleState",)
172176

177+
avro_schema = proto.Field(
178+
proto.MESSAGE, number=7, oneof="schema", message=avro.AvroSchema,
179+
)
180+
181+
arrow_schema = proto.Field(
182+
proto.MESSAGE, number=8, oneof="schema", message=arrow.ArrowSchema,
183+
)
184+
173185

174186
class SplitReadStreamRequest(proto.Message):
175187
r"""Request message for ``SplitReadStream``.

google/cloud/bigquery_storage_v1/types/stream.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,24 @@ class TableReadOptions(proto.Message):
104104
Examples: "int_field > 5" "date_field = CAST('2014-9-27' as
105105
DATE)" "nullable_field is not NULL" "st_equals(geo_field,
106106
st_geofromtext("POINT(2, 2)"))" "numeric_field BETWEEN 1.0
107-
AND 5.0".
107+
AND 5.0"
108+
109+
Restricted to a maximum length of 1 MB.
110+
arrow_serialization_options (google.cloud.bigquery_storage_v1.types.ArrowSerializationOptions):
111+
108112
"""
109113

110114
selected_fields = proto.RepeatedField(proto.STRING, number=1)
111115

112116
row_restriction = proto.Field(proto.STRING, number=2)
113117

118+
arrow_serialization_options = proto.Field(
119+
proto.MESSAGE,
120+
number=3,
121+
oneof="output_format_serialization_options",
122+
message=arrow.ArrowSerializationOptions,
123+
)
124+
114125
name = proto.Field(proto.STRING, number=1)
115126

116127
expire_time = proto.Field(proto.MESSAGE, number=2, message=timestamp.Timestamp,)

synth.metadata

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
"git": {
1212
"name": "googleapis",
1313
"remote": "https://github.com/googleapis/googleapis.git",
14-
"sha": "149a3a84c29c9b8189576c7442ccb6dcf6a8f95b",
15-
"internalRef": "364411656"
14+
"sha": "c539b9b08b3366ee00c0ec1950f4df711552a269",
15+
"internalRef": "365759522"
1616
}
1717
},
1818
{

0 commit comments

Comments
 (0)