Skip to content

Commit ede6a69

Browse files
kevincmchen authored and facebook-github-bot committed
support to read fixed length binary as string. (facebookincubator#10621)
Summary: This is a follow-up to facebookincubator#10399 that adds support for reading fixed-length binary as string.

Pull Request resolved: facebookincubator#10621

Reviewed By: Yuhta

Differential Revision: D60527079

Pulled By: kagamiori

fbshipit-source-id: a13f3bb963c57e98d494fb9fc194f2a988ba2bee
1 parent b4ea92c commit ede6a69

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed

velox/dwio/parquet/reader/PageReader.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,7 @@ void PageReader::makeDecoder() {
666666
pageData_, pageData_ + encodedDataSize_);
667667
break;
668668
case thrift::Type::FIXED_LEN_BYTE_ARRAY:
669-
if (type_->type()->isVarbinary()) {
669+
if (type_->type()->isVarbinary() || type_->type()->isVarchar()) {
670670
stringDecoder_ = std::make_unique<StringDecoder>(
671671
pageData_, pageData_ + encodedDataSize_, type_->typeLength_);
672672
} else {
730 Bytes
Binary file not shown.

velox/dwio/parquet/tests/reader/ParquetReaderTest.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,34 @@ TEST_F(ParquetReaderTest, readBinaryAsStringFromNation) {
11801180
0));
11811181
}
11821182

1183+
// Verifies that the single column of the example file "uuid.parquet" can be
// read as VARCHAR when the caller supplies a file schema that declares the
// column as VARCHAR.
// NOTE(review): per the commit summary the column is presumably stored as
// FIXED_LEN_BYTE_ARRAY in the file; the file contents are not visible here, so
// confirm against the example file.
TEST_F(ParquetReaderTest, readFixedLenBinaryAsStringFromUuid) {
1184+
const std::string filename("uuid.parquet");
1185+
const std::string sample(getExampleFilePath(filename));
1186+
1187+
dwio::common::ReaderOptions readerOptions{leafPool_.get()};
1188+
// Requested output schema: one column named "uuid_field" typed as VARCHAR.
auto outputRowType = ROW({"uuid_field"}, {VARCHAR()});
1189+
1190+
// The same row type is set as the file schema on the reader options, so the
// reader is asked to expose the column as VARCHAR.
readerOptions.setFileSchema(outputRowType);
1191+
auto reader = createReader(sample, readerOptions);
1192+
EXPECT_EQ(reader->numberOfRows(), 3ULL);
1193+
// The reader-reported schema must be a ROW with exactly one VARCHAR child.
auto rowType = reader->typeWithId();
1194+
EXPECT_EQ(rowType->type()->kind(), TypeKind::ROW);
1195+
EXPECT_EQ(rowType->size(), 1ULL);
1196+
EXPECT_EQ(rowType->childAt(0)->type()->kind(), TypeKind::VARCHAR);
1197+
1198+
auto rowReaderOpts = getReaderOpts(outputRowType);
1199+
rowReaderOpts.setScanSpec(makeScanSpec(outputRowType));
1200+
auto rowReader = reader->createRowReader(rowReaderOpts);
1201+
1202+
// Only the first row is requested and compared against the expected text.
auto expected = std::string("5468454a-363f-ccc8-7d0b-76072a75dfaa");
1203+
VectorPtr result = BaseVector::create(outputRowType, 0, &(*leafPool_));
1204+
rowReader->next(1, result);
1205+
EXPECT_EQ(
1206+
expected,
1207+
result->as<RowVector>()->childAt(0)->asFlatVector<StringView>()->valueAt(
1208+
0));
1209+
}
1210+
11831211
TEST_F(ParquetReaderTest, testV2PageWithZeroMaxDefRep) {
11841212
// enum_type.parquet contains 1 column (ENUM) with 3 rows.
11851213
const std::string sample(getExampleFilePath("v2_page.parquet"));

0 commit comments

Comments
 (0)