Skip to content

Commit decb929

Browse files
mbasmanova authored and codyschierbeck committed
Optimize array_constructor (facebookincubator#6568)
Summary: Pull Request resolved: facebookincubator#6568

array_constructor is very slow: facebookincubator#5958 (comment)

array_constructor uses BaseVector::copyRanges, which is somewhat fast for arrays and maps, but very slow for primitive types:

```
FlatVector.h

void copyRanges(
    const BaseVector* source,
    const folly::Range<const BaseVector::CopyRange*>& ranges) override {
  for (auto& range : ranges) {
    copy(source, range.targetIndex, range.sourceIndex, range.count);
  }
}
```

FlatVector<T>::copy(source, rows, toSourceRow) is faster.

Switching from copyRanges to copy speeds up array_constructor for primitive types and structs significantly. Yet, this change makes arrays and maps slower.

The slowness is due to ArrayVector and MapVector not having an implementation for copy(source, rows, toSourceRow). They rely on BaseVector::copy to translate rows + toSourceRow to ranges. This extra processing causes a perf regression.

Hence, we use copy for primitive types and structs of these and copyRanges for everything else.
```
Before:

array_constructor_ARRAY_nullfree##1            16.80ms     59.53
array_constructor_ARRAY_nullfree##2            27.02ms     37.01
array_constructor_ARRAY_nullfree##3            38.03ms     26.30
array_constructor_ARRAY_nullfree##2_null       52.86ms     18.92
array_constructor_ARRAY_nullfree##2_const      54.97ms     18.19
array_constructor_ARRAY_nulls##1               30.61ms     32.66
array_constructor_ARRAY_nulls##2               55.01ms     18.18
array_constructor_ARRAY_nulls##3               80.69ms     12.39
array_constructor_ARRAY_nulls##2_null          69.10ms     14.47
array_constructor_ARRAY_nulls##2_const        103.85ms      9.63

After:

array_constructor_ARRAY_nullfree##1            15.25ms     65.58
array_constructor_ARRAY_nullfree##2            25.11ms     39.82
array_constructor_ARRAY_nullfree##3            34.59ms     28.91
array_constructor_ARRAY_nullfree##2_null       53.61ms     18.65
array_constructor_ARRAY_nullfree##2_const      51.48ms     19.42
array_constructor_ARRAY_nulls##1               29.99ms     33.34
array_constructor_ARRAY_nulls##2               55.91ms     17.89
array_constructor_ARRAY_nulls##3               81.73ms     12.24
array_constructor_ARRAY_nulls##2_null          66.97ms     14.93
array_constructor_ARRAY_nulls##2_const         92.96ms     10.76

Before:

array_constructor_INTEGER_nullfree##1          19.72ms     50.71
array_constructor_INTEGER_nullfree##2          34.51ms     28.97
array_constructor_INTEGER_nullfree##3          47.95ms     20.86
array_constructor_INTEGER_nullfree##2_null     58.68ms     17.04
array_constructor_INTEGER_nullfree##2_const    45.15ms     22.15
array_constructor_INTEGER_nulls##1             29.99ms     33.34
array_constructor_INTEGER_nulls##2             55.32ms     18.08
array_constructor_INTEGER_nulls##3             78.53ms     12.73
array_constructor_INTEGER_nulls##2_null        72.24ms     13.84
array_constructor_INTEGER_nulls##2_const       71.13ms     14.06

After:

array_constructor_INTEGER_nullfree##1           3.39ms    294.89
array_constructor_INTEGER_nullfree##2           7.35ms    136.10
array_constructor_INTEGER_nullfree##3          10.78ms     92.74
array_constructor_INTEGER_nullfree##2_null     11.29ms     88.57
array_constructor_INTEGER_nullfree##2_const    10.14ms     98.65
array_constructor_INTEGER_nulls##1              4.49ms    222.53
array_constructor_INTEGER_nulls##2              9.78ms    102.29
array_constructor_INTEGER_nulls##3             14.69ms     68.08
array_constructor_INTEGER_nulls##2_null        12.14ms     82.36
array_constructor_INTEGER_nulls##2_const       12.27ms     81.53

Before:

array_constructor_MAP_nullfree##1              17.34ms     57.65
array_constructor_MAP_nullfree##2              29.84ms     33.51
array_constructor_MAP_nullfree##3              41.51ms     24.09
array_constructor_MAP_nullfree##2_null         56.57ms     17.68
array_constructor_MAP_nullfree##2_const        71.68ms     13.95
array_constructor_MAP_nulls##1                 36.22ms     27.61
array_constructor_MAP_nulls##2                 68.18ms     14.67
array_constructor_MAP_nulls##3                 95.12ms     10.51
array_constructor_MAP_nulls##2_null            86.42ms     11.57
array_constructor_MAP_nulls##2_const          120.10ms      8.33

After:

array_constructor_MAP_nullfree##1              17.05ms     58.66
array_constructor_MAP_nullfree##2              28.42ms     35.18
array_constructor_MAP_nullfree##3              36.96ms     27.06
array_constructor_MAP_nullfree##2_null         55.64ms     17.97
array_constructor_MAP_nullfree##2_const        67.53ms     14.81
array_constructor_MAP_nulls##1                 32.91ms     30.39
array_constructor_MAP_nulls##2                 64.50ms     15.50
array_constructor_MAP_nulls##3                 95.71ms     10.45
array_constructor_MAP_nulls##2_null            77.22ms     12.95
array_constructor_MAP_nulls##2_const          114.91ms      8.70

Before:

array_constructor_ROW_nullfree##1              33.88ms     29.52
array_constructor_ROW_nullfree##2              62.00ms     16.13
array_constructor_ROW_nullfree##3              89.54ms     11.17
array_constructor_ROW_nullfree##2_null         78.46ms     12.75
array_constructor_ROW_nullfree##2_const        95.53ms     10.47
array_constructor_ROW_nulls##1                 44.11ms     22.67
array_constructor_ROW_nulls##2                115.43ms      8.66
array_constructor_ROW_nulls##3                173.61ms      5.76
array_constructor_ROW_nulls##2_null           130.40ms      7.67
array_constructor_ROW_nulls##2_const          169.97ms      5.88

After:

array_constructor_ROW_nullfree##1               5.55ms    180.15
array_constructor_ROW_nullfree##2              12.83ms     77.94
array_constructor_ROW_nullfree##3              18.89ms     52.95
array_constructor_ROW_nullfree##2_null         18.74ms     53.36
array_constructor_ROW_nullfree##2_const        18.16ms     55.07
array_constructor_ROW_nulls##1                 11.29ms     88.61
array_constructor_ROW_nulls##2                 18.57ms     53.86
array_constructor_ROW_nulls##3                 34.20ms     29.24
array_constructor_ROW_nulls##2_null            25.05ms     39.92
array_constructor_ROW_nulls##2_const           25.15ms     39.77
```

Reviewed By: laithsakka

Differential Revision: D49272797

fbshipit-source-id: 55d83de7b69c7ae4b72b5a5ae62a7868f36b0e19
1 parent 690d89b commit decb929

File tree

2 files changed

+160
-14
lines changed

2 files changed

+160
-14
lines changed

velox/functions/prestosql/ArrayConstructor.cpp

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,24 +55,56 @@ class ArrayConstructor : public exec::VectorFunction {
5555
} else {
5656
elementsResult->resize(baseOffset + numArgs * rows.countSelected());
5757

58-
std::vector<BaseVector::CopyRange> ranges;
59-
ranges.reserve(rows.end());
58+
if (shouldCopyRanges(elementsResult->type())) {
59+
std::vector<BaseVector::CopyRange> ranges;
60+
ranges.reserve(rows.end());
6061

61-
vector_size_t offset = baseOffset;
62-
rows.applyToSelected([&](vector_size_t row) {
63-
rawSizes[row] = numArgs;
64-
rawOffsets[row] = offset;
65-
ranges.push_back({row, offset, 1});
66-
offset += numArgs;
67-
});
62+
vector_size_t offset = baseOffset;
63+
rows.applyToSelected([&](vector_size_t row) {
64+
rawSizes[row] = numArgs;
65+
rawOffsets[row] = offset;
66+
ranges.push_back({row, offset, 1});
67+
offset += numArgs;
68+
});
69+
70+
elementsResult->copyRanges(args[0].get(), ranges);
71+
72+
for (int i = 1; i < numArgs; i++) {
73+
for (auto& range : ranges) {
74+
++range.targetIndex;
75+
}
76+
elementsResult->copyRanges(args[i].get(), ranges);
77+
}
78+
} else {
79+
SelectivityVector targetRows(elementsResult->size(), false);
80+
std::vector<vector_size_t> toSourceRow(elementsResult->size());
81+
82+
vector_size_t offset = baseOffset;
83+
rows.applyToSelected([&](vector_size_t row) {
84+
rawSizes[row] = numArgs;
85+
rawOffsets[row] = offset;
86+
87+
targetRows.setValid(offset, true);
88+
toSourceRow[offset] = row;
89+
90+
offset += numArgs;
91+
});
92+
targetRows.updateBounds();
93+
elementsResult->copy(args[0].get(), targetRows, toSourceRow.data());
94+
95+
for (int i = 1; i < numArgs; i++) {
96+
targetRows.clearAll();
6897

69-
elementsResult->copyRanges(args[0].get(), ranges);
98+
vector_size_t offset = baseOffset;
99+
rows.applyToSelected([&](vector_size_t row) {
100+
targetRows.setValid(offset + i, true);
101+
toSourceRow[offset + i] = row;
102+
offset += numArgs;
103+
});
70104

71-
for (int i = 1; i < numArgs; i++) {
72-
for (auto& range : ranges) {
73-
++range.targetIndex;
105+
targetRows.updateBounds();
106+
elementsResult->copy(args[i].get(), targetRows, toSourceRow.data());
74107
}
75-
elementsResult->copyRanges(args[i].get(), ranges);
76108
}
77109
}
78110
}
@@ -90,6 +122,28 @@ class ArrayConstructor : public exec::VectorFunction {
90122
.build(),
91123
};
92124
}
125+
126+
private:
127+
// BaseVector::copyRange is faster for arrays and maps and slower for
128+
// primitive types. Check if 'type' is an array or map or contains an array or
129+
// map. If so, return true, otherwise, false.
130+
static bool shouldCopyRanges(const TypePtr& type) {
131+
if (type->isPrimitiveType()) {
132+
return false;
133+
}
134+
135+
if (!type->isRow()) {
136+
return true;
137+
}
138+
139+
const auto& rowType = type->asRow();
140+
for (const auto& child : rowType.children()) {
141+
if (shouldCopyRanges(child)) {
142+
return true;
143+
}
144+
}
145+
return false;
146+
}
93147
};
94148
} // namespace
95149

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include <folly/Benchmark.h>
17+
#include <folly/init/Init.h>
18+
19+
#include "velox/benchmarks/ExpressionBenchmarkBuilder.h"
20+
#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
21+
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
22+
23+
using namespace facebook::velox;
24+
using namespace facebook::velox::exec;
25+
using namespace facebook::velox::functions;
26+
27+
int main(int argc, char** argv) {
28+
folly::Init init(&argc, &argv);
29+
30+
functions::prestosql::registerArrayFunctions();
31+
32+
ExpressionBenchmarkBuilder benchmarkBuilder;
33+
34+
auto* pool = benchmarkBuilder.pool();
35+
auto& vm = benchmarkBuilder.vectorMaker();
36+
37+
auto createSet =
38+
[&](const TypePtr& type, bool withNulls, const VectorPtr& constantInput) {
39+
VectorFuzzer::Options options;
40+
options.vectorSize = 1'000;
41+
options.nullRatio = withNulls ? 0.2 : 0.0;
42+
43+
VectorFuzzer fuzzer(options, pool);
44+
std::vector<VectorPtr> columns;
45+
columns.push_back(fuzzer.fuzzFlat(type));
46+
columns.push_back(fuzzer.fuzzFlat(type));
47+
columns.push_back(fuzzer.fuzzFlat(type));
48+
columns.push_back(
49+
BaseVector::createNullConstant(type, options.vectorSize, pool));
50+
columns.push_back(
51+
BaseVector::wrapInConstant(options.vectorSize, 0, constantInput));
52+
53+
auto input = vm.rowVector({"c0", "c1", "c2", "n", "c"}, columns);
54+
55+
benchmarkBuilder
56+
.addBenchmarkSet(
57+
fmt::format(
58+
"array_constructor_{}_{}",
59+
mapTypeKindToName(type->kind()),
60+
withNulls ? "nulls" : "nullfree"),
61+
input)
62+
.addExpression("1", "array_constructor(c0)")
63+
.addExpression("2", "array_constructor(c0, c1)")
64+
.addExpression("3", "array_constructor(c0, c1, c2)")
65+
.addExpression("2_null", "array_constructor(c0, c1, n)")
66+
.addExpression("2_const", "array_constructor(c0, c1, c)");
67+
};
68+
69+
auto constantInteger = BaseVector::createConstant(INTEGER(), 11, 1, pool);
70+
createSet(INTEGER(), true, constantInteger);
71+
createSet(INTEGER(), false, constantInteger);
72+
73+
auto constantRow = vm.rowVector({
74+
BaseVector::createConstant(INTEGER(), 11, 1, pool),
75+
BaseVector::createConstant(DOUBLE(), 1.23, 1, pool),
76+
});
77+
createSet(ROW({INTEGER(), DOUBLE()}), true, constantRow);
78+
createSet(ROW({INTEGER(), DOUBLE()}), false, constantRow);
79+
80+
auto constantArray = vm.arrayVector<int32_t>({{1, 2, 3, 4, 5}});
81+
createSet(ARRAY(INTEGER()), true, constantArray);
82+
createSet(ARRAY(INTEGER()), false, constantArray);
83+
84+
auto constantMap = vm.mapVector<int32_t, float>({{{1, 1.23}, {2, 2.34}}});
85+
createSet(MAP(INTEGER(), REAL()), true, constantMap);
86+
createSet(MAP(INTEGER(), REAL()), false, constantMap);
87+
88+
benchmarkBuilder.registerBenchmarks();
89+
90+
folly::runBenchmarks();
91+
return 0;
92+
}

0 commit comments

Comments
 (0)