Skip to content

Commit d95d29f

Browse files
Yohahaha authored and facebook-github-bot committed
Add empty2null Spark function (facebookincubator#10616)
Summary: Adds a function that converts an empty string input to a NULL value. Spark implementation: https://github.com/apache/spark/blob/34a52ad2ce377dd077c4cf70740ba3ab6c18c739/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala#L3486-L3508 Pull Request resolved: facebookincubator#10616 Reviewed By: DanielHunte Differential Revision: D60599503 Pulled By: kagamiori fbshipit-source-id: 3082d1877cccfc2ce3c36a7d4385811d7932d214
1 parent c9dac9d commit d95d29f

File tree

4 files changed

+42
-0
lines changed

4 files changed

+42
-0
lines changed

velox/docs/functions/spark/string.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,15 @@ Unless specified otherwise, all functions return NULL if at least one of the arg
5353
SELECT conv("11abc", 10, 10); -- '11'
5454
SELECT conv('H016F', 16, 10); -- '0'
5555

56+
.. spark:function:: empty2null(input) -> varchar
57+
58+
Returns NULL if ``input`` is empty. Otherwise, returns ``input``.
59+
Note: this is an internal Spark function used to convert an empty value of a partition column to NULL,
60+
which is then converted to the Hive default partition value ``__HIVE_DEFAULT_PARTITION__``. ::
61+
62+
SELECT empty2null(''); -- NULL
63+
SELECT empty2null('abc'); -- 'abc'
64+
5665
.. spark:function:: endswith(left, right) -> boolean
5766
5867
Returns true if 'left' ends with 'right'. Otherwise, returns false. ::

velox/functions/sparksql/Register.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,8 @@ void registerFunctions(const std::string& prefix) {
242242
registerFunction<Sha2HexStringFunction, Varchar, Varbinary, int32_t>(
243243
{prefix + "sha2"});
244244
registerFunction<CRC32Function, int64_t, Varbinary>({prefix + "crc32"});
245+
registerFunction<Empty2NullFunction, Varchar, Varchar>(
246+
{prefix + "empty2null"});
245247

246248
exec::registerStatefulVectorFunction(
247249
prefix + "regexp_extract", re2ExtractSignatures(), makeRegexExtract);

velox/functions/sparksql/String.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,4 +1431,26 @@ struct LevenshteinDistanceFunction {
14311431
}
14321432
};
14331433

1434+
/// empty2null(input) -> varchar
1435+
///
1436+
/// Returns NULL when the input is empty,
1437+
/// otherwise, it returns the input itself.
1438+
template <typename T>
1439+
struct Empty2NullFunction {
1440+
VELOX_DEFINE_FUNCTION_TYPES(T);
1441+
1442+
// Results refer to strings in the first argument.
1443+
static constexpr int32_t reuse_strings_from_arg = 0;
1444+
1445+
FOLLY_ALWAYS_INLINE bool call(
1446+
out_type<Varchar>& result,
1447+
const arg_type<Varchar>& input) {
1448+
if (input.empty()) {
1449+
return false;
1450+
}
1451+
result.setNoCopy(input);
1452+
return true;
1453+
}
1454+
};
1455+
14341456
} // namespace facebook::velox::functions::sparksql

velox/functions/sparksql/tests/StringTest.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,5 +995,14 @@ TEST_F(StringTest, trim) {
995995
trimWithTrimStr("\u6570", "\u6574\u6570 \u6570\u636E!"),
996996
"\u6574\u6570 \u6570\u636E!");
997997
}
998+
999+
// Verifies empty2null: empty strings become NULL, everything else is
// returned as-is.
TEST_F(StringTest, empty2Null) {
  const auto empty2Null = [&](const std::optional<std::string>& a) {
    return evaluateOnce<std::string>("empty2null(c0)", a);
  };

  // Empty input is converted to NULL.
  EXPECT_EQ(empty2Null(""), std::nullopt);
  // Non-empty input is returned unchanged.
  EXPECT_EQ(empty2Null("abc"), "abc");
  // A whitespace-only string has non-zero length, so it is not "empty".
  EXPECT_EQ(empty2Null(" "), " ");
  // NULL input yields NULL, per the default NULL-in/NULL-out behavior.
  EXPECT_EQ(empty2Null(std::nullopt), std::nullopt);
}
9981007
} // namespace
9991008
} // namespace facebook::velox::functions::sparksql::test

0 commit comments

Comments
 (0)