datafusion_expr_common/signature.rs
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
17
//! Signature module contains foundational types that are used to represent signatures, types,
//! and return types of functions in DataFusion.
20
21use std::fmt::Display;
22use std::hash::Hash;
23
24use crate::type_coercion::aggregates::NUMERICS;
25use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
26use datafusion_common::internal_err;
27use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType};
28use datafusion_common::utils::ListCoercion;
29use indexmap::IndexSet;
30use itertools::Itertools;
31
/// Placeholder that stands for any valid timezone.
///
/// Use it where a function accepts a timestamp type with any valid timezone.
/// It exists so that signatures do not have to enumerate every possible
/// timezone. See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";
41
/// Placeholder that stands for a fixed size list of any valid length.
///
/// Use it where a function accepts a fixed size list type with any valid
/// length. It exists so that signatures do not have to enumerate every
/// possible fixed size list length. The sentinel value is `i32::MIN`.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
46
/// How stable a function's result is, which determines the optimizations it is eligible for.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Volatility {
    /// The function always returns the same output for the same input.
    ///
    /// DataFusion will attempt to inline immutable functions during planning.
    Immutable,
    /// The function may return different values for the same input across different
    /// queries, but must return the same value for a given input within one query.
    ///
    /// The `Now` function is an example. DataFusion will attempt to inline `Stable`
    /// functions during planning, when possible. For the query
    /// `select col1, now() from t1`, execution might take a while, but the `now()`
    /// column is the same for each output row because it is evaluated during planning.
    Stable,
    /// The function may return a different value on every evaluation, even within the
    /// same query.
    ///
    /// The `random()` function is an example. DataFusion can not evaluate such functions
    /// during planning. In the query `select col1, random() from t1`, `random()` is
    /// evaluated for each output row, resulting in a unique random value for each row.
    Volatile,
}
69
70/// A function's type signature defines the types of arguments the function supports.
71///
72/// Functions typically support only a few different types of arguments compared to the
73/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
74/// automatically coerces (add casts to) function arguments so they match the type signature.
75///
76/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
77/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
78/// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning.
79///
80/// # Data Types
81///
82/// ## Timestamps
83///
84/// Types to match are represented using Arrow's [`DataType`]. [`DataType::Timestamp`] has an optional variable
85/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
86/// the [`TIMEZONE_WILDCARD`]. For example:
87///
88/// ```
89/// # use arrow::datatypes::{DataType, TimeUnit};
90/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
91/// let type_signature = TypeSignature::Exact(vec![
92/// // A nanosecond precision timestamp with ANY timezone
93/// // matches Timestamp(Nanosecond, Some("+0:00"))
94/// // matches Timestamp(Nanosecond, Some("+5:00"))
95/// // does not match Timestamp(Nanosecond, None)
96/// DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
97/// ]);
98/// ```
99#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
100pub enum TypeSignature {
101 /// One or more arguments of a common type out of a list of valid types.
102 ///
103 /// For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]).
104 ///
105 /// # Examples
106 ///
107 /// A function such as `concat` is `Variadic(vec![DataType::Utf8,
108 /// DataType::LargeUtf8])`
109 Variadic(Vec<DataType>),
110 /// The acceptable signature and coercions rules are special for this
111 /// function.
112 ///
113 /// If this signature is specified,
114 /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types.
115 ///
116 /// [`ScalarUDFImpl::coerce_types`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/trait.ScalarUDFImpl.html#method.coerce_types
117 UserDefined,
118 /// One or more arguments with arbitrary types
119 VariadicAny,
120 /// One or more arguments of an arbitrary but equal type out of a list of valid types.
121 ///
122 /// # Examples
123 ///
124 /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
125 /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
126 Uniform(usize, Vec<DataType>),
127 /// One or more arguments with exactly the specified types in order.
128 ///
129 /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
130 Exact(Vec<DataType>),
131 /// One or more arguments belonging to the [`TypeSignatureClass`], in order.
132 ///
133 /// [`Coercion`] contains not only the desired type but also the allowed casts.
134 /// For example, if you expect a function has string type, but you also allow it to be casted from binary type.
135 ///
136 /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
137 Coercible(Vec<Coercion>),
138 /// One or more arguments coercible to a single, comparable type.
139 ///
140 /// Each argument will be coerced to a single type using the
141 /// coercion rules described in [`comparison_coercion_numeric`].
142 ///
143 /// # Examples
144 ///
145 /// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments
146 /// the types will both be coerced to `i64` before the function is invoked.
147 ///
148 /// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments
149 /// the types will both be coerced to `Utf8` before the function is invoked.
150 ///
151 /// Note:
152 /// - For functions that take no arguments (e.g. `random()` see [`TypeSignature::Nullary`]).
153 /// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8`
154 ///
155 /// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric
156 Comparable(usize),
157 /// One or more arguments of arbitrary types.
158 ///
159 /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
160 Any(usize),
161 /// Matches exactly one of a list of [`TypeSignature`]s.
162 ///
163 /// Coercion is attempted to match the signatures in order, and stops after
164 /// the first success, if any.
165 ///
166 /// # Examples
167 ///
168 /// Since `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature`
169 /// is `OneOf(vec![Any(0), VariadicAny])`.
170 OneOf(Vec<TypeSignature>),
171 /// A function that has an [`ArrayFunctionSignature`]
172 ArraySignature(ArrayFunctionSignature),
173 /// One or more arguments of numeric types.
174 ///
175 /// See [`NativeType::is_numeric`] to know which type is considered numeric
176 ///
177 /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
178 ///
179 /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric
180 Numeric(usize),
181 /// One or arguments of all the same string types.
182 ///
183 /// The precedence of type from high to low is Utf8View, LargeUtf8 and Utf8.
184 /// Null is considered as `Utf8` by default
185 /// Dictionary with string value type is also handled.
186 ///
187 /// For example, if a function is called with (utf8, large_utf8), all
188 /// arguments will be coerced to `LargeUtf8`
189 ///
190 /// For functions that take no arguments (e.g. `random()` use [`TypeSignature::Nullary`]).
191 String(usize),
192 /// No arguments
193 Nullary,
194}
195
196impl TypeSignature {
197 #[inline]
198 pub fn is_one_of(&self) -> bool {
199 matches!(self, TypeSignature::OneOf(_))
200 }
201}
202
203/// Represents the class of types that can be used in a function signature.
204///
205/// This is used to specify what types are valid for function arguments in a more flexible way than
206/// just listing specific DataTypes. For example, TypeSignatureClass::Timestamp matches any timestamp
207/// type regardless of timezone or precision.
208///
209/// Used primarily with TypeSignature::Coercible to define function signatures that can accept
210/// arguments that can be coerced to a particular class of types.
211#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
212pub enum TypeSignatureClass {
213 Timestamp,
214 Time,
215 Interval,
216 Duration,
217 Native(LogicalTypeRef),
218 // TODO:
219 // Numeric
220 Integer,
221}
222
223impl Display for TypeSignatureClass {
224 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
225 write!(f, "TypeSignatureClass::{self:?}")
226 }
227}
228
229impl TypeSignatureClass {
230 /// Get example acceptable types for this `TypeSignatureClass`
231 ///
232 /// This is used for `information_schema` and can be used to generate
233 /// documentation or error messages.
234 fn get_example_types(&self) -> Vec<DataType> {
235 match self {
236 TypeSignatureClass::Native(l) => get_data_types(l.native()),
237 TypeSignatureClass::Timestamp => {
238 vec![
239 DataType::Timestamp(TimeUnit::Nanosecond, None),
240 DataType::Timestamp(
241 TimeUnit::Nanosecond,
242 Some(TIMEZONE_WILDCARD.into()),
243 ),
244 ]
245 }
246 TypeSignatureClass::Time => {
247 vec![DataType::Time64(TimeUnit::Nanosecond)]
248 }
249 TypeSignatureClass::Interval => {
250 vec![DataType::Interval(IntervalUnit::DayTime)]
251 }
252 TypeSignatureClass::Duration => {
253 vec![DataType::Duration(TimeUnit::Nanosecond)]
254 }
255 TypeSignatureClass::Integer => {
256 vec![DataType::Int64]
257 }
258 }
259 }
260
261 /// Does the specified `NativeType` match this type signature class?
262 pub fn matches_native_type(
263 self: &TypeSignatureClass,
264 logical_type: &NativeType,
265 ) -> bool {
266 if logical_type == &NativeType::Null {
267 return true;
268 }
269
270 match self {
271 TypeSignatureClass::Native(t) if t.native() == logical_type => true,
272 TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
273 TypeSignatureClass::Time if logical_type.is_time() => true,
274 TypeSignatureClass::Interval if logical_type.is_interval() => true,
275 TypeSignatureClass::Duration if logical_type.is_duration() => true,
276 TypeSignatureClass::Integer if logical_type.is_integer() => true,
277 _ => false,
278 }
279 }
280
281 /// What type would `origin_type` be casted to when casting to the specified native type?
282 pub fn default_casted_type(
283 &self,
284 native_type: &NativeType,
285 origin_type: &DataType,
286 ) -> datafusion_common::Result<DataType> {
287 match self {
288 TypeSignatureClass::Native(logical_type) => {
289 logical_type.native().default_cast_for(origin_type)
290 }
291 // If the given type is already a timestamp, we don't change the unit and timezone
292 TypeSignatureClass::Timestamp if native_type.is_timestamp() => {
293 Ok(origin_type.to_owned())
294 }
295 TypeSignatureClass::Time if native_type.is_time() => {
296 Ok(origin_type.to_owned())
297 }
298 TypeSignatureClass::Interval if native_type.is_interval() => {
299 Ok(origin_type.to_owned())
300 }
301 TypeSignatureClass::Duration if native_type.is_duration() => {
302 Ok(origin_type.to_owned())
303 }
304 TypeSignatureClass::Integer if native_type.is_integer() => {
305 Ok(origin_type.to_owned())
306 }
307 _ => internal_err!("May miss the matching logic in `matches_native_type`"),
308 }
309 }
310}
311
312#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
313pub enum ArrayFunctionSignature {
314 /// A function takes at least one List/LargeList/FixedSizeList argument.
315 Array {
316 /// A full list of the arguments accepted by this function.
317 arguments: Vec<ArrayFunctionArgument>,
318 /// Additional information about how array arguments should be coerced.
319 array_coercion: Option<ListCoercion>,
320 },
321 /// A function takes a single argument that must be a List/LargeList/FixedSizeList
322 /// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
323 RecursiveArray,
324 /// Specialized Signature for MapArray
325 /// The function takes a single argument that must be a MapArray
326 MapArray,
327}
328
329impl Display for ArrayFunctionSignature {
330 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331 match self {
332 ArrayFunctionSignature::Array { arguments, .. } => {
333 for (idx, argument) in arguments.iter().enumerate() {
334 write!(f, "{argument}")?;
335 if idx != arguments.len() - 1 {
336 write!(f, ", ")?;
337 }
338 }
339 Ok(())
340 }
341 ArrayFunctionSignature::RecursiveArray => {
342 write!(f, "recursive_array")
343 }
344 ArrayFunctionSignature::MapArray => {
345 write!(f, "map_array")
346 }
347 }
348 }
349}
350
/// The kind of a single argument in an [`ArrayFunctionSignature::Array`] signature.
// NOTE: `PartialOrd` is derived, so the order of the variants below is observable.
#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd)]
pub enum ArrayFunctionArgument {
    /// A non-list or list argument. The list dimensions should be one less than the
    /// Array's list dimensions.
    Element,
    /// An Int64 index argument.
    Index,
    /// An argument of type List/LargeList/FixedSizeList. All Array arguments must be
    /// coercible to the same type.
    Array,
    /// A Utf8 argument.
    String,
}
364
365impl Display for ArrayFunctionArgument {
366 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367 match self {
368 ArrayFunctionArgument::Element => {
369 write!(f, "element")
370 }
371 ArrayFunctionArgument::Index => {
372 write!(f, "index")
373 }
374 ArrayFunctionArgument::Array => {
375 write!(f, "array")
376 }
377 ArrayFunctionArgument::String => {
378 write!(f, "string")
379 }
380 }
381 }
382}
383
384impl TypeSignature {
385 pub fn to_string_repr(&self) -> Vec<String> {
386 match self {
387 TypeSignature::Nullary => {
388 vec!["NullAry()".to_string()]
389 }
390 TypeSignature::Variadic(types) => {
391 vec![format!("{}, ..", Self::join_types(types, "/"))]
392 }
393 TypeSignature::Uniform(arg_count, valid_types) => {
394 vec![
395 std::iter::repeat_n(Self::join_types(valid_types, "/"), *arg_count)
396 .collect::<Vec<String>>()
397 .join(", "),
398 ]
399 }
400 TypeSignature::String(num) => {
401 vec![format!("String({num})")]
402 }
403 TypeSignature::Numeric(num) => {
404 vec![format!("Numeric({num})")]
405 }
406 TypeSignature::Comparable(num) => {
407 vec![format!("Comparable({num})")]
408 }
409 TypeSignature::Coercible(coercions) => {
410 vec![Self::join_types(coercions, ", ")]
411 }
412 TypeSignature::Exact(types) => {
413 vec![Self::join_types(types, ", ")]
414 }
415 TypeSignature::Any(arg_count) => {
416 vec![std::iter::repeat_n("Any", *arg_count)
417 .collect::<Vec<&str>>()
418 .join(", ")]
419 }
420 TypeSignature::UserDefined => {
421 vec!["UserDefined".to_string()]
422 }
423 TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()],
424 TypeSignature::OneOf(sigs) => {
425 sigs.iter().flat_map(|s| s.to_string_repr()).collect()
426 }
427 TypeSignature::ArraySignature(array_signature) => {
428 vec![array_signature.to_string()]
429 }
430 }
431 }
432
433 /// Helper function to join types with specified delimiter.
434 pub fn join_types<T: Display>(types: &[T], delimiter: &str) -> String {
435 types
436 .iter()
437 .map(|t| t.to_string())
438 .collect::<Vec<String>>()
439 .join(delimiter)
440 }
441
442 /// Check whether 0 input argument is valid for given `TypeSignature`
443 pub fn supports_zero_argument(&self) -> bool {
444 match &self {
445 TypeSignature::Exact(vec) => vec.is_empty(),
446 TypeSignature::Nullary => true,
447 TypeSignature::OneOf(types) => types
448 .iter()
449 .any(|type_sig| type_sig.supports_zero_argument()),
450 _ => false,
451 }
452 }
453
454 /// Returns true if the signature currently supports or used to supported 0
455 /// input arguments in a previous version of DataFusion.
456 pub fn used_to_support_zero_arguments(&self) -> bool {
457 match &self {
458 TypeSignature::Any(num) => *num == 0,
459 _ => self.supports_zero_argument(),
460 }
461 }
462
463 #[deprecated(since = "46.0.0", note = "See get_example_types instead")]
464 pub fn get_possible_types(&self) -> Vec<Vec<DataType>> {
465 self.get_example_types()
466 }
467
468 /// Return example acceptable types for this `TypeSignature`'
469 ///
470 /// Returns a `Vec<DataType>` for each argument to the function
471 ///
472 /// This is used for `information_schema` and can be used to generate
473 /// documentation or error messages.
474 pub fn get_example_types(&self) -> Vec<Vec<DataType>> {
475 match self {
476 TypeSignature::Exact(types) => vec![types.clone()],
477 TypeSignature::OneOf(types) => types
478 .iter()
479 .flat_map(|type_sig| type_sig.get_example_types())
480 .collect(),
481 TypeSignature::Uniform(arg_count, types) => types
482 .iter()
483 .cloned()
484 .map(|data_type| vec![data_type; *arg_count])
485 .collect(),
486 TypeSignature::Coercible(coercions) => coercions
487 .iter()
488 .map(|c| {
489 let mut all_types: IndexSet<DataType> =
490 c.desired_type().get_example_types().into_iter().collect();
491
492 if let Some(implicit_coercion) = c.implicit_coercion() {
493 let allowed_casts: Vec<DataType> = implicit_coercion
494 .allowed_source_types
495 .iter()
496 .flat_map(|t| t.get_example_types())
497 .collect();
498 all_types.extend(allowed_casts);
499 }
500
501 all_types.into_iter().collect::<Vec<_>>()
502 })
503 .multi_cartesian_product()
504 .collect(),
505 TypeSignature::Variadic(types) => types
506 .iter()
507 .cloned()
508 .map(|data_type| vec![data_type])
509 .collect(),
510 TypeSignature::Numeric(arg_count) => NUMERICS
511 .iter()
512 .cloned()
513 .map(|numeric_type| vec![numeric_type; *arg_count])
514 .collect(),
515 TypeSignature::String(arg_count) => get_data_types(&NativeType::String)
516 .into_iter()
517 .map(|dt| vec![dt; *arg_count])
518 .collect::<Vec<_>>(),
519 // TODO: Implement for other types
520 TypeSignature::Any(_)
521 | TypeSignature::Comparable(_)
522 | TypeSignature::Nullary
523 | TypeSignature::VariadicAny
524 | TypeSignature::ArraySignature(_)
525 | TypeSignature::UserDefined => vec![],
526 }
527 }
528}
529
530fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
531 match native_type {
532 NativeType::Null => vec![DataType::Null],
533 NativeType::Boolean => vec![DataType::Boolean],
534 NativeType::Int8 => vec![DataType::Int8],
535 NativeType::Int16 => vec![DataType::Int16],
536 NativeType::Int32 => vec![DataType::Int32],
537 NativeType::Int64 => vec![DataType::Int64],
538 NativeType::UInt8 => vec![DataType::UInt8],
539 NativeType::UInt16 => vec![DataType::UInt16],
540 NativeType::UInt32 => vec![DataType::UInt32],
541 NativeType::UInt64 => vec![DataType::UInt64],
542 NativeType::Float16 => vec![DataType::Float16],
543 NativeType::Float32 => vec![DataType::Float32],
544 NativeType::Float64 => vec![DataType::Float64],
545 NativeType::Date => vec![DataType::Date32, DataType::Date64],
546 NativeType::Binary => vec![
547 DataType::Binary,
548 DataType::LargeBinary,
549 DataType::BinaryView,
550 ],
551 NativeType::String => {
552 vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
553 }
554 // TODO: support other native types
555 _ => vec![],
556 }
557}
558
559/// Represents type coercion rules for function arguments, specifying both the desired type
560/// and optional implicit coercion rules for source types.
561///
562/// # Examples
563///
564/// ```
565/// use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
566/// use datafusion_common::types::{NativeType, logical_binary, logical_string};
567///
568/// // Exact coercion that only accepts timestamp types
569/// let exact = Coercion::new_exact(TypeSignatureClass::Timestamp);
570///
571/// // Implicit coercion that accepts string types but can coerce from binary types
572/// let implicit = Coercion::new_implicit(
573/// TypeSignatureClass::Native(logical_string()),
574/// vec![TypeSignatureClass::Native(logical_binary())],
575/// NativeType::String
576/// );
577/// ```
578///
579/// There are two variants:
580///
581/// * `Exact` - Only accepts arguments that exactly match the desired type
582/// * `Implicit` - Accepts the desired type and can coerce from specified source types
583#[derive(Debug, Clone, Eq, PartialOrd)]
584pub enum Coercion {
585 /// Coercion that only accepts arguments exactly matching the desired type.
586 Exact {
587 /// The required type for the argument
588 desired_type: TypeSignatureClass,
589 },
590
591 /// Coercion that accepts the desired type and can implicitly coerce from other types.
592 Implicit {
593 /// The primary desired type for the argument
594 desired_type: TypeSignatureClass,
595 /// Rules for implicit coercion from other types
596 implicit_coercion: ImplicitCoercion,
597 },
598}
599
600impl Coercion {
601 pub fn new_exact(desired_type: TypeSignatureClass) -> Self {
602 Self::Exact { desired_type }
603 }
604
605 /// Create a new coercion with implicit coercion rules.
606 ///
607 /// `allowed_source_types` defines the possible types that can be coerced to `desired_type`.
608 /// `default_casted_type` is the default type to be used for coercion if we cast from other types via `allowed_source_types`.
609 pub fn new_implicit(
610 desired_type: TypeSignatureClass,
611 allowed_source_types: Vec<TypeSignatureClass>,
612 default_casted_type: NativeType,
613 ) -> Self {
614 Self::Implicit {
615 desired_type,
616 implicit_coercion: ImplicitCoercion {
617 allowed_source_types,
618 default_casted_type,
619 },
620 }
621 }
622
623 pub fn allowed_source_types(&self) -> &[TypeSignatureClass] {
624 match self {
625 Coercion::Exact { .. } => &[],
626 Coercion::Implicit {
627 implicit_coercion, ..
628 } => implicit_coercion.allowed_source_types.as_slice(),
629 }
630 }
631
632 pub fn default_casted_type(&self) -> Option<&NativeType> {
633 match self {
634 Coercion::Exact { .. } => None,
635 Coercion::Implicit {
636 implicit_coercion, ..
637 } => Some(&implicit_coercion.default_casted_type),
638 }
639 }
640
641 pub fn desired_type(&self) -> &TypeSignatureClass {
642 match self {
643 Coercion::Exact { desired_type } => desired_type,
644 Coercion::Implicit { desired_type, .. } => desired_type,
645 }
646 }
647
648 pub fn implicit_coercion(&self) -> Option<&ImplicitCoercion> {
649 match self {
650 Coercion::Exact { .. } => None,
651 Coercion::Implicit {
652 implicit_coercion, ..
653 } => Some(implicit_coercion),
654 }
655 }
656}
657
658impl Display for Coercion {
659 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
660 write!(f, "Coercion({}", self.desired_type())?;
661 if let Some(implicit_coercion) = self.implicit_coercion() {
662 write!(f, ", implicit_coercion={implicit_coercion}",)
663 } else {
664 write!(f, ")")
665 }
666 }
667}
668
669impl PartialEq for Coercion {
670 fn eq(&self, other: &Self) -> bool {
671 self.desired_type() == other.desired_type()
672 && self.implicit_coercion() == other.implicit_coercion()
673 }
674}
675
676impl Hash for Coercion {
677 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
678 self.desired_type().hash(state);
679 self.implicit_coercion().hash(state);
680 }
681}
682
683/// Defines rules for implicit type coercion, specifying which source types can be
684/// coerced and the default type to use when coercing.
685///
686/// This is used by functions to specify which types they can accept via implicit
687/// coercion in addition to their primary desired type.
688///
689/// # Examples
690///
691/// ```
692/// use arrow::datatypes::TimeUnit;
693///
694/// use datafusion_expr_common::signature::{Coercion, ImplicitCoercion, TypeSignatureClass};
695/// use datafusion_common::types::{NativeType, logical_binary};
696///
697/// // Allow coercing from binary types to timestamp, coerce to specific timestamp unit and timezone
698/// let implicit = Coercion::new_implicit(
699/// TypeSignatureClass::Timestamp,
700/// vec![TypeSignatureClass::Native(logical_binary())],
701/// NativeType::Timestamp(TimeUnit::Second, None),
702/// );
703/// ```
704#[derive(Debug, Clone, Eq, PartialOrd)]
705pub struct ImplicitCoercion {
706 /// The types that can be coerced from via implicit casting
707 allowed_source_types: Vec<TypeSignatureClass>,
708
709 /// The default type to use when coercing from allowed source types.
710 /// This is particularly important for types like Timestamp that have multiple
711 /// possible configurations (different time units and timezones).
712 default_casted_type: NativeType,
713}
714
715impl Display for ImplicitCoercion {
716 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
717 write!(
718 f,
719 "ImplicitCoercion({:?}, default_type={:?})",
720 self.allowed_source_types, self.default_casted_type
721 )
722 }
723}
724
725impl PartialEq for ImplicitCoercion {
726 fn eq(&self, other: &Self) -> bool {
727 self.allowed_source_types == other.allowed_source_types
728 && self.default_casted_type == other.default_casted_type
729 }
730}
731
732impl Hash for ImplicitCoercion {
733 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
734 self.allowed_source_types.hash(state);
735 self.default_casted_type.hash(state);
736 }
737}
738
739/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
740///
741/// DataFusion will automatically coerce (cast) argument types to one of the supported
742/// function signatures, if possible.
743#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
744pub struct Signature {
745 /// The data types that the function accepts. See [TypeSignature] for more information.
746 pub type_signature: TypeSignature,
747 /// The volatility of the function. See [Volatility] for more information.
748 pub volatility: Volatility,
749}
750
751impl Signature {
752 /// Creates a new Signature from a given type signature and volatility.
753 pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
754 Signature {
755 type_signature,
756 volatility,
757 }
758 }
759 /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
760 pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
761 Self {
762 type_signature: TypeSignature::Variadic(common_types),
763 volatility,
764 }
765 }
766 /// User-defined coercion rules for the function.
767 pub fn user_defined(volatility: Volatility) -> Self {
768 Self {
769 type_signature: TypeSignature::UserDefined,
770 volatility,
771 }
772 }
773
774 /// A specified number of numeric arguments
775 pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
776 Self {
777 type_signature: TypeSignature::Numeric(arg_count),
778 volatility,
779 }
780 }
781
782 /// A specified number of numeric arguments
783 pub fn string(arg_count: usize, volatility: Volatility) -> Self {
784 Self {
785 type_signature: TypeSignature::String(arg_count),
786 volatility,
787 }
788 }
789
790 /// An arbitrary number of arguments of any type.
791 pub fn variadic_any(volatility: Volatility) -> Self {
792 Self {
793 type_signature: TypeSignature::VariadicAny,
794 volatility,
795 }
796 }
797 /// A fixed number of arguments of the same type, from those listed in `valid_types`.
798 pub fn uniform(
799 arg_count: usize,
800 valid_types: Vec<DataType>,
801 volatility: Volatility,
802 ) -> Self {
803 Self {
804 type_signature: TypeSignature::Uniform(arg_count, valid_types),
805 volatility,
806 }
807 }
808 /// Exactly matches the types in `exact_types`, in order.
809 pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
810 Signature {
811 type_signature: TypeSignature::Exact(exact_types),
812 volatility,
813 }
814 }
815
816 /// Target coerce types in order
817 pub fn coercible(target_types: Vec<Coercion>, volatility: Volatility) -> Self {
818 Self {
819 type_signature: TypeSignature::Coercible(target_types),
820 volatility,
821 }
822 }
823
824 /// Used for function that expects comparable data types, it will try to coerced all the types into single final one.
825 pub fn comparable(arg_count: usize, volatility: Volatility) -> Self {
826 Self {
827 type_signature: TypeSignature::Comparable(arg_count),
828 volatility,
829 }
830 }
831
832 pub fn nullary(volatility: Volatility) -> Self {
833 Signature {
834 type_signature: TypeSignature::Nullary,
835 volatility,
836 }
837 }
838
839 /// A specified number of arguments of any type
840 pub fn any(arg_count: usize, volatility: Volatility) -> Self {
841 Signature {
842 type_signature: TypeSignature::Any(arg_count),
843 volatility,
844 }
845 }
846 /// Any one of a list of [TypeSignature]s.
847 pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
848 Signature {
849 type_signature: TypeSignature::OneOf(type_signatures),
850 volatility,
851 }
852 }
853 /// Specialized Signature for ArrayAppend and similar functions
854 pub fn array_and_element(volatility: Volatility) -> Self {
855 Signature {
856 type_signature: TypeSignature::ArraySignature(
857 ArrayFunctionSignature::Array {
858 arguments: vec![
859 ArrayFunctionArgument::Array,
860 ArrayFunctionArgument::Element,
861 ],
862 array_coercion: Some(ListCoercion::FixedSizedListToList),
863 },
864 ),
865 volatility,
866 }
867 }
868 /// Specialized Signature for Array functions with an optional index
869 pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
870 Signature {
871 type_signature: TypeSignature::OneOf(vec![
872 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
873 arguments: vec![
874 ArrayFunctionArgument::Array,
875 ArrayFunctionArgument::Element,
876 ],
877 array_coercion: None,
878 }),
879 TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
880 arguments: vec![
881 ArrayFunctionArgument::Array,
882 ArrayFunctionArgument::Element,
883 ArrayFunctionArgument::Index,
884 ],
885 array_coercion: None,
886 }),
887 ]),
888 volatility,
889 }
890 }
891
892 /// Specialized Signature for ArrayElement and similar functions
893 pub fn array_and_index(volatility: Volatility) -> Self {
894 Signature {
895 type_signature: TypeSignature::ArraySignature(
896 ArrayFunctionSignature::Array {
897 arguments: vec![
898 ArrayFunctionArgument::Array,
899 ArrayFunctionArgument::Index,
900 ],
901 array_coercion: None,
902 },
903 ),
904 volatility,
905 }
906 }
907 /// Specialized Signature for ArrayEmpty and similar functions
908 pub fn array(volatility: Volatility) -> Self {
909 Signature {
910 type_signature: TypeSignature::ArraySignature(
911 ArrayFunctionSignature::Array {
912 arguments: vec![ArrayFunctionArgument::Array],
913 array_coercion: None,
914 },
915 ),
916 volatility,
917 }
918 }
919}
920
#[cfg(test)]
mod tests {
    use datafusion_common::types::{logical_int64, logical_string};

    use super::*;

    /// `supports_zero_argument` must be true for signatures that can match
    /// an empty argument list and false for the others listed below.
    #[test]
    fn supports_zero_argument_tests() {
        // Signatures expected to accept a call with zero arguments.
        let accepting = vec![
            TypeSignature::Exact(vec![]),
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Nullary,
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
            TypeSignature::Nullary,
        ];
        for signature in &accepting {
            assert!(
                signature.supports_zero_argument(),
                "Expected {:?} to support zero arguments",
                signature
            );
        }

        // Signatures expected to reject a call with zero arguments.
        let rejecting = vec![
            TypeSignature::Exact(vec![DataType::Utf8]),
            TypeSignature::Uniform(1, vec![DataType::Float64]),
            TypeSignature::Any(1),
            TypeSignature::VariadicAny,
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        for signature in &rejecting {
            assert!(
                !signature.supports_zero_argument(),
                "Expected {:?} not to support zero arguments",
                signature
            );
        }
    }

    /// Exercises the `<` operator on `TypeSignature` values.
    #[test]
    fn type_signature_partial_ord() {
        // Comparisons between different variants.
        let user_defined = TypeSignature::UserDefined;
        assert!(user_defined < TypeSignature::VariadicAny);
        assert!(user_defined < TypeSignature::Any(1));

        // Comparisons within `Uniform`: differing type lists, then
        // differing argument counts.
        let one_null = TypeSignature::Uniform(1, vec![DataType::Null]);
        let one_boolean = TypeSignature::Uniform(1, vec![DataType::Boolean]);
        let two_nulls = TypeSignature::Uniform(2, vec![DataType::Null]);
        assert!(one_null < one_boolean);
        assert!(one_null < two_nulls);

        // A `Uniform` with the largest possible count still compares less
        // than an `Exact`.
        let max_uniform = TypeSignature::Uniform(usize::MAX, vec![DataType::Null]);
        let exact_null = TypeSignature::Exact(vec![DataType::Null]);
        assert!(max_uniform < exact_null);
    }

    /// Checks the output of `get_example_types` for a range of
    /// `TypeSignature` variants.
    #[test]
    fn test_get_possible_types() {
        // Repeats one type twice, for expectations of the form `[T, T]`.
        let pair = |data_type: DataType| vec![data_type.clone(), data_type];

        let exact = TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]);
        assert_eq!(
            exact.get_example_types(),
            vec![vec![DataType::Int32, DataType::Int64]]
        );

        let two_exact = TypeSignature::OneOf(vec![
            TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
            TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
        ]);
        assert_eq!(
            two_exact.get_example_types(),
            vec![
                vec![DataType::Int32, DataType::Int64],
                vec![DataType::Float32, DataType::Float64]
            ]
        );

        let three_exact = TypeSignature::OneOf(vec![
            TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
            TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
            TypeSignature::Exact(vec![DataType::Utf8]),
        ]);
        assert_eq!(
            three_exact.get_example_types(),
            vec![
                vec![DataType::Int32, DataType::Int64],
                vec![DataType::Float32, DataType::Float64],
                vec![DataType::Utf8]
            ]
        );

        let uniform = TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]);
        assert_eq!(
            uniform.get_example_types(),
            vec![pair(DataType::Float32), pair(DataType::Int64)]
        );

        let coercible = TypeSignature::Coercible(vec![
            Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
            Coercion::new_exact(TypeSignatureClass::Native(logical_int64())),
        ]);
        assert_eq!(
            coercible.get_example_types(),
            vec![
                vec![DataType::Utf8, DataType::Int64],
                vec![DataType::LargeUtf8, DataType::Int64],
                vec![DataType::Utf8View, DataType::Int64]
            ]
        );

        let variadic = TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]);
        assert_eq!(
            variadic.get_example_types(),
            vec![vec![DataType::Int32], vec![DataType::Int64]]
        );

        let numeric = TypeSignature::Numeric(2);
        let expected_numeric: Vec<Vec<DataType>> = vec![
            DataType::Int8,
            DataType::Int16,
            DataType::Int32,
            DataType::Int64,
            DataType::UInt8,
            DataType::UInt16,
            DataType::UInt32,
            DataType::UInt64,
            DataType::Float32,
            DataType::Float64,
        ]
        .into_iter()
        .map(pair)
        .collect();
        assert_eq!(numeric.get_example_types(), expected_numeric);

        let string = TypeSignature::String(2);
        let expected_string: Vec<Vec<DataType>> =
            vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
                .into_iter()
                .map(pair)
                .collect();
        assert_eq!(string.get_example_types(), expected_string);
    }
}