datafusion_expr_common/
signature.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Signature module contains foundational types that are used to represent signatures, types,
19//! and return types of functions in DataFusion.
20
21use std::fmt::Display;
22use std::hash::Hash;
23
24use crate::type_coercion::aggregates::NUMERICS;
25use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
26use datafusion_common::internal_err;
27use datafusion_common::types::{LogicalType, LogicalTypeRef, NativeType};
28use datafusion_common::utils::ListCoercion;
29use indexmap::IndexSet;
30use itertools::Itertools;
31
/// Constant that is used as a placeholder for any valid timezone.
///
/// This is used where a function can accept a timestamp type with any
/// valid timezone. It exists to avoid the need to enumerate all possible
/// timezones. See [`TypeSignature`] for more details.
///
/// Type coercion always ensures that functions will be executed using
/// timestamp arrays that have a valid time zone. Functions must never
/// return results with this timezone.
pub const TIMEZONE_WILDCARD: &str = "+TZ";
41
/// Constant that is used as a placeholder for any valid fixed size list.
///
/// This is used where a function can accept a fixed size list type with any
/// valid length. It exists to avoid the need to enumerate all possible fixed
/// size list lengths. The placeholder value is `i32::MIN`.
pub const FIXED_SIZE_LIST_WILDCARD: i32 = i32::MIN;
46
/// A function's volatility, which defines the function's eligibility for certain optimizations.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Volatility {
    /// An immutable function will always return the same output when given the same
    /// input. DataFusion will attempt to inline immutable functions during planning.
    Immutable,
    /// A stable function may return different values given the same input across different
    /// queries but must return the same value for a given input within a query. An example of
    /// this is the `Now` function. DataFusion will attempt to inline `Stable` functions
    /// during planning, when possible.
    ///
    /// For the query `select col1, now() from t1`, execution might take a while, but the
    /// `now()` column will be the same for each output row because it is evaluated
    /// during planning.
    Stable,
    /// A volatile function may change the return value from evaluation to evaluation.
    /// Multiple invocations of a volatile function may return different results when used in the
    /// same query. An example of this is the `random()` function. DataFusion
    /// cannot evaluate such functions during planning.
    ///
    /// In the query `select col1, random() from t1`, the `random()` function will be evaluated
    /// for each output row, resulting in a unique random value for each row.
    Volatile,
}
69
/// A function's type signature defines the types of arguments the function supports.
///
/// Functions typically support only a few different types of arguments compared to the
/// different datatypes in Arrow. To make functions easy to use, when possible DataFusion
/// automatically coerces (adds casts to) function arguments so they match the type signature.
///
/// For example, a function like `cos` may only be implemented for `Float64` arguments. To support a query
/// that calls `cos` with a different argument type, such as `cos(int_column)`, type coercion automatically
/// adds a cast such as `cos(CAST int_column AS DOUBLE)` during planning.
///
/// # Data Types
///
/// ## Timestamps
///
/// Types to match are represented using Arrow's [`DataType`].  [`DataType::Timestamp`] has an optional variable
/// timezone specification. To specify a function can handle a timestamp with *ANY* timezone, use
/// the [`TIMEZONE_WILDCARD`]. For example:
///
/// ```
/// # use arrow::datatypes::{DataType, TimeUnit};
/// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature};
/// let type_signature = TypeSignature::Exact(vec![
///   // A nanosecond precision timestamp with ANY timezone
///   // matches  Timestamp(Nanosecond, Some("+0:00"))
///   // matches  Timestamp(Nanosecond, Some("+5:00"))
///   // does not match  Timestamp(Nanosecond, None)
///   DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())),
/// ]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum TypeSignature {
    /// One or more arguments of a common type out of a list of valid types.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    ///
    /// # Examples
    ///
    /// A function such as `concat` is `Variadic(vec![DataType::Utf8,
    /// DataType::LargeUtf8])`
    Variadic(Vec<DataType>),
    /// The acceptable signature and coercion rules are special for this
    /// function.
    ///
    /// If this signature is specified,
    /// DataFusion will call [`ScalarUDFImpl::coerce_types`] to prepare argument types.
    ///
    /// [`ScalarUDFImpl::coerce_types`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/trait.ScalarUDFImpl.html#method.coerce_types
    UserDefined,
    /// One or more arguments with arbitrary types
    VariadicAny,
    /// One or more arguments of an arbitrary but equal type out of a list of valid types.
    ///
    /// # Examples
    ///
    /// 1. A function of one argument of f64 is `Uniform(1, vec![DataType::Float64])`
    /// 2. A function of one argument of f64 or f32 is `Uniform(1, vec![DataType::Float32, DataType::Float64])`
    Uniform(usize, Vec<DataType>),
    /// One or more arguments with exactly the specified types in order.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Exact(Vec<DataType>),
    /// One or more arguments belonging to the [`TypeSignatureClass`], in order.
    ///
    /// [`Coercion`] contains not only the desired type but also the allowed casts.
    /// For example, a function may expect a string argument but also allow it to be
    /// cast from a binary type.
    ///
    /// For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    Coercible(Vec<Coercion>),
    /// One or more arguments coercible to a single, comparable type.
    ///
    /// Each argument will be coerced to a single type using the
    /// coercion rules described in [`comparison_coercion_numeric`].
    ///
    /// # Examples
    ///
    /// If the `nullif(1, 2)` function is called with `i32` and `i64` arguments
    /// the types will both be coerced to `i64` before the function is invoked.
    ///
    /// If the `nullif('1', 2)` function is called with `Utf8` and `i64` arguments
    /// the types will both be coerced to `Utf8` before the function is invoked.
    ///
    /// Note:
    /// - For functions that take no arguments (e.g. `random()`) see [`TypeSignature::Nullary`].
    /// - If all arguments have type [`DataType::Null`], they are coerced to `Utf8`
    ///
    /// [`comparison_coercion_numeric`]: crate::type_coercion::binary::comparison_coercion_numeric
    Comparable(usize),
    /// One or more arguments of arbitrary types.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    Any(usize),
    /// Matches exactly one of a list of [`TypeSignature`]s.
    ///
    /// Coercion is attempted to match the signatures in order, and stops after
    /// the first success, if any.
    ///
    /// # Examples
    ///
    /// Since `make_array` takes 0 or more arguments with arbitrary types, its `TypeSignature`
    /// is `OneOf(vec![Any(0), VariadicAny])`.
    OneOf(Vec<TypeSignature>),
    /// A function that has an [`ArrayFunctionSignature`]
    ArraySignature(ArrayFunctionSignature),
    /// One or more arguments of numeric types.
    ///
    /// See [`NativeType::is_numeric`] to know which types are considered numeric.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    ///
    /// [`NativeType::is_numeric`]: datafusion_common::types::NativeType::is_numeric
    Numeric(usize),
    /// One or more arguments that all have the same string type.
    ///
    /// The precedence of types from high to low is Utf8View, LargeUtf8 and Utf8.
    /// Null is considered as `Utf8` by default.
    /// Dictionary with string value type is also handled.
    ///
    /// For example, if a function is called with (utf8, large_utf8), all
    /// arguments will be coerced to `LargeUtf8`.
    ///
    /// For functions that take no arguments (e.g. `random()`) use [`TypeSignature::Nullary`].
    String(usize),
    /// No arguments
    Nullary,
}
195
196impl TypeSignature {
197    #[inline]
198    pub fn is_one_of(&self) -> bool {
199        matches!(self, TypeSignature::OneOf(_))
200    }
201}
202
/// Represents the class of types that can be used in a function signature.
///
/// This is used to specify what types are valid for function arguments in a more flexible way than
/// just listing specific DataTypes. For example, `TypeSignatureClass::Timestamp` matches any timestamp
/// type regardless of timezone or precision.
///
/// Used primarily with `TypeSignature::Coercible` to define function signatures that can accept
/// arguments that can be coerced to a particular class of types.
///
/// Note that `matches_native_type` additionally accepts `NativeType::Null` for every class.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash)]
pub enum TypeSignatureClass {
    /// Matches native types for which `NativeType::is_timestamp` returns true.
    Timestamp,
    /// Matches native types for which `NativeType::is_time` returns true.
    Time,
    /// Matches native types for which `NativeType::is_interval` returns true.
    Interval,
    /// Matches native types for which `NativeType::is_duration` returns true.
    Duration,
    /// Matches only the native type of the wrapped logical type.
    Native(LogicalTypeRef),
    // TODO:
    // Numeric
    /// Matches native types for which `NativeType::is_integer` returns true.
    Integer,
}
222
223impl Display for TypeSignatureClass {
224    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
225        write!(f, "TypeSignatureClass::{self:?}")
226    }
227}
228
229impl TypeSignatureClass {
230    /// Get example acceptable types for this `TypeSignatureClass`
231    ///
232    /// This is used for `information_schema` and can be used to generate
233    /// documentation or error messages.
234    fn get_example_types(&self) -> Vec<DataType> {
235        match self {
236            TypeSignatureClass::Native(l) => get_data_types(l.native()),
237            TypeSignatureClass::Timestamp => {
238                vec![
239                    DataType::Timestamp(TimeUnit::Nanosecond, None),
240                    DataType::Timestamp(
241                        TimeUnit::Nanosecond,
242                        Some(TIMEZONE_WILDCARD.into()),
243                    ),
244                ]
245            }
246            TypeSignatureClass::Time => {
247                vec![DataType::Time64(TimeUnit::Nanosecond)]
248            }
249            TypeSignatureClass::Interval => {
250                vec![DataType::Interval(IntervalUnit::DayTime)]
251            }
252            TypeSignatureClass::Duration => {
253                vec![DataType::Duration(TimeUnit::Nanosecond)]
254            }
255            TypeSignatureClass::Integer => {
256                vec![DataType::Int64]
257            }
258        }
259    }
260
261    /// Does the specified `NativeType` match this type signature class?
262    pub fn matches_native_type(
263        self: &TypeSignatureClass,
264        logical_type: &NativeType,
265    ) -> bool {
266        if logical_type == &NativeType::Null {
267            return true;
268        }
269
270        match self {
271            TypeSignatureClass::Native(t) if t.native() == logical_type => true,
272            TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true,
273            TypeSignatureClass::Time if logical_type.is_time() => true,
274            TypeSignatureClass::Interval if logical_type.is_interval() => true,
275            TypeSignatureClass::Duration if logical_type.is_duration() => true,
276            TypeSignatureClass::Integer if logical_type.is_integer() => true,
277            _ => false,
278        }
279    }
280
281    /// What type would `origin_type` be casted to when casting to the specified native type?
282    pub fn default_casted_type(
283        &self,
284        native_type: &NativeType,
285        origin_type: &DataType,
286    ) -> datafusion_common::Result<DataType> {
287        match self {
288            TypeSignatureClass::Native(logical_type) => {
289                logical_type.native().default_cast_for(origin_type)
290            }
291            // If the given type is already a timestamp, we don't change the unit and timezone
292            TypeSignatureClass::Timestamp if native_type.is_timestamp() => {
293                Ok(origin_type.to_owned())
294            }
295            TypeSignatureClass::Time if native_type.is_time() => {
296                Ok(origin_type.to_owned())
297            }
298            TypeSignatureClass::Interval if native_type.is_interval() => {
299                Ok(origin_type.to_owned())
300            }
301            TypeSignatureClass::Duration if native_type.is_duration() => {
302                Ok(origin_type.to_owned())
303            }
304            TypeSignatureClass::Integer if native_type.is_integer() => {
305                Ok(origin_type.to_owned())
306            }
307            _ => internal_err!("May miss the matching logic in `matches_native_type`"),
308        }
309    }
310}
311
/// Signatures for functions that operate on list-like or map arguments.
///
/// Wrapped by the `ArraySignature` variant of `TypeSignature`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionSignature {
    /// A function takes at least one List/LargeList/FixedSizeList argument.
    Array {
        /// A full list of the arguments accepted by this function.
        arguments: Vec<ArrayFunctionArgument>,
        /// Additional information about how array arguments should be coerced.
        array_coercion: Option<ListCoercion>,
    },
    /// A function takes a single argument that must be a List/LargeList/FixedSizeList
    /// which gets coerced to List, with element type recursively coerced to List too if it is list-like.
    RecursiveArray,
    /// Specialized Signature for MapArray
    /// The function takes a single argument that must be a MapArray
    MapArray,
}
328
329impl Display for ArrayFunctionSignature {
330    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
331        match self {
332            ArrayFunctionSignature::Array { arguments, .. } => {
333                for (idx, argument) in arguments.iter().enumerate() {
334                    write!(f, "{argument}")?;
335                    if idx != arguments.len() - 1 {
336                        write!(f, ", ")?;
337                    }
338                }
339                Ok(())
340            }
341            ArrayFunctionSignature::RecursiveArray => {
342                write!(f, "recursive_array")
343            }
344            ArrayFunctionSignature::MapArray => {
345                write!(f, "map_array")
346            }
347        }
348    }
349}
350
/// The kind of a single argument in an `ArrayFunctionSignature::Array` signature.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub enum ArrayFunctionArgument {
    /// A non-list or list argument. The list dimensions should be one less than the Array's list
    /// dimensions.
    Element,
    /// An Int64 index argument.
    Index,
    /// An argument of type List/LargeList/FixedSizeList. All Array arguments must be coercible
    /// to the same type.
    Array,
    /// A Utf8 argument.
    String,
}
364
365impl Display for ArrayFunctionArgument {
366    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367        match self {
368            ArrayFunctionArgument::Element => {
369                write!(f, "element")
370            }
371            ArrayFunctionArgument::Index => {
372                write!(f, "index")
373            }
374            ArrayFunctionArgument::Array => {
375                write!(f, "array")
376            }
377            ArrayFunctionArgument::String => {
378                write!(f, "string")
379            }
380        }
381    }
382}
383
384impl TypeSignature {
385    pub fn to_string_repr(&self) -> Vec<String> {
386        match self {
387            TypeSignature::Nullary => {
388                vec!["NullAry()".to_string()]
389            }
390            TypeSignature::Variadic(types) => {
391                vec![format!("{}, ..", Self::join_types(types, "/"))]
392            }
393            TypeSignature::Uniform(arg_count, valid_types) => {
394                vec![
395                    std::iter::repeat_n(Self::join_types(valid_types, "/"), *arg_count)
396                        .collect::<Vec<String>>()
397                        .join(", "),
398                ]
399            }
400            TypeSignature::String(num) => {
401                vec![format!("String({num})")]
402            }
403            TypeSignature::Numeric(num) => {
404                vec![format!("Numeric({num})")]
405            }
406            TypeSignature::Comparable(num) => {
407                vec![format!("Comparable({num})")]
408            }
409            TypeSignature::Coercible(coercions) => {
410                vec![Self::join_types(coercions, ", ")]
411            }
412            TypeSignature::Exact(types) => {
413                vec![Self::join_types(types, ", ")]
414            }
415            TypeSignature::Any(arg_count) => {
416                vec![std::iter::repeat_n("Any", *arg_count)
417                    .collect::<Vec<&str>>()
418                    .join(", ")]
419            }
420            TypeSignature::UserDefined => {
421                vec!["UserDefined".to_string()]
422            }
423            TypeSignature::VariadicAny => vec!["Any, .., Any".to_string()],
424            TypeSignature::OneOf(sigs) => {
425                sigs.iter().flat_map(|s| s.to_string_repr()).collect()
426            }
427            TypeSignature::ArraySignature(array_signature) => {
428                vec![array_signature.to_string()]
429            }
430        }
431    }
432
433    /// Helper function to join types with specified delimiter.
434    pub fn join_types<T: Display>(types: &[T], delimiter: &str) -> String {
435        types
436            .iter()
437            .map(|t| t.to_string())
438            .collect::<Vec<String>>()
439            .join(delimiter)
440    }
441
442    /// Check whether 0 input argument is valid for given `TypeSignature`
443    pub fn supports_zero_argument(&self) -> bool {
444        match &self {
445            TypeSignature::Exact(vec) => vec.is_empty(),
446            TypeSignature::Nullary => true,
447            TypeSignature::OneOf(types) => types
448                .iter()
449                .any(|type_sig| type_sig.supports_zero_argument()),
450            _ => false,
451        }
452    }
453
454    /// Returns true if the signature currently supports or used to supported 0
455    /// input arguments in a previous version of DataFusion.
456    pub fn used_to_support_zero_arguments(&self) -> bool {
457        match &self {
458            TypeSignature::Any(num) => *num == 0,
459            _ => self.supports_zero_argument(),
460        }
461    }
462
463    #[deprecated(since = "46.0.0", note = "See get_example_types instead")]
464    pub fn get_possible_types(&self) -> Vec<Vec<DataType>> {
465        self.get_example_types()
466    }
467
468    /// Return example acceptable types for this `TypeSignature`'
469    ///
470    /// Returns a `Vec<DataType>` for each argument to the function
471    ///
472    /// This is used for `information_schema` and can be used to generate
473    /// documentation or error messages.
474    pub fn get_example_types(&self) -> Vec<Vec<DataType>> {
475        match self {
476            TypeSignature::Exact(types) => vec![types.clone()],
477            TypeSignature::OneOf(types) => types
478                .iter()
479                .flat_map(|type_sig| type_sig.get_example_types())
480                .collect(),
481            TypeSignature::Uniform(arg_count, types) => types
482                .iter()
483                .cloned()
484                .map(|data_type| vec![data_type; *arg_count])
485                .collect(),
486            TypeSignature::Coercible(coercions) => coercions
487                .iter()
488                .map(|c| {
489                    let mut all_types: IndexSet<DataType> =
490                        c.desired_type().get_example_types().into_iter().collect();
491
492                    if let Some(implicit_coercion) = c.implicit_coercion() {
493                        let allowed_casts: Vec<DataType> = implicit_coercion
494                            .allowed_source_types
495                            .iter()
496                            .flat_map(|t| t.get_example_types())
497                            .collect();
498                        all_types.extend(allowed_casts);
499                    }
500
501                    all_types.into_iter().collect::<Vec<_>>()
502                })
503                .multi_cartesian_product()
504                .collect(),
505            TypeSignature::Variadic(types) => types
506                .iter()
507                .cloned()
508                .map(|data_type| vec![data_type])
509                .collect(),
510            TypeSignature::Numeric(arg_count) => NUMERICS
511                .iter()
512                .cloned()
513                .map(|numeric_type| vec![numeric_type; *arg_count])
514                .collect(),
515            TypeSignature::String(arg_count) => get_data_types(&NativeType::String)
516                .into_iter()
517                .map(|dt| vec![dt; *arg_count])
518                .collect::<Vec<_>>(),
519            // TODO: Implement for other types
520            TypeSignature::Any(_)
521            | TypeSignature::Comparable(_)
522            | TypeSignature::Nullary
523            | TypeSignature::VariadicAny
524            | TypeSignature::ArraySignature(_)
525            | TypeSignature::UserDefined => vec![],
526        }
527    }
528}
529
530fn get_data_types(native_type: &NativeType) -> Vec<DataType> {
531    match native_type {
532        NativeType::Null => vec![DataType::Null],
533        NativeType::Boolean => vec![DataType::Boolean],
534        NativeType::Int8 => vec![DataType::Int8],
535        NativeType::Int16 => vec![DataType::Int16],
536        NativeType::Int32 => vec![DataType::Int32],
537        NativeType::Int64 => vec![DataType::Int64],
538        NativeType::UInt8 => vec![DataType::UInt8],
539        NativeType::UInt16 => vec![DataType::UInt16],
540        NativeType::UInt32 => vec![DataType::UInt32],
541        NativeType::UInt64 => vec![DataType::UInt64],
542        NativeType::Float16 => vec![DataType::Float16],
543        NativeType::Float32 => vec![DataType::Float32],
544        NativeType::Float64 => vec![DataType::Float64],
545        NativeType::Date => vec![DataType::Date32, DataType::Date64],
546        NativeType::Binary => vec![
547            DataType::Binary,
548            DataType::LargeBinary,
549            DataType::BinaryView,
550        ],
551        NativeType::String => {
552            vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]
553        }
554        // TODO: support other native types
555        _ => vec![],
556    }
557}
558
/// Represents type coercion rules for function arguments, specifying both the desired type
/// and optional implicit coercion rules for source types.
///
/// There are two variants:
///
/// * `Exact` - Only accepts arguments that exactly match the desired type
/// * `Implicit` - Accepts the desired type and can coerce from specified source types
///
/// # Examples
///
/// ```
/// use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary, logical_string};
///
/// // Exact coercion that only accepts timestamp types
/// let exact = Coercion::new_exact(TypeSignatureClass::Timestamp);
///
/// // Implicit coercion that accepts string types but can coerce from binary types
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Native(logical_string()),
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::String
/// );
/// ```
// `PartialEq` and `Hash` are implemented by hand further down in this file.
#[derive(Debug, Clone, Eq, PartialOrd)]
pub enum Coercion {
    /// Coercion that only accepts arguments exactly matching the desired type.
    Exact {
        /// The required type for the argument
        desired_type: TypeSignatureClass,
    },

    /// Coercion that accepts the desired type and can implicitly coerce from other types.
    Implicit {
        /// The primary desired type for the argument
        desired_type: TypeSignatureClass,
        /// Rules for implicit coercion from other types
        implicit_coercion: ImplicitCoercion,
    },
}
599
600impl Coercion {
601    pub fn new_exact(desired_type: TypeSignatureClass) -> Self {
602        Self::Exact { desired_type }
603    }
604
605    /// Create a new coercion with implicit coercion rules.
606    ///
607    /// `allowed_source_types` defines the possible types that can be coerced to `desired_type`.
608    /// `default_casted_type` is the default type to be used for coercion if we cast from other types via `allowed_source_types`.
609    pub fn new_implicit(
610        desired_type: TypeSignatureClass,
611        allowed_source_types: Vec<TypeSignatureClass>,
612        default_casted_type: NativeType,
613    ) -> Self {
614        Self::Implicit {
615            desired_type,
616            implicit_coercion: ImplicitCoercion {
617                allowed_source_types,
618                default_casted_type,
619            },
620        }
621    }
622
623    pub fn allowed_source_types(&self) -> &[TypeSignatureClass] {
624        match self {
625            Coercion::Exact { .. } => &[],
626            Coercion::Implicit {
627                implicit_coercion, ..
628            } => implicit_coercion.allowed_source_types.as_slice(),
629        }
630    }
631
632    pub fn default_casted_type(&self) -> Option<&NativeType> {
633        match self {
634            Coercion::Exact { .. } => None,
635            Coercion::Implicit {
636                implicit_coercion, ..
637            } => Some(&implicit_coercion.default_casted_type),
638        }
639    }
640
641    pub fn desired_type(&self) -> &TypeSignatureClass {
642        match self {
643            Coercion::Exact { desired_type } => desired_type,
644            Coercion::Implicit { desired_type, .. } => desired_type,
645        }
646    }
647
648    pub fn implicit_coercion(&self) -> Option<&ImplicitCoercion> {
649        match self {
650            Coercion::Exact { .. } => None,
651            Coercion::Implicit {
652                implicit_coercion, ..
653            } => Some(implicit_coercion),
654        }
655    }
656}
657
658impl Display for Coercion {
659    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
660        write!(f, "Coercion({}", self.desired_type())?;
661        if let Some(implicit_coercion) = self.implicit_coercion() {
662            write!(f, ", implicit_coercion={implicit_coercion}",)
663        } else {
664            write!(f, ")")
665        }
666    }
667}
668
669impl PartialEq for Coercion {
670    fn eq(&self, other: &Self) -> bool {
671        self.desired_type() == other.desired_type()
672            && self.implicit_coercion() == other.implicit_coercion()
673    }
674}
675
676impl Hash for Coercion {
677    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
678        self.desired_type().hash(state);
679        self.implicit_coercion().hash(state);
680    }
681}
682
/// Defines rules for implicit type coercion, specifying which source types can be
/// coerced and the default type to use when coercing.
///
/// This is used by functions to specify which types they can accept via implicit
/// coercion in addition to their primary desired type.
///
/// # Examples
///
/// ```
/// use arrow::datatypes::TimeUnit;
///
/// use datafusion_expr_common::signature::{Coercion, ImplicitCoercion, TypeSignatureClass};
/// use datafusion_common::types::{NativeType, logical_binary};
///
/// // Allow coercing from binary types to timestamp, coerce to specific timestamp unit and timezone
/// let implicit = Coercion::new_implicit(
///     TypeSignatureClass::Timestamp,
///     vec![TypeSignatureClass::Native(logical_binary())],
///     NativeType::Timestamp(TimeUnit::Second, None),
/// );
/// ```
// `PartialEq` and `Hash` are implemented by hand further down in this file.
#[derive(Debug, Clone, Eq, PartialOrd)]
pub struct ImplicitCoercion {
    /// The source types from which implicit casting is allowed.
    allowed_source_types: Vec<TypeSignatureClass>,

    /// The default type to use when coercing from allowed source types.
    /// This is particularly important for types like Timestamp that have multiple
    /// possible configurations (different time units and timezones).
    default_casted_type: NativeType,
}
714
715impl Display for ImplicitCoercion {
716    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
717        write!(
718            f,
719            "ImplicitCoercion({:?}, default_type={:?})",
720            self.allowed_source_types, self.default_casted_type
721        )
722    }
723}
724
725impl PartialEq for ImplicitCoercion {
726    fn eq(&self, other: &Self) -> bool {
727        self.allowed_source_types == other.allowed_source_types
728            && self.default_casted_type == other.default_casted_type
729    }
730}
731
732impl Hash for ImplicitCoercion {
733    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
734        self.allowed_source_types.hash(state);
735        self.default_casted_type.hash(state);
736    }
737}
738
/// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function.
///
/// DataFusion will automatically coerce (cast) argument types to one of the supported
/// function signatures, if possible.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct Signature {
    /// The data types that the function accepts. See [`TypeSignature`] for more information.
    pub type_signature: TypeSignature,
    /// The volatility of the function. See [`Volatility`] for more information.
    pub volatility: Volatility,
}
750
751impl Signature {
752    /// Creates a new Signature from a given type signature and volatility.
753    pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self {
754        Signature {
755            type_signature,
756            volatility,
757        }
758    }
759    /// An arbitrary number of arguments with the same type, from those listed in `common_types`.
760    pub fn variadic(common_types: Vec<DataType>, volatility: Volatility) -> Self {
761        Self {
762            type_signature: TypeSignature::Variadic(common_types),
763            volatility,
764        }
765    }
766    /// User-defined coercion rules for the function.
767    pub fn user_defined(volatility: Volatility) -> Self {
768        Self {
769            type_signature: TypeSignature::UserDefined,
770            volatility,
771        }
772    }
773
774    /// A specified number of numeric arguments
775    pub fn numeric(arg_count: usize, volatility: Volatility) -> Self {
776        Self {
777            type_signature: TypeSignature::Numeric(arg_count),
778            volatility,
779        }
780    }
781
782    /// A specified number of numeric arguments
783    pub fn string(arg_count: usize, volatility: Volatility) -> Self {
784        Self {
785            type_signature: TypeSignature::String(arg_count),
786            volatility,
787        }
788    }
789
790    /// An arbitrary number of arguments of any type.
791    pub fn variadic_any(volatility: Volatility) -> Self {
792        Self {
793            type_signature: TypeSignature::VariadicAny,
794            volatility,
795        }
796    }
797    /// A fixed number of arguments of the same type, from those listed in `valid_types`.
798    pub fn uniform(
799        arg_count: usize,
800        valid_types: Vec<DataType>,
801        volatility: Volatility,
802    ) -> Self {
803        Self {
804            type_signature: TypeSignature::Uniform(arg_count, valid_types),
805            volatility,
806        }
807    }
808    /// Exactly matches the types in `exact_types`, in order.
809    pub fn exact(exact_types: Vec<DataType>, volatility: Volatility) -> Self {
810        Signature {
811            type_signature: TypeSignature::Exact(exact_types),
812            volatility,
813        }
814    }
815
816    /// Target coerce types in order
817    pub fn coercible(target_types: Vec<Coercion>, volatility: Volatility) -> Self {
818        Self {
819            type_signature: TypeSignature::Coercible(target_types),
820            volatility,
821        }
822    }
823
824    /// Used for function that expects comparable data types, it will try to coerced all the types into single final one.
825    pub fn comparable(arg_count: usize, volatility: Volatility) -> Self {
826        Self {
827            type_signature: TypeSignature::Comparable(arg_count),
828            volatility,
829        }
830    }
831
832    pub fn nullary(volatility: Volatility) -> Self {
833        Signature {
834            type_signature: TypeSignature::Nullary,
835            volatility,
836        }
837    }
838
839    /// A specified number of arguments of any type
840    pub fn any(arg_count: usize, volatility: Volatility) -> Self {
841        Signature {
842            type_signature: TypeSignature::Any(arg_count),
843            volatility,
844        }
845    }
846    /// Any one of a list of [TypeSignature]s.
847    pub fn one_of(type_signatures: Vec<TypeSignature>, volatility: Volatility) -> Self {
848        Signature {
849            type_signature: TypeSignature::OneOf(type_signatures),
850            volatility,
851        }
852    }
853    /// Specialized Signature for ArrayAppend and similar functions
854    pub fn array_and_element(volatility: Volatility) -> Self {
855        Signature {
856            type_signature: TypeSignature::ArraySignature(
857                ArrayFunctionSignature::Array {
858                    arguments: vec![
859                        ArrayFunctionArgument::Array,
860                        ArrayFunctionArgument::Element,
861                    ],
862                    array_coercion: Some(ListCoercion::FixedSizedListToList),
863                },
864            ),
865            volatility,
866        }
867    }
868    /// Specialized Signature for Array functions with an optional index
869    pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self {
870        Signature {
871            type_signature: TypeSignature::OneOf(vec![
872                TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
873                    arguments: vec![
874                        ArrayFunctionArgument::Array,
875                        ArrayFunctionArgument::Element,
876                    ],
877                    array_coercion: None,
878                }),
879                TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
880                    arguments: vec![
881                        ArrayFunctionArgument::Array,
882                        ArrayFunctionArgument::Element,
883                        ArrayFunctionArgument::Index,
884                    ],
885                    array_coercion: None,
886                }),
887            ]),
888            volatility,
889        }
890    }
891
892    /// Specialized Signature for ArrayElement and similar functions
893    pub fn array_and_index(volatility: Volatility) -> Self {
894        Signature {
895            type_signature: TypeSignature::ArraySignature(
896                ArrayFunctionSignature::Array {
897                    arguments: vec![
898                        ArrayFunctionArgument::Array,
899                        ArrayFunctionArgument::Index,
900                    ],
901                    array_coercion: None,
902                },
903            ),
904            volatility,
905        }
906    }
907    /// Specialized Signature for ArrayEmpty and similar functions
908    pub fn array(volatility: Volatility) -> Self {
909        Signature {
910            type_signature: TypeSignature::ArraySignature(
911                ArrayFunctionSignature::Array {
912                    arguments: vec![ArrayFunctionArgument::Array],
913                    array_coercion: None,
914                },
915            ),
916            volatility,
917        }
918    }
919}
920
#[cfg(test)]
mod tests {
    use datafusion_common::types::{logical_int64, logical_string};

    use super::*;

    /// Checks `supports_zero_argument` against signatures that do, and do
    /// not, accept an empty argument list.
    #[test]
    fn supports_zero_argument_tests() {
        // Signatures that accept a call with no arguments. A `OneOf` counts
        // when at least one of its alternatives does.
        let accepts_zero_args = vec![
            TypeSignature::Exact(vec![]),
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Nullary,
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
            TypeSignature::Nullary,
        ];
        accepts_zero_args.into_iter().for_each(|signature| {
            assert!(
                signature.supports_zero_argument(),
                "Expected {:?} to support zero arguments",
                signature
            );
        });

        // Signatures that must reject a call with no arguments.
        let rejects_zero_args = vec![
            TypeSignature::Exact(vec![DataType::Utf8]),
            TypeSignature::Uniform(1, vec![DataType::Float64]),
            TypeSignature::Any(1),
            TypeSignature::VariadicAny,
            TypeSignature::OneOf(vec![
                TypeSignature::Exact(vec![DataType::Int8]),
                TypeSignature::Uniform(1, vec![DataType::Int8]),
            ]),
        ];
        rejects_zero_args.into_iter().for_each(|signature| {
            assert!(
                !signature.supports_zero_argument(),
                "Expected {:?} not to support zero arguments",
                signature
            );
        });
    }

    /// Validates that a partial order is defined for `TypeSignature`.
    #[test]
    fn type_signature_partial_ord() {
        // Ordering between different variants.
        let user_defined = TypeSignature::UserDefined;
        assert!(user_defined < TypeSignature::VariadicAny);
        assert!(user_defined < TypeSignature::Any(1));

        // Ordering within the `Uniform` variant: by type list, then by count.
        let one_null = TypeSignature::Uniform(1, vec![DataType::Null]);
        assert!(one_null < TypeSignature::Uniform(1, vec![DataType::Boolean]));
        assert!(one_null < TypeSignature::Uniform(2, vec![DataType::Null]));

        // Even the largest `Uniform` count sorts before an `Exact` signature.
        let max_uniform = TypeSignature::Uniform(usize::MAX, vec![DataType::Null]);
        assert!(max_uniform < TypeSignature::Exact(vec![DataType::Null]));
    }

    /// Checks the example argument types produced by `get_example_types`
    /// for a range of signature kinds.
    #[test]
    fn test_get_possible_types() {
        // Builds the two-argument list `[t, t]`.
        let pair = |data_type: DataType| vec![data_type.clone(), data_type];

        // Each entry pairs a signature with the example types it must yield.
        let cases: Vec<(TypeSignature, Vec<Vec<DataType>>)> = vec![
            (
                TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
                vec![vec![DataType::Int32, DataType::Int64]],
            ),
            (
                TypeSignature::OneOf(vec![
                    TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
                    TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
                ]),
                vec![
                    vec![DataType::Int32, DataType::Int64],
                    vec![DataType::Float32, DataType::Float64],
                ],
            ),
            (
                TypeSignature::OneOf(vec![
                    TypeSignature::Exact(vec![DataType::Int32, DataType::Int64]),
                    TypeSignature::Exact(vec![DataType::Float32, DataType::Float64]),
                    TypeSignature::Exact(vec![DataType::Utf8]),
                ]),
                vec![
                    vec![DataType::Int32, DataType::Int64],
                    vec![DataType::Float32, DataType::Float64],
                    vec![DataType::Utf8],
                ],
            ),
            (
                TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]),
                vec![pair(DataType::Float32), pair(DataType::Int64)],
            ),
            (
                TypeSignature::Coercible(vec![
                    Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
                    Coercion::new_exact(TypeSignatureClass::Native(logical_int64())),
                ]),
                vec![
                    vec![DataType::Utf8, DataType::Int64],
                    vec![DataType::LargeUtf8, DataType::Int64],
                    vec![DataType::Utf8View, DataType::Int64],
                ],
            ),
            (
                TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]),
                vec![vec![DataType::Int32], vec![DataType::Int64]],
            ),
            (
                TypeSignature::Numeric(2),
                vec![
                    pair(DataType::Int8),
                    pair(DataType::Int16),
                    pair(DataType::Int32),
                    pair(DataType::Int64),
                    pair(DataType::UInt8),
                    pair(DataType::UInt16),
                    pair(DataType::UInt32),
                    pair(DataType::UInt64),
                    pair(DataType::Float32),
                    pair(DataType::Float64),
                ],
            ),
            (
                TypeSignature::String(2),
                vec![
                    pair(DataType::Utf8),
                    pair(DataType::LargeUtf8),
                    pair(DataType::Utf8View),
                ],
            ),
        ];

        for (type_signature, expected) in cases {
            let possible_types = type_signature.get_example_types();
            assert_eq!(possible_types, expected);
        }
    }
}