datafusion_functions/crypto/
md5.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! "crypto" DataFusion functions
19use crate::crypto::basic::md5;
20use arrow::datatypes::DataType;
21use datafusion_common::{
22    plan_err,
23    types::{logical_binary, logical_string, NativeType},
24    Result,
25};
26use datafusion_expr::{
27    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28    TypeSignature, Volatility,
29};
30use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
31use datafusion_macros::user_doc;
32use std::any::Any;
33
/// Scalar UDF definition for the SQL `md5` function.
///
/// The `#[user_doc]` attribute below carries the user-facing documentation
/// for the function: its documentation section, description, syntax and a
/// SQL example.
#[user_doc(
    doc_section(label = "Hashing Functions"),
    description = "Computes an MD5 128-bit checksum for a string expression.",
    syntax_example = "md5(expression)",
    sql_example = r#"```sql
> select md5('foo');
+-------------------------------------+
| md5(Utf8("foo"))                    |
+-------------------------------------+
| <md5_checksum_result>               |
+-------------------------------------+
```"#,
    standard_argument(name = "expression", prefix = "String")
)]
#[derive(Debug)]
pub struct Md5Func {
    /// Accepted argument types and volatility; built once in `Md5Func::new`
    /// and handed out by `ScalarUDFImpl::signature`.
    signature: Signature,
}
52impl Default for Md5Func {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58impl Md5Func {
59    pub fn new() -> Self {
60        Self {
61            signature: Signature::one_of(
62                vec![
63                    TypeSignature::Coercible(vec![Coercion::new_implicit(
64                        TypeSignatureClass::Native(logical_binary()),
65                        vec![TypeSignatureClass::Native(logical_string())],
66                        NativeType::String,
67                    )]),
68                    TypeSignature::Coercible(vec![Coercion::new_implicit(
69                        TypeSignatureClass::Native(logical_binary()),
70                        vec![TypeSignatureClass::Native(logical_binary())],
71                        NativeType::Binary,
72                    )]),
73                ],
74                Volatility::Immutable,
75            ),
76        }
77    }
78}
79impl ScalarUDFImpl for Md5Func {
80    fn as_any(&self) -> &dyn Any {
81        self
82    }
83
84    fn name(&self) -> &str {
85        "md5"
86    }
87
88    fn signature(&self) -> &Signature {
89        &self.signature
90    }
91
92    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
93        use DataType::*;
94        Ok(match &arg_types[0] {
95            LargeUtf8 | LargeBinary => Utf8,
96            Utf8View | Utf8 | Binary | BinaryView => Utf8,
97            Null => Null,
98            Dictionary(_, t) => match **t {
99                LargeUtf8 | LargeBinary => Utf8,
100                Utf8 | Binary | BinaryView => Utf8,
101                Null => Null,
102                _ => {
103                    return plan_err!(
104                        "the md5 can only accept strings but got {:?}",
105                        **t
106                    );
107                }
108            },
109            other => {
110                return plan_err!(
111                    "The md5 function can only accept strings. Got {other}"
112                );
113            }
114        })
115    }
116    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
117        md5(&args.args)
118    }
119
120    fn documentation(&self) -> Option<&Documentation> {
121        self.doc()
122    }
123}