datafusion_optimizer/simplify_expressions/
simplify_exprs.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18//! Simplify expressions optimizer rule and implementation
19
20use std::sync::Arc;
21
22use datafusion_common::tree_node::{Transformed, TreeNode};
23use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result};
24use datafusion_expr::execution_props::ExecutionProps;
25use datafusion_expr::logical_plan::LogicalPlan;
26use datafusion_expr::simplify::SimplifyContext;
27use datafusion_expr::utils::merge_schema;
28use datafusion_expr::Expr;
29
30use crate::optimizer::ApplyOrder;
31use crate::utils::NamePreserver;
32use crate::{OptimizerConfig, OptimizerRule};
33
34use super::ExprSimplifier;
35
/// Optimizer pass that simplifies the expressions held by a [`LogicalPlan`]:
/// each [`Expr`] is rewritten by evaluating constants and applying algebraic
/// simplifications.
///
/// # Introduction
/// Boolean algebra laws are used to simplify expressions or to reduce the
/// number of terms they contain.
///
/// # Example
/// `Filter: b > 2 AND b > 2`
/// is optimized to
/// `Filter: b > 2`
///
/// [`Expr`]: datafusion_expr::Expr
#[derive(Debug, Default)]
pub struct SimplifyExpressions {}
51
52impl OptimizerRule for SimplifyExpressions {
53    fn name(&self) -> &str {
54        "simplify_expressions"
55    }
56
57    fn apply_order(&self) -> Option<ApplyOrder> {
58        Some(ApplyOrder::BottomUp)
59    }
60
61    fn supports_rewrite(&self) -> bool {
62        true
63    }
64
65    fn rewrite(
66        &self,
67        plan: LogicalPlan,
68        config: &dyn OptimizerConfig,
69    ) -> Result<Transformed<LogicalPlan>, DataFusionError> {
70        let mut execution_props = ExecutionProps::new();
71        execution_props.query_execution_start_time = config.query_execution_start_time();
72        Self::optimize_internal(plan, &execution_props)
73    }
74}
75
76impl SimplifyExpressions {
77    fn optimize_internal(
78        plan: LogicalPlan,
79        execution_props: &ExecutionProps,
80    ) -> Result<Transformed<LogicalPlan>> {
81        let schema = if !plan.inputs().is_empty() {
82            DFSchemaRef::new(merge_schema(&plan.inputs()))
83        } else if let LogicalPlan::TableScan(scan) = &plan {
84            // When predicates are pushed into a table scan, there is no input
85            // schema to resolve predicates against, so it must be handled specially
86            //
87            // Note that this is not `plan.schema()` which is the *output*
88            // schema, and reflects any pushed down projection. The output schema
89            // will not contain columns that *only* appear in pushed down predicates
90            // (and no where else) in the plan.
91            //
92            // Thus, use the full schema of the inner provider without any
93            // projection applied for simplification
94            Arc::new(DFSchema::try_from_qualified_schema(
95                scan.table_name.clone(),
96                &scan.source.schema(),
97            )?)
98        } else {
99            Arc::new(DFSchema::empty())
100        };
101
102        let info = SimplifyContext::new(execution_props).with_schema(schema);
103
104        // Inputs have already been rewritten (due to bottom-up traversal handled by Optimizer)
105        // Just need to rewrite our own expressions
106
107        let simplifier = ExprSimplifier::new(info);
108
109        // The left and right expressions in a Join on clause are not
110        // commutative, for reasons that are not entirely clear. Thus, do not
111        // reorder expressions in Join while simplifying.
112        //
113        // This is likely related to the fact that order of the columns must
114        // match the order of the children. see
115        // https://github.com/apache/datafusion/pull/8780 for more details
116        let simplifier = if let LogicalPlan::Join(_) = plan {
117            simplifier.with_canonicalize(false)
118        } else {
119            simplifier
120        };
121
122        // Preserve expression names to avoid changing the schema of the plan.
123        let name_preserver = NamePreserver::new(&plan);
124        let mut rewrite_expr = |expr: Expr| {
125            let name = name_preserver.save(&expr);
126            let expr = simplifier.simplify_with_cycle_count_transformed(expr)?.0;
127            Ok(Transformed::new_transformed(
128                name.restore(expr.data),
129                expr.transformed,
130            ))
131        };
132
133        plan.map_expressions(|expr| {
134            // Preserve the aliasing of grouping sets.
135            if let Expr::GroupingSet(_) = &expr {
136                expr.map_children(&mut rewrite_expr)
137            } else {
138                rewrite_expr(expr)
139            }
140        })
141    }
142}
143
144impl SimplifyExpressions {
145    #[allow(missing_docs)]
146    pub fn new() -> Self {
147        Self {}
148    }
149}
150
151#[cfg(test)]
152mod tests {
153    use std::ops::Not;
154
155    use arrow::datatypes::{DataType, Field, Schema};
156    use chrono::{DateTime, Utc};
157
158    use datafusion_expr::logical_plan::builder::table_scan_with_filters;
159    use datafusion_expr::logical_plan::table_scan;
160    use datafusion_expr::*;
161    use datafusion_functions_aggregate::expr_fn::{max, min};
162
163    use crate::assert_optimized_plan_eq_snapshot;
164    use crate::test::{assert_fields_eq, test_table_scan_with_name};
165    use crate::OptimizerContext;
166
167    use super::*;
168
169    fn test_table_scan() -> LogicalPlan {
170        let schema = Schema::new(vec![
171            Field::new("a", DataType::Boolean, false),
172            Field::new("b", DataType::Boolean, false),
173            Field::new("c", DataType::Boolean, false),
174            Field::new("d", DataType::UInt32, false),
175            Field::new("e", DataType::UInt32, true),
176        ]);
177        table_scan(Some("test"), &schema, None)
178            .expect("creating scan")
179            .build()
180            .expect("building plan")
181    }
182
183    macro_rules! assert_optimized_plan_equal {
184        (
185            $plan:expr,
186            @ $expected:literal $(,)?
187        ) => {{
188            let rules: Vec<Arc<dyn crate::OptimizerRule + Send + Sync>> = vec![Arc::new(SimplifyExpressions::new())];
189            let optimizer_ctx = OptimizerContext::new();
190            assert_optimized_plan_eq_snapshot!(
191                optimizer_ctx,
192                rules,
193                $plan,
194                @ $expected,
195            )
196        }};
197    }
198
199    #[test]
200    fn test_simplify_table_full_filter_in_scan() -> Result<()> {
201        let fields = vec![
202            Field::new("a", DataType::UInt32, false),
203            Field::new("b", DataType::UInt32, false),
204            Field::new("c", DataType::UInt32, false),
205        ];
206
207        let schema = Schema::new(fields);
208
209        let table_scan = table_scan_with_filters(
210            Some("test"),
211            &schema,
212            Some(vec![0]),
213            vec![col("b").is_not_null()],
214        )?
215        .build()?;
216        assert_eq!(1, table_scan.schema().fields().len());
217        assert_fields_eq(&table_scan, vec!["a"]);
218
219        assert_optimized_plan_equal!(
220            table_scan,
221            @ r"TableScan: test projection=[a], full_filters=[Boolean(true)]"
222        )
223    }
224
225    #[test]
226    fn test_simplify_filter_pushdown() -> Result<()> {
227        let table_scan = test_table_scan();
228        let plan = LogicalPlanBuilder::from(table_scan)
229            .project(vec![col("a")])?
230            .filter(and(col("b").gt(lit(1)), col("b").gt(lit(1))))?
231            .build()?;
232
233        assert_optimized_plan_equal!(
234            plan,
235            @ r"
236        Filter: test.b > Int32(1)
237          Projection: test.a
238            TableScan: test
239        "
240        )
241    }
242
243    #[test]
244    fn test_simplify_optimized_plan() -> Result<()> {
245        let table_scan = test_table_scan();
246        let plan = LogicalPlanBuilder::from(table_scan)
247            .project(vec![col("a")])?
248            .filter(and(col("b").gt(lit(1)), col("b").gt(lit(1))))?
249            .build()?;
250
251        assert_optimized_plan_equal!(
252            plan,
253            @ r"
254            Filter: test.b > Int32(1)
255              Projection: test.a
256                TableScan: test
257            "
258        )
259    }
260
261    #[test]
262    fn test_simplify_optimized_plan_with_or() -> Result<()> {
263        let table_scan = test_table_scan();
264        let plan = LogicalPlanBuilder::from(table_scan)
265            .project(vec![col("a")])?
266            .filter(or(col("b").gt(lit(1)), col("b").gt(lit(1))))?
267            .build()?;
268
269        assert_optimized_plan_equal!(
270            plan,
271            @ r"
272            Filter: test.b > Int32(1)
273              Projection: test.a
274                TableScan: test
275            "
276        )
277    }
278
279    #[test]
280    fn test_simplify_optimized_plan_with_composed_and() -> Result<()> {
281        let table_scan = test_table_scan();
282        // ((c > 5) AND (d < 6)) AND (c > 5) --> (c > 5) AND (d < 6)
283        let plan = LogicalPlanBuilder::from(table_scan)
284            .project(vec![col("a"), col("b")])?
285            .filter(and(
286                and(col("a").gt(lit(5)), col("b").lt(lit(6))),
287                col("a").gt(lit(5)),
288            ))?
289            .build()?;
290
291        assert_optimized_plan_equal!(
292            plan,
293            @ r"
294        Filter: test.a > Int32(5) AND test.b < Int32(6)
295          Projection: test.a, test.b
296            TableScan: test
297        "
298        )
299    }
300
301    #[test]
302    fn test_simplify_optimized_plan_eq_expr() -> Result<()> {
303        let table_scan = test_table_scan();
304        let plan = LogicalPlanBuilder::from(table_scan)
305            .filter(col("b").eq(lit(true)))?
306            .filter(col("c").eq(lit(false)))?
307            .project(vec![col("a")])?
308            .build()?;
309
310        assert_optimized_plan_equal!(
311            plan,
312            @ r"
313        Projection: test.a
314          Filter: NOT test.c
315            Filter: test.b
316              TableScan: test
317        "
318        )
319    }
320
321    #[test]
322    fn test_simplify_optimized_plan_not_eq_expr() -> Result<()> {
323        let table_scan = test_table_scan();
324        let plan = LogicalPlanBuilder::from(table_scan)
325            .filter(col("b").not_eq(lit(true)))?
326            .filter(col("c").not_eq(lit(false)))?
327            .limit(0, Some(1))?
328            .project(vec![col("a")])?
329            .build()?;
330
331        assert_optimized_plan_equal!(
332            plan,
333            @ r"
334        Projection: test.a
335          Limit: skip=0, fetch=1
336            Filter: test.c
337              Filter: NOT test.b
338                TableScan: test
339        "
340        )
341    }
342
343    #[test]
344    fn test_simplify_optimized_plan_and_expr() -> Result<()> {
345        let table_scan = test_table_scan();
346        let plan = LogicalPlanBuilder::from(table_scan)
347            .filter(col("b").not_eq(lit(true)).and(col("c").eq(lit(true))))?
348            .project(vec![col("a")])?
349            .build()?;
350
351        assert_optimized_plan_equal!(
352            plan,
353            @ r"
354        Projection: test.a
355          Filter: NOT test.b AND test.c
356            TableScan: test
357        "
358        )
359    }
360
361    #[test]
362    fn test_simplify_optimized_plan_or_expr() -> Result<()> {
363        let table_scan = test_table_scan();
364        let plan = LogicalPlanBuilder::from(table_scan)
365            .filter(col("b").not_eq(lit(true)).or(col("c").eq(lit(false))))?
366            .project(vec![col("a")])?
367            .build()?;
368
369        assert_optimized_plan_equal!(
370            plan,
371            @ r"
372        Projection: test.a
373          Filter: NOT test.b OR NOT test.c
374            TableScan: test
375        "
376        )
377    }
378
379    #[test]
380    fn test_simplify_optimized_plan_not_expr() -> Result<()> {
381        let table_scan = test_table_scan();
382        let plan = LogicalPlanBuilder::from(table_scan)
383            .filter(col("b").eq(lit(false)).not())?
384            .project(vec![col("a")])?
385            .build()?;
386
387        assert_optimized_plan_equal!(
388            plan,
389            @ r"
390        Projection: test.a
391          Filter: test.b
392            TableScan: test
393        "
394        )
395    }
396
397    #[test]
398    fn test_simplify_optimized_plan_support_projection() -> Result<()> {
399        let table_scan = test_table_scan();
400        let plan = LogicalPlanBuilder::from(table_scan)
401            .project(vec![col("a"), col("d"), col("b").eq(lit(false))])?
402            .build()?;
403
404        assert_optimized_plan_equal!(
405            plan,
406            @ r"
407        Projection: test.a, test.d, NOT test.b AS test.b = Boolean(false)
408          TableScan: test
409        "
410        )
411    }
412
413    #[test]
414    fn test_simplify_optimized_plan_support_aggregate() -> Result<()> {
415        let table_scan = test_table_scan();
416        let plan = LogicalPlanBuilder::from(table_scan)
417            .project(vec![col("a"), col("c"), col("b")])?
418            .aggregate(
419                vec![col("a"), col("c")],
420                vec![max(col("b").eq(lit(true))), min(col("b"))],
421            )?
422            .build()?;
423
424        assert_optimized_plan_equal!(
425            plan,
426            @ r"
427        Aggregate: groupBy=[[test.a, test.c]], aggr=[[max(test.b) AS max(test.b = Boolean(true)), min(test.b)]]
428          Projection: test.a, test.c, test.b
429            TableScan: test
430        "
431        )
432    }
433
434    #[test]
435    fn test_simplify_optimized_plan_support_values() -> Result<()> {
436        let expr1 = Expr::BinaryExpr(BinaryExpr::new(
437            Box::new(lit(1)),
438            Operator::Plus,
439            Box::new(lit(2)),
440        ));
441        let expr2 = Expr::BinaryExpr(BinaryExpr::new(
442            Box::new(lit(2)),
443            Operator::Minus,
444            Box::new(lit(1)),
445        ));
446        let values = vec![vec![expr1, expr2]];
447        let plan = LogicalPlanBuilder::values(values)?.build()?;
448
449        assert_optimized_plan_equal!(
450            plan,
451            @ "Values: (Int32(3) AS Int32(1) + Int32(2), Int32(1) AS Int32(2) - Int32(1))"
452        )
453    }
454
455    fn get_optimized_plan_formatted(
456        plan: LogicalPlan,
457        date_time: &DateTime<Utc>,
458    ) -> String {
459        let config = OptimizerContext::new().with_query_execution_start_time(*date_time);
460        let rule = SimplifyExpressions::new();
461
462        let optimized_plan = rule.rewrite(plan, &config).unwrap().data;
463        format!("{optimized_plan}")
464    }
465
466    #[test]
467    fn cast_expr() -> Result<()> {
468        let table_scan = test_table_scan();
469        let proj = vec![Expr::Cast(Cast::new(Box::new(lit("0")), DataType::Int32))];
470        let plan = LogicalPlanBuilder::from(table_scan)
471            .project(proj)?
472            .build()?;
473
474        let expected = "Projection: Int32(0) AS Utf8(\"0\")\
475            \n  TableScan: test";
476        let actual = get_optimized_plan_formatted(plan, &Utc::now());
477        assert_eq!(expected, actual);
478        Ok(())
479    }
480
481    #[test]
482    fn simplify_and_eval() -> Result<()> {
483        // demonstrate a case where the evaluation needs to run prior
484        // to the simplifier for it to work
485        let table_scan = test_table_scan();
486        let time = Utc::now();
487        // (true or false) != col --> !col
488        let proj = vec![lit(true).or(lit(false)).not_eq(col("a"))];
489        let plan = LogicalPlanBuilder::from(table_scan)
490            .project(proj)?
491            .build()?;
492
493        let actual = get_optimized_plan_formatted(plan, &time);
494        let expected =
495            "Projection: NOT test.a AS Boolean(true) OR Boolean(false) != test.a\
496                        \n  TableScan: test";
497
498        assert_eq!(expected, actual);
499        Ok(())
500    }
501
502    #[test]
503    fn simplify_not_binary() -> Result<()> {
504        let table_scan = test_table_scan();
505
506        let plan = LogicalPlanBuilder::from(table_scan)
507            .filter(col("d").gt(lit(10)).not())?
508            .build()?;
509
510        assert_optimized_plan_equal!(
511            plan,
512            @ r"
513        Filter: test.d <= Int32(10)
514          TableScan: test
515        "
516        )
517    }
518
519    #[test]
520    fn simplify_not_bool_and() -> Result<()> {
521        let table_scan = test_table_scan();
522
523        let plan = LogicalPlanBuilder::from(table_scan)
524            .filter(col("d").gt(lit(10)).and(col("d").lt(lit(100))).not())?
525            .build()?;
526
527        assert_optimized_plan_equal!(
528            plan,
529            @ r"
530        Filter: test.d <= Int32(10) OR test.d >= Int32(100)
531          TableScan: test
532        "
533        )
534    }
535
536    #[test]
537    fn simplify_not_bool_or() -> Result<()> {
538        let table_scan = test_table_scan();
539
540        let plan = LogicalPlanBuilder::from(table_scan)
541            .filter(col("d").gt(lit(10)).or(col("d").lt(lit(100))).not())?
542            .build()?;
543
544        assert_optimized_plan_equal!(
545            plan,
546            @ r"
547        Filter: test.d <= Int32(10) AND test.d >= Int32(100)
548          TableScan: test
549        "
550        )
551    }
552
553    #[test]
554    fn simplify_not_not() -> Result<()> {
555        let table_scan = test_table_scan();
556
557        let plan = LogicalPlanBuilder::from(table_scan)
558            .filter(col("d").gt(lit(10)).not().not())?
559            .build()?;
560
561        assert_optimized_plan_equal!(
562            plan,
563            @ r"
564        Filter: test.d > Int32(10)
565          TableScan: test
566        "
567        )
568    }
569
570    #[test]
571    fn simplify_not_null() -> Result<()> {
572        let table_scan = test_table_scan();
573
574        let plan = LogicalPlanBuilder::from(table_scan)
575            .filter(col("e").is_null().not())?
576            .build()?;
577
578        assert_optimized_plan_equal!(
579            plan,
580            @ r"
581        Filter: test.e IS NOT NULL
582          TableScan: test
583        "
584        )
585    }
586
587    #[test]
588    fn simplify_not_not_null() -> Result<()> {
589        let table_scan = test_table_scan();
590
591        let plan = LogicalPlanBuilder::from(table_scan)
592            .filter(col("e").is_not_null().not())?
593            .build()?;
594
595        assert_optimized_plan_equal!(
596            plan,
597            @ r"
598        Filter: test.e IS NULL
599          TableScan: test
600        "
601        )
602    }
603
604    #[test]
605    fn simplify_not_in() -> Result<()> {
606        let table_scan = test_table_scan();
607
608        let plan = LogicalPlanBuilder::from(table_scan)
609            .filter(col("d").in_list(vec![lit(1), lit(2), lit(3)], false).not())?
610            .build()?;
611
612        assert_optimized_plan_equal!(
613            plan,
614            @ r"
615        Filter: test.d != Int32(1) AND test.d != Int32(2) AND test.d != Int32(3)
616          TableScan: test
617        "
618        )
619    }
620
621    #[test]
622    fn simplify_not_not_in() -> Result<()> {
623        let table_scan = test_table_scan();
624
625        let plan = LogicalPlanBuilder::from(table_scan)
626            .filter(col("d").in_list(vec![lit(1), lit(2), lit(3)], true).not())?
627            .build()?;
628
629        assert_optimized_plan_equal!(
630            plan,
631            @ r"
632        Filter: test.d = Int32(1) OR test.d = Int32(2) OR test.d = Int32(3)
633          TableScan: test
634        "
635        )
636    }
637
638    #[test]
639    fn simplify_not_between() -> Result<()> {
640        let table_scan = test_table_scan();
641        let qual = col("d").between(lit(1), lit(10));
642
643        let plan = LogicalPlanBuilder::from(table_scan)
644            .filter(qual.not())?
645            .build()?;
646
647        assert_optimized_plan_equal!(
648            plan,
649            @ r"
650        Filter: test.d < Int32(1) OR test.d > Int32(10)
651          TableScan: test
652        "
653        )
654    }
655
656    #[test]
657    fn simplify_not_not_between() -> Result<()> {
658        let table_scan = test_table_scan();
659        let qual = col("d").not_between(lit(1), lit(10));
660
661        let plan = LogicalPlanBuilder::from(table_scan)
662            .filter(qual.not())?
663            .build()?;
664
665        assert_optimized_plan_equal!(
666            plan,
667            @ r"
668        Filter: test.d >= Int32(1) AND test.d <= Int32(10)
669          TableScan: test
670        "
671        )
672    }
673
674    #[test]
675    fn simplify_not_like() -> Result<()> {
676        let schema = Schema::new(vec![
677            Field::new("a", DataType::Utf8, false),
678            Field::new("b", DataType::Utf8, false),
679        ]);
680        let table_scan = table_scan(Some("test"), &schema, None)
681            .expect("creating scan")
682            .build()
683            .expect("building plan");
684
685        let plan = LogicalPlanBuilder::from(table_scan)
686            .filter(col("a").like(col("b")).not())?
687            .build()?;
688
689        assert_optimized_plan_equal!(
690            plan,
691            @ r"
692        Filter: test.a NOT LIKE test.b
693          TableScan: test
694        "
695        )
696    }
697
698    #[test]
699    fn simplify_not_not_like() -> Result<()> {
700        let schema = Schema::new(vec![
701            Field::new("a", DataType::Utf8, false),
702            Field::new("b", DataType::Utf8, false),
703        ]);
704        let table_scan = table_scan(Some("test"), &schema, None)
705            .expect("creating scan")
706            .build()
707            .expect("building plan");
708
709        let plan = LogicalPlanBuilder::from(table_scan)
710            .filter(col("a").not_like(col("b")).not())?
711            .build()?;
712
713        assert_optimized_plan_equal!(
714            plan,
715            @ r"
716        Filter: test.a LIKE test.b
717          TableScan: test
718        "
719        )
720    }
721
722    #[test]
723    fn simplify_not_ilike() -> Result<()> {
724        let schema = Schema::new(vec![
725            Field::new("a", DataType::Utf8, false),
726            Field::new("b", DataType::Utf8, false),
727        ]);
728        let table_scan = table_scan(Some("test"), &schema, None)
729            .expect("creating scan")
730            .build()
731            .expect("building plan");
732
733        let plan = LogicalPlanBuilder::from(table_scan)
734            .filter(col("a").ilike(col("b")).not())?
735            .build()?;
736
737        assert_optimized_plan_equal!(
738            plan,
739            @ r"
740        Filter: test.a NOT ILIKE test.b
741          TableScan: test
742        "
743        )
744    }
745
746    #[test]
747    fn simplify_not_distinct_from() -> Result<()> {
748        let table_scan = test_table_scan();
749
750        let plan = LogicalPlanBuilder::from(table_scan)
751            .filter(binary_expr(col("d"), Operator::IsDistinctFrom, lit(10)).not())?
752            .build()?;
753
754        assert_optimized_plan_equal!(
755            plan,
756            @ r"
757        Filter: test.d IS NOT DISTINCT FROM Int32(10)
758          TableScan: test
759        "
760        )
761    }
762
763    #[test]
764    fn simplify_not_not_distinct_from() -> Result<()> {
765        let table_scan = test_table_scan();
766
767        let plan = LogicalPlanBuilder::from(table_scan)
768            .filter(binary_expr(col("d"), Operator::IsNotDistinctFrom, lit(10)).not())?
769            .build()?;
770
771        assert_optimized_plan_equal!(
772            plan,
773            @ r"
774        Filter: test.d IS DISTINCT FROM Int32(10)
775          TableScan: test
776        "
777        )
778    }
779
780    #[test]
781    fn simplify_equijoin_predicate() -> Result<()> {
782        let t1 = test_table_scan_with_name("t1")?;
783        let t2 = test_table_scan_with_name("t2")?;
784
785        let left_key = col("t1.a") + lit(1i64).cast_to(&DataType::UInt32, t1.schema())?;
786        let right_key =
787            col("t2.a") + lit(2i64).cast_to(&DataType::UInt32, t2.schema())?;
788        let plan = LogicalPlanBuilder::from(t1)
789            .join_with_expr_keys(
790                t2,
791                JoinType::Inner,
792                (vec![left_key], vec![right_key]),
793                None,
794            )?
795            .build()?;
796
797        // before simplify: t1.a + CAST(Int64(1), UInt32) = t2.a + CAST(Int64(2), UInt32)
798        // after simplify: t1.a + UInt32(1) = t2.a + UInt32(2) AS t1.a + Int64(1) = t2.a + Int64(2)
799        assert_optimized_plan_equal!(
800            plan,
801            @ r"
802        Inner Join: t1.a + UInt32(1) = t2.a + UInt32(2)
803          TableScan: t1
804          TableScan: t2
805        "
806        )
807    }
808
809    #[test]
810    fn simplify_is_not_null() -> Result<()> {
811        let table_scan = test_table_scan();
812
813        let plan = LogicalPlanBuilder::from(table_scan)
814            .filter(col("d").is_not_null())?
815            .build()?;
816
817        assert_optimized_plan_equal!(
818            plan,
819            @ r"
820        Filter: Boolean(true)
821          TableScan: test
822        "
823        )
824    }
825
826    #[test]
827    fn simplify_is_null() -> Result<()> {
828        let table_scan = test_table_scan();
829
830        let plan = LogicalPlanBuilder::from(table_scan)
831            .filter(col("d").is_null())?
832            .build()?;
833
834        assert_optimized_plan_equal!(
835            plan,
836            @ r"
837        Filter: Boolean(false)
838          TableScan: test
839        "
840        )
841    }
842
843    #[test]
844    fn simplify_grouping_sets() -> Result<()> {
845        let table_scan = test_table_scan();
846        let plan = LogicalPlanBuilder::from(table_scan)
847            .aggregate(
848                [grouping_set(vec![
849                    vec![(lit(42).alias("prev") + lit(1)).alias("age"), col("a")],
850                    vec![col("a").or(col("b")).and(lit(1).lt(lit(0))).alias("cond")],
851                    vec![col("d").alias("e"), (lit(1) + lit(2))],
852                ])],
853                [] as [Expr; 0],
854            )?
855            .build()?;
856
857        assert_optimized_plan_equal!(
858            plan,
859            @ r"
860        Aggregate: groupBy=[[GROUPING SETS ((Int32(43) AS age, test.a), (Boolean(false) AS cond), (test.d AS e, Int32(3) AS Int32(1) + Int32(2)))]], aggr=[[]]
861          TableScan: test
862        "
863        )
864    }
865
866    #[test]
867    fn test_simplify_regex_special_cases() -> Result<()> {
868        let schema = Schema::new(vec![
869            Field::new("a", DataType::Utf8, true),
870            Field::new("b", DataType::Utf8, false),
871        ]);
872        let table_scan = table_scan(Some("test"), &schema, None)?.build()?;
873
874        // Test `= ".*"` transforms to true (except for empty strings)
875        let plan = LogicalPlanBuilder::from(table_scan.clone())
876            .filter(binary_expr(col("a"), Operator::RegexMatch, lit(".*")))?
877            .build()?;
878
879        assert_optimized_plan_equal!(
880            plan,
881            @ r"
882        Filter: test.a IS NOT NULL
883          TableScan: test
884        "
885        )?;
886
887        // Test `!= ".*"` transforms to checking if the column is empty
888        let plan = LogicalPlanBuilder::from(table_scan.clone())
889            .filter(binary_expr(col("a"), Operator::RegexNotMatch, lit(".*")))?
890            .build()?;
891
892        assert_optimized_plan_equal!(
893            plan,
894            @ r#"
895        Filter: test.a = Utf8("")
896          TableScan: test
897        "#
898        )?;
899
900        // Test case-insensitive versions
901
902        // Test `=~ ".*"` (case-insensitive) transforms to true (except for empty strings)
903        let plan = LogicalPlanBuilder::from(table_scan.clone())
904            .filter(binary_expr(col("b"), Operator::RegexIMatch, lit(".*")))?
905            .build()?;
906
907        assert_optimized_plan_equal!(
908            plan,
909            @ r"
910        Filter: Boolean(true)
911          TableScan: test
912        "
913        )?;
914
915        // Test `!~ ".*"` (case-insensitive) transforms to checking if the column is empty
916        let plan = LogicalPlanBuilder::from(table_scan.clone())
917            .filter(binary_expr(col("a"), Operator::RegexNotIMatch, lit(".*")))?
918            .build()?;
919
920        assert_optimized_plan_equal!(
921            plan,
922            @ r#"
923        Filter: test.a = Utf8("")
924          TableScan: test
925        "#
926        )
927    }
928}