1mod min_max_bytes;
22
23use arrow::array::{
24 ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array,
25 Decimal128Array, Decimal256Array, DurationMicrosecondArray, DurationMillisecondArray,
26 DurationNanosecondArray, DurationSecondArray, Float16Array, Float32Array,
27 Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray,
28 IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray,
29 LargeStringArray, StringArray, StringViewArray, Time32MillisecondArray,
30 Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
31 TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
32 TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
33};
34use arrow::compute;
35use arrow::datatypes::{
36 DataType, Decimal128Type, Decimal256Type, DurationMicrosecondType,
37 DurationMillisecondType, DurationNanosecondType, DurationSecondType, Float16Type,
38 Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalUnit,
39 UInt16Type, UInt32Type, UInt64Type, UInt8Type,
40};
41use datafusion_common::stats::Precision;
42use datafusion_common::{
43 downcast_value, exec_err, internal_err, ColumnStatistics, DataFusionError, Result,
44};
45use datafusion_functions_aggregate_common::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
46use datafusion_physical_expr::expressions;
47use std::cmp::Ordering;
48use std::fmt::Debug;
49
50use arrow::datatypes::i256;
51use arrow::datatypes::{
52 Date32Type, Date64Type, Time32MillisecondType, Time32SecondType,
53 Time64MicrosecondType, Time64NanosecondType, TimeUnit, TimestampMicrosecondType,
54 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
55};
56
57use crate::min_max::min_max_bytes::MinMaxBytesAccumulator;
58use datafusion_common::ScalarValue;
59use datafusion_expr::{
60 function::AccumulatorArgs, Accumulator, AggregateUDFImpl, Documentation,
61 SetMonotonicity, Signature, Volatility,
62};
63use datafusion_expr::{GroupsAccumulator, StatisticsArgs};
64use datafusion_macros::user_doc;
65use half::f16;
66use std::mem::size_of_val;
67use std::ops::Deref;
68
69fn get_min_max_result_type(input_types: &[DataType]) -> Result<Vec<DataType>> {
70 if input_types.len() != 1 {
72 return exec_err!(
73 "min/max was called with {} arguments. It requires only 1.",
74 input_types.len()
75 );
76 }
77 match &input_types[0] {
80 DataType::Dictionary(_, dict_value_type) => {
81 Ok(vec![dict_value_type.deref().clone()])
83 }
84 _ => Ok(input_types.to_vec()),
87 }
88}
89
// The `user_doc` attribute below carries the user-facing documentation for
// `max` (section label, description, syntax and a SQL example). The
// `documentation()` method of the `AggregateUDFImpl` impl returns `self.doc()`.
#[user_doc(
    doc_section(label = "General Functions"),
    description = "Returns the maximum value in the specified column.",
    syntax_example = "max(expression)",
    sql_example = r#"```sql
> SELECT max(column_name) FROM table_name;
+----------------------+
| max(column_name) |
+----------------------+
| 150 |
+----------------------+
```"#,
    standard_argument(name = "expression",)
)]
#[derive(Debug)]
// The `max` aggregate function; the only state it holds is its signature.
pub struct Max {
    // Signature returned by `AggregateUDFImpl::signature`.
    signature: Signature,
}
109
110impl Max {
111 pub fn new() -> Self {
112 Self {
113 signature: Signature::user_defined(Volatility::Immutable),
114 }
115 }
116}
117
118impl Default for Max {
119 fn default() -> Self {
120 Self::new()
121 }
122}
/// Builds a boxed `PrimitiveGroupsAccumulator` that keeps the largest value
/// per group.
///
/// * `$DATA_TYPE` - identifier of the data type handed to the accumulator
/// * `$NATIVE` - native Rust type; `$NATIVE::MIN` is the starting value
/// * `$PRIMTYPE` - primitive type parameter of the accumulator
///
/// The stored value is overwritten when the incoming value compares greater,
/// and also when `partial_cmp` returns `None`.
macro_rules! primitive_max_accumulator {
    ($DATA_TYPE:ident, $NATIVE:ident, $PRIMTYPE:ident) => {{
        let accumulator = PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new(
            $DATA_TYPE,
            |cur, new| {
                if matches!((new).partial_cmp(cur), Some(Ordering::Greater) | None) {
                    *cur = new
                }
            },
        )
        .with_starting_value($NATIVE::MIN);
        Ok(Box::new(accumulator))
    }};
}
144
/// Builds a boxed `PrimitiveGroupsAccumulator` that keeps the smallest value
/// per group.
///
/// * `$DATA_TYPE` - identifier of the data type handed to the accumulator
///   (the call sites in this file pass a binding that is already a
///   `&DataType`, so it is forwarded as-is, exactly like
///   `primitive_max_accumulator!` does)
/// * `$NATIVE` - native Rust type; `$NATIVE::MAX` is the starting value
/// * `$PRIMTYPE` - primitive type parameter of the accumulator
///
/// The stored value is overwritten when the incoming value compares less,
/// and also when `partial_cmp` returns `None`.
macro_rules! primitive_min_accumulator {
    ($DATA_TYPE:ident, $NATIVE:ident, $PRIMTYPE:ident) => {{
        Ok(Box::new(
            PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new($DATA_TYPE, |cur, new| {
                match (new).partial_cmp(cur) {
                    Some(Ordering::Less) | None => {
                        // `new` is smaller, or the two values are unordered.
                        *cur = new
                    }
                    _ => {}
                }
            })
            // Every group starts at the largest representable value.
            .with_starting_value($NATIVE::MAX),
        ))
    }};
}
167
168trait FromColumnStatistics {
169 fn value_from_column_statistics(
170 &self,
171 stats: &ColumnStatistics,
172 ) -> Option<ScalarValue>;
173
174 fn value_from_statistics(
175 &self,
176 statistics_args: &StatisticsArgs,
177 ) -> Option<ScalarValue> {
178 if let Precision::Exact(num_rows) = &statistics_args.statistics.num_rows {
179 match *num_rows {
180 0 => return ScalarValue::try_from(statistics_args.return_type).ok(),
181 value if value > 0 => {
182 let col_stats = &statistics_args.statistics.column_statistics;
183 if statistics_args.exprs.len() == 1 {
184 if let Some(col_expr) = statistics_args.exprs[0]
186 .as_any()
187 .downcast_ref::<expressions::Column>()
188 {
189 return self.value_from_column_statistics(
190 &col_stats[col_expr.index()],
191 );
192 }
193 }
194 }
195 _ => {}
196 }
197 }
198 None
199 }
200}
201
202impl FromColumnStatistics for Max {
203 fn value_from_column_statistics(
204 &self,
205 col_stats: &ColumnStatistics,
206 ) -> Option<ScalarValue> {
207 if let Precision::Exact(ref val) = col_stats.max_value {
208 if !val.is_null() {
209 return Some(val.clone());
210 }
211 }
212 None
213 }
214}
215
216impl AggregateUDFImpl for Max {
217 fn as_any(&self) -> &dyn std::any::Any {
218 self
219 }
220
221 fn name(&self) -> &str {
222 "max"
223 }
224
225 fn signature(&self) -> &Signature {
226 &self.signature
227 }
228
229 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
230 Ok(arg_types[0].to_owned())
231 }
232
233 fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
234 Ok(Box::new(MaxAccumulator::try_new(acc_args.return_type)?))
235 }
236
237 fn aliases(&self) -> &[String] {
238 &[]
239 }
240
241 fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
242 use DataType::*;
243 matches!(
244 args.return_type,
245 Int8 | Int16
246 | Int32
247 | Int64
248 | UInt8
249 | UInt16
250 | UInt32
251 | UInt64
252 | Float16
253 | Float32
254 | Float64
255 | Decimal128(_, _)
256 | Decimal256(_, _)
257 | Date32
258 | Date64
259 | Time32(_)
260 | Time64(_)
261 | Timestamp(_, _)
262 | Utf8
263 | LargeUtf8
264 | Utf8View
265 | Binary
266 | LargeBinary
267 | BinaryView
268 | Duration(_)
269 )
270 }
271
272 fn create_groups_accumulator(
273 &self,
274 args: AccumulatorArgs,
275 ) -> Result<Box<dyn GroupsAccumulator>> {
276 use DataType::*;
277 use TimeUnit::*;
278 let data_type = args.return_type;
279 match data_type {
280 Int8 => primitive_max_accumulator!(data_type, i8, Int8Type),
281 Int16 => primitive_max_accumulator!(data_type, i16, Int16Type),
282 Int32 => primitive_max_accumulator!(data_type, i32, Int32Type),
283 Int64 => primitive_max_accumulator!(data_type, i64, Int64Type),
284 UInt8 => primitive_max_accumulator!(data_type, u8, UInt8Type),
285 UInt16 => primitive_max_accumulator!(data_type, u16, UInt16Type),
286 UInt32 => primitive_max_accumulator!(data_type, u32, UInt32Type),
287 UInt64 => primitive_max_accumulator!(data_type, u64, UInt64Type),
288 Float16 => {
289 primitive_max_accumulator!(data_type, f16, Float16Type)
290 }
291 Float32 => {
292 primitive_max_accumulator!(data_type, f32, Float32Type)
293 }
294 Float64 => {
295 primitive_max_accumulator!(data_type, f64, Float64Type)
296 }
297 Date32 => primitive_max_accumulator!(data_type, i32, Date32Type),
298 Date64 => primitive_max_accumulator!(data_type, i64, Date64Type),
299 Time32(Second) => {
300 primitive_max_accumulator!(data_type, i32, Time32SecondType)
301 }
302 Time32(Millisecond) => {
303 primitive_max_accumulator!(data_type, i32, Time32MillisecondType)
304 }
305 Time64(Microsecond) => {
306 primitive_max_accumulator!(data_type, i64, Time64MicrosecondType)
307 }
308 Time64(Nanosecond) => {
309 primitive_max_accumulator!(data_type, i64, Time64NanosecondType)
310 }
311 Timestamp(Second, _) => {
312 primitive_max_accumulator!(data_type, i64, TimestampSecondType)
313 }
314 Timestamp(Millisecond, _) => {
315 primitive_max_accumulator!(data_type, i64, TimestampMillisecondType)
316 }
317 Timestamp(Microsecond, _) => {
318 primitive_max_accumulator!(data_type, i64, TimestampMicrosecondType)
319 }
320 Timestamp(Nanosecond, _) => {
321 primitive_max_accumulator!(data_type, i64, TimestampNanosecondType)
322 }
323 Duration(Second) => {
324 primitive_max_accumulator!(data_type, i64, DurationSecondType)
325 }
326 Duration(Millisecond) => {
327 primitive_max_accumulator!(data_type, i64, DurationMillisecondType)
328 }
329 Duration(Microsecond) => {
330 primitive_max_accumulator!(data_type, i64, DurationMicrosecondType)
331 }
332 Duration(Nanosecond) => {
333 primitive_max_accumulator!(data_type, i64, DurationNanosecondType)
334 }
335 Decimal128(_, _) => {
336 primitive_max_accumulator!(data_type, i128, Decimal128Type)
337 }
338 Decimal256(_, _) => {
339 primitive_max_accumulator!(data_type, i256, Decimal256Type)
340 }
341 Utf8 | LargeUtf8 | Utf8View | Binary | LargeBinary | BinaryView => {
342 Ok(Box::new(MinMaxBytesAccumulator::new_max(data_type.clone())))
343 }
344
345 _ => internal_err!("GroupsAccumulator not supported for max({})", data_type),
347 }
348 }
349
350 fn create_sliding_accumulator(
351 &self,
352 args: AccumulatorArgs,
353 ) -> Result<Box<dyn Accumulator>> {
354 Ok(Box::new(SlidingMaxAccumulator::try_new(args.return_type)?))
355 }
356
357 fn is_descending(&self) -> Option<bool> {
358 Some(true)
359 }
360
361 fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity {
362 datafusion_expr::utils::AggregateOrderSensitivity::Insensitive
363 }
364
365 fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
366 get_min_max_result_type(arg_types)
367 }
368 fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
369 datafusion_expr::ReversedUDAF::Identical
370 }
371 fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> Option<ScalarValue> {
372 self.value_from_statistics(statistics_args)
373 }
374
375 fn documentation(&self) -> Option<&Documentation> {
376 self.doc()
377 }
378
379 fn set_monotonicity(&self, _data_type: &DataType) -> SetMonotonicity {
380 SetMonotonicity::Increasing
383 }
384}
385
/// Computes a batch-level min/max over a string array and wraps it in the
/// `ScalarValue::$SCALAR` variant.
///
/// `$VALUES` is downcast to `$ARRAYTYPE`, `compute::$OP` picks the extreme
/// value (if any), and the result is converted to an owned `String`.
macro_rules! typed_min_max_batch_string {
    ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{
        let array = downcast_value!($VALUES, $ARRAYTYPE);
        // `map` rather than `and_then(|e| Some(..))`: the conversion cannot fail.
        let value = compute::$OP(array).map(|e| e.to_string());
        ScalarValue::$SCALAR(value)
    }};
}
/// Computes a batch-level min/max over a binary array and wraps it in the
/// `ScalarValue::$SCALAR` variant.
///
/// `$VALUES` is downcast to `$ARRAYTYPE`, `compute::$OP` picks the extreme
/// value (if any), and the result is copied into an owned `Vec<u8>`.
macro_rules! typed_min_max_batch_binary {
    ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{
        let array = downcast_value!($VALUES, $ARRAYTYPE);
        // `map` rather than `and_then(|e| Some(..))`: the copy cannot fail.
        let value = compute::$OP(array).map(|e| e.to_vec());
        ScalarValue::$SCALAR(value)
    }};
}
404
/// Computes a batch-level min/max over a typed array and wraps it in the
/// `ScalarValue::$SCALAR` variant.
///
/// `$VALUES` is downcast to `$ARRAYTYPE` and reduced with `compute::$OP`.
/// Any `$EXTRA_ARGS` identifiers (for example precision/scale or a time zone)
/// are cloned and appended to the variant's fields.
macro_rules! typed_min_max_batch {
    ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident $(, $EXTRA_ARGS:ident)*) => {{
        let typed_array = downcast_value!($VALUES, $ARRAYTYPE);
        ScalarValue::$SCALAR(compute::$OP(typed_array), $($EXTRA_ARGS.clone()),*)
    }};
}
413
/// Dispatches a batch-level min/max (`$OP` is `min` or `max`) on the data
/// type of `$VALUES` and yields the matching `ScalarValue`.
///
/// Covers null, decimal, floating point, integer, timestamp, date, time,
/// interval and duration arrays. Any other data type makes the *enclosing
/// function* return an internal error, so this macro may only be used inside
/// a function returning `Result`.
macro_rules! min_max_batch {
    ($VALUES:expr, $OP:ident) => {{
        match $VALUES.data_type() {
            DataType::Null => ScalarValue::Null,

            // Decimals: precision and scale are carried into the scalar.
            DataType::Decimal128(prec, scl) => {
                typed_min_max_batch!($VALUES, Decimal128Array, Decimal128, $OP, prec, scl)
            }
            DataType::Decimal256(prec, scl) => {
                typed_min_max_batch!($VALUES, Decimal256Array, Decimal256, $OP, prec, scl)
            }

            // Floating point
            DataType::Float64 => typed_min_max_batch!($VALUES, Float64Array, Float64, $OP),
            DataType::Float32 => typed_min_max_batch!($VALUES, Float32Array, Float32, $OP),
            DataType::Float16 => typed_min_max_batch!($VALUES, Float16Array, Float16, $OP),

            // Signed and unsigned integers
            DataType::Int64 => typed_min_max_batch!($VALUES, Int64Array, Int64, $OP),
            DataType::Int32 => typed_min_max_batch!($VALUES, Int32Array, Int32, $OP),
            DataType::Int16 => typed_min_max_batch!($VALUES, Int16Array, Int16, $OP),
            DataType::Int8 => typed_min_max_batch!($VALUES, Int8Array, Int8, $OP),
            DataType::UInt64 => typed_min_max_batch!($VALUES, UInt64Array, UInt64, $OP),
            DataType::UInt32 => typed_min_max_batch!($VALUES, UInt32Array, UInt32, $OP),
            DataType::UInt16 => typed_min_max_batch!($VALUES, UInt16Array, UInt16, $OP),
            DataType::UInt8 => typed_min_max_batch!($VALUES, UInt8Array, UInt8, $OP),

            // Timestamps: the time zone is carried into the scalar.
            DataType::Timestamp(TimeUnit::Second, timezone) => {
                typed_min_max_batch!($VALUES, TimestampSecondArray, TimestampSecond, $OP, timezone)
            }
            DataType::Timestamp(TimeUnit::Millisecond, timezone) => {
                typed_min_max_batch!(
                    $VALUES,
                    TimestampMillisecondArray,
                    TimestampMillisecond,
                    $OP,
                    timezone
                )
            }
            DataType::Timestamp(TimeUnit::Microsecond, timezone) => {
                typed_min_max_batch!(
                    $VALUES,
                    TimestampMicrosecondArray,
                    TimestampMicrosecond,
                    $OP,
                    timezone
                )
            }
            DataType::Timestamp(TimeUnit::Nanosecond, timezone) => {
                typed_min_max_batch!(
                    $VALUES,
                    TimestampNanosecondArray,
                    TimestampNanosecond,
                    $OP,
                    timezone
                )
            }

            // Dates and times of day
            DataType::Date32 => typed_min_max_batch!($VALUES, Date32Array, Date32, $OP),
            DataType::Date64 => typed_min_max_batch!($VALUES, Date64Array, Date64, $OP),
            DataType::Time32(TimeUnit::Second) => {
                typed_min_max_batch!($VALUES, Time32SecondArray, Time32Second, $OP)
            }
            DataType::Time32(TimeUnit::Millisecond) => {
                typed_min_max_batch!($VALUES, Time32MillisecondArray, Time32Millisecond, $OP)
            }
            DataType::Time64(TimeUnit::Microsecond) => {
                typed_min_max_batch!($VALUES, Time64MicrosecondArray, Time64Microsecond, $OP)
            }
            DataType::Time64(TimeUnit::Nanosecond) => {
                typed_min_max_batch!($VALUES, Time64NanosecondArray, Time64Nanosecond, $OP)
            }

            // Intervals
            DataType::Interval(IntervalUnit::YearMonth) => {
                typed_min_max_batch!($VALUES, IntervalYearMonthArray, IntervalYearMonth, $OP)
            }
            DataType::Interval(IntervalUnit::DayTime) => {
                typed_min_max_batch!($VALUES, IntervalDayTimeArray, IntervalDayTime, $OP)
            }
            DataType::Interval(IntervalUnit::MonthDayNano) => {
                typed_min_max_batch!(
                    $VALUES,
                    IntervalMonthDayNanoArray,
                    IntervalMonthDayNano,
                    $OP
                )
            }

            // Durations
            DataType::Duration(TimeUnit::Second) => {
                typed_min_max_batch!($VALUES, DurationSecondArray, DurationSecond, $OP)
            }
            DataType::Duration(TimeUnit::Millisecond) => {
                typed_min_max_batch!($VALUES, DurationMillisecondArray, DurationMillisecond, $OP)
            }
            DataType::Duration(TimeUnit::Microsecond) => {
                typed_min_max_batch!($VALUES, DurationMicrosecondArray, DurationMicrosecond, $OP)
            }
            DataType::Duration(TimeUnit::Nanosecond) => {
                typed_min_max_batch!($VALUES, DurationNanosecondArray, DurationNanosecond, $OP)
            }

            unsupported => {
                return internal_err!(
                    "Min/Max accumulator not implemented for type {:?}",
                    unsupported
                );
            }
        }
    }};
}
573
574fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
576 Ok(match values.data_type() {
577 DataType::Utf8 => {
578 typed_min_max_batch_string!(values, StringArray, Utf8, min_string)
579 }
580 DataType::LargeUtf8 => {
581 typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, min_string)
582 }
583 DataType::Utf8View => {
584 typed_min_max_batch_string!(
585 values,
586 StringViewArray,
587 Utf8View,
588 min_string_view
589 )
590 }
591 DataType::Boolean => {
592 typed_min_max_batch!(values, BooleanArray, Boolean, min_boolean)
593 }
594 DataType::Binary => {
595 typed_min_max_batch_binary!(&values, BinaryArray, Binary, min_binary)
596 }
597 DataType::LargeBinary => {
598 typed_min_max_batch_binary!(
599 &values,
600 LargeBinaryArray,
601 LargeBinary,
602 min_binary
603 )
604 }
605 DataType::BinaryView => {
606 typed_min_max_batch_binary!(
607 &values,
608 BinaryViewArray,
609 BinaryView,
610 min_binary_view
611 )
612 }
613 _ => min_max_batch!(values, min),
614 })
615}
616
617pub fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
619 Ok(match values.data_type() {
620 DataType::Utf8 => {
621 typed_min_max_batch_string!(values, StringArray, Utf8, max_string)
622 }
623 DataType::LargeUtf8 => {
624 typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, max_string)
625 }
626 DataType::Utf8View => {
627 typed_min_max_batch_string!(
628 values,
629 StringViewArray,
630 Utf8View,
631 max_string_view
632 )
633 }
634 DataType::Boolean => {
635 typed_min_max_batch!(values, BooleanArray, Boolean, max_boolean)
636 }
637 DataType::Binary => {
638 typed_min_max_batch_binary!(&values, BinaryArray, Binary, max_binary)
639 }
640 DataType::BinaryView => {
641 typed_min_max_batch_binary!(
642 &values,
643 BinaryViewArray,
644 BinaryView,
645 max_binary_view
646 )
647 }
648 DataType::LargeBinary => {
649 typed_min_max_batch_binary!(
650 &values,
651 LargeBinaryArray,
652 LargeBinary,
653 max_binary
654 )
655 }
656 _ => min_max_batch!(values, max),
657 })
658}
659
/// Combines two optional, copyable values with `$OP` (`min` or `max`) and
/// wraps the result in `ScalarValue::$SCALAR`.
///
/// When only one side is present it is taken as-is; when both are absent the
/// result is `None`. `$EXTRA_ARGS` identifiers are cloned and appended to the
/// variant's fields.
macro_rules! typed_min_max {
    ($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident $(, $EXTRA_ARGS:ident)*) => {{
        let combined = match ($VALUE, $DELTA) {
            (Some(a), Some(b)) => Some((*a).$OP(*b)),
            (Some(only), None) | (None, Some(only)) => Some(*only),
            (None, None) => None,
        };
        ScalarValue::$SCALAR(combined, $($EXTRA_ARGS.clone()),*)
    }};
}
/// Combines two optional floating point values with `$OP` (`min` or `max`)
/// using `total_cmp`, and wraps the result in `ScalarValue::$SCALAR`.
///
/// When both sides are present, the right-hand value wins if the ordering of
/// left relative to right matches `choose_min_max!($OP)`; otherwise the
/// left-hand value is kept.
macro_rules! typed_min_max_float {
    ($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident) => {{
        let selected = match ($VALUE, $DELTA) {
            (Some(a), Some(b)) => match a.total_cmp(b) {
                choose_min_max!($OP) => Some(*b),
                _ => Some(*a),
            },
            (Some(only), None) | (None, Some(only)) => Some(*only),
            (None, None) => None,
        };
        ScalarValue::$SCALAR(selected)
    }};
}
687
/// Combines two optional, clonable values (strings or byte vectors) with
/// `$OP` (`min` or `max`) and wraps the result in `ScalarValue::$SCALAR`.
///
/// The comparison is done on references; only the winning value is cloned.
macro_rules! typed_min_max_string {
    ($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident) => {{
        let chosen = match ($VALUE, $DELTA) {
            (Some(a), Some(b)) => Some((a).$OP(b).clone()),
            (Some(only), None) | (None, Some(only)) => Some(only.clone()),
            (None, None) => None,
        };
        ScalarValue::$SCALAR(chosen)
    }};
}
699
/// Maps an operation name to the ordering of `lhs` relative to `rhs` for
/// which the callers in this file pick the right-hand operand:
/// `Greater` for `min`, `Less` for `max`.
macro_rules! choose_min_max {
    (min) => (std::cmp::Ordering::Greater);
    (max) => (std::cmp::Ordering::Less);
}
708
/// Picks the min or max (`$OP`) of two values via `partial_cmp`, cloning the
/// winner.
///
/// The right-hand value wins when the ordering of `$LHS` relative to `$RHS`
/// matches `choose_min_max!($OP)`; otherwise the left-hand value is kept.
/// If the operands cannot be ordered, the *enclosing function* returns an
/// internal error.
macro_rules! interval_min_max {
    ($OP:tt, $LHS:expr, $RHS:expr) => {{
        let ordering = $LHS.partial_cmp(&$RHS);
        match ordering {
            None => {
                return internal_err!("Comparison error while computing interval min/max")
            }
            Some(choose_min_max!($OP)) => $RHS.clone(),
            Some(_) => $LHS.clone(),
        }
    }};
}
720
/// Combines two scalars of the same kind with `$OP` (`min` or `max`) and
/// evaluates to `Ok(ScalarValue)`.
///
/// `$VALUE` and `$DELTA` are matched as a pair. Each supported pairing is
/// forwarded to the matching helper macro (`typed_min_max!`,
/// `typed_min_max_float!`, `typed_min_max_string!` or `interval_min_max!`).
/// Decimals must agree on precision and scale, and any unsupported pairing
/// makes the *enclosing function* return an internal error.
macro_rules! min_max {
    ($VALUE:expr, $DELTA:expr, $OP:ident) => {{
        Ok(match ($VALUE, $DELTA) {
            (ScalarValue::Null, ScalarValue::Null) => ScalarValue::Null,

            // Decimals: both sides must share precision and scale.
            (
                left @ ScalarValue::Decimal128(l_val, l_prec, l_scale),
                right @ ScalarValue::Decimal128(r_val, r_prec, r_scale),
            ) => {
                if l_prec != r_prec || l_scale != r_scale {
                    return internal_err!(
                    "MIN/MAX is not expected to receive scalars of incompatible types {:?}",
                    (left, right)
                    );
                }
                typed_min_max!(l_val, r_val, Decimal128, $OP, l_prec, l_scale)
            }
            (
                left @ ScalarValue::Decimal256(l_val, l_prec, l_scale),
                right @ ScalarValue::Decimal256(r_val, r_prec, r_scale),
            ) => {
                if l_prec != r_prec || l_scale != r_scale {
                    return internal_err!(
                    "MIN/MAX is not expected to receive scalars of incompatible types {:?}",
                    (left, right)
                    );
                }
                typed_min_max!(l_val, r_val, Decimal256, $OP, l_prec, l_scale)
            }

            (ScalarValue::Boolean(l), ScalarValue::Boolean(r)) => {
                typed_min_max!(l, r, Boolean, $OP)
            }

            // Floating point values are compared with a total order.
            (ScalarValue::Float64(l), ScalarValue::Float64(r)) => {
                typed_min_max_float!(l, r, Float64, $OP)
            }
            (ScalarValue::Float32(l), ScalarValue::Float32(r)) => {
                typed_min_max_float!(l, r, Float32, $OP)
            }
            (ScalarValue::Float16(l), ScalarValue::Float16(r)) => {
                typed_min_max_float!(l, r, Float16, $OP)
            }

            // Unsigned and signed integers
            (ScalarValue::UInt64(l), ScalarValue::UInt64(r)) => typed_min_max!(l, r, UInt64, $OP),
            (ScalarValue::UInt32(l), ScalarValue::UInt32(r)) => typed_min_max!(l, r, UInt32, $OP),
            (ScalarValue::UInt16(l), ScalarValue::UInt16(r)) => typed_min_max!(l, r, UInt16, $OP),
            (ScalarValue::UInt8(l), ScalarValue::UInt8(r)) => typed_min_max!(l, r, UInt8, $OP),
            (ScalarValue::Int64(l), ScalarValue::Int64(r)) => typed_min_max!(l, r, Int64, $OP),
            (ScalarValue::Int32(l), ScalarValue::Int32(r)) => typed_min_max!(l, r, Int32, $OP),
            (ScalarValue::Int16(l), ScalarValue::Int16(r)) => typed_min_max!(l, r, Int16, $OP),
            (ScalarValue::Int8(l), ScalarValue::Int8(r)) => typed_min_max!(l, r, Int8, $OP),

            // Strings and binary values are cloned rather than copied.
            (ScalarValue::Utf8(l), ScalarValue::Utf8(r)) => {
                typed_min_max_string!(l, r, Utf8, $OP)
            }
            (ScalarValue::LargeUtf8(l), ScalarValue::LargeUtf8(r)) => {
                typed_min_max_string!(l, r, LargeUtf8, $OP)
            }
            (ScalarValue::Utf8View(l), ScalarValue::Utf8View(r)) => {
                typed_min_max_string!(l, r, Utf8View, $OP)
            }
            (ScalarValue::Binary(l), ScalarValue::Binary(r)) => {
                typed_min_max_string!(l, r, Binary, $OP)
            }
            (ScalarValue::LargeBinary(l), ScalarValue::LargeBinary(r)) => {
                typed_min_max_string!(l, r, LargeBinary, $OP)
            }
            (ScalarValue::BinaryView(l), ScalarValue::BinaryView(r)) => {
                typed_min_max_string!(l, r, BinaryView, $OP)
            }

            // Timestamps: the result carries the left-hand side's time zone.
            (ScalarValue::TimestampSecond(l, l_tz), ScalarValue::TimestampSecond(r, _)) => {
                typed_min_max!(l, r, TimestampSecond, $OP, l_tz)
            }
            (
                ScalarValue::TimestampMillisecond(l, l_tz),
                ScalarValue::TimestampMillisecond(r, _),
            ) => {
                typed_min_max!(l, r, TimestampMillisecond, $OP, l_tz)
            }
            (
                ScalarValue::TimestampMicrosecond(l, l_tz),
                ScalarValue::TimestampMicrosecond(r, _),
            ) => {
                typed_min_max!(l, r, TimestampMicrosecond, $OP, l_tz)
            }
            (
                ScalarValue::TimestampNanosecond(l, l_tz),
                ScalarValue::TimestampNanosecond(r, _),
            ) => {
                typed_min_max!(l, r, TimestampNanosecond, $OP, l_tz)
            }

            // Dates and times of day
            (ScalarValue::Date32(l), ScalarValue::Date32(r)) => typed_min_max!(l, r, Date32, $OP),
            (ScalarValue::Date64(l), ScalarValue::Date64(r)) => typed_min_max!(l, r, Date64, $OP),
            (ScalarValue::Time32Second(l), ScalarValue::Time32Second(r)) => {
                typed_min_max!(l, r, Time32Second, $OP)
            }
            (ScalarValue::Time32Millisecond(l), ScalarValue::Time32Millisecond(r)) => {
                typed_min_max!(l, r, Time32Millisecond, $OP)
            }
            (ScalarValue::Time64Microsecond(l), ScalarValue::Time64Microsecond(r)) => {
                typed_min_max!(l, r, Time64Microsecond, $OP)
            }
            (ScalarValue::Time64Nanosecond(l), ScalarValue::Time64Nanosecond(r)) => {
                typed_min_max!(l, r, Time64Nanosecond, $OP)
            }

            // Intervals of the same unit
            (ScalarValue::IntervalYearMonth(l), ScalarValue::IntervalYearMonth(r)) => {
                typed_min_max!(l, r, IntervalYearMonth, $OP)
            }
            (ScalarValue::IntervalMonthDayNano(l), ScalarValue::IntervalMonthDayNano(r)) => {
                typed_min_max!(l, r, IntervalMonthDayNano, $OP)
            }
            (ScalarValue::IntervalDayTime(l), ScalarValue::IntervalDayTime(r)) => {
                typed_min_max!(l, r, IntervalDayTime, $OP)
            }

            // Intervals of differing units are compared as whole scalars.
            (
                ScalarValue::IntervalYearMonth(_),
                ScalarValue::IntervalMonthDayNano(_) | ScalarValue::IntervalDayTime(_),
            )
            | (
                ScalarValue::IntervalMonthDayNano(_),
                ScalarValue::IntervalDayTime(_) | ScalarValue::IntervalYearMonth(_),
            )
            | (
                ScalarValue::IntervalDayTime(_),
                ScalarValue::IntervalYearMonth(_) | ScalarValue::IntervalMonthDayNano(_),
            ) => {
                interval_min_max!($OP, $VALUE, $DELTA)
            }

            // Durations
            (ScalarValue::DurationSecond(l), ScalarValue::DurationSecond(r)) => {
                typed_min_max!(l, r, DurationSecond, $OP)
            }
            (ScalarValue::DurationMillisecond(l), ScalarValue::DurationMillisecond(r)) => {
                typed_min_max!(l, r, DurationMillisecond, $OP)
            }
            (ScalarValue::DurationMicrosecond(l), ScalarValue::DurationMicrosecond(r)) => {
                typed_min_max!(l, r, DurationMicrosecond, $OP)
            }
            (ScalarValue::DurationNanosecond(l), ScalarValue::DurationNanosecond(r)) => {
                typed_min_max!(l, r, DurationNanosecond, $OP)
            }

            mismatch => {
                return internal_err!(
                    "MIN/MAX is not expected to receive scalars of incompatible types {:?}",
                    mismatch
                )
            }
        })
    }};
}
935
/// Accumulator for `max` that keeps a single running value.
#[derive(Debug)]
pub struct MaxAccumulator {
    // Largest value seen so far; initialised by `try_new` from the data type
    // and replaced in `update_batch` with the max of itself and each batch.
    max: ScalarValue,
}
941
942impl MaxAccumulator {
943 pub fn try_new(datatype: &DataType) -> Result<Self> {
945 Ok(Self {
946 max: ScalarValue::try_from(datatype)?,
947 })
948 }
949}
950
951impl Accumulator for MaxAccumulator {
952 fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
953 let values = &values[0];
954 let delta = &max_batch(values)?;
955 let new_max: Result<ScalarValue, DataFusionError> =
956 min_max!(&self.max, delta, max);
957 self.max = new_max?;
958 Ok(())
959 }
960
961 fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
962 self.update_batch(states)
963 }
964
965 fn state(&mut self) -> Result<Vec<ScalarValue>> {
966 Ok(vec![self.evaluate()?])
967 }
968 fn evaluate(&mut self) -> Result<ScalarValue> {
969 Ok(self.max.clone())
970 }
971
972 fn size(&self) -> usize {
973 size_of_val(self) - size_of_val(&self.max) + self.max.size()
974 }
975}
976
/// Accumulator for `max` that supports retraction (`retract_batch`).
#[derive(Debug)]
pub struct SlidingMaxAccumulator {
    // Last maximum reported by `moving_max`; refreshed after every update or
    // retraction for which `moving_max.max()` returns a value.
    max: ScalarValue,
    // Values pushed by `update_batch` and popped by `retract_batch`.
    moving_max: MovingMax<ScalarValue>,
}
982
983impl SlidingMaxAccumulator {
984 pub fn try_new(datatype: &DataType) -> Result<Self> {
986 Ok(Self {
987 max: ScalarValue::try_from(datatype)?,
988 moving_max: MovingMax::<ScalarValue>::new(),
989 })
990 }
991}
992
993impl Accumulator for SlidingMaxAccumulator {
994 fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
995 for idx in 0..values[0].len() {
996 let val = ScalarValue::try_from_array(&values[0], idx)?;
997 self.moving_max.push(val);
998 }
999 if let Some(res) = self.moving_max.max() {
1000 self.max = res.clone();
1001 }
1002 Ok(())
1003 }
1004
1005 fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
1006 for _idx in 0..values[0].len() {
1007 (self.moving_max).pop();
1008 }
1009 if let Some(res) = self.moving_max.max() {
1010 self.max = res.clone();
1011 }
1012 Ok(())
1013 }
1014
1015 fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
1016 self.update_batch(states)
1017 }
1018
1019 fn state(&mut self) -> Result<Vec<ScalarValue>> {
1020 Ok(vec![self.max.clone()])
1021 }
1022
1023 fn evaluate(&mut self) -> Result<ScalarValue> {
1024 Ok(self.max.clone())
1025 }
1026
1027 fn supports_retract_batch(&self) -> bool {
1028 true
1029 }
1030
1031 fn size(&self) -> usize {
1032 size_of_val(self) - size_of_val(&self.max) + self.max.size()
1033 }
1034}
1035
// The `user_doc` attribute below carries the user-facing documentation for
// `min` (section label, description, syntax and a SQL example). The
// `documentation()` method of the `AggregateUDFImpl` impl returns `self.doc()`.
#[user_doc(
    doc_section(label = "General Functions"),
    description = "Returns the minimum value in the specified column.",
    syntax_example = "min(expression)",
    sql_example = r#"```sql
> SELECT min(column_name) FROM table_name;
+----------------------+
| min(column_name) |
+----------------------+
| 12 |
+----------------------+
```"#,
    standard_argument(name = "expression",)
)]
#[derive(Debug)]
// The `min` aggregate function; the only state it holds is its signature.
pub struct Min {
    // Signature returned by `AggregateUDFImpl::signature`.
    signature: Signature,
}
1054
1055impl Min {
1056 pub fn new() -> Self {
1057 Self {
1058 signature: Signature::user_defined(Volatility::Immutable),
1059 }
1060 }
1061}
1062
1063impl Default for Min {
1064 fn default() -> Self {
1065 Self::new()
1066 }
1067}
1068
1069impl FromColumnStatistics for Min {
1070 fn value_from_column_statistics(
1071 &self,
1072 col_stats: &ColumnStatistics,
1073 ) -> Option<ScalarValue> {
1074 if let Precision::Exact(ref val) = col_stats.min_value {
1075 if !val.is_null() {
1076 return Some(val.clone());
1077 }
1078 }
1079 None
1080 }
1081}
1082
1083impl AggregateUDFImpl for Min {
1084 fn as_any(&self) -> &dyn std::any::Any {
1085 self
1086 }
1087
1088 fn name(&self) -> &str {
1089 "min"
1090 }
1091
1092 fn signature(&self) -> &Signature {
1093 &self.signature
1094 }
1095
1096 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
1097 Ok(arg_types[0].to_owned())
1098 }
1099
1100 fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
1101 Ok(Box::new(MinAccumulator::try_new(acc_args.return_type)?))
1102 }
1103
1104 fn aliases(&self) -> &[String] {
1105 &[]
1106 }
1107
1108 fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
1109 use DataType::*;
1110 matches!(
1111 args.return_type,
1112 Int8 | Int16
1113 | Int32
1114 | Int64
1115 | UInt8
1116 | UInt16
1117 | UInt32
1118 | UInt64
1119 | Float16
1120 | Float32
1121 | Float64
1122 | Decimal128(_, _)
1123 | Decimal256(_, _)
1124 | Date32
1125 | Date64
1126 | Time32(_)
1127 | Time64(_)
1128 | Timestamp(_, _)
1129 | Utf8
1130 | LargeUtf8
1131 | Utf8View
1132 | Binary
1133 | LargeBinary
1134 | BinaryView
1135 | Duration(_)
1136 )
1137 }
1138
1139 fn create_groups_accumulator(
1140 &self,
1141 args: AccumulatorArgs,
1142 ) -> Result<Box<dyn GroupsAccumulator>> {
1143 use DataType::*;
1144 use TimeUnit::*;
1145 let data_type = args.return_type;
1146 match data_type {
1147 Int8 => primitive_min_accumulator!(data_type, i8, Int8Type),
1148 Int16 => primitive_min_accumulator!(data_type, i16, Int16Type),
1149 Int32 => primitive_min_accumulator!(data_type, i32, Int32Type),
1150 Int64 => primitive_min_accumulator!(data_type, i64, Int64Type),
1151 UInt8 => primitive_min_accumulator!(data_type, u8, UInt8Type),
1152 UInt16 => primitive_min_accumulator!(data_type, u16, UInt16Type),
1153 UInt32 => primitive_min_accumulator!(data_type, u32, UInt32Type),
1154 UInt64 => primitive_min_accumulator!(data_type, u64, UInt64Type),
1155 Float16 => {
1156 primitive_min_accumulator!(data_type, f16, Float16Type)
1157 }
1158 Float32 => {
1159 primitive_min_accumulator!(data_type, f32, Float32Type)
1160 }
1161 Float64 => {
1162 primitive_min_accumulator!(data_type, f64, Float64Type)
1163 }
1164 Date32 => primitive_min_accumulator!(data_type, i32, Date32Type),
1165 Date64 => primitive_min_accumulator!(data_type, i64, Date64Type),
1166 Time32(Second) => {
1167 primitive_min_accumulator!(data_type, i32, Time32SecondType)
1168 }
1169 Time32(Millisecond) => {
1170 primitive_min_accumulator!(data_type, i32, Time32MillisecondType)
1171 }
1172 Time64(Microsecond) => {
1173 primitive_min_accumulator!(data_type, i64, Time64MicrosecondType)
1174 }
1175 Time64(Nanosecond) => {
1176 primitive_min_accumulator!(data_type, i64, Time64NanosecondType)
1177 }
1178 Timestamp(Second, _) => {
1179 primitive_min_accumulator!(data_type, i64, TimestampSecondType)
1180 }
1181 Timestamp(Millisecond, _) => {
1182 primitive_min_accumulator!(data_type, i64, TimestampMillisecondType)
1183 }
1184 Timestamp(Microsecond, _) => {
1185 primitive_min_accumulator!(data_type, i64, TimestampMicrosecondType)
1186 }
1187 Timestamp(Nanosecond, _) => {
1188 primitive_min_accumulator!(data_type, i64, TimestampNanosecondType)
1189 }
1190 Duration(Second) => {
1191 primitive_min_accumulator!(data_type, i64, DurationSecondType)
1192 }
1193 Duration(Millisecond) => {
1194 primitive_min_accumulator!(data_type, i64, DurationMillisecondType)
1195 }
1196 Duration(Microsecond) => {
1197 primitive_min_accumulator!(data_type, i64, DurationMicrosecondType)
1198 }
1199 Duration(Nanosecond) => {
1200 primitive_min_accumulator!(data_type, i64, DurationNanosecondType)
1201 }
1202 Decimal128(_, _) => {
1203 primitive_min_accumulator!(data_type, i128, Decimal128Type)
1204 }
1205 Decimal256(_, _) => {
1206 primitive_min_accumulator!(data_type, i256, Decimal256Type)
1207 }
1208 Utf8 | LargeUtf8 | Utf8View | Binary | LargeBinary | BinaryView => {
1209 Ok(Box::new(MinMaxBytesAccumulator::new_min(data_type.clone())))
1210 }
1211
1212 _ => internal_err!("GroupsAccumulator not supported for min({})", data_type),
1214 }
1215 }
1216
1217 fn create_sliding_accumulator(
1218 &self,
1219 args: AccumulatorArgs,
1220 ) -> Result<Box<dyn Accumulator>> {
1221 Ok(Box::new(SlidingMinAccumulator::try_new(args.return_type)?))
1222 }
1223
1224 fn is_descending(&self) -> Option<bool> {
1225 Some(false)
1226 }
1227
1228 fn value_from_stats(&self, statistics_args: &StatisticsArgs) -> Option<ScalarValue> {
1229 self.value_from_statistics(statistics_args)
1230 }
1231 fn order_sensitivity(&self) -> datafusion_expr::utils::AggregateOrderSensitivity {
1232 datafusion_expr::utils::AggregateOrderSensitivity::Insensitive
1233 }
1234
1235 fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
1236 get_min_max_result_type(arg_types)
1237 }
1238
1239 fn reverse_expr(&self) -> datafusion_expr::ReversedUDAF {
1240 datafusion_expr::ReversedUDAF::Identical
1241 }
1242
1243 fn documentation(&self) -> Option<&Documentation> {
1244 self.doc()
1245 }
1246
1247 fn set_monotonicity(&self, _data_type: &DataType) -> SetMonotonicity {
1248 SetMonotonicity::Decreasing
1251 }
1252}
1253
1254#[derive(Debug)]
1256pub struct MinAccumulator {
1257 min: ScalarValue,
1258}
1259
1260impl MinAccumulator {
1261 pub fn try_new(datatype: &DataType) -> Result<Self> {
1263 Ok(Self {
1264 min: ScalarValue::try_from(datatype)?,
1265 })
1266 }
1267}
1268
1269impl Accumulator for MinAccumulator {
1270 fn state(&mut self) -> Result<Vec<ScalarValue>> {
1271 Ok(vec![self.evaluate()?])
1272 }
1273
1274 fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
1275 let values = &values[0];
1276 let delta = &min_batch(values)?;
1277 let new_min: Result<ScalarValue, DataFusionError> =
1278 min_max!(&self.min, delta, min);
1279 self.min = new_min?;
1280 Ok(())
1281 }
1282
1283 fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
1284 self.update_batch(states)
1285 }
1286
1287 fn evaluate(&mut self) -> Result<ScalarValue> {
1288 Ok(self.min.clone())
1289 }
1290
1291 fn size(&self) -> usize {
1292 size_of_val(self) - size_of_val(&self.min) + self.min.size()
1293 }
1294}
1295
1296#[derive(Debug)]
1297pub struct SlidingMinAccumulator {
1298 min: ScalarValue,
1299 moving_min: MovingMin<ScalarValue>,
1300}
1301
1302impl SlidingMinAccumulator {
1303 pub fn try_new(datatype: &DataType) -> Result<Self> {
1304 Ok(Self {
1305 min: ScalarValue::try_from(datatype)?,
1306 moving_min: MovingMin::<ScalarValue>::new(),
1307 })
1308 }
1309}
1310
1311impl Accumulator for SlidingMinAccumulator {
1312 fn state(&mut self) -> Result<Vec<ScalarValue>> {
1313 Ok(vec![self.min.clone()])
1314 }
1315
1316 fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
1317 for idx in 0..values[0].len() {
1318 let val = ScalarValue::try_from_array(&values[0], idx)?;
1319 if !val.is_null() {
1320 self.moving_min.push(val);
1321 }
1322 }
1323 if let Some(res) = self.moving_min.min() {
1324 self.min = res.clone();
1325 }
1326 Ok(())
1327 }
1328
1329 fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
1330 for idx in 0..values[0].len() {
1331 let val = ScalarValue::try_from_array(&values[0], idx)?;
1332 if !val.is_null() {
1333 (self.moving_min).pop();
1334 }
1335 }
1336 if let Some(res) = self.moving_min.min() {
1337 self.min = res.clone();
1338 }
1339 Ok(())
1340 }
1341
1342 fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
1343 self.update_batch(states)
1344 }
1345
1346 fn evaluate(&mut self) -> Result<ScalarValue> {
1347 Ok(self.min.clone())
1348 }
1349
1350 fn supports_retract_batch(&self) -> bool {
1351 true
1352 }
1353
1354 fn size(&self) -> usize {
1355 size_of_val(self) - size_of_val(&self.min) + self.min.size()
1356 }
1357}
1358
/// First-in-first-out queue that can report the minimum of the elements it
/// currently holds.
///
/// The queue is built from two stacks. New elements go onto `push_stack`;
/// elements leave from `pop_stack`. When `pop_stack` runs dry, the whole
/// `push_stack` is moved over in one go, which reverses it so the oldest
/// element ends up on top. Each element is moved at most once, so `push` and
/// `pop` are amortized O(1) and `min` is O(1).
///
/// Every stack entry is a `(value, running_min)` pair where `running_min` is
/// the minimum of that value and everything beneath it in the same stack.
/// The minimum of a stack is therefore the `running_min` of its top entry.
#[derive(Debug)]
pub struct MovingMin<T> {
    /// Most recently pushed elements, newest on top.
    push_stack: Vec<(T, T)>,
    /// Elements next in line to be popped, oldest on top.
    pop_stack: Vec<(T, T)>,
}

impl<T> Default for MovingMin<T> {
    fn default() -> Self {
        Self {
            push_stack: Vec::new(),
            pop_stack: Vec::new(),
        }
    }
}

impl<T> MovingMin<T> {
    /// Creates an empty queue without allocating.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty queue whose two internal stacks each have room for
    /// `capacity` elements.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            push_stack: Vec::with_capacity(capacity),
            pop_stack: Vec::with_capacity(capacity),
        }
    }

    /// Number of elements currently in the queue.
    #[inline]
    pub fn len(&self) -> usize {
        self.push_stack.len() + self.pop_stack.len()
    }

    /// Returns `true` when the queue holds no elements.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

impl<T: Clone + PartialOrd> MovingMin<T> {
    /// Returns the minimum of the queued elements, or `None` when the queue
    /// is empty.
    #[inline]
    pub fn min(&self) -> Option<&T> {
        match (self.push_stack.last(), self.pop_stack.last()) {
            (None, None) => None,
            (Some((_, min)), None) | (None, Some((_, min))) => Some(min),
            // Both stacks hold elements: the overall minimum is the smaller
            // of the two per-stack minima.
            (Some((_, a)), Some((_, b))) => Some(if a < b { a } else { b }),
        }
    }

    /// Appends `val` to the back of the queue.
    #[inline]
    pub fn push(&mut self, val: T) {
        let entry = match self.push_stack.last() {
            // The existing minimum is still the smallest: carry it forward.
            Some((_, min)) if val > *min => (val, min.clone()),
            // Empty stack, or `val` is not greater than the current minimum.
            _ => (val.clone(), val),
        };
        self.push_stack.push(entry);
    }

    /// Removes and returns the element at the front of the queue (the oldest
    /// one), or `None` when the queue is empty.
    #[inline]
    pub fn pop(&mut self) -> Option<T> {
        if self.pop_stack.is_empty() {
            // Move everything over, newest first, recomputing the running
            // minimum for the reversed order. Each value is cloned exactly
            // once, to fill the `running_min` slot of its new entry.
            while let Some((val, _)) = self.push_stack.pop() {
                let min = match self.pop_stack.last() {
                    Some((_, prev_min)) if *prev_min < val => prev_min.clone(),
                    _ => val.clone(),
                };
                self.pop_stack.push((val, min));
            }
        }
        self.pop_stack.pop().map(|(val, _)| val)
    }
}
1492
/// First-in-first-out queue that can report the maximum of the elements it
/// currently holds.
///
/// The queue is built from two stacks. New elements go onto `push_stack`;
/// elements leave from `pop_stack`. When `pop_stack` runs dry, the whole
/// `push_stack` is moved over in one go, which reverses it so the oldest
/// element ends up on top. Each element is moved at most once, so `push` and
/// `pop` are amortized O(1) and `max` is O(1).
///
/// Every stack entry is a `(value, running_max)` pair where `running_max` is
/// the maximum of that value and everything beneath it in the same stack.
/// The maximum of a stack is therefore the `running_max` of its top entry.
#[derive(Debug)]
pub struct MovingMax<T> {
    /// Most recently pushed elements, newest on top.
    push_stack: Vec<(T, T)>,
    /// Elements next in line to be popped, oldest on top.
    pop_stack: Vec<(T, T)>,
}

impl<T> Default for MovingMax<T> {
    fn default() -> Self {
        Self {
            push_stack: Vec::new(),
            pop_stack: Vec::new(),
        }
    }
}

impl<T> MovingMax<T> {
    /// Creates an empty queue without allocating.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an empty queue whose two internal stacks each have room for
    /// `capacity` elements.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            push_stack: Vec::with_capacity(capacity),
            pop_stack: Vec::with_capacity(capacity),
        }
    }

    /// Number of elements currently in the queue.
    #[inline]
    pub fn len(&self) -> usize {
        self.push_stack.len() + self.pop_stack.len()
    }

    /// Returns `true` when the queue holds no elements.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

impl<T: Clone + PartialOrd> MovingMax<T> {
    /// Returns the maximum of the queued elements, or `None` when the queue
    /// is empty.
    #[inline]
    pub fn max(&self) -> Option<&T> {
        match (self.push_stack.last(), self.pop_stack.last()) {
            (None, None) => None,
            (Some((_, max)), None) | (None, Some((_, max))) => Some(max),
            // Both stacks hold elements: the overall maximum is the larger
            // of the two per-stack maxima.
            (Some((_, a)), Some((_, b))) => Some(if a > b { a } else { b }),
        }
    }

    /// Appends `val` to the back of the queue.
    #[inline]
    pub fn push(&mut self, val: T) {
        let entry = match self.push_stack.last() {
            // The existing maximum is still the largest: carry it forward.
            Some((_, max)) if val < *max => (val, max.clone()),
            // Empty stack, or `val` is not less than the current maximum.
            _ => (val.clone(), val),
        };
        self.push_stack.push(entry);
    }

    /// Removes and returns the element at the front of the queue (the oldest
    /// one), or `None` when the queue is empty.
    #[inline]
    pub fn pop(&mut self) -> Option<T> {
        if self.pop_stack.is_empty() {
            // Move everything over, newest first, recomputing the running
            // maximum for the reversed order. Each value is cloned exactly
            // once, to fill the `running_max` slot of its new entry.
            while let Some((val, _)) = self.push_stack.pop() {
                let max = match self.pop_stack.last() {
                    Some((_, prev_max)) if *prev_max > val => prev_max.clone(),
                    _ => val.clone(),
                };
                self.pop_stack.push((val, max));
            }
        }
        self.pop_stack.pop().map(|(val, _)| val)
    }
}
1610
// Registers the `Max` aggregate. The macro is defined outside this chunk;
// presumably it generates the `max` expression helper (taking `expression`)
// and the `max_udaf` accessor, using the string as documentation — TODO confirm.
make_udaf_expr_and_func!(
    Max,
    max,
    expression,
    "Returns the maximum of a group of values.",
    max_udaf
);

// Same registration for the `Min` aggregate (`min` helper, `min_udaf`
// accessor) — see the hedge above regarding what the macro emits.
make_udaf_expr_and_func!(
    Min,
    min,
    expression,
    "Returns the minimum of a group of values.",
    min_udaf
);
1626
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::{
        IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
    };
    use datafusion_common::Result;
    use rand::Rng;
    use std::sync::Arc;

    /// Feeds `array` as a single batch to a fresh `MinAccumulator` and then a
    /// fresh `MaxAccumulator` of `data_type`, checking each result.
    fn assert_min_max(
        array: &ArrayRef,
        data_type: &DataType,
        expected_min: ScalarValue,
        expected_max: ScalarValue,
    ) {
        let mut min = MinAccumulator::try_new(data_type).unwrap();
        min.update_batch(&[Arc::clone(array)]).unwrap();
        let min_res = min.evaluate().unwrap();
        assert_eq!(min_res, expected_min);

        let mut max = MaxAccumulator::try_new(data_type).unwrap();
        max.update_batch(&[Arc::clone(array)]).unwrap();
        let max_res = max.evaluate().unwrap();
        assert_eq!(max_res, expected_max);
    }

    #[test]
    fn interval_min_max() {
        // IntervalYearMonth
        let year_month: ArrayRef = Arc::new(IntervalYearMonthArray::from(vec![
            IntervalYearMonthType::make_value(0, 1),
            IntervalYearMonthType::make_value(5, 34),
            IntervalYearMonthType::make_value(-2, 4),
            IntervalYearMonthType::make_value(7, -4),
            IntervalYearMonthType::make_value(0, 1),
        ]));
        assert_min_max(
            &year_month,
            &DataType::Interval(IntervalUnit::YearMonth),
            ScalarValue::IntervalYearMonth(Some(
                IntervalYearMonthType::make_value(-2, 4),
            )),
            ScalarValue::IntervalYearMonth(Some(
                IntervalYearMonthType::make_value(5, 34),
            )),
        );

        // IntervalDayTime
        let day_time: ArrayRef = Arc::new(IntervalDayTimeArray::from(vec![
            IntervalDayTimeType::make_value(0, 0),
            IntervalDayTimeType::make_value(5, 454000),
            IntervalDayTimeType::make_value(-34, 0),
            IntervalDayTimeType::make_value(7, -4000),
            IntervalDayTimeType::make_value(1, 0),
        ]));
        assert_min_max(
            &day_time,
            &DataType::Interval(IntervalUnit::DayTime),
            ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(-34, 0))),
            ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(
                7, -4000,
            ))),
        );

        // IntervalMonthDayNano
        let month_day_nano: ArrayRef = Arc::new(IntervalMonthDayNanoArray::from(vec![
            IntervalMonthDayNanoType::make_value(1, 0, 0),
            IntervalMonthDayNanoType::make_value(344, 34, -43_000_000_000),
            IntervalMonthDayNanoType::make_value(-593, -33, 13_000_000_000),
            IntervalMonthDayNanoType::make_value(5, 2, 493_000_000_000),
            IntervalMonthDayNanoType::make_value(1, 0, 0),
        ]));
        assert_min_max(
            &month_day_nano,
            &DataType::Interval(IntervalUnit::MonthDayNano),
            ScalarValue::IntervalMonthDayNano(Some(
                IntervalMonthDayNanoType::make_value(-593, -33, 13_000_000_000),
            )),
            ScalarValue::IntervalMonthDayNano(Some(
                IntervalMonthDayNanoType::make_value(344, 34, -43_000_000_000),
            )),
        );
    }

    #[test]
    fn float_min_max_with_nans() {
        /// Applies each slice in `batches` as its own batch, then compares
        /// the accumulator's result with `expected`.
        fn check(acc: &mut dyn Accumulator, batches: &[&[f32]], expected: f32) {
            for values in batches {
                let array: ArrayRef =
                    Arc::new(Float32Array::from_iter_values(values.iter().copied()));
                acc.update_batch(&[array]).unwrap();
            }
            let result = acc.evaluate().unwrap();
            assert_eq!(result, ScalarValue::Float32(Some(expected)));
        }

        let pos_nan = f32::NAN;
        let zero = 0_f32;
        let neg_inf = f32::NEG_INFINITY;

        // Each expectation is exercised twice: with the values split across
        // batches and with the values inside one batch.
        let min = || MinAccumulator::try_new(&DataType::Float32).unwrap();
        let max = || MaxAccumulator::try_new(&DataType::Float32).unwrap();

        check(&mut min(), &[&[zero], &[pos_nan]], zero);
        check(&mut min(), &[&[zero, pos_nan]], zero);
        check(&mut min(), &[&[zero], &[neg_inf]], neg_inf);
        check(&mut min(), &[&[zero, neg_inf]], neg_inf);
        check(&mut max(), &[&[zero], &[pos_nan]], pos_nan);
        check(&mut max(), &[&[zero, pos_nan]], pos_nan);
        check(&mut max(), &[&[zero], &[neg_inf]], zero);
        check(&mut max(), &[&[zero, neg_inf]], zero);
    }

    /// Returns `len` random integers drawn from `0..100`.
    fn get_random_vec_i32(len: usize) -> Vec<i32> {
        let mut rng = rand::thread_rng();
        (0..len).map(|_| rng.gen_range(0..100)).collect()
    }

    /// Compares `MovingMin` against a brute-force minimum over the trailing
    /// `n_sliding_window + 1` elements of random data.
    fn moving_min_i32(len: usize, n_sliding_window: usize) -> Result<()> {
        let data = get_random_vec_i32(len);
        let mut moving_min = MovingMin::<i32>::new();
        let mut expected = Vec::with_capacity(len);
        let mut res = Vec::with_capacity(len);
        for (i, &value) in data.iter().enumerate() {
            let start = i.saturating_sub(n_sliding_window);
            let window_min = data[start..=i].iter().copied().min().unwrap();
            expected.push(window_min);

            moving_min.push(value);
            if i > n_sliding_window {
                moving_min.pop();
            }
            res.push(*moving_min.min().unwrap());
        }
        assert_eq!(res, expected);
        Ok(())
    }

    /// Compares `MovingMax` against a brute-force maximum over the trailing
    /// `n_sliding_window + 1` elements of random data.
    fn moving_max_i32(len: usize, n_sliding_window: usize) -> Result<()> {
        let data = get_random_vec_i32(len);
        let mut moving_max = MovingMax::<i32>::new();
        let mut expected = Vec::with_capacity(len);
        let mut res = Vec::with_capacity(len);
        for (i, &value) in data.iter().enumerate() {
            let start = i.saturating_sub(n_sliding_window);
            let window_max = data[start..=i].iter().copied().max().unwrap();
            expected.push(window_max);

            moving_max.push(value);
            if i > n_sliding_window {
                moving_max.pop();
            }
            res.push(*moving_max.max().unwrap());
        }
        assert_eq!(res, expected);
        Ok(())
    }

    #[test]
    fn moving_min_tests() -> Result<()> {
        for n_sliding_window in [10, 20, 50, 100] {
            moving_min_i32(100, n_sliding_window)?;
        }
        Ok(())
    }

    #[test]
    fn moving_max_tests() -> Result<()> {
        for n_sliding_window in [10, 20, 50, 100] {
            moving_max_i32(100, n_sliding_window)?;
        }
        Ok(())
    }

    #[test]
    fn test_min_max_coerce_types() {
        // For these inputs the coerced types equal the input types.
        let funs: Vec<Box<dyn AggregateUDFImpl>> =
            vec![Box::new(Min::new()), Box::new(Max::new())];
        let input_types = [
            vec![DataType::Int32],
            vec![DataType::Decimal128(10, 2)],
            vec![DataType::Decimal256(1, 1)],
            vec![DataType::Utf8],
        ];
        for fun in &funs {
            for input_type in &input_types {
                let coerced = fun.coerce_types(input_type);
                assert_eq!(*input_type, coerced.unwrap());
            }
        }
    }

    #[test]
    fn test_get_min_max_return_type_coerce_dictionary() -> Result<()> {
        let dictionary = DataType::Dictionary(
            Box::new(DataType::Utf8),
            Box::new(DataType::Int32),
        );
        let coerced = get_min_max_result_type(&[dictionary])?;
        assert_eq!(coerced, vec![DataType::Int32]);
        Ok(())
    }
}