datafusion_expr/
window_state.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Structures used to hold window function state (for implementing WindowUDFs)
19
20use std::{collections::VecDeque, ops::Range, sync::Arc};
21
22use crate::{WindowFrame, WindowFrameBound, WindowFrameUnits};
23
24use arrow::{
25    array::ArrayRef,
26    compute::{concat, concat_batches, SortOptions},
27    datatypes::{DataType, SchemaRef},
28    record_batch::RecordBatch,
29};
30use datafusion_common::{
31    internal_err,
32    utils::{compare_rows, get_row_at_idx, search_in_slice},
33    DataFusionError, Result, ScalarValue,
34};
35
/// Holds the state of evaluating a window function
#[derive(Debug)]
pub struct WindowAggState {
    /// The row range over which the window function is currently evaluated.
    /// `prune_state` shifts both ends back by the number of pruned rows.
    pub window_frame_range: Range<usize>,
    /// Frame-specific search state, if any. `WindowAggState::new` leaves this
    /// as `None`.
    pub window_frame_ctx: Option<WindowFrameContext>,
    /// The index of the last row whose result has been calculated, relative to
    /// the partition record batch buffer. `update` advances it by the length
    /// of the newly produced output; `prune_state` moves it back.
    pub last_calculated_index: usize,
    /// Running total of rows removed so far; `prune_state` adds `n_prune` to
    /// it on every call.
    pub offset_pruned_rows: usize,
    /// Stores the results calculated by window frame
    pub out_col: ArrayRef,
    /// Keeps track of how many rows should be generated to be in sync with input record_batch.
    /// (For each row in the input record batch we need to generate a window result).
    pub n_row_result_missing: usize,
    /// Flag indicating whether we have received all data for this partition
    pub is_end: bool,
}
54
55impl WindowAggState {
56    pub fn prune_state(&mut self, n_prune: usize) {
57        self.window_frame_range = Range {
58            start: self.window_frame_range.start - n_prune,
59            end: self.window_frame_range.end - n_prune,
60        };
61        self.last_calculated_index -= n_prune;
62        self.offset_pruned_rows += n_prune;
63
64        match self.window_frame_ctx.as_mut() {
65            // Rows have no state do nothing
66            Some(WindowFrameContext::Rows(_)) => {}
67            Some(WindowFrameContext::Range { .. }) => {}
68            Some(WindowFrameContext::Groups { state, .. }) => {
69                let mut n_group_to_del = 0;
70                for (_, end_idx) in &state.group_end_indices {
71                    if n_prune < *end_idx {
72                        break;
73                    }
74                    n_group_to_del += 1;
75                }
76                state.group_end_indices.drain(0..n_group_to_del);
77                state
78                    .group_end_indices
79                    .iter_mut()
80                    .for_each(|(_, start_idx)| *start_idx -= n_prune);
81                state.current_group_idx -= n_group_to_del;
82            }
83            None => {}
84        };
85    }
86
87    pub fn update(
88        &mut self,
89        out_col: &ArrayRef,
90        partition_batch_state: &PartitionBatchState,
91    ) -> Result<()> {
92        self.last_calculated_index += out_col.len();
93        self.out_col = concat(&[&self.out_col, &out_col])?;
94        self.n_row_result_missing =
95            partition_batch_state.record_batch.num_rows() - self.last_calculated_index;
96        self.is_end = partition_batch_state.is_end;
97        Ok(())
98    }
99
100    pub fn new(out_type: &DataType) -> Result<Self> {
101        let empty_out_col = ScalarValue::try_from(out_type)?.to_array_of_size(0)?;
102        Ok(Self {
103            window_frame_range: Range { start: 0, end: 0 },
104            window_frame_ctx: None,
105            last_calculated_index: 0,
106            offset_pruned_rows: 0,
107            out_col: empty_out_col,
108            n_row_result_missing: 0,
109            is_end: false,
110        })
111    }
112}
113
/// This object stores the window frame state for use in incremental calculations.
///
/// `WindowFrameContext::new` picks the variant from the frame's `units`.
#[derive(Debug)]
pub enum WindowFrameContext {
    /// ROWS frames are inherently stateless.
    Rows(Arc<WindowFrame>),
    /// RANGE frames are stateful, they store indices specifying where the
    /// previous search left off. This amortizes the overall cost to O(n)
    /// where n denotes the row count.
    Range {
        /// The frame definition whose bounds are evaluated.
        window_frame: Arc<WindowFrame>,
        /// Search state carried between successive rows.
        state: WindowFrameStateRange,
    },
    /// GROUPS frames are stateful, they store group boundaries and indices
    /// specifying where the previous search left off. This amortizes the
    /// overall cost to O(n) where n denotes the row count.
    Groups {
        /// The frame definition whose bounds are evaluated.
        window_frame: Arc<WindowFrame>,
        /// Group boundaries discovered so far and the current group index.
        state: WindowFrameStateGroups,
    },
}
134
135impl WindowFrameContext {
136    /// Create a new state object for the given window frame.
137    pub fn new(window_frame: Arc<WindowFrame>, sort_options: Vec<SortOptions>) -> Self {
138        match window_frame.units {
139            WindowFrameUnits::Rows => WindowFrameContext::Rows(window_frame),
140            WindowFrameUnits::Range => WindowFrameContext::Range {
141                window_frame,
142                state: WindowFrameStateRange::new(sort_options),
143            },
144            WindowFrameUnits::Groups => WindowFrameContext::Groups {
145                window_frame,
146                state: WindowFrameStateGroups::default(),
147            },
148        }
149    }
150
151    /// This function calculates beginning/ending indices for the frame of the current row.
152    pub fn calculate_range(
153        &mut self,
154        range_columns: &[ArrayRef],
155        last_range: &Range<usize>,
156        length: usize,
157        idx: usize,
158    ) -> Result<Range<usize>> {
159        match self {
160            WindowFrameContext::Rows(window_frame) => {
161                Self::calculate_range_rows(window_frame, length, idx)
162            }
163            // Sort options is used in RANGE mode calculations because the
164            // ordering or position of NULLs impact range calculations and
165            // comparison of rows.
166            WindowFrameContext::Range {
167                window_frame,
168                ref mut state,
169            } => state.calculate_range(
170                window_frame,
171                last_range,
172                range_columns,
173                length,
174                idx,
175            ),
176            // Sort options is not used in GROUPS mode calculations as the
177            // inequality of two rows indicates a group change, and ordering
178            // or position of NULLs do not impact inequality.
179            WindowFrameContext::Groups {
180                window_frame,
181                ref mut state,
182            } => state.calculate_range(window_frame, range_columns, length, idx),
183        }
184    }
185
186    /// This function calculates beginning/ending indices for the frame of the current row.
187    fn calculate_range_rows(
188        window_frame: &Arc<WindowFrame>,
189        length: usize,
190        idx: usize,
191    ) -> Result<Range<usize>> {
192        let start = match window_frame.start_bound {
193            // UNBOUNDED PRECEDING
194            WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => 0,
195            WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
196                idx.saturating_sub(n as usize)
197            }
198            WindowFrameBound::CurrentRow => idx,
199            // UNBOUNDED FOLLOWING
200            WindowFrameBound::Following(ScalarValue::UInt64(None)) => {
201                return internal_err!(
202                    "Frame start cannot be UNBOUNDED FOLLOWING '{window_frame:?}'"
203                )
204            }
205            WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
206                std::cmp::min(idx + n as usize, length)
207            }
208            // ERRONEOUS FRAMES
209            WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
210                return internal_err!("Rows should be Uint")
211            }
212        };
213        let end = match window_frame.end_bound {
214            // UNBOUNDED PRECEDING
215            WindowFrameBound::Preceding(ScalarValue::UInt64(None)) => {
216                return internal_err!(
217                    "Frame end cannot be UNBOUNDED PRECEDING '{window_frame:?}'"
218                )
219            }
220            WindowFrameBound::Preceding(ScalarValue::UInt64(Some(n))) => {
221                if idx >= n as usize {
222                    idx - n as usize + 1
223                } else {
224                    0
225                }
226            }
227            WindowFrameBound::CurrentRow => idx + 1,
228            // UNBOUNDED FOLLOWING
229            WindowFrameBound::Following(ScalarValue::UInt64(None)) => length,
230            WindowFrameBound::Following(ScalarValue::UInt64(Some(n))) => {
231                std::cmp::min(idx + n as usize + 1, length)
232            }
233            // ERRONEOUS FRAMES
234            WindowFrameBound::Preceding(_) | WindowFrameBound::Following(_) => {
235                return internal_err!("Rows should be Uint")
236            }
237        };
238        Ok(Range { start, end })
239    }
240}
241
/// State for each unique partition determined according to PARTITION BY column(s)
#[derive(Debug)]
pub struct PartitionBatchState {
    /// The record batch belonging to current partition. It starts out empty
    /// (see `new`) and grows as `extend` concatenates incoming batches onto it.
    pub record_batch: RecordBatch,
    /// The record batch that contains the most recent row at the input.
    /// Please note that this batch doesn't necessarily have the same partitioning
    /// with `record_batch`. Keeping track of this batch enables us to prune
    /// `record_batch` when cardinality of the partition is sparse.
    pub most_recent_row: Option<RecordBatch>,
    /// Flag indicating whether we have received all data for this partition
    pub is_end: bool,
    /// Number of rows emitted for each partition
    pub n_out_row: usize,
}
257
258impl PartitionBatchState {
259    pub fn new(schema: SchemaRef) -> Self {
260        Self {
261            record_batch: RecordBatch::new_empty(schema),
262            most_recent_row: None,
263            is_end: false,
264            n_out_row: 0,
265        }
266    }
267
268    pub fn extend(&mut self, batch: &RecordBatch) -> Result<()> {
269        self.record_batch =
270            concat_batches(&self.record_batch.schema(), [&self.record_batch, batch])?;
271        Ok(())
272    }
273
274    pub fn set_most_recent_row(&mut self, batch: RecordBatch) {
275        // It is enough for the batch to contain only a single row (the rest
276        // are not necessary).
277        self.most_recent_row = Some(batch);
278    }
279}
280
/// This structure encapsulates all the state information we require as we scan
/// ranges of data while processing RANGE frames.
/// Attribute `sort_options` stores the column ordering specified by the ORDER
/// BY clause. This information is used to calculate the range.
#[derive(Debug, Default)]
pub struct WindowFrameStateRange {
    /// Sort options of the ORDER BY columns. The whole list is handed to
    /// `compare_rows`; only the first entry's `descending` flag decides
    /// whether a frame offset is added to or subtracted from the current row.
    sort_options: Vec<SortOptions>,
}
289
290impl WindowFrameStateRange {
291    /// Create a new object to store the search state.
292    fn new(sort_options: Vec<SortOptions>) -> Self {
293        Self { sort_options }
294    }
295
296    /// This function calculates beginning/ending indices for the frame of the current row.
297    // Argument `last_range` stores the resulting indices from the previous search. Since the indices only
298    // advance forward, we start from `last_range` subsequently. Thus, the overall
299    // time complexity of linear search amortizes to O(n) where n denotes the total
300    // row count.
301    fn calculate_range(
302        &mut self,
303        window_frame: &Arc<WindowFrame>,
304        last_range: &Range<usize>,
305        range_columns: &[ArrayRef],
306        length: usize,
307        idx: usize,
308    ) -> Result<Range<usize>> {
309        let start = match window_frame.start_bound {
310            WindowFrameBound::Preceding(ref n) => {
311                if n.is_null() {
312                    // UNBOUNDED PRECEDING
313                    0
314                } else {
315                    self.calculate_index_of_row::<true, true>(
316                        range_columns,
317                        last_range,
318                        idx,
319                        Some(n),
320                        length,
321                    )?
322                }
323            }
324            WindowFrameBound::CurrentRow => self.calculate_index_of_row::<true, true>(
325                range_columns,
326                last_range,
327                idx,
328                None,
329                length,
330            )?,
331            WindowFrameBound::Following(ref n) => self
332                .calculate_index_of_row::<true, false>(
333                    range_columns,
334                    last_range,
335                    idx,
336                    Some(n),
337                    length,
338                )?,
339        };
340        let end = match window_frame.end_bound {
341            WindowFrameBound::Preceding(ref n) => self
342                .calculate_index_of_row::<false, true>(
343                    range_columns,
344                    last_range,
345                    idx,
346                    Some(n),
347                    length,
348                )?,
349            WindowFrameBound::CurrentRow => self.calculate_index_of_row::<false, false>(
350                range_columns,
351                last_range,
352                idx,
353                None,
354                length,
355            )?,
356            WindowFrameBound::Following(ref n) => {
357                if n.is_null() {
358                    // UNBOUNDED FOLLOWING
359                    length
360                } else {
361                    self.calculate_index_of_row::<false, false>(
362                        range_columns,
363                        last_range,
364                        idx,
365                        Some(n),
366                        length,
367                    )?
368                }
369            }
370        };
371        Ok(Range { start, end })
372    }
373
374    /// This function does the heavy lifting when finding range boundaries. It is meant to be
375    /// called twice, in succession, to get window frame start and end indices (with `SIDE`
376    /// supplied as true and false, respectively).
377    fn calculate_index_of_row<const SIDE: bool, const SEARCH_SIDE: bool>(
378        &mut self,
379        range_columns: &[ArrayRef],
380        last_range: &Range<usize>,
381        idx: usize,
382        delta: Option<&ScalarValue>,
383        length: usize,
384    ) -> Result<usize> {
385        let current_row_values = get_row_at_idx(range_columns, idx)?;
386        let end_range = if let Some(delta) = delta {
387            let is_descending: bool = self
388                .sort_options
389                .first()
390                .ok_or_else(|| {
391                    DataFusionError::Internal(
392                        "Sort options unexpectedly absent in a window frame".to_string(),
393                    )
394                })?
395                .descending;
396
397            current_row_values
398                .iter()
399                .map(|value| {
400                    if value.is_null() {
401                        return Ok(value.clone());
402                    }
403                    if SEARCH_SIDE == is_descending {
404                        // TODO: Handle positive overflows.
405                        value.add(delta)
406                    } else if value.is_unsigned() && value < delta {
407                        // NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue.
408                        //       If we decide to implement a "default" construction mechanism for ScalarValue,
409                        //       change the following statement to use that.
410                        value.sub(value)
411                    } else {
412                        // TODO: Handle negative overflows.
413                        value.sub(delta)
414                    }
415                })
416                .collect::<Result<Vec<ScalarValue>>>()?
417        } else {
418            current_row_values
419        };
420        let search_start = if SIDE {
421            last_range.start
422        } else {
423            last_range.end
424        };
425        let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
426            let cmp = compare_rows(current, target, &self.sort_options)?;
427            Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
428        };
429        search_in_slice(range_columns, &end_range, compare_fn, search_start, length)
430    }
431}
432
433// In GROUPS mode, rows with duplicate sorting values are grouped together.
434// Therefore, there must be an ORDER BY clause in the window definition to use GROUPS mode.
435// The syntax is as follows:
436//     GROUPS frame_start [ frame_exclusion ]
437//     GROUPS BETWEEN frame_start AND frame_end [ frame_exclusion ]
438// The optional frame_exclusion specifier is not yet supported.
439// The frame_start and frame_end parameters allow us to specify which rows the window
440// frame starts and ends with. They accept the following values:
441//    - UNBOUNDED PRECEDING: Start with the first row of the partition. Possible only in frame_start.
442//    - offset PRECEDING: When used in frame_start, it refers to the first row of the group
443//                        that comes "offset" groups before the current group (i.e. the group
444//                        containing the current row). When used in frame_end, it refers to the
445//                        last row of the group that comes "offset" groups before the current group.
446//    - CURRENT ROW: When used in frame_start, it refers to the first row of the group containing
447//                   the current row. When used in frame_end, it refers to the last row of the group
448//                   containing the current row.
449//    - offset FOLLOWING: When used in frame_start, it refers to the first row of the group
450//                        that comes "offset" groups after the current group (i.e. the group
451//                        containing the current row). When used in frame_end, it refers to the
452//                        last row of the group that comes "offset" groups after the current group.
453//    - UNBOUNDED FOLLOWING: End with the last row of the partition. Possible only in frame_end.
454
/// This structure encapsulates all the state information we require as we
/// scan groups of data while processing window frames.
#[derive(Debug, Default)]
pub struct WindowFrameStateGroups {
    /// One entry per group discovered so far: the group's values and the
    /// (exclusive) row index where the group ends.
    /// Example: [[1, 1], [1, 1], [2, 1], [2, 1], ...] would correspond to
    ///          [([1, 1], 2), ([2, 1], 4), ...].
    pub group_end_indices: VecDeque<(Vec<ScalarValue>, usize)>,
    /// The group index to which the row index belongs, i.e. the position
    /// within `group_end_indices` of the group containing the current row.
    pub current_group_idx: usize,
}
466
467impl WindowFrameStateGroups {
468    fn calculate_range(
469        &mut self,
470        window_frame: &Arc<WindowFrame>,
471        range_columns: &[ArrayRef],
472        length: usize,
473        idx: usize,
474    ) -> Result<Range<usize>> {
475        let start = match window_frame.start_bound {
476            WindowFrameBound::Preceding(ref n) => {
477                if n.is_null() {
478                    // UNBOUNDED PRECEDING
479                    0
480                } else {
481                    self.calculate_index_of_row::<true, true>(
482                        range_columns,
483                        idx,
484                        Some(n),
485                        length,
486                    )?
487                }
488            }
489            WindowFrameBound::CurrentRow => self.calculate_index_of_row::<true, true>(
490                range_columns,
491                idx,
492                None,
493                length,
494            )?,
495            WindowFrameBound::Following(ref n) => self
496                .calculate_index_of_row::<true, false>(
497                    range_columns,
498                    idx,
499                    Some(n),
500                    length,
501                )?,
502        };
503        let end = match window_frame.end_bound {
504            WindowFrameBound::Preceding(ref n) => self
505                .calculate_index_of_row::<false, true>(
506                    range_columns,
507                    idx,
508                    Some(n),
509                    length,
510                )?,
511            WindowFrameBound::CurrentRow => self.calculate_index_of_row::<false, false>(
512                range_columns,
513                idx,
514                None,
515                length,
516            )?,
517            WindowFrameBound::Following(ref n) => {
518                if n.is_null() {
519                    // UNBOUNDED FOLLOWING
520                    length
521                } else {
522                    self.calculate_index_of_row::<false, false>(
523                        range_columns,
524                        idx,
525                        Some(n),
526                        length,
527                    )?
528                }
529            }
530        };
531        Ok(Range { start, end })
532    }
533
534    /// This function does the heavy lifting when finding range boundaries. It is meant to be
535    /// called twice, in succession, to get window frame start and end indices (with `SIDE`
536    /// supplied as true and false, respectively). Generic argument `SEARCH_SIDE` determines
537    /// the sign of `delta` (where true/false represents negative/positive respectively).
538    fn calculate_index_of_row<const SIDE: bool, const SEARCH_SIDE: bool>(
539        &mut self,
540        range_columns: &[ArrayRef],
541        idx: usize,
542        delta: Option<&ScalarValue>,
543        length: usize,
544    ) -> Result<usize> {
545        let delta = if let Some(delta) = delta {
546            if let ScalarValue::UInt64(Some(value)) = delta {
547                *value as usize
548            } else {
549                return internal_err!(
550                    "Unexpectedly got a non-UInt64 value in a GROUPS mode window frame"
551                );
552            }
553        } else {
554            0
555        };
556        let mut group_start = 0;
557        let last_group = self.group_end_indices.back_mut();
558        if let Some((group_row, group_end)) = last_group {
559            if *group_end < length {
560                let new_group_row = get_row_at_idx(range_columns, *group_end)?;
561                // If last/current group keys are the same, we extend the last group:
562                if new_group_row.eq(group_row) {
563                    // Update the end boundary of the group (search right boundary):
564                    *group_end = search_in_slice(
565                        range_columns,
566                        group_row,
567                        check_equality,
568                        *group_end,
569                        length,
570                    )?;
571                }
572            }
573            // Start searching from the last group boundary:
574            group_start = *group_end;
575        }
576
577        // Advance groups until `idx` is inside a group:
578        while idx >= group_start {
579            let group_row = get_row_at_idx(range_columns, group_start)?;
580            // Find end boundary of the group (search right boundary):
581            let group_end = search_in_slice(
582                range_columns,
583                &group_row,
584                check_equality,
585                group_start,
586                length,
587            )?;
588            self.group_end_indices.push_back((group_row, group_end));
589            group_start = group_end;
590        }
591
592        // Update the group index `idx` belongs to:
593        while self.current_group_idx < self.group_end_indices.len()
594            && idx >= self.group_end_indices[self.current_group_idx].1
595        {
596            self.current_group_idx += 1;
597        }
598
599        // Find the group index of the frame boundary:
600        let group_idx = if SEARCH_SIDE {
601            self.current_group_idx.saturating_sub(delta)
602        } else {
603            self.current_group_idx + delta
604        };
605
606        // Extend `group_start_indices` until it includes at least `group_idx`:
607        while self.group_end_indices.len() <= group_idx && group_start < length {
608            let group_row = get_row_at_idx(range_columns, group_start)?;
609            // Find end boundary of the group (search right boundary):
610            let group_end = search_in_slice(
611                range_columns,
612                &group_row,
613                check_equality,
614                group_start,
615                length,
616            )?;
617            self.group_end_indices.push_back((group_row, group_end));
618            group_start = group_end;
619        }
620
621        // Calculate index of the group boundary:
622        Ok(match (SIDE, SEARCH_SIDE) {
623            // Window frame start:
624            (true, _) => {
625                let group_idx = std::cmp::min(group_idx, self.group_end_indices.len());
626                if group_idx > 0 {
627                    // Normally, start at the boundary of the previous group.
628                    self.group_end_indices[group_idx - 1].1
629                } else {
630                    // If previous group is out of the table, start at zero.
631                    0
632                }
633            }
634            // Window frame end, PRECEDING n
635            (false, true) => {
636                if self.current_group_idx >= delta {
637                    let group_idx = self.current_group_idx - delta;
638                    self.group_end_indices[group_idx].1
639                } else {
640                    // Group is out of the table, therefore end at zero.
641                    0
642                }
643            }
644            // Window frame end, FOLLOWING n
645            (false, false) => {
646                let group_idx = std::cmp::min(
647                    self.current_group_idx + delta,
648                    self.group_end_indices.len() - 1,
649                );
650                self.group_end_indices[group_idx].1
651            }
652        })
653    }
654}
655
656fn check_equality(current: &[ScalarValue], target: &[ScalarValue]) -> Result<bool> {
657    Ok(current == target)
658}
659
#[cfg(test)]
mod tests {
    use super::*;

    use arrow::array::Float64Array;

    /// Single ascending column whose equal neighbours form six groups:
    /// `5 | 7 | 8 8 | 9 | 10 10 10 | 11`.
    fn get_test_data() -> (Vec<ArrayRef>, Vec<SortOptions>) {
        let values = vec![5.0, 7.0, 8.0, 8.0, 9., 10., 10., 10., 11.];
        let column: ArrayRef = Arc::new(Float64Array::from(values));
        let ascending_nulls_last = SortOptions {
            descending: false,
            nulls_first: false,
        };

        (vec![column], vec![ascending_nulls_last])
    }

    /// Walks the fixture row by row and checks, for each row, both the frame
    /// range and the group index the state ends up on.
    fn assert_expected(
        expected_results: Vec<(Range<usize>, usize)>,
        window_frame: &Arc<WindowFrame>,
    ) -> Result<()> {
        let (range_columns, _) = get_test_data();
        let n_row = range_columns[0].len();
        let mut state = WindowFrameStateGroups::default();
        for (idx, (expected_range, expected_group_idx)) in
            expected_results.into_iter().enumerate()
        {
            let actual_range =
                state.calculate_range(window_frame, &range_columns, n_row, idx)?;
            assert_eq!(actual_range, expected_range);
            assert_eq!(state.current_group_idx, expected_group_idx);
        }
        Ok(())
    }

    /// GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    #[test]
    fn test_window_frame_group_boundaries() -> Result<()> {
        let start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(Some(1)));
        let end_bound = WindowFrameBound::Following(ScalarValue::UInt64(Some(1)));
        let window_frame = Arc::new(WindowFrame::new_bounds(
            WindowFrameUnits::Groups,
            start_bound,
            end_bound,
        ));
        let expected_results = vec![
            (0..2, 0),
            (0..4, 1),
            (1..5, 2),
            (1..5, 2),
            (2..8, 3),
            (4..9, 4),
            (4..9, 4),
            (4..9, 4),
            (5..9, 5),
        ];
        assert_expected(expected_results, &window_frame)
    }

    /// GROUPS BETWEEN 1 FOLLOWING AND 2 FOLLOWING
    #[test]
    fn test_window_frame_group_boundaries_both_following() -> Result<()> {
        let start_bound = WindowFrameBound::Following(ScalarValue::UInt64(Some(1)));
        let end_bound = WindowFrameBound::Following(ScalarValue::UInt64(Some(2)));
        let window_frame = Arc::new(WindowFrame::new_bounds(
            WindowFrameUnits::Groups,
            start_bound,
            end_bound,
        ));
        let expected_results = vec![
            (1..4, 0),
            (2..5, 1),
            (4..8, 2),
            (4..8, 2),
            (5..9, 3),
            (8..9, 4),
            (8..9, 4),
            (8..9, 4),
            (9..9, 5),
        ];
        assert_expected(expected_results, &window_frame)
    }

    /// GROUPS BETWEEN 2 PRECEDING AND 1 PRECEDING
    #[test]
    fn test_window_frame_group_boundaries_both_preceding() -> Result<()> {
        let start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(Some(2)));
        let end_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(Some(1)));
        let window_frame = Arc::new(WindowFrame::new_bounds(
            WindowFrameUnits::Groups,
            start_bound,
            end_bound,
        ));
        let expected_results = vec![
            (0..0, 0),
            (0..1, 1),
            (0..2, 2),
            (0..2, 2),
            (1..4, 3),
            (2..5, 4),
            (2..5, 4),
            (2..5, 4),
            (4..8, 5),
        ];
        assert_expected(expected_results, &window_frame)
    }
}