vortex_scan/
range_scan.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
use std::ops::{BitAnd, Range};
use std::sync::Arc;

use vortex_array::compute::FilterMask;
use vortex_array::{ArrayData, IntoArrayVariant};
use vortex_error::VortexResult;
use vortex_expr::ExprRef;

use crate::evaluator::{AsyncEvaluator, Evaluator};
use crate::{RowMask, Scan};

pub struct RangeScan {
    scan: Arc<Scan>,
    row_range: Range<u64>,
    mask: FilterMask,
    state: State,
}

enum State {
    // First we run the filter expression over the full row range.
    FilterEval((FilterMask, ExprRef)),
    // Then we project the selected rows.
    Project((FilterMask, ExprRef)),
    // And then we're done.
    Ready(ArrayData),
}

pub enum NextOp {
    /// The finished result of the scan.
    Ready(ArrayData),
    /// The next expression to evaluate.
    /// The caller **must** first apply the mask before evaluating the expression.
    Eval((Range<u64>, FilterMask, ExprRef)),
}

/// We implement the range scan via polling for the next operation to perform, and then posting
/// the result back to the range scan to make progress.
///
/// This allows us to minimize the amount of logic we duplicate in order to support both
/// synchronous and asynchronous evaluation.
impl RangeScan {
    pub(crate) fn new(scan: Arc<Scan>, row_offset: u64, mask: FilterMask) -> Self {
        let state = scan
            .filter()
            .map(|filter| {
                // If we have a filter expression, then for now we evaluate it against all rows
                // of the range.
                // TODO(ngates): we should decide based on mask.true_count() whether to include the
                //  current mask or not. But note that the resulting expression would need to be
                //  aligned and intersected with the given mask.
                State::FilterEval((FilterMask::new_true(mask.len()), filter.clone()))
            })
            .unwrap_or_else(|| {
                // If there is no filter expression, then we immediately perform a mask + project.
                State::Project((mask.clone(), scan.projection().clone()))
            });

        Self {
            scan,
            row_range: row_offset..row_offset + mask.len() as u64,
            mask,
            state,
        }
    }

    /// Check for the next operation to perform.
    /// Returns `Poll::Ready` when the scan is complete.
    fn next(&self) -> NextOp {
        match &self.state {
            State::FilterEval((mask, expr)) => {
                NextOp::Eval((self.row_range.clone(), mask.clone(), expr.clone()))
            }
            State::Project((mask, expr)) => {
                NextOp::Eval((self.row_range.clone(), mask.clone(), expr.clone()))
            }
            State::Ready(array) => NextOp::Ready(array.clone()),
        }
    }

    /// Post the result of the last expression evaluation back to the range scan.
    fn post(&mut self, result: ArrayData) -> VortexResult<()> {
        match &self.state {
            State::FilterEval(_) => {
                // Intersect the result of the filter expression with our initial row mask.
                let mask = result.into_bool()?.boolean_buffer();
                let mask = self.mask.to_boolean_buffer()?.bitand(&mask);
                // Then move onto the projection
                self.state =
                    State::Project((FilterMask::from(mask), self.scan.projection().clone()))
            }
            State::Project(_) => {
                // We're done.
                self.state = State::Ready(result);
            }
            State::Ready(_) => {}
        }
        Ok(())
    }

    /// Evaluate the [`RangeScan`] operation using a synchronous expression evaluator.
    pub fn evaluate(mut self, evaluator: &dyn Evaluator) -> VortexResult<ArrayData> {
        loop {
            match self.next() {
                NextOp::Ready(array) => return Ok(array),
                NextOp::Eval((row_range, mask, expr)) => {
                    self.post(evaluator.evaluate(RowMask::new(mask, row_range.start), expr)?)?;
                }
            }
        }
    }

    /// Evaluate the [`RangeScan`] operation using an async expression evaluator.
    pub async fn evaluate_async(
        mut self,
        evaluator: &dyn AsyncEvaluator,
    ) -> VortexResult<ArrayData> {
        loop {
            match self.next() {
                NextOp::Ready(array) => return Ok(array),
                NextOp::Eval((row_range, mask, expr)) => {
                    self.post(
                        evaluator
                            .evaluate(RowMask::new(mask, row_range.start), expr)
                            .await?,
                    )?;
                }
            }
        }
    }
}