vortex_scan/range_scan.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
use std::ops::{BitAnd, Range};
use std::sync::Arc;
use vortex_array::compute::FilterMask;
use vortex_array::{ArrayData, IntoArrayVariant};
use vortex_error::VortexResult;
use vortex_expr::ExprRef;
use crate::evaluator::{AsyncEvaluator, Evaluator};
use crate::{RowMask, Scan};
pub struct RangeScan {
scan: Arc<Scan>,
row_range: Range<u64>,
mask: FilterMask,
state: State,
}
enum State {
// First we run the filter expression over the full row range.
FilterEval((FilterMask, ExprRef)),
// Then we project the selected rows.
Project((FilterMask, ExprRef)),
// And then we're done.
Ready(ArrayData),
}
pub enum NextOp {
/// The finished result of the scan.
Ready(ArrayData),
/// The next expression to evaluate.
/// The caller **must** first apply the mask before evaluating the expression.
Eval((Range<u64>, FilterMask, ExprRef)),
}
/// We implement the range scan via polling for the next operation to perform, and then posting
/// the result back to the range scan to make progress.
///
/// This allows us to minimize the amount of logic we duplicate in order to support both
/// synchronous and asynchronous evaluation.
impl RangeScan {
pub(crate) fn new(scan: Arc<Scan>, row_offset: u64, mask: FilterMask) -> Self {
let state = scan
.filter()
.map(|filter| {
// If we have a filter expression, then for now we evaluate it against all rows
// of the range.
// TODO(ngates): we should decide based on mask.true_count() whether to include the
// current mask or not. But note that the resulting expression would need to be
// aligned and intersected with the given mask.
State::FilterEval((FilterMask::new_true(mask.len()), filter.clone()))
})
.unwrap_or_else(|| {
// If there is no filter expression, then we immediately perform a mask + project.
State::Project((mask.clone(), scan.projection().clone()))
});
Self {
scan,
row_range: row_offset..row_offset + mask.len() as u64,
mask,
state,
}
}
/// Check for the next operation to perform.
/// Returns `Poll::Ready` when the scan is complete.
fn next(&self) -> NextOp {
match &self.state {
State::FilterEval((mask, expr)) => {
NextOp::Eval((self.row_range.clone(), mask.clone(), expr.clone()))
}
State::Project((mask, expr)) => {
NextOp::Eval((self.row_range.clone(), mask.clone(), expr.clone()))
}
State::Ready(array) => NextOp::Ready(array.clone()),
}
}
/// Post the result of the last expression evaluation back to the range scan.
fn post(&mut self, result: ArrayData) -> VortexResult<()> {
match &self.state {
State::FilterEval(_) => {
// Intersect the result of the filter expression with our initial row mask.
let mask = result.into_bool()?.boolean_buffer();
let mask = self.mask.to_boolean_buffer()?.bitand(&mask);
// Then move onto the projection
self.state =
State::Project((FilterMask::from(mask), self.scan.projection().clone()))
}
State::Project(_) => {
// We're done.
self.state = State::Ready(result);
}
State::Ready(_) => {}
}
Ok(())
}
/// Evaluate the [`RangeScan`] operation using a synchronous expression evaluator.
pub fn evaluate(mut self, evaluator: &dyn Evaluator) -> VortexResult<ArrayData> {
loop {
match self.next() {
NextOp::Ready(array) => return Ok(array),
NextOp::Eval((row_range, mask, expr)) => {
self.post(evaluator.evaluate(RowMask::new(mask, row_range.start), expr)?)?;
}
}
}
}
/// Evaluate the [`RangeScan`] operation using an async expression evaluator.
pub async fn evaluate_async(
mut self,
evaluator: &dyn AsyncEvaluator,
) -> VortexResult<ArrayData> {
loop {
match self.next() {
NextOp::Ready(array) => return Ok(array),
NextOp::Eval((row_range, mask, expr)) => {
self.post(
evaluator
.evaluate(RowMask::new(mask, row_range.start), expr)
.await?,
)?;
}
}
}
}
}