1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
//! Connectors to enable DataFusion to read Vortex data.

#![allow(clippy::nonminimal_bool)]
#![allow(clippy::cast_possible_truncation)]

use std::sync::Arc;

use arrow_schema::{DataType, Schema};
use datafusion::prelude::{DataFrame, SessionContext};
use datafusion_common::Result as DFResult;
use datafusion_expr::{Expr, Operator};
use vortex_array::{ArrayDType, ArrayData};
use vortex_error::vortex_err;

use crate::memory::{VortexMemTable, VortexMemTableOptions};

pub mod memory;
pub mod persistent;

const SUPPORTED_BINARY_OPS: &[Operator] = &[
    Operator::Eq,
    Operator::NotEq,
    Operator::Gt,
    Operator::GtEq,
    Operator::Lt,
    Operator::LtEq,
];

fn supported_data_types(dt: DataType) -> bool {
    let is_supported = dt.is_integer()
        || dt.is_floating()
        || dt.is_null()
        || dt == DataType::Boolean
        || dt == DataType::Binary
        || dt == DataType::Utf8
        || dt == DataType::Binary
        || dt == DataType::BinaryView
        || dt == DataType::Utf8View
        || dt == DataType::Date32
        || dt == DataType::Date64
        || matches!(
            dt,
            DataType::Timestamp(_, _) | DataType::Time32(_) | DataType::Time64(_)
        );

    if !is_supported {
        log::debug!("DataFusion data type {:?} is not supported", dt);
    }

    is_supported
}

pub trait SessionContextExt {
    fn register_mem_vortex<S: AsRef<str>>(&self, name: S, array: ArrayData) -> DFResult<()> {
        self.register_mem_vortex_opts(name, array, VortexMemTableOptions::default())
    }

    fn register_mem_vortex_opts<S: AsRef<str>>(
        &self,
        name: S,
        array: ArrayData,
        options: VortexMemTableOptions,
    ) -> DFResult<()>;

    fn read_mem_vortex(&self, array: ArrayData) -> DFResult<DataFrame> {
        self.read_mem_vortex_opts(array, VortexMemTableOptions::default())
    }

    fn read_mem_vortex_opts(
        &self,
        array: ArrayData,
        options: VortexMemTableOptions,
    ) -> DFResult<DataFrame>;
}

impl SessionContextExt for SessionContext {
    fn register_mem_vortex_opts<S: AsRef<str>>(
        &self,
        name: S,
        array: ArrayData,
        options: VortexMemTableOptions,
    ) -> DFResult<()> {
        if !array.dtype().is_struct() {
            return Err(vortex_err!(
                "Vortex arrays must have struct type, found {}",
                array.dtype()
            )
            .into());
        }

        let vortex_table = VortexMemTable::new(array, options);
        self.register_table(name.as_ref(), Arc::new(vortex_table))
            .map(|_| ())
    }

    fn read_mem_vortex_opts(
        &self,
        array: ArrayData,
        options: VortexMemTableOptions,
    ) -> DFResult<DataFrame> {
        if !array.dtype().is_struct() {
            return Err(vortex_err!(
                "Vortex arrays must have struct type, found {}",
                array.dtype()
            )
            .into());
        }

        let vortex_table = VortexMemTable::new(array, options);

        self.read_table(Arc::new(vortex_table))
    }
}

fn can_be_pushed_down(expr: &Expr, schema: &Schema) -> bool {
    match expr {
        Expr::BinaryExpr(expr)
            if expr.op.is_logic_operator() || SUPPORTED_BINARY_OPS.contains(&expr.op) =>
        {
            can_be_pushed_down(expr.left.as_ref(), schema)
                & can_be_pushed_down(expr.right.as_ref(), schema)
        }
        Expr::Column(col) => match schema.column_with_name(col.name()) {
            Some((_, field)) => supported_data_types(field.data_type().clone()),
            _ => false,
        },
        Expr::Like(like) => {
            can_be_pushed_down(&like.expr, schema) && can_be_pushed_down(&like.pattern, schema)
        }
        Expr::Literal(lit) => supported_data_types(lit.data_type()),
        _ => {
            log::debug!("DataFusion expression can't be pushed down: {:?}", expr);
            false
        }
    }
}