1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
use vortex_dtype::DType;
use vortex_error::{vortex_bail, vortex_err, VortexError, VortexResult};

use crate::arrow::{Datum, FromArrowArray};
use crate::encoding::Encoding;
use crate::{ArrayDType, ArrayData};

/// Encoding-specific implementation of the SQL `LIKE` operator.
///
/// The top-level [`like`] function looks up an implementation via the array's
/// encoding and calls it when one is available; otherwise it falls back to the
/// Arrow-based implementation.
pub trait LikeFn<Array> {
    /// Evaluate `array LIKE pattern` according to `options`.
    ///
    /// In debug builds, [`like`] asserts that the returned array has the same
    /// length as `array` and a `DType::Bool` dtype that is nullable whenever
    /// either `array` or `pattern` is nullable.
    fn like(
        &self,
        array: &Array,
        pattern: &ArrayData,
        options: LikeOptions,
    ) -> VortexResult<ArrayData>;
}

/// Adapter that lets any encoding implementing `LikeFn` over its own array type
/// also be invoked with an untyped [`ArrayData`].
impl<E: Encoding> LikeFn<ArrayData> for E
where
    E: LikeFn<E::Array>,
    for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>,
{
    /// Convert `array` into the encoding's typed array, recover the concrete
    /// encoding from the array itself, and delegate to the typed `LikeFn`.
    ///
    /// # Errors
    ///
    /// Propagates the conversion error if `array` cannot be viewed as
    /// `E::Array`, and returns a "Mismatched encoding" error if the array's
    /// encoding is not an `E`.
    fn like(
        &self,
        array: &ArrayData,
        pattern: &ArrayData,
        options: LikeOptions,
    ) -> VortexResult<ArrayData> {
        // The typed conversion runs first so that its error takes precedence
        // over the encoding mismatch error.
        let typed_array = <&E::Array>::try_from(array)?;

        let typed_encoding = match array.encoding().as_any().downcast_ref::<E>() {
            Some(found) => found,
            None => return Err(vortex_err!("Mismatched encoding")),
        };

        <E as LikeFn<E::Array>>::like(typed_encoding, typed_array, pattern, options)
    }
}

/// Options for SQL LIKE function
///
/// The `Default` value has both flags set to `false`, which selects a plain
/// case-sensitive `LIKE`.
#[derive(Default, Debug, Clone, Copy)]
pub struct LikeOptions {
    /// When `true`, the negated variant is selected (Arrow's `nlike` / `nilike`).
    pub negated: bool,
    /// When `true`, the case-insensitive variant is selected (Arrow's `ilike` / `nilike`).
    pub case_insensitive: bool,
}

/// Perform SQL `array LIKE pattern`.
///
/// There are two wildcards supported with the LIKE operator:
/// - %: matches zero or more characters
/// - _: matches exactly one character
///
/// If the array's encoding provides a `LikeFn` implementation it is used;
/// otherwise the Arrow-based implementation is used as a fallback.
///
/// # Errors
///
/// Returns an error if either `array` or `pattern` does not have a
/// `DType::Utf8` dtype, or if the selected implementation fails.
pub fn like(
    array: &ArrayData,
    pattern: &ArrayData,
    options: LikeOptions,
) -> VortexResult<ArrayData> {
    if !matches!(array.dtype(), DType::Utf8(..)) {
        vortex_bail!("Expected utf8 array, got {}", array.dtype());
    }
    if !matches!(pattern.dtype(), DType::Utf8(..)) {
        // Report the pattern's dtype: it is the pattern that failed this check.
        vortex_bail!("Expected utf8 pattern, got {}", pattern.dtype());
    }

    if let Some(f) = array.encoding().like_fn() {
        let result = f.like(array, pattern, options)?;

        // Sanity-check the encoding-specific implementation in debug builds.
        debug_assert_eq!(
            result.len(),
            array.len(),
            "Like length mismatch {}",
            array.encoding().id()
        );
        debug_assert_eq!(
            result.dtype(),
            &DType::Bool((array.dtype().is_nullable() || pattern.dtype().is_nullable()).into()),
            "Like dtype mismatch {}",
            array.encoding().id()
        );

        return Ok(result);
    }

    // Otherwise, fall back to the Arrow-based implementation.
    log::debug!(
        "No like implementation found for encoding {}",
        array.encoding().id(),
    );
    arrow_like(array, pattern, options)
}

/// Implementation of `LikeFn` using the Arrow crate.
///
/// Both inputs are converted to Arrow `Datum`s and the matching
/// `arrow_string::like` kernel is selected from `options`:
///
/// | `negated` | `case_insensitive` | kernel   |
/// |-----------|--------------------|----------|
/// | `false`   | `false`            | `like`   |
/// | `true`    | `false`            | `nlike`  |
/// | `false`   | `true`             | `ilike`  |
/// | `true`    | `true`             | `nilike` |
///
/// # Errors
///
/// Returns an error if either input cannot be converted to a `Datum`, or if
/// the Arrow kernel fails.
pub(crate) fn arrow_like(
    child: &ArrayData,
    pattern: &ArrayData,
    options: LikeOptions,
) -> VortexResult<ArrayData> {
    // The result is nullable if either input is nullable. This matches the
    // result dtype that `like` asserts for encoding-specific implementations.
    let nullable = child.dtype().is_nullable() || pattern.dtype().is_nullable();
    let child = Datum::try_from(child.clone())?;
    let pattern = Datum::try_from(pattern.clone())?;

    let array = match (options.negated, options.case_insensitive) {
        (false, false) => arrow_string::like::like(&child, &pattern)?,
        (true, false) => arrow_string::like::nlike(&child, &pattern)?,
        (false, true) => arrow_string::like::ilike(&child, &pattern)?,
        (true, true) => arrow_string::like::nilike(&child, &pattern)?,
    };

    Ok(ArrayData::from_arrow(&array, nullable))
}