vortex_sampling_compressor/
downscale.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
use vortex_array::array::{PrimitiveArray, PrimitiveEncoding};
use vortex_array::compute::try_cast;
use vortex_array::encoding::EncodingVTable;
use vortex_array::stats::{ArrayStatistics, Stat};
use vortex_array::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
use vortex_dtype::{DType, PType};
use vortex_error::{vortex_err, VortexResult};

/// Narrow a primitive array to the smallest PType able to hold its min and max.
///
/// Arrays that are not primitive-encoded are returned untouched. For primitive
/// arrays, the `Min` and `Max` statistics are computed and converted to `i64`;
/// the actual narrowing is delegated to `downscale_primitive_integer_array`.
///
/// # Errors
///
/// Returns an error if the `Min` or `Max` statistic cannot be computed, if the
/// i64-typed scalars cannot be converted into native `i64` values, or if the
/// narrowing cast itself fails.
pub fn downscale_integer_array(array: ArrayData) -> VortexResult<ArrayData> {
    // Non-primitive encodings (e.g. a ConstantArray) are passed through as-is.
    if !array.is_encoding(PrimitiveEncoding.id()) {
        return Ok(array);
    }
    let primitive = PrimitiveArray::try_from(array)?;

    let min_scalar = primitive
        .statistics()
        .compute(Stat::Min)
        .ok_or_else(|| vortex_err!("Failed to compute min on primitive array"))?;
    let max_scalar = primitive
        .statistics()
        .compute(Stat::Max)
        .ok_or_else(|| vortex_err!("Failed to compute max on primitive array"))?;

    // Both bounds are widened to i64 (keeping the array's nullability). A bound
    // that cannot be represented as i64 means the array is too wide to narrow,
    // so it is handed back in its original type.
    let wide = DType::Primitive(PType::I64, primitive.dtype().nullability());
    let min_scalar = match min_scalar.cast(&wide) {
        Ok(scalar) => scalar,
        Err(_) => return Ok(primitive.into_array()),
    };
    let max_scalar = match max_scalar.cast(&wide) {
        Ok(scalar) => scalar,
        Err(_) => return Ok(primitive.into_array()),
    };

    let lower = i64::try_from(min_scalar)?;
    let upper = i64::try_from(max_scalar)?;
    let narrowed = downscale_primitive_integer_array(primitive, lower, upper)?;
    Ok(narrowed.into_array())
}

/// Cast `array` to the narrowest integer PType whose range covers `min..=max`.
///
/// When either bound is negative, the signed types `I8`, `I16` and `I32` are
/// tried in that order. Otherwise the unsigned types `U8`, `U16` and `U32` are
/// tried, checking only `max`. The array's nullability is carried over to the
/// target dtype. If none of these types fits, `array` is returned unchanged.
///
/// # Errors
///
/// Propagates any error from the cast or from converting the cast result back
/// into a `PrimitiveArray`.
fn downscale_primitive_integer_array(
    array: PrimitiveArray,
    min: i64,
    max: i64,
) -> VortexResult<PrimitiveArray> {
    // A negative bound means the target type has to be signed.
    let needs_sign = min < 0 || max < 0;

    // Pick the target type first; the cast itself happens once, below.
    let narrower = if needs_sign {
        let within = |lo: i64, hi: i64| lo <= min && max <= hi;
        if within(i64::from(i8::MIN), i64::from(i8::MAX)) {
            Some(PType::I8)
        } else if within(i64::from(i16::MIN), i64::from(i16::MAX)) {
            Some(PType::I16)
        } else if within(i64::from(i32::MIN), i64::from(i32::MAX)) {
            Some(PType::I32)
        } else {
            None
        }
    } else if max <= i64::from(u8::MAX) {
        Some(PType::U8)
    } else if max <= i64::from(u16::MAX) {
        Some(PType::U16)
    } else if max <= i64::from(u32::MAX) {
        Some(PType::U32)
    } else {
        None
    };

    // Nothing narrower fits: hand the array back as it came in.
    let Some(ptype) = narrower else {
        return Ok(array);
    };

    let target = DType::Primitive(ptype, array.dtype().nullability());
    try_cast(&array, &target)?.into_primitive()
}