// vortex_layout/strategies/struct_of_chunks.rs

use vortex_dtype::{DType, Nullability};
use vortex_error::{vortex_bail, VortexResult};

use crate::layouts::chunked::writer::{ChunkedLayoutOptions, ChunkedLayoutWriter};
use crate::layouts::struct_::writer::StructLayoutWriter;
use crate::strategies::{LayoutStrategy, LayoutWriter};

/// Struct-of-chunks is the default Vortex layout strategy.
///
/// For a struct dtype, the data is first split into one column per struct field, and each
/// column is then written through its own chunked layout writer, chunked as per the provided
/// batches. Any non-struct dtype is written as a single chunked column.
///
/// A nullable struct dtype is rejected with an error when a writer is requested.
///
/// TODO(ngates): add configuration options to this struct to re-chunk the data within each
///   column by size.
pub struct StructOfChunks;

impl LayoutStrategy for StructOfChunks {
    /// Builds the layout writer used to write data of the given `dtype`.
    ///
    /// * For a struct dtype, returns a [`StructLayoutWriter`] holding one chunked column
    ///   writer per field, in the order yielded by the struct dtype.
    /// * For any other dtype, returns a single chunked column writer.
    ///
    /// # Errors
    ///
    /// Returns an error if `dtype` is a struct whose own nullability is
    /// [`Nullability::Nullable`].
    fn new_writer(&self, dtype: &DType) -> VortexResult<Box<dyn LayoutWriter>> {
        match dtype {
            DType::Struct(struct_dtype, nullability) => {
                // This checks the nullability attached to the struct dtype itself; the
                // field dtypes are not inspected here and are passed through unchanged.
                if nullability == &Nullability::Nullable {
                    vortex_bail!("Nullable structs are not supported");
                }

                // One independent chunked writer per struct field.
                Ok(Box::new(StructLayoutWriter::new(
                    dtype.clone(),
                    struct_dtype
                        .dtypes()
                        .map(|col_dtype| default_column_layout(&col_dtype))
                        .collect(),
                )))
            }
            // Non-struct data is treated as a single column.
            _ => Ok(default_column_layout(dtype)),
        }
    }
}

/// Creates the writer used for a single column: a [`ChunkedLayoutWriter`] for `dtype`
/// configured with the default [`ChunkedLayoutOptions`].
fn default_column_layout(dtype: &DType) -> Box<dyn LayoutWriter> {
    let options = ChunkedLayoutOptions::default();
    let writer = ChunkedLayoutWriter::new(dtype, options);
    // The declared return type coerces the concrete writer into a trait object.
    Box::new(writer)
}