// vortex_layout/layouts/chunked/stats_table.rs
use std::sync::Arc;
use itertools::Itertools;
use vortex_array::array::StructArray;
use vortex_array::builders::{builder_with_capacity, ArrayBuilder, ArrayBuilderExt};
use vortex_array::stats::{ArrayStatistics as _, Stat};
use vortex_array::validity::{ArrayValidity, Validity};
use vortex_array::{ArrayDType, ArrayData, IntoArrayData};
use vortex_dtype::{DType, Nullability, StructDType};
use vortex_error::{vortex_bail, VortexResult};
/// Statistics for a single column, stored as an array together with the schema it describes.
///
/// [`StatsTable::try_new`] only accepts an `array` whose dtype equals
/// [`StatsTable::dtype_for_stats_table`] evaluated for `column_dtype` and `stats`.
/// Tables produced by `StatsAccumulator` hold one row per pushed chunk.
#[derive(Clone)]
pub struct StatsTable {
// Data type of the column the statistics describe.
column_dtype: DType,
// Backing array holding the statistic values, one field per entry of `stats`.
array: ArrayData,
// Statistics present in `array`, in the same order as its fields.
stats: Arc<[Stat]>,
}
impl StatsTable {
    /// Wraps `array` as a stats table after checking that it has the expected schema.
    ///
    /// # Errors
    ///
    /// Returns an error when `array.dtype()` differs from the dtype computed by
    /// [`Self::dtype_for_stats_table`] for `column_dtype` and `stats`.
    pub fn try_new(
        column_dtype: DType,
        array: ArrayData,
        stats: Arc<[Stat]>,
    ) -> VortexResult<Self> {
        let expected_dtype = Self::dtype_for_stats_table(&column_dtype, &stats);
        if &expected_dtype != array.dtype() {
            vortex_bail!("Array dtype does not match expected stats table dtype");
        }

        Ok(Self {
            column_dtype,
            array,
            stats,
        })
    }

    /// Computes the dtype of a stats table holding `present_stats` for a column of type
    /// `column_dtype`.
    ///
    /// The result is a non-nullable struct with one field per statistic, in the order given.
    /// Each field is named after the statistic and typed as the nullable form of that
    /// statistic's dtype for the column.
    pub fn dtype_for_stats_table(column_dtype: &DType, present_stats: &[Stat]) -> DType {
        let field_dtypes = present_stats
            .iter()
            .map(|stat| stat.dtype(column_dtype).as_nullable())
            .collect();
        let field_names = present_stats
            .iter()
            .map(|stat| stat.name().into())
            .collect();

        let struct_dtype = StructDType::new(field_names, field_dtypes);
        DType::Struct(struct_dtype, Nullability::NonNullable)
    }

    /// Data type of the column these statistics describe.
    pub fn column_dtype(&self) -> &DType {
        &self.column_dtype
    }

    /// The array backing this stats table.
    pub fn array(&self) -> &ArrayData {
        &self.array
    }

    /// The statistics that have a field in the backing array.
    pub fn present_stats(&self) -> &[Stat] {
        &self.stats
    }
}
/// Collects statistics chunk by chunk for one column and assembles them into a [`StatsTable`].
pub struct StatsAccumulator {
// Data type of the column whose chunks are being summarised.
column_dtype: DType,
// Requested statistics, sorted by their `u8` value in `new`.
stats: Vec<Stat>,
// One builder per entry of `stats`, at the same index.
builders: Vec<Box<dyn ArrayBuilder>>,
// Number of chunks pushed so far; used as the row count of the resulting table.
length: usize,
}
impl StatsAccumulator {
    /// Creates an accumulator that tracks `stats` for a column of type `dtype`.
    ///
    /// The requested statistics are sorted by their `u8` representation, and one builder is
    /// allocated per statistic using the nullable form of that statistic's dtype.
    pub fn new(dtype: DType, mut stats: Vec<Stat>) -> Self {
        // Give the stats a canonical order that does not depend on what the caller passed.
        stats.sort_by_key(|s| u8::from(*s));

        let builders = stats
            .iter()
            // Nullable so that a chunk lacking a statistic can be recorded as null;
            // 1024 is only the initial builder capacity.
            .map(|s| builder_with_capacity(&s.dtype(&dtype).as_nullable(), 1024))
            .collect();

        Self {
            column_dtype: dtype,
            stats,
            builders,
            length: 0,
        }
    }

    /// Appends one row of statistics computed from `array`.
    ///
    /// For every tracked statistic the value is computed from `array`, cast to the builder's
    /// dtype and appended; a statistic that cannot be computed is recorded as null.
    ///
    /// # Errors
    ///
    /// Returns an error if casting a computed value or appending it to its builder fails.
    pub fn push_chunk(&mut self, array: &ArrayData) -> VortexResult<()> {
        for (s, builder) in self.stats.iter().zip_eq(self.builders.iter_mut()) {
            if let Some(v) = array.statistics().compute(*s) {
                builder.append_scalar(&v.cast(builder.dtype())?)?;
            } else {
                builder.append_null();
            }
        }
        self.length += 1;
        Ok(())
    }

    /// Finishes the builders and assembles the accumulated rows into a [`StatsTable`].
    ///
    /// Statistics whose column is entirely null are dropped from the table. Returns
    /// `Ok(None)` when no statistic remains after that filtering.
    ///
    /// # Errors
    ///
    /// Returns an error if a builder cannot be finished, if the null count of a column cannot
    /// be determined, or if the struct array cannot be constructed.
    ///
    // NOTE(review): `length` is not reset here. If `finish` drains the builders, pushing more
    // chunks after this call would make the row count disagree with the field lengths.
    // Confirm this method is only meant to be called once.
    pub fn as_stats_table(&mut self) -> VortexResult<Option<StatsTable>> {
        let capacity = self.stats.len();
        let mut names = Vec::with_capacity(capacity);
        let mut fields = Vec::with_capacity(capacity);
        let mut stats = Vec::with_capacity(capacity);

        // `zip_eq` (as in `push_chunk`) so a stats/builders length mismatch is never silently
        // truncated.
        for (stat, builder) in self.stats.iter().zip_eq(self.builders.iter_mut()) {
            let values = builder
                .finish()
                .map_err(|e| e.with_context(format!("Failed to finish stat builder for {stat}")))?;

            // A statistic that was never available carries no information; leave it out.
            if values.logical_validity().null_count()? == values.len() {
                continue;
            }

            stats.push(*stat);
            // Name fields exactly as `StatsTable::dtype_for_stats_table` does, so the resulting
            // array passes the dtype check in `StatsTable::try_new`.
            names.push(stat.name().into());
            fields.push(values);
        }

        if names.is_empty() {
            return Ok(None);
        }

        let array = StructArray::try_new(names.into(), fields, self.length, Validity::NonNullable)?
            .into_array();

        Ok(Some(StatsTable {
            column_dtype: self.column_dtype.clone(),
            array,
            stats: stats.into(),
        }))
    }
}