use std::fmt::{Debug, Display};
use std::ops::BitAnd;
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer};
use serde::{Deserialize, Serialize};
use vortex_dtype::{DType, Nullability};
use vortex_error::{
vortex_bail, vortex_err, vortex_panic, VortexError, VortexExpect as _, VortexResult,
};
use crate::array::{BoolArray, ConstantArray};
use crate::compute::{filter, scalar_at, slice, take, FilterMask};
use crate::encoding::Encoding;
use crate::patches::Patches;
use crate::stats::ArrayStatistics;
use crate::{ArrayDType, ArrayData, IntoArrayData, IntoArrayVariant};
/// Encoding-level hooks for answering validity questions about arrays of type `Array`.
///
/// The receiver (`&self`) is the encoding; the array being inspected is passed explicitly.
pub trait ValidityVTable<Array> {
/// Reports whether the element of `array` at position `index` is valid.
fn is_valid(&self, array: &Array, index: usize) -> bool;
/// Returns the validity of `array` as a [`LogicalValidity`].
fn logical_validity(&self, array: &Array) -> LogicalValidity;
}
/// Blanket adapter: any encoding that implements [`ValidityVTable`] for its own typed array
/// also implements it for the untyped [`ArrayData`].
///
/// Both methods first view the `ArrayData` as `&E::Array`, then recover the concrete encoding
/// from the array itself, and finally forward to the typed implementation.
///
/// # Panics
/// Panics if the array cannot be viewed as `&E::Array`, or if the array's encoding is not `E`.
impl<E: Encoding> ValidityVTable<ArrayData> for E
where
    E: ValidityVTable<E::Array>,
    for<'a> &'a E::Array: TryFrom<&'a ArrayData, Error = VortexError>,
{
    fn is_valid(&self, array: &ArrayData, index: usize) -> bool {
        // The typed view is resolved before the encoding so that failures surface in a
        // fixed order.
        let typed: &E::Array =
            <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference");
        let vtable: &E = array
            .encoding()
            .as_any()
            .downcast_ref::<E>()
            .vortex_expect("Failed to downcast encoding");

        <E as ValidityVTable<E::Array>>::is_valid(vtable, typed, index)
    }

    fn logical_validity(&self, array: &ArrayData) -> LogicalValidity {
        let typed: &E::Array =
            <&E::Array>::try_from(array).vortex_expect("Failed to get array as reference");
        let vtable: &E = array
            .encoding()
            .as_any()
            .downcast_ref::<E>()
            .vortex_expect("Failed to downcast encoding");

        <E as ValidityVTable<E::Array>>::logical_validity(vtable, typed)
    }
}
/// Validity queries exposed directly on an array value.
pub trait ArrayValidity {
/// Reports whether the element at position `index` is valid.
fn is_valid(&self, index: usize) -> bool;
/// Returns the validity of the whole array as a [`LogicalValidity`].
fn logical_validity(&self) -> LogicalValidity;
}
/// Serializable tag recording which [`Validity`] variant an array uses.
///
/// The tag carries no array payload; `to_validity` takes a callback that supplies the
/// validity array when the tag is `Array`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum ValidityMetadata {
// Corresponds to `Validity::NonNullable`.
NonNullable,
// Corresponds to `Validity::AllValid`.
AllValid,
// Corresponds to `Validity::AllInvalid`.
AllInvalid,
// Corresponds to `Validity::Array`; the array itself is stored outside this tag.
Array,
}
/// `Display` output is the same text as the derived `Debug` output (the variant name).
impl Display for ValidityMetadata {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Delegate to the derived `Debug` implementation, reusing the caller's formatter.
        <Self as Debug>::fmt(self, f)
    }
}
impl ValidityMetadata {
    /// Rebuilds the [`Validity`] described by this tag.
    ///
    /// `array_fn` supplies the validity array and is invoked only when the tag is
    /// `ValidityMetadata::Array`; for every other tag it is dropped without being called.
    pub fn to_validity<F>(&self, array_fn: F) -> Validity
    where
        F: FnOnce() -> ArrayData,
    {
        match *self {
            Self::Array => Validity::Array(array_fn()),
            Self::AllInvalid => Validity::AllInvalid,
            Self::AllValid => Validity::AllValid,
            Self::NonNullable => Validity::NonNullable,
        }
    }
}
/// Describes which elements of an array are valid, without fixing the array's length.
#[derive(Clone, Debug)]
pub enum Validity {
/// The array's dtype is non-nullable; every element is valid.
NonNullable,
/// The array is nullable, and every element is valid.
AllValid,
/// The array is nullable, and every element is invalid.
AllInvalid,
/// Per-element validity held in an array; `is_valid` reads each entry as a `bool`.
Array(ArrayData),
}
impl Validity {
    /// The dtype used for validity arrays: non-nullable boolean.
    pub const DTYPE: DType = DType::Bool(Nullability::NonNullable);

    /// Returns the [`ValidityMetadata`] tag for this validity.
    ///
    /// # Errors
    /// Fails when this is `Validity::Array` and the validity array's length differs from
    /// `length`.
    pub fn to_metadata(&self, length: usize) -> VortexResult<ValidityMetadata> {
        match self {
            Self::NonNullable => Ok(ValidityMetadata::NonNullable),
            Self::AllValid => Ok(ValidityMetadata::AllValid),
            Self::AllInvalid => Ok(ValidityMetadata::AllInvalid),
            Self::Array(a) => {
                let validity_len = a.len();
                if validity_len != length {
                    vortex_bail!(
                        "Validity array length {} doesn't match array length {}",
                        validity_len,
                        length
                    )
                }
                Ok(ValidityMetadata::Array)
            }
        }
    }

    /// Counts the invalid elements of an array of `length` elements with this validity.
    ///
    /// # Errors
    /// Fails when this is `Validity::Array` and either the validity array's length differs
    /// from `length` or its true count cannot be computed.
    pub fn null_count(&self, length: usize) -> VortexResult<usize> {
        match self {
            Self::NonNullable | Self::AllValid => Ok(0),
            Self::AllInvalid => Ok(length),
            Self::Array(a) => {
                let validity_len = a.len();
                if validity_len != length {
                    vortex_bail!(
                        "Validity array length {} doesn't match array length {}",
                        validity_len,
                        length
                    )
                }
                let true_count = a.statistics().compute_true_count().ok_or_else(|| {
                    vortex_err!("Failed to compute true count from validity array")
                })?;
                // The lengths were checked above, so every non-true entry is a null.
                Ok(length - true_count)
            }
        }
    }

    /// Consumes the validity and returns its array, or `None` for the array-less variants.
    pub fn into_array(self) -> Option<ArrayData> {
        match self {
            Self::Array(a) => Some(a),
            _ => None,
        }
    }

    /// Borrows the validity array, or returns `None` for the array-less variants.
    pub fn as_array(&self) -> Option<&ArrayData> {
        match self {
            Self::Array(a) => Some(a),
            _ => None,
        }
    }

    /// Returns `Nullability::NonNullable` for `Validity::NonNullable` and
    /// `Nullability::Nullable` for every other variant.
    pub fn nullability(&self) -> Nullability {
        match self {
            Self::NonNullable => Nullability::NonNullable,
            _ => Nullability::Nullable,
        }
    }

    /// Reports whether the element at `index` is valid.
    ///
    /// # Panics
    /// For `Validity::Array`, panics if the scalar at `index` cannot be fetched or cannot be
    /// converted to `bool`.
    #[inline]
    pub fn is_valid(&self, index: usize) -> bool {
        match self {
            Self::NonNullable | Self::AllValid => true,
            Self::AllInvalid => false,
            Self::Array(a) => scalar_at(a, index)
                .and_then(|s| bool::try_from(&s))
                .unwrap_or_else(|err| {
                    vortex_panic!(
                        err,
                        "Failed to get bool from Validity Array at index {}",
                        index
                    )
                }),
        }
    }

    /// Reports whether the element at `index` is null; the negation of [`Self::is_valid`].
    #[inline]
    pub fn is_null(&self, index: usize) -> bool {
        !self.is_valid(index)
    }

    /// Slices the validity to the range given by `start` and `stop`.
    ///
    /// Array-less variants do not depend on length and are returned as clones.
    pub fn slice(&self, start: usize, stop: usize) -> VortexResult<Self> {
        match self {
            Self::Array(a) => Ok(Self::Array(slice(a, start, stop)?)),
            _ => Ok(self.clone()),
        }
    }

    /// Applies `take` with `indices` to the validity.
    ///
    /// Array-less variants are returned unchanged without inspecting `indices`.
    pub fn take(&self, indices: &ArrayData) -> VortexResult<Self> {
        match self {
            Self::NonNullable => Ok(Self::NonNullable),
            Self::AllValid => Ok(Self::AllValid),
            Self::AllInvalid => Ok(Self::AllInvalid),
            Self::Array(a) => Ok(Self::Array(take(a, indices)?)),
        }
    }

    /// Like [`Self::take`], but uses the encoding's `take_unchecked` when the validity
    /// array's encoding provides a take function; otherwise falls back to the checked `take`.
    ///
    /// # Safety
    /// The caller must uphold whatever contract the encoding's `take_unchecked` imposes on
    /// `indices`. NOTE(review): presumably every index must be in bounds for the validity
    /// array; confirm against the take function's documentation.
    pub unsafe fn take_unchecked(&self, indices: &ArrayData) -> VortexResult<Self> {
        match self {
            Self::NonNullable => Ok(Self::NonNullable),
            Self::AllValid => Ok(Self::AllValid),
            Self::AllInvalid => Ok(Self::AllInvalid),
            Self::Array(a) => {
                let taken = if let Some(take_fn) = a.encoding().take_fn() {
                    // SAFETY: the obligations of `take_fn.take_unchecked` are forwarded to
                    // our caller through this function's own `unsafe` contract.
                    unsafe { take_fn.take_unchecked(a, indices) }
                } else {
                    take(a, indices)
                };
                taken.map(Self::Array)
            }
        }
    }

    /// Applies `filter` with `mask` to the validity.
    ///
    /// Array-less variants are returned as clones without inspecting `mask`.
    pub fn filter(&self, mask: &FilterMask) -> VortexResult<Self> {
        match self {
            v @ (Validity::NonNullable | Validity::AllValid | Validity::AllInvalid) => {
                Ok(v.clone())
            }
            Validity::Array(arr) => Ok(Validity::Array(filter(arr, mask.clone())?)),
        }
    }

    /// Converts to a [`LogicalValidity`] for an array of `length` elements.
    ///
    /// A validity array is collapsed to `AllValid` when its computed minimum is `true`, and
    /// to `AllInvalid` when its computed maximum is `false`. If neither statistic settles
    /// the question (including when it cannot be computed), the array is kept as is.
    pub fn to_logical(&self, length: usize) -> LogicalValidity {
        match self {
            Self::NonNullable => LogicalValidity::AllValid(length),
            Self::AllValid => LogicalValidity::AllValid(length),
            Self::AllInvalid => LogicalValidity::AllInvalid(length),
            Self::Array(a) => {
                if a.statistics().compute_min::<bool>().unwrap_or(false) {
                    LogicalValidity::AllValid(length)
                } else if a
                    .statistics()
                    .compute_max::<bool>()
                    .map(|m| !m)
                    .unwrap_or(false)
                {
                    LogicalValidity::AllInvalid(length)
                } else {
                    LogicalValidity::Array(a.clone())
                }
            }
        }
    }

    /// Combines two validities with a logical AND.
    ///
    /// # Errors
    /// Fails when both sides are arrays and either cannot be converted to a boolean array.
    pub fn and(self, rhs: Validity) -> VortexResult<Validity> {
        let validity = match (self, rhs) {
            // Only two non-nullable inputs produce a non-nullable output.
            (Validity::NonNullable, Validity::NonNullable) => Validity::NonNullable,
            // An all-invalid side forces every element invalid.
            (Validity::AllInvalid, _) | (_, Validity::AllInvalid) => Validity::AllInvalid,
            // An all-true side leaves the other side's array unchanged.
            (Validity::Array(a), Validity::AllValid)
            | (Validity::Array(a), Validity::NonNullable)
            | (Validity::NonNullable, Validity::Array(a))
            | (Validity::AllValid, Validity::Array(a)) => Validity::Array(a),
            // Both sides are all-true, and at least one is nullable.
            (Validity::NonNullable, Validity::AllValid)
            | (Validity::AllValid, Validity::NonNullable)
            | (Validity::AllValid, Validity::AllValid) => Validity::AllValid,
            (Validity::Array(lhs), Validity::Array(rhs)) => {
                // Canonicalize with `into_bool`, as `patch` and `PartialEq` do, rather than
                // requiring each validity array to already be a `BoolArray`.
                let lhs = lhs.into_bool()?.boolean_buffer();
                let rhs = rhs.into_bool()?.boolean_buffer();
                // `From<BooleanBuffer>` collapses the result to AllValid/AllInvalid when
                // it is uniform.
                Validity::from(lhs.bitand(&rhs))
            }
        };
        Ok(validity)
    }

    /// Overwrites the validity at the positions in `indices` with the values of `patches`.
    ///
    /// `len` is the length of the array this validity describes. Three cases return early
    /// without reading `len` or `indices`: both sides `NonNullable`, both `AllValid`, and
    /// both `AllInvalid`.
    ///
    /// # Errors
    /// Fails when exactly one of `self` and `patches` is `NonNullable`, or when
    /// canonicalizing or patching the underlying boolean arrays fails.
    pub fn patch(self, len: usize, indices: &ArrayData, patches: Validity) -> VortexResult<Self> {
        match (&self, &patches) {
            (Validity::NonNullable, Validity::NonNullable) => return Ok(Validity::NonNullable),
            (Validity::NonNullable, _) => {
                vortex_bail!("Can't patch a non-nullable validity with nullable validity")
            }
            (_, Validity::NonNullable) => {
                vortex_bail!("Can't patch a nullable validity with non-nullable validity")
            }
            (Validity::AllValid, Validity::AllValid) => return Ok(Validity::AllValid),
            (Validity::AllInvalid, Validity::AllInvalid) => return Ok(Validity::AllInvalid),
            _ => {}
        };

        // Materialize both sides as boolean arrays. The `NonNullable` arms cannot be reached
        // after the checks above; they exist only to keep the matches exhaustive.
        let source = match self {
            Validity::NonNullable => BoolArray::from(BooleanBuffer::new_set(len)),
            Validity::AllValid => BoolArray::from(BooleanBuffer::new_set(len)),
            Validity::AllInvalid => BoolArray::from(BooleanBuffer::new_unset(len)),
            Validity::Array(a) => a.into_bool()?,
        };
        let patch_values = match patches {
            Validity::NonNullable => BoolArray::from(BooleanBuffer::new_set(indices.len())),
            Validity::AllValid => BoolArray::from(BooleanBuffer::new_set(indices.len())),
            Validity::AllInvalid => BoolArray::from(BooleanBuffer::new_unset(indices.len())),
            Validity::Array(a) => a.into_bool()?,
        };

        let patches = Patches::new(len, indices.clone(), patch_values.into_array());
        // `try_from` collapses a uniform result back to AllValid/AllInvalid.
        Validity::try_from(source.patch(patches)?.into_array())
    }

    /// Converts `NonNullable` into `AllValid`; every other variant is returned unchanged.
    pub fn into_nullable(self) -> Validity {
        match self {
            Self::NonNullable => Self::AllValid,
            _ => self,
        }
    }
}
impl PartialEq for Validity {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::NonNullable, Self::NonNullable) => true,
(Self::AllValid, Self::AllValid) => true,
(Self::AllInvalid, Self::AllInvalid) => true,
(Self::Array(a), Self::Array(b)) => {
let a_buffer = a
.clone()
.into_bool()
.vortex_expect("Failed to get Validity Array as BoolArray")
.boolean_buffer();
let b_buffer = b
.clone()
.into_bool()
.vortex_expect("Failed to get Validity Array as BoolArray")
.boolean_buffer();
a_buffer == b_buffer
}
_ => false,
}
}
}
/// Builds a validity from a bit buffer, collapsing uniform buffers to the array-less variants.
///
/// A buffer whose bits are all set (including an empty buffer) becomes `AllValid`, a
/// non-empty buffer with no set bits becomes `AllInvalid`, and anything else is wrapped as
/// `Validity::Array`.
impl From<BooleanBuffer> for Validity {
    fn from(value: BooleanBuffer) -> Self {
        // Count the set bits once; both comparisons below reuse the result.
        let set_bits = value.count_set_bits();
        if set_bits == value.len() {
            Self::AllValid
        } else if set_bits == 0 {
            Self::AllInvalid
        } else {
            Self::Array(BoolArray::from(value).into_array())
        }
    }
}
impl From<NullBuffer> for Validity {
fn from(value: NullBuffer) -> Self {
value.into_inner().into()
}
}
/// Interprets an array as a validity by first classifying it as a [`LogicalValidity`].
///
/// # Errors
/// Propagates any error from `LogicalValidity::try_from`.
impl TryFrom<ArrayData> for Validity {
    type Error = VortexError;

    fn try_from(value: ArrayData) -> Result<Self, Self::Error> {
        let logical = LogicalValidity::try_from(value)?;
        Ok(logical.into_validity())
    }
}
/// Concatenates a sequence of [`LogicalValidity`] chunks into one [`Validity`].
///
/// If every chunk is all-valid (which includes an empty sequence) the result is `AllValid`;
/// if every chunk is all-invalid the result is `AllInvalid`. Otherwise the chunks are
/// written, in order, into one boolean array.
///
/// # Panics
/// Panics if an array chunk cannot be converted to a boolean array.
impl FromIterator<LogicalValidity> for Validity {
    fn from_iter<T: IntoIterator<Item = LogicalValidity>>(iter: T) -> Self {
        // Collected up front because the chunks are inspected more than once.
        let chunks: Vec<LogicalValidity> = iter.into_iter().collect();

        if chunks.iter().all(LogicalValidity::all_valid) {
            return Self::AllValid;
        }
        if chunks.iter().all(LogicalValidity::all_invalid) {
            return Self::AllInvalid;
        }

        let total_len: usize = chunks.iter().map(LogicalValidity::len).sum();
        let mut bits = BooleanBufferBuilder::new(total_len);
        for chunk in chunks {
            match chunk {
                LogicalValidity::Array(array) => {
                    let chunk_bits = array
                        .into_bool()
                        .vortex_expect("Failed to get Validity Array as BoolArray")
                        .boolean_buffer();
                    bits.append_buffer(&chunk_bits);
                }
                LogicalValidity::AllValid(run) => bits.append_n(run, true),
                LogicalValidity::AllInvalid(run) => bits.append_n(run, false),
            }
        }

        Self::Array(BoolArray::from(bits.finish()).into_array())
    }
}
impl FromIterator<bool> for Validity {
fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
Validity::from(BooleanBuffer::from_iter(iter))
}
}
/// Picks the validity for a dtype nullability: `NonNullable` maps to
/// `Validity::NonNullable` and `Nullable` maps to `Validity::AllValid`.
impl From<Nullability> for Validity {
    fn from(value: Nullability) -> Self {
        match value {
            Nullability::Nullable => Self::AllValid,
            Nullability::NonNullable => Self::NonNullable,
        }
    }
}
/// Validity of an array of known length; unlike [`Validity`], it has no non-nullable variant.
#[derive(Clone, Debug)]
pub enum LogicalValidity {
/// Every element is valid; the payload is the number of elements.
AllValid(usize),
/// Every element is invalid; the payload is the number of elements.
AllInvalid(usize),
/// Per-element validity held in an array.
Array(ArrayData),
}
impl LogicalValidity {
    /// Classifies a boolean array as a logical validity.
    ///
    /// An array whose true count equals its length (including an empty array) becomes
    /// `AllValid`, one with no true values becomes `AllInvalid`, and anything else is kept
    /// as `Array`.
    ///
    /// # Errors
    /// Fails if the array's dtype is not [`Validity::DTYPE`], or if its true count cannot be
    /// computed.
    pub fn try_new_from_array(array: ArrayData) -> VortexResult<Self> {
        if !matches!(array.dtype(), &Validity::DTYPE) {
            vortex_bail!("Expected a non-nullable boolean array");
        }

        let valid_count = array.statistics().compute_true_count().ok_or_else(|| {
            vortex_err!(
                "Failed to compute true count from validity array {:#?}",
                array
            )
        })?;
        let len = array.len();

        // The all-valid test comes first, so an empty array is classified as AllValid.
        let logical = if valid_count == len {
            Self::AllValid(len)
        } else if valid_count == 0 {
            Self::AllInvalid(len)
        } else {
            Self::Array(array)
        };
        Ok(logical)
    }

    /// Converts to an Arrow null buffer; `None` means every element is valid.
    ///
    /// # Errors
    /// Fails if the validity array cannot be converted to a boolean array.
    pub fn to_null_buffer(&self) -> VortexResult<Option<NullBuffer>> {
        let nulls = match self {
            Self::AllValid(_) => return Ok(None),
            Self::AllInvalid(len) => NullBuffer::new_null(*len),
            Self::Array(array) => NullBuffer::new(array.clone().into_bool()?.boolean_buffer()),
        };
        Ok(Some(nulls))
    }

    /// True only for the `AllValid` variant.
    pub fn all_valid(&self) -> bool {
        matches!(self, Self::AllValid(_))
    }

    /// True only for the `AllInvalid` variant.
    pub fn all_invalid(&self) -> bool {
        matches!(self, Self::AllInvalid(_))
    }

    /// Number of elements this validity describes.
    pub fn len(&self) -> usize {
        match self {
            Self::Array(array) => array.len(),
            Self::AllValid(count) | Self::AllInvalid(count) => *count,
        }
    }

    /// Whether this validity describes zero elements.
    pub fn is_empty(&self) -> bool {
        match self {
            Self::Array(array) => array.is_empty(),
            Self::AllValid(count) | Self::AllInvalid(count) => *count == 0,
        }
    }

    /// Drops the length information and converts to the matching nullable [`Validity`].
    pub fn into_validity(self) -> Validity {
        match self {
            Self::Array(array) => Validity::Array(array),
            Self::AllInvalid(_) => Validity::AllInvalid,
            Self::AllValid(_) => Validity::AllValid,
        }
    }

    /// Counts the invalid elements.
    ///
    /// # Errors
    /// Fails if the true count of the validity array cannot be computed.
    pub fn null_count(&self) -> VortexResult<usize> {
        let nulls = match self {
            Self::AllValid(_) => 0,
            Self::AllInvalid(len) => *len,
            Self::Array(array) => {
                let valid_count = array.statistics().compute_true_count().ok_or_else(|| {
                    vortex_err!("Failed to compute true count from validity array")
                })?;
                array.len() - valid_count
            }
        };
        Ok(nulls)
    }
}
impl TryFrom<ArrayData> for LogicalValidity {
type Error = VortexError;
fn try_from(array: ArrayData) -> VortexResult<Self> {
Self::try_new_from_array(array)
}
}
/// Materializes the validity as an array: the uniform variants become a constant `true` or
/// `false` array of the recorded length, and an array variant is returned unchanged.
impl IntoArrayData for LogicalValidity {
    fn into_array(self) -> ArrayData {
        let (fill, len) = match self {
            Self::Array(array) => return array,
            Self::AllValid(len) => (true, len),
            Self::AllInvalid(len) => (false, len),
        };
        ConstantArray::new(fill, len).into_array()
    }
}
#[cfg(test)]
mod tests {
use rstest::rstest;
use crate::array::{BoolArray, PrimitiveArray};
use crate::validity::Validity;
use crate::IntoArrayData;
// Each case is (validity, len, patch positions, patch validity, expected result).
// Every case patches positions 2 and 4 of a length-5 validity, covering each pairing of
// AllValid / AllInvalid / Array for the base and for the patch values.
#[rstest]
#[case(Validity::AllValid, 5, &[2, 4], Validity::AllValid, Validity::AllValid)]
#[case(Validity::AllValid, 5, &[2, 4], Validity::AllInvalid, Validity::Array(BoolArray::from_iter([true, true, false, true, false]).into_array())
)]
#[case(Validity::AllValid, 5, &[2, 4], Validity::Array(BoolArray::from_iter([true, false]).into_array()), Validity::Array(BoolArray::from_iter([true, true, true, true, false]).into_array())
)]
#[case(Validity::AllInvalid, 5, &[2, 4], Validity::AllValid, Validity::Array(BoolArray::from_iter([false, false, true, false, true]).into_array())
)]
#[case(Validity::AllInvalid, 5, &[2, 4], Validity::AllInvalid, Validity::AllInvalid)]
#[case(Validity::AllInvalid, 5, &[2, 4], Validity::Array(BoolArray::from_iter([true, false]).into_array()), Validity::Array(BoolArray::from_iter([false, false, true, false, false]).into_array())
)]
#[case(Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()), 5, &[2, 4], Validity::AllValid, Validity::Array(BoolArray::from_iter([false, true, true, true, true]).into_array())
)]
#[case(Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()), 5, &[2, 4], Validity::AllInvalid, Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array())
)]
#[case(Validity::Array(BoolArray::from_iter([false, true, false, true, false]).into_array()), 5, &[2, 4], Validity::Array(BoolArray::from_iter([true, false]).into_array()), Validity::Array(BoolArray::from_iter([false, true, true, true, false]).into_array())
)]
fn patch_validity(
#[case] validity: Validity,
#[case] len: usize,
#[case] positions: &[u64],
#[case] patches: Validity,
#[case] expected: Validity,
) {
// The patch positions are passed to `patch` as a non-nullable primitive array.
let indices =
PrimitiveArray::from_vec(positions.to_vec(), Validity::NonNullable).into_array();
assert_eq!(validity.patch(len, &indices, patches).unwrap(), expected);
}
// Expects `patch(..).unwrap()` to panic for a patch at index 4 of a length-2 validity.
// NOTE(review): patching `Validity::NonNullable` with `Validity::AllInvalid` makes `patch`
// return an error from its nullability check, so the `unwrap` panics before any index is
// compared with the length. Confirm whether a nullable base validity was intended, so that
// the bounds are what this test exercises.
#[test]
#[should_panic]
fn out_of_bounds_patch() {
Validity::NonNullable
.patch(
2,
&PrimitiveArray::from_vec(vec![4], Validity::NonNullable).into_array(),
Validity::AllInvalid,
)
.unwrap();
}
}