1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
#![deny(missing_docs)]
//! A byte buffer implementation for Vortex.
//!
//! Vortex arrays hold data in a set of buffers.
//!
//! # Alignment
//! See: `<https://github.com/spiraldb/vortex/issues/115>`
//!
//! We do not currently enforce any alignment guarantees on the buffer.
use core::cmp::Ordering;
use core::ops::{Deref, Range};
use arrow_buffer::{ArrowNativeType, Buffer as ArrowBuffer, MutableBuffer as ArrowMutableBuffer};
pub use string::*;
mod flexbuffers;
mod string;
/// Buffer is an owned, cheaply cloneable byte array.
///
/// Buffers form the building blocks of all in-memory storage in Vortex.
///
/// The bytes are held by a private storage enum that wraps either an Apache Arrow buffer or a
/// [`bytes::Bytes`]. Which of the two backs a given value depends on how it was constructed.
#[derive(Debug, Clone)]
pub struct Buffer(Inner);
/// Private backing storage for a [`Buffer`]: one variant per supported byte container.
#[derive(Debug, Clone)]
enum Inner {
// TODO(ngates): we could add Aligned(Arc<AVec>) from aligned-vec package
/// A Buffer that wraps an Apache Arrow buffer
Arrow(ArrowBuffer),
/// A Buffer that wraps an owned [`bytes::Bytes`].
Bytes(bytes::Bytes),
}
// SAFETY: `Buffer` contains only `Inner`, whose variants hold an `ArrowBuffer` or a
// `bytes::Bytes`; `Buffer` itself adds no other state.
// NOTE(review): soundness therefore rests on both wrapped types being safe to send to and share
// between threads — confirm against the pinned `arrow-buffer` and `bytes` versions. If both
// already implement `Send`/`Sync`, the auto traits apply and these manual impls could be removed
// so that the compiler checks the property instead.
unsafe impl Send for Buffer {}
unsafe impl Sync for Buffer {}
impl Buffer {
    /// Create a new buffer of the provided length with all bytes set to `0u8`.
    /// If len is 0, does not perform any allocations.
    pub fn from_len_zeroed(len: usize) -> Self {
        Self::from(ArrowMutableBuffer::from_len_zeroed(len))
    }

    /// Length of the buffer in bytes
    pub fn len(&self) -> usize {
        match &self.0 {
            Inner::Arrow(b) => b.len(),
            Inner::Bytes(b) => b.len(),
        }
    }

    /// Predicate for empty buffers
    pub fn is_empty(&self) -> bool {
        match &self.0 {
            Inner::Arrow(b) => b.is_empty(),
            Inner::Bytes(b) => b.is_empty(),
        }
    }

    /// Return a new view on the buffer, but limited to the given index range.
    ///
    /// For a `bytes::Bytes`-backed buffer, an empty range (any range with
    /// `range.start >= range.end`) yields an empty buffer derived from `self` without the range
    /// itself being passed to the backing buffer.
    ///
    /// # Panics
    /// For an Arrow-backed buffer, panics if `range.start > range.end`. Any further bounds
    /// checking is left to the slicing method of the backing buffer.
    // TODO(ngates): implement std::ops::Index
    #[allow(clippy::same_name_method)]
    pub fn slice(&self, range: Range<usize>) -> Self {
        match &self.0 {
            Inner::Arrow(b) => {
                // A bare `range.end - range.start` only traps when overflow checks are enabled
                // (the default for debug builds); otherwise it wraps to a huge length. Reject
                // inverted ranges explicitly so the failure is identical in every build profile.
                let length = range.end.checked_sub(range.start).unwrap_or_else(|| {
                    panic!(
                        "Buffer::slice: range start {} is greater than range end {}",
                        range.start, range.end
                    )
                });
                Buffer(Inner::Arrow(b.slice_with_length(range.start, length)))
            }
            Inner::Bytes(b) => {
                if range.is_empty() {
                    // bytes::Bytes::slice does not preserve alignment if the range is empty
                    let mut empty_b = b.clone();
                    empty_b.truncate(0);
                    Buffer(Inner::Bytes(empty_b))
                } else {
                    Buffer(Inner::Bytes(b.slice(range)))
                }
            }
        }
    }

    /// Access the buffer as an immutable byte slice.
    #[allow(clippy::same_name_method)]
    pub fn as_slice(&self) -> &[u8] {
        match &self.0 {
            Inner::Arrow(b) => b.as_ref(),
            Inner::Bytes(b) => b.as_ref(),
        }
    }

    /// Convert the buffer into a `Vec` of the given native type `T`.
    ///
    /// # Ownership
    /// The caller takes ownership of the underlying memory.
    ///
    /// # Errors
    /// This method will fail if the underlying buffer is an owned [`bytes::Bytes`].
    ///
    /// This method will also fail if we attempt to pass a `T` that is not aligned to the `T` that
    /// it was originally allocated with.
    ///
    /// In both cases the original buffer is handed back unchanged in the `Err` variant.
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
        match self.0 {
            // On failure Arrow returns the buffer it was given; re-wrap it for the caller.
            Inner::Arrow(buffer) => buffer.into_vec::<T>().map_err(|b| Buffer(Inner::Arrow(b))),
            // Cannot convert bytes into a mutable vec
            Inner::Bytes(_) => Err(self),
        }
    }

    /// Convert a Buffer into an ArrowBuffer with no copying.
    pub fn into_arrow(self) -> ArrowBuffer {
        match self.0 {
            Inner::Arrow(a) => a,
            // This is cheeky. But it uses From<bytes::Bytes> for arrow_buffer::Bytes, even though
            // arrow_buffer::Bytes is only pub(crate). Seems weird...
            // See: https://github.com/apache/arrow-rs/issues/6033
            Inner::Bytes(b) => ArrowBuffer::from_bytes(b.into()),
        }
    }
}
impl PartialEq for Buffer {
    /// Two buffers are equal when the byte slices they expose are equal.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.as_slice(), other.as_slice());
        lhs == rhs
    }
}
// Equality is defined by comparing `[u8]` slices, which is a full equivalence relation.
impl Eq for Buffer {}
impl PartialOrd for Buffer {
    /// Orders two buffers by comparing the byte slices they expose.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let lhs: &[u8] = self.as_slice();
        let rhs: &[u8] = other.as_slice();
        lhs.partial_cmp(rhs)
    }
}
impl Deref for Buffer {
    type Target = [u8];

    /// Dereferences to the buffer's contents as a byte slice.
    fn deref(&self) -> &[u8] {
        Self::as_slice(self)
    }
}
impl AsRef<[u8]> for Buffer {
fn as_ref(&self) -> &[u8] {
self.as_slice()
}
}
impl From<&'static [u8]> for Buffer {
fn from(value: &'static [u8]) -> Self {
Buffer(Inner::Bytes(bytes::Bytes::from_static(value)))
}
}
impl From<&'static str> for Buffer {
fn from(slice: &'static str) -> Buffer {
Buffer(Inner::Bytes(bytes::Bytes::from_static(slice.as_bytes())))
}
}
impl<T> From<Vec<T>> for Buffer
where
    T: ArrowNativeType,
{
    /// Takes ownership of a vector of native values and stores it as an Arrow buffer.
    fn from(value: Vec<T>) -> Self {
        // We prefer Arrow since it retains mutability
        let arrow = ArrowBuffer::from_vec(value);
        Self(Inner::Arrow(arrow))
    }
}
impl From<bytes::Bytes> for Buffer {
fn from(value: bytes::Bytes) -> Self {
Buffer(Inner::Bytes(value))
}
}
impl From<ArrowBuffer> for Buffer {
fn from(value: ArrowBuffer) -> Self {
Buffer(Inner::Arrow(value))
}
}
impl From<ArrowMutableBuffer> for Buffer {
fn from(value: ArrowMutableBuffer) -> Self {
Buffer(Inner::Arrow(ArrowBuffer::from(value)))
}
}
impl FromIterator<u8> for Buffer {
fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
Buffer(Inner::Arrow(ArrowBuffer::from_iter(iter)))
}
}