1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#![deny(missing_docs)]

//! A byte buffer implementation for Vortex.
//!
//! Vortex arrays hold data in a set of buffers.
//!
//! # Alignment
//! See: <https://github.com/spiraldb/vortex/issues/115>
//!
//! We do not currently enforce any alignment guarantees on the buffer.

use core::cmp::Ordering;
use core::ops::{Deref, Range};

use arrow_buffer::{ArrowNativeType, Buffer as ArrowBuffer, MutableBuffer as ArrowMutableBuffer};
pub use string::*;

mod flexbuffers;
mod string;

/// Buffer is an owned, cheaply cloneable byte array.
///
/// Buffers form the building blocks of all in-memory storage in Vortex.
///
/// A `Buffer` is a thin newtype over a private backing representation, which is either an
/// Apache Arrow buffer or a [`bytes::Bytes`].
#[derive(Debug, Clone)]
pub struct Buffer(Inner);

/// Private backing storage for a [`Buffer`]: the bytes live in exactly one of these variants.
#[derive(Debug, Clone)]
enum Inner {
    // TODO(ngates): we could add Aligned(Arc<AVec>) from aligned-vec package
    /// A Buffer that wraps an Apache Arrow buffer
    Arrow(ArrowBuffer),

    /// A Buffer that wraps an owned [`bytes::Bytes`].
    Bytes(bytes::Bytes),
}

// SAFETY: NOTE(review): `Buffer` only wraps an `ArrowBuffer` or a `bytes::Bytes`; both are
// presumably `Send + Sync` themselves — TODO confirm. If they are, these manual impls are
// redundant with the auto traits, and keeping them would hide a future variant that is not
// thread-safe.
unsafe impl Send for Buffer {}
unsafe impl Sync for Buffer {}

impl Buffer {
    /// Create a new buffer of the provided length with all bytes set to `0u8`.
    /// If len is 0, does not perform any allocations.
    pub fn from_len_zeroed(len: usize) -> Self {
        Self::from(ArrowMutableBuffer::from_len_zeroed(len))
    }

    /// Length of the buffer in bytes
    pub fn len(&self) -> usize {
        match &self.0 {
            Inner::Arrow(b) => b.len(),
            Inner::Bytes(b) => b.len(),
        }
    }

    /// Predicate for empty buffers
    pub fn is_empty(&self) -> bool {
        match &self.0 {
            Inner::Arrow(b) => b.is_empty(),
            Inner::Bytes(b) => b.is_empty(),
        }
    }

    #[allow(clippy::same_name_method)]
    /// Return a new view on the buffer, but limited to the given index range.
    /// TODO(ngates): implement std::ops::Index
    pub fn slice(&self, range: Range<usize>) -> Self {
        match &self.0 {
            Inner::Arrow(b) => Buffer(Inner::Arrow(
                b.slice_with_length(range.start, range.end - range.start),
            )),
            Inner::Bytes(b) => {
                if range.is_empty() {
                    // bytes::Bytes::slice does not preserve alignment if the range is empty
                    let mut empty_b = b.clone();
                    empty_b.truncate(0);
                    Buffer(Inner::Bytes(empty_b))
                } else {
                    Buffer(Inner::Bytes(b.slice(range)))
                }
            }
        }
    }

    #[allow(clippy::same_name_method)]
    /// Access the buffer as an immutable byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match &self.0 {
            Inner::Arrow(b) => b.as_ref(),
            Inner::Bytes(b) => b.as_ref(),
        }
    }

    /// Convert the buffer into a `Vec` of the given native type `T`.
    ///
    /// # Ownership
    /// The caller takes ownership of the underlying memory.
    ///
    /// # Errors
    /// This method will fail if the underlying buffer is an owned [`bytes::Bytes`].
    ///
    /// This method will also fail if we attempt to pass a `T` that is not aligned to the `T` that
    /// it was originally allocated with.
    pub fn into_vec<T: ArrowNativeType>(self) -> Result<Vec<T>, Self> {
        match self.0 {
            Inner::Arrow(buffer) => buffer.into_vec::<T>().map_err(|b| Buffer(Inner::Arrow(b))),
            // Cannot convert bytes into a mutable vec
            Inner::Bytes(_) => Err(self),
        }
    }

    /// Convert a Buffer into an ArrowBuffer with no copying.
    pub fn into_arrow(self) -> ArrowBuffer {
        match self.0 {
            Inner::Arrow(a) => a,
            // This is cheeky. But it uses From<bytes::Bytes> for arrow_buffer::Bytes, even though
            // arrow_buffer::Bytes is only pub(crate). Seems weird...
            // See: https://github.com/apache/arrow-rs/issues/6033
            Inner::Bytes(b) => ArrowBuffer::from_bytes(b.into()),
        }
    }
}

impl PartialEq for Buffer {
    /// Two buffers are equal when their byte contents are equal, whatever backs them.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.as_slice(), other.as_slice());
        lhs == rhs
    }
}

// The `PartialEq` impl for `Buffer` compares byte slices, and `[u8]` equality is a full
// equivalence relation, so the `Eq` marker applies.
impl Eq for Buffer {}

impl PartialOrd for Buffer {
    /// Order buffers by comparing their contents as byte slices.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let lhs = self.as_slice();
        let rhs = other.as_slice();
        PartialOrd::partial_cmp(lhs, rhs)
    }
}

impl Deref for Buffer {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        self.as_slice()
    }
}

impl AsRef<[u8]> for Buffer {
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}

impl From<&'static [u8]> for Buffer {
    fn from(value: &'static [u8]) -> Self {
        Buffer(Inner::Bytes(bytes::Bytes::from_static(value)))
    }
}

impl From<&'static str> for Buffer {
    /// Wrap the bytes of a `'static` string in a [`bytes::Bytes`]-backed buffer.
    fn from(slice: &'static str) -> Buffer {
        let raw: &'static [u8] = slice.as_bytes();
        let shared = bytes::Bytes::from_static(raw);
        Buffer(Inner::Bytes(shared))
    }
}

impl<T: ArrowNativeType> From<Vec<T>> for Buffer {
    /// Take ownership of a `Vec` of native values and store it as an Arrow-backed buffer.
    fn from(value: Vec<T>) -> Self {
        // We prefer Arrow since it retains mutability
        let arrow = ArrowBuffer::from_vec(value);
        Self(Inner::Arrow(arrow))
    }
}

impl From<bytes::Bytes> for Buffer {
    fn from(value: bytes::Bytes) -> Self {
        Buffer(Inner::Bytes(value))
    }
}

impl From<ArrowBuffer> for Buffer {
    fn from(value: ArrowBuffer) -> Self {
        Buffer(Inner::Arrow(value))
    }
}

impl From<ArrowMutableBuffer> for Buffer {
    fn from(value: ArrowMutableBuffer) -> Self {
        Buffer(Inner::Arrow(ArrowBuffer::from(value)))
    }
}

impl FromIterator<u8> for Buffer {
    fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
        Buffer(Inner::Arrow(ArrowBuffer::from_iter(iter)))
    }
}