Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 156 additions & 14 deletions vortex-array/public-api.lock

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions vortex-array/src/arrays/extension/compute/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,23 +75,42 @@ mod tests {
use crate::extension::EmptyMetadata;
use crate::optimizer::ArrayOptimizer;
use crate::scalar::Scalar;
use crate::scalar::ScalarValue;

#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
struct TestExt;
impl ExtVTable for TestExt {
type Metadata = EmptyMetadata;
type NativeValue<'a> = &'a str;

fn id(&self) -> ExtId {
ExtId::new_ref("test_ext")
}

fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
Ok(vec![])
}

fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult<Self::Metadata> {
Ok(EmptyMetadata)
}

fn validate_dtype(
&self,
_options: &Self::Metadata,
_storage_dtype: &DType,
) -> VortexResult<()> {
Ok(())
}

fn unpack_native<'a>(
&self,
_metadata: &'a Self::Metadata,
_storage_dtype: &'a DType,
_storage_value: &'a ScalarValue,
) -> VortexResult<Self::NativeValue<'a>> {
Ok("")
}
}

fn test_ext_dtype() -> ExtDTypeRef {
Expand Down Expand Up @@ -164,18 +183,36 @@ mod tests {
struct TestExt2;
impl ExtVTable for TestExt2 {
type Metadata = EmptyMetadata;
type NativeValue<'a> = &'a str;

fn id(&self) -> ExtId {
ExtId::new_ref("test_ext_2")
}

fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
Ok(vec![])
}

fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult<Self::Metadata> {
Ok(EmptyMetadata)
}

fn validate_dtype(
&self,
_options: &Self::Metadata,
_storage_dtype: &DType,
) -> VortexResult<()> {
Ok(())
}

fn unpack_native<'a>(
&self,
_metadata: &'a Self::Metadata,
_storage_dtype: &'a DType,
_storage_value: &'a ScalarValue,
) -> VortexResult<Self::NativeValue<'a>> {
Ok("")
}
}

let ext_dtype1 = ExtDType::<TestExt>::try_new(
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/dtype/extension/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Extension DTypes, and interfaces for working with extension types (dtypes, scalars, and arrays).
//! Extension DTypes, and interfaces for working with extension types.
//!
//! ## File layout convention
//!
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/dtype/extension/plugin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ impl<V: ExtVTable> ExtDTypePlugin for V {
}

fn deserialize(&self, data: &[u8], storage_dtype: DType) -> VortexResult<ExtDTypeRef> {
let metadata = ExtVTable::deserialize(self, data)?;
let metadata = ExtVTable::deserialize_metadata(self, data)?;
Ok(ExtDType::try_with_vtable(self.clone(), metadata, storage_dtype)?.erased())
}
}
2 changes: 1 addition & 1 deletion vortex-array/src/dtype/extension/typed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ impl<V: ExtVTable> DynExtDType for ExtDTypeInner<V> {
}

fn metadata_serialize(&self) -> VortexResult<Vec<u8>> {
V::serialize(&self.vtable, &self.metadata)
V::serialize_metadata(&self.vtable, &self.metadata)
}

fn with_nullability(&self, nullability: Nullability) -> ExtDTypeRef {
Expand Down
62 changes: 46 additions & 16 deletions vortex-array/src/dtype/extension/vtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,70 @@ use std::fmt::Display;
use std::hash::Hash;

use vortex_error::VortexResult;
use vortex_error::vortex_bail;

use crate::dtype::DType;
use crate::dtype::extension::ExtId;
use crate::scalar::ScalarValue;

/// The public API for defining new extension types.
///
/// This is the non-object-safe trait that plugin authors implement to define a new extension
/// type. It specifies the type's identity, metadata, serialization, and validation.
pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash {
/// Associated type containing the deserialized metadata for this extension type
/// Associated type containing the deserialized metadata for this extension type.
type Metadata: 'static + Send + Sync + Clone + Debug + Display + Eq + Hash;

/// A native Rust value that represents a scalar of the extension type.
///
/// The value only represents non-null values. We denote nullable values as `Option<Value>`.
type NativeValue<'a>: Display;

/// Returns the ID for this extension type.
fn id(&self) -> ExtId;

// Methods related to the extension `DType`.

/// Serialize the metadata into a byte vector.
fn serialize(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
_ = metadata;
vortex_bail!(
"Serialization not implemented for extension type {}",
self.id()
);
}
fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>>;

/// Deserialize the metadata from a byte slice.
fn deserialize(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
_ = metadata;
vortex_bail!(
"Deserialization not implemented for extension type {}",
self.id()
);
}
fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult<Self::Metadata>;

/// Validate that the given storage type is compatible with this extension type.
fn validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &DType) -> VortexResult<()>;

// Methods related to the extension scalar values.

/// Validate the given storage value is compatible with the extension type.
///
/// By default, this calls [`unpack_native()`](ExtVTable::unpack_native) and discards the result.
///
/// # Errors
///
/// Returns an error if the storage [`ScalarValue`] is not compatible with the extension type.
fn validate_scalar_value(
&self,
metadata: &Self::Metadata,
storage_dtype: &DType,
storage_value: &ScalarValue,
) -> VortexResult<()> {
self.unpack_native(metadata, storage_dtype, storage_value)
.map(|_| ())
}

/// Validate and unpack a native value from the storage [`ScalarValue`].
///
/// Note that [`ExtVTable::validate_dtype()`] is always called first to validate the storage
/// [`DType`], and the [`Scalar`](crate::scalar::Scalar) implementation will verify that the
/// storage value is compatible with the storage dtype on construction.
///
/// # Errors
///
/// Returns an error if the storage [`ScalarValue`] is not compatible with the extension type.
fn unpack_native<'a>(
&self,
metadata: &'a Self::Metadata,
storage_dtype: &'a DType,
storage_value: &'a ScalarValue,
) -> VortexResult<Self::NativeValue<'a>>;
}
62 changes: 52 additions & 10 deletions vortex-array/src/extension/datetime/date.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::fmt;

use jiff::Span;
use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
use vortex_error::vortex_err;

Expand All @@ -13,11 +17,25 @@ use crate::dtype::extension::ExtDType;
use crate::dtype::extension::ExtId;
use crate::dtype::extension::ExtVTable;
use crate::extension::datetime::TimeUnit;
use crate::scalar::ScalarValue;

/// The Unix epoch date (1970-01-01).
const EPOCH: jiff::civil::Date = jiff::civil::Date::constant(1970, 1, 1);

/// Date DType.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct Date;

fn date_ptype(time_unit: &TimeUnit) -> Option<PType> {
match time_unit {
TimeUnit::Nanoseconds => None,
TimeUnit::Microseconds => None,
TimeUnit::Milliseconds => Some(PType::I64),
TimeUnit::Seconds => None,
TimeUnit::Days => Some(PType::I32),
}
}

impl Date {
/// Creates a new Date extension dtype with the given time unit and nullability.
///
Expand All @@ -38,18 +56,37 @@ impl Date {
}
}

/// Unpacked value of a [`Date`] extension scalar.
pub enum DateValue {
/// Days since the Unix epoch.
Days(i32),
/// Milliseconds since the Unix epoch.
Milliseconds(i64),
}

impl fmt::Display for DateValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let date = match self {
DateValue::Days(days) => EPOCH + Span::new().days(*days),
DateValue::Milliseconds(ms) => EPOCH + Span::new().milliseconds(*ms),
};
write!(f, "{}", date)
}
}

impl ExtVTable for Date {
type Metadata = TimeUnit;
type NativeValue<'a> = DateValue;

fn id(&self) -> ExtId {
ExtId::new_ref("vortex.date")
}

fn serialize(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
Ok(vec![u8::from(*metadata)])
}

fn deserialize(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
let tag = metadata[0];
TimeUnit::try_from(tag)
}
Expand All @@ -67,14 +104,19 @@ impl ExtVTable for Date {

Ok(())
}
}

fn date_ptype(time_unit: &TimeUnit) -> Option<PType> {
match time_unit {
TimeUnit::Nanoseconds => None,
TimeUnit::Microseconds => None,
TimeUnit::Milliseconds => Some(PType::I64),
TimeUnit::Seconds => None,
TimeUnit::Days => Some(PType::I32),
fn unpack_native(
&self,
metadata: &Self::Metadata,
_storage_dtype: &DType,
storage_value: &ScalarValue,
) -> VortexResult<Self::NativeValue<'_>> {
match metadata {
TimeUnit::Milliseconds => Ok(DateValue::Milliseconds(
storage_value.as_primitive().cast::<i64>()?,
)),
TimeUnit::Days => Ok(DateValue::Days(storage_value.as_primitive().cast::<i32>()?)),
_ => vortex_bail!("Date type does not support time unit {}", metadata),
}
}
}
Loading
Loading