From 30bae626ad28c3b2406b1846f7f7a73f5eaaf3b8 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 10:33:59 -0500 Subject: [PATCH 01/10] first commit Signed-off-by: Connor Tsui --- vortex-array/src/dtype/extension/mod.rs | 2 +- vortex-array/src/scalar/extension/erased.rs | 2 + vortex-array/src/scalar/extension/matcher.rs | 2 + vortex-array/src/scalar/extension/mod.rs | 50 ++++++++++++++++++++ vortex-array/src/scalar/extension/plugin.rs | 2 + vortex-array/src/scalar/extension/typed.rs | 2 + vortex-array/src/scalar/mod.rs | 2 + 7 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 vortex-array/src/scalar/extension/erased.rs create mode 100644 vortex-array/src/scalar/extension/matcher.rs create mode 100644 vortex-array/src/scalar/extension/mod.rs create mode 100644 vortex-array/src/scalar/extension/plugin.rs create mode 100644 vortex-array/src/scalar/extension/typed.rs diff --git a/vortex-array/src/dtype/extension/mod.rs b/vortex-array/src/dtype/extension/mod.rs index ed5d5b76b0b..65eb0ea13a6 100644 --- a/vortex-array/src/dtype/extension/mod.rs +++ b/vortex-array/src/dtype/extension/mod.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Extension DTypes, and interfaces for working with extension types (dtypes, scalars, and arrays). +//! Extension DTypes, and interfaces for working with extension types. //! //! ## File layout convention //! diff --git a/vortex-array/src/scalar/extension/erased.rs b/vortex-array/src/scalar/extension/erased.rs new file mode 100644 index 00000000000..0d735177e5d --- /dev/null +++ b/vortex-array/src/scalar/extension/erased.rs @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors diff --git a/vortex-array/src/scalar/extension/matcher.rs b/vortex-array/src/scalar/extension/matcher.rs new file mode 100644 index 00000000000..0d735177e5d --- /dev/null +++ b/vortex-array/src/scalar/extension/matcher.rs @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors diff --git a/vortex-array/src/scalar/extension/mod.rs b/vortex-array/src/scalar/extension/mod.rs new file mode 100644 index 00000000000..100660e870a --- /dev/null +++ b/vortex-array/src/scalar/extension/mod.rs @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Extension Scalar Values, and interfaces for working with them. +//! +//! We define normal [`Scalar`]s as the combination of a [`ScalarValue`] and a [`DType`]. +//! +//! Similarly, we define an extension [`Scalar`] as the combination of an [`ExtScalarValueRef`] and +//! an [`ExtDTypeRef`]. +//! +//! [`Scalar`]: crate::scalar::Scalar +//! [`ScalarValue`]: crate::scalar::ScalarValue +//! [`DType`]: crate::dtype::DType +//! [`ExtDTypeRef`]: crate::dtype::extension::ExtDTypeRef +//! +//! ## File layout convention +//! +//! Note that there is a single unified vtable for working with extension types at +//! [`vortex_array::dtype::extension::vtable`]. +//! +//! Every other vtable-backed concept `FooScalarValue` follows this module structure: +//! +//! - `plugin.rs`: TODO +//! - `typed.rs`: TODO +//! - `erased.rs`: TODO +//! - `matcher.rs`: TODO + +mod plugin; +// pub use plugin::ExtDTypePlugin; + +mod typed; +// pub use typed::ExtDType; + +mod erased; +// pub use erased::ExtDTypeRef; + +mod matcher; +// pub use matcher::Matcher; + +// /// Private module to seal [`typed::DynExtDType`]. +// mod sealed { +// use crate::dtype::extension::ExtVTable; +// use crate::dtype::extension::typed::ExtDTypeInner; + +// /// Marker trait to prevent external implementations of [`super::typed::DynExtDType`]. +// pub(crate) trait Sealed {} + +// /// This can be the **only** implementor for [`super::typed::DynExtDType`]. +// impl Sealed for ExtDTypeInner {} +// } diff --git a/vortex-array/src/scalar/extension/plugin.rs b/vortex-array/src/scalar/extension/plugin.rs new file mode 100644 index 00000000000..0d735177e5d --- /dev/null +++ b/vortex-array/src/scalar/extension/plugin.rs @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors diff --git a/vortex-array/src/scalar/extension/typed.rs b/vortex-array/src/scalar/extension/typed.rs new file mode 100644 index 00000000000..0d735177e5d --- /dev/null +++ b/vortex-array/src/scalar/extension/typed.rs @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors diff --git a/vortex-array/src/scalar/mod.rs b/vortex-array/src/scalar/mod.rs index a441d72e532..767f5723df3 100644 --- a/vortex-array/src/scalar/mod.rs +++ b/vortex-array/src/scalar/mod.rs @@ -44,6 +44,8 @@ mod typed_view; pub use scalar_value::*; pub use typed_view::*; +pub mod extension; + #[cfg(test)] mod tests; mod truncation; From 931d9f121638fb2b7aaf72e0b5230cd840ec5cd6 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 13:58:27 -0500 Subject: [PATCH 02/10] add extension scalar value infra Signed-off-by: Connor Tsui --- vortex-array/public-api.lock | 162 ++++++++++++++++ vortex-array/src/dtype/extension/vtable.rs | 60 ++++-- vortex-array/src/extension/datetime/date.rs | 61 ++++++ vortex-array/src/extension/datetime/time.rs | 88 +++++++++ .../src/extension/datetime/timestamp.rs | 96 ++++++++- vortex-array/src/scalar/extension/erased.rs | 168 ++++++++++++++++ vortex-array/src/scalar/extension/matcher.rs | 2 - vortex-array/src/scalar/extension/mod.rs | 34 ++-- vortex-array/src/scalar/extension/plugin.rs | 2 - vortex-array/src/scalar/extension/typed.rs | 183 ++++++++++++++++++ 10 files changed, 813 insertions(+), 43 deletions(-) delete mode 100644 vortex-array/src/scalar/extension/matcher.rs delete mode 100644 vortex-array/src/scalar/extension/plugin.rs diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 2abdf599602..02d33f1de5e 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -6366,50 +6366,74 @@ pub trait vortex_array::dtype::extension::ExtVTable: 'static + core::marker::Siz pub type vortex_array::dtype::extension::ExtVTable::Metadata: 'static + core::marker::Send + core::marker::Sync + core::clone::Clone + core::fmt::Debug + core::fmt::Display + core::cmp::Eq + core::hash::Hash +pub type vortex_array::dtype::extension::ExtVTable::Value<'a>: core::fmt::Display + pub fn vortex_array::dtype::extension::ExtVTable::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::dtype::extension::ExtVTable::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::dtype::extension::ExtVTable::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::dtype::extension::ExtVTable::unpack<'a>(&self, metadata: &'a Self::Metadata, storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> Self::Value + pub fn vortex_array::dtype::extension::ExtVTable::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::dtype::extension::ExtVTable::validate_scalar_value(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> + impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::datetime::Date pub type vortex_array::extension::datetime::Date::Metadata = vortex_array::extension::datetime::TimeUnit +pub type vortex_array::extension::datetime::Date::Value<'a> = vortex_array::extension::datetime::DateValue + pub fn vortex_array::extension::datetime::Date::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Date::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::extension::datetime::Date::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Date::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value + pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, _metadata: &::Metadata, _storage_dtype: &vortex_array::dtype::DType, _storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> + impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::datetime::Time pub type vortex_array::extension::datetime::Time::Metadata = vortex_array::extension::datetime::TimeUnit +pub type vortex_array::extension::datetime::Time::Value<'a> = vortex_array::extension::datetime::TimeValue + pub fn vortex_array::extension::datetime::Time::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Time::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::extension::datetime::Time::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Time::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value + pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> + impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::datetime::Timestamp pub type vortex_array::extension::datetime::Timestamp::Metadata = vortex_array::extension::datetime::TimestampOptions +pub type vortex_array::extension::datetime::Timestamp::Value<'a> = vortex_array::extension::datetime::TimestampValue<'a> + pub fn vortex_array::extension::datetime::Timestamp::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Timestamp::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::extension::datetime::Timestamp::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Timestamp::unpack<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> Self::Value + pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> + pub trait vortex_array::dtype::extension::Matcher pub type vortex_array::dtype::extension::Matcher::Match<'a> @@ -12192,6 +12216,16 @@ pub mod vortex_array::extension pub mod vortex_array::extension::datetime +pub enum vortex_array::extension::datetime::DateValue + +pub vortex_array::extension::datetime::DateValue::Days(i32) + +pub vortex_array::extension::datetime::DateValue::Milliseconds(i64) + +impl core::fmt::Display for vortex_array::extension::datetime::DateValue + +pub fn vortex_array::extension::datetime::DateValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub enum vortex_array::extension::datetime::TemporalJiff pub vortex_array::extension::datetime::TemporalJiff::Date(jiff::civil::date::Date) @@ -12306,6 +12340,34 @@ impl core::marker::Copy for vortex_array::extension::datetime::TimeUnit impl core::marker::StructuralPartialEq for vortex_array::extension::datetime::TimeUnit +pub enum vortex_array::extension::datetime::TimeValue + +pub vortex_array::extension::datetime::TimeValue::Microseconds(i64) + +pub vortex_array::extension::datetime::TimeValue::Milliseconds(i32) + +pub vortex_array::extension::datetime::TimeValue::Nanoseconds(i64) + +pub vortex_array::extension::datetime::TimeValue::Seconds(i32) + +impl core::fmt::Display for vortex_array::extension::datetime::TimeValue + +pub fn vortex_array::extension::datetime::TimeValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub enum vortex_array::extension::datetime::TimestampValue<'a> + +pub vortex_array::extension::datetime::TimestampValue::Microseconds(i64, core::option::Option<&'a alloc::sync::Arc>) + +pub vortex_array::extension::datetime::TimestampValue::Milliseconds(i64, core::option::Option<&'a alloc::sync::Arc>) + +pub vortex_array::extension::datetime::TimestampValue::Nanoseconds(i64, core::option::Option<&'a alloc::sync::Arc>) + +pub vortex_array::extension::datetime::TimestampValue::Seconds(i64, core::option::Option<&'a alloc::sync::Arc>) + +impl core::fmt::Display for vortex_array::extension::datetime::TimestampValue<'_> + +pub fn vortex_array::extension::datetime::TimestampValue<'_>::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + pub struct vortex_array::extension::datetime::AnyTemporal impl vortex_array::dtype::extension::Matcher for vortex_array::extension::datetime::AnyTemporal @@ -12350,14 +12412,20 @@ impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::date pub type vortex_array::extension::datetime::Date::Metadata = vortex_array::extension::datetime::TimeUnit +pub type vortex_array::extension::datetime::Date::Value<'a> = vortex_array::extension::datetime::DateValue + pub fn vortex_array::extension::datetime::Date::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Date::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::extension::datetime::Date::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Date::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value + pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, _metadata: &::Metadata, _storage_dtype: &vortex_array::dtype::DType, _storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> + pub struct vortex_array::extension::datetime::Time impl vortex_array::extension::datetime::Time @@ -12394,14 +12462,20 @@ impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::date pub type vortex_array::extension::datetime::Time::Metadata = vortex_array::extension::datetime::TimeUnit +pub type vortex_array::extension::datetime::Time::Value<'a> = vortex_array::extension::datetime::TimeValue + pub fn vortex_array::extension::datetime::Time::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Time::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::extension::datetime::Time::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Time::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value + pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> + pub struct vortex_array::extension::datetime::Timestamp impl vortex_array::extension::datetime::Timestamp @@ -12440,14 +12514,20 @@ impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::date pub type vortex_array::extension::datetime::Timestamp::Metadata = vortex_array::extension::datetime::TimestampOptions +pub type vortex_array::extension::datetime::Timestamp::Value<'a> = vortex_array::extension::datetime::TimestampValue<'a> + pub fn vortex_array::extension::datetime::Timestamp::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Timestamp::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::extension::datetime::Timestamp::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Timestamp::unpack<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> Self::Value + pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> +pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> + pub struct vortex_array::extension::datetime::TimestampOptions pub vortex_array::extension::datetime::TimestampOptions::tz: core::option::Option> @@ -13036,6 +13116,88 @@ pub const vortex_array::patches::PATCH_CHUNK_SIZE: usize pub mod vortex_array::scalar +pub mod vortex_array::scalar::extension + +pub struct vortex_array::scalar::extension::ExtScalarValue(_) + +impl vortex_array::scalar::extension::ExtScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValue::erased(self) -> vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValue::id(&self) -> vortex_array::dtype::extension::ExtId + +pub fn vortex_array::scalar::extension::ExtScalarValue::storage_value(&self) -> &vortex_array::scalar::ScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValue::try_new(ext_dtype: vortex_array::dtype::extension::ExtDType, storage: vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult + +pub fn vortex_array::scalar::extension::ExtScalarValue::vtable(&self) -> &V + +impl core::clone::Clone for vortex_array::scalar::extension::ExtScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValue::clone(&self) -> vortex_array::scalar::extension::ExtScalarValue + +impl core::cmp::Eq for vortex_array::scalar::extension::ExtScalarValue + +impl core::cmp::PartialEq for vortex_array::scalar::extension::ExtScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValue::eq(&self, other: &vortex_array::scalar::extension::ExtScalarValue) -> bool + +impl core::fmt::Debug for vortex_array::scalar::extension::ExtScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar::extension::ExtScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValue::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::scalar::extension::ExtScalarValue + +pub struct vortex_array::scalar::extension::ExtScalarValueRef(_) + +impl vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::downcast(self) -> vortex_array::scalar::extension::ExtScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::fmt_ext_scalar(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::get_vtable(&self) -> &V + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::id(&self) -> vortex_array::dtype::extension::ExtId + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::storage_value(&self) -> &vortex_array::scalar::ScalarValue + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::try_downcast(self) -> core::result::Result, vortex_array::scalar::extension::ExtScalarValueRef> + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::try_get_vtable(&self) -> core::option::Option<&V> + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::validate(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<()> + +impl core::clone::Clone for vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::clone(&self) -> vortex_array::scalar::extension::ExtScalarValueRef + +impl core::cmp::Eq for vortex_array::scalar::extension::ExtScalarValueRef + +impl core::cmp::PartialEq for vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::eq(&self, other: &Self) -> bool + +impl core::cmp::PartialOrd for vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::partial_cmp(&self, other: &Self) -> core::option::Option + +impl core::fmt::Debug for vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::fmt::Display for vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar::extension::ExtScalarValueRef + +pub fn vortex_array::scalar::extension::ExtScalarValueRef::hash(&self, state: &mut H) + pub enum vortex_array::scalar::DecimalValue pub vortex_array::scalar::DecimalValue::I128(i128) diff --git a/vortex-array/src/dtype/extension/vtable.rs b/vortex-array/src/dtype/extension/vtable.rs index e37a7be400b..984983f8aa5 100644 --- a/vortex-array/src/dtype/extension/vtable.rs +++ b/vortex-array/src/dtype/extension/vtable.rs @@ -6,40 +6,68 @@ use std::fmt::Display; use std::hash::Hash; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use crate::dtype::DType; use crate::dtype::extension::ExtId; +use crate::scalar::ScalarValue; /// The public API for defining new extension types. /// /// This is the non-object-safe trait that plugin authors implement to define a new extension /// type. It specifies the type's identity, metadata, serialization, and validation. pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { - /// Associated type containing the deserialized metadata for this extension type + /// Associated type containing the deserialized metadata for this extension type. type Metadata: 'static + Send + Sync + Clone + Debug + Display + Eq + Hash; + /// A native Rust value that represents a scalar of the extension type. + /// + /// The value only represents non-null values. We denote nullable values as `Option`. + type Value<'a>: Display; + /// Returns the ID for this extension type. fn id(&self) -> ExtId; + // Methods related to the extension `DType`. + + // TODO(connor): Should probably be called `serialize_metadata`. /// Serialize the metadata into a byte vector. - fn serialize(&self, metadata: &Self::Metadata) -> VortexResult> { - _ = metadata; - vortex_bail!( - "Serialization not implemented for extension type {}", - self.id() - ); - } + fn serialize(&self, metadata: &Self::Metadata) -> VortexResult>; + // TODO(connor): Should probably be called `deserialize_metadata`. /// Deserialize the metadata from a byte slice. - fn deserialize(&self, metadata: &[u8]) -> VortexResult { - _ = metadata; - vortex_bail!( - "Deserialization not implemented for extension type {}", - self.id() - ); - } + fn deserialize(&self, metadata: &[u8]) -> VortexResult; /// Validate that the given storage type is compatible with this extension type. fn validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &DType) -> VortexResult<()>; + + // Methods related to the extension scalar values. + + /// Validate the given storage value is compatible with the extension type. + /// + /// Note that [`ExtVTable::validate_dtype`] is always called first to validate the storage + /// [`DType`], and the [`Scalar`](crate::scalar::Scalar) implementation will verify that the + /// storage value is compatible with the storage dtype on construction. + /// + /// TODO more docs because it is not super obvious how to implement this at first. + /// + /// # Errors + /// + /// Returns an error if the storage [`ScalarValue`] is not compatible with the extension type. + fn validate_scalar_value( + &self, + metadata: &Self::Metadata, + storage_dtype: &DType, + storage_value: &ScalarValue, + ) -> VortexResult<()>; + + /// Unpack a native value from the storage ScalarValue. + /// + /// This call is infallible assuming the [`ExtVTable::validate_scalar_value`] function has been + /// called previously. + fn unpack<'a>( + &self, + metadata: &'a Self::Metadata, + storage_dtype: &'a DType, + storage_value: &'a ScalarValue, + ) -> Self::Value<'a>; } diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index ed29a0ae330..5f0a4a8454e 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::fmt; + +use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; @@ -13,6 +16,10 @@ use crate::dtype::extension::ExtDType; use crate::dtype::extension::ExtId; use crate::dtype::extension::ExtVTable; use crate::extension::datetime::TimeUnit; +use crate::scalar::ScalarValue; + +/// The Unix epoch date (1970-01-01). +const EPOCH: jiff::civil::Date = jiff::civil::Date::constant(1970, 1, 1); /// Date DType. #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] @@ -38,8 +45,27 @@ impl Date { } } +/// Unpacked value of a [`Date`] extension scalar. +pub enum DateValue { + /// Days since the Unix epoch. + Days(i32), + /// Milliseconds since the Unix epoch. + Milliseconds(i64), +} + +impl fmt::Display for DateValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let date = match self { + DateValue::Days(days) => EPOCH + Span::new().days(*days), + DateValue::Milliseconds(ms) => EPOCH + Span::new().milliseconds(*ms), + }; + write!(f, "{}", date) + } +} + impl ExtVTable for Date { type Metadata = TimeUnit; + type Value<'a> = DateValue; fn id(&self) -> ExtId { ExtId::new_ref("vortex.date") @@ -67,6 +93,41 @@ impl ExtVTable for Date { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + // We know that the dtype is correct for this extension type (primitive) by the + // precondition that `validate_dtype` has already been called successfully, and we know that + // the `Scalar` we came from has verified that the storage value is a primitive. + // We also say that any i32 or i64 is a valid date value, so we do not need to verify the + // values at all. + Ok(()) + } + + fn unpack( + &self, + metadata: &Self::Metadata, + _storage_dtype: &DType, + storage_value: &ScalarValue, + ) -> Self::Value<'_> { + match metadata { + TimeUnit::Milliseconds => { + DateValue::Milliseconds(storage_value.as_primitive().cast::().vortex_expect( + "The Scalar validation already checked that the value must be an i64", + )) + } + TimeUnit::Days => { + DateValue::Days(storage_value.as_primitive().cast::().vortex_expect( + "The Scalar validation already checked that the value must be an i32", + )) + } + _ => unreachable!(), + } + } } fn date_ptype(time_unit: &TimeUnit) -> Option { diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index e1e630f237b..0353a8e4e19 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::fmt; + +use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; @@ -13,6 +16,7 @@ use crate::dtype::extension::ExtDType; use crate::dtype::extension::ExtId; use crate::dtype::extension::ExtVTable; use crate::extension::datetime::TimeUnit; +use crate::scalar::ScalarValue; /// Time DType. #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] @@ -34,9 +38,38 @@ impl Time { } } +/// Unpacked value of a [`Time`] extension scalar. +pub enum TimeValue { + /// Seconds since midnight. + Seconds(i32), + /// Milliseconds since midnight. + Milliseconds(i32), + /// Microseconds since midnight. + Microseconds(i64), + /// Nanoseconds since midnight. + Nanoseconds(i64), +} + +impl fmt::Display for TimeValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let min = jiff::civil::Time::MIN; + + let time = match self { + TimeValue::Seconds(s) => min + Span::new().seconds(*s), + TimeValue::Milliseconds(ms) => min + Span::new().milliseconds(*ms), + TimeValue::Microseconds(us) => min + Span::new().microseconds(*us), + TimeValue::Nanoseconds(ns) => min + Span::new().nanoseconds(*ns), + }; + + write!(f, "{}", time) + } +} + impl ExtVTable for Time { type Metadata = TimeUnit; + type Value<'a> = TimeValue; + fn id(&self) -> ExtId { ExtId::new_ref("vortex.time") } @@ -63,6 +96,61 @@ impl ExtVTable for Time { Ok(()) } + + fn validate_scalar_value( + &self, + metadata: &Self::Metadata, + _storage_dtype: &DType, + storage_value: &ScalarValue, + ) -> VortexResult<()> { + let length_of_time = storage_value.as_primitive().cast::()?; + + // Validate the storage value is within the valid range for Time. + let span = match *metadata { + TimeUnit::Nanoseconds => Span::new().nanoseconds(length_of_time), + TimeUnit::Microseconds => Span::new().microseconds(length_of_time), + TimeUnit::Milliseconds => Span::new().milliseconds(length_of_time), + TimeUnit::Seconds => Span::new().seconds(length_of_time), + TimeUnit::Days => Span::new().days(length_of_time), + }; + + jiff::civil::Time::MIN + .checked_add(span) + .map_err(|e| vortex_err!("Invalid time scalar: {}", e))?; + + Ok(()) + } + + fn unpack( + &self, + metadata: &Self::Metadata, + _storage_dtype: &DType, + storage_value: &ScalarValue, + ) -> Self::Value<'_> { + match metadata { + TimeUnit::Seconds => { + TimeValue::Seconds(storage_value.as_primitive().cast::().vortex_expect( + "The Scalar validation already checked that the value must be an i32", + )) + } + TimeUnit::Milliseconds => { + TimeValue::Milliseconds(storage_value.as_primitive().cast::().vortex_expect( + "The Scalar validation already checked that the value must be an i32", + )) + } + TimeUnit::Microseconds => { + TimeValue::Microseconds(storage_value.as_primitive().cast::().vortex_expect( + "The Scalar validation already checked that the value must be an i64", + )) + } + TimeUnit::Nanoseconds => { + TimeValue::Nanoseconds(storage_value.as_primitive().cast::().vortex_expect( + "The Scalar validation already checked that the value must be an i64", + )) + } + _ => unreachable!(), + } + } } fn time_ptype(time_unit: &TimeUnit) -> Option { diff --git a/vortex-array/src/extension/datetime/timestamp.rs b/vortex-array/src/extension/datetime/timestamp.rs index 5b49f4b0ff9..f28a9760c3a 100644 --- a/vortex-array/src/extension/datetime/timestamp.rs +++ b/vortex-array/src/extension/datetime/timestamp.rs @@ -3,10 +3,10 @@ //! Temporal extension data types. -use std::fmt::Display; -use std::fmt::Formatter; +use std::fmt; use std::sync::Arc; +use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; @@ -20,6 +20,7 @@ use crate::dtype::extension::ExtDType; use crate::dtype::extension::ExtId; use crate::dtype::extension::ExtVTable; use crate::extension::datetime::TimeUnit; +use crate::scalar::ScalarValue; /// Timestamp DType. #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] @@ -63,8 +64,8 @@ pub struct TimestampOptions { pub tz: Option>, } -impl Display for TimestampOptions { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +impl fmt::Display for TimestampOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.tz { Some(tz) => write!(f, "{}, tz={}", self.unit, tz), None => write!(f, "{}", self.unit), @@ -72,9 +73,46 @@ impl Display for TimestampOptions { } } +/// Unpacked value of a [`Timestamp`] extension scalar. +/// +/// Each variant carries the raw storage value and an optional timezone. +pub enum TimestampValue<'a> { + /// Seconds since the Unix epoch. + Seconds(i64, Option<&'a Arc>), + /// Milliseconds since the Unix epoch. + Milliseconds(i64, Option<&'a Arc>), + /// Microseconds since the Unix epoch. + Microseconds(i64, Option<&'a Arc>), + /// Nanoseconds since the Unix epoch. + Nanoseconds(i64, Option<&'a Arc>), +} + +impl fmt::Display for TimestampValue<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let (span, tz) = match self { + TimestampValue::Seconds(v, tz) => (Span::new().seconds(*v), *tz), + TimestampValue::Milliseconds(v, tz) => (Span::new().milliseconds(*v), *tz), + TimestampValue::Microseconds(v, tz) => (Span::new().microseconds(*v), *tz), + TimestampValue::Nanoseconds(v, tz) => (Span::new().nanoseconds(*v), *tz), + }; + let ts = jiff::Timestamp::UNIX_EPOCH + span; + + match tz { + None => { + write!(f, "{}", ts) + } + Some(tz) => { + write!(f, "{}", ts.in_tz(tz.as_ref()).map_err(|_| fmt::Error)?) + } + } + } +} + impl ExtVTable for Timestamp { type Metadata = TimestampOptions; + type Value<'a> = TimestampValue<'a>; + fn id(&self) -> ExtId { ExtId::new_ref("vortex.timestamp") } @@ -142,4 +180,54 @@ impl ExtVTable for Timestamp { ); Ok(()) } + + fn validate_scalar_value( + &self, + metadata: &Self::Metadata, + _storage_dtype: &DType, + storage_value: &ScalarValue, + ) -> VortexResult<()> { + let length_of_time = storage_value.as_primitive().cast::()?; + + // Validate the storage value is within the valid range for Timestamp. + let span = match metadata.unit { + TimeUnit::Nanoseconds => Span::new().nanoseconds(length_of_time), + TimeUnit::Microseconds => Span::new().microseconds(length_of_time), + TimeUnit::Milliseconds => Span::new().milliseconds(length_of_time), + TimeUnit::Seconds => Span::new().seconds(length_of_time), + TimeUnit::Days => Span::new().days(length_of_time), + }; + + let ts = jiff::Timestamp::UNIX_EPOCH + .checked_add(span) + .map_err(|e| vortex_err!("Invalid timestamp scalar: {}", e))?; + + if let Some(tz) = &metadata.tz { + ts.in_tz(tz.as_ref()) + .map_err(|e| vortex_err!("Invalid timezone for timestamp scalar: {}", e))?; + } + + Ok(()) + } + + fn unpack<'a>( + &self, + metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + let ts_value = storage_value + .as_primitive() + .cast::() + .vortex_expect("The Scalar validation already checked that the value must be an i64"); + let tz = metadata.tz.as_ref(); + + match metadata.unit { + TimeUnit::Nanoseconds => TimestampValue::Nanoseconds(ts_value, tz), + TimeUnit::Microseconds => TimestampValue::Microseconds(ts_value, tz), + TimeUnit::Milliseconds => TimestampValue::Milliseconds(ts_value, tz), + TimeUnit::Seconds => TimestampValue::Seconds(ts_value, tz), + TimeUnit::Days => unreachable!(), + } + } } diff --git a/vortex-array/src/scalar/extension/erased.rs b/vortex-array/src/scalar/extension/erased.rs index 0d735177e5d..ab80eebcc1b 100644 --- a/vortex-array/src/scalar/extension/erased.rs +++ b/vortex-array/src/scalar/extension/erased.rs @@ -1,2 +1,170 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::any::type_name; +use std::cmp::Ordering; +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; +use std::sync::Arc; + +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_err; + +use crate::dtype::extension::ExtDTypeRef; +use crate::dtype::extension::ExtId; +use crate::dtype::extension::ExtVTable; +use crate::scalar::ScalarValue; +use crate::scalar::extension::ExtScalarValue; +use crate::scalar::extension::typed::DynExtScalarValue; +use crate::scalar::extension::typed::ExtScalarValueInner; + +/// A type-erased extension scalar value. +/// +/// This is the extension scalar analog of [`ExtDTypeRef`]: it stores an [`ExtVTable`] +/// and a storage [`ScalarValue`] behind a trait object, allowing heterogeneous storage inside +/// `ScalarValue::Extension` (so that we do not need a generic parameter). +/// +/// You can use [`try_downcast`] or [`downcast`] to recover the concrete vtable type as an +/// [`ExtScalarValue`]. +/// +/// [`try_downcast`]: ExtScalarValueRef::try_downcast +/// [`downcast`]: ExtScalarValueRef::downcast +#[derive(Clone)] +pub struct ExtScalarValueRef(pub(super) Arc); + +// NB: If you need access to the vtable, you probably want to add a method and implementation to +// `ExtScalarValueInnerImpl` and `ExtScalarValueInner`. +/// Methods for downcasting type-erased extension scalars. +impl ExtScalarValueRef { + /// Returns the [`ExtId`] identifying this extension scalar's type. + pub fn id(&self) -> ExtId { + self.0.id() + } + + /// Returns a reference to the underlying storage [`ScalarValue`]. + pub fn storage_value(&self) -> &ScalarValue { + self.0.storage_value() + } + + /// Formats the extension scalar using the provided [`ExtDTypeRef`] for metadata context. + /// + /// # Errors + /// + /// Returns an error if the underlying [`fmt::Write`] operation fails. + pub fn fmt_ext_scalar( + &self, + ext_dtype: &ExtDTypeRef, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result { + self.0.fmt_ext_scalar_value(ext_dtype, f) + } + + /// Attempts to downcast to a concrete [`ExtScalarValue`]. + /// + /// # Errors + /// + /// Returns `Err(self)` if the underlying vtable type does not match `V`. + pub fn try_downcast(self) -> Result, ExtScalarValueRef> { + // `ExtScalarValueInner` is the only implementor of `ExtScalarValueInnerImpl` (due to + // the sealed implementation below), so if the vtable is correct, we know the type can be + // downcasted and reinterpreted safely. + if !self.0.as_any().is::>() { + return Err(self); + } + + let ptr = Arc::into_raw(self.0) as *const ExtScalarValueInner; + // SAFETY: We verified the type matches above, so the size and alignment are correct. + let inner = unsafe { Arc::from_raw(ptr) }; + + Ok(ExtScalarValue(inner)) + } + + /// Downcasts to a concrete [`ExtScalarValue`]. + /// + /// # Panics + /// + /// Panics if the underlying vtable type does not match `V`. + pub fn downcast(self) -> ExtScalarValue { + self.try_downcast::() + .map_err(|this| { + vortex_err!( + "Failed to downcast ExtScalar {} to {}", + this.0.id(), + type_name::(), + ) + }) + .vortex_expect("Failed to downcast ExtScalar") + } + + /// Attempts to downcast the vtable to a concrete [`ExtVTable`] type by reference. + /// + /// Unlike [`try_downcast`], this borrows rather than consuming `self`. + /// + /// [`try_downcast`]: ExtScalarValueRef::try_downcast + pub fn try_get_vtable(&self) -> Option<&V> { + self.0.vtable_any().downcast_ref::() + } + + /// Downcasts the vtable to a concrete [`ExtVTable`] type by reference. + /// + /// Unlike [`downcast`], this borrows rather than consuming `self`. + /// + /// # Panics + /// + /// Panics if the underlying vtable type does not match `V`. + /// + /// [`downcast`]: ExtScalarValueRef::downcast + pub fn get_vtable(&self) -> &V { + self.try_get_vtable::() + .vortex_expect("ExtVTable downcast failed") + } + + /// Checks whether this extension scalar value is compatible with the given [`ExtDTypeRef`]. + /// + /// This validates that the vtable types match and that the storage value passes the + /// vtable's [`ExtVTable::validate_scalar_value`] check. + /// + /// # Errors + /// + /// Returns an error if it is not compatible with the extension type. + pub fn validate(&self, ext_dtype: &ExtDTypeRef) -> VortexResult<()> { + self.0.validate(ext_dtype) + } +} + +impl fmt::Display for ExtScalarValueRef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}({})", self.0.id(), self.0.storage_value()) + } +} + +impl fmt::Debug for ExtScalarValueRef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ExtScalar") + .field("id", &self.0.id()) + .field("storage_value", self.0.storage_value()) + .finish() + } +} + +impl PartialEq for ExtScalarValueRef { + fn eq(&self, other: &Self) -> bool { + self.0.id() == other.0.id() && self.0.storage_value() == other.0.storage_value() + } +} +impl Eq for ExtScalarValueRef {} + +impl PartialOrd for ExtScalarValueRef { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.storage_value().partial_cmp(other.0.storage_value()) + } +} + +impl Hash for ExtScalarValueRef { + fn hash(&self, state: &mut H) { + self.0.id().hash(state); + self.0.storage_value().hash(state); + } +} diff --git a/vortex-array/src/scalar/extension/matcher.rs b/vortex-array/src/scalar/extension/matcher.rs deleted file mode 100644 index 0d735177e5d..00000000000 --- a/vortex-array/src/scalar/extension/matcher.rs +++ /dev/null @@ -1,2 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors diff --git a/vortex-array/src/scalar/extension/mod.rs b/vortex-array/src/scalar/extension/mod.rs index 100660e870a..95409ff2f31 100644 --- a/vortex-array/src/scalar/extension/mod.rs +++ b/vortex-array/src/scalar/extension/mod.rs @@ -15,36 +15,32 @@ //! //! ## File layout convention //! -//! Note that there is a single unified vtable for working with extension types at -//! [`vortex_array::dtype::extension::vtable`]. +//! Note that there is a single unified vtable for working with extension types +//! [`ExtVTable`](crate::dtype::extension::ExtVTable). //! //! Every other vtable-backed concept `FooScalarValue` follows this module structure: //! //! - `plugin.rs`: TODO //! - `typed.rs`: TODO //! - `erased.rs`: TODO -//! - `matcher.rs`: TODO -mod plugin; -// pub use plugin::ExtDTypePlugin; +// mod plugin; +// pub use plugin::ExtScalarValuePlugin; mod typed; -// pub use typed::ExtDType; +pub use typed::ExtScalarValue; mod erased; -// pub use erased::ExtDTypeRef; +pub use erased::ExtScalarValueRef; -mod matcher; -// pub use matcher::Matcher; +/// Private module to seal [`DynExtScalarValue`]. +mod sealed { + use crate::dtype::extension::ExtVTable; + use crate::scalar::extension::typed::ExtScalarValueInner; -// /// Private module to seal [`typed::DynExtDType`]. -// mod sealed { -// use crate::dtype::extension::ExtVTable; -// use crate::dtype::extension::typed::ExtDTypeInner; + /// Marker trait to prevent external implementations of [`DynExtScalarValue`]. + pub(super) trait Sealed {} -// /// Marker trait to prevent external implementations of [`super::typed::DynExtDType`]. -// pub(crate) trait Sealed {} - -// /// This can be the **only** implementor for [`super::typed::DynExtDType`]. -// impl Sealed for ExtDTypeInner {} -// } + /// This can be the **only** implementor for [`super::typed::DynExtScalarValue`]. + impl Sealed for ExtScalarValueInner {} +} diff --git a/vortex-array/src/scalar/extension/plugin.rs b/vortex-array/src/scalar/extension/plugin.rs deleted file mode 100644 index 0d735177e5d..00000000000 --- a/vortex-array/src/scalar/extension/plugin.rs +++ /dev/null @@ -1,2 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors diff --git a/vortex-array/src/scalar/extension/typed.rs b/vortex-array/src/scalar/extension/typed.rs index 0d735177e5d..a0bbd2c136a 100644 --- a/vortex-array/src/scalar/extension/typed.rs +++ b/vortex-array/src/scalar/extension/typed.rs @@ -1,2 +1,185 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::any::Any; +use std::fmt; +use std::sync::Arc; + +use vortex_error::VortexResult; +use vortex_error::vortex_bail; + +use crate::dtype::extension::ExtDType; +use crate::dtype::extension::ExtDTypeRef; +use crate::dtype::extension::ExtId; +use crate::dtype::extension::ExtVTable; +use crate::scalar::ScalarValue; +use crate::scalar::extension::ExtScalarValueRef; + +/// A typed extension scalar value, parameterized by a concrete [`ExtVTable`]. +/// +/// This is the extension scalar analog of [`ExtDType`]: it retains full type information +/// about the vtable, providing direct access to the vtable and storage value without +/// downcasting. +/// +/// You can construct one of these via [`try_new()`] from an [`ExtDType`] and a storage +/// [`ScalarValue`], and erase the type with [`erased()`] to obtain an [`ExtScalarValueRef`]. +/// +/// [`try_new()`]: ExtScalarValue::try_new +/// [`erased()`]: ExtScalarValue::erased +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExtScalarValue(pub(super) Arc>); + +/// The concrete inner representation of an extension scalar, pairing a vtable with its storage +/// value. +/// +/// This is the sole implementor of [`DynExtScalarValue`], enabling [`ExtScalarValueRef`] to +/// downcast back to the concrete vtable type via [`Any`]. +#[derive(Debug, PartialEq, Eq, Hash)] +pub(super) struct ExtScalarValueInner { + /// The extension scalar vtable. + vtable: V, + /// The underlying storage value. + storage: ScalarValue, +} + +impl DynExtScalarValue for ExtScalarValueInner { + fn as_any(&self) -> &dyn Any { + self + } + + fn id(&self) -> ExtId { + self.vtable.id() + } + + fn vtable_any(&self) -> &dyn Any { + &self.vtable + } + + fn storage_value(&self) -> &ScalarValue { + &self.storage + } + + fn validate(&self, ext_dtype: &ExtDTypeRef) -> VortexResult<()> { + // Downcasting the metadata implicitly verifies the vtable types match, but we still want to + // validate the actual scalar value. + let Some(metadata) = ext_dtype.metadata_opt::() else { + vortex_bail!("extension scalar is not compatible with type {ext_dtype}"); + }; + + self.vtable + .validate_scalar_value(metadata, ext_dtype.storage_dtype(), &self.storage) + } + + fn fmt_ext_scalar_value( + &self, + ext_dtype: &ExtDTypeRef, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result { + let value = V::unpack( + &self.vtable, + ext_dtype.metadata::(), + ext_dtype.storage_dtype(), + &self.storage, + ); + + write!(f, "{value}") + } +} + +/// An object-safe, sealed trait encapsulating the behavior for extension scalar values. +/// +/// This mirrors [`DynExtDType`] in `vortex-dtype`: it provides type-erased access to the +/// extension scalar's identity, storage value, and display formatting. The only implementor is +/// [`ExtScalarValueInner`]. +/// +/// [`DynExtDType`]: crate::dtype::extension::DynExtDType +pub(super) trait DynExtScalarValue: super::sealed::Sealed + 'static + Send + Sync { + /// Returns `self` as a trait object for downcasting. + fn as_any(&self) -> &dyn Any; + /// Returns the [`ExtID`] identifying this extension type. + fn id(&self) -> ExtId; + /// Returns the vtable as a trait object for downcasting. + fn vtable_any(&self) -> &dyn Any; + /// Returns a reference to the underlying storage [`ScalarValue`]. + fn storage_value(&self) -> &ScalarValue; + /// Checks whether this extension scalar value is compatible with the given [`ExtDTypeRef`]. + fn validate(&self, ext_dtype: &ExtDTypeRef) -> VortexResult<()>; + /// Formats the extension scalar using the provided [`ExtDTypeRef`] for metadata context. + fn fmt_ext_scalar_value( + &self, + ext_dtype: &ExtDTypeRef, + f: &mut fmt::Formatter<'_>, + ) -> fmt::Result; +} + +impl ExtScalarValue { + /// Creates a new extension scalar from a storage [`ScalarValue`], validating it against the + /// given [`ExtDType`]. + /// + /// # Errors + /// + /// Returns an error if [`ExtVTable::validate_scalar_value`] fails for the given + /// storage value and extension dtype. + pub fn try_new(ext_dtype: ExtDType, storage: ScalarValue) -> VortexResult { + ExtVTable::validate_scalar_value( + ext_dtype.vtable(), + ext_dtype.metadata(), + ext_dtype.storage_dtype(), + &storage, + )?; + + Ok(Self(Arc::new(ExtScalarValueInner:: { + vtable: ext_dtype.vtable().clone(), + storage, + }))) + } + + /// Erases the concrete type information, returning a type-erased [`ExtScalarValueRef`]. + pub fn erased(self) -> ExtScalarValueRef { + ExtScalarValueRef(self.0) + } + + /// Returns the [`ExtId`] identifying this extension scalar's type. + pub fn id(&self) -> ExtId { + self.0.vtable.id() + } + + /// Returns a reference to the [`ExtVTable`] for this extension scalar. + pub fn vtable(&self) -> &V { + &self.0.vtable + } + + /// Returns a reference to the underlying storage [`ScalarValue`]. + pub fn storage_value(&self) -> &ScalarValue { + self.0.storage_value() + } + + // pub(crate) fn cast(&self, dtype: &DType) -> VortexResult { + // if self.value.is_none() && !dtype.is_nullable() { + // vortex_bail!( + // "cannot cast extension dtype with id {} and storage type {} to {}", + // self.ext_dtype.id(), + // self.ext_dtype.storage_dtype(), + // dtype + // ); + // } + // + // if self.ext_dtype.storage_dtype().eq_ignore_nullability(dtype) { + // // Casting from an extension type to the underlying storage type is OK. + // return Ok(Scalar::new(dtype.clone(), self.value.clone())); + // } + // + // if let DType::Extension(ext_dtype) = dtype + // && self.ext_dtype.eq_ignore_nullability(ext_dtype) + // { + // return Ok(Scalar::new(dtype.clone(), self.value.clone())); + // } + // + // vortex_bail!( + // "cannot cast extension dtype with id {} and storage type {} to {}", + // self.ext_dtype.id(), + // self.ext_dtype.storage_dtype(), + // dtype + // ); + // } +} From cb7a62314f2bb71049c756a7bc597fc8bafd3e45 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 14:23:54 -0500 Subject: [PATCH 03/10] update tests Signed-off-by: Connor Tsui --- .../src/arrays/extension/compute/rules.rs | 55 +++++++++++++ vortex-array/src/scalar/arrow.rs | 20 +++++ vortex-array/src/scalar/tests/casting.rs | 82 +++++++++++++++++++ .../src/scalar/typed_view/extension/tests.rs | 82 +++++++++++++++++++ vortex-duckdb/src/convert/dtype.rs | 28 +++++++ 5 files changed, 267 insertions(+) diff --git a/vortex-array/src/arrays/extension/compute/rules.rs b/vortex-array/src/arrays/extension/compute/rules.rs index a6d890dae4e..8eb1aaa5227 100644 --- a/vortex-array/src/arrays/extension/compute/rules.rs +++ b/vortex-array/src/arrays/extension/compute/rules.rs @@ -75,16 +75,26 @@ mod tests { use crate::extension::EmptyMetadata; use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; + use crate::scalar::ScalarValue; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] struct TestExt; impl ExtVTable for TestExt { type Metadata = EmptyMetadata; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + Ok(vec![]) + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + Ok(EmptyMetadata) + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -92,6 +102,24 @@ mod tests { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } fn test_ext_dtype() -> ExtDTypeRef { @@ -164,11 +192,20 @@ mod tests { struct TestExt2; impl ExtVTable for TestExt2 { type Metadata = EmptyMetadata; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext_2") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + Ok(vec![]) + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + Ok(EmptyMetadata) + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -176,6 +213,24 @@ mod tests { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } let ext_dtype1 = ExtDType::::try_new( diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index 2b15c9e7ef7..c9b03ead83e 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -212,6 +212,7 @@ mod tests { use crate::extension::datetime::TimestampOptions; use crate::scalar::DecimalValue; use crate::scalar::Scalar; + use crate::scalar::ScalarValue; #[test] fn test_null_scalar_to_arrow() { @@ -449,6 +450,7 @@ mod tests { struct SomeExt; impl ExtVTable for SomeExt { type Metadata = String; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("some_ext") @@ -469,6 +471,24 @@ mod tests { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } let scalar = Scalar::extension::( diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index d63f5c6d92a..cebfa331196 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -9,6 +9,7 @@ mod tests { use vortex_error::VortexExpect; use vortex_error::VortexResult; + use vortex_error::vortex_bail; use crate::dtype::DType; use crate::dtype::FieldDType; @@ -28,11 +29,20 @@ mod tests { impl ExtVTable for Apples { type Metadata = usize; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("apples") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + Ok(vec![]) + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + Ok(0) + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -40,6 +50,24 @@ mod tests { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } impl Apples { @@ -231,11 +259,20 @@ mod tests { struct F16Ext; impl ExtVTable for F16Ext { type Metadata = usize; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("f16_ext") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + vortex_bail!("not implemented") + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + vortex_bail!("not implemented") + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -243,6 +280,24 @@ mod tests { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } let storage_dtype = DType::Primitive(PType::F16, Nullability::NonNullable); @@ -276,11 +331,20 @@ mod tests { struct StructExt; impl ExtVTable for StructExt { type Metadata = usize; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("struct_ext") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + vortex_bail!("not implemented") + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + vortex_bail!("not implemented") + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -288,6 +352,24 @@ mod tests { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } let struct_dtype = DType::Struct( diff --git a/vortex-array/src/scalar/typed_view/extension/tests.rs b/vortex-array/src/scalar/typed_view/extension/tests.rs index 0a683a085bb..22a40590802 100644 --- a/vortex-array/src/scalar/typed_view/extension/tests.rs +++ b/vortex-array/src/scalar/typed_view/extension/tests.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_error::VortexResult; +use vortex_error::vortex_bail; use crate::dtype::DType; use crate::dtype::Nullability; @@ -19,11 +20,20 @@ use crate::scalar::ScalarValue; struct TestI32Ext; impl ExtVTable for TestI32Ext { type Metadata = EmptyMetadata; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + Ok(vec![]) + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + Ok(EmptyMetadata) + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -31,6 +41,24 @@ impl ExtVTable for TestI32Ext { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } impl TestI32Ext { @@ -90,11 +118,20 @@ fn test_ext_scalar_partial_ord_different_types() { struct TestExt2; impl ExtVTable for TestExt2 { type Metadata = EmptyMetadata; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext_2") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + Ok(vec![]) + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + Ok(EmptyMetadata) + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -102,6 +139,24 @@ fn test_ext_scalar_partial_ord_different_types() { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } let scalar1 = Scalar::extension::( @@ -269,11 +324,20 @@ fn test_ext_scalar_with_metadata() { struct TestExtMetadata; impl ExtVTable for TestExtMetadata { type Metadata = usize; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext_metadata") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + vortex_bail!("not implemented") + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + vortex_bail!("not implemented") + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -281,6 +345,24 @@ fn test_ext_scalar_with_metadata() { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } let scalar = Scalar::extension::( diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index 332899b6abc..ed8d8120424 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -347,6 +347,7 @@ mod tests { use vortex::extension::datetime::Date; use vortex::extension::datetime::Time; use vortex::extension::datetime::Timestamp; + use vortex::scalar::ScalarValue; use crate::cpp; use crate::duckdb::LogicalType; @@ -576,11 +577,20 @@ mod tests { struct TestExt; impl ExtVTable for TestExt { type Metadata = EmptyMetadata; + type Value<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("unknown.extension") } + fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + Ok(vec![]) + } + + fn deserialize(&self, _data: &[u8]) -> VortexResult { + Ok(EmptyMetadata) + } + fn validate_dtype( &self, _options: &Self::Metadata, @@ -588,6 +598,24 @@ mod tests { ) -> VortexResult<()> { Ok(()) } + + fn validate_scalar_value( + &self, + _metadata: &Self::Metadata, + _storage_dtype: &DType, + _storage_value: &ScalarValue, + ) -> VortexResult<()> { + Ok(()) + } + + fn unpack<'a>( + &self, + _metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + _storage_value: &'a ScalarValue, + ) -> Self::Value<'a> { + "" + } } let ext_dtype = ExtDType::::try_new( From 56a6cc60204a467859d45de3bc5b91e5e5f06a50 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 14:44:52 -0500 Subject: [PATCH 04/10] some concerns Signed-off-by: Connor Tsui --- vortex-array/src/dtype/extension/vtable.rs | 3 +-- vortex-array/src/extension/datetime/date.rs | 2 +- vortex-array/src/extension/datetime/time.rs | 3 ++- vortex-array/src/scalar/extension/erased.rs | 3 +++ vortex-array/src/scalar/extension/mod.rs | 9 +++---- vortex-array/src/scalar/extension/typed.rs | 29 --------------------- 6 files changed, 10 insertions(+), 39 deletions(-) diff --git a/vortex-array/src/dtype/extension/vtable.rs b/vortex-array/src/dtype/extension/vtable.rs index 984983f8aa5..b7a2d74d7b2 100644 --- a/vortex-array/src/dtype/extension/vtable.rs +++ b/vortex-array/src/dtype/extension/vtable.rs @@ -42,14 +42,13 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { // Methods related to the extension scalar values. + // TODO(connor): more docs because it is not super obvious how to implement this at first. /// Validate the given storage value is compatible with the extension type. /// /// Note that [`ExtVTable::validate_dtype`] is always called first to validate the storage /// [`DType`], and the [`Scalar`](crate::scalar::Scalar) implementation will verify that the /// storage value is compatible with the storage dtype on construction. /// - /// TODO more docs because it is not super obvious how to implement this at first. - /// /// # Errors /// /// Returns an error if the storage [`ScalarValue`] is not compatible with the extension type. diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index 5f0a4a8454e..f20d33c0720 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -96,7 +96,7 @@ impl ExtVTable for Date { fn validate_scalar_value( &self, - _metadata: &::Metadata, + _metadata: &Self::Metadata, _storage_dtype: &DType, _storage_value: &ScalarValue, ) -> VortexResult<()> { diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index 0353a8e4e19..aebfaed4d74 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -6,6 +6,7 @@ use std::fmt; use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; @@ -111,7 +112,7 @@ impl ExtVTable for Time { TimeUnit::Microseconds => Span::new().microseconds(length_of_time), TimeUnit::Milliseconds => Span::new().milliseconds(length_of_time), TimeUnit::Seconds => Span::new().seconds(length_of_time), - TimeUnit::Days => Span::new().days(length_of_time), + d @ TimeUnit::Days => vortex_bail!("Time type does not support time unit {d}"), }; jiff::civil::Time::MIN diff --git a/vortex-array/src/scalar/extension/erased.rs b/vortex-array/src/scalar/extension/erased.rs index ab80eebcc1b..fbe79420364 100644 --- a/vortex-array/src/scalar/extension/erased.rs +++ b/vortex-array/src/scalar/extension/erased.rs @@ -134,6 +134,7 @@ impl ExtScalarValueRef { } } +// TODO(connor): Do we disallow this because we do not have an extdtype? impl fmt::Display for ExtScalarValueRef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}({})", self.0.id(), self.0.storage_value()) @@ -149,6 +150,8 @@ impl fmt::Debug for ExtScalarValueRef { } } +// TODO(connor): I feel like there is something wrong with these... + impl PartialEq for ExtScalarValueRef { fn eq(&self, other: &Self) -> bool { self.0.id() == other.0.id() && self.0.storage_value() == other.0.storage_value() diff --git a/vortex-array/src/scalar/extension/mod.rs b/vortex-array/src/scalar/extension/mod.rs index 95409ff2f31..f1550439bb5 100644 --- a/vortex-array/src/scalar/extension/mod.rs +++ b/vortex-array/src/scalar/extension/mod.rs @@ -17,13 +17,10 @@ //! //! Note that there is a single unified vtable for working with extension types //! [`ExtVTable`](crate::dtype::extension::ExtVTable). -//! -//! Every other vtable-backed concept `FooScalarValue` follows this module structure: -//! -//! - `plugin.rs`: TODO -//! - `typed.rs`: TODO -//! - `erased.rs`: TODO +// TODO(connor): Docs on file structure. + +// TODO(connor): Figure out what this should look like. // mod plugin; // pub use plugin::ExtScalarValuePlugin; diff --git a/vortex-array/src/scalar/extension/typed.rs b/vortex-array/src/scalar/extension/typed.rs index a0bbd2c136a..bbbdb0ef4b0 100644 --- a/vortex-array/src/scalar/extension/typed.rs +++ b/vortex-array/src/scalar/extension/typed.rs @@ -153,33 +153,4 @@ impl ExtScalarValue { pub fn storage_value(&self) -> &ScalarValue { self.0.storage_value() } - - // pub(crate) fn cast(&self, dtype: &DType) -> VortexResult { - // if self.value.is_none() && !dtype.is_nullable() { - // vortex_bail!( - // "cannot cast extension dtype with id {} and storage type {} to {}", - // self.ext_dtype.id(), - // self.ext_dtype.storage_dtype(), - // dtype - // ); - // } - // - // if self.ext_dtype.storage_dtype().eq_ignore_nullability(dtype) { - // // Casting from an extension type to the underlying storage type is OK. - // return Ok(Scalar::new(dtype.clone(), self.value.clone())); - // } - // - // if let DType::Extension(ext_dtype) = dtype - // && self.ext_dtype.eq_ignore_nullability(ext_dtype) - // { - // return Ok(Scalar::new(dtype.clone(), self.value.clone())); - // } - // - // vortex_bail!( - // "cannot cast extension dtype with id {} and storage type {} to {}", - // self.ext_dtype.id(), - // self.ext_dtype.storage_dtype(), - // dtype - // ); - // } } From 9c709c7eecaf125998fd2f575cf374d37935cf83 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 14:54:16 -0500 Subject: [PATCH 05/10] rename to native scalar value Signed-off-by: Connor Tsui --- .../src/arrays/extension/compute/rules.rs | 12 ++++++------ vortex-array/src/dtype/extension/vtable.rs | 6 +++--- vortex-array/src/extension/datetime/date.rs | 6 +++--- vortex-array/src/extension/datetime/time.rs | 6 +++--- .../src/extension/datetime/timestamp.rs | 6 +++--- vortex-array/src/scalar/arrow.rs | 6 +++--- vortex-array/src/scalar/extension/typed.rs | 2 +- vortex-array/src/scalar/tests/casting.rs | 18 +++++++++--------- .../src/scalar/typed_view/extension/tests.rs | 18 +++++++++--------- vortex-duckdb/src/convert/dtype.rs | 6 +++--- 10 files changed, 43 insertions(+), 43 deletions(-) diff --git a/vortex-array/src/arrays/extension/compute/rules.rs b/vortex-array/src/arrays/extension/compute/rules.rs index 8eb1aaa5227..beb4c1f0097 100644 --- a/vortex-array/src/arrays/extension/compute/rules.rs +++ b/vortex-array/src/arrays/extension/compute/rules.rs @@ -81,7 +81,7 @@ mod tests { struct TestExt; impl ExtVTable for TestExt { type Metadata = EmptyMetadata; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext") @@ -112,12 +112,12 @@ mod tests { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } @@ -192,7 +192,7 @@ mod tests { struct TestExt2; impl ExtVTable for TestExt2 { type Metadata = EmptyMetadata; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext_2") @@ -223,12 +223,12 @@ mod tests { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } diff --git a/vortex-array/src/dtype/extension/vtable.rs b/vortex-array/src/dtype/extension/vtable.rs index b7a2d74d7b2..878828179d8 100644 --- a/vortex-array/src/dtype/extension/vtable.rs +++ b/vortex-array/src/dtype/extension/vtable.rs @@ -22,7 +22,7 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { /// A native Rust value that represents a scalar of the extension type. /// /// The value only represents non-null values. We denote nullable values as `Option`. - type Value<'a>: Display; + type NativeValue<'a>: Display; /// Returns the ID for this extension type. fn id(&self) -> ExtId; @@ -63,10 +63,10 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { /// /// This call is infallible assuming the [`ExtVTable::validate_scalar_value`] function has been /// called previously. - fn unpack<'a>( + fn unpack_native<'a>( &self, metadata: &'a Self::Metadata, storage_dtype: &'a DType, storage_value: &'a ScalarValue, - ) -> Self::Value<'a>; + ) -> Self::NativeValue<'a>; } diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index f20d33c0720..79e17c3c8cc 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -65,7 +65,7 @@ impl fmt::Display for DateValue { impl ExtVTable for Date { type Metadata = TimeUnit; - type Value<'a> = DateValue; + type NativeValue<'a> = DateValue; fn id(&self) -> ExtId { ExtId::new_ref("vortex.date") @@ -108,12 +108,12 @@ impl ExtVTable for Date { Ok(()) } - fn unpack( + fn unpack_native( &self, metadata: &Self::Metadata, _storage_dtype: &DType, storage_value: &ScalarValue, - ) -> Self::Value<'_> { + ) -> Self::NativeValue<'_> { match metadata { TimeUnit::Milliseconds => { DateValue::Milliseconds(storage_value.as_primitive().cast::().vortex_expect( diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index aebfaed4d74..fe0df807a2a 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -69,7 +69,7 @@ impl fmt::Display for TimeValue { impl ExtVTable for Time { type Metadata = TimeUnit; - type Value<'a> = TimeValue; + type NativeValue<'a> = TimeValue; fn id(&self) -> ExtId { ExtId::new_ref("vortex.time") @@ -122,12 +122,12 @@ impl ExtVTable for Time { Ok(()) } - fn unpack( + fn unpack_native( &self, metadata: &Self::Metadata, _storage_dtype: &DType, storage_value: &ScalarValue, - ) -> Self::Value<'_> { + ) -> Self::NativeValue<'_> { match metadata { TimeUnit::Seconds => { TimeValue::Seconds(storage_value.as_primitive().cast::().vortex_expect( diff --git a/vortex-array/src/extension/datetime/timestamp.rs b/vortex-array/src/extension/datetime/timestamp.rs index f28a9760c3a..96ee7b821a8 100644 --- a/vortex-array/src/extension/datetime/timestamp.rs +++ b/vortex-array/src/extension/datetime/timestamp.rs @@ -111,7 +111,7 @@ impl fmt::Display for TimestampValue<'_> { impl ExtVTable for Timestamp { type Metadata = TimestampOptions; - type Value<'a> = TimestampValue<'a>; + type NativeValue<'a> = TimestampValue<'a>; fn id(&self) -> ExtId { ExtId::new_ref("vortex.timestamp") @@ -210,12 +210,12 @@ impl ExtVTable for Timestamp { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, metadata: &'a Self::Metadata, _storage_dtype: &'a DType, storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { let ts_value = storage_value .as_primitive() .cast::() diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index c9b03ead83e..c6db04bb8d0 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -450,7 +450,7 @@ mod tests { struct SomeExt; impl ExtVTable for SomeExt { type Metadata = String; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("some_ext") @@ -481,12 +481,12 @@ mod tests { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } diff --git a/vortex-array/src/scalar/extension/typed.rs b/vortex-array/src/scalar/extension/typed.rs index bbbdb0ef4b0..25f549417a2 100644 --- a/vortex-array/src/scalar/extension/typed.rs +++ b/vortex-array/src/scalar/extension/typed.rs @@ -75,7 +75,7 @@ impl DynExtScalarValue for ExtScalarValueInner { ext_dtype: &ExtDTypeRef, f: &mut fmt::Formatter<'_>, ) -> fmt::Result { - let value = V::unpack( + let value = V::unpack_native( &self.vtable, ext_dtype.metadata::(), ext_dtype.storage_dtype(), diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index cebfa331196..23d510a5313 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -29,7 +29,7 @@ mod tests { impl ExtVTable for Apples { type Metadata = usize; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("apples") @@ -60,12 +60,12 @@ mod tests { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } @@ -259,7 +259,7 @@ mod tests { struct F16Ext; impl ExtVTable for F16Ext { type Metadata = usize; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("f16_ext") @@ -290,12 +290,12 @@ mod tests { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } @@ -331,7 +331,7 @@ mod tests { struct StructExt; impl ExtVTable for StructExt { type Metadata = usize; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("struct_ext") @@ -362,12 +362,12 @@ mod tests { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } diff --git a/vortex-array/src/scalar/typed_view/extension/tests.rs b/vortex-array/src/scalar/typed_view/extension/tests.rs index 22a40590802..a12089b1632 100644 --- a/vortex-array/src/scalar/typed_view/extension/tests.rs +++ b/vortex-array/src/scalar/typed_view/extension/tests.rs @@ -20,7 +20,7 @@ use crate::scalar::ScalarValue; struct TestI32Ext; impl ExtVTable for TestI32Ext { type Metadata = EmptyMetadata; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext") @@ -51,12 +51,12 @@ impl ExtVTable for TestI32Ext { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } @@ -118,7 +118,7 @@ fn test_ext_scalar_partial_ord_different_types() { struct TestExt2; impl ExtVTable for TestExt2 { type Metadata = EmptyMetadata; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext_2") @@ -149,12 +149,12 @@ fn test_ext_scalar_partial_ord_different_types() { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } @@ -324,7 +324,7 @@ fn test_ext_scalar_with_metadata() { struct TestExtMetadata; impl ExtVTable for TestExtMetadata { type Metadata = usize; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("test_ext_metadata") @@ -355,12 +355,12 @@ fn test_ext_scalar_with_metadata() { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index ed8d8120424..d829308ecac 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -577,7 +577,7 @@ mod tests { struct TestExt; impl ExtVTable for TestExt { type Metadata = EmptyMetadata; - type Value<'a> = &'a str; + type NativeValue<'a> = &'a str; fn id(&self) -> ExtId { ExtId::new_ref("unknown.extension") @@ -608,12 +608,12 @@ mod tests { Ok(()) } - fn unpack<'a>( + fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::Value<'a> { + ) -> Self::NativeValue<'a> { "" } } From be738263f45d495c8301952e883af99f2429c5ea Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 15:28:22 -0500 Subject: [PATCH 06/10] unify methods Signed-off-by: Connor Tsui --- vortex-array/public-api.lock | 40 ++++++---------- .../src/arrays/extension/compute/rules.rs | 26 ++-------- vortex-array/src/dtype/extension/vtable.rs | 23 +++++---- vortex-array/src/extension/datetime/date.rs | 33 +++---------- vortex-array/src/extension/datetime/time.rs | 47 ++++++------------- .../src/extension/datetime/timestamp.rs | 46 +++++++----------- vortex-array/src/scalar/arrow.rs | 13 +---- vortex-array/src/scalar/extension/typed.rs | 7 +-- vortex-array/src/scalar/tests/casting.rs | 39 +++------------ .../src/scalar/typed_view/extension/tests.rs | 39 +++------------ vortex-duckdb/src/convert/dtype.rs | 13 +---- 11 files changed, 91 insertions(+), 235 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 02d33f1de5e..8f9d1253eb0 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -6366,7 +6366,7 @@ pub trait vortex_array::dtype::extension::ExtVTable: 'static + core::marker::Siz pub type vortex_array::dtype::extension::ExtVTable::Metadata: 'static + core::marker::Send + core::marker::Sync + core::clone::Clone + core::fmt::Debug + core::fmt::Display + core::cmp::Eq + core::hash::Hash -pub type vortex_array::dtype::extension::ExtVTable::Value<'a>: core::fmt::Display +pub type vortex_array::dtype::extension::ExtVTable::NativeValue<'a>: core::fmt::Display pub fn vortex_array::dtype::extension::ExtVTable::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult @@ -6374,7 +6374,7 @@ pub fn vortex_array::dtype::extension::ExtVTable::id(&self) -> vortex_array::dty pub fn vortex_array::dtype::extension::ExtVTable::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> -pub fn vortex_array::dtype::extension::ExtVTable::unpack<'a>(&self, metadata: &'a Self::Metadata, storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> Self::Value +pub fn vortex_array::dtype::extension::ExtVTable::unpack_native<'a>(&self, metadata: &'a Self::Metadata, storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::dtype::extension::ExtVTable::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> @@ -6384,7 +6384,7 @@ impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::date pub type vortex_array::extension::datetime::Date::Metadata = vortex_array::extension::datetime::TimeUnit -pub type vortex_array::extension::datetime::Date::Value<'a> = vortex_array::extension::datetime::DateValue +pub type vortex_array::extension::datetime::Date::NativeValue<'a> = vortex_array::extension::datetime::DateValue pub fn vortex_array::extension::datetime::Date::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult @@ -6392,17 +6392,15 @@ pub fn vortex_array::extension::datetime::Date::id(&self) -> vortex_array::dtype pub fn vortex_array::extension::datetime::Date::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> -pub fn vortex_array::extension::datetime::Date::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value +pub fn vortex_array::extension::datetime::Date::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> -pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, _metadata: &::Metadata, _storage_dtype: &vortex_array::dtype::DType, _storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> - impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::datetime::Time pub type vortex_array::extension::datetime::Time::Metadata = vortex_array::extension::datetime::TimeUnit -pub type vortex_array::extension::datetime::Time::Value<'a> = vortex_array::extension::datetime::TimeValue +pub type vortex_array::extension::datetime::Time::NativeValue<'a> = vortex_array::extension::datetime::TimeValue pub fn vortex_array::extension::datetime::Time::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult @@ -6410,17 +6408,15 @@ pub fn vortex_array::extension::datetime::Time::id(&self) -> vortex_array::dtype pub fn vortex_array::extension::datetime::Time::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> -pub fn vortex_array::extension::datetime::Time::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value +pub fn vortex_array::extension::datetime::Time::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> -pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> - impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::datetime::Timestamp pub type vortex_array::extension::datetime::Timestamp::Metadata = vortex_array::extension::datetime::TimestampOptions -pub type vortex_array::extension::datetime::Timestamp::Value<'a> = vortex_array::extension::datetime::TimestampValue<'a> +pub type vortex_array::extension::datetime::Timestamp::NativeValue<'a> = vortex_array::extension::datetime::TimestampValue<'a> pub fn vortex_array::extension::datetime::Timestamp::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult @@ -6428,12 +6424,10 @@ pub fn vortex_array::extension::datetime::Timestamp::id(&self) -> vortex_array:: pub fn vortex_array::extension::datetime::Timestamp::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> -pub fn vortex_array::extension::datetime::Timestamp::unpack<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> Self::Value +pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> -pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> - pub trait vortex_array::dtype::extension::Matcher pub type vortex_array::dtype::extension::Matcher::Match<'a> @@ -12412,7 +12406,7 @@ impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::date pub type vortex_array::extension::datetime::Date::Metadata = vortex_array::extension::datetime::TimeUnit -pub type vortex_array::extension::datetime::Date::Value<'a> = vortex_array::extension::datetime::DateValue +pub type vortex_array::extension::datetime::Date::NativeValue<'a> = vortex_array::extension::datetime::DateValue pub fn vortex_array::extension::datetime::Date::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult @@ -12420,12 +12414,10 @@ pub fn vortex_array::extension::datetime::Date::id(&self) -> vortex_array::dtype pub fn vortex_array::extension::datetime::Date::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> -pub fn vortex_array::extension::datetime::Date::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value +pub fn vortex_array::extension::datetime::Date::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> -pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, _metadata: &::Metadata, _storage_dtype: &vortex_array::dtype::DType, _storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> - pub struct vortex_array::extension::datetime::Time impl vortex_array::extension::datetime::Time @@ -12462,7 +12454,7 @@ impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::date pub type vortex_array::extension::datetime::Time::Metadata = vortex_array::extension::datetime::TimeUnit -pub type vortex_array::extension::datetime::Time::Value<'a> = vortex_array::extension::datetime::TimeValue +pub type vortex_array::extension::datetime::Time::NativeValue<'a> = vortex_array::extension::datetime::TimeValue pub fn vortex_array::extension::datetime::Time::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult @@ -12470,12 +12462,10 @@ pub fn vortex_array::extension::datetime::Time::id(&self) -> vortex_array::dtype pub fn vortex_array::extension::datetime::Time::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> -pub fn vortex_array::extension::datetime::Time::unpack(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> Self::Value +pub fn vortex_array::extension::datetime::Time::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> -pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> - pub struct vortex_array::extension::datetime::Timestamp impl vortex_array::extension::datetime::Timestamp @@ -12514,7 +12504,7 @@ impl vortex_array::dtype::extension::ExtVTable for vortex_array::extension::date pub type vortex_array::extension::datetime::Timestamp::Metadata = vortex_array::extension::datetime::TimestampOptions -pub type vortex_array::extension::datetime::Timestamp::Value<'a> = vortex_array::extension::datetime::TimestampValue<'a> +pub type vortex_array::extension::datetime::Timestamp::NativeValue<'a> = vortex_array::extension::datetime::TimestampValue<'a> pub fn vortex_array::extension::datetime::Timestamp::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult @@ -12522,12 +12512,10 @@ pub fn vortex_array::extension::datetime::Timestamp::id(&self) -> vortex_array:: pub fn vortex_array::extension::datetime::Timestamp::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> -pub fn vortex_array::extension::datetime::Timestamp::unpack<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> Self::Value +pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, _metadata: &Self::Metadata, storage_dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<()> -pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> - pub struct vortex_array::extension::datetime::TimestampOptions pub vortex_array::extension::datetime::TimestampOptions::tz: core::option::Option> diff --git a/vortex-array/src/arrays/extension/compute/rules.rs b/vortex-array/src/arrays/extension/compute/rules.rs index beb4c1f0097..ead23d12bb0 100644 --- a/vortex-array/src/arrays/extension/compute/rules.rs +++ b/vortex-array/src/arrays/extension/compute/rules.rs @@ -103,22 +103,13 @@ mod tests { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } @@ -214,22 +205,13 @@ mod tests { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } diff --git a/vortex-array/src/dtype/extension/vtable.rs b/vortex-array/src/dtype/extension/vtable.rs index 878828179d8..5bca94dd780 100644 --- a/vortex-array/src/dtype/extension/vtable.rs +++ b/vortex-array/src/dtype/extension/vtable.rs @@ -42,12 +42,9 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { // Methods related to the extension scalar values. - // TODO(connor): more docs because it is not super obvious how to implement this at first. /// Validate the given storage value is compatible with the extension type. /// - /// Note that [`ExtVTable::validate_dtype`] is always called first to validate the storage - /// [`DType`], and the [`Scalar`](crate::scalar::Scalar) implementation will verify that the - /// storage value is compatible with the storage dtype on construction. + /// By default, this calls [`unpack_native()`](ExtVTable::unpack_native) and discards the result. /// /// # Errors /// @@ -57,16 +54,24 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { metadata: &Self::Metadata, storage_dtype: &DType, storage_value: &ScalarValue, - ) -> VortexResult<()>; + ) -> VortexResult<()> { + self.unpack_native(metadata, storage_dtype, storage_value) + .map(|_| ()) + } - /// Unpack a native value from the storage ScalarValue. + /// Validate and unpack a native value from the storage [`ScalarValue`]. + /// + /// Note that [`ExtVTable::validate_dtype()`] is always called first to validate the storage + /// [`DType`], and the [`Scalar`](crate::scalar::Scalar) implementation will verify that the + /// storage value is compatible with the storage dtype on construction. + /// + /// # Errors /// - /// This call is infallible assuming the [`ExtVTable::validate_scalar_value`] function has been - /// called previously. + /// Returns an error if the storage [`ScalarValue`] is not compatible with the extension type. fn unpack_native<'a>( &self, metadata: &'a Self::Metadata, storage_dtype: &'a DType, storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a>; + ) -> VortexResult>; } diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index 79e17c3c8cc..0a927e86538 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -6,6 +6,7 @@ use std::fmt; use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; @@ -94,38 +95,18 @@ impl ExtVTable for Date { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - // We know that the dtype is correct for this extension type (primitive) by the - // precondition that `validate_dtype` has already been called successfully, and we know that - // the `Scalar` we came from has verified that the storage value is a primitive. - // We also say that any i32 or i64 is a valid date value, so we do not need to verify the - // values at all. - Ok(()) - } - fn unpack_native( &self, metadata: &Self::Metadata, _storage_dtype: &DType, storage_value: &ScalarValue, - ) -> Self::NativeValue<'_> { + ) -> VortexResult> { match metadata { - TimeUnit::Milliseconds => { - DateValue::Milliseconds(storage_value.as_primitive().cast::().vortex_expect( - "The Scalar validation already checked that the value must be an i64", - )) - } - TimeUnit::Days => { - DateValue::Days(storage_value.as_primitive().cast::().vortex_expect( - "The Scalar validation already checked that the value must be an i32", - )) - } - _ => unreachable!(), + TimeUnit::Milliseconds => Ok(DateValue::Milliseconds( + storage_value.as_primitive().cast::()?, + )), + TimeUnit::Days => Ok(DateValue::Days(storage_value.as_primitive().cast::()?)), + _ => vortex_bail!("Date type does not support time unit {}", metadata), } } } diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index fe0df807a2a..660871a26f9 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -98,12 +98,12 @@ impl ExtVTable for Time { Ok(()) } - fn validate_scalar_value( + fn unpack_native( &self, metadata: &Self::Metadata, _storage_dtype: &DType, storage_value: &ScalarValue, - ) -> VortexResult<()> { + ) -> VortexResult> { let length_of_time = storage_value.as_primitive().cast::()?; // Validate the storage value is within the valid range for Time. @@ -119,37 +119,20 @@ impl ExtVTable for Time { .checked_add(span) .map_err(|e| vortex_err!("Invalid time scalar: {}", e))?; - Ok(()) - } - - fn unpack_native( - &self, - metadata: &Self::Metadata, - _storage_dtype: &DType, - storage_value: &ScalarValue, - ) -> Self::NativeValue<'_> { match metadata { - TimeUnit::Seconds => { - TimeValue::Seconds(storage_value.as_primitive().cast::().vortex_expect( - "The Scalar validation already checked that the value must be an i32", - )) - } - TimeUnit::Milliseconds => { - TimeValue::Milliseconds(storage_value.as_primitive().cast::().vortex_expect( - "The Scalar validation already checked that the value must be an i32", - )) - } - TimeUnit::Microseconds => { - TimeValue::Microseconds(storage_value.as_primitive().cast::().vortex_expect( - "The Scalar validation already checked that the value must be an i64", - )) - } - TimeUnit::Nanoseconds => { - TimeValue::Nanoseconds(storage_value.as_primitive().cast::().vortex_expect( - "The Scalar validation already checked that the value must be an i64", - )) - } - _ => unreachable!(), + TimeUnit::Seconds => Ok(TimeValue::Seconds( + storage_value.as_primitive().cast::()?, + )), + TimeUnit::Milliseconds => Ok(TimeValue::Milliseconds( + storage_value.as_primitive().cast::()?, + )), + TimeUnit::Microseconds => Ok(TimeValue::Microseconds( + storage_value.as_primitive().cast::()?, + )), + TimeUnit::Nanoseconds => Ok(TimeValue::Nanoseconds( + storage_value.as_primitive().cast::()?, + )), + d => vortex_bail!("Time type does not support time unit {d}"), } } } diff --git a/vortex-array/src/extension/datetime/timestamp.rs b/vortex-array/src/extension/datetime/timestamp.rs index 96ee7b821a8..0c55c6ce407 100644 --- a/vortex-array/src/extension/datetime/timestamp.rs +++ b/vortex-array/src/extension/datetime/timestamp.rs @@ -9,6 +9,7 @@ use std::sync::Arc; use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; @@ -181,21 +182,21 @@ impl ExtVTable for Timestamp { Ok(()) } - fn validate_scalar_value( + fn unpack_native<'a>( &self, - metadata: &Self::Metadata, - _storage_dtype: &DType, - storage_value: &ScalarValue, - ) -> VortexResult<()> { - let length_of_time = storage_value.as_primitive().cast::()?; + metadata: &'a Self::Metadata, + _storage_dtype: &'a DType, + storage_value: &'a ScalarValue, + ) -> VortexResult> { + let ts_value = storage_value.as_primitive().cast::()?; // Validate the storage value is within the valid range for Timestamp. let span = match metadata.unit { - TimeUnit::Nanoseconds => Span::new().nanoseconds(length_of_time), - TimeUnit::Microseconds => Span::new().microseconds(length_of_time), - TimeUnit::Milliseconds => Span::new().milliseconds(length_of_time), - TimeUnit::Seconds => Span::new().seconds(length_of_time), - TimeUnit::Days => Span::new().days(length_of_time), + TimeUnit::Nanoseconds => Span::new().nanoseconds(ts_value), + TimeUnit::Microseconds => Span::new().microseconds(ts_value), + TimeUnit::Milliseconds => Span::new().milliseconds(ts_value), + TimeUnit::Seconds => Span::new().seconds(ts_value), + TimeUnit::Days => Span::new().days(ts_value), }; let ts = jiff::Timestamp::UNIX_EPOCH @@ -207,27 +208,14 @@ impl ExtVTable for Timestamp { .map_err(|e| vortex_err!("Invalid timezone for timestamp scalar: {}", e))?; } - Ok(()) - } - - fn unpack_native<'a>( - &self, - metadata: &'a Self::Metadata, - _storage_dtype: &'a DType, - storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - let ts_value = storage_value - .as_primitive() - .cast::() - .vortex_expect("The Scalar validation already checked that the value must be an i64"); let tz = metadata.tz.as_ref(); match metadata.unit { - TimeUnit::Nanoseconds => TimestampValue::Nanoseconds(ts_value, tz), - TimeUnit::Microseconds => TimestampValue::Microseconds(ts_value, tz), - TimeUnit::Milliseconds => TimestampValue::Milliseconds(ts_value, tz), - TimeUnit::Seconds => TimestampValue::Seconds(ts_value, tz), - TimeUnit::Days => unreachable!(), + TimeUnit::Nanoseconds => Ok(TimestampValue::Nanoseconds(ts_value, tz)), + TimeUnit::Microseconds => Ok(TimestampValue::Microseconds(ts_value, tz)), + TimeUnit::Milliseconds => Ok(TimestampValue::Milliseconds(ts_value, tz)), + TimeUnit::Seconds => Ok(TimestampValue::Seconds(ts_value, tz)), + TimeUnit::Days => vortex_bail!("Timestamp does not support Days time unit"), } } } diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index c6db04bb8d0..2c3905ef9fc 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -472,22 +472,13 @@ mod tests { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } diff --git a/vortex-array/src/scalar/extension/typed.rs b/vortex-array/src/scalar/extension/typed.rs index 25f549417a2..b6b2a7e45a3 100644 --- a/vortex-array/src/scalar/extension/typed.rs +++ b/vortex-array/src/scalar/extension/typed.rs @@ -5,6 +5,7 @@ use std::any::Any; use std::fmt; use std::sync::Arc; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -80,7 +81,8 @@ impl DynExtScalarValue for ExtScalarValueInner { ext_dtype.metadata::(), ext_dtype.storage_dtype(), &self.storage, - ); + ) + .vortex_expect("invalid extension dtype for this extension scalar value"); write!(f, "{value}") } @@ -121,8 +123,7 @@ impl ExtScalarValue { /// Returns an error if [`ExtVTable::validate_scalar_value`] fails for the given /// storage value and extension dtype. pub fn try_new(ext_dtype: ExtDType, storage: ScalarValue) -> VortexResult { - ExtVTable::validate_scalar_value( - ext_dtype.vtable(), + ext_dtype.vtable().validate_scalar_value( ext_dtype.metadata(), ext_dtype.storage_dtype(), &storage, diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index 23d510a5313..c1105385351 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -51,22 +51,13 @@ mod tests { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } @@ -281,22 +272,13 @@ mod tests { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } @@ -353,22 +335,13 @@ mod tests { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } diff --git a/vortex-array/src/scalar/typed_view/extension/tests.rs b/vortex-array/src/scalar/typed_view/extension/tests.rs index a12089b1632..c1429c282eb 100644 --- a/vortex-array/src/scalar/typed_view/extension/tests.rs +++ b/vortex-array/src/scalar/typed_view/extension/tests.rs @@ -42,22 +42,13 @@ impl ExtVTable for TestI32Ext { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } @@ -140,22 +131,13 @@ fn test_ext_scalar_partial_ord_different_types() { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } @@ -346,22 +328,13 @@ fn test_ext_scalar_with_metadata() { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index d829308ecac..2c3796f883b 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -599,22 +599,13 @@ mod tests { Ok(()) } - fn validate_scalar_value( - &self, - _metadata: &Self::Metadata, - _storage_dtype: &DType, - _storage_value: &ScalarValue, - ) -> VortexResult<()> { - Ok(()) - } - fn unpack_native<'a>( &self, _metadata: &'a Self::Metadata, _storage_dtype: &'a DType, _storage_value: &'a ScalarValue, - ) -> Self::NativeValue<'a> { - "" + ) -> VortexResult> { + Ok("") } } From 80f9ec03d168ae4a10f4eb811b2e106335c5041d Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 15:32:11 -0500 Subject: [PATCH 07/10] rename to _metadata Signed-off-by: Connor Tsui --- vortex-array/src/arrays/extension/compute/rules.rs | 8 ++++---- vortex-array/src/dtype/extension/plugin.rs | 2 +- vortex-array/src/dtype/extension/typed.rs | 2 +- vortex-array/src/dtype/extension/vtable.rs | 6 ++---- vortex-array/src/extension/datetime/date.rs | 4 ++-- vortex-array/src/extension/datetime/time.rs | 4 ++-- vortex-array/src/extension/datetime/timestamp.rs | 4 ++-- vortex-array/src/scalar/arrow.rs | 4 ++-- vortex-array/src/scalar/tests/casting.rs | 12 ++++++------ .../src/scalar/typed_view/extension/tests.rs | 12 ++++++------ vortex-duckdb/src/convert/dtype.rs | 4 ++-- 11 files changed, 30 insertions(+), 32 deletions(-) diff --git a/vortex-array/src/arrays/extension/compute/rules.rs b/vortex-array/src/arrays/extension/compute/rules.rs index ead23d12bb0..74aab6ad4fd 100644 --- a/vortex-array/src/arrays/extension/compute/rules.rs +++ b/vortex-array/src/arrays/extension/compute/rules.rs @@ -87,11 +87,11 @@ mod tests { ExtId::new_ref("test_ext") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { Ok(vec![]) } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { Ok(EmptyMetadata) } @@ -189,11 +189,11 @@ mod tests { ExtId::new_ref("test_ext_2") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { Ok(vec![]) } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { Ok(EmptyMetadata) } diff --git a/vortex-array/src/dtype/extension/plugin.rs b/vortex-array/src/dtype/extension/plugin.rs index 2b45b140849..abde9c45dac 100644 --- a/vortex-array/src/dtype/extension/plugin.rs +++ b/vortex-array/src/dtype/extension/plugin.rs @@ -32,7 +32,7 @@ impl ExtDTypePlugin for V { } fn deserialize(&self, data: &[u8], storage_dtype: DType) -> VortexResult { - let metadata = ExtVTable::deserialize(self, data)?; + let metadata = ExtVTable::deserialize_metadata(self, data)?; Ok(ExtDType::try_with_vtable(self.clone(), metadata, storage_dtype)?.erased()) } } diff --git a/vortex-array/src/dtype/extension/typed.rs b/vortex-array/src/dtype/extension/typed.rs index 8e69df584d4..ab61309e0dd 100644 --- a/vortex-array/src/dtype/extension/typed.rs +++ b/vortex-array/src/dtype/extension/typed.rs @@ -168,7 +168,7 @@ impl DynExtDType for ExtDTypeInner { } fn metadata_serialize(&self) -> VortexResult> { - V::serialize(&self.vtable, &self.metadata) + V::serialize_metadata(&self.vtable, &self.metadata) } fn with_nullability(&self, nullability: Nullability) -> ExtDTypeRef { diff --git a/vortex-array/src/dtype/extension/vtable.rs b/vortex-array/src/dtype/extension/vtable.rs index 5bca94dd780..84f530ea06f 100644 --- a/vortex-array/src/dtype/extension/vtable.rs +++ b/vortex-array/src/dtype/extension/vtable.rs @@ -29,13 +29,11 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { // Methods related to the extension `DType`. - // TODO(connor): Should probably be called `serialize_metadata`. /// Serialize the metadata into a byte vector. - fn serialize(&self, metadata: &Self::Metadata) -> VortexResult>; + fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult>; - // TODO(connor): Should probably be called `deserialize_metadata`. /// Deserialize the metadata from a byte slice. - fn deserialize(&self, metadata: &[u8]) -> VortexResult; + fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult; /// Validate that the given storage type is compatible with this extension type. fn validate_dtype(&self, metadata: &Self::Metadata, storage_dtype: &DType) -> VortexResult<()>; diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index 0a927e86538..260757b9149 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -72,11 +72,11 @@ impl ExtVTable for Date { ExtId::new_ref("vortex.date") } - fn serialize(&self, metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult> { Ok(vec![u8::from(*metadata)]) } - fn deserialize(&self, metadata: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult { let tag = metadata[0]; TimeUnit::try_from(tag) } diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index 660871a26f9..9c0b30013e6 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -75,11 +75,11 @@ impl ExtVTable for Time { ExtId::new_ref("vortex.time") } - fn serialize(&self, metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult> { Ok(vec![u8::from(*metadata)]) } - fn deserialize(&self, data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, data: &[u8]) -> VortexResult { let tag = data[0]; TimeUnit::try_from(tag) } diff --git a/vortex-array/src/extension/datetime/timestamp.rs b/vortex-array/src/extension/datetime/timestamp.rs index 0c55c6ce407..7773c12d758 100644 --- a/vortex-array/src/extension/datetime/timestamp.rs +++ b/vortex-array/src/extension/datetime/timestamp.rs @@ -120,7 +120,7 @@ impl ExtVTable for Timestamp { // NOTE(ngates): unfortunately we're stuck with this hand-rolled serialization format for // backwards compatibility. - fn serialize(&self, metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult> { let mut bytes = Vec::with_capacity(4); let unit_tag: u8 = metadata.unit.into(); @@ -141,7 +141,7 @@ impl ExtVTable for Timestamp { Ok(bytes) } - fn deserialize(&self, data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, data: &[u8]) -> VortexResult { vortex_ensure!(data.len() >= 3); let tag = data[0]; diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index 2c3905ef9fc..e94dac46551 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -456,11 +456,11 @@ mod tests { ExtId::new_ref("some_ext") } - fn serialize(&self, _options: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _options: &Self::Metadata) -> VortexResult> { vortex_bail!("not implemented") } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { vortex_bail!("not implemented") } diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index c1105385351..72b44a5ed55 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -35,11 +35,11 @@ mod tests { ExtId::new_ref("apples") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { Ok(vec![]) } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { Ok(0) } @@ -256,11 +256,11 @@ mod tests { ExtId::new_ref("f16_ext") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { vortex_bail!("not implemented") } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { vortex_bail!("not implemented") } @@ -319,11 +319,11 @@ mod tests { ExtId::new_ref("struct_ext") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { vortex_bail!("not implemented") } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { vortex_bail!("not implemented") } diff --git a/vortex-array/src/scalar/typed_view/extension/tests.rs b/vortex-array/src/scalar/typed_view/extension/tests.rs index c1429c282eb..bff884671a7 100644 --- a/vortex-array/src/scalar/typed_view/extension/tests.rs +++ b/vortex-array/src/scalar/typed_view/extension/tests.rs @@ -26,11 +26,11 @@ impl ExtVTable for TestI32Ext { ExtId::new_ref("test_ext") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { Ok(vec![]) } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { Ok(EmptyMetadata) } @@ -115,11 +115,11 @@ fn test_ext_scalar_partial_ord_different_types() { ExtId::new_ref("test_ext_2") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { Ok(vec![]) } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { Ok(EmptyMetadata) } @@ -312,11 +312,11 @@ fn test_ext_scalar_with_metadata() { ExtId::new_ref("test_ext_metadata") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { vortex_bail!("not implemented") } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { vortex_bail!("not implemented") } diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index 2c3796f883b..aeffa40140b 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -583,11 +583,11 @@ mod tests { ExtId::new_ref("unknown.extension") } - fn serialize(&self, _metadata: &Self::Metadata) -> VortexResult> { + fn serialize_metadata(&self, _metadata: &Self::Metadata) -> VortexResult> { Ok(vec![]) } - fn deserialize(&self, _data: &[u8]) -> VortexResult { + fn deserialize_metadata(&self, _data: &[u8]) -> VortexResult { Ok(EmptyMetadata) } From 5e441a48bc87f1247ea666df3e16629db8bbfbcb Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 15:54:18 -0500 Subject: [PATCH 08/10] clean up Signed-off-by: Connor Tsui --- vortex-array/public-api.lock | 30 +++++----- vortex-array/src/extension/datetime/date.rs | 20 +++---- vortex-array/src/extension/datetime/time.rs | 60 +++++++++---------- .../src/extension/datetime/timestamp.rs | 4 +- vortex-array/src/scalar/extension/mod.rs | 11 ---- vortex-array/src/scalar/extension/typed.rs | 4 +- 6 files changed, 59 insertions(+), 70 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 8f9d1253eb0..f269285c507 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -6368,11 +6368,11 @@ pub type vortex_array::dtype::extension::ExtVTable::Metadata: 'static + core::ma pub type vortex_array::dtype::extension::ExtVTable::NativeValue<'a>: core::fmt::Display -pub fn vortex_array::dtype::extension::ExtVTable::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult +pub fn vortex_array::dtype::extension::ExtVTable::deserialize_metadata(&self, metadata: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::dtype::extension::ExtVTable::id(&self) -> vortex_array::dtype::extension::ExtId -pub fn vortex_array::dtype::extension::ExtVTable::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::dtype::extension::ExtVTable::serialize_metadata(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> pub fn vortex_array::dtype::extension::ExtVTable::unpack_native<'a>(&self, metadata: &'a Self::Metadata, storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult @@ -6386,11 +6386,11 @@ pub type vortex_array::extension::datetime::Date::Metadata = vortex_array::exten pub type vortex_array::extension::datetime::Date::NativeValue<'a> = vortex_array::extension::datetime::DateValue -pub fn vortex_array::extension::datetime::Date::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Date::deserialize_metadata(&self, metadata: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Date::id(&self) -> vortex_array::dtype::extension::ExtId -pub fn vortex_array::extension::datetime::Date::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> pub fn vortex_array::extension::datetime::Date::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult @@ -6402,11 +6402,11 @@ pub type vortex_array::extension::datetime::Time::Metadata = vortex_array::exten pub type vortex_array::extension::datetime::Time::NativeValue<'a> = vortex_array::extension::datetime::TimeValue -pub fn vortex_array::extension::datetime::Time::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Time::deserialize_metadata(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Time::id(&self) -> vortex_array::dtype::extension::ExtId -pub fn vortex_array::extension::datetime::Time::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> pub fn vortex_array::extension::datetime::Time::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult @@ -6418,11 +6418,11 @@ pub type vortex_array::extension::datetime::Timestamp::Metadata = vortex_array:: pub type vortex_array::extension::datetime::Timestamp::NativeValue<'a> = vortex_array::extension::datetime::TimestampValue<'a> -pub fn vortex_array::extension::datetime::Timestamp::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Timestamp::deserialize_metadata(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Timestamp::id(&self) -> vortex_array::dtype::extension::ExtId -pub fn vortex_array::extension::datetime::Timestamp::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult @@ -12408,11 +12408,11 @@ pub type vortex_array::extension::datetime::Date::Metadata = vortex_array::exten pub type vortex_array::extension::datetime::Date::NativeValue<'a> = vortex_array::extension::datetime::DateValue -pub fn vortex_array::extension::datetime::Date::deserialize(&self, metadata: &[u8]) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Date::deserialize_metadata(&self, metadata: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Date::id(&self) -> vortex_array::dtype::extension::ExtId -pub fn vortex_array::extension::datetime::Date::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> pub fn vortex_array::extension::datetime::Date::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult @@ -12456,11 +12456,11 @@ pub type vortex_array::extension::datetime::Time::Metadata = vortex_array::exten pub type vortex_array::extension::datetime::Time::NativeValue<'a> = vortex_array::extension::datetime::TimeValue -pub fn vortex_array::extension::datetime::Time::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Time::deserialize_metadata(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Time::id(&self) -> vortex_array::dtype::extension::ExtId -pub fn vortex_array::extension::datetime::Time::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> pub fn vortex_array::extension::datetime::Time::unpack_native(&self, metadata: &Self::Metadata, _storage_dtype: &vortex_array::dtype::DType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult @@ -12506,11 +12506,11 @@ pub type vortex_array::extension::datetime::Timestamp::Metadata = vortex_array:: pub type vortex_array::extension::datetime::Timestamp::NativeValue<'a> = vortex_array::extension::datetime::TimestampValue<'a> -pub fn vortex_array::extension::datetime::Timestamp::deserialize(&self, data: &[u8]) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Timestamp::deserialize_metadata(&self, data: &[u8]) -> vortex_error::VortexResult pub fn vortex_array::extension::datetime::Timestamp::id(&self) -> vortex_array::dtype::extension::ExtId -pub fn vortex_array::extension::datetime::Timestamp::serialize(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> +pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, metadata: &Self::Metadata) -> vortex_error::VortexResult> pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, metadata: &'a Self::Metadata, _storage_dtype: &'a vortex_array::dtype::DType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult @@ -13116,7 +13116,7 @@ pub fn vortex_array::scalar::extension::ExtScalarValue::id(&self) -> vortex_a pub fn vortex_array::scalar::extension::ExtScalarValue::storage_value(&self) -> &vortex_array::scalar::ScalarValue -pub fn vortex_array::scalar::extension::ExtScalarValue::try_new(ext_dtype: vortex_array::dtype::extension::ExtDType, storage: vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::scalar::extension::ExtScalarValue::try_new(ext_dtype: &vortex_array::dtype::extension::ExtDType, storage: vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult pub fn vortex_array::scalar::extension::ExtScalarValue::vtable(&self) -> &V diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index 260757b9149..c31a2b2f058 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -26,6 +26,16 @@ const EPOCH: jiff::civil::Date = jiff::civil::Date::constant(1970, 1, 1); #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] pub struct Date; +fn date_ptype(time_unit: &TimeUnit) -> Option { + match time_unit { + TimeUnit::Nanoseconds => None, + TimeUnit::Microseconds => None, + TimeUnit::Milliseconds => Some(PType::I64), + TimeUnit::Seconds => None, + TimeUnit::Days => Some(PType::I32), + } +} + impl Date { /// Creates a new Date extension dtype with the given time unit and nullability. /// @@ -110,13 +120,3 @@ impl ExtVTable for Date { } } } - -fn date_ptype(time_unit: &TimeUnit) -> Option { - match time_unit { - TimeUnit::Nanoseconds => None, - TimeUnit::Microseconds => None, - TimeUnit::Milliseconds => Some(PType::I64), - TimeUnit::Seconds => None, - TimeUnit::Days => Some(PType::I32), - } -} diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index 9c0b30013e6..a13352afe92 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -23,10 +23,18 @@ use crate::scalar::ScalarValue; #[derive(Clone, Debug, Default, PartialEq, Eq, Hash)] pub struct Time; +fn time_ptype(time_unit: &TimeUnit) -> Option { + Some(match time_unit { + TimeUnit::Nanoseconds | TimeUnit::Microseconds => PType::I64, + TimeUnit::Milliseconds | TimeUnit::Seconds => PType::I32, + TimeUnit::Days => return None, + }) +} + impl Time { /// Creates a new Time extension dtype with the given time unit and nullability. /// - /// Note that only Milliseconds and Days time units are supported for Time. + /// Note that Days units are not supported for Time. pub fn try_new(time_unit: TimeUnit, nullability: Nullability) -> VortexResult> { let ptype = time_ptype(&time_unit) .ok_or_else(|| vortex_err!("Time type does not support time unit {}", time_unit))?; @@ -106,41 +114,33 @@ impl ExtVTable for Time { ) -> VortexResult> { let length_of_time = storage_value.as_primitive().cast::()?; - // Validate the storage value is within the valid range for Time. - let span = match *metadata { - TimeUnit::Nanoseconds => Span::new().nanoseconds(length_of_time), - TimeUnit::Microseconds => Span::new().microseconds(length_of_time), - TimeUnit::Milliseconds => Span::new().milliseconds(length_of_time), - TimeUnit::Seconds => Span::new().seconds(length_of_time), + let (span, value) = match *metadata { + TimeUnit::Seconds => { + let v = i32::try_from(length_of_time) + .map_err(|e| vortex_err!("Time seconds value out of i32 range: {e}"))?; + (Span::new().seconds(v), TimeValue::Seconds(v)) + } + TimeUnit::Milliseconds => { + let v = i32::try_from(length_of_time) + .map_err(|e| vortex_err!("Time milliseconds value out of i32 range: {e}"))?; + (Span::new().milliseconds(v), TimeValue::Milliseconds(v)) + } + TimeUnit::Microseconds => ( + Span::new().microseconds(length_of_time), + TimeValue::Microseconds(length_of_time), + ), + TimeUnit::Nanoseconds => ( + Span::new().nanoseconds(length_of_time), + TimeValue::Nanoseconds(length_of_time), + ), d @ TimeUnit::Days => vortex_bail!("Time type does not support time unit {d}"), }; + // Validate the storage value is within the valid range for Time. jiff::civil::Time::MIN .checked_add(span) .map_err(|e| vortex_err!("Invalid time scalar: {}", e))?; - match metadata { - TimeUnit::Seconds => Ok(TimeValue::Seconds( - storage_value.as_primitive().cast::()?, - )), - TimeUnit::Milliseconds => Ok(TimeValue::Milliseconds( - storage_value.as_primitive().cast::()?, - )), - TimeUnit::Microseconds => Ok(TimeValue::Microseconds( - storage_value.as_primitive().cast::()?, - )), - TimeUnit::Nanoseconds => Ok(TimeValue::Nanoseconds( - storage_value.as_primitive().cast::()?, - )), - d => vortex_bail!("Time type does not support time unit {d}"), - } + Ok(value) } } - -fn time_ptype(time_unit: &TimeUnit) -> Option { - Some(match time_unit { - TimeUnit::Nanoseconds | TimeUnit::Microseconds => PType::I64, - TimeUnit::Milliseconds | TimeUnit::Seconds => PType::I32, - TimeUnit::Days => return None, - }) -} diff --git a/vortex-array/src/extension/datetime/timestamp.rs b/vortex-array/src/extension/datetime/timestamp.rs index 7773c12d758..82f874a84ed 100644 --- a/vortex-array/src/extension/datetime/timestamp.rs +++ b/vortex-array/src/extension/datetime/timestamp.rs @@ -196,7 +196,7 @@ impl ExtVTable for Timestamp { TimeUnit::Microseconds => Span::new().microseconds(ts_value), TimeUnit::Milliseconds => Span::new().milliseconds(ts_value), TimeUnit::Seconds => Span::new().seconds(ts_value), - TimeUnit::Days => Span::new().days(ts_value), + TimeUnit::Days => vortex_bail!("Timestamp does not support Days time unit"), }; let ts = jiff::Timestamp::UNIX_EPOCH @@ -215,7 +215,7 @@ impl ExtVTable for Timestamp { TimeUnit::Microseconds => Ok(TimestampValue::Microseconds(ts_value, tz)), TimeUnit::Milliseconds => Ok(TimestampValue::Milliseconds(ts_value, tz)), TimeUnit::Seconds => Ok(TimestampValue::Seconds(ts_value, tz)), - TimeUnit::Days => vortex_bail!("Timestamp does not support Days time unit"), + TimeUnit::Days => unreachable!("Timestamp does not support Days time unit"), } } } diff --git a/vortex-array/src/scalar/extension/mod.rs b/vortex-array/src/scalar/extension/mod.rs index f1550439bb5..bd237ef6372 100644 --- a/vortex-array/src/scalar/extension/mod.rs +++ b/vortex-array/src/scalar/extension/mod.rs @@ -12,17 +12,6 @@ //! [`ScalarValue`]: crate::scalar::ScalarValue //! [`DType`]: crate::dtype::DType //! [`ExtDTypeRef`]: crate::dtype::extension::ExtDTypeRef -//! -//! ## File layout convention -//! -//! Note that there is a single unified vtable for working with extension types -//! [`ExtVTable`](crate::dtype::extension::ExtVTable). - -// TODO(connor): Docs on file structure. - -// TODO(connor): Figure out what this should look like. -// mod plugin; -// pub use plugin::ExtScalarValuePlugin; mod typed; pub use typed::ExtScalarValue; diff --git a/vortex-array/src/scalar/extension/typed.rs b/vortex-array/src/scalar/extension/typed.rs index b6b2a7e45a3..f338f9a6328 100644 --- a/vortex-array/src/scalar/extension/typed.rs +++ b/vortex-array/src/scalar/extension/typed.rs @@ -98,7 +98,7 @@ impl DynExtScalarValue for ExtScalarValueInner { pub(super) trait DynExtScalarValue: super::sealed::Sealed + 'static + Send + Sync { /// Returns `self` as a trait object for downcasting. fn as_any(&self) -> &dyn Any; - /// Returns the [`ExtID`] identifying this extension type. + /// Returns the [`ExtId`] identifying this extension type. fn id(&self) -> ExtId; /// Returns the vtable as a trait object for downcasting. fn vtable_any(&self) -> &dyn Any; @@ -122,7 +122,7 @@ impl ExtScalarValue { /// /// Returns an error if [`ExtVTable::validate_scalar_value`] fails for the given /// storage value and extension dtype. - pub fn try_new(ext_dtype: ExtDType, storage: ScalarValue) -> VortexResult { + pub fn try_new(ext_dtype: &ExtDType, storage: ScalarValue) -> VortexResult { ext_dtype.vtable().validate_scalar_value( ext_dtype.metadata(), ext_dtype.storage_dtype(), From f8f4dce69792c9a344efd708a4f968f9a3ac8548 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 25 Feb 2026 16:27:03 -0500 Subject: [PATCH 09/10] add tests and clean up Signed-off-by: Connor Tsui --- vortex-array/public-api.lock | 8 -- vortex-array/src/scalar/extension/erased.rs | 60 +-------- vortex-array/src/scalar/extension/mod.rs | 3 + vortex-array/src/scalar/extension/tests.rs | 142 ++++++++++++++++++++ vortex-array/src/scalar/extension/typed.rs | 4 + 5 files changed, 154 insertions(+), 63 deletions(-) create mode 100644 vortex-array/src/scalar/extension/tests.rs diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index f269285c507..fb2f9b9f20e 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -13146,20 +13146,12 @@ impl vortex_array::scalar::extension::ExtScalarValueRef pub fn vortex_array::scalar::extension::ExtScalarValueRef::downcast(self) -> vortex_array::scalar::extension::ExtScalarValue -pub fn vortex_array::scalar::extension::ExtScalarValueRef::fmt_ext_scalar(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result - -pub fn vortex_array::scalar::extension::ExtScalarValueRef::get_vtable(&self) -> &V - pub fn vortex_array::scalar::extension::ExtScalarValueRef::id(&self) -> vortex_array::dtype::extension::ExtId pub fn vortex_array::scalar::extension::ExtScalarValueRef::storage_value(&self) -> &vortex_array::scalar::ScalarValue pub fn vortex_array::scalar::extension::ExtScalarValueRef::try_downcast(self) -> core::result::Result, vortex_array::scalar::extension::ExtScalarValueRef> -pub fn vortex_array::scalar::extension::ExtScalarValueRef::try_get_vtable(&self) -> core::option::Option<&V> - -pub fn vortex_array::scalar::extension::ExtScalarValueRef::validate(&self, ext_dtype: &vortex_array::dtype::extension::ExtDTypeRef) -> vortex_error::VortexResult<()> - impl core::clone::Clone for vortex_array::scalar::extension::ExtScalarValueRef pub fn vortex_array::scalar::extension::ExtScalarValueRef::clone(&self) -> vortex_array::scalar::extension::ExtScalarValueRef diff --git a/vortex-array/src/scalar/extension/erased.rs b/vortex-array/src/scalar/extension/erased.rs index fbe79420364..37d600376fa 100644 --- a/vortex-array/src/scalar/extension/erased.rs +++ b/vortex-array/src/scalar/extension/erased.rs @@ -9,10 +9,8 @@ use std::hash::Hasher; use std::sync::Arc; use vortex_error::VortexExpect; -use vortex_error::VortexResult; use vortex_error::vortex_err; -use crate::dtype::extension::ExtDTypeRef; use crate::dtype::extension::ExtId; use crate::dtype::extension::ExtVTable; use crate::scalar::ScalarValue; @@ -26,11 +24,12 @@ use crate::scalar::extension::typed::ExtScalarValueInner; /// and a storage [`ScalarValue`] behind a trait object, allowing heterogeneous storage inside /// `ScalarValue::Extension` (so that we do not need a generic parameter). /// -/// You can use [`try_downcast`] or [`downcast`] to recover the concrete vtable type as an +/// You can use [`try_downcast()`] or [`downcast()`] to recover the concrete vtable type as an /// [`ExtScalarValue`]. /// -/// [`try_downcast`]: ExtScalarValueRef::try_downcast -/// [`downcast`]: ExtScalarValueRef::downcast +/// [`ExtDTypeRef`]: crate::dtype::extension::ExtDTypeRef +/// [`try_downcast()`]: ExtScalarValueRef::try_downcast +/// [`downcast()`]: ExtScalarValueRef::downcast #[derive(Clone)] pub struct ExtScalarValueRef(pub(super) Arc); @@ -48,19 +47,6 @@ impl ExtScalarValueRef { self.0.storage_value() } - /// Formats the extension scalar using the provided [`ExtDTypeRef`] for metadata context. - /// - /// # Errors - /// - /// Returns an error if the underlying [`fmt::Write`] operation fails. - pub fn fmt_ext_scalar( - &self, - ext_dtype: &ExtDTypeRef, - f: &mut fmt::Formatter<'_>, - ) -> fmt::Result { - self.0.fmt_ext_scalar_value(ext_dtype, f) - } - /// Attempts to downcast to a concrete [`ExtScalarValue`]. /// /// # Errors @@ -97,44 +83,8 @@ impl ExtScalarValueRef { }) .vortex_expect("Failed to downcast ExtScalar") } - - /// Attempts to downcast the vtable to a concrete [`ExtVTable`] type by reference. - /// - /// Unlike [`try_downcast`], this borrows rather than consuming `self`. - /// - /// [`try_downcast`]: ExtScalarValueRef::try_downcast - pub fn try_get_vtable(&self) -> Option<&V> { - self.0.vtable_any().downcast_ref::() - } - - /// Downcasts the vtable to a concrete [`ExtVTable`] type by reference. - /// - /// Unlike [`downcast`], this borrows rather than consuming `self`. - /// - /// # Panics - /// - /// Panics if the underlying vtable type does not match `V`. - /// - /// [`downcast`]: ExtScalarValueRef::downcast - pub fn get_vtable(&self) -> &V { - self.try_get_vtable::() - .vortex_expect("ExtVTable downcast failed") - } - - /// Checks whether this extension scalar value is compatible with the given [`ExtDTypeRef`]. - /// - /// This validates that the vtable types match and that the storage value passes the - /// vtable's [`ExtVTable::validate_scalar_value`] check. - /// - /// # Errors - /// - /// Returns an error if it is not compatible with the extension type. - pub fn validate(&self, ext_dtype: &ExtDTypeRef) -> VortexResult<()> { - self.0.validate(ext_dtype) - } } -// TODO(connor): Do we disallow this because we do not have an extdtype? impl fmt::Display for ExtScalarValueRef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}({})", self.0.id(), self.0.storage_value()) @@ -150,7 +100,7 @@ impl fmt::Debug for ExtScalarValueRef { } } -// TODO(connor): I feel like there is something wrong with these... +// TODO(connor): In the future we may want to allow implementors to customize this behavior. impl PartialEq for ExtScalarValueRef { fn eq(&self, other: &Self) -> bool { diff --git a/vortex-array/src/scalar/extension/mod.rs b/vortex-array/src/scalar/extension/mod.rs index bd237ef6372..c3e2608b37a 100644 --- a/vortex-array/src/scalar/extension/mod.rs +++ b/vortex-array/src/scalar/extension/mod.rs @@ -30,3 +30,6 @@ mod sealed { /// This can be the **only** implementor for [`super::typed::DynExtScalarValue`]. impl Sealed for ExtScalarValueInner {} } + +#[cfg(test)] +mod tests; diff --git a/vortex-array/src/scalar/extension/tests.rs b/vortex-array/src/scalar/extension/tests.rs new file mode 100644 index 00000000000..feadd2f319d --- /dev/null +++ b/vortex-array/src/scalar/extension/tests.rs @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use vortex_error::VortexResult; + +use crate::dtype::Nullability; +use crate::extension::datetime::Date; +use crate::extension::datetime::Time; +use crate::extension::datetime::TimeUnit; +use crate::extension::datetime::Timestamp; +use crate::scalar::PValue; +use crate::scalar::ScalarValue; +use crate::scalar::extension::ExtScalarValue; +use crate::scalar::extension::ExtScalarValueRef; + +#[test] +fn try_new_date_valid() -> VortexResult<()> { + let ext_dtype = Date::new(TimeUnit::Days, Nullability::NonNullable); + let storage = ScalarValue::Primitive(PValue::I32(100)); + + let sv = ExtScalarValue::::try_new(&ext_dtype, storage.clone())?; + + assert_eq!(sv.id().as_ref(), "vortex.date"); + assert_eq!(sv.storage_value(), &storage); + assert_eq!(sv.vtable(), &Date); + Ok(()) +} + +#[test] +fn try_new_time_rejects_out_of_range() -> VortexResult<()> { + let ext_dtype = Time::new(TimeUnit::Seconds, Nullability::NonNullable); + + // 25 hours in seconds exceeds valid time-of-day range. + let too_large = ScalarValue::Primitive(PValue::I32(90_000)); + assert!(ExtScalarValue::