From 722df630c10479cbd1bd78a0a0ac45be21b3ebc5 Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Mon, 2 Mar 2026 13:47:15 +0000 Subject: [PATCH 1/6] scalar_at_fields Signed-off-by: Baris Palaska --- vortex-array/src/arrays/struct_/array.rs | 12 ++++++++++ vortex-array/src/arrays/struct_/tests.rs | 23 +++++++++++++++++++ .../src/arrays/struct_/vtable/operations.rs | 7 +----- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index b7a1a83b405..354d62f7d33 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -17,6 +17,7 @@ use crate::dtype::DType; use crate::dtype::FieldName; use crate::dtype::FieldNames; use crate::dtype::StructFields; +use crate::scalar::Scalar; use crate::stats::ArrayStats; use crate::validity::Validity; use crate::vtable::ValidityHelper; @@ -177,6 +178,17 @@ impl StructArray { self.struct_fields().find(name).map(|idx| &self.fields[idx]) } + /// Return an iterator over the scalar value of each field at the given row index + pub fn scalar_at_fields( + &self, + index: usize, + ) -> VortexResult> + '_> { + Ok(self + .unmasked_fields() + .iter() + .map(move |f| f.scalar_at(index))) + } + pub fn names(&self) -> &FieldNames { self.struct_fields().names() } diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index 83df1440489..cbc50905402 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -130,6 +130,29 @@ fn test_duplicate_field_names() { assert_arrays_eq!(third_field, PrimitiveArray::from_iter([100i32, 200, 300])); } +#[test] +fn test_scalar_at_fields() -> VortexResult<()> { + let xs = PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable); + let ys = PrimitiveArray::new(buffer![10i64, 20, 30], Validity::NonNullable); + + let struct_array = StructArray::try_new( + FieldNames::from(["xs", "ys"]), + vec![xs.into_array(), ys.into_array()], + 3, + Validity::NonNullable, + )?; + + // scalar_at_fields should yield the same field scalars as scalar_at + let via_fields: Vec<_> = struct_array + .scalar_at_fields(1)? + .collect::>()?; + assert_eq!(via_fields.len(), 2); + assert_eq!(via_fields[0], 2i32.into()); + assert_eq!(via_fields[1], 20i64.into()); + + Ok(()) +} + #[test] fn test_uncompressed_size_in_bytes() -> VortexResult<()> { let struct_array = StructArray::new( diff --git a/vortex-array/src/arrays/struct_/vtable/operations.rs b/vortex-array/src/arrays/struct_/vtable/operations.rs index dba7a594ed1..f7e01f9b142 100644 --- a/vortex-array/src/arrays/struct_/vtable/operations.rs +++ b/vortex-array/src/arrays/struct_/vtable/operations.rs @@ -3,7 +3,6 @@ use vortex_error::VortexResult; -use crate::Array; use crate::arrays::struct_::StructArray; use crate::arrays::struct_::StructVTable; use crate::scalar::Scalar; @@ -11,11 +10,7 @@ use crate::vtable::OperationsVTable; impl OperationsVTable for StructVTable { fn scalar_at(array: &StructArray, index: usize) -> VortexResult { - let field_scalars: VortexResult> = array - .unmasked_fields() - .iter() - .map(|field| field.scalar_at(index)) - .collect(); + let field_scalars: VortexResult> = array.scalar_at_fields(index)?.collect(); Ok(Scalar::struct_(array.dtype().clone(), field_scalars?)) } } From 2418c69b026bfe40df1e7e4f1f1113291dc054f1 Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Mon, 2 Mar 2026 14:16:31 +0000 Subject: [PATCH 2/6] handle validity Signed-off-by: Baris Palaska --- vortex-array/public-api.lock | 2 ++ vortex-array/src/arrays/struct_/array.rs | 19 ++++++++++++------ vortex-array/src/arrays/struct_/tests.rs | 20 ++++++++++++++++++- .../src/arrays/struct_/vtable/operations.rs | 7 ++++++- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index d1a798eae06..a04b74b69e2 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -3188,6 +3188,8 @@ pub fn vortex_array::arrays::StructArray::project(&self, projection: &[vortex_ar pub fn vortex_array::arrays::StructArray::remove_column(&mut self, name: impl core::convert::Into) -> core::option::Option +pub fn vortex_array::arrays::StructArray::scalar_at_fields(&self, index: usize) -> vortex_error::VortexResult> + '_>> + pub fn vortex_array::arrays::StructArray::struct_fields(&self) -> &vortex_array::dtype::StructFields pub fn vortex_array::arrays::StructArray::try_from_iter, A: vortex_array::IntoArray, T: core::iter::traits::collect::IntoIterator>(iter: T) -> vortex_error::VortexResult diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 354d62f7d33..2f6691a9cfe 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -178,15 +178,22 @@ impl StructArray { self.struct_fields().find(name).map(|idx| &self.fields[idx]) } - /// Return an iterator over the scalar value of each field at the given row index + /// Iterate over the scalar value of each field at the given row index. + /// + /// Returns `Ok(None)` if the row is null at the struct level. Returns `Ok(Some(iter))` for + /// valid rows, yielding one `VortexResult` per field. pub fn scalar_at_fields( &self, index: usize, - ) -> VortexResult> + '_> { - Ok(self - .unmasked_fields() - .iter() - .map(move |f| f.scalar_at(index))) + ) -> VortexResult> + '_>> { + if !self.validity.is_valid(index)? { + return Ok(None); + } + Ok(Some( + self.unmasked_fields() + .iter() + .map(move |f| f.scalar_at(index)), + )) } pub fn names(&self) -> &FieldNames { diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index cbc50905402..f097fe669dd 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -142,9 +142,9 @@ fn test_scalar_at_fields() -> VortexResult<()> { Validity::NonNullable, )?; - // scalar_at_fields should yield the same field scalars as scalar_at let via_fields: Vec<_> = struct_array .scalar_at_fields(1)? + .expect("row 1 is valid") .collect::>()?; assert_eq!(via_fields.len(), 2); assert_eq!(via_fields[0], 2i32.into()); @@ -153,6 +153,24 @@ fn test_scalar_at_fields() -> VortexResult<()> { Ok(()) } +#[test] +fn test_scalar_at_fields_null_row() -> VortexResult<()> { + let xs = PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable); + let ys = PrimitiveArray::new(buffer![10i64, 20, 30], Validity::NonNullable); + + let struct_array = StructArray::try_new( + FieldNames::from(["xs", "ys"]), + vec![xs.into_array(), ys.into_array()], + 3, + Validity::from_iter([true, false, true]), + )?; + + assert!(struct_array.scalar_at_fields(1)?.is_none()); + assert!(struct_array.scalar_at_fields(0)?.is_some()); + + Ok(()) +} + #[test] fn test_uncompressed_size_in_bytes() -> VortexResult<()> { let struct_array = StructArray::new( diff --git a/vortex-array/src/arrays/struct_/vtable/operations.rs b/vortex-array/src/arrays/struct_/vtable/operations.rs index f7e01f9b142..22572f7d382 100644 --- a/vortex-array/src/arrays/struct_/vtable/operations.rs +++ b/vortex-array/src/arrays/struct_/vtable/operations.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::arrays::struct_::StructArray; @@ -10,7 +11,11 @@ use crate::vtable::OperationsVTable; impl OperationsVTable for StructVTable { fn scalar_at(array: &StructArray, index: usize) -> VortexResult { - let field_scalars: VortexResult> = array.scalar_at_fields(index)?.collect(); + // The vtable contract guarantees index is non-null before this is called. + let field_scalars: VortexResult> = array + .scalar_at_fields(index)? + .vortex_expect("scalar_at precondition: index is guaranteed non-null") + .collect(); Ok(Scalar::struct_(array.dtype().clone(), field_scalars?)) } } From b9f11f6a9c1df00277381d3dcf0c2db842c3aadf Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Mon, 2 Mar 2026 14:36:49 +0000 Subject: [PATCH 3/6] bound check Signed-off-by: Baris Palaska --- vortex-array/src/arrays/struct_/array.rs | 2 ++ vortex-array/src/arrays/struct_/tests.rs | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 2f6691a9cfe..2a10516592f 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -8,6 +8,7 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; use vortex_error::vortex_err; use crate::Array; @@ -186,6 +187,7 @@ impl StructArray { &self, index: usize, ) -> VortexResult> + '_>> { + vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len()); if !self.validity.is_valid(index)? { return Ok(None); } diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index f097fe669dd..0567a959222 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -171,6 +171,19 @@ fn test_scalar_at_fields_null_row() -> VortexResult<()> { Ok(()) } +#[test] +fn test_scalar_at_fields_out_of_bounds() { + let struct_array = StructArray::try_new( + FieldNames::from(["xs"]), + vec![buffer![1i32, 2, 3].into_array()], + 3, + Validity::NonNullable, + ) + .unwrap(); + + assert!(struct_array.scalar_at_fields(3).is_err()); +} + #[test] fn test_uncompressed_size_in_bytes() -> VortexResult<()> { let struct_array = StructArray::new( From 89840be7723cf93e6c4480ec92057f79f7b3b70c Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Mon, 2 Mar 2026 16:51:45 +0000 Subject: [PATCH 4/6] StructScalar unchecked, Struct scalar_at uses it Signed-off-by: Baris Palaska --- vortex-array/src/arrays/struct_/array.rs | 21 -------- vortex-array/src/arrays/struct_/tests.rs | 54 ------------------- .../src/arrays/struct_/vtable/operations.rs | 14 ++--- vortex-array/src/scalar/typed_view/struct_.rs | 24 +++++++-- 4 files changed, 28 insertions(+), 85 deletions(-) diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index 2a10516592f..b7a1a83b405 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -8,7 +8,6 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; -use vortex_error::vortex_ensure; use vortex_error::vortex_err; use crate::Array; @@ -18,7 +17,6 @@ use crate::dtype::DType; use crate::dtype::FieldName; use crate::dtype::FieldNames; use crate::dtype::StructFields; -use crate::scalar::Scalar; use crate::stats::ArrayStats; use crate::validity::Validity; use crate::vtable::ValidityHelper; @@ -179,25 +177,6 @@ impl StructArray { self.struct_fields().find(name).map(|idx| &self.fields[idx]) } - /// Iterate over the scalar value of each field at the given row index. - /// - /// Returns `Ok(None)` if the row is null at the struct level. Returns `Ok(Some(iter))` for - /// valid rows, yielding one `VortexResult` per field. - pub fn scalar_at_fields( - &self, - index: usize, - ) -> VortexResult> + '_>> { - vortex_ensure!(index < self.len(), OutOfBounds: index, 0, self.len()); - if !self.validity.is_valid(index)? { - return Ok(None); - } - Ok(Some( - self.unmasked_fields() - .iter() - .map(move |f| f.scalar_at(index)), - )) - } - pub fn names(&self) -> &FieldNames { self.struct_fields().names() } diff --git a/vortex-array/src/arrays/struct_/tests.rs b/vortex-array/src/arrays/struct_/tests.rs index 0567a959222..83df1440489 100644 --- a/vortex-array/src/arrays/struct_/tests.rs +++ b/vortex-array/src/arrays/struct_/tests.rs @@ -130,60 +130,6 @@ fn test_duplicate_field_names() { assert_arrays_eq!(third_field, PrimitiveArray::from_iter([100i32, 200, 300])); } -#[test] -fn test_scalar_at_fields() -> VortexResult<()> { - let xs = PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable); - let ys = PrimitiveArray::new(buffer![10i64, 20, 30], Validity::NonNullable); - - let struct_array = StructArray::try_new( - FieldNames::from(["xs", "ys"]), - vec![xs.into_array(), ys.into_array()], - 3, - Validity::NonNullable, - )?; - - let via_fields: Vec<_> = struct_array - .scalar_at_fields(1)? - .expect("row 1 is valid") - .collect::>()?; - assert_eq!(via_fields.len(), 2); - assert_eq!(via_fields[0], 2i32.into()); - assert_eq!(via_fields[1], 20i64.into()); - - Ok(()) -} - -#[test] -fn test_scalar_at_fields_null_row() -> VortexResult<()> { - let xs = PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable); - let ys = PrimitiveArray::new(buffer![10i64, 20, 30], Validity::NonNullable); - - let struct_array = StructArray::try_new( - FieldNames::from(["xs", "ys"]), - vec![xs.into_array(), ys.into_array()], - 3, - Validity::from_iter([true, false, true]), - )?; - - assert!(struct_array.scalar_at_fields(1)?.is_none()); - assert!(struct_array.scalar_at_fields(0)?.is_some()); - - Ok(()) -} - -#[test] -fn test_scalar_at_fields_out_of_bounds() { - let struct_array = StructArray::try_new( - FieldNames::from(["xs"]), - vec![buffer![1i32, 2, 3].into_array()], - 3, - Validity::NonNullable, - ) - .unwrap(); - - assert!(struct_array.scalar_at_fields(3).is_err()); -} - #[test] fn test_uncompressed_size_in_bytes() -> VortexResult<()> { let struct_array = StructArray::new( diff --git a/vortex-array/src/arrays/struct_/vtable/operations.rs b/vortex-array/src/arrays/struct_/vtable/operations.rs index 22572f7d382..111832a20b3 100644 --- a/vortex-array/src/arrays/struct_/vtable/operations.rs +++ b/vortex-array/src/arrays/struct_/vtable/operations.rs @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use vortex_error::VortexExpect; use vortex_error::VortexResult; +use crate::Array; use crate::arrays::struct_::StructArray; use crate::arrays::struct_::StructVTable; use crate::scalar::Scalar; @@ -11,11 +11,13 @@ use crate::vtable::OperationsVTable; impl OperationsVTable for StructVTable { fn scalar_at(array: &StructArray, index: usize) -> VortexResult { - // The vtable contract guarantees index is non-null before this is called. - let field_scalars: VortexResult> = array - .scalar_at_fields(index)? - .vortex_expect("scalar_at precondition: index is guaranteed non-null") + let field_scalars: VortexResult> = array + .unmasked_fields() + .iter() + .map(|field| field.scalar_at(index)) .collect(); - Ok(Scalar::struct_(array.dtype().clone(), field_scalars?)) + // SAFETY: The vtable guarantees index is in-bounds and non-null before this is called. + // Each field's scalar_at returns a scalar with the field's own dtype. + Ok(unsafe { Scalar::struct_unchecked(array.dtype().clone(), field_scalars?) }) } } diff --git a/vortex-array/src/scalar/typed_view/struct_.rs b/vortex-array/src/scalar/typed_view/struct_.rs index acc56e195c4..ce571350a5b 100644 --- a/vortex-array/src/scalar/typed_view/struct_.rs +++ b/vortex-array/src/scalar/typed_view/struct_.rs @@ -279,12 +279,13 @@ impl<'a> StructScalar<'a> { } impl Scalar { - /// Creates a new struct scalar with the given fields. - pub fn struct_(dtype: DType, children: Vec) -> Self { + /// Creates a new struct scalar with the given fields, checking dtypes at runtime. + pub fn struct_(dtype: DType, children: impl IntoIterator) -> Self { let DType::Struct(struct_fields, _) = &dtype else { vortex_panic!("Expected struct dtype, found {}", dtype); }; + let children: Vec = children.into_iter().collect(); let field_dtypes = struct_fields.fields(); if children.len() != field_dtypes.len() { vortex_panic!( @@ -305,9 +306,24 @@ impl Scalar { } } - let mut value_children = Vec::with_capacity(children.len()); - value_children.extend(children.into_iter().map(|x| x.into_value())); + let value_children: Vec<_> = children.into_iter().map(|x| x.into_value()).collect(); + Self::try_new(dtype, Some(ScalarValue::List(value_children))) + .vortex_expect("unable to construct a struct `Scalar`") + } + /// Creates a new struct scalar from an iterator of field scalars, skipping dtype checks. + /// + /// # Safety + /// + /// Caller must ensure: + /// - `dtype` is `DType::Struct` + /// - The iterator yields exactly as many scalars as `dtype` has fields + /// - Each scalar's dtype matches the corresponding field dtype in `dtype` + pub unsafe fn struct_unchecked( + dtype: DType, + children: impl IntoIterator, + ) -> Self { + let value_children: Vec<_> = children.into_iter().map(|s| s.into_value()).collect(); Self::try_new(dtype, Some(ScalarValue::List(value_children))) .vortex_expect("unable to construct a struct `Scalar`") } From c863fa280366e898a65b734e1a0a56f6d4206e7b Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Mon, 2 Mar 2026 16:54:50 +0000 Subject: [PATCH 5/6] public api Signed-off-by: Baris Palaska --- vortex-array/public-api.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index a04b74b69e2..b1ec105ab49 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -3188,8 +3188,6 @@ pub fn vortex_array::arrays::StructArray::project(&self, projection: &[vortex_ar pub fn vortex_array::arrays::StructArray::remove_column(&mut self, name: impl core::convert::Into) -> core::option::Option -pub fn vortex_array::arrays::StructArray::scalar_at_fields(&self, index: usize) -> vortex_error::VortexResult> + '_>> - pub fn vortex_array::arrays::StructArray::struct_fields(&self) -> &vortex_array::dtype::StructFields pub fn vortex_array::arrays::StructArray::try_from_iter, A: vortex_array::IntoArray, T: core::iter::traits::collect::IntoIterator>(iter: T) -> vortex_error::VortexResult @@ -12298,7 +12296,9 @@ pub fn vortex_array::scalar::Scalar::from_proto_value(value: &vortex_proto::scal impl vortex_array::scalar::Scalar -pub fn vortex_array::scalar::Scalar::struct_(dtype: vortex_array::dtype::DType, children: alloc::vec::Vec) -> Self +pub fn vortex_array::scalar::Scalar::struct_(dtype: vortex_array::dtype::DType, children: impl core::iter::traits::collect::IntoIterator) -> Self + +pub unsafe fn vortex_array::scalar::Scalar::struct_unchecked(dtype: vortex_array::dtype::DType, children: impl core::iter::traits::collect::IntoIterator) -> Self impl vortex_array::scalar::Scalar From 9629bea329d83c7728cbf8bb87139e61ec5f8b10 Mon Sep 17 00:00:00 2001 From: Baris Palaska Date: Mon, 2 Mar 2026 17:01:23 +0000 Subject: [PATCH 6/6] fix Signed-off-by: Baris Palaska --- vortex-python/src/scalar/factory.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vortex-python/src/scalar/factory.rs b/vortex-python/src/scalar/factory.rs index 81286574a17..d4a0e000cf1 100644 --- a/vortex-python/src/scalar/factory.rs +++ b/vortex-python/src/scalar/factory.rs @@ -151,12 +151,14 @@ fn scalar_helper_inner(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyRes ))); } + let children: Vec = dict + .values() + .into_iter() + .map(|item| scalar_helper_inner(&item, None)) + .try_collect()?; return Ok(Scalar::struct_( DType::Struct(dtype.clone(), *nullability), - dict.values() - .into_iter() - .map(|item| scalar_helper_inner(&item, None)) - .try_collect()?, + children, )); } else { let values: Vec = dict