From 0385273b9107b89b946e4b1b280675f4017675ba Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 2 Mar 2026 13:13:37 -0500 Subject: [PATCH 1/2] feat: migrate single-child wrappers to iterative execute (phase 4b) Replace recursive child.execute() calls with ExecutionStep returns in Slice, Filter, Masked, FoR, and ZigZag vtables. Each now checks if its child is already in the needed form (canonical/primitive/constant) and returns Done directly, or returns ExecuteChild(0)/ColumnarizeChild(0) to let the scheduler handle child execution iteratively. Also handles ConstantArray children explicitly to prevent infinite loops in the scheduler (since constants are already columnar and won't be re-executed). FoR decompress is split into try_fused_decompress and apply_reference for reuse without recursive execution. Signed-off-by: Nicholas Gates Co-Authored-By: Claude Opus 4.6 --- .../fastlanes/src/for/array/for_compress.rs | 11 +++- .../fastlanes/src/for/array/for_decompress.rs | 23 +++++--- encodings/fastlanes/src/for/vtable/mod.rs | 57 ++++++++++++++++++- encodings/zigzag/src/array.rs | 40 +++++++++++-- vortex-array/src/arrays/filter/vtable.rs | 30 +++++++--- vortex-array/src/arrays/masked/vtable/mod.rs | 25 ++++++-- vortex-array/src/arrays/slice/vtable.rs | 35 +++++++----- 7 files changed, 178 insertions(+), 43 deletions(-) diff --git a/encodings/fastlanes/src/for/array/for_compress.rs b/encodings/fastlanes/src/for/array/for_compress.rs index 9990df5fb6a..161d2b04439 100644 --- a/encodings/fastlanes/src/for/array/for_compress.rs +++ b/encodings/fastlanes/src/for/array/for_compress.rs @@ -68,8 +68,9 @@ mod test { use super::*; use crate::BitPackedArray; - use crate::r#for::array::for_decompress::decompress; + use crate::r#for::array::for_decompress::apply_reference; use crate::r#for::array::for_decompress::fused_decompress; + use crate::r#for::array::for_decompress::try_fused_decompress; static SESSION: LazyLock = LazyLock::new(|| VortexSession::empty().with::()); @@ -169,7 +170,13 @@ mod test { let expected_unsigned = PrimitiveArray::from_iter(unsigned); assert_arrays_eq!(encoded, expected_unsigned); - let decompressed = decompress(&compressed, &mut SESSION.create_execution_ctx())?; + let mut ctx = SESSION.create_execution_ctx(); + let decompressed = if let Some(result) = try_fused_decompress(&compressed, &mut ctx)? { + result + } else { + let encoded = compressed.encoded().to_primitive(); + apply_reference(&compressed, encoded) + }; array .as_slice::() .iter() diff --git a/encodings/fastlanes/src/for/array/for_decompress.rs b/encodings/fastlanes/src/for/array/for_decompress.rs index f7292a5a06e..b462fa0646d 100644 --- a/encodings/fastlanes/src/for/array/for_decompress.rs +++ b/encodings/fastlanes/src/for/array/for_decompress.rs @@ -45,23 +45,28 @@ impl + FoR> UnpackStrategy for FoRStrategy } } -pub fn decompress(array: &FoRArray, ctx: &mut ExecutionCtx) -> VortexResult { - let ptype = array.ptype(); - - // Try to do fused unpack. +/// Try the fused BitPacked decompression path. Returns `None` if the child is not BitPacked +/// or the reference type is not unsigned. +pub fn try_fused_decompress( + array: &FoRArray, + ctx: &mut ExecutionCtx, +) -> VortexResult> { if array.reference_scalar().dtype().is_unsigned_int() && let Some(bp) = array.encoded().as_opt::() { return match_each_unsigned_integer_ptype!(array.ptype(), |T| { - fused_decompress::(array, bp, ctx) + fused_decompress::(array, bp, ctx).map(Some) }); } + Ok(None) +} - // TODO(ngates): Do we need this to be into_encoded() somehow? - let encoded = array.encoded().clone().execute::(ctx)?; +/// Apply the FoR reference value to an already-decoded PrimitiveArray. +pub fn apply_reference(array: &FoRArray, encoded: PrimitiveArray) -> PrimitiveArray { + let ptype = array.ptype(); let validity = encoded.validity().clone(); - Ok(match_each_integer_ptype!(ptype, |T| { + match_each_integer_ptype!(ptype, |T| { let min = array .reference_scalar() .as_primitive() @@ -75,7 +80,7 @@ pub fn decompress(array: &FoRArray, ctx: &mut ExecutionCtx) -> VortexResult VortexResult { - Ok(ExecutionStep::Done(decompress(array, ctx)?.into_array())) + // Try fused decompress with BitPacked child (no child execution needed). + if let Some(result) = try_fused_decompress(array, ctx)? { + return Ok(ExecutionStep::Done(result.into_array())); + } + + // If child is already a PrimitiveArray, add the reference value. + if array.encoded().is::() { + let encoded = array.encoded().as_::().clone(); + return Ok(ExecutionStep::Done( + apply_reference(array, encoded).into_array(), + )); + } + + // If child is a constant, compute the result as a constant. + if let Some(constant) = array.encoded().as_opt::() { + let scalar = constant.scalar(); + if scalar.is_null() { + return Ok(ExecutionStep::Done( + ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()) + .into_array(), + )); + } + return Ok(ExecutionStep::Done(match_each_integer_ptype!( + array.ptype(), + |T| { + let enc_val = scalar + .as_primitive() + .typed_value::() + .vortex_expect("constant must be non-null after check"); + let ref_val = array + .reference_scalar() + .as_primitive() + .typed_value::() + .vortex_expect("reference must be non-null"); + ConstantArray::new( + Scalar::primitive( + enc_val.wrapping_add(ref_val), + scalar.dtype().nullability(), + ), + array.len(), + ) + .into_array() + } + ))); + } + + // Otherwise, ask the scheduler to execute the child first. + Ok(ExecutionStep::ExecuteChild(0)) } fn execute_parent( diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index b7229031151..104ab79ca76 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -12,6 +12,9 @@ use vortex_array::ExecutionCtx; use vortex_array::ExecutionStep; use vortex_array::IntoArray; use vortex_array::Precision; +use vortex_array::arrays::ConstantArray; +use vortex_array::arrays::ConstantVTable; +use vortex_array::arrays::PrimitiveVTable; use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::dtype::PType; @@ -149,10 +152,39 @@ impl VTable for ZigZagVTable { Ok(()) } - fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { - Ok(ExecutionStep::Done( - zigzag_decode(array.encoded().clone().execute(ctx)?).into_array(), - )) + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + // If child is already a PrimitiveArray, decode it directly. + if array.encoded().is::() { + let encoded = array.encoded().as_::().clone(); + return Ok(ExecutionStep::Done(zigzag_decode(encoded).into_array())); + } + + // If child is a constant, decode the scalar value. + if let Some(constant) = array.encoded().as_opt::() { + let scalar = constant.scalar(); + if scalar.is_null() { + return Ok(ExecutionStep::Done( + ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()) + .into_array(), + )); + } + let result = match_each_unsigned_integer_ptype!(scalar.as_primitive().ptype(), |P| { + let val = scalar + .as_primitive() + .typed_value::

() + .vortex_expect("constant must be non-null after check"); + Scalar::primitive( + <

::Int>::decode(val), + array.dtype().nullability(), + ) + }); + return Ok(ExecutionStep::Done( + ConstantArray::new(result, array.len()).into_array(), + )); + } + + // Otherwise, ask the scheduler to execute the child first. + Ok(ExecutionStep::ExecuteChild(0)) } fn reduce_parent( diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index fff8c90b992..ef329007ecf 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -13,12 +13,16 @@ use vortex_error::vortex_panic; use vortex_mask::Mask; use vortex_session::VortexSession; +use crate::AnyCanonical; use crate::Array; use crate::ArrayEq; use crate::ArrayHash; use crate::ArrayRef; +use crate::Canonical; use crate::IntoArray; use crate::Precision; +use crate::arrays::ConstantArray; +use crate::arrays::ConstantVTable; use crate::arrays::filter::array::FilterArray; use crate::arrays::filter::execute::execute_filter; use crate::arrays::filter::execute::execute_filter_fast_paths; @@ -156,18 +160,30 @@ impl VTable for FilterVTable { } fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { - if let Some(canonical) = execute_filter_fast_paths(array, ctx)? { - return Ok(ExecutionStep::Done(canonical)); + if let Some(result) = execute_filter_fast_paths(array, ctx)? { + return Ok(ExecutionStep::Done(result)); } let Mask::Values(mask_values) = &array.mask else { unreachable!("`execute_filter_fast_paths` handles AllTrue and AllFalse") }; - // We rely on the optimization pass that runs prior to this execution for filter pushdown, - // so now we can just execute the filter without worrying. - Ok(ExecutionStep::Done( - execute_filter(array.child.clone().execute(ctx)?, mask_values).into_array(), - )) + // If child is already canonical, filter it directly. + if let Some(canonical) = array.child.as_opt::() { + return Ok(ExecutionStep::Done( + execute_filter(Canonical::from(canonical), mask_values).into_array(), + )); + } + + // If child is a constant, filtering just changes the length. + if let Some(constant) = array.child.as_opt::() { + return Ok(ExecutionStep::Done( + ConstantArray::new(constant.scalar().clone(), mask_values.true_count()) + .into_array(), + )); + } + + // Otherwise, ask the scheduler to execute the child first. + Ok(ExecutionStep::ExecuteChild(0)) } fn reduce_parent( diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index c63a0687196..5c838363142 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -13,12 +13,15 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_panic; use vortex_session::VortexSession; +use crate::AnyCanonical; use crate::ArrayRef; use crate::Canonical; use crate::EmptyMetadata; use crate::IntoArray; use crate::Precision; use crate::arrays::ConstantArray; +use crate::arrays::ConstantVTable; +use crate::arrays::constant::constant_canonicalize; use crate::arrays::masked::MaskedArray; use crate::arrays::masked::compute::rules::PARENT_RULES; use crate::arrays::masked::mask_validity_canonical; @@ -178,10 +181,24 @@ impl VTable for MaskedVTable { // While we could manually convert the dtype, `mask_validity_canonical` is already O(1) for // `AllTrue` masks (no data copying), so there's no benefit. - let child = array.child().clone().execute::(ctx)?; - Ok(ExecutionStep::Done( - mask_validity_canonical(child, &validity_mask, ctx)?.into_array(), - )) + // If child is already canonical, apply the validity mask directly. + if let Some(canonical) = array.child().as_opt::() { + return Ok(ExecutionStep::Done( + mask_validity_canonical(Canonical::from(canonical), &validity_mask, ctx)? + .into_array(), + )); + } + + // If child is a constant, expand to canonical then apply the validity mask. + if let Some(constant) = array.child().as_opt::() { + let canonical = constant_canonicalize(constant)?; + return Ok(ExecutionStep::Done( + mask_validity_canonical(canonical, &validity_mask, ctx)?.into_array(), + )); + } + + // Otherwise, ask the scheduler to execute the child first. + Ok(ExecutionStep::ColumnarizeChild(0)) } fn reduce_parent( diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs index 26cc932a8ea..09fb50caf88 100644 --- a/vortex-array/src/arrays/slice/vtable.rs +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -20,7 +20,10 @@ use crate::ArrayEq; use crate::ArrayHash; use crate::ArrayRef; use crate::Canonical; +use crate::IntoArray; use crate::Precision; +use crate::arrays::ConstantArray; +use crate::arrays::ConstantVTable; use crate::arrays::slice::array::SliceArray; use crate::arrays::slice::rules::PARENT_RULES; use crate::buffer::BufferHandle; @@ -155,23 +158,25 @@ impl VTable for SliceVTable { Ok(()) } - fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { - // Execute the child to get canonical form, then slice it - let Some(canonical) = array.child.as_opt::() else { - // If the child is not canonical, recurse. - return array - .child - .clone() - .execute::(ctx)? - .slice(array.slice_range().clone()) + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + // If child is already canonical, slice it directly. + if let Some(canonical) = array.child.as_opt::() { + // TODO(ngates): we should inline canonical slice logic here. + return Canonical::from(canonical) + .as_ref() + .slice(array.range.clone()) .map(ExecutionStep::Done); - }; + } + + // If child is a constant, slicing just changes the length. + if let Some(constant) = array.child.as_opt::() { + return Ok(ExecutionStep::Done( + ConstantArray::new(constant.scalar().clone(), array.range.len()).into_array(), + )); + } - // TODO(ngates): we should inline canonical slice logic here. - Canonical::from(canonical) - .as_ref() - .slice(array.range.clone()) - .map(ExecutionStep::Done) + // Otherwise, ask the scheduler to execute the child first. + Ok(ExecutionStep::ExecuteChild(0)) } fn reduce_parent( From 0e1eae914446cbc0aa128868652b0ecfaa77c678 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Mon, 2 Mar 2026 14:01:17 -0500 Subject: [PATCH 2/2] chore: regenerate public-api.lock files Signed-off-by: Nicholas Gates Co-Authored-By: Claude Opus 4.6 --- encodings/zigzag/public-api.lock | 2 +- vortex-array/public-api.lock | 4 ++-- vortex-array/src/arrays/masked/vtable/mod.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/encodings/zigzag/public-api.lock b/encodings/zigzag/public-api.lock index 1f9fca6c735..45307c66270 100644 --- a/encodings/zigzag/public-api.lock +++ b/encodings/zigzag/public-api.lock @@ -96,7 +96,7 @@ pub fn vortex_zigzag::ZigZagVTable::deserialize(_bytes: &[u8], _dtype: &vortex_a pub fn vortex_zigzag::ZigZagVTable::dtype(array: &vortex_zigzag::ZigZagArray) -> &vortex_array::dtype::DType -pub fn vortex_zigzag::ZigZagVTable::execute(array: &Self::Array, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_zigzag::ZigZagVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult pub fn vortex_zigzag::ZigZagVTable::execute_parent(array: &Self::Array, parent: &vortex_array::array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult> diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index b885c08284c..cc8271d88e9 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -3138,7 +3138,7 @@ pub fn vortex_array::arrays::SliceVTable::deserialize(_bytes: &[u8], _dtype: &vo pub fn vortex_array::arrays::SliceVTable::dtype(array: &vortex_array::arrays::SliceArray) -> &vortex_array::dtype::DType -pub fn vortex_array::arrays::SliceVTable::execute(array: &Self::Array, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::arrays::SliceVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult pub fn vortex_array::arrays::SliceVTable::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> @@ -17402,7 +17402,7 @@ pub fn vortex_array::arrays::SliceVTable::deserialize(_bytes: &[u8], _dtype: &vo pub fn vortex_array::arrays::SliceVTable::dtype(array: &vortex_array::arrays::SliceArray) -> &vortex_array::dtype::DType -pub fn vortex_array::arrays::SliceVTable::execute(array: &Self::Array, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_array::arrays::SliceVTable::execute(array: &Self::Array, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult pub fn vortex_array::arrays::SliceVTable::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index 5c838363142..b2a8099bf1b 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -198,7 +198,7 @@ impl VTable for MaskedVTable { } // Otherwise, ask the scheduler to execute the child first. - Ok(ExecutionStep::ColumnarizeChild(0)) + Ok(ExecutionStep::ExecuteChild(0)) } fn reduce_parent(