From 109daf3b671aef07d9300dcebebf7168f76e0cdf Mon Sep 17 00:00:00 2001 From: axreldable Date: Sun, 15 Feb 2026 14:04:42 +0100 Subject: [PATCH 1/2] GH-470: [Vector] Fix ListViewVector.getElementEndIndex(index) method --- .../arrow/vector/complex/ListViewVector.java | 2 +- .../arrow/vector/TestListViewVector.java | 215 ++++++++++++++---- 2 files changed, 167 insertions(+), 50 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 8711db5e0f..f5e3a5607f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -891,7 +891,7 @@ public int getElementStartIndex(int index) { @Override public int getElementEndIndex(int index) { - return sizeBuffer.getInt(index * OFFSET_WIDTH); + return offsetBuffer.getInt(index * OFFSET_WIDTH) + sizeBuffer.getInt(index * SIZE_WIDTH); } @Override diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java index 2f282e1988..8ab0edb145 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -1550,55 +1550,7 @@ public void testOverwriteWithNull() { public void testOutOfOrderOffset1() { // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - listViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - - FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), null, null); - Field field = new Field("child-vector", fieldType, null); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - SmallIntVector childVector = (SmallIntVector) fieldVector; - - childVector.allocateNew(7); - - childVector.set(0, 0); - childVector.set(1, -127); - childVector.set(2, 127); - childVector.set(3, 50); - childVector.set(4, 12); - childVector.set(5, -7); - childVector.set(6, 25); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - listViewVector.setValidity(0, 1); - listViewVector.setValidity(1, 0); - listViewVector.setValidity(2, 1); - listViewVector.setValidity(3, 1); - listViewVector.setValidity(4, 1); - - listViewVector.setOffset(0, 4); - listViewVector.setOffset(1, 7); - listViewVector.setOffset(2, 0); - listViewVector.setOffset(3, 0); - listViewVector.setOffset(4, 3); - - listViewVector.setSize(0, 3); - listViewVector.setSize(1, 0); - listViewVector.setSize(2, 4); - listViewVector.setSize(3, 0); - listViewVector.setSize(4, 2); - - // Set value count using `setValueCount` method. - listViewVector.setValueCount(5); + initializeListViewVectorAsInSpecification(listViewVector); final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); @@ -2217,6 +2169,105 @@ public void testRangeChildVector2() { } } + @Test + public void testGetElementStartIndexAndEndIndexOrderedOffsetsNoIntersection() { + /* + values = [10, 20, 30, 40, 50] + offsets = [0, 3] + sizes = [3, 2] + vector: [[10, 20, 30], [40, 50]] + */ + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + initializeListViewVector( + listViewVector, List.of(10, 20, 30, 40, 50), List.of(1, 1), List.of(0, 3), List.of(3, 2)); + + assertEquals(0, listViewVector.getElementStartIndex(0)); + assertEquals(3, listViewVector.getElementEndIndex(0)); + assertEquals(3, listViewVector.getElementStartIndex(1)); + assertEquals(5, listViewVector.getElementEndIndex(1)); + + final FieldVector dataVec = listViewVector.getDataVector(); + int elemIndex = 0; + int start = listViewVector.getElementStartIndex(elemIndex); + int end = listViewVector.getElementEndIndex(elemIndex); + List list = listViewVector.getObject(elemIndex); + assertEquals(end - start, list.size()); + for (int j = 0; j < list.size(); j++) { + assertEquals(((SmallIntVector) dataVec).get(start + j), list.get(j)); + } + } + } + + @Test + public void testGetElementStartIndexAndEndIndexNotOrderedOffsetsNoIntersection() { + /* + values = [1, 2, 3, 4, 5, 6] + validity = [1, 1, 1] + offsets = [4, 2, 0] + sizes = [2, 2, 2] + vector: [[5, 6], [3, 4], [1, 2]] + */ + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + initializeListViewVector( + listViewVector, + List.of(1, 2, 3, 4, 5, 6), + List.of(1, 1, 1), + List.of(4, 2, 0), + List.of(2, 2, 2)); + + assertEquals(4, listViewVector.getElementStartIndex(0)); + assertEquals(6, listViewVector.getElementEndIndex(0)); + assertEquals(2, listViewVector.getElementStartIndex(1)); + assertEquals(4, listViewVector.getElementEndIndex(1)); + assertEquals(0, listViewVector.getElementStartIndex(2)); + assertEquals(2, listViewVector.getElementEndIndex(2)); + } + } + + @Test + public void testGetElementStartIndexAndEndIndexOrderedOffsetsWithIntersection() { + /* + values = [1, 2, 3, 4, 5] + validity = [1, 1, 1] + offsets = [0, 1, 4] + sizes = [2, 3, 1] + vector: [[1, 2], [2, 3, 4], [5]] + */ + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + initializeListViewVector( + listViewVector, + List.of(1, 2, 3, 4, 5), + List.of(1, 1, 1), + List.of(0, 1, 4), + List.of(2, 3, 1)); + + assertEquals(0, listViewVector.getElementStartIndex(0)); + assertEquals(2, listViewVector.getElementEndIndex(0)); + assertEquals(1, listViewVector.getElementStartIndex(1)); + assertEquals(4, listViewVector.getElementEndIndex(1)); + assertEquals(4, listViewVector.getElementStartIndex(2)); + assertEquals(5, listViewVector.getElementEndIndex(2)); + } + } + + @Test + public void testGetElementStartIndexAndEndIndexOrderedOffsetsAsInSpecification() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + initializeListViewVectorAsInSpecification(listViewVector); + + assertEquals(4, listViewVector.getElementStartIndex(0)); + assertEquals(7, listViewVector.getElementEndIndex(0)); + assertEquals(7, listViewVector.getElementStartIndex(1)); + assertEquals(7, listViewVector.getElementEndIndex(1)); + assertEquals(0, listViewVector.getElementStartIndex(2)); + assertEquals(4, listViewVector.getElementEndIndex(2)); + assertEquals(0, listViewVector.getElementStartIndex(3)); + assertEquals(0, listViewVector.getElementEndIndex(3)); + assertEquals(3, listViewVector.getElementStartIndex(4)); + assertEquals(5, listViewVector.getElementEndIndex(4)); + } + } + private void writeIntValues(UnionListViewWriter writer, int[] values) { writer.startListView(); for (int v : values) { @@ -2224,4 +2275,70 @@ private void writeIntValues(UnionListViewWriter writer, int[] values) { } writer.endListView(); } + + /** + * ListViewVector from the specification. + */ + private void initializeListViewVectorAsInSpecification(ListViewVector listViewVector) { + /* + values = [0, -127, 127, 50, 12, -7, 25] + validity = [1, 1, 1, 0, 1] (reversed) + offsets = [4, 7, 0, 0, 3] + sizes = [3, 0, 4, 0, 2] + vector: [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + */ + initializeListViewVector( + listViewVector, + List.of(0, -127, 127, 50, 12, -7, 25), + List.of(1, 1, 1, 0, 1), + List.of(4, 7, 0, 0, 3), + List.of(3, 0, 4, 0, 2)); + } + + private void initializeListViewVector( + ListViewVector listViewVector, + List values, + List validity, + List offsets, + List sizes) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + assert offsets.size() == sizes.size(); + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + SmallIntVector childVector = (SmallIntVector) fieldVector; + childVector.allocateNew(values.size()); + for (int i = 0; i < values.size(); i++) { + childVector.set(i, values.get(i)); + } + childVector.setValueCount(values.size()); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + List reversedValidity = new ArrayList<>(validity); + Collections.reverse(reversedValidity); + for (int i = 0; i < reversedValidity.size(); i++) { + listViewVector.setValidity(i, reversedValidity.get(i)); + } + + for (int i = 0; i < offsets.size(); i++) { + listViewVector.setOffset(i, offsets.get(i)); + } + + for (int i = 0; i < sizes.size(); i++) { + listViewVector.setSize(i, sizes.get(i)); + } + + // Set value count using `setValueCount` method. + listViewVector.setValueCount(offsets.size()); + } } From ade73e50ca56b30b91dd689ad08627c0af1ab709 Mon Sep 17 00:00:00 2001 From: axreldable Date: Sun, 15 Feb 2026 14:40:53 +0100 Subject: [PATCH 2/2] GH-470: [Vector] Fix bug of usage sizeBuffer with OFFSET_WIDTH (hashCode) and offsetBuffer with SIZE_WIDTH (setSize) plus refactoring to avoid code duplication in ListViewVector --- .../arrow/vector/complex/ListViewVector.java | 68 +++++++++++-------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index f5e3a5607f..d41f61e291 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -226,8 +226,8 @@ private void setReaderAndWriterIndex() { sizeBuffer.writerIndex(0); } else { validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH); - sizeBuffer.writerIndex(valueCount * SIZE_WIDTH); + offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH); + sizeBuffer.writerIndex((long) valueCount * SIZE_WIDTH); } } @@ -445,14 +445,22 @@ public int hashCode(int index, ArrowBufHasher hasher) { return ArrowBufPointer.NULL_HASH_CODE; } int hash = 0; - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = sizeBuffer.getInt(index * OFFSET_WIDTH); + final int start = getElementStartIndex(index); + final int end = getElementEndIndex(index); for (int i = start; i < end; i++) { hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher)); } return hash; } + private void setElementOffsetBuffer(int index, int value) { + offsetBuffer.setInt((long) index * OFFSET_WIDTH, value); + } + + private void setElementSizeBuffer(int index, int value) { + sizeBuffer.setInt((long) index * SIZE_WIDTH, value); + } + private class TransferImpl implements TransferPair { ListViewVector to; @@ -498,7 +506,6 @@ public void splitAndTransfer(int startIndex, int length) { valueCount); to.clear(); if (length > 0) { - final int startPoint = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH); // we have to scan by index since there are out-of-order offsets to.offsetBuffer = to.allocateBuffers((long) length * OFFSET_WIDTH); to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH); @@ -507,9 +514,9 @@ public void splitAndTransfer(int startIndex, int length) { int maxOffsetAndSizeSum = -1; int minOffsetValue = -1; for (int i = 0; i < length; i++) { - final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); - final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH); - to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue); + final int offsetValue = getElementStartIndex(startIndex + i); + final int sizeValue = getElementSize(startIndex + i); + to.setElementSizeBuffer(i, sizeValue); if (maxOffsetAndSizeSum < offsetValue + sizeValue) { maxOffsetAndSizeSum = offsetValue + sizeValue; } @@ -520,9 +527,9 @@ public void splitAndTransfer(int startIndex, int length) { /* splitAndTransfer the offset buffer */ for (int i = 0; i < length; i++) { - final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); + final int offsetValue = getElementStartIndex(startIndex + i); final int relativeOffset = offsetValue - minOffsetValue; - to.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeOffset); + to.setElementOffsetBuffer(i, relativeOffset); } /* splitAndTransfer the validity buffer */ @@ -678,8 +685,8 @@ public List getObject(int index) { if (isSet(index) == 0) { return null; } - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); + final int start = getElementStartIndex(index); + final int end = getElementEndIndex(index); final ValueVector vv = getDataVector(); final List vals = new JsonStringArrayList<>(end - start); for (int i = start; i < end; i++) { @@ -711,7 +718,7 @@ public boolean isEmpty(int index) { if (isNull(index)) { return true; } else { - return sizeBuffer.getInt(index * SIZE_WIDTH) == 0; + return getElementSize(index) == 0; } } @@ -722,10 +729,7 @@ public boolean isEmpty(int index) { * @return 1 if element at given index is not null, 0 otherwise */ public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; + return BitVectorHelper.get(validityBuffer, index); } /** @@ -775,8 +779,8 @@ public void setNull(int index) { reallocValidityAndSizeAndOffsetBuffers(); } - offsetBuffer.setInt(index * OFFSET_WIDTH, 0); - sizeBuffer.setInt(index * SIZE_WIDTH, 0); + setElementOffsetBuffer(index, 0); + setElementSizeBuffer(index, 0); BitVectorHelper.unsetBit(validityBuffer, index); } @@ -794,11 +798,11 @@ public int startNewValue(int index) { if (index > 0) { final int prevOffset = getMaxViewEndChildVectorByIndex(index); - offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); + setElementOffsetBuffer(index, prevOffset); } BitVectorHelper.setBit(validityBuffer, index); - return offsetBuffer.getInt(index * OFFSET_WIDTH); + return getElementStartIndex(index); } /** @@ -836,9 +840,9 @@ private void validateInvariants(int offset, int size) { * @param value value to set */ public void setOffset(int index, int value) { - validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH)); + validateInvariants(value, getElementSize(index)); - offsetBuffer.setInt(index * OFFSET_WIDTH, value); + setElementOffsetBuffer(index, value); } /** @@ -848,9 +852,9 @@ public void setOffset(int index, int value) { * @param value value to set */ public void setSize(int index, int value) { - validateInvariants(offsetBuffer.getInt(index * SIZE_WIDTH), value); + validateInvariants(getElementStartIndex(index), value); - sizeBuffer.setInt(index * SIZE_WIDTH, value); + setElementSizeBuffer(index, value); } /** @@ -886,12 +890,16 @@ public void setValueCount(int valueCount) { @Override public int getElementStartIndex(int index) { - return offsetBuffer.getInt(index * OFFSET_WIDTH); + return offsetBuffer.getInt((long) index * OFFSET_WIDTH); + } + + private int getElementSize(int index) { + return sizeBuffer.getInt((long) index * SIZE_WIDTH); } @Override public int getElementEndIndex(int index) { - return offsetBuffer.getInt(index * OFFSET_WIDTH) + sizeBuffer.getInt(index * SIZE_WIDTH); + return getElementStartIndex(index) + getElementSize(index); } @Override @@ -948,8 +956,8 @@ public double getDensity() { @Override public void validate() { for (int i = 0; i < valueCount; i++) { - final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); - final int size = sizeBuffer.getInt(i * SIZE_WIDTH); + final int offset = getElementStartIndex(i); + final int size = getElementSize(i); validateInvariants(offset, size); } } @@ -961,6 +969,6 @@ public void validate() { * @param size number of elements in the list that was written */ public void endValue(int index, int size) { - sizeBuffer.setInt(index * SIZE_WIDTH, size); + setElementSizeBuffer(index, size); } }