From d983fbbd2da3914c58e95959602f2ec2a45cb57c Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 23 Feb 2026 18:39:08 -0600 Subject: [PATCH 1/5] DX-103340: Update jarbuild --- .github/workflows/jarbuild.yml | 264 ++++++++++++++++++--------------- 1 file changed, 143 insertions(+), 121 deletions(-) diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index ae2981cd6d..bb0509ebb4 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -16,7 +16,7 @@ # under the License. name: JarBuild -on: +on: workflow_dispatch: inputs: arrow_branch: @@ -44,16 +44,35 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 5 steps: + - name: Trim workflow inputs + run: | + echo "ARROW_BRANCH=$(echo '${{github.event.inputs.arrow_branch}}' | xargs)" >> $GITHUB_ENV + echo "ARROW_REPO=$(echo '${{github.event.inputs.arrow_repo}}' | xargs)" >> $GITHUB_ENV + echo "RELEASE_TAG_NAME=$(echo '${{github.event.inputs.release_tag_name}}' | xargs)" >> $GITHUB_ENV + - name: Print workflow input parameters + run: | + echo "==========================================" + echo "Workflow Input Parameters" + echo "==========================================" + echo "arrow_branch: ${{env.ARROW_BRANCH}}" + echo "arrow_repo: ${{env.ARROW_REPO}}" + echo "release_tag_name: ${{env.RELEASE_TAG_NAME}}" + echo "arrow-java branch: ${{github.ref_name}}" + echo "" + echo "Direct Links:" + echo "----------------------------------------" + echo "Arrow C++ repo/branch: https://github.com/${{env.ARROW_REPO}}/tree/${{env.ARROW_BRANCH}}" + echo "Arrow Java repo/branch: https://github.com/${{github.repository}}/tree/${{github.ref_name}}" + echo "Release tag: https://github.com/${{github.repository}}/releases/tag/${{env.RELEASE_TAG_NAME}}" + echo "==========================================" - name: Checkout uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: submodules: recursive - - name: Set env - run: echo "release_tag_name=$(echo $release_tag_name)" >> $GITHUB_ENV - name: Prepare for tag run: | - echo "${{github.event.inputs.release_tag_name}}" - ver=$(echo ${{github.event.inputs.release_tag_name}}) + echo "${{env.RELEASE_TAG_NAME}}" + ver=$(echo ${{env.RELEASE_TAG_NAME}}) version=${ver%-rc*} version=${version#v} rc=${ver#*-rc} @@ -81,7 +100,7 @@ jobs: jni-linux: name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} runs-on: ${{ matrix.platform.runs_on }} - timeout-minutes: 120 + timeout-minutes: 240 needs: - source strategy: @@ -158,17 +177,16 @@ jobs: jni-macos: name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} runs-on: ${{ matrix.platform.runs_on }} - timeout-minutes: 45 + timeout-minutes: 445 needs: - source strategy: fail-fast: false matrix: platform: - - { runs_on: macos-13, arch: "x86_64"} - - { runs_on: macos-14, arch: "aarch_64" } + - { runs_on: macos-15, arch: "aarch_64" } env: - MACOSX_DEPLOYMENT_TARGET: "14.0" + MACOSX_DEPLOYMENT_TARGET: "15.0" steps: - name: Download source archive uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 @@ -205,20 +223,41 @@ jobs: python-version: 3.12 - name: Install Archery run: pip install -e arrow/dev/archery[all] + - name: Checkout vcpkg + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: Microsoft/vcpkg + path: arrow/vcpkg + - name: Install vcpkg + run: | + cd arrow/vcpkg + ./bootstrap-vcpkg.sh + echo "VCPKG_ROOT=${PWD}/arrow/vcpkg" >> ${GITHUB_ENV} + echo "${PWD}/arrow/vcpkg" >> ${GITHUB_PATH} + - name: Clean up disk space + run: | + echo "=== Free disk space before cleanup ===" + df -h / + + echo "" + echo "=== Removing Xcode simulators ===" + sudo rm -rf /Library/Developer/CoreSimulator/Caches || : + echo "Removed /Library/Developer/CoreSimulator/Caches" + + echo "" + echo "=== Removing user simulator data ===" + rm -rf ~/Library/Developer/CoreSimulator || : + echo "Removed ~/Library/Developer/CoreSimulator" + + echo "" + echo "=== Free disk space after cleanup ===" + df -h / - name: Install dependencies run: | - # We want to use llvm@14 to avoid shared z3 - # dependency. llvm@14 doesn't depend on z3 and llvm depends - # on z3. And Homebrew's z3 provides only shared library. It - # doesn't provides static z3 because z3's CMake doesn't accept - # building both shared and static libraries at once. - # See also: Z3_BUILD_LIBZ3_SHARED in - # https://github.com/Z3Prover/z3/blob/master/README-CMake.md - # - # If llvm is installed, Apache Arrow C++ uses llvm rather than - # llvm@14 because llvm is newer than llvm@14. - brew uninstall llvm || : + echo "=== Free disk space at start of dependency installation ===" + df -h / + echo "" # Ensure updating python@XXX with the "--overwrite" option. # If python@XXX is updated without "--overwrite", it causes # a conflict error. Because Python 3 installed not by @@ -240,7 +279,13 @@ jobs: brew uninstall pkg-config@0.29.2 || : fi + # Install basic build tools via brew (vcpkg needs these) + brew install cmake ninja pkg-config brew bundle --file=arrow/cpp/Brewfile + + # Clean up any existing LLVM installations in favor of vcpkg. + brew uninstall llvm || : + # We want to link aws-sdk-cpp statically but Homebrew's # aws-sdk-cpp provides only shared library. If we have # Homebrew's aws-sdk-cpp, our build mix Homebrew's @@ -259,6 +304,24 @@ jobs: # bundled Protobuf. brew uninstall protobuf + echo "" + echo "=== Free disk space before LLVM build ===" + df -h / + + echo "" + # Use vcpkg to install LLVM. + vcpkg install \ + --clean-after-build \ + --x-install-root=${VCPKG_ROOT}/installed \ + --x-manifest-root=arrow/ci/vcpkg \ + --overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=gandiva-llvm + + echo "" + echo "=== Free disk space after LLVM build ===" + df -h / + + echo "" brew bundle --file=Brewfile - name: Prepare ccache run: | @@ -271,10 +334,18 @@ jobs: restore-keys: jni-macos-${{ matrix.platform.arch }}- - name: Build run: | + echo "=== Free disk space at start of build ===" + df -h / + + echo "" set -e # make brew Java available to CMake export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home ci/scripts/jni_macos_build.sh . arrow build jni + + echo "" + echo "=== Free disk space at end of build ===" + df -h / - name: Compress into single artifact to keep directory structure run: tar -cvzf jni-macos-${{ matrix.platform.arch }}.tar.gz jni/ - name: Upload artifacts @@ -282,92 +353,12 @@ jobs: with: name: jni-macos-${{ matrix.platform.arch }} path: jni-macos-${{ matrix.platform.arch }}.tar.gz - jni-windows: - name: JNI ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }} - runs-on: ${{ matrix.platform.runs_on }} - timeout-minutes: 45 - needs: - - source - strategy: - fail-fast: false - matrix: - platform: - - runs_on: windows-2019 - arch: "x86_64" - steps: - - name: Download source archive - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: release-source - - name: Extract source archive - shell: bash - run: | - tar -xf apache-arrow-java-*.tar.gz --strip-components=1 - # - name: Download the latest Apache Arrow C++ - # if: github.event_name != 'schedule' - # shell: bash - # run: | - # ci/scripts/download_cpp.sh - - name: Checkout Apache Arrow C++ - # if: github.event_name == 'schedule' - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - repository: ${{github.event.inputs.arrow_repo}} - ref: ${{github.event.inputs.arrow_branch}} - path: arrow - - name: Set up Java - uses: actions/setup-java@7a6d8a8234af8eb26422e24e3006232cccaa061b # v4.6.0 - with: - java-version: '11' - distribution: 'temurin' - - name: Download Timezone Database - shell: bash - run: | - arrow/ci/scripts/download_tz_database.sh - - name: Install ccache - shell: bash - run: | - env | sort - version=4.10.2 - base_name="ccache-${version}-windows-x86_64" - url="https://github.com/ccache/ccache/releases/download/v${version}/${base_name}.zip" - curl --fail --location --remote-name "${url}" - unzip "${base_name}.zip" - chmod +x "${base_name}/ccache.exe" - mv "${base_name}/ccache.exe" /usr/bin/ - rm -rf "${base_name}"{,.zip} - - name: Prepare ccache - shell: bash - run: | - echo "CCACHE_DIR=${PWD}/ccache" >> ${GITHUB_ENV} - - name: Cache ccache - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 - with: - path: ccache - key: jni-windows-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} - restore-keys: jni-windows-${{ matrix.platform.arch }}- - - name: Build - shell: cmd - run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - REM For ORC - set TZDIR=/c/msys64/usr/share/zoneinfo - bash -c "ci/scripts/jni_windows_build.sh . arrow build jni" - - name: Compress into single artifact to keep directory structure - shell: bash - run: tar -cvzf jni-windows-${{ matrix.platform.arch }}.tar.gz jni/ - - name: Upload artifacts - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 - with: - name: jni-windows-${{ matrix.platform.arch }} - path: jni-windows-${{ matrix.platform.arch }}.tar.gz binaries: name: Binaries runs-on: ubuntu-latest needs: - jni-linux - jni-macos - - jni-windows steps: - name: Download artifacts uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 @@ -379,9 +370,7 @@ jobs: tar -xf apache-arrow-java-*.tar.gz --strip-components=1 tar -xvzf jni-linux-x86_64.tar.gz tar -xvzf jni-linux-aarch_64.tar.gz - tar -xvzf jni-macos-x86_64.tar.gz tar -xvzf jni-macos-aarch_64.tar.gz - tar -xvzf jni-windows-x86_64.tar.gz - name: Test that shared libraries exist run: | set -x @@ -396,19 +385,11 @@ jobs: test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.so test -f jni/gandiva_jni/aarch_64/libgandiva_jni.so - test -f jni/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib - test -f jni/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib - test -f jni/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib - test -f jni/gandiva_jni/x86_64/libgandiva_jni.dylib - test -f jni/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib test -f jni/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib test -f jni/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib test -f jni/gandiva_jni/aarch_64/libgandiva_jni.dylib - test -f jni/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll - test -f jni/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll - test -f jni/arrow_orc_jni/x86_64/arrow_orc_jni.dll - name: Checkout apache/arrow-testing uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -521,30 +502,71 @@ jobs: permissions: contents: write steps: + - name: Checkout arrow-java repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Checkout Apache Arrow C++ repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: ${{github.event.inputs.arrow_repo}} + ref: ${{github.event.inputs.arrow_branch}} + path: arrow + - name: Get commit IDs + id: commit_ids + run: | + # Get short commit ID for arrow-java + arrow_java_commit=$(git rev-parse --short HEAD) + echo "arrow_java_commit=${arrow_java_commit}" >> $GITHUB_OUTPUT + + # Get short commit ID for arrow + cd arrow + arrow_commit=$(git rev-parse --short HEAD) + echo "arrow_commit=${arrow_commit}" >> $GITHUB_OUTPUT + cd .. + + # Parse version from release tag + ver=$(echo ${{github.event.inputs.release_tag_name}}) + version=${ver%-rc*} + version=${version#v} + rc=${ver#*-rc} + + # Create release name with both commit IDs + release_name="${version}-${arrow_java_commit}-${arrow_commit}" + release_tag="v${release_name}" + echo "release_name=${release_name}" >> $GITHUB_OUTPUT + echo "release_tag=${release_tag}" >> $GITHUB_OUTPUT + echo "version=${version}" >> $GITHUB_OUTPUT + echo "rc=${rc}" >> $GITHUB_OUTPUT + + echo "Arrow Java commit: ${arrow_java_commit}" + echo "Arrow commit: ${arrow_commit}" + echo "Release tag: ${release_tag}" - name: Download release artifacts uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: pattern: release-* path: artifacts + - name: Create and push tag + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag -a "${{ steps.commit_ids.outputs.release_tag }}" -m "Release ${{ steps.commit_ids.outputs.release_name }} RC${{ steps.commit_ids.outputs.rc }}" -m "Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" + git push origin "${{ steps.commit_ids.outputs.release_tag }}" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload run: | # GH-499: How to create release notes? - echo "${{github.event.inputs.release_tag_name}}" - ver=$(echo ${{github.event.inputs.release_tag_name}}) - version=${ver%-rc*} - version=${version#v} - rc=${ver#*-rc} - gh release create ${{github.event.inputs.release_tag_name}} \ - --generate-notes \ + echo "Creating release: ${{ steps.commit_ids.outputs.release_tag }}" + gh release create "${{ steps.commit_ids.outputs.release_tag }}" \ + -n "Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" \ --prerelease \ --repo ${GITHUB_REPOSITORY} \ - --title "Apache Arrow Java ${version} RC${rc}" \ - --verify-tag + --title "Apache Arrow Java ${{ steps.commit_ids.outputs.version }} RC${{ steps.commit_ids.outputs.rc }} (arrow-java: ${{ steps.commit_ids.outputs.arrow_java_commit }}, arrow: ${{ steps.commit_ids.outputs.arrow_commit }})" # GitHub CLI does not respect their own rate limits # https://github.com/cli/cli/issues/9586 for artifact in artifacts/*/*; do sleep 1 - gh release upload ${{github.event.inputs.release_tag_name}} \ + gh release upload "${{ steps.commit_ids.outputs.release_tag }}" \ --repo ${GITHUB_REPOSITORY} \ $artifact done From 6c4407c83323adc00bc12230e02599c55e556e8c Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 23 Feb 2026 18:54:52 -0600 Subject: [PATCH 2/5] DX-114802: Support Jira tickets in PR titles --- .github/workflows/dev_pr.js | 69 +++++++++++++------ ci/scripts/jni_manylinux_build.sh | 18 ++++- .../codegen/templates/UnionListWriter.java | 19 +++-- 3 files changed, 76 insertions(+), 30 deletions(-) diff --git a/.github/workflows/dev_pr.js b/.github/workflows/dev_pr.js index 13acc946e1..05c3a0c8dc 100644 --- a/.github/workflows/dev_pr.js +++ b/.github/workflows/dev_pr.js @@ -122,15 +122,20 @@ module.exports = { const title = context.payload.pull_request.title; if (title.startsWith("MINOR: ")) { console.log("PR is a minor PR"); - return {"issue": null}; + return {"issue": null, "type": "minor"}; } - const match = title.match(/^GH-([0-9]+): .*$/); + const match = title.match(/^(GH|DX)-([0-9]+): .*$/); if (match === null) { - core.setFailed("Invalid PR title format. Must either be MINOR: or GH-NNN:"); - return {"issue": null}; + core.setFailed("Invalid PR title format. Must either be MINOR:, GH-NNN:, or DX-NNN:"); + return {"issue": null, "type": null}; } - return {"issue": parseInt(match[1], 10)}; + + const issueType = match[1]; // "GH" or "DX" + const issueNumber = parseInt(match[2], 10); + + console.log(`PR references ${issueType}-${issueNumber}`); + return {"issue": issueNumber, "type": issueType}; }, apply_labels: async function({core, github, context}) { @@ -203,9 +208,28 @@ See [CONTRIBUTING.md](https://github.com/apache/arrow-java/blob/main/CONTRIBUTIN console.log("This is a MINOR PR"); return; } - const expected = `https://github.com/apache/arrow-java/issues/${issue.issue}`; - const query = ` + // Handle Jira tickets (DX-NNN) + if (issue.type === "DX") { + const jiraUrl = `https://dremio.atlassian.net/browse/DX-${issue.issue}`; + console.log(`This PR references Jira ticket: ${jiraUrl}`); + + // Add a comment with the Jira link + const comment_tag = "jira_link_comment"; + const maybe_comment_id = await have_comment(github, context, context.payload.pull_request.number, comment_tag); + const body_text = ` +**Related Jira Ticket:** [DX-${issue.issue}](${jiraUrl})`; + + await upsert_comment(github, maybe_comment_id, body_text, true); + console.log("Added/updated Jira link comment"); + return; + } + + // Handle GitHub issues (GH-NNN) + if (issue.type === "GH") { + const expected = `https://github.com/apache/arrow-java/issues/${issue.issue}`; + + const query = ` query($owner: String!, $name: String!, $number: Int!) { repository(owner: $owner, name: $name) { pullRequest(number: $number) { @@ -220,22 +244,23 @@ query($owner: String!, $name: String!, $number: Int!) { } }`; - const result = await github.graphql(query, { - owner: context.repo.owner, - name: context.repo.repo, - number: context.payload.pull_request.number, - }); - const issues = result.repository.pullRequest.closingIssuesReferences.edges; - console.log(issues); - - for (const link of issues) { - console.log(`PR is linked to ${link.node.number}`); - if (link.node.number === issue.issue) { - console.log(`Found link to ${expected}`); - return; + const result = await github.graphql(query, { + owner: context.repo.owner, + name: context.repo.repo, + number: context.payload.pull_request.number, + }); + const issues = result.repository.pullRequest.closingIssuesReferences.edges; + console.log(issues); + + for (const link of issues) { + console.log(`PR is linked to ${link.node.number}`); + if (link.node.number === issue.issue) { + console.log(`Found link to ${expected}`); + return; + } } + console.log(`Did not find link to ${expected}`); + core.setFailed("Missing link to issue in title"); } - console.log(`Did not find link to ${expected}`); - core.setFailed("Missing link to issue in title"); }, }; diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh index 148d2e02f6..0c63fc3408 100755 --- a/ci/scripts/jni_manylinux_build.sh +++ b/ci/scripts/jni_manylinux_build.sh @@ -25,6 +25,22 @@ set -euo pipefail # shellcheck source=ci/scripts/util_log.sh . "$(dirname "${0}")/util_log.sh" +github_actions_group_begin "Update llvm" + vcpkg install \ + --debug \ + --clean-after-build \ + --x-install-root=${VCPKG_ROOT}/installed \ + --x-manifest-root=/arrow/ci/vcpkg \ + --overlay-ports=/arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=dev \ + --x-feature=flight \ + --x-feature=gcs \ + --x-feature=json \ + --x-feature=parquet \ + --x-feature=gandiva \ + --x-feature=s3 +github_actions_group_end + github_actions_group_begin "Prepare arguments" source_dir="$(cd "${1}" && pwd)" arrow_dir="$(cd "${2}" && pwd)" @@ -57,7 +73,7 @@ devtoolset_version="$(rpm -qa "devtoolset-*-gcc" --queryformat '%{VERSION}' | gr devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" : "${ARROW_ACERO:=ON}" export ARROW_ACERO -: "${ARROW_BUILD_TESTS:=ON}" +: "${ARROW_BUILD_TESTS:=OFF}" export ARROW_BUILD_TESTS : "${ARROW_DATASET:=ON}" export ARROW_DATASET diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index 9424533f29..80383254f0 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -53,6 +53,7 @@ public class Union${listName}Writer extends AbstractFieldWriter { private boolean inStruct = false; private boolean listStarted = false; private String structName; + private ArrowType extensionType; <#if listName == "LargeList" || listName == "LargeListView"> private static final long OFFSET_WIDTH = 8; <#else> @@ -203,13 +204,13 @@ public MapWriter map(String name, boolean keysSorted) { @Override public ExtensionWriter extension(ArrowType arrowType) { - writer.extension(arrowType); - return writer; + extensionType = arrowType; + return this; } + @Override public ExtensionWriter extension(String name, ArrowType arrowType) { - ExtensionWriter extensionWriter = writer.extension(name, arrowType); - return extensionWriter; + return writer.extension(name, arrowType); } <#if listName == "LargeList"> @@ -336,14 +337,18 @@ public void writeNull() { @Override public void writeExtension(Object value) { - writer.writeExtension(value); + writer.writeExtension(value, extensionType); + writer.setPosition(writer.idx() + 1); } + @Override - public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { - writer.addExtensionTypeWriterFactory(var1); + public void writeExtension(Object value, ArrowType type) { + writeExtension(value); } + public void write(ExtensionHolder var1) { writer.write(var1); + writer.setPosition(writer.idx() + 1); } <#list vv.types as type> From 1f962d37c449efca32d498596d4100668d792ed0 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 25 Feb 2026 12:03:29 -0600 Subject: [PATCH 3/5] WIP --- .github/workflows/jarbuild.yml | 21 +++-- .pre-commit-config.yaml | 5 +- ci/scripts/jni_macos_build.sh | 79 ++++++++++++++++++- ci/scripts/jni_manylinux_build.sh | 26 +++--- .../codegen/templates/UnionListWriter.java | 19 ++--- 5 files changed, 112 insertions(+), 38 deletions(-) diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index bb0509ebb4..3c78dd0639 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -228,12 +228,13 @@ jobs: with: repository: Microsoft/vcpkg path: arrow/vcpkg + fetch-depth: 0 - name: Install vcpkg run: | cd arrow/vcpkg ./bootstrap-vcpkg.sh - echo "VCPKG_ROOT=${PWD}/arrow/vcpkg" >> ${GITHUB_ENV} - echo "${PWD}/arrow/vcpkg" >> ${GITHUB_PATH} + echo "VCPKG_ROOT_LOCAL=${PWD}" >> ${GITHUB_ENV} + echo "${PWD}" >> ${GITHUB_PATH} - name: Clean up disk space run: | echo "=== Free disk space before cleanup ===" @@ -284,25 +285,28 @@ jobs: brew bundle --file=arrow/cpp/Brewfile # Clean up any existing LLVM installations in favor of vcpkg. - brew uninstall llvm || : + # Need to uninstall all versioned LLVM packages (llvm@18, llvm@17, etc.) + for llvm_pkg in $(brew list | grep -E '^llvm(@[0-9]+)?$'); do + brew uninstall "${llvm_pkg}" || : + done # We want to link aws-sdk-cpp statically but Homebrew's # aws-sdk-cpp provides only shared library. If we have # Homebrew's aws-sdk-cpp, our build mix Homebrew's # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. - brew uninstall aws-sdk-cpp + brew uninstall aws-sdk-cpp || : # We want to use bundled RE2 for static linking. If # Homebrew's RE2 is installed, its header file may be used. # We uninstall Homebrew's RE2 to ensure using bundled RE2. brew uninstall grpc || : # gRPC depends on RE2 brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too - brew uninstall re2 + brew uninstall re2 || : # We want to use bundled Protobuf for static linking. If # Homebrew's Protobuf is installed, its library file may be # used on test We uninstall Homebrew's Protobuf to ensure using # bundled Protobuf. - brew uninstall protobuf + brew uninstall protobuf || : echo "" echo "=== Free disk space before LLVM build ===" @@ -312,7 +316,8 @@ jobs: # Use vcpkg to install LLVM. vcpkg install \ --clean-after-build \ - --x-install-root=${VCPKG_ROOT}/installed \ + --vcpkg-root=${VCPKG_ROOT_LOCAL} \ + --x-install-root=${VCPKG_ROOT_LOCAL}/installed \ --x-manifest-root=arrow/ci/vcpkg \ --overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \ --x-feature=gandiva-llvm @@ -558,7 +563,7 @@ jobs: # GH-499: How to create release notes? echo "Creating release: ${{ steps.commit_ids.outputs.release_tag }}" gh release create "${{ steps.commit_ids.outputs.release_tag }}" \ - -n "Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" \ + -n "Release ${{ steps.commit_ids.outputs.release_name }} RC${{ steps.commit_ids.outputs.rc }}
Triggered by: ${{ github.actor }}
Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID
arrow_branch: ${{github.event.inputs.ARROW_BRANCH}}
arrow_repo: ${{github.event.inputs.ARROW_REPO}}
release_tag_name: ${{github.event.inputs.RELEASE_TAG_NAME}}
arrow-java branch: ${{github.ref_name}}" \ --prerelease \ --repo ${GITHUB_REPOSITORY} \ --title "Apache Arrow Java ${{ steps.commit_ids.outputs.version }} RC${{ steps.commit_ids.outputs.rc }} (arrow-java: ${{ steps.commit_ids.outputs.arrow_java_commit }}, arrow: ${{ steps.commit_ids.outputs.arrow_commit }})" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ca3cacec3b..1efb47ed70 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,7 @@ repos: hooks: - id: trailing-whitespace - id: end-of-file-fixer + - id: check-shebang-scripts-are-executable - id: check-yaml - id: check-added-large-files - id: file-contents-sorter @@ -43,8 +44,8 @@ repos: rm -f apache-arrow-java.tar.gz" always_run: true pass_filenames: false - - repo: https://github.com/koalaman/shellcheck-precommit - rev: v0.10.0 + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.10.0.1 hooks: - id: shellcheck args: diff --git a/ci/scripts/jni_macos_build.sh b/ci/scripts/jni_macos_build.sh index 1ad76266ee..77f367a37e 100755 --- a/ci/scripts/jni_macos_build.sh +++ b/ci/scripts/jni_macos_build.sh @@ -61,7 +61,7 @@ github_actions_group_begin "Building Arrow C++ libraries" install_dir="${build_dir}/cpp-install" : "${ARROW_ACERO:=ON}" export ARROW_ACERO -: "${ARROW_BUILD_TESTS:=ON}" +: "${ARROW_BUILD_TESTS:=OFF}" export ARROW_BUILD_TESTS : "${ARROW_DATASET:=ON}" export ARROW_DATASET @@ -78,6 +78,55 @@ export ARROW_TEST_DATA="${arrow_dir}/testing/data" export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data" export AWS_EC2_METADATA_DISABLED=TRUE +# Determine vcpkg triplet based on architecture +vcpkg_arch="$(arch)" +case "${vcpkg_arch}" in +arm64) + vcpkg_triplet="arm64-osx" + ;; +i386 | x86_64) + vcpkg_triplet="x64-osx" + ;; +*) + vcpkg_triplet="arm64-osx" + ;; +esac + +# Set LLVM_DIR to point to vcpkg-installed LLVM if VCPKG_ROOT_LOCAL is set +llvm_dir_arg="" +gandiva_cxx_flags="" +osx_sysroot_arg="" +re2_source_arg="-Dre2_SOURCE=BUNDLED" +if [ -n "${VCPKG_ROOT_LOCAL:-}" ]; then + vcpkg_installed="${VCPKG_ROOT_LOCAL}/installed/${vcpkg_triplet}" + llvm_cmake_dir="${vcpkg_installed}/share/llvm" + if [ -d "${llvm_cmake_dir}" ]; then + llvm_dir_arg="-DLLVM_DIR=${llvm_cmake_dir}" + + # vcpkg's clang needs to know where to find system headers + # Arrow's GandivaAddBitcode.cmake uses CMAKE_OSX_SYSROOT to set SDKROOT env var + sdk_path="$(xcrun --show-sdk-path)" + if [ -d "${sdk_path}" ]; then + osx_sysroot_arg="-DCMAKE_OSX_SYSROOT=${sdk_path}" + fi + + # Also pass the C++ standard library include path via ARROW_GANDIVA_PC_CXX_FLAGS + xcode_path="$(xcode-select -p)" + cxx_include_path="${xcode_path}/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1" + if [ -d "${cxx_include_path}" ]; then + gandiva_cxx_flags="-DARROW_GANDIVA_PC_CXX_FLAGS=-stdlib=libc++;-isystem;${cxx_include_path}" + fi + + # Use vcpkg's RE2 since it's installed as a dependency of LLVM + # This ensures ABI compatibility - vcpkg's RE2 uses std::string_view API + # which matches what vcpkg's LLVM and Abseil expect + re2_cmake_dir="${vcpkg_installed}/share/re2" + if [ -d "${re2_cmake_dir}" ]; then + re2_source_arg="-Dre2_ROOT=${vcpkg_installed}" + fi + fi +fi + cmake \ -S "${arrow_dir}/cpp" \ -B "${build_dir}/cpp" \ @@ -100,10 +149,13 @@ cmake \ -DCMAKE_INSTALL_PREFIX="${install_dir}" \ -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \ -DGTest_SOURCE=BUNDLED \ + "${llvm_dir_arg}" \ + "${osx_sysroot_arg}" \ + "${gandiva_cxx_flags}" \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ - -Dre2_SOURCE=BUNDLED \ + "${re2_source_arg}" \ -GNinja cmake --build "${build_dir}/cpp" --target install github_actions_group_end @@ -125,7 +177,27 @@ if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then github_actions_group_end fi -export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install" +# Pass paths to dependencies so the JNI build can find them +# Build up the JNI CMake args based on what's available +jni_cmake_args="${llvm_dir_arg}" + +# Add Protobuf path if bundled, otherwise CMake will find system Protobuf +if [ -d "${build_dir}/cpp/protobuf_ep-install" ]; then + jni_cmake_args="${jni_cmake_args} -DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install" +fi + +# RE2 path for the JNI build - prefer vcpkg's RE2 if we used it for the C++ build, +# otherwise fall back to bundled RE2 if available +if [ -n "${VCPKG_ROOT_LOCAL:-}" ]; then + vcpkg_re2_dir="${VCPKG_ROOT_LOCAL}/installed/${vcpkg_triplet}" + if [ -d "${vcpkg_re2_dir}/share/re2" ]; then + jni_cmake_args="${jni_cmake_args} -Dre2_ROOT=${vcpkg_re2_dir}" + fi +elif [ -d "${build_dir}/cpp/re2_ep-install" ]; then + jni_cmake_args="${jni_cmake_args} -Dre2_ROOT=${build_dir}/cpp/re2_ep-install" +fi + +export JAVA_JNI_CMAKE_ARGS="${jni_cmake_args}" "${source_dir}/ci/scripts/jni_build.sh" \ "${source_dir}" \ "${install_dir}" \ @@ -153,6 +225,7 @@ archery linking check-dependencies \ --allow libncurses \ --allow libobjc \ --allow libz \ + --allow libz3 \ "arrow_cdata_jni/${normalized_arch}/libarrow_cdata_jni.dylib" \ "arrow_dataset_jni/${normalized_arch}/libarrow_dataset_jni.dylib" \ "arrow_orc_jni/${normalized_arch}/libarrow_orc_jni.dylib" \ diff --git a/ci/scripts/jni_manylinux_build.sh b/ci/scripts/jni_manylinux_build.sh index 0c63fc3408..b097a1d2e5 100755 --- a/ci/scripts/jni_manylinux_build.sh +++ b/ci/scripts/jni_manylinux_build.sh @@ -26,19 +26,19 @@ set -euo pipefail . "$(dirname "${0}")/util_log.sh" github_actions_group_begin "Update llvm" - vcpkg install \ - --debug \ - --clean-after-build \ - --x-install-root=${VCPKG_ROOT}/installed \ - --x-manifest-root=/arrow/ci/vcpkg \ - --overlay-ports=/arrow/ci/vcpkg/overlay/llvm/ \ - --x-feature=dev \ - --x-feature=flight \ - --x-feature=gcs \ - --x-feature=json \ - --x-feature=parquet \ - --x-feature=gandiva \ - --x-feature=s3 +vcpkg install \ + --debug \ + --clean-after-build \ + --x-install-root="${VCPKG_ROOT}/installed" \ + --x-manifest-root=/arrow/ci/vcpkg \ + --overlay-ports=/arrow/ci/vcpkg/overlay/llvm/ \ + --x-feature=dev \ + --x-feature=flight \ + --x-feature=gcs \ + --x-feature=json \ + --x-feature=parquet \ + --x-feature=gandiva \ + --x-feature=s3 github_actions_group_end github_actions_group_begin "Prepare arguments" diff --git a/vector/src/main/codegen/templates/UnionListWriter.java b/vector/src/main/codegen/templates/UnionListWriter.java index 80383254f0..9424533f29 100644 --- a/vector/src/main/codegen/templates/UnionListWriter.java +++ b/vector/src/main/codegen/templates/UnionListWriter.java @@ -53,7 +53,6 @@ public class Union${listName}Writer extends AbstractFieldWriter { private boolean inStruct = false; private boolean listStarted = false; private String structName; - private ArrowType extensionType; <#if listName == "LargeList" || listName == "LargeListView"> private static final long OFFSET_WIDTH = 8; <#else> @@ -204,13 +203,13 @@ public MapWriter map(String name, boolean keysSorted) { @Override public ExtensionWriter extension(ArrowType arrowType) { - extensionType = arrowType; - return this; + writer.extension(arrowType); + return writer; } - @Override public ExtensionWriter extension(String name, ArrowType arrowType) { - return writer.extension(name, arrowType); + ExtensionWriter extensionWriter = writer.extension(name, arrowType); + return extensionWriter; } <#if listName == "LargeList"> @@ -337,18 +336,14 @@ public void writeNull() { @Override public void writeExtension(Object value) { - writer.writeExtension(value, extensionType); - writer.setPosition(writer.idx() + 1); + writer.writeExtension(value); } - @Override - public void writeExtension(Object value, ArrowType type) { - writeExtension(value); + public void addExtensionTypeWriterFactory(ExtensionTypeWriterFactory var1) { + writer.addExtensionTypeWriterFactory(var1); } - public void write(ExtensionHolder var1) { writer.write(var1); - writer.setPosition(writer.idx() + 1); } <#list vv.types as type> From caed5a5ce319fe43d5a0e12ffb76cedef8e1c8c6 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 25 Feb 2026 12:11:08 -0600 Subject: [PATCH 4/5] Update build workflows with the most recent actions version --- .github/workflows/dev.yml | 2 +- .github/workflows/jarbuild.yml | 6 +++--- .github/workflows/rc.yml | 8 ++++---- .github/workflows/test.yml | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 2ac230f635..0de6e02f57 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -42,7 +42,7 @@ jobs: with: python-version: '3.x' - name: pre-commit (cache) - uses: actions/cache@v4 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ~/.cache/pre-commit key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} diff --git a/.github/workflows/jarbuild.yml b/.github/workflows/jarbuild.yml index 3c78dd0639..503d5a9ffc 100644 --- a/.github/workflows/jarbuild.yml +++ b/.github/workflows/jarbuild.yml @@ -155,7 +155,7 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Cache - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .docker key: jni-linux-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} @@ -332,7 +332,7 @@ jobs: run: | echo "CCACHE_DIR=${PWD}/ccache" >> ${GITHUB_ENV} - name: Cache ccache - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ccache key: jni-macos-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} @@ -401,7 +401,7 @@ jobs: repository: apache/arrow-testing path: testing - name: Cache ~/.m2 - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ~/.m2 key: binaries-build-${{ hashFiles('**/*.java', '**/pom.xml') }} diff --git a/.github/workflows/rc.yml b/.github/workflows/rc.yml index 4fae167500..79abfa5628 100644 --- a/.github/workflows/rc.yml +++ b/.github/workflows/rc.yml @@ -139,7 +139,7 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Cache - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .docker key: jni-linux-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} @@ -266,7 +266,7 @@ jobs: run: | echo "CCACHE_DIR=${PWD}/ccache" >> ${GITHUB_ENV} - name: Cache ccache - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ccache key: jni-macos-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} @@ -342,7 +342,7 @@ jobs: run: | echo "CCACHE_DIR=${PWD}/ccache" >> ${GITHUB_ENV} - name: Cache ccache - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ccache key: jni-windows-${{ matrix.platform.arch }}-${{ hashFiles('arrow/cpp/**') }} @@ -416,7 +416,7 @@ jobs: repository: apache/arrow-testing path: testing - name: Cache ~/.m2 - uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: ~/.m2 key: binaries-build-${{ hashFiles('**/*.java', '**/pom.xml') }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3ea5d8a4b6..d3a8259276 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -63,7 +63,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .docker key: maven-${{ matrix.jdk }}-${{ matrix.maven }}-${{ hashFiles('compose.yaml', '**/pom.xml', '**/*.java') }} @@ -180,7 +180,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .docker key: integration-conda-${{ hashFiles('cpp/**') }} From 063c5a81bbe1afd64a1080f344bcc24d6c279cb8 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Wed, 25 Feb 2026 12:47:36 -0600 Subject: [PATCH 5/5] Fix deprecated macOS 13 runner - update to macOS 15-intel --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d3a8259276..23a28da811 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -89,7 +89,7 @@ jobs: include: - arch: AMD64 jdk: 11 - macos: 13 + macos: 15-intel - arch: AArch64 jdk: 11 macos: latest