From fb253b4e540c6ec9f02556b7a2c8f086651ab34b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 15:39:54 +0100 Subject: [PATCH 01/27] POC: [Python] Testing scikit-build-core as build backend for PyArrow --- python/pyproject.toml | 28 +-- python/setup.py | 483 ------------------------------------------ 2 files changed, 15 insertions(+), 496 deletions(-) delete mode 100755 python/setup.py diff --git a/python/pyproject.toml b/python/pyproject.toml index 217dba81b873..ad5b5126d559 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -17,16 +17,14 @@ [build-system] requires = [ + "scikit-build-core", "cython >= 3.1", # Needed for build-time stub docstring extraction "libcst>=1.8.6", "numpy>=1.25", - # configuring setuptools_scm in pyproject.toml requires - # versions released after 2022 "setuptools_scm[toml]>=8", - "setuptools>=77", ] -build-backend = "setuptools.build_meta" +build-backend = "scikit_build_core.build" [project] name = "pyarrow" @@ -81,16 +79,20 @@ exclude = [ '\._.*$', ] -[tool.setuptools] -zip-safe=false -include-package-data=true +[tool.scikit-build] +cmake.build-type = "Release" +metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" +sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] +wheel.packages = ["pyarrow"] +# CMakeLists.txt installs to DESTINATION ".". 
This maps that into pyarrow/ +# (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) +wheel.install-dir = "pyarrow" -[tool.setuptools.packages.find] -include = ["pyarrow"] -namespaces = false - -[tool.setuptools.package-data] -pyarrow = ["*.pxd", "*.pyi", "*.pyx", "includes/*.pxd", "py.typed"] +[tool.scikit-build.cmake.define] +PYARROW_BUNDLE_ARROW_CPP = {env = "PYARROW_BUNDLE_ARROW_CPP", default = "OFF"} +PYARROW_BUNDLE_CYTHON_CPP = {env = "PYARROW_BUNDLE_CYTHON_CPP", default = "OFF"} +PYARROW_GENERATE_COVERAGE = {env = "PYARROW_GENERATE_COVERAGE", default = "OFF"} +PYARROW_CXXFLAGS = {env = "PYARROW_CXXFLAGS", default = ""} [tool.setuptools_scm] root = '..' diff --git a/python/setup.py b/python/setup.py deleted file mode 100755 index 02e7cb4614de..000000000000 --- a/python/setup.py +++ /dev/null @@ -1,483 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import contextlib -import os -import os.path -from os.path import join as pjoin -import re -import shlex -import shutil -import sys -import warnings - -if sys.version_info >= (3, 10): - import sysconfig -else: - # Get correct EXT_SUFFIX on Windows (https://bugs.python.org/issue39825) - from distutils import sysconfig - -from setuptools import setup, Extension, Distribution -from setuptools.command.sdist import sdist - -from Cython.Distutils import build_ext as _build_ext -import Cython - -# Check if we're running 64-bit Python -is_64_bit = sys.maxsize > 2**32 - -# We can't use sys.platform in a cross-compiling situation -# as here it may be set to the host not target platform -is_emscripten = ( - sysconfig.get_config_var("SOABI") - and sysconfig.get_config_var("SOABI").find("emscripten") != -1 -) - - -if Cython.__version__ < '3.1': - raise Exception( - 'Please update your Cython version. Supported Cython >= 3.1') - -setup_dir = os.path.abspath(os.path.dirname(__file__)) - -ext_suffix = sysconfig.get_config_var('EXT_SUFFIX') - - -@contextlib.contextmanager -def changed_dir(dirname): - oldcwd = os.getcwd() - os.chdir(dirname) - try: - yield - finally: - os.chdir(oldcwd) - - -def strtobool(val): - """Convert a string representation of truth to true (1) or false (0). - - True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values - are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if - 'val' is anything else. - """ - # Copied from distutils - val = val.lower() - if val in ('y', 'yes', 't', 'true', 'on', '1'): - return 1 - elif val in ('n', 'no', 'f', 'false', 'off', '0'): - return 0 - else: - raise ValueError("invalid truth value %r" % (val,)) - - -MSG_DEPR_SETUP_BUILD_FLAGS = """ - !! - - *********************************************************************** - The '{}' flag is being passed to setup.py, but this is - deprecated. 
- - If a certain component is available in Arrow C++, it will automatically - be enabled for the PyArrow build as well. If you want to force the - build of a certain component, you can still use the - PYARROW_WITH_$COMPONENT environment variable. - *********************************************************************** - - !! -""" - - -class build_ext(_build_ext): - _found_names = () - - def build_extensions(self): - import numpy - numpy_incl = numpy.get_include() - - self.extensions = [ext for ext in self.extensions - if ext.name != '__dummy__'] - - for ext in self.extensions: - if (hasattr(ext, 'include_dirs') and - numpy_incl not in ext.include_dirs): - ext.include_dirs.append(numpy_incl) - _build_ext.build_extensions(self) - - def run(self): - self._run_cmake() - self._update_stubs() - _build_ext.run(self) - - def _update_stubs(self): - """Copy stubs to build directory, then inject docstrings into the copies.""" - if is_emscripten: - # stubs are not supported in Emscripten build - return - stubs_dir = pjoin(setup_dir, 'pyarrow-stubs') - if not os.path.exists(stubs_dir): - return - - build_cmd = self.get_finalized_command('build') - build_lib = os.path.abspath(build_cmd.build_lib) - - # Copy clean stubs to build directory first - self._copy_stubs(stubs_dir, build_lib) - - # Inject docstrings into the build copies (not the source stubs). - # We pass build_lib as stubs_dir since it mirrors the pyarrow-stubs/ - # directory structure (both contain a pyarrow/ subdirectory with .pyi - # files), so the namespace resolution logic works identically. 
- import importlib.util - spec = importlib.util.spec_from_file_location( - "update_stub_docstrings", - pjoin(setup_dir, 'scripts', 'update_stub_docstrings.py')) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - mod.add_docstrings_from_build(build_lib, build_lib) - - def _copy_stubs(self, stubs_dir, build_lib): - """Copy .pyi stub files to the build directory.""" - src_dir = pjoin(stubs_dir, 'pyarrow') - dest_dir = pjoin(build_lib, 'pyarrow') - - if not os.path.exists(src_dir): - return - - print(f"-- Copying stubs: {src_dir} -> {dest_dir}") - for root, dirs, files in os.walk(src_dir): - for fname in files: - if fname.endswith('.pyi'): - src = pjoin(root, fname) - rel_path = os.path.relpath(src, src_dir) - dest = pjoin(dest_dir, rel_path) - os.makedirs(os.path.dirname(dest), exist_ok=True) - shutil.copy2(src, dest) - - # adapted from cmake_build_ext in dynd-python - # github.com/libdynd/dynd-python - - description = "Build the C-extensions for arrow" - user_options = ([('cmake-generator=', None, 'CMake generator'), - ('extra-cmake-args=', None, 'extra arguments for CMake'), - ('build-type=', None, - 'build type (debug or release), default release'), - ('boost-namespace=', None, - 'namespace of boost (default: boost)'), - ('with-cuda', None, 'build the Cuda extension'), - ('with-flight', None, 'build the Flight extension'), - ('with-substrait', None, 'build the Substrait extension'), - ('with-acero', None, 'build the Acero Engine extension'), - ('with-dataset', None, 'build the Dataset extension'), - ('with-parquet', None, 'build the Parquet extension'), - ('with-parquet-encryption', None, - 'build the Parquet encryption extension'), - ('with-azure', None, - 'build the Azure Blob Storage extension'), - ('with-gcs', None, - 'build the Google Cloud Storage (GCS) extension'), - ('with-s3', None, 'build the Amazon S3 extension'), - ('with-static-parquet', None, 'link parquet statically'), - ('with-static-boost', None, 'link boost 
statically'), - ('with-orc', None, 'build the ORC extension'), - ('with-gandiva', None, 'build the Gandiva extension'), - ('generate-coverage', None, - 'enable Cython code coverage'), - ('bundle-boost', None, - 'bundle the (shared) Boost libraries'), - ('bundle-cython-cpp', None, - 'bundle generated Cython C++ code ' - '(used for code coverage)'), - ('bundle-arrow-cpp', None, - 'bundle the Arrow C++ libraries'), - ('bundle-arrow-cpp-headers', None, - 'bundle the Arrow C++ headers')] + - _build_ext.user_options) - - def initialize_options(self): - _build_ext.initialize_options(self) - self.cmake_generator = os.environ.get('PYARROW_CMAKE_GENERATOR') - if not self.cmake_generator and sys.platform == 'win32': - self.cmake_generator = 'Visual Studio 15 2017 Win64' - self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '') - self.build_type = os.environ.get('PYARROW_BUILD_TYPE', - 'release').lower() - - self.cmake_cxxflags = os.environ.get('PYARROW_CXXFLAGS', '') - - if sys.platform == 'win32': - # Cannot do debug builds in Windows unless Python itself is a debug - # build - if not hasattr(sys, 'gettotalrefcount'): - self.build_type = 'release' - - self.with_azure = None - self.with_gcs = None - self.with_s3 = None - self.with_hdfs = None - self.with_cuda = None - self.with_substrait = None - self.with_flight = None - self.with_acero = None - self.with_dataset = None - self.with_parquet = None - self.with_parquet_encryption = None - self.with_orc = None - self.with_gandiva = None - - self.generate_coverage = strtobool( - os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) - self.bundle_arrow_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0')) - self.bundle_cython_cpp = strtobool( - os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) - - CYTHON_MODULE_NAMES = [ - 'lib', - '_fs', - '_csv', - '_json', - '_compute', - '_cuda', - '_flight', - '_dataset', - '_dataset_orc', - '_dataset_parquet', - '_acero', - '_feather', - '_parquet', - 
'_parquet_encryption', - '_pyarrow_cpp_tests', - '_orc', - '_azurefs', - '_gcsfs', - '_s3fs', - '_substrait', - '_hdfs', - 'gandiva'] - - def _run_cmake(self): - # check if build_type is correctly passed / set - if self.build_type.lower() not in ('release', 'debug', - 'relwithdebinfo'): - raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to " - "be 'release', 'debug' or 'relwithdebinfo'") - - # The directory containing this setup.py - source = os.path.dirname(os.path.abspath(__file__)) - - # The staging directory for the module being built - build_cmd = self.get_finalized_command('build') - saved_cwd = os.getcwd() - build_temp = pjoin(saved_cwd, build_cmd.build_temp) - build_lib = pjoin(saved_cwd, build_cmd.build_lib) - - if not os.path.isdir(build_temp): - self.mkpath(build_temp) - - if self.inplace: - # a bit hacky - build_lib = saved_cwd - - install_prefix = pjoin(build_lib, "pyarrow") - - # Change to the build directory - with changed_dir(build_temp): - # Detect if we built elsewhere - if os.path.isfile('CMakeCache.txt'): - cachefile = open('CMakeCache.txt', 'r') - cachedir = re.search('CMAKE_CACHEFILE_DIR:INTERNAL=(.*)', - cachefile.read()).group(1) - cachefile.close() - if (cachedir != build_temp): - build_base = pjoin(saved_cwd, build_cmd.build_base) - print(f"-- Skipping build. 
Temp build {build_temp} does " - f"not match cached dir {cachedir}") - print("---- For a clean build you might want to delete " - f"{build_base}.") - return - - cmake_options = [ - f'-DCMAKE_INSTALL_PREFIX={install_prefix}', - f'-DPYTHON_EXECUTABLE={sys.executable}', - f'-DPython3_EXECUTABLE={sys.executable}', - f'-DPYARROW_CXXFLAGS={self.cmake_cxxflags}', - ] - - def append_cmake_bool(value, varname): - cmake_options.append(f'-D{varname}={"on" if value else "off"}') - - def append_cmake_component(flag, varname): - # only pass this to cmake if the user pass the --with-component - # flag to setup.py build_ext - if flag is not None: - flag_name = ( - "--with-" - + varname.removeprefix("PYARROW_").lower().replace("_", "-")) - warnings.warn( - MSG_DEPR_SETUP_BUILD_FLAGS.format(flag_name), - UserWarning, stacklevel=2 - ) - append_cmake_bool(flag, varname) - - if self.cmake_generator: - cmake_options += ['-G', self.cmake_generator] - - append_cmake_component(self.with_cuda, 'PYARROW_CUDA') - append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT') - append_cmake_component(self.with_flight, 'PYARROW_FLIGHT') - append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA') - append_cmake_component(self.with_acero, 'PYARROW_ACERO') - append_cmake_component(self.with_dataset, 'PYARROW_DATASET') - append_cmake_component(self.with_orc, 'PYARROW_ORC') - append_cmake_component(self.with_parquet, 'PYARROW_PARQUET') - append_cmake_component(self.with_parquet_encryption, - 'PYARROW_PARQUET_ENCRYPTION') - append_cmake_component(self.with_azure, 'PYARROW_AZURE') - append_cmake_component(self.with_gcs, 'PYARROW_GCS') - append_cmake_component(self.with_s3, 'PYARROW_S3') - append_cmake_component(self.with_hdfs, 'PYARROW_HDFS') - - append_cmake_bool(self.bundle_arrow_cpp, - 'PYARROW_BUNDLE_ARROW_CPP') - append_cmake_bool(self.bundle_cython_cpp, - 'PYARROW_BUNDLE_CYTHON_CPP') - append_cmake_bool(self.generate_coverage, - 'PYARROW_GENERATE_COVERAGE') - - cmake_options.append( - 
f'-DCMAKE_BUILD_TYPE={self.build_type.lower()}') - - extra_cmake_args = shlex.split(self.extra_cmake_args) - - build_tool_args = [] - if sys.platform == 'win32': - if not is_64_bit: - raise RuntimeError('Not supported on 32-bit Windows') - else: - build_tool_args.append('--') - if os.environ.get('PYARROW_BUILD_VERBOSE', '0') == '1': - cmake_options.append('-DCMAKE_VERBOSE_MAKEFILE=ON') - parallel = os.environ.get('PYARROW_PARALLEL') - if parallel: - build_tool_args.append(f'-j{parallel}') - - # Generate the build files - if is_emscripten: - print("-- Running emcmake cmake for PyArrow on Emscripten") - self.spawn(['emcmake', 'cmake'] + extra_cmake_args + - cmake_options + [source]) - else: - print("-- Running cmake for PyArrow") - self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source]) - - print("-- Finished cmake for PyArrow") - - print("-- Running cmake --build for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - build_tool_args) - print("-- Finished cmake --build for PyArrow") - - print("-- Running cmake --build --target install for PyArrow") - self.spawn(['cmake', '--build', '.', '--config', self.build_type] + - ['--target', 'install'] + build_tool_args) - print("-- Finished cmake --build --target install for PyArrow") - - self._found_names = [] - for name in self.CYTHON_MODULE_NAMES: - built_path = pjoin(install_prefix, name + ext_suffix) - if os.path.exists(built_path): - self._found_names.append(name) - - def _get_build_dir(self): - # Get the package directory from build_py - build_py = self.get_finalized_command('build_py') - return build_py.get_package_dir('pyarrow') - - def _get_cmake_ext_path(self, name): - # This is the name of the arrow C-extension - filename = name + ext_suffix - return pjoin(self._get_build_dir(), filename) - - def get_ext_generated_cpp_source(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + ".cpp") - else: - return pjoin(name + 
".cpp") - - def get_ext_built_api_header(self, name): - if sys.platform == 'win32': - head, tail = os.path.split(name) - return pjoin(head, tail + "_api.h") - else: - return pjoin(name + "_api.h") - - def get_names(self): - return self._found_names - - def get_outputs(self): - # Just the C extensions - # regular_exts = _build_ext.get_outputs(self) - return [self._get_cmake_ext_path(name) - for name in self.get_names()] - - -class BinaryDistribution(Distribution): - def has_ext_modules(foo): - return True - - -class CopyLicenseSdist(sdist): - """Custom sdist command that copies license files from parent directory.""" - - def make_release_tree(self, base_dir, files): - # Call parent to do the normal work - super().make_release_tree(base_dir, files) - - # Define source (parent dir) and destination (sdist root) for license files - license_files = [ - ("LICENSE.txt", "../LICENSE.txt"), - ("NOTICE.txt", "../NOTICE.txt"), - ] - - for dest_name, src_path in license_files: - src_full = os.path.join(os.path.dirname(__file__), src_path) - dest_full = os.path.join(base_dir, dest_name) - - # Remove any existing file/symlink at destination - if os.path.exists(dest_full) or os.path.islink(dest_full): - os.unlink(dest_full) - - if not os.path.exists(src_full): - msg = f"Required license file not found: {src_full}" - raise FileNotFoundError(msg) - - shutil.copy2(src_full, dest_full) - print(f"Copied {src_path} to {dest_name} in sdist") - - -setup( - distclass=BinaryDistribution, - # Dummy extension to trigger build_ext - ext_modules=[Extension('__dummy__', sources=[])], - cmdclass={ - 'build_ext': build_ext, - 'sdist': CopyLicenseSdist, - }, -) From 765e3ba026a885fd319ee34755402da6d0936812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 15:57:00 +0100 Subject: [PATCH 02/27] Add scikit-build-core to requirements-build.txt --- python/requirements-build.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/requirements-build.txt b/python/requirements-build.txt index c3b7aa48eb67..2e8e8e774ecf 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,5 +1,5 @@ cython>=3.1 libcst>=1.8.6 numpy>=1.25 +scikit-build-core setuptools_scm>=8 -setuptools>=77 From 8732d3844605834f338f31ededc97885525d73ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 16:10:59 +0100 Subject: [PATCH 03/27] Try fixing source test tests on dev PR --- .github/workflows/dev.yml | 2 +- dev/release/02-source-test.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 59171ddcaae0..d8ee21761a52 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit openssl - pip install "cython>=3.1" setuptools pytest requests setuptools-scm + pip install build "cython>=3.1" pytest requests scikit-build-core setuptools-scm - name: Run Release Test shell: bash run: | diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index 5bd7c717709f..fdd1db9c6072 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -64,7 +64,7 @@ def test_symbolic_links def test_python_version source Dir.chdir("#{@tag_name_no_rc}/python") do - sh("python3", "setup.py", "sdist") + sh("python", "-m", "build", "--sdist") if on_release_branch? 
pyarrow_source_archive = "dist/pyarrow-#{@release_version}.tar.gz" else From 0332d2e26bdbe054a3f6ac2dd201c30526ba8fdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 16:14:17 +0100 Subject: [PATCH 04/27] Update some more dependencies --- ci/conda_env_python.txt | 2 +- python/requirements-wheel-build.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 33ac193f86e8..dd16d66b725d 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -29,5 +29,5 @@ numpy>=1.16.6 pytest pytest-faulthandler s3fs>=2023.10.0 -setuptools>=77 +scikit-build-core setuptools_scm>=8 diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt index 6a2c62212437..a3ac13967729 100644 --- a/python/requirements-wheel-build.txt +++ b/python/requirements-wheel-build.txt @@ -3,6 +3,6 @@ cython>=3.1 # Needed for build-time stub docstring extraction libcst>=1.8.6 numpy>=2.0.0 +scikit-build-core setuptools_scm -setuptools>=77 wheel From c675ee7b1fabdb26045762326b2f588bba2252d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 16:42:58 +0100 Subject: [PATCH 05/27] Try creating a wrapper backend to just copy licenses before anything else happens --- python/_build_backend/__init__.py | 41 +++++++++++++++++++++++++++++++ python/pyproject.toml | 5 ++-- 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 python/_build_backend/__init__.py diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py new file mode 100644 index 000000000000..88519e7f34bd --- /dev/null +++ b/python/_build_backend/__init__.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Build backend wrapper that copies license files from the repo root +before delegating to scikit-build-core. + +Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level above +python/. The pyproject-metadata validator requires that files listed in +``project.license-files`` exist inside the project directory, so we copy them +in before anything else happens. +""" + +import shutil +from pathlib import Path + +from scikit_build_core.build import * # noqa: F401,F403 + +_PYTHON_DIR = Path(__file__).resolve().parent.parent +_REPO_ROOT = _PYTHON_DIR.parent + +for _name in ("LICENSE.txt", "NOTICE.txt"): + _src = _REPO_ROOT / _name + _dst = _PYTHON_DIR / _name + # If file doesn't exist, example on an sdist, this is just no-op. 
+ if _src.exists() and not _dst.exists(): + shutil.copy2(_src, _dst) diff --git a/python/pyproject.toml b/python/pyproject.toml index ad5b5126d559..9020aadebab4 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,7 +24,8 @@ requires = [ "numpy>=1.25", "setuptools_scm[toml]>=8", ] -build-backend = "scikit_build_core.build" +build-backend = "_build_backend" +backend-path = ["."] [project] name = "pyarrow" @@ -82,7 +83,7 @@ exclude = [ [tool.scikit-build] cmake.build-type = "Release" metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" -sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] +sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/", "LICENSE.txt", "NOTICE.txt"] wheel.packages = ["pyarrow"] # CMakeLists.txt installs to DESTINATION ".". This maps that into pyarrow/ # (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) From d3d25bf6b2dbda8cb3e99947282ceb1cb94e0c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 12 Feb 2026 17:02:23 +0100 Subject: [PATCH 06/27] Remove custom build backend and just use links --- python/.gitignore | 4 --- python/LICENSE.txt | 1 + python/NOTICE.txt | 1 + python/_build_backend/__init__.py | 41 ------------------------------- python/pyproject.toml | 5 ++-- 5 files changed, 4 insertions(+), 48 deletions(-) create mode 120000 python/LICENSE.txt create mode 120000 python/NOTICE.txt delete mode 100644 python/_build_backend/__init__.py diff --git a/python/.gitignore b/python/.gitignore index ce97ba4af623..858c983d49c0 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -44,7 +44,3 @@ manylinux1/arrow nm_arrow.log visible_symbols.log -# the purpose of the custom SDist class in setup.py is to include these files -# in the sdist tarball, but we don't want to track duplicates -LICENSE.txt -NOTICE.txt diff --git a/python/LICENSE.txt b/python/LICENSE.txt new file mode 120000 index 000000000000..4ab43736a839 --- /dev/null +++ 
b/python/LICENSE.txt @@ -0,0 +1 @@ +../LICENSE.txt \ No newline at end of file diff --git a/python/NOTICE.txt b/python/NOTICE.txt new file mode 120000 index 000000000000..eb9f24e040b5 --- /dev/null +++ b/python/NOTICE.txt @@ -0,0 +1 @@ +../NOTICE.txt \ No newline at end of file diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py deleted file mode 100644 index 88519e7f34bd..000000000000 --- a/python/_build_backend/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Build backend wrapper that copies license files from the repo root -before delegating to scikit-build-core. - -Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level above -python/. The pyproject-metadata validator requires that files listed in -``project.license-files`` exist inside the project directory, so we copy them -in before anything else happens. 
-""" - -import shutil -from pathlib import Path - -from scikit_build_core.build import * # noqa: F401,F403 - -_PYTHON_DIR = Path(__file__).resolve().parent.parent -_REPO_ROOT = _PYTHON_DIR.parent - -for _name in ("LICENSE.txt", "NOTICE.txt"): - _src = _REPO_ROOT / _name - _dst = _PYTHON_DIR / _name - # If file doesn't exist, example on an sdist, this is just no-op. - if _src.exists() and not _dst.exists(): - shutil.copy2(_src, _dst) diff --git a/python/pyproject.toml b/python/pyproject.toml index 9020aadebab4..ad5b5126d559 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,8 +24,7 @@ requires = [ "numpy>=1.25", "setuptools_scm[toml]>=8", ] -build-backend = "_build_backend" -backend-path = ["."] +build-backend = "scikit_build_core.build" [project] name = "pyarrow" @@ -83,7 +82,7 @@ exclude = [ [tool.scikit-build] cmake.build-type = "Release" metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" -sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/", "LICENSE.txt", "NOTICE.txt"] +sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] wheel.packages = ["pyarrow"] # CMakeLists.txt installs to DESTINATION ".". 
This maps that into pyarrow/ # (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) From 8ac25ca4367f9734d7614cee22f16085aa9dba60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 13 Feb 2026 10:20:05 +0100 Subject: [PATCH 07/27] Fix populating UPPERCASE_PYBUILD_TYPE for multi-config generators where CMAKE_BUILD_TYPE isn't populated --- python/CMakeLists.txt | 16 +++++++++++++++- python/pyproject.toml | 2 -- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index f99225284a8b..31ce2f149eae 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -352,7 +352,21 @@ set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) # Write out compile-time configuration constants -string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_PYBUILD_TYPE) +if(CMAKE_BUILD_TYPE) + string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_PYBUILD_TYPE) +else() + # For multi-config generators (Xcode and Visual Studio), + # CMAKE_BUILD_TYPE is not set at configure time. + # scikit-build-core does the right thing with cmake.build-type and + # adds the corresponding --config but does not populate CMAKE_BUILD_TYPE + # for those. In this specific case, we set the default to "RELEASE" + # as it's the most common build type for users building from source. + # This is mainly relevant for our Windows wheels, which are built with + # Visual Studio and thus use a multi-config generator with Release. + # Note that this is only used to populate config_internal.h.cmake. 
+ set(UPPERCASE_PYBUILD_TYPE "RELEASE") +endif() + configure_file("${PYARROW_CPP_SOURCE_DIR}/config_internal.h.cmake" "${PYARROW_CPP_SOURCE_DIR}/config_internal.h" ESCAPE_QUOTES) diff --git a/python/pyproject.toml b/python/pyproject.toml index ad5b5126d559..0d5e04843ad0 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -84,8 +84,6 @@ cmake.build-type = "Release" metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" sdist.include = ["pyarrow/_generated_version.py", "cmake_modules/"] wheel.packages = ["pyarrow"] -# CMakeLists.txt installs to DESTINATION ".". This maps that into pyarrow/ -# (matches old setup.py behavior: install_prefix = build_lib/pyarrow/) wheel.install-dir = "pyarrow" [tool.scikit-build.cmake.define] From 3f9fd78ec5f943ce2099ea064ebda4998de4be33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 13 Feb 2026 14:51:52 +0100 Subject: [PATCH 08/27] Add some logging --- .env | 4 ++-- ci/scripts/python_wheel_windows_build.bat | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.env b/.env index b2b5b5eb3b67..5d136890f63c 100644 --- a/.env +++ b/.env @@ -99,8 +99,8 @@ VCPKG="66c0373dc7fca549e5803087b9487edfe3aca0a1" # 2026.01.16 Release # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-02-25 -PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-02-25 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2026-03-04 +PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2026-03-04 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan". 
# See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index d69a6aac54f9..47ea59cc045a 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -138,6 +138,10 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel +%PYTHON_CMD% -c "import scikit_build_core; print(scikit_build_core.__version__)" +%PYTHON_CMD% -c "import sys; print(sys.executable); print(sys.path)" +%PYTHON_CMD% -m pip list +%PYTHON_CMD% -m pip show scikit-build-core %PYTHON_CMD% -m build --sdist --wheel . --no-isolation || exit /B 1 @REM Repair the wheel with delvewheel From f272755edbbbbfa40e99227240f6340cdca4f166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 16 Feb 2026 12:10:17 +0100 Subject: [PATCH 09/27] Remove unnecessary logging --- ci/scripts/python_wheel_windows_build.bat | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 47ea59cc045a..d69a6aac54f9 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -138,10 +138,6 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -c "import scikit_build_core; print(scikit_build_core.__version__)" -%PYTHON_CMD% -c "import sys; print(sys.executable); print(sys.path)" -%PYTHON_CMD% -m pip list -%PYTHON_CMD% -m pip show scikit-build-core %PYTHON_CMD% -m build --sdist --wheel . 
--no-isolation || exit /B 1 @REM Repair the wheel with delvewheel From 3463122f97621c9cc73f03be2d7d545ce36d683a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 16 Feb 2026 12:17:53 +0100 Subject: [PATCH 10/27] Fix sdist --- ci/scripts/python_sdist_build.sh | 2 +- python/requirements-build.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/scripts/python_sdist_build.sh b/ci/scripts/python_sdist_build.sh index dfb995184312..4f7437c423d4 100755 --- a/ci/scripts/python_sdist_build.sh +++ b/ci/scripts/python_sdist_build.sh @@ -23,5 +23,5 @@ source_dir=${1}/python pushd "${source_dir}" export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-} -${PYTHON:-python} setup.py sdist +${PYTHON:-python} -m build --sdist popd diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 2e8e8e774ecf..fdd3e68a1b16 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,3 +1,4 @@ +build cython>=3.1 libcst>=1.8.6 numpy>=1.25 From 26780d7282e23d58b477499fd8764dec1c78d979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 17 Feb 2026 11:55:22 +0100 Subject: [PATCH 11/27] Add tiny build backend wrapper on top of scikit-build-core to manage licenses --- python/_build_backend/__init__.py | 66 +++++++++++++++++++++++++++++++ python/pyproject.toml | 5 ++- 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 python/_build_backend/__init__.py diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py new file mode 100644 index 000000000000..d074f1699ac4 --- /dev/null +++ b/python/_build_backend/__init__.py @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Build backend wrapper that resolves license symlinks before delegating +to scikit-build-core. + +Arrow's LICENSE.txt and NOTICE.txt live at the repository root, one level +above python/. They are symlinked into python/ so that license-files in +pyproject.toml can reference them; otherwise project metadata fails validation. +This is done before any build backend is invoked, which is why symlinks are needed. +But when building sdist tarballs, symlinks are not copied and we end up with +broken LICENSE.txt and NOTICE.txt. + +This custom build backend only replaces the symlinks with hardlinks +before calling scikit_build_core.build.build_sdist so +that the sdist contains the actual file content. The symlinks are restored +afterwards so the git working tree stays clean.
+""" + +import os +from contextlib import contextmanager +from pathlib import Path + +from scikit_build_core.build import build_sdist as scikit_build_sdist + +LICENSE_FILES = ("LICENSE.txt", "NOTICE.txt") +PYTHON_DIR = Path(__file__).resolve().parent.parent + + +@contextmanager +def hardlink_licenses(): + # Temporarily replace symlinks with hardlinks so sdist gets real content + for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + if filepath.is_symlink(): + target = filepath.resolve() + filepath.unlink() + os.link(target, filepath) + try: + yield + finally: + # Copy back the original symlinks so git status is clean + for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + filepath.unlink() + os.symlink(f"../{name}", filepath) + + +def build_sdist(sdist_directory, config_settings=None): + with hardlink_licenses(): + return scikit_build_sdist(sdist_directory, config_settings) diff --git a/python/pyproject.toml b/python/pyproject.toml index 0d5e04843ad0..14aa37ed0453 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,7 +24,10 @@ requires = [ "numpy>=1.25", "setuptools_scm[toml]>=8", ] -build-backend = "scikit_build_core.build" +# We use a really simple build backend wrapper over scikit-build-core +# to solve licenses to work around links not being included in sdists. 
+build-backend = "_build_backend" +backend-path = ["."] [project] name = "pyarrow" From 45e49baf3bb3d10733fec135a20a8c42c2d801de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 17 Feb 2026 12:28:25 +0100 Subject: [PATCH 12/27] Import all scikit_build_core.build hooks to be available like build_wheel and others --- python/_build_backend/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index d074f1699ac4..043ea394531e 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -36,6 +36,7 @@ from contextlib import contextmanager from pathlib import Path +from scikit_build_core.build import * # noqa: F401,F403 from scikit_build_core.build import build_sdist as scikit_build_sdist LICENSE_FILES = ("LICENSE.txt", "NOTICE.txt") From af9fe7b023cb1dbee393ca4bf3e05c0907ec78a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 17 Feb 2026 19:07:56 +0100 Subject: [PATCH 13/27] Try to solve Windows license problems with links --- python/_build_backend/__init__.py | 40 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index 043ea394531e..e9a6ead8aecd 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -32,9 +32,11 @@ afterwards so the git working tree stays clean. """ -import os from contextlib import contextmanager +import os from pathlib import Path +import shutil +import sys from scikit_build_core.build import * # noqa: F401,F403 from scikit_build_core.build import build_sdist as scikit_build_sdist @@ -44,24 +46,34 @@ @contextmanager -def hardlink_licenses(): - # Temporarily replace symlinks with hardlinks so sdist gets real content +def prepare_licenses(): + # Temporarily replace symlinks with hardlinks so sdist gets real content. 
+ # On Windows we just copy the files since hardlinks might not be supported. for name in LICENSE_FILES: - filepath = PYTHON_DIR / name - if filepath.is_symlink(): - target = filepath.resolve() - filepath.unlink() - os.link(target, filepath) + parent_license = PYTHON_DIR.parent / name + pyarrow_license = PYTHON_DIR / name + if sys.platform == "win32": + # For Windows copy the files. + pyarrow_license.unlink(missing_ok=True) + shutil.copy2(parent_license, pyarrow_license) + else: + # For Unix-like systems we replace the symlink with + # a hardlink to avoid copying the file content. + if pyarrow_license.is_symlink(): + target = pyarrow_license.resolve() + pyarrow_license.unlink() + os.link(target, pyarrow_license) try: yield finally: - # Copy back the original symlinks so git status is clean - for name in LICENSE_FILES: - filepath = PYTHON_DIR / name - filepath.unlink() - os.symlink(f"../{name}", filepath) + if sys.platform != "win32": + # Copy back the original symlinks so git status is clean. 
+ for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + filepath.unlink() + os.symlink(f"../{name}", filepath) def build_sdist(sdist_directory, config_settings=None): - with hardlink_licenses(): + with prepare_licenses(): return scikit_build_sdist(sdist_directory, config_settings) From 62df334c130b458525e854bf6fd4d79b11dda6d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 12:42:23 +0100 Subject: [PATCH 14/27] Some fixes on documentation, remove remnant setup.py executions and remove usage of PYARROW_CMAKE_GENERATOR --- ci/scripts/python_build.bat | 1 - ci/scripts/python_build.sh | 4 +--- ci/scripts/python_sdist_test.sh | 2 +- ci/scripts/python_wheel_macos_build.sh | 2 +- ci/scripts/python_wheel_windows_build.bat | 1 - ci/scripts/python_wheel_xlinux_build.sh | 2 +- dev/release/verify-release-candidate.bat | 6 +++-- docs/source/developers/documentation.rst | 3 ++- .../guide/step_by_step/building.rst | 2 +- docs/source/developers/python/building.rst | 24 +++++++++---------- python/.gitignore | 4 ++-- python/examples/minimal_build/build_conda.sh | 2 +- python/examples/minimal_build/build_venv.sh | 2 +- 13 files changed, 27 insertions(+), 28 deletions(-) diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index 06f5a637223a..fbd44a1c4c2c 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -114,7 +114,6 @@ echo "=== Building Python ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% set PYARROW_WITH_DATASET=%ARROW_DATASET% diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index 36dc35a2de8b..bc606e3b60aa 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -59,7 +59,7 @@ if [ -n "${CONDA_PREFIX}" ]; then conda list fi -export 
PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} +export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} @@ -90,8 +90,6 @@ export DYLD_LIBRARY_PATH=${ARROW_HOME}/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_P rm -rf "${python_build_dir}" cp -aL "${source_dir}" "${python_build_dir}" pushd "${python_build_dir}" -# - Cannot call setup.py as it may install in the wrong directory -# on Debian/Ubuntu (ARROW-15243). # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. ${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index 98a938d970ac..26ed2f417d5d 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -25,7 +25,7 @@ export ARROW_SOURCE_DIR=${arrow_dir} export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} +export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 94f3e7ba89cb..c998a627f483 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -147,7 +147,7 @@ popd echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} -export PYARROW_BUNDLE_ARROW_CPP=1 +export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index d69a6aac54f9..b4e71f35a90b 
100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -118,7 +118,6 @@ echo "=== (%PYTHON%) Building wheel ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% set PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index 013c09765fbb..75b805dd1dce 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -155,7 +155,7 @@ check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} -export PYARROW_BUNDLE_ARROW_CPP=1 +export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 50dfc06e698b..c69dab58e767 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -132,15 +132,17 @@ popd @rem Build and import pyarrow pushd !ARROW_SOURCE!\python +pip install build || exit /B 1 pip install -r requirements-test.txt || exit /B 1 -set PYARROW_CMAKE_GENERATOR=%GENERATOR% +set CMAKE_GENERATOR=%GENERATOR% set PYARROW_WITH_FLIGHT=1 set PYARROW_WITH_PARQUET=1 set PYARROW_WITH_PARQUET_ENCRYPTION=1 set PYARROW_WITH_DATASET=1 set PYARROW_TEST_CYTHON=OFF -python setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel || exit /B 1 +set PYARROW_BUNDLE_ARROW_CPP=ON +python -m build --sdist --wheel . 
--no-isolation || exit /B 1 pytest pyarrow -v -s --enable-parquet || exit /B 1 popd diff --git a/docs/source/developers/documentation.rst b/docs/source/developers/documentation.rst index 5f0ebbdb7db2..3cdb2f23b00d 100644 --- a/docs/source/developers/documentation.rst +++ b/docs/source/developers/documentation.rst @@ -71,8 +71,9 @@ These two steps are mandatory and must be executed in order. this step requires that ``pyarrow`` library is installed in your python environment. One way to accomplish this is to follow the build instructions at :ref:`python-development` - and then run ``python setup.py install`` in arrow/python + and then run ``pip install --no-build-isolation .`` in arrow/python (it is best to do this in a dedicated conda/virtual environment). + Add ``-vv`` to the pip command to get output of the build process. You can still build the documentation without ``pyarrow`` library installed but note that Python part of the documentation diff --git a/docs/source/developers/guide/step_by_step/building.rst b/docs/source/developers/guide/step_by_step/building.rst index 5317cf06c748..7b4f42a04f5e 100644 --- a/docs/source/developers/guide/step_by_step/building.rst +++ b/docs/source/developers/guide/step_by_step/building.rst @@ -155,7 +155,7 @@ Building other Arrow libraries .. code:: console - $ python setup.py build_ext --inplace + $ pip install --no-build-isolation -vv . **Recompiling C++** diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index deb6076e44a8..539d2f93f45c 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -391,7 +391,7 @@ To build PyArrow run: .. code-block:: $ pushd arrow/python - $ python setup.py build_ext --inplace + $ pip install --no-build-isolation -vv . $ popd .. tab-item:: Windows @@ -400,7 +400,7 @@ To build PyArrow run: .. 
code-block:: $ pushd arrow\python - $ python setup.py build_ext --inplace + $ pip install --no-build-isolation -vv . $ popd .. note:: @@ -428,8 +428,8 @@ To build PyArrow run: .. code-block:: - $ set PYARROW_BUNDLE_ARROW_CPP=1 - $ python setup.py build_ext --inplace + $ set PYARROW_BUNDLE_ARROW_CPP=ON + $ pip install --no-build-isolation -vv . Note that bundled Arrow C++ libraries will not be automatically updated when rebuilding Arrow C++. @@ -444,9 +444,9 @@ artifacts before rebuilding. See :ref:`python-dev-env-variables`. By default, PyArrow will be built in release mode even if Arrow C++ has been built in debug mode. To create a debug build of PyArrow, run -``export PYARROW_BUILD_TYPE=debug`` prior to running ``python setup.py -build_ext --inplace`` above. A ``relwithdebinfo`` build can be created -similarly. +``export PYARROW_BUILD_TYPE=debug`` prior to running +``pip install --no-build-isolation -vv .`` above. +A ``relwithdebinfo`` build can be created similarly. Self-Contained Wheel ^^^^^^^^^^^^^^^^^^^^ @@ -457,13 +457,13 @@ libraries). This ensures that all necessary native libraries are bundled inside the wheel, so users can install it without needing to have Arrow or Parquet installed separately on their system. -To do this, pass the ``--bundle-arrow-cpp`` option to the build command: +To do this, set the ``PYARROW_BUNDLE_ARROW_CPP`` environment variable before building ``pyarrow``: .. code-block:: - $ pip install wheel # if not installed - $ python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \ - --bundle-arrow-cpp bdist_wheel + $ set PYARROW_BUNDLE_ARROW_CPP=ON + $ pip install build wheel # if not installed + $ python -m build --sdist --wheel . --no-isolation This option is typically only needed for releases or distribution scenarios, not for local development. 
@@ -558,7 +558,7 @@ PyArrow are: * - ``PYARROW_BUILD_TYPE`` - Build type for PyArrow (release, debug or relwithdebinfo), sets ``CMAKE_BUILD_TYPE`` - ``release`` - * - ``PYARROW_CMAKE_GENERATOR`` + * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` * - ``PYARROW_CMAKE_OPTIONS`` diff --git a/python/.gitignore b/python/.gitignore index 858c983d49c0..5ec5fdf01207 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -24,9 +24,9 @@ cython_debug # Bundled headers pyarrow/include -# setup.py working directory +# build directory build -# setup.py dist directory +# dist directory dist # Coverage .coverage diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh index 0b731638cd97..1855869cff12 100755 --- a/python/examples/minimal_build/build_conda.sh +++ b/python/examples/minimal_build/build_conda.sh @@ -95,7 +95,7 @@ rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} export PYARROW_BUILD_TYPE=Debug -export PYARROW_CMAKE_GENERATOR=Ninja +export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh python -m pip install --no-deps --no-build-isolation -vv . diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh index f462c4e9b9d0..8b6fa925e398 100755 --- a/python/examples/minimal_build/build_venv.sh +++ b/python/examples/minimal_build/build_venv.sh @@ -67,7 +67,7 @@ rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} export PYARROW_BUILD_TYPE=Debug -export PYARROW_CMAKE_GENERATOR=Ninja +export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh python -m pip install --no-deps --no-build-isolation -vv . 
From 529694f49ca6228ad1d10df16745231faa1098a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 12:43:53 +0100 Subject: [PATCH 15/27] Ooops unsaved files --- ci/scripts/python_wheel_macos_build.sh | 1 - ci/scripts/python_wheel_xlinux_build.sh | 1 - 2 files changed, 2 deletions(-) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index c998a627f483..bc1465b891b1 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -148,7 +148,6 @@ popd echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index 75b805dd1dce..523f13aa477e 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -156,7 +156,6 @@ check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} From 61f3023d9566854fb121482bb676f32e5e586f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 13:02:30 +0100 Subject: [PATCH 16/27] Remove obsolete PYARROW_BUILD_TYPE. 
Use and document new --config-settings cmake.build-type usage --- ci/scripts/python_build.bat | 1 - ci/scripts/python_build.sh | 4 +--- ci/scripts/python_sdist_test.sh | 3 +-- ci/scripts/python_wheel_macos_build.sh | 3 +-- ci/scripts/python_wheel_windows_build.bat | 3 +-- ci/scripts/python_wheel_xlinux_build.sh | 3 +-- docs/source/developers/python/building.rst | 8 ++------ docs/source/developers/python/development.rst | 2 +- python/examples/minimal_build/build_conda.sh | 3 +-- python/examples/minimal_build/build_venv.sh | 3 +-- 10 files changed, 10 insertions(+), 23 deletions(-) diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index fbd44a1c4c2c..bf462fce7271 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -111,7 +111,6 @@ echo "=== CCACHE Stats after build ===" ccache -sv echo "=== Building Python ===" -set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index bc606e3b60aa..7cadf6ca19a8 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -60,8 +60,6 @@ if [ -n "${CONDA_PREFIX}" ]; then fi export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} - export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} @@ -92,7 +90,7 @@ cp -aL "${source_dir}" "${python_build_dir}" pushd "${python_build_dir}" # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. -${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . +${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" . 
popd if [ "${BUILD_DOCS_PYTHON}" == "ON" ]; then diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index 26ed2f417d5d..52023ff5e7e0 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -26,7 +26,6 @@ export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export PYARROW_WITH_S3=${ARROW_S3:-OFF} @@ -64,7 +63,7 @@ if [ -n "${ARROW_PYTHON_VENV:-}" ]; then . "${ARROW_PYTHON_VENV}/bin/activate" fi -${PYTHON:-python} -m pip install "${sdist}" +${PYTHON:-python} -m pip install --config-settings cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" "${sdist}" # shellcheck disable=SC2086 pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index bc1465b891b1..23b059756b60 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -146,7 +146,6 @@ cmake --build . --target install popd echo "=== (${PYTHON_VERSION}) Building wheel ===" -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} @@ -168,7 +167,7 @@ export CMAKE_PREFIX_PATH=${build_dir}/install export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python -python -m build --sdist --wheel . --no-isolation +python -m build --sdist --wheel . 
--no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index b4e71f35a90b..a58cd00be3c9 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -115,7 +115,6 @@ cmake --build . --config %CMAKE_BUILD_TYPE% --target install || exit /B 1 popd echo "=== (%PYTHON%) Building wheel ===" -set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" @@ -137,7 +136,7 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -m build --sdist --wheel . --no-isolation || exit /B 1 +%PYTHON_CMD% -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% || exit /B 1 @REM Repair the wheel with delvewheel @REM diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index 523f13aa477e..aded0704778c 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -154,7 +154,6 @@ popd check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" -export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=ON export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} @@ -174,7 +173,7 @@ export ARROW_HOME=/tmp/arrow-dist export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python -python -m build --sdist --wheel . --no-isolation +python -m build --sdist --wheel . 
--no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} echo "=== Strip symbols from wheel ===" mkdir -p dist/temp-fix-wheel diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 539d2f93f45c..2cb0750f9f6c 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -444,8 +444,7 @@ artifacts before rebuilding. See :ref:`python-dev-env-variables`. By default, PyArrow will be built in release mode even if Arrow C++ has been built in debug mode. To create a debug build of PyArrow, run -``export PYARROW_BUILD_TYPE=debug`` prior to running -``pip install --no-build-isolation -vv .`` above. +``pip install --no-build-isolation -vv --config-settings cmake.build-type=Debug .``. A ``relwithdebinfo`` build can be created similarly. Self-Contained Wheel @@ -461,7 +460,7 @@ To do this, set the ``PYARROW_BUNDLE_ARROW_CPP`` environment variable before bui .. code-block:: - $ set PYARROW_BUNDLE_ARROW_CPP=ON + $ export PYARROW_BUNDLE_ARROW_CPP=ON $ pip install build wheel # if not installed $ python -m build --sdist --wheel . --no-isolation @@ -555,9 +554,6 @@ PyArrow are: * - PyArrow environment variable - Description - Default value - * - ``PYARROW_BUILD_TYPE`` - - Build type for PyArrow (release, debug or relwithdebinfo), sets ``CMAKE_BUILD_TYPE`` - - ``release`` * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst index 5529ad25a294..857358a6c317 100644 --- a/docs/source/developers/python/development.rst +++ b/docs/source/developers/python/development.rst @@ -222,7 +222,7 @@ Debug build Since PyArrow depends on the Arrow C++ libraries, debugging can frequently involve crossing between Python and C++ shared libraries. 
For the best experience, make sure you've built both Arrow C++ -(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow (``export PYARROW_BUILD_TYPE=debug``) +(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow (``--config-settings cmake.build-type=Debug``) in debug mode. Using gdb on Linux diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh index 1855869cff12..1565396df3d3 100755 --- a/python/examples/minimal_build/build_conda.sh +++ b/python/examples/minimal_build/build_conda.sh @@ -94,11 +94,10 @@ pushd $ARROW_ROOT/python rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} -export PYARROW_BUILD_TYPE=Debug export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh -python -m pip install --no-deps --no-build-isolation -vv . +python -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type=Debug . popd pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh index 8b6fa925e398..d84ca6d55ab7 100755 --- a/python/examples/minimal_build/build_venv.sh +++ b/python/examples/minimal_build/build_venv.sh @@ -66,11 +66,10 @@ pushd $ARROW_ROOT/python rm -rf build/ # remove any pesky preexisting build directory export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}} -export PYARROW_BUILD_TYPE=Debug export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh -python -m pip install --no-deps --no-build-isolation -vv . +python -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type=Debug . 
popd From 9fa857270c41b23b8e7aef5fb57206f1f98f2973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 13:30:38 +0100 Subject: [PATCH 17/27] Remove obsolete PYARROW_CMAKE_OPTIONS and document new --config-settings cmake.args usage --- ci/scripts/python_wheel_macos_build.sh | 6 ++++-- ci/scripts/python_wheel_windows_build.bat | 5 +++-- ci/scripts/python_wheel_xlinux_build.sh | 5 +++-- docs/source/developers/python/building.rst | 8 +++++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 23b059756b60..435f1ed5f53d 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -159,7 +159,6 @@ export PYARROW_WITH_PARQUET=${ARROW_PARQUET} export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_WITH_S3=${ARROW_S3} -export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" export ARROW_HOME=${build_dir}/install # PyArrow build configuration export CMAKE_PREFIX_PATH=${build_dir}/install @@ -167,7 +166,10 @@ export CMAKE_PREFIX_PATH=${build_dir}/install export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python -python -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} +python -m build --sdist --wheel . 
--no-isolation \ + --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-settings cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ + --config-settings cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index a58cd00be3c9..40005d25ef14 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -117,7 +117,6 @@ popd echo "=== (%PYTHON%) Building wheel ===" set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON -set PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% set PYARROW_WITH_DATASET=%ARROW_DATASET% @@ -136,7 +135,9 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% || exit /B 1 +%PYTHON_CMD% -m build --sdist --wheel . 
--no-isolation ^ + --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% ^ + --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 @REM Repair the wheel with delvewheel @REM diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index aded0704778c..f8a59e676ffa 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -155,7 +155,6 @@ check_arrow_visibility echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUNDLE_ARROW_CPP=ON -export PYARROW_CMAKE_OPTIONS="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} @@ -173,7 +172,9 @@ export ARROW_HOME=/tmp/arrow-dist export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python -python -m build --sdist --wheel . --no-isolation --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} +python -m build --sdist --wheel . --no-isolation \ + --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" echo "=== Strip symbols from wheel ===" mkdir -p dist/temp-fix-wheel diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 2cb0750f9f6c..f1f7f01cfd1b 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -557,9 +557,6 @@ PyArrow are: * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` - * - ``PYARROW_CMAKE_OPTIONS`` - - Extra CMake and Arrow options (ex. 
``"-DARROW_SIMD_LEVEL=NONE -DCMAKE_OSX_ARCHITECTURES=x86_64;arm64"``) - - ``''`` * - ``PYARROW_CXXFLAGS`` - Extra C++ compiler flags - ``''`` @@ -579,6 +576,11 @@ PyArrow are: - Number of processes used to compile PyArrow’s C++/Cython components - ``''`` +For extra CMake arguments you can use the ``--config-settings cmake.args=`` +argument when building PyArrow. For example, to build a version of PyArrow +with ``ARROW_SIMD_LEVEL=NONE``, you can run +``pip install --no-build-isolation -vv --config-settings cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. + The components being disabled or enabled when building PyArrow is by default based on how Arrow C++ is build (i.e. it follows the ``ARROW_$COMPONENT`` flags). However, the ``PYARROW_WITH_$COMPONENT`` environment variables can still be used From f2f7e17b6c11b0717335a9d5729dd02c53f967af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 13:59:39 +0100 Subject: [PATCH 18/27] Remove PYARROW_BUILD_VERBOSE and PYARROW_PARALLEL, document new usages and update usage. 
Also document --config-settings cmake.build-type --- ci/scripts/python_build.bat | 3 +- ci/scripts/python_build.sh | 3 +- ci/scripts/python_wheel_macos_build.sh | 1 + ci/scripts/python_wheel_windows_build.bat | 4 +-- ci/scripts/python_wheel_xlinux_build.sh | 1 + dev/release/verify-release-candidate.sh | 2 +- dev/tasks/python-wheels/github.osx.yml | 1 - docs/source/developers/python/building.rst | 32 ++++++++++++++-------- 8 files changed, 28 insertions(+), 19 deletions(-) diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index bf462fce7271..dd59b3008d61 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -111,7 +111,6 @@ echo "=== CCACHE Stats after build ===" ccache -sv echo "=== Building Python ===" -set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% @@ -135,6 +134,6 @@ pushd %SOURCE_DIR%\python %PYTHON_CMD% -m pip install -r requirements-build.txt || exit /B 1 @REM Build PyArrow -%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv . || exit /B 1 +%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv --config-settings build.verbose=true . 
|| exit /B 1 popd diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index 7cadf6ca19a8..3da5b2f0934f 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -59,6 +59,7 @@ if [ -n "${CONDA_PREFIX}" ]; then conda list fi +export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_WITH_ACERO=${ARROW_ACERO:-OFF} export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} @@ -74,8 +75,6 @@ export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} export PYARROW_WITH_S3=${ARROW_S3:-OFF} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} -export PYARROW_PARALLEL=${n_jobs} - : "${CMAKE_PREFIX_PATH:=${ARROW_HOME}}" export CMAKE_PREFIX_PATH export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 435f1ed5f53d..b383bcaf085a 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -167,6 +167,7 @@ export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python python -m build --sdist --wheel . --no-isolation \ + --config-settings build.verbose=true \ --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ --config-settings cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ --config-settings cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 40005d25ef14..4e9666afb541 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -115,7 +115,6 @@ cmake --build . 
--config %CMAKE_BUILD_TYPE% --target install || exit /B 1 popd echo "=== (%PYTHON%) Building wheel ===" -set PYARROW_BUILD_VERBOSE=1 set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_AZURE=%ARROW_AZURE% @@ -135,7 +134,8 @@ set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python @REM Build wheel -%PYTHON_CMD% -m build --sdist --wheel . --no-isolation ^ +%PYTHON_CMD% -m build --sdist --wheel . --no-isolation -vv ^ + --config-settings build.verbose=true ^ --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% ^ --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index f8a59e676ffa..6544d8edaecb 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -173,6 +173,7 @@ export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python python -m build --sdist --wheel . 
--no-isolation \ + --config-settings build.verbose=true \ --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 325c4342e6f8..f91b8de474c2 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -545,7 +545,7 @@ test_python() { CMAKE_PREFIX_PATH="${CONDA_BACKUP_CMAKE_PREFIX_PATH}:${CMAKE_PREFIX_PATH}" fi - export PYARROW_PARALLEL=$NPROC + export CMAKE_BUILD_PARALLEL_LEVEL=$NPROC export PYARROW_WITH_DATASET=1 export PYARROW_WITH_HDFS=1 export PYARROW_WITH_ORC=1 diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml index ab17d11da1f8..3de8c3c021bf 100644 --- a/dev/tasks/python-wheels/github.osx.yml +++ b/dev/tasks/python-wheels/github.osx.yml @@ -23,7 +23,6 @@ CMAKE_BUILD_TYPE: release CXX: "clang++" MACOSX_DEPLOYMENT_TARGET: "{{ macos_deployment_target }}" - PYARROW_BUILD_VERBOSE: 1 PYARROW_VERSION: "{{ arrow.no_rc_version }}" PYTHON_VERSION: "{{ python_version }}" PYTHON_ABI_TAG: "{{ python_abi_tag }}" diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index f1f7f01cfd1b..61cd4251de19 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -349,7 +349,7 @@ Optional build components There are several optional components that can be enabled or disabled by setting specific flags to ``ON`` or ``OFF``, respectively. See the list of -:ref:`python-dev-env-variables` below. +:ref:`python-dev-components` below. You may choose between different kinds of C++ build types: @@ -378,7 +378,7 @@ Build PyArrow If you did build one of the optional components in C++, the equivalent components will be enabled by default for building pyarrow. 
This default can be overridden by setting the corresponding ``PYARROW_WITH_$COMPONENT`` environment variable -to 0 or 1, see :ref:`python-dev-env-variables` below. +to 0 or 1, see :ref:`python-dev-components` below. To build PyArrow run: @@ -435,7 +435,7 @@ To build PyArrow run: updated when rebuilding Arrow C++. To set the number of threads used to compile PyArrow's C++/Cython components, -set the ``PYARROW_PARALLEL`` environment variable. +set the ``CMAKE_BUILD_PARALLEL_LEVEL`` environment variable. If you build PyArrow but then make changes to the Arrow C++ or PyArrow code, you can end up with stale build artifacts. This can lead to @@ -541,8 +541,8 @@ described in development section. .. _python-dev-env-variables: -Relevant components and environment variables -============================================= +Relevant environment variables and build options +================================================ List of relevant environment variables that can be used to build PyArrow are: @@ -554,6 +554,9 @@ PyArrow are: * - PyArrow environment variable - Description - Default value + * - ``CMAKE_BUILD_PARALLEL_LEVEL`` + - Number of processes used to compile PyArrow’s C++/Cython components + - ``''`` * - ``CMAKE_GENERATOR`` - Example: ``'Visual Studio 17 2022 Win64'`` - ``''`` @@ -569,18 +572,25 @@ PyArrow are: * - ``PYARROW_BUNDLE_CYTHON_CPP`` - Bundle the C++ files generated by Cython - ``0`` (``OFF``) - * - ``PYARROW_BUILD_VERBOSE`` - - Enable verbose output from Makefile builds - - ``0`` (``OFF``) - * - ``PYARROW_PARALLEL`` - - Number of processes used to compile PyArrow’s C++/Cython components - - ``''`` + +To set the build type (e.g. ``Debug``, ``Release``, ``RelWithDebInfo``), pass +``--config-settings cmake.build-type=Debug`` to the ``pip install`` or +``python -m build`` command. For extra CMake arguments you can use the ``--config-settings cmake.args=`` argument when building PyArrow. 
For example, to build a version of PyArrow with ``ARROW_SIMD_LEVEL=NONE``, you can run ``pip install --no-build-isolation -vv --config-settings cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. +To enable verbose output from the build tool, pass +``--config-settings build.verbose=true`` to the ``pip install`` or +``python -m build`` command. + +.. _python-dev-components: + +Relevant components +=================== + The components being disabled or enabled when building PyArrow is by default based on how Arrow C++ is build (i.e. it follows the ``ARROW_$COMPONENT`` flags). However, the ``PYARROW_WITH_$COMPONENT`` environment variables can still be used From 5c5531c6a488662d9bec668ff3546914b614c9dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 18 Feb 2026 14:58:27 +0100 Subject: [PATCH 19/27] --config-settings or --config-setting depending on pip or build --- ci/scripts/python_wheel_macos_build.sh | 8 ++++---- ci/scripts/python_wheel_windows_build.bat | 6 +++--- ci/scripts/python_wheel_xlinux_build.sh | 6 +++--- docs/source/developers/python/building.rst | 11 +++++++---- docs/source/developers/python/development.rst | 3 ++- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index b383bcaf085a..700c023aa3b8 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -167,10 +167,10 @@ export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python python -m build --sdist --wheel . 
--no-isolation \ - --config-settings build.verbose=true \ - --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ - --config-settings cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ - --config-settings cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" + --config-setting build.verbose=true \ + --config-setting cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-setting cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ + --config-setting cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 4e9666afb541..3c85cad0ec3e 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -135,9 +135,9 @@ pushd C:\arrow\python @REM Build wheel %PYTHON_CMD% -m build --sdist --wheel . --no-isolation -vv ^ - --config-settings build.verbose=true ^ - --config-settings cmake.build-type=%CMAKE_BUILD_TYPE% ^ - --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 + --config-setting build.verbose=true ^ + --config-setting cmake.build-type=%CMAKE_BUILD_TYPE% ^ + --config-setting cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 @REM Repair the wheel with delvewheel @REM diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index 6544d8edaecb..b3609aa306b8 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -173,9 +173,9 @@ export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python python -m build --sdist --wheel . 
--no-isolation \ - --config-settings build.verbose=true \ - --config-settings cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ - --config-settings cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" + --config-setting build.verbose=true \ + --config-setting cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + --config-setting cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" echo "=== Strip symbols from wheel ===" mkdir -p dist/temp-fix-wheel diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 61cd4251de19..58ba5349c65a 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -573,9 +573,12 @@ PyArrow are: - Bundle the C++ files generated by Cython - ``0`` (``OFF``) +Note that ``pip install`` uses ``--config-settings`` (plural) while +``python -m build`` uses ``--config-setting`` (singular). + To set the build type (e.g. ``Debug``, ``Release``, ``RelWithDebInfo``), pass -``--config-settings cmake.build-type=Debug`` to the ``pip install`` or -``python -m build`` command. +``--config-settings cmake.build-type=Debug`` to ``pip install`` or +``--config-setting cmake.build-type=Debug`` to ``python -m build``. For extra CMake arguments you can use the ``--config-settings cmake.args=`` argument when building PyArrow. For example, to build a version of PyArrow @@ -583,8 +586,8 @@ with ``ARROW_SIMD_LEVEL=NONE``, you can run ``pip install --no-build-isolation -vv --config-settings cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. To enable verbose output from the build tool, pass -``--config-settings build.verbose=true`` to the ``pip install`` or -``python -m build`` command. +``--config-settings build.verbose=true`` to ``pip install`` or +``--config-setting build.verbose=true`` to ``python -m build``. .. 
_python-dev-components: diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst index 857358a6c317..8afc55a173e1 100644 --- a/docs/source/developers/python/development.rst +++ b/docs/source/developers/python/development.rst @@ -222,7 +222,8 @@ Debug build Since PyArrow depends on the Arrow C++ libraries, debugging can frequently involve crossing between Python and C++ shared libraries. For the best experience, make sure you've built both Arrow C++ -(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow (``--config-settings cmake.build-type=Debug``) +(``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow +(``pip install --no-build-isolation --config-settings cmake.build-type=Debug .``) in debug mode. Using gdb on Linux From 70463e439dc925c9777a2711a2366a25cfc50a22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 24 Feb 2026 11:40:10 +0100 Subject: [PATCH 20/27] Use -C instead of --config-setting or --config-settings based on whether we use build or pip --- ci/scripts/python_build.bat | 2 +- ci/scripts/python_build.sh | 2 +- ci/scripts/python_sdist_test.sh | 2 +- ci/scripts/python_wheel_macos_build.sh | 8 ++++---- ci/scripts/python_wheel_windows_build.bat | 6 +++--- ci/scripts/python_wheel_xlinux_build.sh | 6 +++--- docs/source/developers/python/building.rst | 15 +++++---------- docs/source/developers/python/development.rst | 2 +- python/examples/minimal_build/build_conda.sh | 2 +- python/examples/minimal_build/build_venv.sh | 2 +- 10 files changed, 21 insertions(+), 26 deletions(-) diff --git a/ci/scripts/python_build.bat b/ci/scripts/python_build.bat index dd59b3008d61..10d10bda6a9d 100644 --- a/ci/scripts/python_build.bat +++ b/ci/scripts/python_build.bat @@ -134,6 +134,6 @@ pushd %SOURCE_DIR%\python %PYTHON_CMD% -m pip install -r requirements-build.txt || exit /B 1 @REM Build PyArrow -%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv --config-settings build.verbose=true . 
|| exit /B 1 +%PYTHON_CMD% -m pip install --no-deps --no-build-isolation -vv -C build.verbose=true . || exit /B 1 popd diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index 3da5b2f0934f..f8c1af3982dd 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -89,7 +89,7 @@ cp -aL "${source_dir}" "${python_build_dir}" pushd "${python_build_dir}" # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. -${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" . +${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv -C cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" . popd if [ "${BUILD_DOCS_PYTHON}" == "ON" ]; then diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index 52023ff5e7e0..eca8e0542e23 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -63,7 +63,7 @@ if [ -n "${ARROW_PYTHON_VENV:-}" ]; then . "${ARROW_PYTHON_VENV}/bin/activate" fi -${PYTHON:-python} -m pip install --config-settings cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" "${sdist}" +${PYTHON:-python} -m pip install -C cmake.build-type="${CMAKE_BUILD_TYPE:-Debug}" "${sdist}" # shellcheck disable=SC2086 pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 700c023aa3b8..1571cd57f258 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -167,10 +167,10 @@ export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} pushd ${source_dir}/python python -m build --sdist --wheel . 
--no-isolation \ - --config-setting build.verbose=true \ - --config-setting cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ - --config-setting cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ - --config-setting cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" + -C build.verbose=true \ + -C cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + -C cmake.args="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}" \ + -C cmake.args="-DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" popd echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 3c85cad0ec3e..14e3e5a62971 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -135,9 +135,9 @@ pushd C:\arrow\python @REM Build wheel %PYTHON_CMD% -m build --sdist --wheel . --no-isolation -vv ^ - --config-setting build.verbose=true ^ - --config-setting cmake.build-type=%CMAKE_BUILD_TYPE% ^ - --config-setting cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 + -C build.verbose=true ^ + -C cmake.build-type=%CMAKE_BUILD_TYPE% ^ + -C cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=%CMAKE_INTERPROCEDURAL_OPTIMIZATION%" || exit /B 1 @REM Repair the wheel with delvewheel @REM diff --git a/ci/scripts/python_wheel_xlinux_build.sh b/ci/scripts/python_wheel_xlinux_build.sh index b3609aa306b8..960fe5bad6d9 100755 --- a/ci/scripts/python_wheel_xlinux_build.sh +++ b/ci/scripts/python_wheel_xlinux_build.sh @@ -173,9 +173,9 @@ export CMAKE_PREFIX_PATH=/tmp/arrow-dist pushd /arrow/python python -m build --sdist --wheel . 
--no-isolation \ - --config-setting build.verbose=true \ - --config-setting cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ - --config-setting cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" + -C build.verbose=true \ + -C cmake.build-type=${CMAKE_BUILD_TYPE:-Debug} \ + -C cmake.args="-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=${CMAKE_INTERPROCEDURAL_OPTIMIZATION}" echo "=== Strip symbols from wheel ===" mkdir -p dist/temp-fix-wheel diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 58ba5349c65a..afb698bf13cc 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -444,7 +444,7 @@ artifacts before rebuilding. See :ref:`python-dev-env-variables`. By default, PyArrow will be built in release mode even if Arrow C++ has been built in debug mode. To create a debug build of PyArrow, run -``pip install --no-build-isolation -vv --config-settings cmake.build-type=Debug .``. +``pip install --no-build-isolation -vv -C cmake.build-type=Debug .``. A ``relwithdebinfo`` build can be created similarly. Self-Contained Wheel @@ -573,21 +573,16 @@ PyArrow are: - Bundle the C++ files generated by Cython - ``0`` (``OFF``) -Note that ``pip install`` uses ``--config-settings`` (plural) while -``python -m build`` uses ``--config-setting`` (singular). - To set the build type (e.g. ``Debug``, ``Release``, ``RelWithDebInfo``), pass -``--config-settings cmake.build-type=Debug`` to ``pip install`` or -``--config-setting cmake.build-type=Debug`` to ``python -m build``. +``-C cmake.build-type=Debug`` to ``pip install`` or to ``python -m build``. -For extra CMake arguments you can use the ``--config-settings cmake.args=`` +For extra CMake arguments you can use the ``-C cmake.args=`` argument when building PyArrow. 
For example, to build a version of PyArrow with ``ARROW_SIMD_LEVEL=NONE``, you can run -``pip install --no-build-isolation -vv --config-settings cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. +``pip install --no-build-isolation -vv -C cmake.args="-DARROW_SIMD_LEVEL=NONE" .``. To enable verbose output from the build tool, pass -``--config-settings build.verbose=true`` to ``pip install`` or -``--config-setting build.verbose=true`` to ``python -m build``. +``-C build.verbose=true`` to ``pip install`` or to ``python -m build``. .. _python-dev-components: diff --git a/docs/source/developers/python/development.rst b/docs/source/developers/python/development.rst index 8afc55a173e1..5757b761875a 100644 --- a/docs/source/developers/python/development.rst +++ b/docs/source/developers/python/development.rst @@ -223,7 +223,7 @@ Since PyArrow depends on the Arrow C++ libraries, debugging can frequently involve crossing between Python and C++ shared libraries. For the best experience, make sure you've built both Arrow C++ (``-DCMAKE_BUILD_TYPE=Debug``) and PyArrow -(``pip install --no-build-isolation --config-settings cmake.build-type=Debug .``) +(``pip install --no-build-isolation -C cmake.build-type=Debug .``) in debug mode. Using gdb on Linux diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh index 1565396df3d3..5b39b93b2fd5 100755 --- a/python/examples/minimal_build/build_conda.sh +++ b/python/examples/minimal_build/build_conda.sh @@ -97,7 +97,7 @@ export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH} export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh -python -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type=Debug . +python -m pip install --no-deps --no-build-isolation -vv -C cmake.build-type=Debug . 
popd pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh index d84ca6d55ab7..53c2810efcec 100755 --- a/python/examples/minimal_build/build_venv.sh +++ b/python/examples/minimal_build/build_venv.sh @@ -69,7 +69,7 @@ export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH} export CMAKE_GENERATOR=Ninja # Use the same command that we use on python_build.sh -python -m pip install --no-deps --no-build-isolation -vv --config-settings cmake.build-type=Debug . +python -m pip install --no-deps --no-build-isolation -vv -C cmake.build-type=Debug . popd From fabf6729f0045275e93f2a0cf0ef50654f43deea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 26 Feb 2026 15:26:00 +0100 Subject: [PATCH 21/27] Simplify copy licenses to be the same in all platforms --- python/_build_backend/__init__.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index e9a6ead8aecd..04e3a15181e0 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -36,7 +36,6 @@ import os from pathlib import Path import shutil -import sys from scikit_build_core.build import * # noqa: F401,F403 from scikit_build_core.build import build_sdist as scikit_build_sdist @@ -47,31 +46,20 @@ @contextmanager def prepare_licenses(): - # Temporarily replace symlinks with hardlinks so sdist gets real content. - # On Windows we just copy the files since hardlinks might not be supported. + # Temporarily copy the files so they are included on sdist. for name in LICENSE_FILES: parent_license = PYTHON_DIR.parent / name pyarrow_license = PYTHON_DIR / name - if sys.platform == "win32": - # For Windows copy the files. 
- pyarrow_license.unlink(missing_ok=True) - shutil.copy2(parent_license, pyarrow_license) - else: - # For Unix-like systems we replace the symlink with - # a hardlink to avoid copying the file content. - if pyarrow_license.is_symlink(): - target = pyarrow_license.resolve() - pyarrow_license.unlink() - os.link(target, pyarrow_license) + pyarrow_license.unlink(missing_ok=True) + shutil.copy2(parent_license, pyarrow_license) try: yield finally: - if sys.platform != "win32": - # Copy back the original symlinks so git status is clean. - for name in LICENSE_FILES: - filepath = PYTHON_DIR / name - filepath.unlink() - os.symlink(f"../{name}", filepath) + # Copy back the original symlinks so git status is clean. + for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + os.unlink(filepath) + os.symlink(f"../{name}", filepath) def build_sdist(sdist_directory, config_settings=None): From af1f099dce775bde8f202cb0ebb2113ccc73c717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 26 Feb 2026 18:48:43 +0100 Subject: [PATCH 22/27] Do not try to regenerate symlinks on Windows as it just fails --- python/_build_backend/__init__.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index 04e3a15181e0..f40f9ea2c840 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -36,6 +36,7 @@ import os from pathlib import Path import shutil +import sys from scikit_build_core.build import * # noqa: F401,F403 from scikit_build_core.build import build_sdist as scikit_build_sdist @@ -55,11 +56,12 @@ def prepare_licenses(): try: yield finally: - # Copy back the original symlinks so git status is clean. - for name in LICENSE_FILES: - filepath = PYTHON_DIR / name - os.unlink(filepath) - os.symlink(f"../{name}", filepath) + if sys.platform != "win32": + # Copy back the original symlinks so git status is clean. 
+ for name in LICENSE_FILES: + filepath = PYTHON_DIR / name + os.unlink(filepath) + os.symlink(f"../{name}", filepath) def build_sdist(sdist_directory, config_settings=None): From dd0fa11814cc2a72189d46d471a42e4f9483b338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 5 Mar 2026 09:59:28 +0100 Subject: [PATCH 23/27] Improve comment to reflect latest changes on custom build backend Co-authored-by: Rok Mihevc --- python/_build_backend/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index f40f9ea2c840..37c5a1c498c4 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -26,10 +26,7 @@ But when building sdist tarballs symlinks are not copied and we end up with broken LICENSE.txt and NOTICE.txt. -This custom build backend only replace the symlinks with hardlinks -before scikit_build_core.build.build_sdist so -that sdist contains the actual file content. The symlinks are restored -afterwards so the git working tree stays clean. +This custom build backend replaces the symlinks with actual file copies before scikit_build_core.build.build_sdist so that the sdist contains the real file content. The symlinks are restored afterwards to keep the git working tree clean. 
""" from contextlib import contextmanager From b1bd7034f493f05f3d160241c393c193abf0ecef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 5 Mar 2026 10:01:01 +0100 Subject: [PATCH 24/27] Remove unnecessary build directory from .gitignore --- python/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/python/.gitignore b/python/.gitignore index 5ec5fdf01207..de51d21c9ff0 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -4,7 +4,6 @@ CMakeCache.txt CTestTestfile.cmake Makefile cmake_install.cmake -build/ Testing/ # Python stuff From e2344c7d4b0d3b9ec6f7bd641167d6f8a4340bad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 5 Mar 2026 10:04:30 +0100 Subject: [PATCH 25/27] Try PYARROW_BUILD_TYPE as target_compile_definitions as suggested --- python/CMakeLists.txt | 11 ++++++----- .../pyarrow/src/arrow/python/config_internal.h.cmake | 4 +++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 31ce2f149eae..0e1709958be2 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -359,11 +359,9 @@ else() # CMAKE_BUILD_TYPE is not set at configure time. # scikit-build-core does the right thing with cmake.build-type and # adds the corresponding --config but does not populate CMAKE_BUILD_TYPE - # for those. On this specific case, we set the default to "RELEASE" - # as it's the most common build type for users building from source. - # This is mainly relevant for our Windows wheels, which are built with - # Visual Studio and thus use a multi-config generator with Release. - # As a note this is only to populate config_internal.h.cmake. + # for those. On this specific case, we set the default to "RELEASE". + # The actual build type is injected through target compile definitions + # for multi-config generators. 
set(UPPERCASE_PYBUILD_TYPE "RELEASE") endif() @@ -515,6 +513,9 @@ else() endif() target_link_libraries(arrow_python PUBLIC Python3::NumPy) target_compile_definitions(arrow_python PRIVATE ARROW_PYTHON_EXPORTING) +if(CMAKE_CONFIGURATION_TYPES) + target_compile_definitions(arrow_python PRIVATE PYARROW_BUILD_TYPE="$<CONFIG>") +endif() set_target_properties(arrow_python PROPERTIES VERSION "${PYARROW_FULL_SO_VERSION}" SOVERSION "${PYARROW_SO_VERSION}") install(TARGETS arrow_python diff --git a/python/pyarrow/src/arrow/python/config_internal.h.cmake b/python/pyarrow/src/arrow/python/config_internal.h.cmake index e8a6e78c48a0..f76edccb69d1 100644 --- a/python/pyarrow/src/arrow/python/config_internal.h.cmake +++ b/python/pyarrow/src/arrow/python/config_internal.h.cmake @@ -15,4 +15,6 @@ // specific language governing permissions and limitations // under the License. -#define PYARROW_BUILD_TYPE "@UPPERCASE_PYBUILD_TYPE@" \ No newline at end of file +#ifndef PYARROW_BUILD_TYPE +#define PYARROW_BUILD_TYPE "@UPPERCASE_PYBUILD_TYPE@" +#endif From 9159b32aef6812779f9d236645b2ee6a962a4f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 5 Mar 2026 10:59:00 +0100 Subject: [PATCH 26/27] Fix lint from GitHub commit UI wrong line length --- python/_build_backend/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/_build_backend/__init__.py b/python/_build_backend/__init__.py index 37c5a1c498c4..6e5c328a69ff 100644 --- a/python/_build_backend/__init__.py +++ b/python/_build_backend/__init__.py @@ -26,7 +26,9 @@ But when building sdist tarballs symlinks are not copied and we end up with broken LICENSE.txt and NOTICE.txt. -This custom build backend replaces the symlinks with actual file copies before scikit_build_core.build.build_sdist so that the sdist contains the real file content. The symlinks are restored afterwards to keep the git working tree clean.
+This custom build backend replaces the symlinks with actual file copies before +scikit_build_core.build.build_sdist so that the sdist contains the real file content. +The symlinks are restored afterwards to keep the git working tree clean. """ from contextlib import contextmanager From 12f61d02726dd5de11c41aa188fe5e54728e45ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 5 Mar 2026 13:30:06 +0100 Subject: [PATCH 27/27] Test build type on Windows --- ci/scripts/python_wheel_windows_test.bat | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index 1e9cacac8bfa..26dc8425c3ff 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -44,6 +44,7 @@ py -0p @REM Test that the modules are importable %PYTHON_CMD% -c "import pyarrow" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow; print(pyarrow.build_info.build_type)" || exit /B 1 %PYTHON_CMD% -c "import pyarrow._azurefs" || exit /B 1 %PYTHON_CMD% -c "import pyarrow._gcsfs" || exit /B 1 %PYTHON_CMD% -c "import pyarrow._hdfs" || exit /B 1