Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions .github/workflows/bench-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,6 @@ jobs:
- uses: actions/checkout@v6
with:
ref: ${{ github.event.pull_request.head.sha }}
- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev
- uses: ./.github/actions/setup-rust
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
10 changes: 0 additions & 10 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,6 @@ jobs:
with:
sccache: s3
- uses: actions/checkout@v6
- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev
- uses: ./.github/actions/setup-rust
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
67 changes: 3 additions & 64 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -188,29 +188,6 @@ jobs:
- uses: ./.github/actions/setup-rust
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
# DuckDB build from duckdb/, if building httpfs statically, fails unless
# you have cURL dev headers present. Ideally we'd see these loaded during
# DuckDB setup itself, but this is a tricky change, see
# https://github.com/duckdb/duckdb/issues/21073
#
# This does NOT check for DUCKDB_VERSION changes, and its use in CI
# is generally discouraged. Please change build.rs.
#
# Ideally I'd use a awalsh128/cache-apt-pkgs-action@v1.5.2,
# as apt-get takes around 22s, but this specific action is broken, see
# https://github.com/awalsh128/cache-apt-pkgs-action/issues/181
#
# TODO(myrrc): this should all go away once we have AMI in CI.
- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev
- name: Docs
run: |
RUSTDOCFLAGS="-D warnings" cargo doc --no-deps
Expand Down Expand Up @@ -310,16 +287,6 @@ jobs:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install nightly for fmt
run: rustup toolchain install $NIGHTLY_TOOLCHAIN --component rustfmt
- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev
- name: Rust Lint - Format
run: cargo +$NIGHTLY_TOOLCHAIN fmt --all --check
- name: Rustc check
Expand Down Expand Up @@ -444,16 +411,6 @@ jobs:
uses: taiki-e/install-action@v2
with:
tool: nextest
- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev
- name: Rust Tests
if: ${{ matrix.suite == 'tests' }}
run: |
Expand Down Expand Up @@ -520,7 +477,8 @@ jobs:
components: "rust-src, rustfmt, clippy, llvm-tools-preview"
- name: Install build dependencies
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build cmake libcurl4-openssl-dev
sudo apt-get update
sudo apt-get install -y ninja-build cmake
- name: Install nextest
uses: taiki-e/install-action@v2
with:
Expand Down Expand Up @@ -654,16 +612,6 @@ jobs:
uses: taiki-e/install-action@v2
with:
tool: nextest
- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: matrix.os != 'windows-x64' && steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev
- name: Rust Tests (Windows)
if: matrix.os == 'windows-x64'
run: |
Expand Down Expand Up @@ -822,20 +770,11 @@ jobs:
uses: ./.github/actions/setup-rust
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev
- name: Run sqllogictest tests
run: |
cargo test -p vortex-sqllogictest --test sqllogictests


wasm-integration:
name: "wasm-integration"
runs-on: ubuntu-latest
Expand Down
12 changes: 0 additions & 12 deletions .github/workflows/sql-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,6 @@ jobs:

- uses: actions/checkout@v6
if: inputs.mode != 'pr'

- uses: dorny/paths-filter@v3
id: duckdb-buildfile-changed
with:
filters: |
changed:
- 'vortex-duckdb/build.rs'
- name: Install curl dev headers for duckdb source build
if: steps.duckdb-buildfile-changed.outputs.changed == 'true'
run: |
sudo apt-get update -y -qq && sudo apt-get install -y -qq libcurl4-openssl-dev

- uses: ./.github/actions/setup-rust
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
4 changes: 4 additions & 0 deletions benchmarks/duckdb-bench/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ impl DuckClient {
let connection = db.connect()?;
vortex_duckdb::initialize(&db)?;

// Install and load httpfs so DuckDB can access remote files (S3, GCS, HTTP).
connection.query("INSTALL httpfs;")?;
connection.query("LOAD httpfs;")?;

// Enable Parquet metadata cache for all benchmark runs.
//
// `parquet_metadata_cache` is an extension-specific option that's
Expand Down
2 changes: 2 additions & 0 deletions vortex-bench/src/tpcds/duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ pub fn generate_tpcds(base_dir: PathBuf, scale_factor: String) -> Result<PathBuf
let sql_script = format!(
"SET autoinstall_known_extensions=1;\
SET autoload_known_extensions=1;\
INSTALL tpcds;\
LOAD tpcds;\
CALL dsdgen(sf={scale_factor});\
EXPORT DATABASE '{output_dir}' (FORMAT PARQUET);",
scale_factor = scale_factor,
Expand Down
7 changes: 0 additions & 7 deletions vortex-duckdb/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,3 @@ By default, our tests use a precompiled build which means you don't get an

./target/release/duckdb will be a duckdb instance with vortex-duckdb already
loaded.

## Testing a custom DuckDB tag

Change `DUCKDB_VERSION` environment variable value to a preferred hash or commit
(local build), or change build.rs (for testing in CI). If you use a commit,
DuckDB needs to link httpfs statically so you also need to install CURL
development headers (e.g. `libcurl4-openssl-dev`).
52 changes: 9 additions & 43 deletions vortex-duckdb/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ use std::process::Command;
use bindgen::Abi;
use once_cell::sync::Lazy;

// Changing this part or build.rs in general runs a "build duckdb from source"
// job in the CI, with all dependencies and benchmarks using this source-based build.
// It's not ideal, but allows us to check pre-release stuff for breaking things or performance
// regressions.
static DUCKDB_VERSION: Lazy<DuckDBVersion> = Lazy::new(|| {
// Override the DuckDB version via environment variable in case of an extension build.
// `DUCKDB_VERSION` is set by the extension build in the `duckdb-vortex` repo.
Expand Down Expand Up @@ -256,15 +252,7 @@ fn extract_duckdb_source(source_dir: &Path) -> Result<PathBuf, Box<dyn std::erro
}

/// Build DuckDB from source. Used for commit hashes or when VX_DUCKDB_DEBUG is set.
fn build_duckdb(
duckdb_source_dir: &Path,
version: &DuckDBVersion,
debug: bool,
) -> Result<PathBuf, Box<dyn std::error::Error>> {
let build_type = match debug {
true => "debug",
false => "release",
};
fn build_duckdb(duckdb_source_dir: &Path) -> Result<PathBuf, Box<dyn std::error::Error>> {
// Check for ninja
if Command::new("ninja").arg("--version").output().is_err() {
return Err(
Expand All @@ -274,12 +262,10 @@ fn build_duckdb(

let inner_dir_name = DUCKDB_VERSION.archive_inner_dir_name();
let duckdb_repo_dir = duckdb_source_dir.join(&inner_dir_name);
let build_dir = duckdb_repo_dir.join("build").join(build_type);
let build_dir = duckdb_repo_dir.join("build").join("debug");

// Check if already built
let lib_dir = build_dir.join("src");
let lib_dir_str = lib_dir.display();
println!("cargo:info=Checking if DuckDB is already built in {lib_dir_str}",);

let already_built = lib_dir.join("libduckdb.dylib").exists()
|| lib_dir.join("libduckdb.so").exists()
|| lib_dir
Expand All @@ -300,26 +286,12 @@ fn build_duckdb(
("1", "0")
};

let mut envs = vec![
("GEN", "ninja"),
("DISABLE_SANITIZER", asan_option),
("THREADSAN", tsan_option),
("BUILD_SHELL", "false"),
("BUILD_UNITTESTS", "false"),
("ENABLE_UNITTEST_CPP_TESTS", "false"),
];

// If we're building from a commit (likely a pre-release), we need to
// build extensions statically. Otherwise DuckDB tries to load them
// from an http endpoint with version 0.0.1 (all non-tagged builds)
// which doesn't exists. httpfs also requires CURL dev headers
if matches!(version, DuckDBVersion::Commit(_)) {
envs.push(("BUILD_EXTENSIONS", "httpfs;parquet;tpch;tpcds;jemalloc"));
};

let output = Command::new("make")
.current_dir(&duckdb_repo_dir)
.envs(envs)
.env("GEN", "ninja")
.env("DISABLE_SANITIZER", asan_option)
.env("THREADSAN", tsan_option)
.arg("debug")
.output()?;

if !output.status.success() {
Expand Down Expand Up @@ -403,21 +375,15 @@ fn main() {
drop(fs::remove_dir_all(&duckdb_symlink));
std::os::unix::fs::symlink(&extracted_source_path, &duckdb_symlink).unwrap();

// Determine whether to build from source or use prebuilt libraries
let use_debug_build =
env::var("VX_DUCKDB_DEBUG").is_ok_and(|v| matches!(v.as_str(), "1" | "true"));
println!("cargo:info=DuckDB debug build: {use_debug_build}");

let library_path = if use_debug_build || !DUCKDB_VERSION.is_release() {
// Build from source for:
// - Commit hashes (no prebuilt available)
// - When VX_DUCKDB_DEBUG=1 (user wants debug build)
match build_duckdb(&extracted_source_path, &DUCKDB_VERSION, use_debug_build) {
Ok(path) => path,
Err(err) => {
println!("cargo:error={err}");
panic!("duckdb build failed");
}
}
build_duckdb(&extracted_source_path).unwrap()
} else {
// Download prebuilt libraries for release versions
let archive_path = download_duckdb_lib_archive().unwrap();
Expand Down
40 changes: 16 additions & 24 deletions vortex-duckdb/src/e2e_test/vortex_scan_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,8 @@ fn test_vortex_scan_over_http() {
let listener = TcpListener::bind("127.0.0.1:0").unwrap();
let addr = listener.local_addr().unwrap();

// Spawn 10 threads because DuckDB does HEAD and GET requests with retries,
// thus 2 threads, one for each implementation, aren't enough
std::thread::spawn(move || {
for _ in 0..10 {
for _ in 0..2 {
if let Ok((mut stream, _)) = listener.accept() {
let response = format!(
"HTTP/1.1 200 OK\r\nContent-Length: {}\r\n\r\n",
Expand All @@ -378,30 +376,24 @@ fn test_vortex_scan_over_http() {

let conn = database_connection();
conn.query("SET vortex_filesystem = 'duckdb';").unwrap();
for httpfs_impl in ["httplib", "curl"] {
println!("Testing httpfs client implementation: {httpfs_impl}");
conn.query(&format!(
"SET httpfs_client_implementation = '{httpfs_impl}';"
))
.unwrap();
conn.query("INSTALL httpfs;").unwrap();
conn.query("LOAD httpfs;").unwrap();

let url = format!(
"http://{}/{}",
addr,
file.path().file_name().unwrap().to_string_lossy()
);
println!("url={url}, file={}", file.path().display());
let url = format!(
"http://{}/{}",
addr,
file.path().file_name().unwrap().to_string_lossy()
);

let result = conn
.query(&format!("SELECT COUNT(*) FROM read_vortex('{url}')"))
.unwrap();
let chunk = result.into_iter().next().unwrap();
let count = chunk
.get_vector(0)
.as_slice_with_len::<i64>(chunk.len().as_())[0];
let result = conn
.query(&format!("SELECT COUNT(*) FROM read_vortex('{url}')"))
.unwrap();
let chunk = result.into_iter().next().unwrap();
let count = chunk
.get_vector(0)
.as_slice_with_len::<i64>(chunk.len().as_())[0];

assert_eq!(count, 3);
}
assert_eq!(count, 3);
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions vortex-duckdb/src/filesystem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ impl FileSystem for DuckDbFileSystem {
fn list(&self, prefix: &str) -> BoxStream<'_, VortexResult<FileListing>> {
let mut directory_url = self.base_url.clone();
if !prefix.is_empty() {
directory_url.set_path(prefix);
directory_url.set_path(&format!("{}/{}", directory_url.path(), prefix));
}

// DuckDB's ListFiles expects bare paths for local files, but full URLs
Expand Down Expand Up @@ -159,7 +159,7 @@ impl FileSystem for DuckDbFileSystem {

async fn open_read(&self, path: &str) -> VortexResult<Arc<dyn VortexReadAt>> {
let mut url = self.base_url.clone();
url.set_path(path);
url.set_path(&format!("{}/{}", url.path(), path));
let reader = unsafe { DuckDbFsReader::open_url(self.ctx.as_ptr(), &url)? };
Ok(Arc::new(reader))
}
Expand Down
Loading