diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81de8a6..a78fbc6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -127,22 +127,32 @@ jobs: - platform: debian-amd64 cli_script: docker-tests/test_debian_amd64.sh python_script: docker-tests/python/test_debian_amd64.sh + needs_local_binary: false - platform: alpine-amd64 cli_script: docker-tests/test_alpine_amd64.sh python_script: docker-tests/python/test_alpine_amd64.sh + needs_local_binary: false + # Ubuntu 24.04 + 25.10. 25.10 only works with the libxml2/ICU + # bundling added in this binary, so the test must run against a + # fresh local build rather than the latest released artifact. + - platform: ubuntu-amd64 + cli_script: docker-tests/test_ubuntu_amd64.sh + python_script: "" + needs_local_binary: true - platform: missing-libs-debian-amd64 cli_script: docker-tests/test_missing_libs.sh python_script: "" + needs_local_binary: true steps: - uses: actions/checkout@v4 - name: Install Rust - if: matrix.platform == 'missing-libs-debian-amd64' + if: matrix.needs_local_binary uses: dtolnay/rust-toolchain@stable - name: Build Linux binary - if: matrix.platform == 'missing-libs-debian-amd64' + if: matrix.needs_local_binary run: | cargo build --release echo "PG0_BINARY_PATH=$(pwd)/target/release/pg0" >> $GITHUB_ENV diff --git a/Cargo.lock b/Cargo.lock index 9f438a6..256fb8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,6 +79,12 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "ar" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d67af77d68a931ecd5cbd8a3b5987d63a1d1d1278f7f6a60ae33db485cdebb69" + [[package]] name = "arbitrary" version = "1.4.2" @@ -178,6 +184,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum =
"cd405d82c84ff7f35739f175f67d8b9fb7687a0e84ccdc78bd3568839827cf07" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -934,6 +942,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.82" @@ -1177,18 +1195,22 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" name = "pg0" version = "0.13.0" dependencies = [ + "ar", "clap", "dirs", "flate2", + "hex", "postgresql_embedded", "postgresql_extensions", "serde", "serde_json", + "sha2", "tar", "thiserror 1.0.69", "tracing", "tracing-subscriber", "zip 2.4.2", + "zstd", ] [[package]] @@ -1203,6 +1225,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + [[package]] name = "postgresql_archive" version = "0.20.0" @@ -2978,3 +3006,31 @@ dependencies = [ "log", "simd-adler32", ] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name =
"zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index 951531b..aef7092 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,17 @@ tar = "0.4" [target.'cfg(windows)'.dependencies] zip = { version = "2", default-features = false, features = ["deflate"] } +[build-dependencies] +# .deb files are ar archives wrapping a zstd-compressed tar; we crack them open +# at build time to extract libxml2.so.2 and the ICU 70 .so files so they can +# be bundled into the pg0 binary. +ar = "0.9" +zstd = "0.13" +tar = "0.4" +flate2 = "1" +sha2 = "0.10" +hex = "0.4" + [profile.release] strip = true lto = true diff --git a/README.md b/README.md index 2088f65..b438c72 100644 --- a/README.md +++ b/README.md @@ -22,14 +22,17 @@ Use pg0 for local development, testing, CI/CD pipelines, or any scenario where y ## Supported Platforms +This table describes which **binaries** we publish. Whether a binary actually runs on a given OS release depends on the libraries that distro ships - see [Tested and Supported Platforms](#tested-and-supported-platforms) for the per-distribution story (e.g. Alpine 3.20-3.21 work, Alpine 3.22+ does not). + | Platform | Architecture | Binary | |----------|--------------|--------| -| macOS | Apple Silicon (M1/M2/M3) | `pg0-macos-arm64` | -| Linux | x86_64 (glibc) | `pg0-linux-amd64-gnu` | -| Linux | x86_64 (musl/Alpine) | `pg0-linux-amd64-musl` | -| Linux | ARM64 (glibc) | `pg0-linux-arm64-gnu` | -| Linux | ARM64 (musl/Alpine) | `pg0-linux-arm64-musl` | -| Windows | x64 | `pg0-windows-amd64.exe` | +| macOS | Apple Silicon (M1/M2/M3) | `pg0-darwin-aarch64` | +| macOS | Intel | `pg0-darwin-x86_64` | +| Linux | x86_64 (glibc, e.g. Debian/Ubuntu) | `pg0-linux-x86_64-gnu` | +| Linux | x86_64 (musl, e.g.
Alpine) | `pg0-linux-x86_64-musl` | +| Linux | ARM64 (glibc) | `pg0-linux-aarch64-gnu` | +| Linux | ARM64 (musl) | `pg0-linux-aarch64-musl` | +| Windows | x64 | `pg0-windows-x86_64.exe` | ## Features @@ -119,15 +122,19 @@ pg0 works in Docker containers. Here are the minimal setup steps for each suppor ```dockerfile FROM debian:bookworm-slim -# or: python:3.11-slim, ubuntu:22.04, etc. +# or: python:3.11-slim, ubuntu:22.04, ubuntu:24.04, ubuntu:25.10, etc. -# Install required dependencies +# Install required dependencies. libxml2 and ICU are bundled into the pg0 +# binary so they do not need to be installed - this means pg0 works on +# Ubuntu 25.10+ where libxml2.so.2 has been replaced by libxml2.so.16. +# tzdata is needed because PostgreSQL reads /usr/share/zoneinfo at startup, +# and libreadline is needed by `pg0 psql`. RUN apt-get update && apt-get install -y \ curl \ - libxml2 \ libssl3 \ libgssapi-krb5-2 \ - && apt-get install -y libicu72 || apt-get install -y libicu74 || apt-get install -y libicu* \ + tzdata \ + libreadline8 \ && rm -rf /var/lib/apt/lists/* # Create non-root user (PostgreSQL cannot run as root) @@ -182,10 +189,10 @@ docker run -d myimage sh -c "pg0 start && exec your-application" Run pg0 in a Docker container with a single command: ```bash -# Debian/Ubuntu +# Debian/Ubuntu (works on 22.04, 24.04, 25.10, 26.04, ...) docker run --rm -it python:3.11-slim bash -c ' apt-get update -qq && - apt-get install -y curl libxml2 libssl3 libgssapi-krb5-2 libicu72 && + apt-get install -y curl libssl3 libgssapi-krb5-2 tzdata libreadline8 && useradd -m pguser && su - pguser -c "curl -fsSL https://raw.githubusercontent.com/vectorize-io/pg0/main/install.sh | bash && export PATH=\"\$HOME/.local/bin:\$PATH\" && @@ -448,34 +455,59 @@ Data is stored in `~/.pg0/instances//data/` (or your custom `--data-dir`) ## Runtime Dependencies -pg0 bundles PostgreSQL but requires some shared libraries at runtime. 
These are typically pre-installed on most systems, but may need to be added in minimal environments like Docker. +pg0 bundles PostgreSQL, pgvector, libxml2 and ICU directly into the binary, so it works on minimal systems without those libraries installed (including Ubuntu 25.10+ where the libxml2 SONAME was bumped to `.so.16`). A few common shared libraries still need to be present on the host because they are reused from the OS. **macOS:** No additional dependencies required. **Linux (Debian/Ubuntu):** ```bash -apt-get install libxml2 libssl3 libgssapi-krb5-2 +apt-get install libssl3 libgssapi-krb5-2 tzdata libreadline8 ``` **Linux (Alpine):** ```bash apk add icu-libs lz4-libs libxml2 ``` +(Alpine uses the musl pg0 binary, which dynamically links against system ICU and libxml2. See the support table below for compatible Alpine versions.) ### Why these dependencies? The bundled PostgreSQL binaries are compiled with these features enabled: -| Library | Purpose | Can disable? | -|---------|---------|--------------| -| OpenSSL (`libssl`) | SSL/TLS connections | Not recommended | -| GSSAPI (`libgssapi-krb5`) | Kerberos authentication | Rarely needed locally | -| libxml2 | XML data type and functions | Rarely needed | -| ICU (`icu-libs`) | Unicode collation (Alpine only) | glibc builds don't need it | -| LZ4 (`lz4-libs`) | WAL/TOAST compression | Small impact | +| Library | Purpose | Bundled in pg0? 
| +|---------|---------|-----------------| +| libxml2 | XML data type and functions | Yes (Linux GNU only) | +| ICU (`libicu*`) | Unicode collation | Yes (Linux GNU only); Alpine uses system `icu-libs` | +| OpenSSL (`libssl`) | SSL/TLS connections | No - host-provided | +| GSSAPI (`libgssapi-krb5`) | Kerberos authentication | No - host-provided | +| LZ4 (`lz4-libs`) | WAL/TOAST compression | No - usually pre-installed | +| tzdata (`/usr/share/zoneinfo`) | Time zone data | No - host-provided | +| Readline (`libreadline`) | Interactive `pg0 psql` | No - host-provided | Most desktop Linux distributions and macOS have these libraries pre-installed. You only need to install them manually in minimal Docker images or bare-metal servers. +## Tested and Supported Platforms + +The table below reflects what we actually exercise via the docker tests in `docker-tests/` plus the platforms targeted by the release CI. Anything not in the table is best-effort: it may work, but we do not test it. + +| Platform / Image | Architecture | Status | Notes | +|---|---|---|---| +| macOS (Apple Silicon, M1/M2/M3) | aarch64 | ✅ Supported | Released binary; built in CI | +| macOS (Intel) | x86_64 | ✅ Supported | Released binary; built in CI | +| Debian 12 (bookworm) | x86_64, aarch64 | ✅ Tested | `docker-tests/test_debian_*.sh` (python:3.11-slim) | +| Debian 13 (trixie) | x86_64, aarch64 | ✅ Expected to work | Same glibc / libxml2 ABI as bookworm | +| Ubuntu 22.04 (Jammy) | x86_64, aarch64 | ✅ Expected to work | glibc 2.35 baseline; matches release CI build host | +| Ubuntu 24.04 (Noble) | x86_64 | ✅ Tested | `docker-tests/test_ubuntu_amd64.sh` | +| Ubuntu 25.10 (Questing) | x86_64 | ✅ Tested | `docker-tests/test_ubuntu_amd64.sh` - works thanks to bundled libxml2.so.2 + libicu70 | +| Ubuntu 26.04 (next LTS) | x86_64, aarch64 | ✅ Expected to work | Inherits libxml2 2.14 / ICU 76 from 25.10 | +| Alpine 3.20 | x86_64, aarch64 | ✅ Tested | `docker-tests/test_alpine_*.sh` (python:3.12-alpine3.20).
Uses musl + system ICU 74 | +| Alpine 3.21 | x86_64, aarch64 | ✅ Expected to work | Same ICU 74 line as 3.20 (untested but ABI-compatible) | +| Alpine 3.22, 3.23+ | x86_64, aarch64 | ❌ Not supported | Ships ICU 76; the upstream theseus-rs musl PostgreSQL binary is built against ICU 74 and there is no compat package on Alpine. Use Alpine 3.20 or 3.21 instead | +| Windows 10/11 | x86_64 | ✅ Supported | Released binary; built in CI | +| NixOS | x86_64, aarch64 | ✅ Supported | Timezone pinned to UTC since v0.13.0 ([#11](https://github.com/vectorize-io/pg0/issues/11)) | +| Any environment that runs as root only (e.g. Google Colab, restricted containers) | any | ❌ Not supported | PostgreSQL refuses to run as root - see [Troubleshooting](#postgresql-cannot-run-as-root) | +| Linux with glibc < 2.35 | any | ⚠️ Auto-fallback | The install script switches to the statically-linked musl binary; pgvector is not available in that mode | + ## Troubleshooting ### PostgreSQL Cannot Run as Root diff --git a/build.rs b/build.rs index 72bac1d..b676a8e 100644 --- a/build.rs +++ b/build.rs @@ -1,34 +1,40 @@ +use std::collections::HashMap; use std::env; use std::fs; +use std::fs::File; use std::io; -use std::path::PathBuf; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; + +use sha2::{Digest, Sha256}; fn main() { println!("cargo:rerun-if-changed=versions.env"); - // Load versions from versions.env let versions_env = fs::read_to_string("versions.env").expect("Failed to read versions.env"); - let mut pg_version = String::new(); - let mut pgvector_version = String::new(); - let mut pgvector_tag = String::new(); - let mut pgvector_repo = String::new(); - + let mut versions: HashMap = HashMap::new(); for line in versions_env.lines() { let line = line.trim(); if line.is_empty() || line.starts_with('#') { continue; } if let Some((key, value)) = line.split_once('=') { - match key.trim() { - "PG_VERSION" => pg_version = value.trim().to_string(), - "PGVECTOR_VERSION" => 
pgvector_version = value.trim().to_string(), - "PGVECTOR_COMPILED_TAG" => pgvector_tag = value.trim().to_string(), - "PGVECTOR_COMPILED_REPO" => pgvector_repo = value.trim().to_string(), - _ => {} - } + versions.insert(key.trim().to_string(), value.trim().to_string()); } } + let get = |k: &str| -> String { + versions + .get(k) + .unwrap_or_else(|| panic!("Missing {} in versions.env", k)) + .clone() + }; + + let pg_version = get("PG_VERSION"); + let pgvector_version = get("PGVECTOR_VERSION"); + let pgvector_tag = get("PGVECTOR_COMPILED_TAG"); + let pgvector_repo = get("PGVECTOR_COMPILED_REPO"); + println!("cargo:rustc-env=PG_VERSION={}", pg_version); println!("cargo:rustc-env=PGVECTOR_VERSION={}", pgvector_version); println!("cargo:rustc-env=PGVECTOR_COMPILED_TAG={}", pgvector_tag); @@ -36,9 +42,9 @@ fn main() { let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); - // Bundle PostgreSQL and pgvector bundle_postgresql(&pg_version, &out_dir); bundle_pgvector(&pg_version, &pgvector_tag, &pgvector_repo, &out_dir); + bundle_runtime_libs(&versions, &out_dir); } fn bundle_postgresql(pg_version: &str, out_dir: &PathBuf) { @@ -172,3 +178,200 @@ fn download_file(url: &str, dest: &PathBuf) -> io::Result<()> { Ok(()) } + +/// Bundle libxml2.so.2 and its transitive ICU dependency alongside PostgreSQL. +/// +/// The theseus-rs PostgreSQL build links against libxml2.so.2 (DT_NEEDED); +/// libxml2 then pulls ICU in transitively. Both have been replaced upstream: +/// - libxml2 2.14 (Ubuntu 25.10+) bumped the SONAME to .so.16; there is no +/// .so.2 compat package. +/// - ICU has moved to .so.74 in 24.04, .so.76 in 25.10+, and continues to +/// drift forward. +/// +/// We download libxml2 + the matching libicu .deb files from Ubuntu 22.04's +/// archive at build time, repack them into a single tar.gz, and embed that +/// into the pg0 binary. 
Ubuntu 22.04 is chosen because its libs require at +/// most GLIBC 2.34, keeping us inside the manylinux_2_35 wheel baseline - +/// Ubuntu 24.04's libs would require GLIBC 2.38 and break users on 22.04 / +/// Debian 12. +/// +/// At runtime, main.rs extracts the libs next to the bundled postgres binary +/// and prepends that directory to LD_LIBRARY_PATH. +/// +/// Only Linux GNU targets get a non-empty bundle. Other targets get an empty +/// bundle file so the include_bytes! macro in main.rs has something to point +/// at on every platform. +fn bundle_runtime_libs(versions: &HashMap, out_dir: &PathBuf) { + let target = env::var("TARGET").unwrap(); + let bundle_path = out_dir.join("runtime_libs.tar.gz"); + + let arch = match target.as_str() { + "x86_64-unknown-linux-gnu" => "AMD64", + "aarch64-unknown-linux-gnu" => "ARM64", + _ => { + // Empty bundle for everything else. + fs::write(&bundle_path, b"").expect("Failed to write empty runtime libs bundle"); + println!( + "cargo:rustc-env=RUNTIME_LIBS_BUNDLE_PATH={}", + bundle_path.display() + ); + return; + } + }; + + let entries: Vec = vec![ + DebSpec { + url: versions + .get(&format!("LIBXML2_DEB_URL_{}", arch)) + .expect("missing LIBXML2_DEB_URL") + .clone(), + sha256: versions + .get(&format!("LIBXML2_DEB_SHA256_{}", arch)) + .expect("missing LIBXML2_DEB_SHA256") + .clone(), + // Ubuntu 22.04 ships libxml2.so.2 -> libxml2.so.2.9.13. + wanted: vec!["libxml2.so.2.9.13".to_string()], + }, + DebSpec { + url: versions + .get(&format!("LIBICU_DEB_URL_{}", arch)) + .expect("missing LIBICU_DEB_URL") + .clone(), + sha256: versions + .get(&format!("LIBICU_DEB_SHA256_{}", arch)) + .expect("missing LIBICU_DEB_SHA256") + .clone(), + // Ubuntu 22.04 ships libicu70.1 (matching the .so.70 SONAME). + // libxml2.so.2 only directly needs libicuuc, but libicuuc itself + // pulls in libicudata; libicui18n is included for completeness. 
+ wanted: vec![ + "libicudata.so.70.1".to_string(), + "libicui18n.so.70.1".to_string(), + "libicuuc.so.70.1".to_string(), + ], + }, + ]; + + let mut staged: Vec<(String, Vec)> = Vec::new(); + for spec in &entries { + let deb_path = out_dir.join(format!( + "{}.deb", + sha256_short(&spec.url) + )); + if !deb_path.exists() { + eprintln!("Downloading {}...", spec.url); + download_file(&spec.url, &deb_path).expect("Failed to download .deb"); + } + verify_sha256(&deb_path, &spec.sha256); + for filename in &spec.wanted { + let bytes = extract_lib_from_deb(&deb_path, filename) + .unwrap_or_else(|e| panic!("Failed to extract {} from {}: {}", filename, spec.url, e)); + staged.push((filename.clone(), bytes)); + } + } + + write_tar_gz(&bundle_path, &staged).expect("Failed to write runtime libs bundle"); + eprintln!( + "Bundled runtime libs ({} files) at {}", + staged.len(), + bundle_path.display() + ); + println!( + "cargo:rustc-env=RUNTIME_LIBS_BUNDLE_PATH={}", + bundle_path.display() + ); +} + +struct DebSpec { + url: String, + sha256: String, + /// Filenames (basename only) we want to pull out of `data.tar.zst`. + wanted: Vec, +} + +fn sha256_short(input: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(input.as_bytes()); + hex::encode(&hasher.finalize()[..8]) +} + +fn verify_sha256(path: &Path, expected: &str) { + let mut file = File::open(path).expect("open deb for hashing"); + let mut hasher = Sha256::new(); + let mut buf = [0u8; 64 * 1024]; + loop { + let n = file.read(&mut buf).expect("read deb"); + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + let actual = hex::encode(hasher.finalize()); + assert_eq!( + actual, + expected, + "SHA256 mismatch for {} — refusing to ship a deb that doesn't match versions.env", + path.display() + ); +} + +/// Extract `` (matched by basename) from the `data.tar.zst` member of +/// a .deb archive. Returns the raw file contents. 
+fn extract_lib_from_deb(deb_path: &Path, wanted: &str) -> io::Result> { + let file = File::open(deb_path)?; + let mut ar = ar::Archive::new(file); + while let Some(entry) = ar.next_entry() { + let mut entry = entry?; + let id = std::str::from_utf8(entry.header().identifier()).unwrap_or(""); + if !id.starts_with("data.tar") { + continue; + } + // The stream is one of: data.tar (uncompressed), data.tar.gz, data.tar.xz, + // data.tar.zst. Modern Ubuntu uses .zst; we only support that path. + if !id.ends_with(".zst") { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("unsupported data archive compression: {}", id), + )); + } + let decoder = zstd::Decoder::new(&mut entry)?; + let mut tar = tar::Archive::new(decoder); + for tentry in tar.entries()? { + let mut tentry = tentry?; + let path = tentry.path()?.to_path_buf(); + let basename = path + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or(""); + if basename == wanted { + let mut buf = Vec::new(); + tentry.read_to_end(&mut buf)?; + return Ok(buf); + } + } + } + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("{} not found in {}", wanted, deb_path.display()), + )) +} + +/// Write `entries` as a flat tar.gz: each entry becomes a top-level file with +/// 0o755 permissions. The runtime extractor in src/main.rs creates the SONAME +/// symlinks (e.g. libxml2.so.2 -> libxml2.so.2.9.13) after extraction.
+fn write_tar_gz(out_path: &Path, entries: &[(String, Vec)]) -> io::Result<()> { + let file = File::create(out_path)?; + let gz = flate2::write::GzEncoder::new(file, flate2::Compression::default()); + let mut builder = tar::Builder::new(gz); + for (name, bytes) in entries { + let mut header = tar::Header::new_gnu(); + header.set_path(name)?; + header.set_size(bytes.len() as u64); + header.set_mode(0o755); + header.set_cksum(); + builder.append(&header, &mut bytes.as_slice())?; + } + let gz = builder.into_inner()?; + gz.finish()?.flush()?; + Ok(()) +} diff --git a/docker-tests/README.md b/docker-tests/README.md index 195484e..0788ea7 100644 --- a/docker-tests/README.md +++ b/docker-tests/README.md @@ -10,6 +10,7 @@ Automated tests to verify pg0 works correctly across different platforms and dis | `test_debian_arm64.sh` | python:3.11-slim | linux/arm64 | aarch64 | glibc | | `test_alpine_amd64.sh` | python:3.12-alpine3.20 | linux/amd64 | x86_64 | musl | | `test_alpine_arm64.sh` | python:3.12-alpine3.20 | linux/arm64 | aarch64 | musl | +| `test_ubuntu_amd64.sh` | ubuntu:24.04 + ubuntu:25.10 | linux/amd64 | x86_64 | glibc | ## Python SDK Tests diff --git a/docker-tests/run_all_tests.sh b/docker-tests/run_all_tests.sh index 1be1b88..d9115cf 100755 --- a/docker-tests/run_all_tests.sh +++ b/docker-tests/run_all_tests.sh @@ -40,6 +40,7 @@ echo " - Debian AMD64 (python:3.11-slim)" echo " - Debian ARM64 (python:3.11-slim)" echo " - Alpine AMD64 (python:3.11-alpine)" echo " - Alpine ARM64 (python:3.11-alpine)" +echo " - Ubuntu AMD64 (ubuntu:24.04 + ubuntu:25.10)" echo "" echo -e "${YELLOW}Note: ARM64 tests will use emulation on x86_64 hosts${NC}" echo "" @@ -50,6 +51,7 @@ run_test "Debian AMD64" "$DIR/test_debian_amd64.sh" run_test "Debian ARM64" "$DIR/test_debian_arm64.sh" run_test "Alpine AMD64" "$DIR/test_alpine_amd64.sh" run_test "Alpine ARM64" "$DIR/test_alpine_arm64.sh" +run_test "Ubuntu AMD64 (24.04 + 25.10)" "$DIR/test_ubuntu_amd64.sh" run_test "Missing Libs Detection 
(Debian AMD64)" "$DIR/test_missing_libs.sh" # Print summary diff --git a/docker-tests/test_missing_libs.sh b/docker-tests/test_missing_libs.sh index 2dd9a41..48452c7 100755 --- a/docker-tests/test_missing_libs.sh +++ b/docker-tests/test_missing_libs.sh @@ -67,8 +67,12 @@ echo "Installation directory cleared." ' echo "" -echo "=== Phase 2: Remove libxml2 to simulate missing library ===" -apt-get remove -y libxml2 2>&1 | tail -3 +echo "=== Phase 2: Remove libgssapi-krb5-2 to simulate missing library ===" +# libxml2.so.2 is bundled inside the pg0 binary, so removing the system +# package no longer surfaces a missing-lib error. libgssapi-krb5-2 is one of +# the deps we expect users to install themselves, so removing it still hits +# the detection code path. +apt-get remove -y libgssapi-krb5-2 2>&1 | tail -3 echo "" echo "=== Phase 3: Verify pg0 detects missing libraries ===" @@ -90,10 +94,10 @@ else exit 1 fi -if echo "$OUTPUT" | grep -qi "libxml2"; then - echo "PASS: Found libxml2 in the missing library list" +if echo "$OUTPUT" | grep -qi "libgssapi_krb5"; then + echo "PASS: Found libgssapi_krb5 in the missing library list" else - echo "FAIL: Expected libxml2 to be listed as missing" + echo "FAIL: Expected libgssapi_krb5 to be listed as missing" exit 1 fi diff --git a/docker-tests/test_ubuntu_amd64.sh b/docker-tests/test_ubuntu_amd64.sh new file mode 100755 index 0000000..22edbba --- /dev/null +++ b/docker-tests/test_ubuntu_amd64.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# Test pg0 against the official Ubuntu LTS / interim images. +# 24.04 (Noble) is currently the only supported LTS; 25.10 (Questing) and the +# upcoming 26.04 ship libxml2 2.14 (SONAME .so.16), which would break the +# theseus-rs PostgreSQL binary (links libxml2.so.2) without pg0's bundled libs.
+set -u + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +INSTALL_SCRIPT="$SCRIPT_DIR/../install.sh" + +VOLUME_ARGS="" +BINARY_ENV="" +if [ -n "${PG0_BINARY_PATH:-}" ]; then + echo "Using local binary: $PG0_BINARY_PATH" + VOLUME_ARGS="-v $PG0_BINARY_PATH:/tmp/pg0-binary:ro" + BINARY_ENV="-e PG0_BINARY_URL=file:///tmp/pg0-binary" +elif [ -n "${PG0_VERSION:-}" ]; then + echo "Using released binary v$PG0_VERSION" + BINARY_ENV="-e PG0_BINARY_URL=https://github.com/vectorize-io/pg0/releases/download/v${PG0_VERSION}/pg0-linux-x86_64-gnu" +fi + +run_one() { + local image="$1" + echo "" + echo "==================================" + echo "Testing pg0 on $image (linux/amd64)" + echo "==================================" + + docker run --rm --platform=linux/amd64 \ + $BINARY_ENV \ + -v "$INSTALL_SCRIPT:/tmp/install.sh:ro" \ + $VOLUME_ARGS \ + -e DEBIAN_FRONTEND=noninteractive \ + "$image" bash -c ' +set -e + +echo "=== System Info ===" +uname -m +cat /etc/os-release | grep PRETTY_NAME + +echo "" +echo "=== Installing dependencies ===" +# Some hosts cannot reach the http:// Ubuntu mirrors; switch to https. +if ls /etc/apt/sources.list.d/*.sources >/dev/null 2>&1; then + sed -i "s|http://archive.ubuntu.com|https://archive.ubuntu.com|g; s|http://security.ubuntu.com|https://security.ubuntu.com|g" /etc/apt/sources.list.d/*.sources +fi +echo "Acquire::https::Verify-Peer false;" > /etc/apt/apt.conf.d/99insecure +echo "Acquire::https::Verify-Host false;" >> /etc/apt/apt.conf.d/99insecure +apt-get update -qq +# README-recommended runtime deps + tzdata + libreadline (for psql). +# Some package names differ across releases - fall back across them. 
+apt-get install -y -qq curl ca-certificates sudo procps tzdata >/dev/null +apt-get install -y -qq libgssapi-krb5-2 >/dev/null +# libssl3 is a virtual that resolves to libssl3t64 on 24.04+ +apt-get install -y -qq libssl3 >/dev/null 2>&1 || apt-get install -y -qq libssl3t64 >/dev/null +# libxml2 was renamed to libxml2-16 in 25.10 (SONAME .so.2 -> .so.16) +apt-get install -y -qq libxml2 >/dev/null 2>&1 || apt-get install -y -qq libxml2-16 >/dev/null +# libicu major version varies by release +apt-get install -y -qq libicu74 >/dev/null 2>&1 || \ + apt-get install -y -qq libicu76 >/dev/null 2>&1 || \ + apt-get install -y -qq libicu72 >/dev/null +# readline for psql +apt-get install -y -qq libreadline8 >/dev/null 2>&1 || \ + apt-get install -y -qq libreadline8t64 >/dev/null + +echo "" +echo "=== Creating non-root user ===" +useradd -m -s /bin/bash pguser + +echo "" +echo "=== Switching to non-root user for pg0 ===" +su - pguser << EOF +set -e +export PG0_BINARY_URL="${PG0_BINARY_URL:-}" + +echo "=== Installing pg0 ===" +bash /tmp/install.sh +export PATH="\$HOME/.local/bin:\$PATH" + +echo "" +echo "=== Starting PostgreSQL ===" +pg0 start +sleep 3 + +echo "" +echo "=== Basic query ===" +pg0 psql -c "SELECT version();" -t | head -1 + +echo "" +echo "=== pgvector ===" +pg0 psql -c "CREATE EXTENSION IF NOT EXISTS vector;" +pg0 psql -c "CREATE TABLE embeddings (id INT, vec vector(3));" +pg0 psql -c "INSERT INTO embeddings VALUES (1, '\''[1,2,3]'\'');" +pg0 psql -c "SELECT * FROM embeddings;" -t + +echo "" +echo "=== Stopping PostgreSQL ===" +pg0 stop + +echo "" +echo "PASS: $image" +EOF +' + local rc=$? + if [ $rc -ne 0 ]; then + echo "FAIL: $image (exit $rc)" + fi + return $rc +} + +failures=() +for image in ubuntu:24.04 ubuntu:25.10; do + if ! 
run_one "$image"; then + failures+=("$image") + fi +done + +echo "" +echo "==================================" +if [ ${#failures[@]} -eq 0 ]; then + echo "All Ubuntu tests passed" + exit 0 +else + echo "Failures: ${failures[*]}" + exit 1 +fi diff --git a/src/main.rs b/src/main.rs index 3197bd1..2a85de3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,7 @@ use postgresql_embedded::{Settings, VersionReq}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process; use tar::Archive; use thiserror::Error; @@ -17,6 +17,12 @@ static POSTGRESQL_BUNDLE: &[u8] = include_bytes!(env!("POSTGRESQL_BUNDLE_PATH")) /// The embedded pgvector bundle static PGVECTOR_BUNDLE: &[u8] = include_bytes!(env!("PGVECTOR_BUNDLE_PATH")); +/// Extra runtime libraries (libxml2.so.2 + the libicu major it transitively +/// loads) that the bundled PostgreSQL binary dynamic-links against. Empty on +/// platforms where the host reliably provides them (macOS, Windows, +/// Alpine/musl). See build.rs. 
+static RUNTIME_LIBS_BUNDLE: &[u8] = include_bytes!(env!("RUNTIME_LIBS_BUNDLE_PATH"));
+
 #[derive(Error, Debug)]
 enum CliError {
     #[error("PostgreSQL error: {0}")]
@@ -441,71 +447,198 @@ fn extract_bundled_postgresql(installation_dir: &PathBuf, pg_version: &str) -> R
 
     // Check if already extracted
     let bin_dir = version_dir.join("bin");
-    if bin_dir.exists() && bin_dir.join(POSTGRES_BINARY).exists() {
-        tracing::debug!("PostgreSQL already extracted at {}", version_dir.display());
-        return Ok(version_dir);
-    }
+    let already_extracted = bin_dir.exists() && bin_dir.join(POSTGRES_BINARY).exists();
 
-    if POSTGRESQL_BUNDLE.is_empty() {
-        return Err(CliError::Other(
-            "PostgreSQL bundle is empty - this binary was not built with BUNDLE_POSTGRESQL=true".to_string()
-        ));
-    }
+    if !already_extracted {
+        if POSTGRESQL_BUNDLE.is_empty() {
+            return Err(CliError::Other(
+                "PostgreSQL bundle is empty - this binary was not built with BUNDLE_POSTGRESQL=true".to_string()
+            ));
+        }
 
-    println!("Extracting bundled PostgreSQL {}...", pg_version);
-    fs::create_dir_all(&version_dir)?;
+        println!("Extracting bundled PostgreSQL {}...", pg_version);
+        fs::create_dir_all(&version_dir)?;
 
-    extract_postgresql_archive(POSTGRESQL_BUNDLE, &version_dir)?;
+        extract_postgresql_archive(POSTGRESQL_BUNDLE, &version_dir)?;
 
-    // Verify extraction
-    if !bin_dir.join(POSTGRES_BINARY).exists() {
-        return Err(CliError::Other(format!(
-            "PostgreSQL extraction failed - {} not found at {}",
-            POSTGRES_BINARY,
-            bin_dir.display()
-        )));
-    }
+        if !bin_dir.join(POSTGRES_BINARY).exists() {
+            return Err(CliError::Other(format!(
+                "PostgreSQL extraction failed - {} not found at {}",
+                POSTGRES_BINARY,
+                bin_dir.display()
+            )));
+        }
 
-    // Make binaries executable on Unix
-    #[cfg(unix)]
-    {
-        use std::os::unix::fs::PermissionsExt;
-        if let Ok(entries) = fs::read_dir(&bin_dir) {
-            for entry in entries.flatten() {
-                let path = entry.path();
-                if path.is_file() {
-                    if let Ok(metadata) = path.metadata() {
-                        let mut perms = metadata.permissions();
-                        perms.set_mode(0o755);
-                        let _ = fs::set_permissions(&path, perms);
+        // Make binaries executable on Unix
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            if let Ok(entries) = fs::read_dir(&bin_dir) {
+                for entry in entries.flatten() {
+                    let path = entry.path();
+                    if path.is_file() {
+                        if let Ok(metadata) = path.metadata() {
+                            let mut perms = metadata.permissions();
+                            perms.set_mode(0o755);
+                            let _ = fs::set_permissions(&path, perms);
+                        }
                     }
                 }
             }
-        }
-        // Also make lib files executable/accessible
-        let lib_dir = version_dir.join("lib");
-        if let Ok(entries) = fs::read_dir(&lib_dir) {
-            for entry in entries.flatten() {
-                let path = entry.path();
-                if path.is_file() {
-                    if let Ok(metadata) = path.metadata() {
-                        let mut perms = metadata.permissions();
-                        perms.set_mode(0o755);
-                        let _ = fs::set_permissions(&path, perms);
+            // Also make lib files executable/accessible
+            let lib_dir = version_dir.join("lib");
+            if let Ok(entries) = fs::read_dir(&lib_dir) {
+                for entry in entries.flatten() {
+                    let path = entry.path();
+                    if path.is_file() {
+                        if let Ok(metadata) = path.metadata() {
+                            let mut perms = metadata.permissions();
+                            perms.set_mode(0o755);
+                            let _ = fs::set_permissions(&path, perms);
+                        }
                     }
                 }
             }
         }
+    } else {
+        tracing::debug!("PostgreSQL already extracted at {}", version_dir.display());
     }
 
-    // Check for missing shared libraries on Linux
+    // Always ensure the runtime libs are unpacked and LD_LIBRARY_PATH points
+    // at them. Runs even when the postgres install is cached so users
+    // upgrading from a pg0 version that didn't ship the bundle pick up the
+    // new libs without manually wiping ~/.pg0/installation/.
+    ensure_runtime_libs(&version_dir)?;
+    #[cfg(target_os = "linux")]
+    prepend_lib_dir_to_ld_library_path(&version_dir.join("lib"));
+
+    // Final guard: if any required .so is still unresolved, surface a clear
+    // error instead of letting initdb / postgres start with a confusing
+    // dlopen failure.
     #[cfg(target_os = "linux")]
     check_shared_libraries(&bin_dir)?;
 
-    println!("PostgreSQL {} extracted successfully.", pg_version);
+    if !already_extracted {
+        println!("PostgreSQL {} extracted successfully.", pg_version);
+    }
     Ok(version_dir)
 }
 
+/// Unpack RUNTIME_LIBS_BUNDLE into `<version_dir>/lib/` and create the SONAME
+/// symlinks the dynamic linker looks up (e.g. libxml2.so.2 ->
+/// libxml2.so.2.9.14). No-op when the bundle is empty (non-Linux-GNU targets)
+/// or when the libs are already present.
+fn ensure_runtime_libs(version_dir: &Path) -> Result<(), CliError> {
+    if RUNTIME_LIBS_BUNDLE.is_empty() {
+        return Ok(());
+    }
+
+    let lib_dir = version_dir.join("lib");
+    fs::create_dir_all(&lib_dir)?;
+
+    let decoder = GzDecoder::new(RUNTIME_LIBS_BUNDLE);
+    let mut archive = Archive::new(decoder);
+    let mut extracted_names: Vec<String> = Vec::new();
+
+    for entry in archive.entries()? {
+        let mut entry = entry?;
+        let path = entry.path()?.to_path_buf();
+        let basename = match path.file_name().and_then(|s| s.to_str()) {
+            Some(name) => name.to_string(),
+            None => continue,
+        };
+        let dest = lib_dir.join(&basename);
+        if !dest.exists() {
+            entry.unpack(&dest)?;
+            #[cfg(unix)]
+            {
+                use std::os::unix::fs::PermissionsExt;
+                let mut perms = fs::metadata(&dest)?.permissions();
+                perms.set_mode(0o755);
+                let _ = fs::set_permissions(&dest, perms);
+            }
+        }
+        extracted_names.push(basename);
+    }
+
+    // Create SONAME symlinks: libxml2.so.2 -> libxml2.so.2.9.14, etc.
+    // The dynamic linker only looks up files by SONAME ("<name>.so.<major>"), not
+    // the fully-versioned filename, so the symlinks are what actually makes
+    // the bundled libs reachable.
+    #[cfg(unix)]
+    for name in &extracted_names {
+        if let Some(soname) = soname_for(name) {
+            let link = lib_dir.join(&soname);
+            if link.exists() {
+                continue;
+            }
+            // Symlink relative to lib_dir so it stays valid if the install
+            // directory is moved.
+            if let Err(e) = std::os::unix::fs::symlink(name, &link) {
+                if e.kind() != std::io::ErrorKind::AlreadyExists {
+                    return Err(e.into());
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Given a fully-versioned shared-library filename like
+/// `libxml2.so.2.9.13` or `libicudata.so.70.1`, return the SONAME the dynamic
+/// linker actually resolves: `libxml2.so.2`, `libicudata.so.70`. Returns None
+/// for filenames we don't recognise (so we don't accidentally create bogus
+/// symlinks).
+fn soname_for(filename: &str) -> Option<String> {
+    let so_idx = filename.find(".so.")?;
+    let (stem, rest) = filename.split_at(so_idx + ".so.".len());
+    // `stem` ends with ".so."; `rest` is the version tail e.g. "2.9.14".
+    let major = rest.split('.').next()?;
+    if major.is_empty() {
+        return None;
+    }
+    Some(format!("{}{}", stem, major))
+}
+
+/// Walk up from a `bin/psql` path to the version-specific install dir and
+/// make sure the runtime libs are present + LD_LIBRARY_PATH points at them.
+/// Used by `pg0 psql`, which spawns psql against an instance whose install
+/// directory was set up by an earlier `pg0 start` (possibly from a previous
+/// pg0 release that didn't ship the libs bundle).
+fn ensure_runtime_libs_for_psql(psql_path: &Path) -> Result<(), CliError> {
+    let version_dir = match psql_path.parent().and_then(|p| p.parent()) {
+        Some(p) => p.to_path_buf(),
+        None => return Ok(()),
+    };
+    ensure_runtime_libs(&version_dir)?;
+    #[cfg(target_os = "linux")]
+    prepend_lib_dir_to_ld_library_path(&version_dir.join("lib"));
+    Ok(())
+}
+
+/// Prepend `lib_dir` to the process LD_LIBRARY_PATH so that subprocesses
+/// (initdb, postgres, pg_ctl, psql) find the bundled libs first. Existing
+/// entries are preserved.
+#[cfg(target_os = "linux")]
+fn prepend_lib_dir_to_ld_library_path(lib_dir: &Path) {
+    let lib_dir_s = lib_dir.to_string_lossy().to_string();
+    let new = match std::env::var("LD_LIBRARY_PATH") {
+        Ok(existing) if !existing.is_empty() => {
+            // Avoid duplicating ourselves on repeat calls.
+            if existing
+                .split(':')
+                .any(|p| p == lib_dir_s)
+            {
+                return;
+            }
+            format!("{}:{}", lib_dir_s, existing)
+        }
+        _ => lib_dir_s,
+    };
+    std::env::set_var("LD_LIBRARY_PATH", new);
+}
+
 /// Check that the postgres binary can find all required shared libraries.
 /// Only called on Linux. If ldd is unavailable, silently skips the check.
 #[cfg(target_os = "linux")]
@@ -1079,6 +1212,10 @@ fn psql(name: String, args: Vec) -> Result<(), CliError> {
     }
 
     let psql_path = find_psql_binary(&info.installation_dir)?;
+    // psql is dynamic-linked against the same libxml2/libicu as postgres, so
+    // make sure subprocess can find the bundled libs even when this command is
+    // invoked against an instance that another `pg0 start` already extracted.
+    ensure_runtime_libs_for_psql(&psql_path)?;
 
     // Build connection URI
     let uri = format!(
@@ -1233,6 +1370,12 @@ fn install_extension(instance_name: String, extension_name: String) -> Result<()
     // Build Settings for the extension installer
     // The installation_dir needs to point to the version-specific directory
     let version_install_dir = info.installation_dir.join(&pg_version);
+    // Make sure the bundled libxml2/libicu are present and on the loader path
+    // before pg_config / pg_ctl are spawned on a host where the system libs
+    // are missing or have a different SONAME.
+    ensure_runtime_libs(&version_install_dir)?;
+    #[cfg(target_os = "linux")]
+    prepend_lib_dir_to_ld_library_path(&version_install_dir.join("lib"));
     let settings = Settings {
         version: version_req.clone(),
         port: info.port,
diff --git a/versions.env b/versions.env
index 9f83058..9cdb9bd 100644
--- a/versions.env
+++ b/versions.env
@@ -2,3 +2,30 @@ PG_VERSION=18.1.0
 PGVECTOR_VERSION=0.8.1
 PGVECTOR_COMPILED_TAG=v0.18.237
 PGVECTOR_COMPILED_REPO=nicoloboschi/pgvector_compiled
+
+# Runtime libraries bundled with the Linux GNU binaries.
+#
+# The theseus-rs PostgreSQL build does NOT directly link against ICU. It
+# links against libxml2.so.2 (DT_NEEDED), and libxml2 in turn pulls in ICU
+# transitively. So we bundle a libxml2.so.2 + the matching libicu it was
+# built against, all sourced from the same Ubuntu archive.
+#
+# We deliberately use Ubuntu 22.04 (Jammy) packages because:
+#   1. Their glibc symbol requirements top out at GLIBC 2.34, comfortably
+#      below our manylinux_2_35 wheel baseline. Using Ubuntu 24.04 packages
+#      would require GLIBC 2.38, breaking users on Ubuntu 22.04 / Debian 12.
+#   2. Ubuntu 22.04's libxml2.so.2 transitively wants libicu*.so.70, so we
+#      bundle that ICU major rather than the .so.74 ICU 24.04 ships.
+# This works on every supported distro: 22.04 has the same SONAMEs natively;
+# 24.04+ keeps libxml2.so.2 around but in a newer ICU lineage; 25.10+ has
+# neither, but the bundled libxml2.so.2 + libicu70 fills both gaps.
+LIBXML2_DEB_VERSION=2.9.13+dfsg-1ubuntu0.11
+LIBXML2_DEB_URL_AMD64=http://archive.ubuntu.com/ubuntu/pool/main/libx/libxml2/libxml2_2.9.13+dfsg-1ubuntu0.11_amd64.deb
+LIBXML2_DEB_SHA256_AMD64=74eb1df2112ec3cd5a961b737d68c9a96c6de2f115a124ede81af5292b5d23e7
+LIBXML2_DEB_URL_ARM64=http://ports.ubuntu.com/pool/main/libx/libxml2/libxml2_2.9.13+dfsg-1ubuntu0.11_arm64.deb
+LIBXML2_DEB_SHA256_ARM64=313e94b051c3e3974dea06c4576dc2b42f04cf7aba6c9dbb2f14135c218c7be7
+LIBICU_DEB_VERSION=70.1-2ubuntu1
+LIBICU_DEB_URL_AMD64=http://archive.ubuntu.com/ubuntu/pool/main/i/icu/libicu70_70.1-2ubuntu1_amd64.deb
+LIBICU_DEB_SHA256_AMD64=223c1e6cebf2a6748637303227d8b84337c14a2d30eacbc46a68d02557dcff3e
+LIBICU_DEB_URL_ARM64=http://ports.ubuntu.com/pool/main/i/icu/libicu70_70.1-2ubuntu1_arm64.deb
+LIBICU_DEB_SHA256_ARM64=a06f86489e364f555a0655f476e6b2693760aff9d8dec0011daf9b17957da3c5