From 7e85f6e7dcc4a2fb710f4b4e5636e5c64230230f Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Fri, 29 May 2026 12:33:02 -0500 Subject: [PATCH 1/2] chore: prepare v0.3.0 release Signed-off-by: Nelson Spence --- .github/workflows/fuzz.yml | 40 ++++--- .github/workflows/release.yml | 121 ++++++++++++++++++++- CHANGELOG.md | 25 ++++- Cargo.lock | 4 +- Cargo.toml | 27 ++++- README.md | 31 +++--- RELEASING.md | 8 +- ROADMAP.md | 15 ++- THREAT_MODEL.md | 9 +- ordvec-python/Cargo.toml | 2 +- ordvec-python/README.md | 5 +- ordvec-python/pyproject.toml | 2 +- ordvec-python/python/ordvec/__init__.py | 2 +- ordvec-python/src/lib.rs | 49 +++++++++ src/util.rs | 9 +- tests/release_signed_release_invariants.sh | 32 +++++- 16 files changed, 315 insertions(+), 66 deletions(-) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 7403a16..8a30587 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -12,10 +12,11 @@ name: fuzz # across ALL seven targets. # # This runs UNATTENDED on a cron schedule, so every third-party action is -# SHA-pinned and cargo-fuzz is version-pinned — a fuzz smoke must not itself -# become a supply-chain hole. Read-only token; the only `run:` interpolation is -# the matrix target name, passed through `env:` (never inlined into the shell) -# so there is no template-injection surface (THREAT-CICD-001). +# SHA-pinned, cargo-fuzz is installed with its bundled lockfile on a pinned +# Rust toolchain, and fuzzing runs on a pinned nightly — a fuzz smoke must not +# itself become a supply-chain hole. Read-only token; the only `run:` +# interpolation is the matrix target name, passed through `env:` (never inlined +# into the shell) so there is no template-injection surface (THREAT-CICD-001). 
on: pull_request: @@ -28,6 +29,11 @@ on: permissions: contents: read +env: + CARGO_FUZZ_VERSION: "0.13.1" + CARGO_FUZZ_INSTALL_TOOLCHAIN: "1.89.0" + FUZZ_NIGHTLY: "nightly-2025-08-15" + concurrency: group: fuzz-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -52,19 +58,18 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # 1.89.0; cargo-fuzz locked install + with: + toolchain: ${{ env.CARGO_FUZZ_INSTALL_TOOLCHAIN }} - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # nightly; channel via toolchain: below with: - toolchain: nightly + toolchain: ${{ env.FUZZ_NIGHTLY }} - name: Install cargo-fuzz (version-pinned) - # NB: no `--locked` — cargo-fuzz 0.13.1's bundled Cargo.lock pins an old - # rustix (0.36.x) that no longer compiles on current nightly. The tool - # itself stays version-pinned; its build deps resolve to compatible - # versions. - run: cargo install cargo-fuzz --version 0.13.1 + run: cargo "+${CARGO_FUZZ_INSTALL_TOOLCHAIN}" install cargo-fuzz --version "${CARGO_FUZZ_VERSION}" --locked - name: Smoke env: TARGET: ${{ matrix.target }} - run: cargo +nightly fuzz run "$TARGET" -- -max_total_time=60 -rss_limit_mb=4096 + run: cargo "+${FUZZ_NIGHTLY}" fuzz run "$TARGET" -- -max_total_time=60 -rss_limit_mb=4096 # Weekly full sweep over all seven targets at a larger time budget. 
weekly: @@ -90,16 +95,15 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false + - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # 1.89.0; cargo-fuzz locked install + with: + toolchain: ${{ env.CARGO_FUZZ_INSTALL_TOOLCHAIN }} - uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # nightly; channel via toolchain: below with: - toolchain: nightly + toolchain: ${{ env.FUZZ_NIGHTLY }} - name: Install cargo-fuzz (version-pinned) - # NB: no `--locked` — cargo-fuzz 0.13.1's bundled Cargo.lock pins an old - # rustix (0.36.x) that no longer compiles on current nightly. The tool - # itself stays version-pinned; its build deps resolve to compatible - # versions. - run: cargo install cargo-fuzz --version 0.13.1 + run: cargo "+${CARGO_FUZZ_INSTALL_TOOLCHAIN}" install cargo-fuzz --version "${CARGO_FUZZ_VERSION}" --locked - name: Fuzz env: TARGET: ${{ matrix.target }} - run: cargo +nightly fuzz run "$TARGET" -- -max_total_time=300 -rss_limit_mb=4096 + run: cargo "+${FUZZ_NIGHTLY}" fuzz run "$TARGET" -- -max_total_time=300 -rss_limit_mb=4096 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 033aa3f..7822487 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -42,6 +42,8 @@ # `*.sigstore.json` bundle on the Release (`gh attestation verify`; also the # Scorecard signing probe -> 8, a backup if the .intoto.jsonl ever regresses). # * gh-action-pypi-publish -> PEP 740 attestations on PyPI (Integrity API). +# * post-publish PyPI JSON hash check -> every served wheel/sdist digest +# matches the staged dist files. # * crates.io / PyPI publish via Trusted Publishing (OIDC) — NO stored tokens. # # Fail-closed: `release-assets-draft` and both publishes `needs:` attest + @@ -270,10 +272,9 @@ jobs: # could inject code into the shipped wheel (zizmor cache-poisoning, # HIGH); the speedup isn't worth that risk on the release path. 
The CI # path (`python.yml`) keeps sccache on for the PR/main cadence. - # Prove the freshly-built wheel runs before it can be published. The - # linux/aarch64 wheel is cross-built under QEMU and can't execute on the x86 - # host, so it's skipped (covered by python.yml's native arm leg + the - # macos-arm64 leg here). + # Prove each native wheel runs before it can be published. The + # linux/aarch64 wheel is cross-built here and smoke-tested on a native + # ubuntu-24.04-arm runner by `smoke-linux-aarch64-wheel` below. - name: Set up Python to test the built wheel if: ${{ !(matrix.platform.runner == 'ubuntu-latest' && matrix.platform.target == 'aarch64') }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 @@ -284,8 +285,16 @@ jobs: shell: bash run: | set -euo pipefail + WHEEL="$(python - <<'PY' + from pathlib import Path + wheels = sorted(Path("ordvec-python/dist").glob("*.whl")) + if len(wheels) != 1: + raise SystemExit(f"expected exactly one wheel, found {wheels}") + print(wheels[0]) + PY + )" python -m pip install --require-hashes -r ordvec-python/requirements-dev.txt - python -m pip install ordvec-python/dist/*.whl + python -m pip install --no-index "$WHEEL" python -m pytest ordvec-python/tests -q - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: @@ -293,6 +302,61 @@ jobs: path: ordvec-python/dist/*.whl if-no-files-found: error + smoke-linux-aarch64-wheel: + name: smoke linux/aarch64 wheel + needs: [guard, build-wheels] + if: needs.guard.outputs.ok == 'true' + runs-on: ubuntu-24.04-arm + steps: + - name: Harden the runner + uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 + with: + egress-policy: audit + - name: Set up Python to test the built wheel + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.13" + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + 
persist-credentials: false + - name: Download the exact linux/aarch64 wheel + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: wheels-ubuntu-latest-aarch64 + path: wheelhouse + - name: Install exact wheel and run tiny RankQuant/Bitmap smoke + shell: bash + run: | + set -euo pipefail + WHEEL="$(python - <<'PY' + from pathlib import Path + wheels = sorted(Path("wheelhouse").glob("*.whl")) + if len(wheels) != 1: + raise SystemExit(f"expected exactly one linux/aarch64 wheel, found {wheels}") + print(wheels[0]) + PY + )" + python -m pip install --require-hashes -r ordvec-python/requirements-dev.txt + python -m pip install --no-index "$WHEEL" + python - <<'PY' + import numpy as np + from ordvec import Bitmap, RankQuant + + docs8 = np.arange(32, dtype=np.float32).reshape(4, 8) + rq = RankQuant(8, 2) + rq.add(docs8) + scores, ids = rq.search_asymmetric(docs8[:1], 2) + assert scores.shape == (1, 2) + assert ids.shape == (1, 2) + + docs64 = np.arange(256, dtype=np.float32).reshape(4, 64) + bm = Bitmap(64, 16) + bm.add(docs64) + scores, ids = bm.search(docs64[:1], 2) + assert scores.shape == (1, 2) + assert ids.shape == (1, 2) + PY + build-sdist: name: build sdist + SBOM needs: guard @@ -438,7 +502,7 @@ jobs: release-assets-draft: name: stage all assets on the DRAFT Release (does NOT un-draft) - needs: [guard, notes, attest, provenance, require-ci-green] + needs: [guard, notes, attest, provenance, require-ci-green, smoke-linux-aarch64-wheel] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest permissions: @@ -604,6 +668,51 @@ jobs: uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0 with: packages-dir: dist + - name: Post-publish PyPI hashes match staged dist + env: + VERSION: ${{ needs.guard.outputs.version }} + run: | + set -euo pipefail + python3 - <<'PY' + import hashlib + import json + import os + import sys + import time + import urllib.request + from pathlib import Path + + 
version = os.environ["VERSION"] + dist = Path("dist") + local = { + path.name: hashlib.sha256(path.read_bytes()).hexdigest() + for path in sorted(dist.iterdir()) + if path.is_file() and (path.name.endswith(".whl") or path.name.endswith(".tar.gz")) + } + if not local: + raise SystemExit("no local wheel/sdist files found in dist") + + url = f"https://pypi.org/pypi/ordvec/{version}/json" + last_error = None + for attempt in range(1, 25): + try: + with urllib.request.urlopen(url, timeout=15) as response: + payload = json.load(response) + remote = { + item["filename"]: item["digests"]["sha256"] + for item in payload.get("urls", []) + } + if remote == local: + print(f"OK: PyPI-served hashes match staged dist for ordvec {version}") + break + last_error = f"local={local!r} remote={remote!r}" + except Exception as exc: # noqa: BLE001 - diagnostic for CI logs. + last_error = repr(exc) + print(f"waiting for PyPI JSON/hash propagation ({attempt}/24): {last_error}", file=sys.stderr) + time.sleep(5) + else: + raise SystemExit(f"PyPI post-publish hash verification failed for {url}: {last_error}") + PY publish-github-release: name: un-draft the GitHub Release (only after BOTH registry publishes succeed) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36f5cbc..9157488 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,9 @@ All notable changes to this project are documented here. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## Unreleased + +## 0.3.0 - 2026-05-29 ### Added @@ -18,6 +20,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added optional typed calibration profile references to the v1 manifest schema, with path/hash/identity/compatibility verification but no statistical computation. 
+- Added the repo-local, publish=false `ordvec-ffi` crate with the base C ABI + for loading persisted `RankQuant` and `Bitmap` indexes and running + synchronous search through opaque handles. +- Added the repo-local `ordvec-go` cgo wrapper over the base C ABI. ### Documentation @@ -27,6 +33,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Documented sidecar manifest verification as a pre-load provenance check that does not sign, manage keys, call networks, or decide trust policy. +### Fixed + +- Hardened Python `add()` input boundaries so attempts to grow an index beyond + `MAX_VECTORS` raise `ValueError` before crossing into Rust core asserts. +- Corrected Python package dependency wording to the published metadata: + CPython 3.10+ with `numpy>=2.2`. + +### Security + +- Hardened the tag-triggered release workflow with exact Linux/aarch64 wheel + smoke coverage, post-publish PyPI hash verification, reproducible + release-required fuzz installation, and stricter local release-order + invariants for OIDC and publish steps. + ## [0.2.0] - 2026-05-26 First public release on crates.io / PyPI — the crate was not published before @@ -64,7 +84,7 @@ internal history. range, matching the fail-loud contract of `pack_buckets` / `bucket_centre`. Valid rank vectors (a permutation of `[0, d)`) are unaffected. - **Python bindings (`ordvec-python`):** raised the floor to **Python 3.10** and - **numpy 2.0**; the abi3 wheel target moves to `abi3-py310`. Python 3.9 reached + **numpy 2.2**; the abi3 wheel target moves to `abi3-py310`. Python 3.9 reached end-of-life (October 2025) and pytest's CVE-2025-71176 fix dropped 3.9 support. ### Deprecated @@ -140,6 +160,5 @@ system dependencies** — no BLAS, no `ndarray`, no `faer`. AVX-512 intrinsics this crate relies on were stabilized. - Dual-licensed under **MIT OR Apache-2.0**. 
-[Unreleased]: https://github.com/Fieldnote-Echo/ordvec/compare/v0.2.0...HEAD [0.2.0]: https://github.com/Fieldnote-Echo/ordvec/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/Fieldnote-Echo/ordvec/releases/tag/v0.1.0 diff --git a/Cargo.lock b/Cargo.lock index 67bc51d..01043e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -601,7 +601,7 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "ordvec" -version = "0.2.0" +version = "0.3.0" dependencies = [ "rand", "rand_chacha", @@ -633,7 +633,7 @@ dependencies = [ [[package]] name = "ordvec-python" -version = "0.2.0" +version = "0.3.0" dependencies = [ "numpy", "ordvec", diff --git a/Cargo.toml b/Cargo.toml index b31d434..049cef7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ordvec" -version = "0.2.0" +version = "0.3.0" edition = "2021" rust-version = "1.89" # AVX-512 intrinsics stabilized in 1.89.0; also clears the 1.87 floor from u64::is_multiple_of description = "Training-free ordinal & sign quantization for vector retrieval" @@ -14,12 +14,37 @@ categories = ["algorithms", "science", "compression"] # Keep dev/internal files out of the published crate — they stay in the repo # but aren't useful to crate consumers. 
exclude = [ + ".agents/", + ".claude/", + ".codex/", ".github/", ".gitignore", + ".playwright-mcp/", "CLAUDE.md", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.md", + "DCO", + "GOVERNANCE.md", + "RELEASING.md", + "ROADMAP.md", + "SECURITY.md", + "cliff.toml", + "THREAT_MODEL.md", + "codecov.yml", "deny.toml", "docs/ALTERNATIVES_CONSIDERED.md", "docs/FOLLOWUP_BODY_KERNEL_TIE_BREAK.md", + "docs/INDEX_PROVENANCE.md", + "docs/c-api.md", + "fuzz/", + "ordvec-ffi/", + "ordvec-go/", + "ordvec-manifest/", + "ordvec-python/", + "tests/__pycache__/", + "tests/release_publish_invariants.py", + "tests/release_publish_invariants.sh", + "tests/release_signed_release_invariants.sh", ] # docs.rs build configuration: build with default features only, so the diff --git a/README.md b/README.md index 0a7c4d5..94400f1 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ Details in [`docs/RANK_MODES.md`](docs/RANK_MODES.md). ```toml [dependencies] -ordvec = "0.2" +ordvec = "0.3" # Or, to track unreleased `main`, use a git dependency instead: # ordvec = { git = "https://github.com/Fieldnote-Echo/ordvec" } @@ -146,6 +146,7 @@ pip install ordvec Wheels target CPython 3.10+ (abi3); to build from source instead, see [`ordvec-python/`](https://github.com/Fieldnote-Echo/ordvec/tree/main/ordvec-python). +The runtime dependency floor is `numpy>=2.2`. 
## Documentation @@ -154,12 +155,13 @@ Wheels target CPython 3.10+ (abi3); to build from source instead, see - **Design alternatives evaluated and cut:** [`docs/ALTERNATIVES_CONSIDERED.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/ALTERNATIVES_CONSIDERED.md) - **Index-file trust model:** - [`docs/INDEX_PROVENANCE.md`](docs/INDEX_PROVENANCE.md), - [`THREAT_MODEL.md`](THREAT_MODEL.md) -- **Repo-local manifest verifier:** - [`ordvec-manifest/`](ordvec-manifest/) (`cargo run -p ordvec-manifest -- verify --manifest ...`) -- **C ABI:** - [`docs/c-api.md`](docs/c-api.md) + [`docs/INDEX_PROVENANCE.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/INDEX_PROVENANCE.md), + [`THREAT_MODEL.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/THREAT_MODEL.md) +- **Repo-local manifest verifier, C ABI, and Go wrapper:** + available from the full GitHub checkout. These sidecars are not part of the + published core `.crate`; use the GitHub checkout for `ordvec-manifest/`, + `ordvec-ffi/`, `ordvec-go/`, and + [`docs/c-api.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/c-api.md). - **Formal proof spine:** [`ordvec-formalization`](https://github.com/Fieldnote-Echo/ordvec-formalization), including its [`proof-spine`](https://github.com/Fieldnote-Echo/ordvec-formalization/blob/main/docs/proof-spine.md), [`theorem-map`](https://github.com/Fieldnote-Echo/ordvec-formalization/blob/main/docs/theorem-map.md), @@ -217,13 +219,14 @@ checksum, MAC, or signature — by design.** The loaders validate *structure* (magic, version, bounds, exact-length payload) but not *origin*: a structurally valid file can still be untrusted. If an index file crosses a trust boundary (network transfer, shared storage), verify it before loading. -This repo includes a publish=false sidecar CLI, `ordvec-manifest`, that binds an -index file to a JSON manifest by SHA-256, header metadata, row identity, and -attestation shape checks. 
It does not sign artifacts, manage keys, or decide -deployment trust policy. No in-format crypto is shipped because it would add key -management the library can't own. See -[`docs/INDEX_PROVENANCE.md`](docs/INDEX_PROVENANCE.md) and -[`THREAT_MODEL.md`](THREAT_MODEL.md). +The full GitHub checkout includes a publish=false sidecar CLI, +`ordvec-manifest`, that binds an index file to a JSON manifest by SHA-256, +header metadata, row identity, and attestation shape checks. It does not sign +artifacts, manage keys, or decide deployment trust policy. No in-format crypto +is shipped because it would add key management the library can't own. See +[`docs/INDEX_PROVENANCE.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/INDEX_PROVENANCE.md) +and [`THREAT_MODEL.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/THREAT_MODEL.md) +in the full repository. ## Provenance diff --git a/RELEASING.md b/RELEASING.md index e076d9b..54e412a 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -149,10 +149,14 @@ filename. Until either is updated, the corresponding gated publish fails - Once **both** publishes succeed, `publish-github-release` un-drafts the GitHub Release automatically. If one publish fails, the Release stays DRAFT — re-run the failed job, the un-draft then completes. + - `publish-pypi` also queries PyPI after upload and compares every served + wheel/sdist SHA-256 digest against the staged `dist/` files before the + GitHub Release can un-draft. 7. 
Verify each published artifact and its provenance: - crates.io / docs.rs; - - PyPI (`pip download ordvec==X.Y.Z` and inspect, plus check the PEP 740 - attestation at `GET https://pypi.org/integrity/ordvec/X.Y.Z//provenance`); + - PyPI (confirm the post-publish hash-verification log, optionally + `pip download ordvec==X.Y.Z` and inspect, plus check the PEP 740 attestation + at `GET https://pypi.org/integrity/ordvec/X.Y.Z//provenance`); - the GitHub Release page (`.crate`, wheels, sdist, `*.sigstore.json`, `*.intoto.jsonl` all present); - `gh attestation verify -R Fieldnote-Echo/ordvec` on a downloaded diff --git a/ROADMAP.md b/ROADMAP.md index e964d3a..9156fa6 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -36,12 +36,15 @@ edge pipelines — compose, not a competitor to them. The throughline is **"be a good neighbour"**: ordvec should embed _natively_ into more hosts rather than forcing callers to adapt to it. -- **Publish.** A coordinated first release to crates.io (`ordvec`) and PyPI - (`ordvec`), carrying SLSA build provenance and SBOMs (the release machinery is - already in place). Unblocks `docs.rs` and the registry badges. -- **Cross-stack embedding via a C ABI.** A `cdylib` plus a generated C header so - non-Rust / non-Python edge runtimes can link ordvec directly — the single - largest reach multiplier beyond Rust and Python. +- **Distribution baseline.** ordvec now has coordinated crates.io / PyPI + release machinery with SLSA build provenance, SBOM artifacts, registry + trusted publishing, and post-publish byte checks. Ongoing work here is + platform breadth, not the first publish path. +- **Cross-stack embedding via C and Go.** The repo now carries a base + publish=false C ABI (`ordvec-ffi`) plus a thin Go wrapper (`ordvec-go`) for + loading persisted `RankQuant` / `Bitmap` indexes and running synchronous + search. Next ABI work is the v2 builder/add surface, richer batched APIs, and + compatibility policy. 
- **Adapters.** Thin integration layers for host retrieval / RAG systems. ordvec factored out of [turbovec](https://github.com/RyanCodrai/turbovec); natural next targets are mainstream RAG frameworks (via the Python binding) and diff --git a/THREAT_MODEL.md b/THREAT_MODEL.md index d5ea7aa..4e6c255 100644 --- a/THREAT_MODEL.md +++ b/THREAT_MODEL.md @@ -1,6 +1,6 @@ # Threat Model — `ordvec` -> **Status:** v0.2.0 (pre-1.0), 2026-05-28. This is the maintained threat model +> **Status:** v0.3.0 (pre-1.0), 2026-05-29. This is the maintained threat model > for the `ordvec` Rust crate, C ABI, Go wrapper, PyO3/maturin Python bindings, > and the repo-local `ordvec-manifest` sidecar verifier. It is reviewed when the > attack surface changes (new persistence formats, new `unsafe` kernels, new @@ -467,7 +467,7 @@ blast radius of a compromised dependency separately. | THREAT-FFI-003 | FFI | Binding | Accidental telemetry through ABI stats | Low | Low | **Mitigated** — caller-owned stats, no logging | | THREAT-FFI-004 | FFI | Binding | Concurrent input mutation during released-GIL call | Medium | Medium | **P2** — documented contract | | THREAT-FFI-005 | FFI | Binding | Unsanitized path forwarding | Medium | Medium | **P2** — documented contract | -| THREAT-SUPPLY-001 | Supply chain | Config | Release config / single-owner | Low | Critical | **Mitigated** (reviewer + main-only); residual = account compromise / 2nd owner | +| THREAT-SUPPLY-001 | Supply chain | Config | Release config / single-owner | Low | Critical | **Mitigated** (reviewer-gated release-tag deployment + `require-ci-green` main-SHA gate); residual = account compromise / 2nd owner | | THREAT-SUPPLY-002 | Supply chain | Config | Release immutability / tag integrity | Low | High | **Mitigated** — registries immutable; GitHub immutable releases on + `main` protected | | THREAT-SUPPLY-003 | Supply chain | Config | Typosquatting adjacent names | Medium | Medium | P3 | | THREAT-QUERY-001 | Resource | Deployment | Batch / 
`k` exhaustion in serving | Medium | Medium | **P2** — deployment docs | @@ -491,8 +491,9 @@ blast radius of a compromised dependency separately. across all SIMD modules (SIMD-001); the `fastscan_b2` fuzz target (FUZZ-001) plus a CI `fuzz.yml` — PR smoke + weekly sweep (FUZZ-002); the `rank_to_bucket` primitive made fail-loud (`rank < d`) to match the rest of the bucket API, with -matching binding guards; release-environment reviewers + main-only deployment -(SUPPLY-001); **GitHub immutable releases enabled + `main` branch protection** +matching binding guards; reviewer-gated release-tag deployment plus the +`require-ci-green` main-SHA gate (SUPPLY-001); **GitHub immutable releases +enabled + `main` branch protection** (SUPPLY-002); [`docs/INDEX_PROVENANCE.md`](docs/INDEX_PROVENANCE.md) (DESER-002); [`RELEASING.md`](RELEASING.md) (SUPPLY-001). diff --git a/ordvec-python/Cargo.toml b/ordvec-python/Cargo.toml index c2f9a0b..9ffed2c 100644 --- a/ordvec-python/Cargo.toml +++ b/ordvec-python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ordvec-python" -version = "0.2.0" +version = "0.3.0" edition = "2021" rust-version = "1.89" # inherits ordvec's AVX-512 MSRV floor description = "Python bindings for ordvec — training-free ordinal & sign vector quantization" diff --git a/ordvec-python/README.md b/ordvec-python/README.md index a76707c..8d19666 100644 --- a/ordvec-python/README.md +++ b/ordvec-python/README.md @@ -42,8 +42,9 @@ recall, monotonicity, and null fit remain empirical diagnostics. pip install ordvec ``` -Wheels target CPython 3.10+ (abi3). Building from source needs a Rust toolchain -(MSRV 1.89) and [maturin](https://www.maturin.rs/). +Wheels target CPython 3.10+ (abi3) and require `numpy>=2.2`. Building from +source needs a Rust toolchain (MSRV 1.89) and +[maturin](https://www.maturin.rs/). 
## Provenance & license diff --git a/ordvec-python/pyproject.toml b/ordvec-python/pyproject.toml index 25794ac..c3943d7 100644 --- a/ordvec-python/pyproject.toml +++ b/ordvec-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "ordvec" -version = "0.2.0" +version = "0.3.0" description = "Training-free ordinal & sign quantization for compressed vector retrieval" readme = "README.md" requires-python = ">=3.10" diff --git a/ordvec-python/python/ordvec/__init__.py b/ordvec-python/python/ordvec/__init__.py index 7c2b9d9..bc9adb9 100644 --- a/ordvec-python/python/ordvec/__init__.py +++ b/ordvec-python/python/ordvec/__init__.py @@ -100,4 +100,4 @@ "SignBitmapIndex", ] -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index 31472b7..e52155c 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -76,6 +76,27 @@ fn check_width(got: usize, dim: usize) -> PyResult<()> { Ok(()) } +/// Mirror the core `add` capacity guard before releasing the GIL and entering +/// Rust core asserts. Public Python `add()` methods should raise `ValueError` +/// for over-capacity input, not a pyo3 `PanicException`. 
+fn check_add_capacity(current: usize, adding: usize, elems_per_vec: usize) -> PyResult<()> { + let new_n = current + .checked_add(adding) + .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("n_vectors overflows usize"))?; + let max = ordvec_core::rank_io::MAX_VECTORS; + if new_n > max { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "index would exceed MAX_VECTORS ({max}); had {current}, adding {adding}" + ))); + } + new_n.checked_mul(elems_per_vec).ok_or_else(|| { + pyo3::exceptions::PyValueError::new_err( + "index buffer length (n_vectors * elems_per_vec) overflows usize", + ) + })?; + Ok(()) +} + /// Reject a `bits` value outside the `{1, 2, 4}` packing domain (used by the /// RankQuant pack/unpack/norm primitives) as a clean `ValueError` rather than /// letting the core `assert!` surface as a `PanicException`. @@ -411,6 +432,7 @@ impl Rank { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; + check_add_capacity(self.inner.len(), arr.nrows(), self.inner.dim())?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. `slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed. @@ -591,6 +613,7 @@ impl RankQuant { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; + check_add_capacity(self.inner.len(), arr.nrows(), self.inner.bytes_per_vec())?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. `slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed. @@ -812,6 +835,7 @@ impl Bitmap { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; + check_add_capacity(self.inner.len(), arr.nrows(), self.inner.dim() / 64)?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. 
`slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed. @@ -1156,6 +1180,7 @@ impl SignBitmap { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; + check_add_capacity(self.inner.len(), arr.nrows(), self.inner.dim() / 64)?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. `slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed. @@ -1662,3 +1687,27 @@ fn _ordvec(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("MAX_VECTORS", ordvec_core::rank_io::MAX_VECTORS)?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::check_add_capacity; + use ordvec_core::rank_io::MAX_VECTORS; + + #[test] + fn add_capacity_allows_exact_ceiling() { + check_add_capacity(MAX_VECTORS - 1, 1, 1).unwrap(); + check_add_capacity(MAX_VECTORS, 0, 1).unwrap(); + } + + #[test] + fn add_capacity_rejects_vector_count_overflow() { + let err = check_add_capacity(MAX_VECTORS, 1, 1).unwrap_err(); + assert!(err.to_string().contains("MAX_VECTORS")); + } + + #[test] + fn add_capacity_rejects_buffer_length_overflow() { + let err = check_add_capacity(0, MAX_VECTORS, usize::MAX).unwrap_err(); + assert!(err.to_string().contains("buffer length")); + } +} diff --git a/src/util.rs b/src/util.rs index ae0ae87..1684ae7 100644 --- a/src/util.rs +++ b/src/util.rs @@ -22,10 +22,11 @@ pub(crate) fn cmp_finite_f32_then_index( rhs_value: f32, rhs_index: usize, ) -> std::cmp::Ordering { - lhs_value - .partial_cmp(&rhs_value) - .expect("ordvec: finite f32 comparator received non-finite value") - .then_with(|| lhs_index.cmp(&rhs_index)) + if lhs_value == rhs_value { + lhs_index.cmp(&rhs_index) + } else { + lhs_value.total_cmp(&rhs_value) + } } /// Result-buffer length `nq * k`, panicking loudly on usize overflow diff --git a/tests/release_signed_release_invariants.sh b/tests/release_signed_release_invariants.sh index 99542e0..d2824fc 100755 --- 
a/tests/release_signed_release_invariants.sh +++ b/tests/release_signed_release_invariants.sh @@ -52,10 +52,23 @@ job_needs() { printf '%s\n' "$body" | grep -qE "(^[[:space:]]+needs:.*\\b${needed}\\b|^[[:space:]]+-[[:space:]]+${needed}[[:space:]]*$)" } +job_line() { + local jobname="$1" pattern="$2" + job_body "$jobname" | grep -nE "$pattern" | head -1 | cut -d: -f1 +} + +require_job_line() { + local jobname="$1" pattern="$2" description="$3" line + line="$(job_line "$jobname" "$pattern")" + [ -n "$line" ] || fail "$jobname must contain $description" + printf '%s\n' "$line" +} + # ---------------------------------------------------------------------- # (1) release-assets-draft needs attest + provenance + require-ci-green + notes +# + exact linux/aarch64 wheel smoke # ---------------------------------------------------------------------- -for dep in attest provenance require-ci-green notes; do +for dep in attest provenance require-ci-green notes smoke-linux-aarch64-wheel; do job_needs release-assets-draft "$dep" \ || fail "release-assets-draft must \`needs: $dep\` (fail-closed on missing provenance/CI)" done @@ -143,6 +156,23 @@ printf '%s\n' "$pcb" | grep -qE 'sha256sum' \ printf '%s\n' "$pcb" | grep -qE 'crates\.io/api/v1/crates/ordvec|static\.crates\.io/crates/ordvec' \ || fail "publish-crate must download the just-published .crate from crates.io after \`cargo publish\` (post-publish byte-identity proof; pre-publish alone cannot inspect cargo publish's internal packaging)" +pre_line="$(require_job_line publish-crate '^[[:space:]]+- name:[[:space:]]*Verify byte-identity vs the attested \.crate' 'a pre-publish byte-identity verification step')" +oidc_line="$(require_job_line publish-crate '^[[:space:]]+- name:[[:space:]]*Mint a short-lived crates\.io credential' 'an OIDC credential mint step')" +publish_line="$(require_job_line publish-crate '^[[:space:]]+- name:[[:space:]]*cargo publish' 'a cargo publish step')" +post_line="$(require_job_line publish-crate 
'^[[:space:]]+- name:[[:space:]]*Post-publish byte-identity' 'a post-publish crates.io byte-identity step')" +[ "$pre_line" -lt "$oidc_line" ] \ + || fail "publish-crate must verify byte-identity BEFORE minting the crates.io OIDC credential" +[ "$oidc_line" -lt "$publish_line" ] \ + || fail "publish-crate must mint the crates.io OIDC credential BEFORE \`cargo publish\`" +[ "$publish_line" -lt "$post_line" ] \ + || fail "publish-crate must run the crates.io post-publish download/compare AFTER \`cargo publish\`" + +ppb="$(job_body publish-pypi)" +printf '%s\n' "$ppb" | grep -qE 'Post-publish PyPI hashes match staged dist' \ + || fail "publish-pypi must verify PyPI-served wheel/sdist hashes after publish" +printf '%s\n' "$ppb" | grep -qE 'pypi\.org/pypi/ordvec/.+/json|pypi\.org/pypi/ordvec/' \ + || fail "publish-pypi must query PyPI after publish for served file hashes" + # ---------------------------------------------------------------------- # (10) publish-github-release un-drafts ONLY AFTER both registry publishes succeed. # ---------------------------------------------------------------------- From 8e778788771a7eaab089716b97b3a3c6bc7c838a Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Fri, 29 May 2026 21:30:56 -0500 Subject: [PATCH 2/2] fix: check python add byte capacity Signed-off-by: Nelson Spence --- ordvec-python/src/lib.rs | 54 ++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index e52155c..53721ea 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -79,7 +79,12 @@ fn check_width(got: usize, dim: usize) -> PyResult<()> { /// Mirror the core `add` capacity guard before releasing the GIL and entering /// Rust core asserts. Public Python `add()` methods should raise `ValueError` /// for over-capacity input, not a pyo3 `PanicException`. 
-fn check_add_capacity(current: usize, adding: usize, elems_per_vec: usize) -> PyResult<()> { +fn check_add_capacity( + current: usize, + adding: usize, + elems_per_vec: usize, + elem_size: usize, +) -> PyResult<()> { let new_n = current .checked_add(adding) .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("n_vectors overflows usize"))?; @@ -89,11 +94,14 @@ fn check_add_capacity(current: usize, adding: usize, elems_per_vec: usize) -> Py "index would exceed MAX_VECTORS ({max}); had {current}, adding {adding}" ))); } - new_n.checked_mul(elems_per_vec).ok_or_else(|| { + let total_elems = new_n.checked_mul(elems_per_vec).ok_or_else(|| { pyo3::exceptions::PyValueError::new_err( "index buffer length (n_vectors * elems_per_vec) overflows usize", ) })?; + total_elems.checked_mul(elem_size).ok_or_else(|| { + pyo3::exceptions::PyValueError::new_err("index buffer byte size overflows usize") + })?; Ok(()) } @@ -432,7 +440,12 @@ impl Rank { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; - check_add_capacity(self.inner.len(), arr.nrows(), self.inner.dim())?; + check_add_capacity( + self.inner.len(), + arr.nrows(), + self.inner.dim(), + std::mem::size_of::(), + )?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. `slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed. @@ -613,7 +626,12 @@ impl RankQuant { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; - check_add_capacity(self.inner.len(), arr.nrows(), self.inner.bytes_per_vec())?; + check_add_capacity( + self.inner.len(), + arr.nrows(), + self.inner.bytes_per_vec(), + std::mem::size_of::(), + )?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. `slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed. 
@@ -835,7 +853,12 @@ impl Bitmap { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; - check_add_capacity(self.inner.len(), arr.nrows(), self.inner.dim() / 64)?; + check_add_capacity( + self.inner.len(), + arr.nrows(), + self.inner.dim() / 64, + std::mem::size_of::<u64>(), + )?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. `slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed. @@ -1180,7 +1203,12 @@ impl SignBitmap { "array must be C-contiguous; call np.ascontiguousarray() first", ) })?; - check_add_capacity(self.inner.len(), arr.nrows(), self.inner.dim() / 64)?; + check_add_capacity( + self.inner.len(), + arr.nrows(), + self.inner.dim() / 64, + std::mem::size_of::<u64>(), + )?; // Release the GIL around the parallel rank-transform / pack so other // Python threads run during a bulk add. `slice` (`&[f32]`) and // `&mut self.inner` are both `Ungil`, so no pointer juggling is needed.
@@ -1695,19 +1723,25 @@ mod tests { #[test] fn add_capacity_allows_exact_ceiling() { - check_add_capacity(MAX_VECTORS - 1, 1, 1).unwrap(); - check_add_capacity(MAX_VECTORS, 0, 1).unwrap(); + check_add_capacity(MAX_VECTORS - 1, 1, 1, 1).unwrap(); + check_add_capacity(MAX_VECTORS, 0, 1, 1).unwrap(); } #[test] fn add_capacity_rejects_vector_count_overflow() { - let err = check_add_capacity(MAX_VECTORS, 1, 1).unwrap_err(); + let err = check_add_capacity(MAX_VECTORS, 1, 1, 1).unwrap_err(); assert!(err.to_string().contains("MAX_VECTORS")); } #[test] fn add_capacity_rejects_buffer_length_overflow() { - let err = check_add_capacity(0, MAX_VECTORS, usize::MAX).unwrap_err(); + let err = check_add_capacity(0, MAX_VECTORS, usize::MAX, 1).unwrap_err(); assert!(err.to_string().contains("buffer length")); } + + #[test] + fn add_capacity_rejects_byte_size_overflow() { + let err = check_add_capacity(0, 1, usize::MAX / 2, 4).unwrap_err(); + assert!(err.to_string().contains("byte size")); + } }