Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,8 @@ jobs:
exit 1
fi
echo "OK: byte-identity verified ($A_SHA)"
- name: Validate manifest publish dry-run
run: cargo publish -p ordvec-manifest --dry-run --locked
- name: Mint a short-lived crates.io credential (OIDC)
id: auth
uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ description = "Training-free ordinal & sign quantization for vector retrieval"
license = "MIT OR Apache-2.0"
repository = "https://github.com/Fieldnote-Echo/ordvec"
homepage = "https://github.com/Fieldnote-Echo/ordvec"
documentation = "https://docs.rs/ordvec"
readme = "README.md"
keywords = ["vector-search", "quantization", "nearest-neighbor", "ann", "simd"]
categories = ["algorithms", "science", "compression"]
Expand Down
255 changes: 239 additions & 16 deletions tests/release_publish_invariants.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@


WORKFLOW_PATH = os.environ.get("RELEASE_WORKFLOW_PATH", ".github/workflows/release.yml")
CI_WORKFLOW_PATH = os.environ.get("CI_WORKFLOW_PATH", ".github/workflows/ci.yml")
PYTHON_WORKFLOW_PATH = os.environ.get("PYTHON_WORKFLOW_PATH", ".github/workflows/python.yml")
STRICT_STABLE_TAG_PATTERN = r"^v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)$"

Expand Down Expand Up @@ -522,6 +523,165 @@ def check_strict_release_tag_patterns(workflow: dict[str, Any], path: str) -> No
fail(f"strict release tag regex must reject {tag}")


def cargo_package_files(package: str) -> set[str]:
cmd = ["cargo", "package", "-p", package, "--list", "--locked", "--allow-dirty"]
try:
output = subprocess.check_output(cmd, text=True, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as exc:
stderr = (exc.stderr or "").strip()
fail(f"{' '.join(cmd)} failed while checking package contents: {stderr}")
Comment thread
Fieldnote-Echo marked this conversation as resolved.
return {line.strip() for line in output.splitlines() if line.strip()}


def check_required_package_files(package: str, files: set[str], required: set[str]) -> None:
missing = sorted(required - files)
if missing:
fail(f"{package}: package is missing required files: {', '.join(missing)}")


def check_forbidden_package_prefixes(
package: str, files: set[str], forbidden_prefixes: tuple[str, ...]
) -> None:
forbidden = sorted(
file for file in files if any(file == prefix.rstrip("/") or file.startswith(prefix) for prefix in forbidden_prefixes)
)
if forbidden:
fail(f"{package}: package includes forbidden files: {', '.join(forbidden)}")


def check_packaged_readme_links(package: str, files: set[str], readme_path: str) -> None:
readme = read_text(readme_path)
for match in re.finditer(r"!?\[[^\]]*\]\(([^)]+)\)", readme):
raw_target = match.group(1).strip()
if not raw_target or raw_target.startswith("#"):
continue
if re.match(r"^[A-Za-z][A-Za-z0-9+.-]*:", raw_target):
continue
target = raw_target.split("#", 1)[0].split("?", 1)[0].strip()
if not target:
continue
if target.startswith("/") or target.startswith("../") or "/../" in target:
fail(f"{package}: README link {raw_target!r} escapes the packaged crate")
normalized = posixpath.normpath(target)
if normalized not in files and not any(file.startswith(normalized + "/") for file in files):
fail(f"{package}: README link {raw_target!r} points to a file or directory not packaged")


def check_package_contents() -> None:
core_files = cargo_package_files("ordvec")
check_required_package_files(
"ordvec",
core_files,
{
"Cargo.lock",
"Cargo.toml",
"Cargo.toml.orig",
"CHANGELOG.md",
"LICENSE-APACHE-2.0",
"LICENSE-MIT",
"README.md",
"benchmarks/rank_modes_results.txt",
"docs/PERSISTED_FORMAT.md",
"docs/RANK_MODES.md",
"docs/compatibility-policy.md",
"docs/determinism.md",
"examples/bench_rank.rs",
"src/lib.rs",
"tests/index/main.rs",
"tests/persistence_compat.rs",
},
)
check_forbidden_package_prefixes(
"ordvec",
core_files,
(
".agents/",
".claude/",
".codex/",
".github/",
".playwright-mcp/",
"fuzz/",
"ordvec-ffi/",
"ordvec-go/",
"ordvec-manifest/",
"ordvec-python/",
"target/",
"tests/release_",
),
)
check_packaged_readme_links("ordvec", core_files, "README.md")

manifest_files = cargo_package_files("ordvec-manifest")
check_required_package_files(
"ordvec-manifest",
manifest_files,
{
"Cargo.lock",
"Cargo.toml",
"Cargo.toml.orig",
"README.md",
"src/lib.rs",
"src/main.rs",
"src/sqlite.rs",
"tests/manifest.rs",
},
)
check_forbidden_package_prefixes(
"ordvec-manifest",
manifest_files,
(
".agents/",
".claude/",
".codex/",
".github/",
".playwright-mcp/",
"docs/",
"fuzz/",
"ordvec-ffi/",
"ordvec-go/",
"ordvec-manifest/",
"ordvec-python/",
"target/",
"tests/release_",
),
)
check_packaged_readme_links("ordvec-manifest", manifest_files, "ordvec-manifest/README.md")


def check_ci_package_guards(workflow: dict[str, Any], path: str) -> None:
jobs = mapping(workflow.get("jobs"), f"{path}: jobs")
deps = mapping(jobs.get("deps"), f"{path}: jobs.deps")
steps = sequence(deps.get("steps"), f"{path}: jobs.deps.steps")

core_dry_runs: list[str] = []
manifest_deferred_runs: list[str] = []
for index, raw_step in enumerate(steps):
step = mapping(raw_step, f"{path}: jobs.deps.steps[{index}]")
run = step.get("run")
if not isinstance(run, str):
continue
for words in cargo_command_words(run, "publish", "ordvec"):
if "--dry-run" in words:
core_dry_runs.append(run)
if cargo_command_words(run, "package", "ordvec-manifest"):
manifest_deferred_runs.append(run)

if len(core_dry_runs) != 1:
fail(f"{path}: deps job must run exactly one `cargo publish -p ordvec --dry-run --locked`")
if len(manifest_deferred_runs) != 1:
fail(f"{path}: deps job must run exactly one deferred ordvec-manifest package check")

manifest_run = manifest_deferred_runs[0]
required_fragments = (
'failed to select a version for the requirement `ordvec = "',
"ordvec-manifest package check is deferred",
"release.yml packages ordvec-manifest after publish-crate succeeds",
)
for fragment in required_fragments:
if fragment not in manifest_run:
fail(f"{path}: deferred ordvec-manifest package check must mention {fragment!r}")


def shell_vars(name: str) -> set[str]:
return {f"${name}", f"${{{name}}}"}

Expand Down Expand Up @@ -574,17 +734,33 @@ def has_cargo_package_arg(words: list[str], package: str) -> bool:
return False


def has_cargo_command(run: str, subcommand: str, package: str) -> bool:
def cargo_command_words(run: str, subcommand: str, package: str) -> list[list[str]]:
commands: list[list[str]] = []
for line in shell_logical_lines(run):
try:
words = shlex.split(line)
except ValueError:
continue
if len(words) < 3 or words[0] != "cargo" or words[1] != subcommand:
continue
if "--locked" in words and has_cargo_package_arg(words[2:], package):
return True
return False
for part in re.split(r"&&|\|\||;", line):
part = part.strip()
for prefix in ("if ", "then ", "! "):
if part.startswith(prefix):
part = part[len(prefix):].strip()
if not part:
continue
try:
words = shlex.split(part)
except ValueError:
continue
cmd_idx = 0
while cmd_idx < len(words) and re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*=.*", words[cmd_idx]):
cmd_idx += 1
cmd = words[cmd_idx:]
if len(cmd) < 3 or cmd[0] != "cargo" or cmd[1] != subcommand:
continue
if "--locked" in cmd and has_cargo_package_arg(cmd[2:], package):
commands.append(cmd)
return commands
Comment thread
Fieldnote-Echo marked this conversation as resolved.


def has_cargo_command(run: str, subcommand: str, package: str) -> bool:
return bool(cargo_command_words(run, subcommand, package))


def has_shell_arg(words: list[str], values: set[str]) -> bool:
Expand Down Expand Up @@ -866,24 +1042,37 @@ def check_publish_pypi(workflow: dict[str, Any], path: str) -> None:


def check_publish_crate_job(
workflow: dict[str, Any], path: str, job_name: str, package: str, artifact_name: str
workflow: dict[str, Any],
path: str,
job_name: str,
package: str,
artifact_name: str,
*,
require_publish_dry_run: bool = False,
) -> None:
jobs = mapping(workflow.get("jobs"), f"{path}: jobs")
job = mapping(jobs.get(job_name), f"{path}: jobs.{job_name}")
steps = sequence(job.get("steps"), f"{path}: jobs.{job_name}.steps")

crate_downloads: list[tuple[int, dict[str, Any], dict[str, Any]]] = []
package_runs: list[str] = []
publish_runs: list[str] = []
package_runs: list[tuple[int, str]] = []
publish_runs: list[tuple[int, str]] = []
publish_dry_runs: list[tuple[int, str]] = []
auth_steps: list[int] = []

for index, raw_step in enumerate(steps):
step = mapping(raw_step, f"{path}: jobs.{job_name}.steps[{index}]")
run = step.get("run")
if isinstance(run, str):
if has_cargo_command(run, "package", package):
package_runs.append(run)
if has_cargo_command(run, "publish", package):
publish_runs.append(run)
package_runs.append((index, run))
for words in cargo_command_words(run, "publish", package):
if "--dry-run" in words:
publish_dry_runs.append((index, run))
else:
publish_runs.append((index, run))
if action_name(step) == "rust-lang/crates-io-auth-action":
auth_steps.append(index)
if action_name(step) == "actions/download-artifact":
with_block = step.get("with", {})
with_map = mapping(with_block, f"{path}: {step_label(index, step)} with")
Expand All @@ -905,22 +1094,37 @@ def check_publish_crate_job(
fail(f"{path}: {job_name} must run exactly one `cargo package -p {package} --locked`")
if len(publish_runs) != 1:
fail(f"{path}: {job_name} must run exactly one `cargo publish -p {package} --locked`")
if require_publish_dry_run and len(publish_dry_runs) != 1:
fail(
f"{path}: {job_name} must run exactly one "
f"`cargo publish -p {package} --dry-run --locked` before minting OIDC"
)

verify_step_names = {
"Verify byte-identity vs the attested .crate",
"Post-publish byte-identity (download from crates.io == attested)",
}
verify_steps: list[dict[str, Any]] = []
verify_step_indices: dict[str, int] = {}
found_names: set[str] = set()
for index, raw_step in enumerate(steps):
step = mapping(raw_step, f"{path}: jobs.{job_name}.steps[{index}]")
name = step.get("name")
if name in verify_step_names:
verify_steps.append(step)
verify_step_indices[name] = index
found_names.add(name)
if found_names != verify_step_names:
fail(f"{path}: {job_name} must have both attested .crate verification steps")

if require_publish_dry_run:
dry_run_index = publish_dry_runs[0][0]
byte_identity_index = verify_step_indices["Verify byte-identity vs the attested .crate"]
if dry_run_index < byte_identity_index:
fail(f"{path}: {job_name} dry-run publish must run after byte-identity verification")
if auth_steps and dry_run_index > min(auth_steps):
fail(f"{path}: {job_name} dry-run publish must run before OIDC token minting")

attested_path = f"${{RUNNER_TEMP}}/attested/{package}-${{VERSION}}.crate"
for step in verify_steps:
name = step.get("name")
Expand Down Expand Up @@ -966,6 +1170,21 @@ def check_publish_crate_job(

def check_publish_crates(workflow: dict[str, Any], path: str) -> None:
jobs = mapping(workflow.get("jobs"), f"{path}: jobs")
build_manifest_job = mapping(jobs.get("build-manifest-crate"), f"{path}: jobs.build-manifest-crate")
if not has_need(build_manifest_job, "publish-crate"):
fail(f"{path}: build-manifest-crate must need publish-crate so lockstep ordvec exists")
build_manifest_steps = sequence(
build_manifest_job.get("steps"), f"{path}: jobs.build-manifest-crate.steps"
)
build_manifest_packages = 0
for index, raw_step in enumerate(build_manifest_steps):
step = mapping(raw_step, f"{path}: jobs.build-manifest-crate.steps[{index}]")
run = step.get("run")
if isinstance(run, str) and has_cargo_command(run, "package", "ordvec-manifest"):
build_manifest_packages += 1
if build_manifest_packages != 1:
fail(f"{path}: build-manifest-crate must package ordvec-manifest after publish-crate")

manifest_job = mapping(jobs.get("publish-manifest-crate"), f"{path}: jobs.publish-manifest-crate")
if not has_need(manifest_job, "publish-crate"):
fail(f"{path}: publish-manifest-crate must need publish-crate so ordvec publishes first")
Expand All @@ -976,16 +1195,20 @@ def check_publish_crates(workflow: dict[str, Any], path: str) -> None:
"publish-manifest-crate",
"ordvec-manifest",
"dist-manifest-crate",
require_publish_dry_run=True,
)


def main() -> None:
workflow = load_workflow(WORKFLOW_PATH)
ci_workflow = load_workflow(CI_WORKFLOW_PATH)
check_release_version_sync()
check_release_compatibility_sync()
check_publication_model()
check_python_package_metadata()
check_strict_release_tag_patterns(workflow, WORKFLOW_PATH)
check_package_contents()
check_ci_package_guards(ci_workflow, CI_WORKFLOW_PATH)
check_hash_requirement_temp_paths([WORKFLOW_PATH, PYTHON_WORKFLOW_PATH])
check_release_security_gates(workflow, WORKFLOW_PATH)
check_aarch64_smoke_selector(workflow, WORKFLOW_PATH)
Expand Down
10 changes: 10 additions & 0 deletions tests/release_signed_release_invariants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,16 @@ check_crate_publish_job publish-crate ordvec dist-crate
check_crate_publish_job publish-manifest-crate ordvec-manifest dist-manifest-crate
job_needs publish-manifest-crate publish-crate \
|| fail "publish-manifest-crate must \`needs: publish-crate\` so the lockstep core crate publishes first"
manifest_pre_line="$(require_job_line publish-manifest-crate '^[[:space:]]+- name:[[:space:]]*Verify byte-identity vs the attested \.crate' 'a manifest pre-publish byte-identity verification step')"
manifest_dry_line="$(require_job_line publish-manifest-crate '^[[:space:]]+- name:[[:space:]]*Validate manifest publish dry-run' 'a manifest publish dry-run step')"
manifest_oidc_line="$(require_job_line publish-manifest-crate '^[[:space:]]+- name:[[:space:]]*Mint a short-lived crates\.io credential' 'a manifest OIDC credential mint step')"
printf '%s\n' "$(job_body publish-manifest-crate)" \
| awk '/cargo[[:space:]]+publish/ && /ordvec-manifest/ && /--dry-run/ && /--locked/ { found = 1 } END { exit found ? 0 : 1 }' \
|| fail "publish-manifest-crate must dry-run \`cargo publish -p ordvec-manifest --dry-run --locked\` after byte-identity and before OIDC"
[ "$manifest_pre_line" -lt "$manifest_dry_line" ] \
|| fail "publish-manifest-crate must dry-run publish AFTER byte-identity verification"
[ "$manifest_dry_line" -lt "$manifest_oidc_line" ] \
|| fail "publish-manifest-crate must dry-run publish BEFORE minting the crates.io OIDC credential"

pcd="$(job_body pypi-canonical-dist)"
printf '%s\n' "$pcd" | grep -qE 'release_pypi_canonical_dist\.py canonicalize' \
Expand Down
Loading