Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions .github/buildomat/jobs/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ _exit_trap() {
local status=$?
set +o errexit

# Restore the override opteadm from /tmp before debug-evidence
# collection runs, in case anything earlier in this trap or any
# downstream tooling has overwritten the installed binary. The
# /tmp copy is saved by the OPTE_COMMIT install block below.
if [[ "x$OPTE_COMMIT" != "x" ]]; then
pfexec cp /tmp/opteadm /opt/oxide/opte/bin/opteadm
fi
Expand Down Expand Up @@ -134,19 +138,6 @@ z_swadm () {
pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm $@
}

# only set this if you want to override the version of opte/xde installed by the
# install_opte.sh script
OPTE_COMMIT=""
if [[ "x$OPTE_COMMIT" != "x" ]]; then
curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde
pfexec rem_drv xde || true
pfexec mv xde /kernel/drv/amd64/xde
pfexec add_drv xde || true
curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/release/$OPTE_COMMIT/opteadm
chmod +x opteadm
cp opteadm /tmp/opteadm
pfexec mv opteadm /opt/oxide/opte/bin/opteadm
fi

#
# XXX work around 14537 (UFS should not allow directories to be unlinked) which
Expand Down Expand Up @@ -197,6 +188,25 @@ ptime -m tar xvzf /input/package/work/package.tar.gz
# shellcheck source=/dev/null
source .github/buildomat/ci-env.sh

# Source the OPTE override (if any) from the canonical location and apply it.
#
# When set, download the xde driver and opteadm directly from buildomat and
# swap them in. The deploy target is a ramdisk image without pkg(5), so we
# use rem_drv/add_drv instead of the p5p approach used by install_opte.sh
# and releng.
# shellcheck source=/dev/null
source tools/opte_version_override
if [[ "x$OPTE_COMMIT" != "x" ]]; then
curl -sSfOL "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde"
pfexec rem_drv xde || true
pfexec mv xde /kernel/drv/amd64/xde
pfexec add_drv xde || true
curl -sSfOL "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/release/$OPTE_COMMIT/opteadm"
chmod +x opteadm
cp opteadm /tmp/opteadm
pfexec mv opteadm /opt/oxide/opte/bin/opteadm
fi

# Ask buildomat for the range of extra addresses that we're allowed to use, and
# break them up into the ranges we need.

Expand Down
2 changes: 2 additions & 0 deletions .github/buildomat/jobs/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,7 @@ files=(
target/release/xtask
target/debug/bootstrap
tests/*
tools/opte_version
tools/opte_version_override
)
ptime -m tar cvzf $WORK/package.tar.gz "${files[@]}" "${packages[@]}"
23 changes: 19 additions & 4 deletions .github/workflows/check-opte-ver.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: check-opte-ver
on:
# Run on every PR targeting main, regardless of which paths changed, as the
# override file may have been set in an earlier commit on the branch. So,
# we must check on every PR rather than only when tracked paths change.
pull_request:
paths:
- '.github/workflows/check-opte-ver.yml'
- 'Cargo.toml'
- 'tools/opte_version'
branches: [main]
jobs:
check-opte-ver:
runs-on: ubuntu-22.04
Expand All @@ -18,3 +18,18 @@ jobs:
run: cargo install toml-cli@0.2.3
- name: Check OPTE version and rev match
run: ./tools/ci_check_opte_ver.sh

check-opte-override:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461
- name: Reject OPTE override on main
run: |
source tools/opte_version_override
if [[ "x$OPTE_COMMIT" != "x" ]]; then
echo "::error::OPTE_COMMIT is set in tools/opte_version_override."
echo "::error::The OPTE override must be cleared before merging to main."
exit 1
fi
118 changes: 114 additions & 4 deletions dev-tools/releng/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,18 @@ async fn main() -> Result<()> {
let opte_version =
fs::read_to_string(WORKSPACE_DIR.join("tools/opte_version")).await?;

// Parse tools/opte_version_override for OPTE_COMMIT. When set, we
// download the override p5p from buildomat and use it as a package
// source during image build instead of the helios pkg repo version.
let opte_override = parse_opte_version_override(
&WORKSPACE_DIR.join("tools/opte_version_override"),
)
.await?;
if let Some(ov) = &opte_override {
info!(logger, "OPTE override active: commit={}", ov.commit);
}
let opte_version = opte_version.trim();

let client = reqwest::ClientBuilder::new()
.connect_timeout(Duration::from_secs(15))
.timeout(Duration::from_secs(120))
Expand Down Expand Up @@ -617,7 +629,7 @@ async fn main() -> Result<()> {
.arg("-o") // output directory for image
.arg(args.output_dir.join(format!("os-{}", target)))
.arg("-F") // pass extra image builder features
.arg(format!("optever={}", opte_version.trim()))
.arg(format!("optever={opte_version}"))
.arg("-P") // include all files from extra proto area
.arg(proto_dir.join("root"))
.arg("-N") // image name
Expand Down Expand Up @@ -675,11 +687,33 @@ async fn main() -> Result<()> {
.arg(format!("helios-dev={HELIOS_PKGREPO}"))
}

// helios-build experiment-image
jobs.push_command(format!("{}-image", target), image_cmd)
// When OPTE_COMMIT is set, download the override p5p from buildomat
// and add it as a package source for the image build.
if let Some(ov) = &opte_override {
let p5p_path = tempdir.path().join(format!("opte-{target}.p5p"));
let commit = ov.commit.clone();
let dest = p5p_path.clone();
let cl = client.clone();
let log = logger.clone();
jobs.push(
format!("{target}-opte-p5p"),
download_opte_p5p(log, cl, commit, dest),
);

image_cmd = image_cmd
.arg("-p")
.arg(format!("helios-dev=file://{p5p_path}"));
}

let image_job = jobs
.push_command(format!("{target}-image"), image_cmd)
.after("helios-setup")
.after("helios-incorp")
.after(format!("{}-proto", target));
.after(format!("{target}-proto"));

if opte_override.is_some() {
image_job.after(format!("{target}-opte-p5p"));
}
}
// Build the recovery target after we build the host target. Only one
// of these will build at a time since Cargo locks its target directory;
Expand Down Expand Up @@ -887,6 +921,82 @@ async fn build_proto_area(
Ok(())
}

/// Parsed contents of `tools/opte_version_override` when an override is active.
struct OpteOverride {
commit: String,
}

/// Parse `tools/opte_version_override` for `OPTE_COMMIT`. Returns `None` if
/// `OPTE_COMMIT` is unset or empty. Errors if more than one non-comment
/// `OPTE_COMMIT=` assignment is present, since a stale line above the active
/// one would otherwise win silently.
async fn parse_opte_version_override(
path: &Utf8PathBuf,
) -> Result<Option<OpteOverride>> {
let contents = fs::read_to_string(path)
.await
.context("failed to read tools/opte_version_override")?;

let assignments: Vec<&str> = contents
.lines()
.filter_map(|line| line.trim().strip_prefix("OPTE_COMMIT="))
.map(|val| val.trim_matches(|c| c == '"' || c == '\''))
.collect();

if assignments.len() > 1 {
bail!(
"tools/opte_version_override contains {} OPTE_COMMIT \
assignments (expected at most 1)",
assignments.len()
);
}

Ok(assignments
.into_iter()
.find(|val| !val.is_empty())
.map(|commit| OpteOverride { commit: commit.to_string() }))
}

const OPTE_BUILDOMAT_BASE: &str =
"https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte";

/// Download the OPTE override p5p archive from buildomat.
async fn download_opte_p5p(
logger: Logger,
client: reqwest::Client,
commit: String,
dest: Utf8PathBuf,
) -> Result<()> {
let url = format!("{OPTE_BUILDOMAT_BASE}/repo/{commit}/opte.p5p");
info!(logger, "downloading OPTE override p5p from {url}");
for attempt in 1..=RETRY_ATTEMPTS {
let result = async {
let response = client.get(&url).send().await?.error_for_status()?;
let bytes = response.bytes().await?;
fs::write(&dest, &bytes).await?;
Ok::<_, anyhow::Error>(())
}
.await;

match result {
Ok(()) => {
info!(logger, "downloaded OPTE p5p to {dest}");
return Ok(());
}
Err(err) => {
if attempt == RETRY_ATTEMPTS {
return Err(err).with_context(|| {
format!("failed to download OPTE p5p from {url}")
});
}
info!(logger, "retrying OPTE p5p download (attempt {attempt})");
}
}
}

bail!("failed to download OPTE p5p after {RETRY_ATTEMPTS} attempts")
}

async fn host_add_root_profile(host_proto_root: Utf8PathBuf) -> Result<()> {
fs::create_dir_all(&host_proto_root).await?;
fs::write(
Expand Down
18 changes: 14 additions & 4 deletions tools/ci_check_opte_ver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
set -euo pipefail

source tools/opte_version_override
if [[ "x$OPTE_COMMIT" != "x" ]]; then
exit 0
fi

# Grab all the oxidecomputer/opte dependencies' revisions
readarray -t opte_deps_revs < <(toml get Cargo.toml workspace.dependencies | jq -r 'to_entries | .[] | select(.value.git? | contains("oxidecomputer/opte")?) | .value.rev')
Expand All @@ -19,6 +16,19 @@ for rev in "${opte_deps_revs[@]}"; do
fi
done

# When an OPTE override is active, the kernel binary is built from
# $OPTE_COMMIT while the userland (opte-ioctl, oxide-vpc) is built from
# the Cargo dep revision. They must match, otherwise kernel/userland
# ABI drift surfaces as opaque ioctl failures at runtime.
if [[ "x$OPTE_COMMIT" != "x" ]]; then
if [ "$OPTE_REV" != "$OPTE_COMMIT" ]; then
echo "OPTE override mismatch:"
echo " Cargo.toml deps: $OPTE_REV"
echo " tools/opte_version_override: $OPTE_COMMIT"
exit 1
fi
fi

# Grab the API version for this revision
API_VER=$(curl -s https://raw.githubusercontent.com/oxidecomputer/opte/"$OPTE_REV"/crates/opte-api/src/lib.rs | sed -n 's/pub const API_VERSION: u64 = \([0-9]*\);/\1/p')

Expand Down Expand Up @@ -71,6 +81,6 @@ BUILDOMAT_DEPLOY_TARGET=$(cat .github/buildomat/jobs/deploy.sh | sed -n 's/#:[ ]
if [ "lab-2.0-opte-0.$API_VER" != "$BUILDOMAT_DEPLOY_TARGET" ]; then
echo "OPTE version mismatch:"
echo "Cargo.toml: $OPTE_REV ($OPTE_VER)"
echo "buildomat deploy job: $BUILDOMAT_DEPLOY_TARGET (expected lab-opte-0.$API_VER)"
echo "buildomat deploy job: $BUILDOMAT_DEPLOY_TARGET (expected lab-2.0-opte-0.$API_VER)"
exit 1
fi
64 changes: 43 additions & 21 deletions tools/install_opte.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ fi
# Grab the version of the opte package to install
OPTE_VERSION="$(cat "$OMICRON_TOP/tools/opte_version")"

# Source the OPTE override (if any). When OPTE_COMMIT is set, the desired
# OPTE version isn't published to the helios pkg repo yet, so we install
# directly from the override p5p built by OPTE CI on buildomat (see the
# branch on $OPTE_COMMIT below).
source "$OMICRON_TOP/tools/opte_version_override"

OMICRON_FROZEN_PKG_COMMENT="OMICRON-PINNED-PACKAGE"

# Once we install, we mark the package as frozen at that particular version.
Expand All @@ -71,22 +77,48 @@ if PKG_FROZEN=$(pkg freeze | grep driver/network/opte); then
pfexec pkg unfreeze driver/network/opte
fi

# Actually install the xde kernel module and opteadm tool
RC=0
pfexec pkg install -v pkg://helios-dev/driver/network/opte@"$OPTE_VERSION" || RC=$?
if [[ "$RC" -eq 0 ]]; then
echo "xde driver installed successfully"
elif [[ "$RC" -eq 4 ]]; then
echo "Correct xde driver already installed"
if [[ "x$OPTE_COMMIT" != "x" ]]; then
# Install from the override p5p archive built by OPTE CI. The p5p
# contains exactly one version (built from $OPTE_COMMIT), which
# generally won't match the canonical $OPTE_VERSION, so we let pkg
# pick the version from the p5p rather than pinning here.
P5P_URL="https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/repo/$OPTE_COMMIT/opte.p5p"
P5P_PATH="/tmp/opte-override.p5p"
echo "Downloading override p5p from $P5P_URL"
curl -fL -o "$P5P_PATH" "$P5P_URL"

RC=0
pfexec pkg install -g "$P5P_PATH" driver/network/opte || RC=$?
if [[ "$RC" -eq 0 ]]; then
echo "xde driver installed from override p5p"
elif [[ "$RC" -eq 4 ]]; then
echo "Correct xde driver already installed"
else
echo "Installing xde driver from override p5p failed"
exit "$RC"
fi
rm -f "$P5P_PATH"
else
echo "Installing xde driver failed"
exit "$RC"
# Install the published version from the helios pkg repo.
RC=0
pfexec pkg install -v pkg://helios-dev/driver/network/opte@"$OPTE_VERSION" || RC=$?
if [[ "$RC" -eq 0 ]]; then
echo "xde driver installed successfully"
elif [[ "$RC" -eq 4 ]]; then
echo "Correct xde driver already installed"
else
echo "Installing xde driver failed"
exit "$RC"
fi
fi

# Discover the actually-installed version (may differ from $OPTE_VERSION
# when an override is active) and freeze at that.
INSTALLED_VERSION=$(pkg info -l driver/network/opte | awk '/^[[:space:]]*Version:/ {print $2}')
RC=0
pfexec pkg freeze -c "$OMICRON_FROZEN_PKG_COMMENT" driver/network/opte@"$OPTE_VERSION" || RC=$?
pfexec pkg freeze -c "$OMICRON_FROZEN_PKG_COMMENT" "driver/network/opte@$INSTALLED_VERSION" || RC=$?
if [[ "$RC" -ne 0 ]]; then
echo "Failed to pin opte package to $OPTE_VERSION"
echo "Failed to pin opte package to $INSTALLED_VERSION"
exit $RC
fi

Expand All @@ -97,13 +129,3 @@ if [[ "$RC" -ne 0 ]]; then
echo "The \`opteadm\` administration tool is not on your path."
echo "You may add \"/opt/oxide/opte/bin\" to your path to access it."
fi

source $OMICRON_TOP/tools/opte_version_override

if [[ "x$OPTE_COMMIT" != "x" ]]; then
set +x
curl -fOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde
pfexec rem_drv xde || true
pfexec mv xde /kernel/drv/amd64/xde
pfexec add_drv xde || true
fi
23 changes: 21 additions & 2 deletions tools/opte_version_override
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
#!/usr/bin/env bash

# only set this if you want to override the version of opte/xde installed by the
# install_opte.sh script
# Override for using an unpublished OPTE version. When OPTE_COMMIT is set,
# the override p5p package is downloaded from buildomat and used instead of
# the version published in the helios pkg repo. The p5p is built by the
# opte-p5p buildomat job and published at:
# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/repo/{commit}/opte.p5p
#
# Consumers:
# - install_opte.sh installs directly from the override p5p
# - releng image builds use extra_packages with the p5p as a pkg source
# - deploy.sh installs from the override p5p on the running system
# - ci_check_opte_ver.sh enforces OPTE_COMMIT == Cargo dep rev
#
# To activate: set OPTE_COMMIT to the git commit hash of the OPTE build.
# OPTE_COMMIT must equal the `rev` pinned for `oxide-vpc` / `opte-ioctl` in
# Cargo.toml. The kernel module is built from OPTE_COMMIT and the userland
# (opte-ioctl, oxide-vpc) is built from the Cargo dep rev. If these hashes
# diverge, kernel/userland ABI drift surfaces as opaque ioctl failures at
# runtime. Even kernel-only fixes (logging, perf, internal refactors) must come
# with a matching Cargo bump.
#
# To deactivate (once the new version is published): set OPTE_COMMIT to "".
OPTE_COMMIT=""
Loading