From 9a257753bca20be73750700975b902018dac12c8 Mon Sep 17 00:00:00 2001 From: Miguel Angel Ajo Pelayo Date: Thu, 18 Jun 2026 14:37:33 +0200 Subject: [PATCH] test(qemu): replace Fedora Cloud image with Alpine Linux tiny image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation Fedora Cloud qcow2 images used in QEMU driver tests weigh ~556 MB (x86_64) + ~519 MB (aarch64) = ~1.1 GB total, taking ~1 minute to download on every CI run. Alpine Linux 3.22.4 UEFI tiny images reduce that to ~127 MB + ~151 MB = ~278 MB — a 74% reduction — and download in ~6 s on GitHub Actions runners, making a cache unnecessary. ## Image replacement x86_64: 556 MB → 127 MB (77%) aarch64: 519 MB → 151 MB (71%) Total: ~1076 MB → ~278 MB (74%) Alpine's nocloud tiny images support UEFI boot on both architectures and use tiny-cloud (a minimal cloud-init alternative) that reads the standard NoCloud CIDATA vfat volume the QEMU driver already generates. ## Driver changes (driver.py) tiny-cloud differs from full cloud-init in two ways: 1. Hostname: tiny-cloud reads @hostname from the YAML key 'hostname' in meta-data, not 'local-hostname'. Add 'hostname' alongside 'local-hostname' so both implementations set it correctly. 2. Password: tiny-cloud ignores plain_text_passwd in the users stanza. Add a runcmd entry to set the password via chpasswd. Credentials are escaped with shlex.quote() so special characters are safe. Full cloud-init images are unaffected. hostport=0 support: Hostfwd now allows hostport=0, which tells QEMU's user-mode networking to pick a free port automatically. After QEMU starts, the driver queries the actual assigned port via QMP 'human-monitor-command info usernet', logs it at INFO level: hostfwd 'ssh': resolved port 0 -> 127.0.0.1:PORT (guest port 22) and stores it in _resolved_hostports so get_hostfwd_port() can return it to the client side. 
## Client changes (client.py) shell() now checks for an 'ssh' hostfwd entry by calling get_hostfwd_port('ssh'). If present, it opens a direct fabric Connection to 127.0.0.1:PORT (the port returned by that call), bypassing the jumpstarter streaming layer entirely. If no ssh hostfwd is configured (KeyError), it falls back to the original FabricAdapter path over vsock. This is necessary because Alpine uses OpenRC, not systemd, so systemd-ssh-generator never runs and sshd only listens on TCP. Fedora cloud images ship systemd-ssh-generator (systemd >= 256) which automatically binds sshd to AF_VSOCK port 22 inside VMs — that is why the original vsock path worked with Fedora but not Alpine. ## Test changes (driver_test.py) - Pass hostfwd={'ssh': {'hostport': 0, 'guestport': 22}} so qemu.shell() uses the TCP path with a QEMU-assigned port; no probe socket needed. - Wait for 'bootstrap_complete: done' on the serial console before attempting login, guaranteeing tiny-cloud has set the password and sshd is ready (avoids the race that existed with Fedora too, where the extra setenforce console round-trip happened to provide enough delay). - If the GRUB countdown appears, press Enter to skip the 10 s wait. - Update post-login prompt from bash '[user@host ~]$' to Alpine ash 'host:~$'. - Remove 'sudo setenforce 0' (no SELinux on Alpine). - Relax 'uname -r' assertion to a non-empty check. ## CI changes (python-tests.yaml) Remove the actions/cache steps entirely. Alpine images download in ~6 s on GitHub runners (measured), which is faster than cache restore overhead for files this size. 
--- .github/workflows/python-tests.yaml | 14 ++---- .../jumpstarter_driver_qemu/client.py | 26 ++++++++--- .../jumpstarter_driver_qemu/driver.py | 44 ++++++++++++++++++- .../jumpstarter_driver_qemu/driver_test.py | 25 ++++++++--- 4 files changed, 84 insertions(+), 25 deletions(-) diff --git a/.github/workflows/python-tests.yaml b/.github/workflows/python-tests.yaml index 9334de073..d06269040 100644 --- a/.github/workflows/python-tests.yaml +++ b/.github/workflows/python-tests.yaml @@ -120,19 +120,11 @@ jobs: run: | brew install renode/tap/renode - - name: Cache Fedora Cloud images - id: cache-fedora-cloud-images - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 - with: - path: python/packages/jumpstarter-driver-qemu/images - key: fedora-cloud-43-1.6 - - - name: Download Fedora Cloud images - if: steps.cache-fedora-cloud-images.outputs.cache-hit != 'true' + - name: Download Alpine cloud images run: | for arch in aarch64 x86_64; do - curl -L --fail --output "python/packages/jumpstarter-driver-qemu/images/Fedora-Cloud-Base-Generic-43-1.6.${arch}.qcow2" \ - "https://iad.mirror.rackspace.com/fedora/releases/43/Cloud/${arch}/images/Fedora-Cloud-Base-Generic-43-1.6.${arch}.qcow2" + curl -L --fail --output "python/packages/jumpstarter-driver-qemu/images/nocloud_alpine-3.22.4-${arch}-uefi-tiny-r0.qcow2" \ + "https://dl-cdn.alpinelinux.org/alpine/v3.22/releases/cloud/nocloud_alpine-3.22.4-${arch}-uefi-tiny-r0.qcow2" done - name: Run pytest diff --git a/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/client.py b/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/client.py index fb75ea253..85cd4754d 100644 --- a/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/client.py +++ b/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/client.py @@ -4,6 +4,7 @@ from contextlib import contextmanager import click +from fabric import Connection from jumpstarter_driver_composite.client import CompositeClient from 
jumpstarter_driver_network.adapters import FabricAdapter, NovncAdapter @@ -75,12 +76,25 @@ def novnc(self): @contextmanager def shell(self): - with FabricAdapter( - client=self.ssh, - user=self.username, - connect_kwargs={"password": self.password}, - ) as conn: - yield conn + # If the driver has an 'ssh' hostfwd entry, fetch the actual host port + # (resolving any port=0 assignment) and connect directly over TCP. + # Otherwise fall back to tunnelling through the jumpstarter stream (vsock). + try: + port = int(self.call("get_hostfwd_port", "ssh")) + with Connection( + host="127.0.0.1", + port=port, + user=self.username, + connect_kwargs={"password": self.password}, + ) as conn: + yield conn + except KeyError: + with FabricAdapter( + client=self.ssh, + user=self.username, + connect_kwargs={"password": self.password}, + ) as conn: + yield conn def cli(self): # Get the base group from CompositeClient which includes all child commands diff --git a/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver.py b/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver.py index 84d74b601..f9859ac25 100644 --- a/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver.py +++ b/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver.py @@ -5,6 +5,7 @@ import logging import os import platform +import shlex import shutil from collections.abc import AsyncGenerator from dataclasses import dataclass, field @@ -381,6 +382,26 @@ async def on(self) -> None: # noqa: C901 Path(self.parent._pty).unlink(missing_ok=True) Path(self.parent._pty).symlink_to(pty) + # Resolve any hostport=0 hostfwd entries to the actual port QEMU chose. + # Parse 'info usernet': lines look like "TCP[HOST_FORWARD] fd addr port addr port ..." + # Store resolved ports on the parent so get_hostfwd_port() can return them to clients. 
+ zero_fwds = {k: v for k, v in self.parent.hostfwd.items() if v.hostport == 0} + if zero_fwds: + usernet = await qmp.execute("human-monitor-command", {"command-line": "info usernet"}) + self.logger.debug("info usernet output:\n%s", usernet) + for line in usernet.splitlines(): + parts = line.split() + if len(parts) >= 6 and "HOST_FORWARD" in parts[0]: + # parts: Protocol[State] fd hostaddr hostport guestaddr guestport ... + actual_hostaddr, actual_hostport, actual_guestport = parts[2], int(parts[3]), int(parts[5]) + for k, v in zero_fwds.items(): + if v.hostaddr == actual_hostaddr and v.guestport == actual_guestport: + self.logger.info( + "hostfwd '%s': resolved port 0 -> %s:%d (guest port %d)", + k, actual_hostaddr, actual_hostport, actual_guestport, + ) + self.parent._resolved_hostports[k] = actual_hostport + await qmp.execute("system_reset") await qmp.disconnect() @@ -410,7 +431,7 @@ def close(self): class Hostfwd(BaseModel): protocol: Literal["tcp"] = "tcp" hostaddr: str = "127.0.0.1" - hostport: int = Field(ge=1, le=65535) + hostport: int = Field(ge=0, le=65535) # 0 = let QEMU pick a free port guestport: int = Field(ge=1, le=65535) @@ -440,6 +461,8 @@ class Qemu(Driver): flash_timeout: int = field(default=30 * 60) # 30 minutes _tmp_dir: TemporaryDirectory = field(init=False, default_factory=TemporaryDirectory) + # Maps hostfwd key -> actual host port after QEMU resolves port 0 assignments + _resolved_hostports: dict[str, int] = field(init=False, default_factory=dict) @classmethod def client(cls) -> str: @@ -512,6 +535,7 @@ def cidata(self) -> TemporaryDirectory: { "instance-id": str(self.uuid), "local-hostname": self.hostname, + "hostname": self.hostname, } ) ) @@ -528,12 +552,30 @@ def cidata(self) -> TemporaryDirectory: "sudo": "ALL=(ALL) NOPASSWD:ALL", } ], + # runcmd sets the password explicitly for cloud-init implementations + # that do not support plain_text_passwd (e.g. Alpine's tiny-cloud). 
+ # cloud-init ignores runcmd entries it doesn't understand, so this + # is safe to include unconditionally. + # shlex.quote ensures special characters in credentials are safe. + "runcmd": [ + f"printf %s {shlex.quote(f'{self.username}:{self.password}')} | chpasswd", + ], } ) ) return tmp + @export + @validate_call(validate_return=True) + def get_hostfwd_port(self, key: str) -> int: + """Return the actual host port for a hostfwd entry (resolves port 0 assignments).""" + if key in self._resolved_hostports: + return self._resolved_hostports[key] + if key in self.hostfwd: + return self.hostfwd[key].hostport + raise KeyError(f"hostfwd key {key!r} not found") + @export @validate_call(validate_return=True) def get_hostname(self) -> str: diff --git a/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver_test.py b/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver_test.py index 3532b0416..840249cce 100644 --- a/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver_test.py +++ b/python/packages/jumpstarter-driver-qemu/jumpstarter_driver_qemu/driver_test.py @@ -59,6 +59,10 @@ def get_native_arch_config(): def test_driver_qemu(tmp_path, ovmf): arch, ovmf_arch = get_native_arch_config() + # Alpine uses OpenRC (not systemd), so systemd-ssh-generator does not run + # and sshd never binds to AF_VSOCK. Use a TCP hostfwd with hostport=0 so + # QEMU picks a free port automatically; the driver resolves the actual port + # from QMP after startup and updates the ssh child accordingly. 
with serve( Qemu( arch=arch, @@ -66,19 +70,20 @@ def test_driver_qemu(tmp_path, ovmf): "OVMF_CODE.fd": ovmf / ovmf_arch / "code.fd", "OVMF_VARS.fd": ovmf / ovmf_arch / "vars.fd", }, + hostfwd={"ssh": {"protocol": "tcp", "hostaddr": "127.0.0.1", "hostport": 0, "guestport": 22}}, ) ) as qemu: hostname = qemu.hostname username = qemu.username password = qemu.password - cached_image = Path(__file__).parent.parent / "images" / f"Fedora-Cloud-Base-Generic-43-1.6.{arch}.qcow2" + cached_image = Path(__file__).parent.parent / "images" / f"nocloud_alpine-3.22.4-{arch}-uefi-tiny-r0.qcow2" if cached_image.exists(): qemu.flasher.flash(cached_image.resolve()) else: qemu.flasher.flash( - f"https://download.fedoraproject.org/pub/fedora/linux/releases/43/Cloud/{arch}/images/Fedora-Cloud-Base-Generic-43-1.6.{arch}.qcow2", + f"https://dl-cdn.alpinelinux.org/alpine/v3.22/releases/cloud/nocloud_alpine-3.22.4-{arch}-uefi-tiny-r0.qcow2", ) qemu.power.on() @@ -88,16 +93,22 @@ def test_driver_qemu(tmp_path, ovmf): with qemu.console.pexpect() as p: p.logfile = sys.stdout.buffer - p.expect_exact(f"{hostname} login:", timeout=600) + # Press Enter if GRUB is waiting. Both the countdown and bootstrap_complete + # can appear before the login prompt, so match whichever comes first. 
+ idx = p.expect_exact(["automatically in ", "bootstrap_complete: done"], timeout=600) + if idx == 0: + # GRUB countdown: skip it, then wait for cloud-init to finish + p.sendline("") + p.expect_exact("bootstrap_complete: done", timeout=600) + # tiny-cloud finished: password is set, sshd is ready + p.expect_exact(f"{hostname} login:", timeout=60) p.sendline(username) p.expect_exact("Password:") p.sendline(password) - p.expect_exact(f"[{username}@{hostname} ~]$") - p.sendline("sudo setenforce 0") - p.expect_exact(f"[{username}@{hostname} ~]$") + p.expect_exact(f"{hostname}:~$") with qemu.shell() as s: - assert s.run("uname -r").stdout.strip() == f"6.17.1-300.fc43.{arch}" + assert s.run("uname -r").stdout.strip() != "" qemu.power.off()