Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
431 changes: 431 additions & 0 deletions tests/validation/common/collect_platform_info.py

Large diffs are not rendered by default.

942 changes: 942 additions & 0 deletions tests/validation/common/generate_report.py

Large diffs are not rendered by default.

121 changes: 114 additions & 7 deletions tests/validation/common/nicctl.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# # SPDX-License-Identifier: BSD-3-Clause
# # Copyright 2025 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2026 Intel Corporation

import logging
import re
import time

import pytest
from mfd_network_adapter import NetworkInterface

logger = logging.getLogger(__name__)


class Nicctl:
"""Wrapper of nicctl.sh script from Media-Transport-Library."""
Expand All @@ -28,7 +33,10 @@ def _nicctl_path(self) -> str:
def _parse_vf_list(self, output: str) -> list:
if "No VFs found" in output:
return []
vf_info_regex = r"(\d{4}[0-9a-fA-F:.]+)\(?\S*\)?\s+\S*\s*vfio"
# Match PCI addresses from both:
# 1. list_vf output (bare PCI addresses, one per line)
# 2. create_vf output ("Bind 0000:xx:yy.z(...) to vfio-pci success")
vf_info_regex = r"(\d{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.\d+)"
return re.findall(vf_info_regex, output)

def vfio_list(self, pci_addr: str = "all") -> list:
Expand All @@ -44,10 +52,19 @@ def create_vfs(self, pci_id: str, num_of_vfs: int = 6) -> list:
:param num_of_vfs: number of VFs to create
:return: returns list of created vfs
"""
resp = self.connection.execute_command(
self.connection.execute_command(
f"sudo {self.nicctl} create_vf {pci_id} {num_of_vfs}", shell=True
)
return self._parse_vf_list(resp.stdout)
# Allow VFIO bindings to stabilize after VF creation.
# Without this delay, the first DPDK process to open a VF may
# hit "Unable to reset device! Error: 11 (Resource temporarily
# unavailable)" because the VFIO group/container is not fully
# initialized yet.
time.sleep(2)
# Use vfio_list (nicctl.sh list) to get clean VF addresses.
# The create_vf output mixes PF and VF PCI addresses in status
# messages, while list_vf outputs only VF addresses.
return self.vfio_list(pci_id)

def disable_vf(self, pci_id: str) -> None:
"""Remove VFs on NIC.
Expand Down Expand Up @@ -82,11 +99,13 @@ def bind_kernel(self, pci_id: str) -> None:


class InterfaceSetup:
def __init__(self, hosts, mtl_path):
def __init__(self, hosts, mtl_path, host_mtl_paths=None):
self.hosts = hosts
self.mtl_path = mtl_path
self.host_mtl_paths = host_mtl_paths or {}
self.nicctl_objs = {
host.name: Nicctl(mtl_path, host) for host in hosts.values()
host.name: Nicctl(self.host_mtl_paths.get(host.name, mtl_path), host)
for host in hosts.values()
}
self.customs = []
self.cleanups = []
Expand Down Expand Up @@ -297,3 +316,91 @@ def cleanup(self):
nicctl.disable_vf(interface)
elif if_type.lower() == "pf":
nicctl.bind_kernel(interface)


def reset_vfio_bindings(host, host_name: str, vf_list: list) -> None:
    """Unbind/rebind VFs to force VFIO group release after a DPDK crash.

    Stale processes are killed first (``kill_stale_processes``); then every
    VF is unbound from its current driver through sysfs, rebound to
    ``vfio-pci`` with ``dpdk-devbind.py`` and verified. Failures are logged
    and never raised, so one bad VF does not stop the remaining ones.

    :param host: host object exposing ``connection.execute_command``
    :param host_name: host label, used only in log messages
    :param vf_list: PCI addresses of the VFs to reset; falsy entries are
        skipped
    """
    # Function-scope import kept from the original code
    # (NOTE(review): presumably avoids a circular import — confirm).
    from mtl_engine.execute import kill_stale_processes

    kill_stale_processes(host)
    # Short pause before touching the devices (presumably lets the killed
    # processes release their VFIO handles).
    time.sleep(2)

    for vf in vf_list:
        if not vf:
            continue
        try:
            # Best-effort unbind: "|| true" keeps the command from failing
            # when the device currently has no driver.
            host.connection.execute_command(
                f"echo '{vf}' > /sys/bus/pci/devices/{vf}/driver/unbind "
                f"2>/dev/null || true",
                shell=True,
                timeout=15,
            )
            time.sleep(1)
            host.connection.execute_command(
                f"dpdk-devbind.py -b vfio-pci {vf}",
                shell=True,
                timeout=30,
            )
            result = host.connection.execute_command(
                f"dpdk-devbind.py -s | grep '{vf}' | head -1",
                shell=True,
                timeout=15,
            )
            status = (result.stdout or "").strip()
            # Look for "drv=vfio-pci", not a bare "vfio-pci": a device still
            # bound to another driver is listed as "drv=<x> unused=vfio-pci",
            # which would otherwise be reported as a successful rebind.
            if "drv=vfio-pci" in status:
                logger.debug(f"Reset VF {vf} on {host_name} — vfio-pci ✓")
            else:
                logger.warning(f"VF {vf} on {host_name} NOT bound: {status}")
        except Exception as e:
            # Deliberately broad: resetting is best-effort per VF.
            logger.warning(f"Could not reset VF {vf} on {host_name}: {e}")


def ensure_vfio_bound(host, host_name: str, vf_list: list) -> bool:
    """Ensure all VFs are bound to vfio-pci; rebind any that aren't.

    Each VF's driver is read from ``dpdk-devbind.py -s``. A VF that is not
    reported as ``drv=vfio-pci`` is unbound through sysfs, rebound with
    ``dpdk-devbind.py -b vfio-pci`` and checked again. Errors are logged and
    never raised.

    :param host: host object exposing ``connection.execute_command``
    :param host_name: host label, used only in log messages
    :param vf_list: PCI addresses of the VFs to check; falsy entries are
        skipped
    :return: True if a rebind was attempted for at least one VF (whether or
        not it succeeded), False if every VF was already bound
    """
    any_rebound = False
    for vf in vf_list:
        if not vf:
            continue
        try:
            result = host.connection.execute_command(
                f"dpdk-devbind.py -s | grep '{vf}' | head -1",
                shell=True,
                timeout=15,
            )
            status = (result.stdout or "").strip()
            if "drv=vfio-pci" in status:
                continue  # Already properly bound

            logger.warning(
                f"VF {vf} on {host_name} not bound to vfio-pci "
                f"({status or 'no status'}), rebinding…"
            )
            # Set before the rebind commands run, so the flag means
            # "a rebind was attempted", not "a rebind succeeded".
            any_rebound = True
            # Best-effort unbind: "|| true" keeps the command from failing
            # when the device currently has no driver.
            host.connection.execute_command(
                f"echo '{vf}' > /sys/bus/pci/devices/{vf}/driver/unbind "
                f"2>/dev/null || true",
                shell=True,
                timeout=15,
            )
            time.sleep(1)
            host.connection.execute_command(
                f"dpdk-devbind.py -b vfio-pci {vf}",
                shell=True,
                timeout=30,
            )
            result = host.connection.execute_command(
                f"dpdk-devbind.py -s | grep '{vf}' | head -1",
                shell=True,
                timeout=15,
            )
            new_status = (result.stdout or "").strip()
            # Same criterion as the initial check: a bare "vfio-pci" also
            # matches "drv=<x> unused=vfio-pci", i.e. a VF that is still
            # bound to another driver.
            if "drv=vfio-pci" in new_status:
                logger.info(f"Rebound VF {vf} on {host_name} — vfio-pci ✓")
            else:
                logger.error(f"Failed to rebind VF {vf} on {host_name}: {new_status}")
        except Exception as e:
            # Deliberately broad: checking/rebinding is best-effort per VF.
            logger.warning(f"Could not check VF {vf} on {host_name}: {e}")
    return any_rebound
15 changes: 14 additions & 1 deletion tests/validation/configs/gen_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2026 Intel Corporation

import argparse

import yaml
Expand Down Expand Up @@ -46,7 +50,12 @@ def gen_test_config(


def gen_topology_config(
pci_device: str, ip_address: str, username: str, password: str, key_path: str
pci_device: str,
ip_address: str,
username: str,
password: str,
key_path: str,
extra_info: dict = None,
) -> str:
# Support comma-separated PCI devices for multiple interfaces
pci_devices = [dev.strip() for dev in pci_device.split(",")]
Expand Down Expand Up @@ -85,6 +94,8 @@ def gen_topology_config(
topology_config["hosts"][0]["connections"][0]["connection_options"][
"key_path"
] = key_path
if extra_info:
topology_config["hosts"][0]["extra_info"] = extra_info
return yaml.safe_dump(topology_config, explicit_start=True, sort_keys=False)


Expand Down Expand Up @@ -154,6 +165,7 @@ def main() -> None:
default="None",
help="specify path to SSH private key for the test host",
)

args = parser.parse_args()
if args.password == "None" and args.key_path == "None":
parser.error("one of the arguments --password --key_path is required")
Expand All @@ -180,6 +192,7 @@ def main() -> None:
username=args.username,
password=args.password,
key_path=args.key_path,
extra_info={"mtl_path": args.mtl_path},
)
)

Expand Down
Loading
Loading