From 8e2f55fa74cdf1de3603c00bab92c270596e27ab Mon Sep 17 00:00:00 2001 From: Dan Cross Date: Fri, 5 Dec 2025 17:22:51 +0000 Subject: [PATCH] VirtIO 1.0 and multi-queue support --- Cargo.lock | 9 +- Cargo.toml | 1 - bin/propolis-server/src/lib/initializer.rs | 2 - bin/propolis-standalone/src/main.rs | 2 - crates/viona-api/Cargo.toml | 3 +- crates/viona-api/header-check/Cargo.toml | 3 +- crates/viona-api/header-check/build.rs | 7 +- crates/viona-api/header-check/test/main.rs | 5 +- crates/viona-api/src/ffi.rs | 155 +++ crates/viona-api/src/lib.rs | 43 +- crates/viona-api/sys/Cargo.toml | 15 - crates/viona-api/sys/src/lib.rs | 113 -- lib/propolis/Cargo.toml | 3 +- lib/propolis/src/block/crucible.rs | 4 +- lib/propolis/src/block/in_memory.rs | 2 +- lib/propolis/src/block/mem_async.rs | 2 +- lib/propolis/src/hw/chipset/i440fx.rs | 12 +- lib/propolis/src/hw/nvme/cmds.rs | 2 +- lib/propolis/src/hw/nvme/mod.rs | 4 +- lib/propolis/src/hw/pci/bridge.rs | 8 +- lib/propolis/src/hw/pci/cfgspace.rs | 34 +- lib/propolis/src/hw/pci/device.rs | 162 ++- lib/propolis/src/hw/virtio/bits.rs | 82 +- lib/propolis/src/hw/virtio/block.rs | 27 +- lib/propolis/src/hw/virtio/mod.rs | 192 ++- lib/propolis/src/hw/virtio/p9fs.rs | 28 +- lib/propolis/src/hw/virtio/pci.rs | 1331 ++++++++++++++++---- lib/propolis/src/hw/virtio/queue.rs | 361 +++++- lib/propolis/src/hw/virtio/softnpu.rs | 32 +- lib/propolis/src/hw/virtio/viona.rs | 704 ++++++++--- lib/propolis/src/util/aspace.rs | 2 +- lib/propolis/src/util/regmap.rs | 12 +- 32 files changed, 2558 insertions(+), 804 deletions(-) create mode 100644 crates/viona-api/src/ffi.rs delete mode 100644 crates/viona-api/sys/Cargo.toml delete mode 100644 crates/viona-api/sys/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 7b912b5cb..a8330328a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5338,6 +5338,7 @@ dependencies = [ "anyhow", "async-trait", "bhyve_api 0.0.0", + "bit_field", "bitflags 2.9.4", "bitstruct", "byteorder", @@ -8428,14 +8429,6 @@ version = "0.0.0" dependencies = [ "libc", "nvpair 0.0.0", - "viona_api_sys", -] - -[[package]] -name = "viona_api_sys" -version = "0.0.0" -dependencies = [ - "libc", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a46dc9717..cfb47cc3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,6 @@ propolis_types = { path = "crates/propolis-types" } rfb = { path = "crates/rfb" } rgb_frame = { path = "crates/rgb-frame" } viona_api = { path = "crates/viona-api" } -viona_api_sys = { path = "crates/viona-api/sys" } # PHD testing framework phd-framework = { path = "phd-tests/framework" } diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index ee7ebdac6..8e5d4961b 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -800,8 +800,6 @@ impl MachineInitializer<'_> { let viona = virtio::PciVirtioViona::new( &nic.backend_spec.vnic_name, - 0x0800.try_into().unwrap(), - 0x0100.try_into().unwrap(), &self.machine.hdl, params, ) diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 5821d614f..cc5a1d31c 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -1262,8 +1262,6 @@ fn setup_instance( let viona = hw::virtio::PciVirtioViona::new( vnic_name, - 0x0800.try_into().unwrap(), - 0x0100.try_into().unwrap(), &hdl, viona_params, )?; diff --git a/crates/viona-api/Cargo.toml b/crates/viona-api/Cargo.toml index dddb6345b..64545d328 100644 --- a/crates/viona-api/Cargo.toml +++ 
b/crates/viona-api/Cargo.toml @@ -9,7 +9,6 @@ doctest = false [dependencies] libc.workspace = true -viona_api_sys.workspace = true # nvpair dependency only enabled when building on illumos to avoid any attempts # to link to an absent libnvpair @@ -17,4 +16,4 @@ viona_api_sys.workspace = true nvpair.workspace = true [features] -falcon = ["viona_api_sys/falcon"] +falcon = [] diff --git a/crates/viona-api/header-check/Cargo.toml b/crates/viona-api/header-check/Cargo.toml index d2935314c..f26d8d8b8 100644 --- a/crates/viona-api/header-check/Cargo.toml +++ b/crates/viona-api/header-check/Cargo.toml @@ -4,9 +4,10 @@ version = "0.0.0" license = "MPL-2.0" build = "build.rs" publish = false +edition = "2021" [dependencies] -viona_api_sys = { path = "../sys" } +viona_api = { path = ".." } libc = "0.2" [build-dependencies] diff --git a/crates/viona-api/header-check/build.rs b/crates/viona-api/header-check/build.rs index 9d6be8467..fb48ff399 100644 --- a/crates/viona-api/header-check/build.rs +++ b/crates/viona-api/header-check/build.rs @@ -19,10 +19,7 @@ fn main() { } }; - let include_paths = [ - "usr/src/uts/intel", - "usr/src/uts/common", - ]; + let include_paths = ["usr/src/uts/intel", "usr/src/uts/common"]; cfg.include("/usr/include"); for p in include_paths { cfg.include(gate_dir.join(p)); @@ -53,5 +50,5 @@ fn main() { _ => false, }); - cfg.generate("../sys/src/lib.rs", "main.rs"); + cfg.generate("../src/ffi.rs", "main.rs"); } diff --git a/crates/viona-api/header-check/test/main.rs b/crates/viona-api/header-check/test/main.rs index 6ec990ac8..a1cec6c6f 100644 --- a/crates/viona-api/header-check/test/main.rs +++ b/crates/viona-api/header-check/test/main.rs @@ -2,9 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -extern crate viona_api_sys; -extern crate libc; - -use viona_api_sys::*; +use viona_api::*; include!(concat!(env!("OUT_DIR"), "/main.rs")); diff --git a/crates/viona-api/src/ffi.rs b/crates/viona-api/src/ffi.rs new file mode 100644 index 000000000..bf076afe9 --- /dev/null +++ b/crates/viona-api/src/ffi.rs @@ -0,0 +1,155 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#![allow(non_camel_case_types)] + +use libc::size_t; +use std::ffi::c_void; + +const fn vna_ioc(ioc: i32) -> i32 { + const V: i32 = b'V' as i32; + const C: i32 = b'C' as i32; + V << 16 | C << 8 | ioc +} + +pub const VNA_IOC_CREATE: i32 = vna_ioc(0x01); +pub const VNA_IOC_DELETE: i32 = vna_ioc(0x02); +pub const VNA_IOC_VERSION: i32 = vna_ioc(0x03); +pub const VNA_IOC_DEFAULT_PARAMS: i32 = vna_ioc(0x04); + +pub const VNA_IOC_RING_INIT: i32 = vna_ioc(0x10); +pub const VNA_IOC_RING_RESET: i32 = vna_ioc(0x11); +pub const VNA_IOC_RING_KICK: i32 = vna_ioc(0x12); +pub const VNA_IOC_RING_SET_MSI: i32 = vna_ioc(0x13); +pub const VNA_IOC_RING_INTR_CLR: i32 = vna_ioc(0x14); +pub const VNA_IOC_RING_SET_STATE: i32 = vna_ioc(0x15); +pub const VNA_IOC_RING_GET_STATE: i32 = vna_ioc(0x16); +pub const VNA_IOC_RING_PAUSE: i32 = vna_ioc(0x17); +pub const VNA_IOC_RING_INIT_MODERN: i32 = vna_ioc(0x18); + +pub const VNA_IOC_INTR_POLL: i32 = vna_ioc(0x20); +pub const VNA_IOC_SET_FEATURES: i32 = vna_ioc(0x21); +pub const VNA_IOC_GET_FEATURES: i32 = vna_ioc(0x22); +pub const VNA_IOC_SET_NOTIFY_IOP: i32 = vna_ioc(0x23); +pub const VNA_IOC_SET_PROMISC: i32 = vna_ioc(0x24); +pub const VNA_IOC_GET_PARAMS: i32 = vna_ioc(0x25); +pub const VNA_IOC_SET_PARAMS: i32 = vna_ioc(0x26); +pub const VNA_IOC_GET_MTU: i32 = vna_ioc(0x27); +pub const VNA_IOC_SET_MTU: i32 = vna_ioc(0x28); +pub const VNA_IOC_SET_NOTIFY_MMIO: i32 = vna_ioc(0x29); +pub const VNA_IOC_INTR_POLL_MQ: i32 = vna_ioc(0x2a); + +/// VirtIO 1.2 queue pair support. +pub const VNA_IOC_GET_PAIRS: i32 = vna_ioc(0x30); +pub const VNA_IOC_SET_PAIRS: i32 = vna_ioc(0x31); +pub const VNA_IOC_GET_USEPAIRS: i32 = vna_ioc(0x32); +pub const VNA_IOC_SET_USEPAIRS: i32 = vna_ioc(0x33); + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_vna_ioc() { + assert_eq!(vna_ioc(0x22), 0x00_56_43_22); + } +} + +/// The minimum number of queue pairs supported by a device. +pub const VIONA_MIN_QPAIRS: usize = 1; + +/// The maximum number of queue pairs supported by a device. +/// +/// Note that the VirtIO limit is much higher (0x8000); Viona artificially +/// limits the number to 256 pairs, which makes it possible to implement +/// interrupt notification with a reasonably sized bitmap.
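+/// For example (illustrative arithmetic only): with 0x100 pairs, the +/// per-ring interrupt-status bitmap in vioc_intr_poll_mq below needs +/// howmany(0x100, 32) = 8 u32 words.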
+pub const VIONA_MAX_QPAIRS: usize = 0x100; + +const fn howmany(x: usize, y: usize) -> usize { + assert!(y > 0); + x.div_ceil(y) +} + +#[repr(C)] +pub struct vioc_create { + pub c_linkid: u32, + pub c_vmfd: i32, +} + +#[repr(C)] +#[derive(Default)] +pub struct vioc_ring_init_modern { + pub rim_index: u16, + pub rim_qsize: u16, + pub _pad: [u16; 2], + pub rim_qaddr_desc: u64, + pub rim_qaddr_avail: u64, + pub rim_qaddr_used: u64, +} + +#[repr(C)] +#[derive(Default)] +pub struct vioc_ring_msi { + pub rm_index: u16, + pub _pad: [u16; 3], + pub rm_addr: u64, + pub rm_msg: u64, +} + +#[repr(C)] +#[derive(Default)] +pub struct vioc_intr_poll_mq { + pub vipm_nrings: u16, + pub _pad: u16, + pub vipm_status: [u32; howmany(VIONA_MAX_QPAIRS, 32)], +} + +#[repr(C)] +#[derive(Default)] +pub struct vioc_notify_mmio { + pub vim_address: u64, + pub vim_size: u32, +} + +#[repr(C)] +#[derive(Default)] +pub struct vioc_ring_state { + pub vrs_index: u16, + pub vrs_avail_idx: u16, + pub vrs_used_idx: u16, + pub vrs_qsize: u16, + pub vrs_qaddr_desc: u64, + pub vrs_qaddr_avail: u64, + pub vrs_qaddr_used: u64, +} + +pub const VIONA_PROMISC_NONE: i32 = 0; +pub const VIONA_PROMISC_MULTI: i32 = 1; +pub const VIONA_PROMISC_ALL: i32 = 2; +#[cfg(feature = "falcon")] +pub const VIONA_PROMISC_ALL_VLAN: i32 = 3; + +// Note: no Default derive here; the raw-pointer members have no default. +#[repr(C)] +pub struct vioc_get_params { + pub vgp_param: *mut c_void, + pub vgp_param_sz: size_t, +} + +#[repr(C)] +pub struct vioc_set_params { + pub vsp_param: *mut c_void, + pub vsp_param_sz: size_t, + pub vsp_error: *mut c_void, + pub vsp_error_sz: size_t, +} + +/// This is the viona interface version which viona_api expects to operate +/// against. All constants and structs defined by the crate are done so in +/// terms of that specific version. +pub const VIONA_CURRENT_INTERFACE_VERSION: u32 = 6; + +/// Maximum size of packed nvlists used in viona parameter ioctls +pub const VIONA_MAX_PARAM_NVLIST_SZ: usize = 4096; diff --git a/crates/viona-api/src/lib.rs b/crates/viona-api/src/lib.rs index ddde9fd17..d8535972c 100644 --- a/crates/viona-api/src/lib.rs +++ b/crates/viona-api/src/lib.rs @@ -7,7 +7,9 @@ use std::io::{Error, ErrorKind, Result}; use std::os::fd::*; use std::os::unix::fs::MetadataExt; -pub use viona_api_sys::*; +mod ffi; + +pub use ffi::*; // Hide libnvpair usage when not building on illumos to avoid linking errors #[cfg(target_os = "illumos")] @@ -23,7 +25,7 @@ impl VionaFd { let this = Self::open()?; let mut vna_create = vioc_create { c_linkid: link_id, c_vmfd: vm_fd }; - let _ = unsafe { this.ioctl(ioctls::VNA_IOC_CREATE, &mut vna_create) }?; + let _ = unsafe { this.ioctl(VNA_IOC_CREATE, &mut vna_create) }?; Ok(this) } @@ -109,7 +111,7 @@ impl VionaFd { /// Query the API version exposed by the kernel VMM. pub fn api_version(&self) -> Result<u32> { - let vers = self.ioctl_usize(ioctls::VNA_IOC_VERSION, 0)?; + let vers = self.ioctl_usize(VNA_IOC_VERSION, 0)?; // We expect and demand a positive version number from the // VNA_IOC_VERSION interface.
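A minimal sketch (hypothetical helper, not part of this change) of consuming the compact bitmap that VNA_IOC_INTR_POLL_MQ fills into vioc_intr_poll_mq's vipm_status, assuming the kernel sets bit n for ring n:

    fn ring_intr_pending(status: &[u32], ring: usize) -> bool {
        // Word ring / 32, bit ring % 32, matching the howmany(_, 32) sizing.
        status[ring / 32] & (1 << (ring % 32)) != 0
    }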
@@ -129,16 +131,20 @@ impl VionaFd { const fn ioctl_usize_safe(cmd: i32) -> bool { matches!( cmd, - ioctls::VNA_IOC_DELETE - | ioctls::VNA_IOC_RING_RESET - | ioctls::VNA_IOC_RING_KICK - | ioctls::VNA_IOC_RING_PAUSE - | ioctls::VNA_IOC_RING_INTR_CLR - | ioctls::VNA_IOC_VERSION - | ioctls::VNA_IOC_SET_NOTIFY_IOP - | ioctls::VNA_IOC_SET_PROMISC - | ioctls::VNA_IOC_GET_MTU - | ioctls::VNA_IOC_SET_MTU, + VNA_IOC_DELETE + | VNA_IOC_RING_RESET + | VNA_IOC_RING_KICK + | VNA_IOC_RING_PAUSE + | VNA_IOC_RING_INTR_CLR + | VNA_IOC_VERSION + | VNA_IOC_SET_NOTIFY_IOP + | VNA_IOC_SET_PROMISC + | VNA_IOC_GET_MTU + | VNA_IOC_SET_MTU + | VNA_IOC_GET_PAIRS + | VNA_IOC_SET_PAIRS + | VNA_IOC_GET_USEPAIRS + | VNA_IOC_SET_USEPAIRS, ) } } @@ -191,7 +197,14 @@ fn minor(meta: &std::fs::Metadata) -> u32 { #[repr(u32)] #[derive(Copy, Clone)] pub enum ApiVersion { - /// Add support for getting/setting MTU + /// Adds multi-queue support and changes the data structure for per-queue + /// interrupt polling to a compact bitmap. + V6 = 6, + + /// Adds support for VirtIO 1.0 (modern) virtqueues. + V5 = 5, + + /// Adds support for getting/setting MTU V4 = 4, /// Adds support for interface parameters @@ -205,7 +218,7 @@ pub enum ApiVersion { } impl ApiVersion { pub const fn current() -> Self { - Self::V4 + Self::V6 } } impl PartialEq<ApiVersion> for u32 { diff --git a/crates/viona-api/sys/Cargo.toml b/crates/viona-api/sys/Cargo.toml deleted file mode 100644 index 32b533164..000000000 --- a/crates/viona-api/sys/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "viona_api_sys" -version = "0.0.0" -license = "MPL-2.0" -edition = "2021" - -[lib] -test = false -doctest = false - -[dependencies] -libc = "0.2" - -[features] -falcon = [] diff --git a/crates/viona-api/sys/src/lib.rs b/crates/viona-api/sys/src/lib.rs deleted file mode 100644 index be0b068b2..000000000 --- a/crates/viona-api/sys/src/lib.rs +++ /dev/null @@ -1,113 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/.
- -pub mod ioctls { - const VNA_IOC: i32 = ((b'V' as i32) << 16) | ((b'C' as i32) << 8); - - pub const VNA_IOC_CREATE: i32 = VNA_IOC | 0x01; - pub const VNA_IOC_DELETE: i32 = VNA_IOC | 0x02; - pub const VNA_IOC_VERSION: i32 = VNA_IOC | 0x03; - pub const VNA_IOC_DEFAULT_PARAMS: i32 = VNA_IOC | 0x04; - - pub const VNA_IOC_RING_INIT: i32 = VNA_IOC | 0x10; - pub const VNA_IOC_RING_RESET: i32 = VNA_IOC | 0x11; - pub const VNA_IOC_RING_KICK: i32 = VNA_IOC | 0x12; - pub const VNA_IOC_RING_SET_MSI: i32 = VNA_IOC | 0x13; - pub const VNA_IOC_RING_INTR_CLR: i32 = VNA_IOC | 0x14; - pub const VNA_IOC_RING_SET_STATE: i32 = VNA_IOC | 0x15; - pub const VNA_IOC_RING_GET_STATE: i32 = VNA_IOC | 0x16; - pub const VNA_IOC_RING_PAUSE: i32 = VNA_IOC | 0x17; - - pub const VNA_IOC_INTR_POLL: i32 = VNA_IOC | 0x20; - pub const VNA_IOC_SET_FEATURES: i32 = VNA_IOC | 0x21; - pub const VNA_IOC_GET_FEATURES: i32 = VNA_IOC | 0x22; - pub const VNA_IOC_SET_NOTIFY_IOP: i32 = VNA_IOC | 0x23; - pub const VNA_IOC_SET_PROMISC: i32 = VNA_IOC | 0x24; - pub const VNA_IOC_GET_PARAMS: i32 = VNA_IOC | 0x25; - pub const VNA_IOC_SET_PARAMS: i32 = VNA_IOC | 0x26; - pub const VNA_IOC_GET_MTU: i32 = VNA_IOC | 0x27; - pub const VNA_IOC_SET_MTU: i32 = VNA_IOC | 0x28; -} - -pub const VIONA_VQ_MAX: u16 = 2; - -mod structs { - #![allow(non_camel_case_types)] - - use super::VIONA_VQ_MAX; - - #[repr(C)] - pub struct vioc_create { - pub c_linkid: u32, - pub c_vmfd: i32, - } - - #[repr(C)] - pub struct vioc_ring_init { - pub ri_index: u16, - pub ri_qsize: u16, - pub _pad: [u16; 2], - pub ri_qaddr: u64, - } - - #[repr(C)] - pub struct vioc_ring_msi { - pub rm_index: u16, - pub _pad: [u16; 3], - pub rm_addr: u64, - pub rm_msg: u64, - } - - #[repr(C)] - pub struct vioc_intr_poll { - pub vip_status: [u32; VIONA_VQ_MAX as usize], - } - - #[repr(C)] - #[derive(Default)] - pub struct vioc_ring_state { - pub vrs_index: u16, - pub vrs_avail_idx: u16, - pub vrs_used_idx: u16, - pub vrs_qsize: u16, - pub vrs_qaddr: u64, - } - - #[repr(C)] - pub enum viona_promisc_t { - VIONA_PROMISC_NONE = 0, - VIONA_PROMISC_MULTI, - VIONA_PROMISC_ALL, - #[cfg(feature = "falcon")] - VIONA_PROMISC_ALL_VLAN, - } - - use libc::size_t; - use std::ffi::c_void; - - #[repr(C)] - pub struct vioc_get_params { - pub vgp_param: *mut c_void, - pub vgp_param_sz: size_t, - } - - #[repr(C)] - pub struct vioc_set_params { - pub vsp_param: *mut c_void, - pub vsp_param_sz: size_t, - pub vsp_error: *mut c_void, - pub vsp_error_sz: size_t, - } -} - -/// This is the viona interface version which viona_api expects to operate -/// against. All constants and structs defined by the crate are done so in -/// terms of that specific version. 
-pub const VIONA_CURRENT_INTERFACE_VERSION: u32 = 4; - -/// Maximum size of packed nvlists used in viona parameter ioctls -pub const VIONA_MAX_PARAM_NVLIST_SZ: usize = 4096; - -pub use ioctls::*; -pub use structs::*; diff --git a/lib/propolis/Cargo.toml b/lib/propolis/Cargo.toml index a3e243155..0139a804c 100644 --- a/lib/propolis/Cargo.toml +++ b/lib/propolis/Cargo.toml @@ -3,10 +3,11 @@ name = "propolis" version = "0.1.0" license = "MPL-2.0" edition = "2021" -rust-version = "1.83" +rust-version = "1.90" [dependencies] libc.workspace = true +bit_field.workspace = true bitflags.workspace = true bitstruct.workspace = true byteorder.workspace = true diff --git a/lib/propolis/src/block/crucible.rs b/lib/propolis/src/block/crucible.rs index 9541fb417..4adc085dd 100644 --- a/lib/propolis/src/block/crucible.rs +++ b/lib/propolis/src/block/crucible.rs @@ -416,7 +416,9 @@ fn block_offset_count( len_bytes: usize, block_size: usize, ) -> Result<(crucible::BlockIndex, usize), Error> { - if off_bytes % block_size == 0 && len_bytes % block_size == 0 { + if off_bytes.is_multiple_of(block_size) + && len_bytes.is_multiple_of(block_size) + { Ok(( crucible::BlockIndex((off_bytes / block_size) as u64), len_bytes / block_size, diff --git a/lib/propolis/src/block/in_memory.rs b/lib/propolis/src/block/in_memory.rs index d6efdc543..964bf963e 100644 --- a/lib/propolis/src/block/in_memory.rs +++ b/lib/propolis/src/block/in_memory.rs @@ -106,7 +106,7 @@ impl InMemoryBackend { let len = bytes.len(); if len == 0 { return Err(Error::new(ErrorKind::Other, "size cannot be 0")); - } else if (len % block_size as usize) != 0 { + } else if !len.is_multiple_of(block_size as usize) { return Err(Error::new( ErrorKind::Other, format!( diff --git a/lib/propolis/src/block/mem_async.rs b/lib/propolis/src/block/mem_async.rs index c43c073b2..616a59944 100644 --- a/lib/propolis/src/block/mem_async.rs +++ b/lib/propolis/src/block/mem_async.rs @@ -108,7 +108,7 @@ impl MemAsyncBackend { if size == 0 { return Err(Error::new(ErrorKind::Other, "size cannot be 0")); - } else if (size % u64::from(block_size)) != 0 { + } else if !size.is_multiple_of(u64::from(block_size)) { return Err(Error::new( ErrorKind::Other, format!( diff --git a/lib/propolis/src/hw/chipset/i440fx.rs b/lib/propolis/src/hw/chipset/i440fx.rs index a3566d96c..164b97076 100644 --- a/lib/propolis/src/hw/chipset/i440fx.rs +++ b/lib/propolis/src/hw/chipset/i440fx.rs @@ -173,8 +173,8 @@ impl I440FxHostBridge { device_id: PIIX4_HB_DEV_ID, sub_vendor_id: VENDOR_OXIDE, sub_device_id: PIIX4_HB_SUB_DEV_ID, - class: pci::bits::CLASS_BRIDGE, - subclass: pci::bits::SUBCLASS_BRIDGE_HOST, + device_class: pci::bits::CLASS_BRIDGE, + device_subclass: pci::bits::SUBCLASS_BRIDGE_HOST, ..Default::default() }) .finish(); @@ -347,8 +347,8 @@ impl Piix3Lpc { device_id: PIIX3_ISA_DEV_ID, sub_vendor_id: VENDOR_OXIDE, sub_device_id: PIIX3_ISA_SUB_DEV_ID, - class: pci::bits::CLASS_BRIDGE, - subclass: pci::bits::SUBCLASS_BRIDGE_ISA, + device_class: pci::bits::CLASS_BRIDGE, + device_subclass: pci::bits::SUBCLASS_BRIDGE_ISA, ..Default::default() }) .add_custom_cfg(PIR_OFFSET as u8, PIR_LEN as u8) @@ -800,8 +800,8 @@ impl Piix3PM { device_id: PIIX4_PM_DEV_ID, sub_vendor_id: VENDOR_OXIDE, sub_device_id: PIIX4_PM_SUB_DEV_ID, - class: pci::bits::CLASS_BRIDGE, - subclass: pci::bits::SUBCLASS_BRIDGE_OTHER, + device_class: pci::bits::CLASS_BRIDGE, + device_subclass: pci::bits::SUBCLASS_BRIDGE_OTHER, // Linux will complain about the PM-timer being potentially slow if // it detects the ACPI device exposing a 
revision prior to 0x3. revision_id: 0x3, diff --git a/lib/propolis/src/hw/nvme/cmds.rs b/lib/propolis/src/hw/nvme/cmds.rs index 9ab591001..88438ee3b 100644 --- a/lib/propolis/src/hw/nvme/cmds.rs +++ b/lib/propolis/src/hw/nvme/cmds.rs @@ -851,7 +851,7 @@ impl PrpIter<'_> { // The first PRP List entry: // - shall be Qword aligned, and // - may also have a non-zero offset within the memory page. - if (self.prp2 % 8) != 0 { + if !self.prp2.is_multiple_of(8) { return Err("PRP2 not Qword aligned!"); } diff --git a/lib/propolis/src/hw/nvme/mod.rs b/lib/propolis/src/hw/nvme/mod.rs index 0f281bf67..6b1956488 100644 --- a/lib/propolis/src/hw/nvme/mod.rs +++ b/lib/propolis/src/hw/nvme/mod.rs @@ -814,8 +814,8 @@ impl PciNvme { device_id: PROPOLIS_NVME_DEV_ID, sub_vendor_id: VENDOR_OXIDE, sub_device_id: PROPOLIS_NVME_DEV_ID, - class: pci::bits::CLASS_STORAGE, - subclass: pci::bits::SUBCLASS_STORAGE_NVM, + device_class: pci::bits::CLASS_STORAGE, + device_subclass: pci::bits::SUBCLASS_STORAGE_NVM, prog_if: pci::bits::PROGIF_ENTERPRISE_NVME, ..Default::default() }); diff --git a/lib/propolis/src/hw/pci/bridge.rs b/lib/propolis/src/hw/pci/bridge.rs index f366d0f4e..7e2a89fdd 100644 --- a/lib/propolis/src/hw/pci/bridge.rs +++ b/lib/propolis/src/hw/pci/bridge.rs @@ -116,8 +116,8 @@ impl Bridge { device_id: device, sub_vendor_id: vendor, sub_device_id: device, - class: BRIDGE_PROG_CLASS, - subclass: BRIDGE_PROG_SUBCLASS, + device_class: BRIDGE_PROG_CLASS, + device_subclass: BRIDGE_PROG_SUBCLASS, prog_if: BRIDGE_PROG_IF, ..Default::default() }, @@ -142,8 +142,8 @@ impl Bridge { BridgeReg::Common(id) => match id { StdCfgReg::VendorId => ro.write_u16(self.ident.vendor_id), StdCfgReg::DeviceId => ro.write_u16(self.ident.device_id), - StdCfgReg::Class => ro.write_u8(self.ident.class), - StdCfgReg::Subclass => ro.write_u8(self.ident.subclass), + StdCfgReg::Class => ro.write_u8(self.ident.device_class), + StdCfgReg::Subclass => ro.write_u8(self.ident.device_subclass), StdCfgReg::SubVendorId => { ro.write_u16(self.ident.sub_vendor_id) } diff --git a/lib/propolis/src/hw/pci/cfgspace.rs b/lib/propolis/src/hw/pci/cfgspace.rs index 697409df8..e84259cf7 100644 --- a/lib/propolis/src/hw/pci/cfgspace.rs +++ b/lib/propolis/src/hw/pci/cfgspace.rs @@ -6,25 +6,31 @@ use crate::common::RWOp; use crate::common::ReadOp; +use crate::hw::pci::CapId; use crate::util::regmap::Flags; use crate::util::regmap::RegMap; use super::bits::*; use super::Cap; +#[derive(Debug)] +pub(super) enum CfgCapReg { + Id(u8), + Next(u8), + Body(u8), +} + #[derive(Debug)] pub(super) enum CfgReg { Std, Custom(u8), - CapId(u8), - CapNext(u8), - CapBody(u8), + Cap(CfgCapReg), } /// A helper for building maps of PCI device configuration space. pub(super) struct CfgBuilder { cfgmap: RegMap, - caps: Vec, + caps: Vec>, cap_next_alloc: usize, } @@ -97,27 +103,35 @@ impl CfgBuilder { /// capability pointer registers) is not a multiple of 4 bytes; or /// - The capability's total size (again inclusive of the standard /// registers) is 256 bytes or larger. 
- pub fn add_capability(&mut self, id: u8, len: u8) { + pub fn add_capability(&mut self, id: CapId<u32>, len: u8) { self.check_overlap(self.cap_next_alloc, len as usize); let end = self.cap_next_alloc + 2 + len as usize; // XXX: on the caller to size properly for alignment requirements - assert!(end % 4 == 0); + assert_eq!(end % 4, 0); assert!(end <= u8::MAX as usize); let idx = self.caps.len() as u8; self.caps.push(Cap::new(id, self.cap_next_alloc as u8)); - self.cfgmap.define(self.cap_next_alloc, 1, CfgReg::CapId(idx)); - self.cfgmap.define(self.cap_next_alloc + 1, 1, CfgReg::CapNext(idx)); + self.cfgmap.define( + self.cap_next_alloc, + 1, + CfgReg::Cap(CfgCapReg::Id(idx)), + ); + self.cfgmap.define( + self.cap_next_alloc + 1, + 1, + CfgReg::Cap(CfgCapReg::Next(idx)), + ); self.cfgmap.define( + self.cap_next_alloc + 2, len as usize, - CfgReg::CapBody(idx), + CfgReg::Cap(CfgCapReg::Body(idx)), ); self.cap_next_alloc = end; } /// Constructs the configuration space and a description of its /// capabilities. - pub fn finish(self) -> (RegMap<CfgReg>, Vec<Cap>) { + pub fn finish(self) -> (RegMap<CfgReg>, Vec<Cap<u32>>) { (self.cfgmap, self.caps) } } diff --git a/lib/propolis/src/hw/pci/device.rs b/lib/propolis/src/hw/pci/device.rs index f727b01f4..67f86855e 100644 --- a/lib/propolis/src/hw/pci/device.rs +++ b/lib/propolis/src/hw/pci/device.rs @@ -6,7 +6,7 @@ use std::sync::{Arc, Condvar, Mutex, MutexGuard}; use super::bar::{BarDefine, Bars}; use super::bits::*; -use super::cfgspace::{CfgBuilder, CfgReg}; +use super::cfgspace::{CfgBuilder, CfgCapReg, CfgReg}; use super::{bus, BarN, Endpoint}; use crate::accessors::{MemAccessor, MsiAccessor}; use crate::common::*; @@ -17,44 +17,56 @@ use crate::util::regmap::{Flags, RegMap}; use lazy_static::lazy_static; use strum::IntoEnumIterator; +fn op_meta(rwo: &RWOp) -> (usize, &'static str) { + match rwo { + RWOp::Read(ro) => (ro.offset(), "read"), + RWOp::Write(wo) => (wo.offset(), "write"), + } +} + +/// Represents behavior common across virtualized PCI(e) devices. pub trait Device: Send + Sync + 'static { + /// Returns the device state of this device. fn device_state(&self) -> &DeviceState; + /// Reads or writes an MMIO region described by a BAR. fn bar_rw(&self, bar: BarN, rwo: RWOp) { - match rwo { - RWOp::Read(ro) => { - unimplemented!("BAR read ({:?} @ {:x})", bar, ro.offset()) - } - RWOp::Write(wo) => { - unimplemented!("BAR write ({:?} @ {:x})", bar, wo.offset()) - } - } + let (offset, op) = op_meta(&rwo); + unimplemented!("BAR {op} ({bar:?} @ {offset:x})") } + + /// Reads or writes a custom (device-defined) configuration space region. fn cfg_rw(&self, region: u8, rwo: RWOp) { - match rwo { - RWOp::Read(ro) => { - unimplemented!("CFG read ({:x} @ {:x})", region, ro.offset()) - } - RWOp::Write(wo) => { - unimplemented!("CFG write ({:x} @ {:x})", region, wo.offset()) - } - } + let (offset, op) = op_meta(&rwo); + unimplemented!("CFG {op} ({region:x} @ {offset:x})") } + + /// Reads or writes a capability in configuration space. + fn cap_rw(&self, id: CapId<u32>, rwo: RWOp) { + let (offset, op) = op_meta(&rwo); + unimplemented!("CAP {op} ({id:x?} @ {offset:x})") + } + + /// Attaches the device to the virtual machine. fn attach(&self) {} - #[allow(unused_variables)] - fn interrupt_mode_change(&self, mode: IntrMode) {} - #[allow(unused_variables)] - fn msi_update(&self, info: MsiUpdate) {} + + /// Notification that the interrupt mode has changed. For + /// example, we might change from MSI-X to MSI.
+ fn interrupt_mode_change(&self, mode: IntrMode) { + let _used = mode; + } + + /// Notification that our MSI configuration has changed. + fn msi_update(&self, info: MsiUpdate) { + let _used = info; + } /// Notification that configuration of BAR(s) has changed, either due to /// writes to the BARs themselves, or an overall status change (via the /// Command register or a device reset). - #[allow(unused_variables)] - fn bar_update(&self, bstate: BarState) {} - - // TODO - // fn cap_read(&self); - // fn cap_write(&self); + fn bar_update(&self, bstate: BarState) { + let _used = bstate; + } } impl<D: Device> Endpoint for D { @@ -73,9 +85,7 @@ }); } CfgReg::Custom(region) => Device::cfg_rw(self, *region, rwo), - CfgReg::CapId(_) | CfgReg::CapNext(_) | CfgReg::CapBody(_) => { - ds.cfg_cap_rw(self, id, rwo) - } + CfgReg::Cap(reg) => ds.cfg_cap_rw(self, reg, rwo), }); } fn bar_rw(&self, bar: BarN, rwo: RWOp) { @@ -157,8 +167,8 @@ lazy_static! { pub struct Ident { pub vendor_id: u16, pub device_id: u16, - pub class: u8, - pub subclass: u8, + pub device_class: u8, + pub device_subclass: u8, pub prog_if: u8, pub revision_id: u8, pub sub_vendor_id: u16, @@ -175,6 +185,7 @@ struct State { update_in_progress: bool, } impl State { + /// Creates a new state structure for a device with the given BARs. fn new(bars: Bars) -> Self { Self { reg_command: RegCmd::empty(), @@ -184,30 +195,57 @@ update_in_progress: false, } } + + /// Returns the bus attachment state. fn attached(&self) -> &bus::Attachment { self.attach.as_ref().unwrap() } + /// Is MMIO access decoding enabled? fn mmio_en(&self) -> bool { self.reg_command.contains(RegCmd::MMIO_EN) } + /// Is PIO access decoding enabled? fn pio_en(&self) -> bool { self.reg_command.contains(RegCmd::IO_EN) } + /// Given the device state, is decoding enabled for a specified [BarDefine]? fn decoding_active(&self, bar: &BarDefine) -> bool { (bar.is_pio() && self.pio_en()) || (bar.is_mmio() && self.mmio_en()) } } -pub(super) struct Cap { - id: u8, +/// A capability ID uniquely identifies a type of capability that may +/// be present in configuration space. Vendor capabilities are generic +/// over some type T that is passed back to the device, allowing it to +/// identify which capability is being accessed. +#[derive(Clone, Copy, Debug)] +pub enum CapId<T> { + Msix, + Vendor(T), +} + +impl<T> CapId<T> { + /// Returns the PCI-defined capability ID for this CapId. + pub fn as_pci_cap_id(&self) -> u8 { + match self { + Self::Msix => CAP_ID_MSIX, + Self::Vendor(_) => CAP_ID_VENDOR, + } + } +} + +/// Represents a capability with its type and offset in configuration space. +pub struct Cap<T> { + id: CapId<T>, offset: u8, } -impl Cap { - pub(super) fn new(id: u8, offset: u8) -> Self { +impl<T> Cap<T> { + /// Creates a new Cap with the given type, at the given offset.
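+ /// (A hypothetical instantiation: `Cap::new(CapId::Vendor(1u32), 0x40)` + /// describes a vendor capability whose two standard header bytes sit at + /// configuration offset 0x40, with its body starting at 0x42.)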
+ pub(super) fn new(id: CapId<T>, offset: u8) -> Self { Self { id, offset } } } @@ -217,7 +255,7 @@ pub struct DeviceState { lintr_support: bool, cfg_space: RegMap<CfgReg>, msix_cfg: Option<Arc<MsixCfg>>, - caps: Vec<Cap>, + caps: Vec<Cap<u32>>, pub acc_mem: MemAccessor, // MSI accessor remains "hidden" behind MsixCfg machinery @@ -233,7 +271,7 @@ impl DeviceState { lintr_support: bool, cfg_space: RegMap<CfgReg>, msix_cfg: Option<Arc<MsixCfg>>, - caps: Vec<Cap>, + caps: Vec<Cap<u32>>, bars: Bars, ) -> Self { let acc_msi = MsiAccessor::new_orphan(); @@ -289,8 +327,8 @@ match id { StdCfgReg::VendorId => ro.write_u16(self.ident.vendor_id), StdCfgReg::DeviceId => ro.write_u16(self.ident.device_id), - StdCfgReg::Class => ro.write_u8(self.ident.class), - StdCfgReg::Subclass => ro.write_u8(self.ident.subclass), + StdCfgReg::Class => ro.write_u8(self.ident.device_class), + StdCfgReg::Subclass => ro.write_u8(self.ident.device_subclass), StdCfgReg::SubVendorId => ro.write_u16(self.ident.sub_vendor_id), StdCfgReg::SubDeviceId => ro.write_u16(self.ident.sub_device_id), StdCfgReg::ProgIf => ro.write_u8(self.ident.prog_if), @@ -527,14 +565,19 @@ }) } - fn cfg_cap_rw(&self, dev: &dyn Device, id: &CfgReg, rwo: RWOp) { + fn cfg_cap_rw(&self, dev: &dyn Device, id: &CfgCapReg, rwo: RWOp) { match id { - CfgReg::CapId(i) => { + CfgCapReg::Id(idx) => { if let RWOp::Read(ro) = rwo { - ro.write_u8(self.caps[*i as usize].id) + let i = *idx as usize; + let cap_id = match self.caps[i].id { + CapId::Msix => CAP_ID_MSIX, + CapId::Vendor(_) => CAP_ID_VENDOR, + }; + ro.write_u8(cap_id); } } - CfgReg::CapNext(i) => { + CfgCapReg::Next(i) => { if let RWOp::Read(ro) = rwo { let next = *i as usize + 1; if next < self.caps.len() { @@ -544,18 +587,16 @@ } } } - CfgReg::CapBody(i) => self.do_cap_rw(dev, *i, rwo), - - // Should be filtered down to only cap regs by now - _ => panic!(), + CfgCapReg::Body(i) => self.cap_rw_body(dev, *i, rwo), } } - - fn do_cap_rw(&self, dev: &dyn Device, idx: u8, rwo: RWOp) { + + fn cap_rw_body(&self, dev: &dyn Device, idx: u8, rwo: RWOp) { assert!(idx < self.caps.len() as u8); // XXX: no fancy capability support for now let cap = &self.caps[idx as usize]; match cap.id { - CAP_ID_MSIX => { + CapId::Msix => { let msix_cfg = self.msix_cfg.as_ref().unwrap(); if let RWOp::Write(_) = rwo { // MSI-X cap writes may result in a change to the interrupt @@ -571,9 +612,7 @@ .cfg_rw(rwo, |info| self.notify_msi_update(dev, info)); } } - _ => { - // XXX: do some logging? - } + CapId::Vendor(_) => dev.cap_rw(cap.id, rwo), } } fn notify_msi_update(&self, dev: &dyn Device, info: MsiUpdate) { @@ -987,9 +1026,10 @@ impl MsixCfg { state.enabled = new_ena; state.func_mask = new_mask; - // Notify when the MSI-X function mask is changing. Changes to - // enable/disable state is already covered by the logic for - // interrupt_mode_change updates + // Notify when the MSI-X function mask is changing. + // Changes to enable/disable state are already + // covered by the logic for interrupt_mode_change + // updates. if old_mask != new_mask && old_ena == new_ena && new_ena @@ -1232,7 +1272,7 @@ impl Builder { self } - fn add_cap_raw(&mut self, id: u8, len: u8) { + fn add_cap_raw(&mut self, id: CapId<u32>, len: u8) { self.cfg_builder.add_capability(id, len); } @@ -1251,8 +1291,14 @@ assert!(bar_size < u32::MAX as usize); self = self.add_bar_mmio(bar, bar_size as u32); self.msix_cfg = Some(cfg); - self.add_cap_raw(CAP_ID_MSIX, 10); + self.add_cap_raw(CapId::Msix, 10); + + self + } + /// Add a "Vendor" capability.
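+ /// The `tag` is handed back to the device through [`Device::cap_rw`], + /// letting it tell its vendor capabilities apart; the virtio transport, + /// for example, registers one vendor capability per configuration + /// structure.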
+ pub fn add_cap_vendor(mut self, tag: u32, len: u8) -> Self { + self.add_cap_raw(CapId::Vendor(tag), len); self } diff --git a/lib/propolis/src/hw/virtio/bits.rs b/lib/propolis/src/hw/virtio/bits.rs index 7e3948f11..c94313f55 100644 --- a/lib/propolis/src/hw/virtio/bits.rs +++ b/lib/propolis/src/hw/virtio/bits.rs @@ -2,59 +2,35 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -pub const VIRTIO_DEV_NET: u16 = 0x1000; -pub const VIRTIO_DEV_BLOCK: u16 = 0x1001; -pub const VIRTIO_DEV_9P: u16 = 0x1009; - -// Legacy virtio-pci devices must present these sub-device-IDs -pub const VIRTIO_SUB_DEV_NET: u16 = 0x1; -pub const VIRTIO_SUB_DEV_BLOCK: u16 = 0x2; -pub const VIRTIO_SUB_DEV_9P_TRANSPORT: u16 = 0x9; - -// Legacy interface feature bits -pub const VIRTIO_F_NOTIFY_ON_EMPTY: usize = 1 << 24; -pub const VIRTIO_F_ANY_LAYOUT: usize = 1 << 27; - -// Standard interface feature bits -pub const VIRTIO_F_RING_INDIRECT_DESC: usize = 1 << 28; -pub const VIRTIO_F_RING_EVENT_IDX: usize = 1 << 29; -pub const VIRTIO_F_VERSION_1: usize = 1 << 32; - // virtio-net feature bits -pub const VIRTIO_NET_F_CSUM: u32 = 1 << 0; -pub const VIRTIO_NET_F_GUEST_CSUM: u32 = 1 << 1; -pub const VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: u32 = 1 << 2; -pub const VIRTIO_NET_F_MTU: u32 = 1 << 3; -pub const VIRTIO_NET_F_MAC: u32 = 1 << 5; -pub const VIRTIO_NET_F_GUEST_TSO4: u32 = 1 << 7; -pub const VIRTIO_NET_F_GUEST_TSO6: u32 = 1 << 8; -pub const VIRTIO_NET_F_GUEST_ECN: u32 = 1 << 9; -pub const VIRTIO_NET_F_GUEST_UFO: u32 = 1 << 10; -pub const VIRTIO_NET_F_HOST_TSO4: u32 = 1 << 11; -pub const VIRTIO_NET_F_HOST_TSO6: u32 = 1 << 12; -pub const VIRTIO_NET_F_HOST_ECN: u32 = 1 << 13; -pub const VIRTIO_NET_F_HOST_UFO: u32 = 1 << 14; -pub const VIRTIO_NET_F_MGR_RXBUF: u32 = 1 << 15; -pub const VIRTIO_NET_F_STATUS: u32 = 1 << 16; -pub const VIRTIO_NET_F_CTRL_VQ: u32 = 1 << 17; -pub const VIRTIO_NET_F_CTRL_RX: u32 = 1 << 18; -pub const VIRTIO_NET_F_CTRL_VLAN: u32 = 1 << 19; +pub const VIRTIO_NET_F_CSUM: u64 = 1 << 0; +pub const VIRTIO_NET_F_GUEST_CSUM: u64 = 1 << 1; +pub const VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: u64 = 1 << 2; +pub const VIRTIO_NET_F_MTU: u64 = 1 << 3; +pub const VIRTIO_NET_F_MAC: u64 = 1 << 5; +pub const VIRTIO_NET_F_GUEST_TSO4: u64 = 1 << 7; +pub const VIRTIO_NET_F_GUEST_TSO6: u64 = 1 << 8; +pub const VIRTIO_NET_F_GUEST_ECN: u64 = 1 << 9; +pub const VIRTIO_NET_F_GUEST_UFO: u64 = 1 << 10; +pub const VIRTIO_NET_F_HOST_TSO4: u64 = 1 << 11; +pub const VIRTIO_NET_F_HOST_TSO6: u64 = 1 << 12; +pub const VIRTIO_NET_F_HOST_ECN: u64 = 1 << 13; +pub const VIRTIO_NET_F_HOST_UFO: u64 = 1 << 14; +pub const VIRTIO_NET_F_MGR_RXBUF: u64 = 1 << 15; +pub const VIRTIO_NET_F_STATUS: u64 = 1 << 16; +pub const VIRTIO_NET_F_CTRL_VQ: u64 = 1 << 17; +pub const VIRTIO_NET_F_CTRL_RX: u64 = 1 << 18; +pub const VIRTIO_NET_F_CTRL_VLAN: u64 = 1 << 19; +pub const VIRTIO_NET_F_MQ: u64 = 1 << 22; // virtio-block feature bits -pub const VIRTIO_BLK_F_SIZE_MAX: u32 = 1 << 1; -pub const VIRTIO_BLK_F_SEG_MAX: u32 = 1 << 2; -pub const VIRTIO_BLK_F_GEOMETRY: u32 = 1 << 4; -pub const VIRTIO_BLK_F_RO: u32 = 1 << 5; -pub const VIRTIO_BLK_F_BLK_SIZE: u32 = 1 << 6; -pub const VIRTIO_BLK_F_FLUSH: u32 = 1 << 9; -pub const VIRTIO_BLK_F_TOPOLOGY: u32 = 1 << 10; -pub const VIRTIO_BLK_F_CONFIG_WCE: u32 = 1 << 11; -pub const VIRTIO_BLK_F_DISCARD: u32 = 1 << 13; -pub const VIRTIO_BLK_F_WRITE_ZEROES: u32 = 1 << 14; - -// virtqueue descriptor bits -pub const VIRTQ_DESC_F_NEXT: u16 = 1; -pub const 
VIRTQ_DESC_F_WRITE: u16 = 2; -pub const VIRTQ_DESC_F_INDIRECT: u16 = 4; -pub const VRING_AVAIL_F_NO_INTERRUPT: u16 = 1; -pub const VRING_USED_F_NO_NOTIFY: u16 = 1; +pub const VIRTIO_BLK_F_SIZE_MAX: u64 = 1 << 1; +pub const VIRTIO_BLK_F_SEG_MAX: u64 = 1 << 2; +pub const VIRTIO_BLK_F_GEOMETRY: u64 = 1 << 4; +pub const VIRTIO_BLK_F_RO: u64 = 1 << 5; +pub const VIRTIO_BLK_F_BLK_SIZE: u64 = 1 << 6; +pub const VIRTIO_BLK_F_FLUSH: u64 = 1 << 9; +pub const VIRTIO_BLK_F_TOPOLOGY: u64 = 1 << 10; +pub const VIRTIO_BLK_F_CONFIG_WCE: u64 = 1 << 11; +pub const VIRTIO_BLK_F_DISCARD: u64 = 1 << 13; +pub const VIRTIO_BLK_F_WRITE_ZEROES: u64 = 1 << 14; diff --git a/lib/propolis/src/hw/virtio/block.rs b/lib/propolis/src/hw/virtio/block.rs index fe19e25eb..49ce04356 100644 --- a/lib/propolis/src/hw/virtio/block.rs +++ b/lib/propolis/src/hw/virtio/block.rs @@ -10,6 +10,7 @@ use crate::accessors::MemAccessor; use crate::block; use crate::common::*; use crate::hw::pci; +use crate::hw::virtio; use crate::migrate::*; use crate::util::regmap::RegMap; @@ -35,19 +36,16 @@ pub struct PciVirtioBlock { } impl PciVirtioBlock { pub fn new(queue_size: u16) -> Arc { - let queues = - VirtQueues::new([VirtQueue::new(queue_size.try_into().unwrap())]) - .unwrap(); + let queues = VirtQueues::new(&[queue_size.try_into().unwrap()]); // virtio-block only needs two MSI-X entries for its interrupt needs: // - device config changes // - queue 0 notification let msix_count = Some(2); - let (virtio_state, pci_state) = PciVirtioState::create( + let (virtio_state, pci_state) = PciVirtioState::new( + virtio::Mode::Legacy, queues, msix_count, - VIRTIO_DEV_BLOCK, - VIRTIO_SUB_DEV_BLOCK, - pci::bits::CLASS_STORAGE, + virtio::DeviceId::Block, VIRTIO_BLK_CFG_SIZE, ); @@ -262,7 +260,7 @@ impl block::DeviceQueue for BlockVq { } impl VirtioDevice for PciVirtioBlock { - fn cfg_rw(&self, mut rwo: RWOp) { + fn rw_dev_config(&self, mut rwo: RWOp) { BLOCK_DEV_REGS.process(&mut rwo, |id, rwo| match rwo { RWOp::Read(ro) => self.block_cfg_read(id, ro), RWOp::Write(_) => { @@ -270,7 +268,12 @@ impl VirtioDevice for PciVirtioBlock { } }); } - fn get_features(&self) -> u32 { + + fn mode(&self) -> virtio::Mode { + self.virtio_state().mode() + } + + fn features(&self) -> u64 { let mut feat = VIRTIO_BLK_F_BLK_SIZE; feat |= VIRTIO_BLK_F_SEG_MAX; feat |= VIRTIO_BLK_F_FLUSH; @@ -284,7 +287,8 @@ impl VirtioDevice for PciVirtioBlock { } feat } - fn set_features(&self, _feat: u32) -> Result<(), ()> { + + fn set_features(&self, _feat: u64) -> Result<(), ()> { // XXX: real features Ok(()) } @@ -294,6 +298,7 @@ impl VirtioDevice for PciVirtioBlock { self.block_attach.notify(0usize.into(), None); } } + impl PciVirtio for PciVirtioBlock { fn virtio_state(&self) -> &PciVirtioState { &self.virtio_state @@ -302,11 +307,13 @@ impl PciVirtio for PciVirtioBlock { &self.pci_state } } + impl block::Device for PciVirtioBlock { fn attachment(&self) -> &block::DeviceAttachment { &self.block_attach } } + impl Lifecycle for PciVirtioBlock { fn type_name(&self) -> &'static str { "pci-virtio-block" diff --git a/lib/propolis/src/hw/virtio/mod.rs b/lib/propolis/src/hw/virtio/mod.rs index a76d04c89..0e88169de 100644 --- a/lib/propolis/src/hw/virtio/mod.rs +++ b/lib/propolis/src/hw/virtio/mod.rs @@ -2,8 +2,16 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +//! Propolis implements VirtIO devices for guests with appropriate drivers. +//! +//! 
We model virtio devices as (virtual) PCI devices, using the virtio PCI +//! transport mechanism as defined in the VirtIO 1.2 specification. +//! Currently we expose virtio-net, virtio-block, and virtio-9pfs device +//! models. + use std::sync::Arc; +use bitflags::bitflags; + #[allow(unused)] mod bits; @@ -16,24 +24,190 @@ mod queue; pub mod softnpu; pub mod viona; -use crate::common::*; +use crate::common::RWOp; +use crate::hw::pci as pci_hw; +use crate::lifecycle::Lifecycle; use queue::VirtQueue; pub use block::PciVirtioBlock; pub use viona::PciVirtioViona; +bitflags! { + pub struct LegacyFeatures: u64 { + const NOTIFY_ON_EMPTY = 1 << 24; + const ANY_LAYOUT = 1 << 27; + } +} + +/// Describes the VirtIO "mode" exposed by the device. +#[derive(Clone, Copy, Debug, Eq, PartialEq, strum::FromRepr)] +#[repr(u32)] +pub enum Mode { + /// Legacy mode is pre-VirtIO 1.0. + Legacy, + + /// Modern devices are those that implement and expose the VirtIO + /// 1.0 and later specification. + Modern, + + /// Transitional devices expose both the pre-VirtIO 1.0 "Legacy" + /// interface and the VirtIO 1.0 and later "Modern" interface. + Transitional, +} + +impl Mode { + /// Returns the PCI revision ID for the given mode. + pub fn pci_revision(self) -> u8 { + match self { + Mode::Legacy | Mode::Transitional => 0, + Mode::Modern => 1, + } + } +} + +/// Recognized VirtIO Device IDs, as defined in the VirtIO 1.2 specification, +/// section 5, "Device Types". +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum DeviceId { + Reserved = 0, + Network = 1, + Block = 2, + Console = 3, + Entropy = 4, + TradMemBalloon = 5, + IoMem = 6, + RpMsg = 7, + Scsi = 8, + NineP = 9, + Mac80211Wlan = 10, + RprocSerial = 11, + Caif = 12, + MemBalloon = 13, + Gpu = 16, + Timer = 17, + Input = 18, + Socket = 19, + Crypto = 20, + SigDistMod = 21, + Pstore = 22, + Iommu = 23, + Memory = 24, + Audio = 25, + Filesystem = 26, + Pmem = 27, + Rpmb = 28, + Mac80211HWSim = 29, + VideoEncoder = 30, + VideoDecoder = 31, + ArmScmi = 32, + NitroSecureMod = 33, + I2c = 34, + Watchdog = 35, + Can = 36, + ParameterServer = 38, + AudioPolicy = 39, + Bluetooth = 40, + Gpio = 41, + Rdma = 42, +} + +impl DeviceId { + /// Maps a VirtIO Device ID to a PCI Device ID for the given mode. + /// + /// VirtIO defines its own namespace for device IDs that is independent + /// of the underlying transport between host and guest. The mapping from + /// that space into PCI device IDs is dependent on the mode; for devices + /// following the VirtIO 1.0 and later specifications, this is + /// straightforward: just add 0x1040 to the VirtIO ID. + /// + /// However, for legacy and transitional mode devices, the mapping is + /// irregular, and a table in the VirtIO specification lists the defined + /// subset of device types and their respective PCI IDs. But note that there + /// are legacy devices with no such defined mapping, and thus no standard + /// transitional IDs. In these cases, we choose to use IDs that seem to be + /// shared in a broad consensus across different implementations, in + /// particular, QEMU. + /// + /// This is not really an issue for us, since we only expose a handful of + /// device models; regardless, we provide mappings for everything defined in + /// the VirtIO spec. + /// + /// See VirtIO 1.2, sec 4.1.2.1 for the mapping from VirtIO device ID + /// to PCI device ID.
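+ /// For example, Network (VirtIO device ID 1) becomes PCI device ID + /// 0x1041 (0x1040 + 1) as a modern device, but uses the fixed ID 0x1000 + /// as a legacy or transitional device.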
+ pub fn pci_dev_id(self, mode: Mode) -> Result<u16, Self> { + match mode { + Mode::Modern => Ok(self as u16 + 0x1040), + Mode::Legacy | Mode::Transitional => match self { + Self::Network => Ok(0x1000), + Self::Block => Ok(0x1001), + Self::TradMemBalloon => Ok(0x1002), + Self::Console => Ok(0x1003), + Self::Scsi => Ok(0x1004), + Self::Entropy => Ok(0x1005), + Self::NineP => Ok(0x1009), + Self::Socket => Ok(0x1012), // Taken from QEMU, used by Linux + _ => Err(self), + }, + } + } + + /// Maps a VirtIO Device ID to a PCI Device Sub ID. + /// XXX: Check these mappings against some reference. + pub fn pci_sub_dev_id(self, mode: Mode) -> Result<u16, Self> { + match mode { + Mode::Legacy | Mode::Transitional => Ok(self as u16), + Mode::Modern => self.pci_dev_id(mode), + } + } + + /// Maps a VirtIO Device ID to a PCI Device Class. + /// + /// Sadly, these mappings are mostly arbitrary. + pub fn pci_class(self) -> Result<u8, Self> { + match self { + Self::Network => Ok(pci_hw::bits::CLASS_NETWORK), + Self::Block | Self::NineP => Ok(pci_hw::bits::CLASS_STORAGE), + _ => Err(self), + } + } + + /// Constructs a crate::hw::pci::Ident from the given VirtIO device + /// ID and mode. + pub fn pci_ident(self, mode: Mode) -> Result<pci_hw::Ident, Self> { + use crate::hw::ids::pci::VENDOR_VIRTIO; + let vendor_id = VENDOR_VIRTIO; + let sub_vendor_id = VENDOR_VIRTIO; + let device_id = self.pci_dev_id(mode)?; + let sub_device_id = self.pci_sub_dev_id(mode)?; + let device_class = self.pci_class()?; + let revision_id = mode.pci_revision(); + Ok(pci_hw::Ident { + vendor_id, + device_id, + sub_vendor_id, + sub_device_id, + device_class, + revision_id, + ..Default::default() + }) + } +} + pub trait VirtioDevice: Send + Sync + 'static + Lifecycle { - /// Read/write device-specific virtio configuration space - fn cfg_rw(&self, ro: RWOp); + /// Read/write device-specific virtio configuration space. + fn rw_dev_config(&self, ro: RWOp); + + /// Returns the device virtio mode (Legacy, Transitional, Modern). + fn mode(&self) -> Mode; - /// Get the device-specific virtio feature bits - fn get_features(&self) -> u32; + /// Returns the device-specific virtio feature bits. + fn features(&self) -> u64; - /// Set the device-specific virtio feature bits + /// Sets the device-specific virtio feature bits /// /// Returns `Err` if an error occurred while setting the features. Doing so /// will transition the device to the Failed state.
- fn set_features(&self, feat: u32) -> Result<(), ()>; + fn set_features(&self, feat: u64) -> Result<(), ()>; /// Service driver notification for a given virtqueue fn queue_notify(&self, vq: &Arc<VirtQueue>); @@ -59,14 +233,18 @@ pub trait VirtioIntr: Send + 'static { pub enum VqChange { /// Underlying virtio device has been reset Reset, + /// Physical address changed for VQ Address, + /// MSI(-X) configuration changed for VQ IntrCfg, } + pub enum VqIntr { /// Pin (lintr) interrupt Pin, + /// MSI(-X) with address, data, and masked state Msi(u64, u32, bool), } diff --git a/lib/propolis/src/hw/virtio/p9fs.rs b/lib/propolis/src/hw/virtio/p9fs.rs index 78f10b559..73e26f43b 100644 --- a/lib/propolis/src/hw/virtio/p9fs.rs +++ b/lib/propolis/src/hw/virtio/p9fs.rs @@ -12,14 +12,13 @@ use std::path::PathBuf; use std::sync::{Arc, Mutex}; use crate::common::*; -use crate::hw::pci; +use crate::hw::{pci, virtio}; use crate::migrate::Migrator; use crate::util::regmap::RegMap; use crate::vmm::MemCtx; -use super::bits::*; use super::pci::{PciVirtio, PciVirtioState}; -use super::queue::{write_buf, Chain, VirtQueue, VirtQueues}; +use super::queue::{write_buf, Chain, VirtQueue, VirtQueues, VqSize}; use super::VirtioDevice; use ispf::WireSize; @@ -79,16 +78,13 @@ pub struct PciVirtio9pfs { impl PciVirtio9pfs { pub fn new(queue_size: u16, handler: Arc<dyn P9Handler>) -> Arc<Self> { - let queues = - VirtQueues::new([VirtQueue::new(queue_size.try_into().unwrap())]) - .unwrap(); + let queues = VirtQueues::new(&[VqSize::new(queue_size)]); let msix_count = Some(2); //guess - let (virtio_state, pci_state) = PciVirtioState::create( + let (virtio_state, pci_state) = PciVirtioState::new( + virtio::Mode::Legacy, queues, msix_count, - VIRTIO_DEV_9P, - VIRTIO_SUB_DEV_9P_TRANSPORT, - pci::bits::CLASS_STORAGE, + virtio::DeviceId::NineP, VIRTIO_9P_CFG_SIZE, ); Arc::new(Self { virtio_state, pci_state, handler }) } } impl VirtioDevice for PciVirtio9pfs { - fn cfg_rw(&self, mut rwo: RWOp) { + fn rw_dev_config(&self, mut rwo: RWOp) { P9FS_DEV_REGS.process(&mut rwo, |id, rwo| match rwo { RWOp::Read(ro) => { probes::p9fs_cfg_read!(|| ()); @@ -122,11 +118,15 @@ }) } - fn get_features(&self) -> u32 { + fn mode(&self) -> virtio::Mode { + virtio::Mode::Legacy + } + + fn features(&self) -> u64 { VIRTIO_9P_F_MOUNT_TAG } - fn set_features(&self, _feat: u32) -> Result<(), ()> { + fn set_features(&self, _feat: u64) -> Result<(), ()> { Ok(()) } @@ -182,7 +182,7 @@ pub(crate) mod bits { use std::mem::size_of; // features - pub const VIRTIO_9P_F_MOUNT_TAG: u32 = 0x1; + pub const VIRTIO_9P_F_MOUNT_TAG: u64 = 0x1; pub const VIRTIO_9P_MAX_TAG_SIZE: usize = 256; pub const VIRTIO_9P_CFG_SIZE: usize = diff --git a/lib/propolis/src/hw/virtio/pci.rs b/lib/propolis/src/hw/virtio/pci.rs index 5212673cb..87fd748c4 100644 --- a/lib/propolis/src/hw/virtio/pci.rs +++ b/lib/propolis/src/hw/virtio/pci.rs @@ -7,17 +7,20 @@ use std::num::NonZeroU16; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Condvar, Mutex, MutexGuard, Weak}; -use super::bits::*; use super::probes; -use super::queue::VirtQueues; +use super::queue::{self, VirtQueues}; use super::{VirtioDevice, VirtioIntr, VqChange, VqIntr}; -use crate::common::*; -use crate::hw::ids::pci::VENDOR_VIRTIO; -use crate::hw::pci; +use crate::common::{RWOp, ReadOp, WriteOp, PAGE_SHIFT, PAGE_SIZE}; +use crate::hw::pci::{self, BarN, CapId}; +use crate::hw::virtio; +use crate::hw::virtio::queue::VqSize; use crate::intr_pins::IntrPin; -use 
crate::migrate::*; +use crate::migrate::{ + MigrateCtx, MigrateMulti, MigrateStateError, PayloadOffers, PayloadOutputs, +}; use crate::util::regmap::RegMap; +use bit_field::BitField; use lazy_static::lazy_static; const VIRTIO_MSI_NO_VECTOR: u16 = 0xffff; @@ -26,19 +29,19 @@ const VIRTIO_PCI_ISR_QUEUE: u8 = 1 << 0; const VIRTIO_PCI_ISR_CFG: u8 = 1 << 1; bitflags! { - #[derive(Default, PartialEq)] + #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct Status: u8 { const RESET = 0; - const ACK = 1; - const DRIVER = 2; - const DRIVER_OK = 4; - const FEATURES_OK = 8; - const NEEDS_RESET = 64; - const FAILED = 128; + const ACK = 1 << 0; + const DRIVER = 1 << 1; + const DRIVER_OK = 1 << 2; + const FEATURES_OK = 1 << 3; + const NEEDS_RESET = 1 << 6; + const FAILED = 1 << 7; } } -#[derive(Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] enum IntrMode { IsrOnly, IsrLintr, @@ -56,33 +59,66 @@ impl From<pci::IntrMode> for IntrMode { struct VirtioState { status: Status, - queue_sel: u16, - nego_feat: u32, + queue_select: u16, + negotiated_features: u64, + /// Selects which 32-bit half of the 64-bit feature register is visible + /// through the feature window: 0 selects the low half (the only choice + /// for legacy devices), 1 selects the high half. + device_feature_select: u32, + driver_feature_select: u32, + config_generation: u8, + config_generation_seen: bool, + device_config_size: usize, + mode: virtio::Mode, intr_mode: IntrMode, intr_mode_updating: bool, msix_cfg_vec: u16, msix_queue_vec: Vec<u16>, } + impl VirtioState { - fn new(num_queues: u16) -> Self { - let mut msix_queue_vec = Vec::with_capacity(num_queues as usize); - msix_queue_vec - .resize_with(num_queues as usize, || VIRTIO_MSI_NO_VECTOR); + fn new( + device_config_size: usize, + nmsix: usize, + mode: virtio::Mode, + ) -> Self { + let msix_queue_vec = vec![VIRTIO_MSI_NO_VECTOR; nmsix]; Self { status: Status::RESET, - queue_sel: 0, - nego_feat: 0, + queue_select: 0, + negotiated_features: 0, + device_feature_select: 0, + driver_feature_select: 0, + config_generation: 0, + config_generation_seen: false, + device_config_size, + mode, intr_mode: IntrMode::IsrOnly, intr_mode_updating: false, msix_cfg_vec: VIRTIO_MSI_NO_VECTOR, msix_queue_vec, } } + fn reset(&mut self) { self.status = Status::RESET; - self.queue_sel = 0; - self.nego_feat = 0; + self.queue_select = 0; + self.negotiated_features = 0; + self.config_generation = 0; + self.config_generation_seen = false; self.msix_cfg_vec = VIRTIO_MSI_NO_VECTOR; + self.msix_queue_vec.fill(VIRTIO_MSI_NO_VECTOR); + } + + fn witness_config_generation(&mut self) { + self.config_generation_seen = true; + } + + fn _evolve_config_generation(&mut self) { + if self.config_generation_seen { + self.config_generation = self.config_generation.wrapping_add(1); + self.config_generation_seen = false; + } } } @@ -90,33 +126,47 @@ pub trait PciVirtio: VirtioDevice + Send + Sync + 'static { fn virtio_state(&self) -> &PciVirtioState; fn pci_state(&self) -> &pci::DeviceState; - #[allow(unused_variables)] - /// Notification that the IO port representing the queue notification - /// register in the device BAR has changed. - fn notify_port_update(&self, state: Option<NonZeroU16>) {} + /// Handles notification that the IO port representing the queue + /// notification register in the device BAR has changed. + fn notify_port_update(&self, state: Option<NonZeroU16>) { + let _used = state; + } + + /// Handles notification that an MMIO address in the range representing the + /// queue notification register in the device BAR has changed.
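+ /// (Concretely, the address pushed for BAR2 is the BAR's guest-physical + /// base plus NOTIFY_REG_OFFSET, the start of the queue-notify block in + /// the layout built by PciVirtioState::new; None is pushed when decoding + /// is disabled.)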
+ fn notify_mmio_addr_update(&self, addr: Option<u64>) { + let _used = addr; + } - /// Notification from the PCI emulation that one of the BARs has undergone a - /// change of configuration + /// Handles notification from the PCI emulation layer that one of the BARs + /// has undergone a configuration change. fn bar_update(&self, bstate: pci::BarState) { - if bstate.id == pci::BarN::BAR0 { - // Notify the device about the location (if any) of the Queue Notify - // register in the containing BAR region. - let port = if bstate.decode_en { - // Having registered `bstate.value` as the address in BAR0 only - // succeeds if that address through to the size of the - // registered region - the virtio legacy config registers - does - // not wrap. The base address *could* be zero, unwise as that - // would be, but adding LEGACY_REG_OFF_QUEUE_NOTIFY guarantees - // that the computed offset here is non-zero. - let notify_port_addr = NonZeroU16::new( - bstate.value as u16 + LEGACY_REG_OFF_QUEUE_NOTIFY as u16, - ) - .expect("addition does not wrap"); - Some(notify_port_addr) - } else { - None - }; - self.notify_port_update(port); + match bstate.id { + pci::BarN::BAR0 => { + // Notify the device about the location (if any) of the Queue + // Notify register in the containing BAR region. + let port = bstate.decode_en.then(|| { + // Having registered `bstate.value` as the address in BAR0 + // only succeeds if that address through to the size of the + // registered region - the virtio legacy config registers - + // does not wrap. The base address *could* be zero, unwise + // as that would be, but adding LEGACY_REG_OFF_QUEUE_NOTIFY + // guarantees that the computed offset here is non-zero. + NonZeroU16::new( + bstate.value as u16 + + LEGACY_REG_QUEUE_NOTIFY_OFFSET as u16, + ) + .expect("addition does not wrap") + }); + self.notify_port_update(port); + } + pci::BarN::BAR2 => { + let addr = bstate + .decode_en + .then(|| bstate.value + NOTIFY_REG_OFFSET as u64); + self.notify_mmio_addr_update(addr); + } + _ => {} } } } @@ -125,16 +175,44 @@ impl<D: PciVirtio> pci::Device for D { fn device_state(&self) -> &pci::DeviceState { self.pci_state() } + fn bar_rw(&self, bar: pci::BarN, mut rwo: RWOp) { let vs = self.virtio_state(); - - assert_eq!(bar, pci::BarN::BAR0); - let map = match vs.map_which.load(Ordering::SeqCst) { - false => &vs.map_nomsix, - true => &vs.map, + let map = match bar { + pci::BarN::BAR0 => { + if vs.legacy_map_use_msix.load(Ordering::SeqCst) { + &vs.legacy_config + } else { + &vs.legacy_config_nomsix + } + } + pci::BarN::BAR2 => &vs.common_config, + _ => panic!("Config IO to unsupported BAR {bar:?}"), }; map.process(&mut rwo, |id, mut rwo| match id { - VirtioTop::LegacyConfig => { + VirtioConfigRegBlock::Common => { + COMMON_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.common_read(self, id, ro), + RWOp::Write(wo) => { + vs.common_write(self.pci_state(), self, id, wo) + } + }) + } + VirtioConfigRegBlock::Notify => { + NOTIFY_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.notify_read(id, ro), + RWOp::Write(wo) => vs.notify_write(self, id, wo), + }) + } + VirtioConfigRegBlock::IsrStatus => { + ISR_STATUS_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.isr_status_read(id, ro), + RWOp::Write(_wo) => { + // Read-only for device.
+ } + }) + } + VirtioConfigRegBlock::Legacy => { LEGACY_REGS.process(&mut rwo, |id, rwo| match rwo { RWOp::Read(ro) => vs.legacy_read(self, id, ro), RWOp::Write(wo) => { @@ -142,9 +220,80 @@ } }) } - VirtioTop::DeviceConfig => self.cfg_rw(rwo), + VirtioConfigRegBlock::DeviceConfig => self.rw_dev_config(rwo), + // Write ignored, read as zero. + VirtioConfigRegBlock::RazWi => {} }); } + + fn cap_rw(&self, id: CapId<u32>, mut rwo: RWOp) { + let vs = self.virtio_state(); + let id = { + let CapId::Vendor(tag) = id else { + unimplemented!("Unhandled capability type: {id:x?}"); + }; + let Ok(id) = VirtioCfgCapTag::try_from(tag) else { + unimplemented!("Unknown vendor capability: {id:x?}"); + }; + id + }; + match id { + VirtioCfgCapTag::Common => { + COMMON_CFG_CAP_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.common_cfg_cap_read(id, ro), + RWOp::Write(_) => { + // Read-only for driver + } + }); + } + VirtioCfgCapTag::Notify => { + NOTIFY_CFG_CAP_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.notify_cfg_cap_read(id, ro), + RWOp::Write(_) => { + // Read-only for driver + } + }); + } + VirtioCfgCapTag::Isr => { + COMMON_CFG_CAP_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.isr_cfg_cap_read(id, ro), + RWOp::Write(_) => { + // Read-only for driver + } + }); + } + VirtioCfgCapTag::Device => { + COMMON_CFG_CAP_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.device_cfg_cap_read(id, ro), + RWOp::Write(_) => { + // Note: unlike most other hypervisors, Propolis does + // not presently support writes via the device config + // register. So, e.g., one cannot set a MAC address this + // way. + // TODO: Plumb a logging object through into here. + // error!( + // self.log, + // "unsupported write {wo:?} to dev config register" + // ), + eprintln!("unsupported write to device cap reg"); + } + }) + } + VirtioCfgCapTag::Pci => { + PCI_CFG_CAP_REGS.process(&mut rwo, |id, rwo| match rwo { + RWOp::Read(ro) => vs.pci_cfg_cap_read(self, id, ro), + RWOp::Write(wo) => vs.pci_cfg_cap_write(self, id, wo), + }); + } + VirtioCfgCapTag::SharedMemory => { + unimplemented!("VirtIO Shared Memory is unsupported"); + } + VirtioCfgCapTag::Vendor => { + unimplemented!("VirtIO Vendor capabilities are unsupported"); + } + } + } + fn attach(&self) { let ps = self.pci_state(); if let Some(pin) = ps.lintr_pin() { @@ -152,10 +301,12 @@ vs.isr_state.set_pin(pin); } } - fn interrupt_mode_change(&self, mode: pci::IntrMode) { + + fn interrupt_mode_change(&self, intr_mode: pci::IntrMode) { let vs = self.virtio_state(); - vs.set_intr_mode(self.pci_state(), mode.into(), false); + vs.set_intr_mode(self.pci_state(), intr_mode.into(), false); } + fn msi_update(&self, info: pci::MsiUpdate) { let vs = self.virtio_state(); let mut state = vs.state.lock().unwrap(); @@ -207,67 +358,109 @@ pub struct PciVirtioState { state_cv: Condvar, isr_state: Arc<IsrState>, - /// Quick access to register map for MSIX (true) or non-MSIX (false) - map_which: AtomicBool, + common_config: RegMap<VirtioConfigRegBlock>, + + legacy_config: RegMap<VirtioConfigRegBlock>, + legacy_config_nomsix: RegMap<VirtioConfigRegBlock>, - map: RegMap<VirtioTop>, - map_nomsix: RegMap<VirtioTop>, + /// Quick access to register map for MSIX (true) or non-MSIX (false) + legacy_map_use_msix: AtomicBool, } + impl PciVirtioState { - pub(super) fn create( + pub(super) fn new( + mode: virtio::Mode, queues: VirtQueues, msix_count: Option<u16>, - dev_id: u16, - sub_dev_id: u16, - dev_class: u8, - cfg_sz: usize, + device_type: virtio::DeviceId, + cfg_size: usize, ) -> (Self,
pci::DeviceState) { - let mut builder = pci::Builder::new(pci::Ident { - vendor_id: VENDOR_VIRTIO, - device_id: dev_id, - sub_vendor_id: VENDOR_VIRTIO, - sub_device_id: sub_dev_id, - class: dev_class, - ..Default::default() - }) - .add_lintr(); + assert!(cfg_size < PAGE_SIZE); + assert!(cfg_size + LEGACY_REG_SIZE < 0x200); + + let ident = device_type.pci_ident(mode).expect("PCI Ident"); + let mut builder = pci::Builder::new(ident).add_lintr(); if let Some(count) = msix_count { builder = builder.add_cap_msix(pci::BarN::BAR1, count); } - // XXX: properly size the legacy cfg BAR - builder = builder.add_bar_io(pci::BarN::BAR0, 0x200); + if mode == virtio::Mode::Transitional || mode == virtio::Mode::Legacy { + // XXX: properly size the legacy cfg BAR + builder = builder.add_bar_io(pci::BarN::BAR0, 0x200); + } + if mode == virtio::Mode::Transitional || mode == virtio::Mode::Modern { + builder = + builder.add_bar_mmio(pci::BarN::BAR2, 4 * PAGE_SIZE as u32); + builder = builder.add_cap_vendor( + VirtioCfgCapTag::Common.into(), + COMMON_CFG_CAP_SIZE, + ); + // Note: we don't presently support a non-zero multiplier for the + // notification register, so we don't need to size this for the + // number of queues; hence the fixed size. + builder = builder.add_cap_vendor( + VirtioCfgCapTag::Notify.into(), + NOTIFY_CFG_CAP_SIZE, + ); + builder = builder.add_cap_vendor( + VirtioCfgCapTag::Isr.into(), + COMMON_CFG_CAP_SIZE, + ); + builder = builder.add_cap_vendor( + VirtioCfgCapTag::Device.into(), + COMMON_CFG_CAP_SIZE, + ); + builder = builder + .add_cap_vendor(VirtioCfgCapTag::Pci.into(), PCI_CFG_CAP_SIZE); + } let pci_state = builder.finish(); - let layout = [ - (VirtioTop::LegacyConfig, LEGACY_REG_SZ), - (VirtioTop::DeviceConfig, cfg_sz), - ]; - let layout_nomsix = [ - (VirtioTop::LegacyConfig, LEGACY_REG_SZ_NO_MSIX), - (VirtioTop::DeviceConfig, cfg_sz), - ]; - + // With respect to layout, for the time being, we are unconditionally + // transitional, meaning that we support both the legacy and common + // configuration layouts. 
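+        // As an illustration of the layout constructed below, BAR2 ends up
+        // looking like this (each register block at the start of its own
+        // 4 KiB page; the offsets are our own choice, advertised to the
+        // driver through the vendor capabilities rather than mandated by
+        // the spec):
+        //
+        //   +0x0000  common configuration  (COMMON_REG_OFFSET)
+        //   +0x1000  device configuration  (DEVICE_REG_OFFSET)
+        //   +0x2000  queue notification    (NOTIFY_REG_OFFSET)
+        //   +0x3000  ISR status            (ISR_STATUS_REG_OFFSET)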
+ let common_config = RegMap::create_packed_passthru( + 4 * PAGE_SIZE, + &[ + (VirtioConfigRegBlock::Common, COMMON_REG_SIZE), + (VirtioConfigRegBlock::RazWi, PAGE_SIZE - COMMON_REG_SIZE), + (VirtioConfigRegBlock::DeviceConfig, cfg_size), + (VirtioConfigRegBlock::RazWi, PAGE_SIZE - cfg_size), + (VirtioConfigRegBlock::Notify, NOTIFY_REG_SIZE), + (VirtioConfigRegBlock::RazWi, PAGE_SIZE - NOTIFY_REG_SIZE), + (VirtioConfigRegBlock::IsrStatus, ISR_STATUS_REG_SIZE), + (VirtioConfigRegBlock::RazWi, PAGE_SIZE - ISR_STATUS_REG_SIZE), + ], + ); + let legacy_config = RegMap::create_packed_passthru( + cfg_size + LEGACY_REG_SIZE, + &[ + (VirtioConfigRegBlock::Legacy, LEGACY_REG_SIZE), + (VirtioConfigRegBlock::DeviceConfig, cfg_size), + ], + ); + let legacy_config_nomsix = RegMap::create_packed_passthru( + cfg_size + LEGACY_REG_SIZE_NO_MSIX, + &[ + (VirtioConfigRegBlock::Legacy, LEGACY_REG_SIZE_NO_MSIX), + (VirtioConfigRegBlock::DeviceConfig, cfg_size), + ], + ); + let legacy_map_use_msix = AtomicBool::new(false); // Allow VQs to access memory through the PCI state - - let queue_count = queues.count().get(); + let nmsix = queues.max_capacity(); + let state = Mutex::new(VirtioState::new(cfg_size, nmsix, mode)); + let state_cv = Condvar::new(); + let isr_state = IsrState::new(); let this = Self { queues, - - state: Mutex::new(VirtioState::new(queue_count)), - state_cv: Condvar::new(), - isr_state: IsrState::new(), - - map: RegMap::create_packed_passthru( - cfg_sz + LEGACY_REG_SZ, - &layout, - ), - map_nomsix: RegMap::create_packed_passthru( - cfg_sz + LEGACY_REG_SZ_NO_MSIX, - &layout_nomsix, - ), - map_which: AtomicBool::new(false), + state, + state_cv, + isr_state, + common_config, + legacy_config, + legacy_config_nomsix, + legacy_map_use_msix, }; for queue in this.queues.iter() { @@ -280,23 +473,459 @@ impl PciVirtioState { (this, pci_state) } + pub fn mode(&self) -> virtio::Mode { + self.state.lock().unwrap().mode + } + + fn qaddr(&self, queue_select: u16, thunk: F) -> u64 + where + F: FnOnce(&virtio::queue::MapInfo) -> u64, + { + self.queues + .get(queue_select) + .map(|queue| { + let state = queue.get_state(); + thunk(&state.mapping) + }) + .unwrap_or(0) + } + + fn common_read( + &self, + dev: &dyn VirtioDevice, + id: &CommonConfigReg, + ro: &mut ReadOp, + ) { + match id { + CommonConfigReg::DeviceFeatureSelect => { + let state = self.state.lock().unwrap(); + ro.write_u32(state.device_feature_select); + } + CommonConfigReg::DeviceFeature => { + let state = self.state.lock().unwrap(); + let shift = state.device_feature_select * 32; + let features = if shift < 64 { + self.features_supported(dev) >> shift + } else { + 0 + }; + ro.write_u32(features as u32); + } + CommonConfigReg::DriverFeatureSelect => { + let state = self.state.lock().unwrap(); + ro.write_u32(state.driver_feature_select); + } + CommonConfigReg::DriverFeature => { + let state = self.state.lock().unwrap(); + let shift = state.driver_feature_select * 32; + let features = if shift < 64 { + state.negotiated_features >> shift + } else { + 0 + }; + ro.write_u32(features as u32); + } + CommonConfigReg::ConfigMsixVector => { + let state = self.state.lock().unwrap(); + ro.write_u16(state.msix_cfg_vec); + } + CommonConfigReg::NumQueues => { + ro.write_u16(self.queues.count().get()); + } + CommonConfigReg::DeviceStatus => { + let state = self.state.lock().unwrap(); + ro.write_u8(state.status.bits()); + } + CommonConfigReg::ConfigGeneration => { + let mut state = self.state.lock().unwrap(); + state.witness_config_generation(); + 
ro.write_u8(state.config_generation); + } + CommonConfigReg::QueueSelect => { + let state = self.state.lock().unwrap(); + ro.write_u16(state.queue_select); + } + CommonConfigReg::QueueSize => { + let state = self.state.lock().unwrap(); + let size = self + .queues + .get(state.queue_select) + .map(|vq| vq.size()) + .unwrap_or(0); + ro.write_u16(size); + } + CommonConfigReg::QueueMsixVector => { + let state = self.state.lock().unwrap(); + let vector = state + .msix_queue_vec + .get(state.queue_select as usize) + .map(|queue_sel| *queue_sel) + .unwrap_or(VIRTIO_MSI_NO_VECTOR); + ro.write_u16(vector); + } + CommonConfigReg::QueueEnable => { + let state = self.state.lock().unwrap(); + let enabled = self + .queues + .get(state.queue_select) + .map(|q| q.is_enabled()) + .unwrap_or(false); + ro.write_u16(enabled.into()) + } + CommonConfigReg::QueueNotifyOffset => { + ro.write_u16(0); + } + CommonConfigReg::QueueDescAddr => { + let state = self.state.lock().unwrap(); + let addr = self.qaddr(state.queue_select, |m| m.desc_addr); + ro.write_u64(addr); + } + CommonConfigReg::QueueDriverAddr => { + let state = self.state.lock().unwrap(); + let addr = self.qaddr(state.queue_select, |m| m.avail_addr); + ro.write_u64(addr); + } + CommonConfigReg::QueueDeviceAddr => { + let state = self.state.lock().unwrap(); + let addr = self.qaddr(state.queue_select, |m| m.used_addr); + ro.write_u64(addr); + } + // Note: currently unused. + CommonConfigReg::QueueNotifyData => { + let state = self.state.lock().unwrap(); + let data = self + .queues + .get(state.queue_select) + .map(|q| q.notify_data) + .unwrap_or(0); + ro.write_u16(data); + } + // Note: currently unused. + CommonConfigReg::QueueReset => { + ro.write_u16(0); + } + } + } + + fn common_write( + &self, + pci_state: &pci::DeviceState, + dev: &dyn VirtioDevice, + id: &CommonConfigReg, + wo: &mut WriteOp, + ) { + match id { + CommonConfigReg::DeviceFeatureSelect => { + let mut state = self.state.lock().unwrap(); + state.device_feature_select = wo.read_u32(); + } + CommonConfigReg::DeviceFeature => { + // Read-only for driver + } + CommonConfigReg::DriverFeatureSelect => { + let mut state = self.state.lock().unwrap(); + state.driver_feature_select = wo.read_u32(); + } + CommonConfigReg::DriverFeature => { + let mut state = self.state.lock().unwrap(); + let shift = state.driver_feature_select * 32; + if shift < 64 { + let current = { + let lo = 32 - shift as usize; + let hi = 64 - shift as usize; + state.negotiated_features.get_bits(lo..hi) << lo + }; + let offered = (u64::from(wo.read_u32()) << shift) | current; + let negotiated = self.features_supported(dev) & offered; + state.negotiated_features = negotiated; + } + } + CommonConfigReg::ConfigMsixVector => { + let mut state = self.state.lock().unwrap(); + state.msix_cfg_vec = wo.read_u16(); + } + CommonConfigReg::NumQueues => { + // Read-only for driver + } + CommonConfigReg::DeviceStatus => { + self.set_status(dev, wo.read_u8()); + } + CommonConfigReg::ConfigGeneration => { + // Read-only for driver + } + CommonConfigReg::QueueSelect => { + let mut state = self.state.lock().unwrap(); + state.queue_select = wo.read_u16(); + } + CommonConfigReg::QueueSize => { + let state = self.state.lock().unwrap(); + match VqSize::try_from(wo.read_u16()) { + Err(_) => { + // Bad queue size. + self.set_needs_reset(dev); + } + Ok(offered) => { + let qs = state.queue_select; + let Some(queue) = self.queues.get(qs) else { + // Invalid queue; write dropped. 
+ return; + }; + let mut size = queue.size.lock().unwrap(); + *size = offered; + } + } + } + CommonConfigReg::QueueMsixVector => { + let hdl = pci_state.msix_hdl().unwrap(); + let mut state = self.state.lock().unwrap(); + let sel = state.queue_select as usize; + if let Some(queue) = self.queues.get(state.queue_select) { + let val = wo.read_u16(); + + if state.intr_mode != IntrMode::Msi { + // Store the vector information for later + state.msix_queue_vec[sel] = val; + } else { + state = self + .state_cv + .wait_while(state, |s| s.intr_mode_updating) + .unwrap(); + state.intr_mode_updating = true; + state.msix_queue_vec[sel] = val; + + // State lock cannot be held while updating queue + // interrupt handlers due to deadlock possibility. + drop(state); + queue.set_intr(MsiIntr::new(hdl, val)); + state = self.state.lock().unwrap(); + + // With the MSI configuration updated for the virtqueue, + // notify the device of the change + if dev.queue_change(queue, VqChange::IntrCfg).is_err() { + self.needs_reset_locked(dev, &mut state); + } + + state.intr_mode_updating = false; + self.state_cv.notify_all(); + } + } + } + CommonConfigReg::QueueEnable => { + let mut state = self.state.lock().unwrap(); + let enabled = wo.read_u16() != 0; + if let Some(queue) = self.queues.get(state.queue_select) { + if enabled { + queue.enable(); + if dev.queue_change(queue, VqChange::Address).is_err() { + self.needs_reset_locked(dev, &mut state); + } + } + } + } + CommonConfigReg::QueueNotifyOffset => { + // Read-only for driver + } + CommonConfigReg::QueueDescAddr => { + let state = self.state.lock().unwrap(); + let offered_desc_addr = wo.read_u64(); + if let Some(queue) = self.queues.get(state.queue_select) { + let current = &queue.get_state().mapping; + queue.map_virtqueue( + offered_desc_addr, + current.avail_addr, + current.used_addr, + ); + } + } + CommonConfigReg::QueueDriverAddr => { + let state = self.state.lock().unwrap(); + let offered_avail_addr = wo.read_u64(); + if let Some(queue) = self.queues.get(state.queue_select) { + let current = &queue.get_state().mapping; + queue.map_virtqueue( + current.desc_addr, + offered_avail_addr, + current.used_addr, + ); + } + } + CommonConfigReg::QueueDeviceAddr => { + let state = self.state.lock().unwrap(); + let offered_used_addr = wo.read_u64(); + if let Some(queue) = self.queues.get(state.queue_select) { + let current = &queue.get_state().mapping; + queue.map_virtqueue( + current.desc_addr, + current.avail_addr, + offered_used_addr, + ); + } + } + CommonConfigReg::QueueNotifyData => { + // Read-only for driver + } + // Note that this is a per-queue register, but since we don't + // advertise the `VIRTIO_F_RING_RESET` feature bit, if we see + // it, resetting the device isn't unreasonable. 
+ CommonConfigReg::QueueReset => self.set_needs_reset(dev), + } + } + + fn notify_read(&self, id: &NotifyReg, ro: &mut ReadOp) { + match id { + NotifyReg::Notify => { + ro.write_u16(0); + } + } + } + + fn notify_write( + &self, + dev: &dyn VirtioDevice, + id: &NotifyReg, + wo: &mut WriteOp, + ) { + match id { + NotifyReg::Notify => self.queue_notify(dev, wo.read_u16()), + } + } + + fn isr_status_read(&self, id: &IsrStatusReg, ro: &mut ReadOp) { + match id { + IsrStatusReg::IsrStatus => { + // reading ISR Status clears it as well + let isr = self.isr_state.read_clear(); + ro.write_u8(isr); + } + } + } + + fn common_cfg_cap_read(&self, id: &CommonCfgCapReg, op: &mut ReadOp) { + match id { + CommonCfgCapReg::CapLen => op.write_u8(COMMON_CFG_CAP_SIZE + 2), + CommonCfgCapReg::CfgType => { + op.write_u8(VirtioCfgCapTag::Common as u8) + } + CommonCfgCapReg::Bar => op.write_u8(BarN::BAR2 as u8), + CommonCfgCapReg::Id => op.write_u8(0), + CommonCfgCapReg::Padding => {} + CommonCfgCapReg::Offset => op.write_u32(COMMON_REG_OFFSET as u32), + CommonCfgCapReg::Length => op.write_u32(COMMON_REG_SIZE as u32), + } + } + + fn notify_cfg_cap_read(&self, id: &NotifyCfgCapReg, op: &mut ReadOp) { + match id { + NotifyCfgCapReg::Common(common_id) => match common_id { + CommonCfgCapReg::CfgType => { + op.write_u8(VirtioCfgCapTag::Notify as u8) + } + CommonCfgCapReg::CapLen => op.write_u8(NOTIFY_CFG_CAP_SIZE + 2), + CommonCfgCapReg::Offset => { + op.write_u32(NOTIFY_REG_OFFSET as u32) + } + CommonCfgCapReg::Length => op.write_u32(NOTIFY_REG_SIZE as u32), + _ => self.common_cfg_cap_read(common_id, op), + }, + NotifyCfgCapReg::Multiplier => op.write_u32(0), + } + } + + fn device_cfg_cap_read(&self, id: &CommonCfgCapReg, op: &mut ReadOp) { + match id { + CommonCfgCapReg::CfgType => { + op.write_u8(VirtioCfgCapTag::Device as u8) + } + CommonCfgCapReg::Offset => op.write_u32(DEVICE_REG_OFFSET as u32), + CommonCfgCapReg::Length => { + let state = self.state.lock().unwrap(); + op.write_u32(state.device_config_size as u32); + } + _ => self.common_cfg_cap_read(id, op), + } + } + + fn isr_cfg_cap_read(&self, id: &CommonCfgCapReg, op: &mut ReadOp) { + match id { + CommonCfgCapReg::CfgType => op.write_u8(VirtioCfgCapTag::Isr as u8), + CommonCfgCapReg::Offset => { + op.write_u32(ISR_STATUS_REG_OFFSET as u32) + } + CommonCfgCapReg::Length => op.write_u32(ISR_STATUS_REG_SIZE as u32), + _ => self.common_cfg_cap_read(id, op), + } + } + + fn pci_cfg_cap_read( + &self, + dev: &dyn VirtioDevice, + id: &PciCfgCapReg, + op: &mut ReadOp, + ) { + let _todo = dev; + match id { + PciCfgCapReg::Common(common_id) => match common_id { + CommonCfgCapReg::CfgType => { + op.write_u8(VirtioCfgCapTag::Pci as u8) + } + CommonCfgCapReg::Bar => op.write_u8(0), // TODO: Handle + CommonCfgCapReg::Offset => op.write_u32(0), // TODO: Handle + CommonCfgCapReg::Length => op.write_u32(0), // TODO: Handle + _ => self.common_cfg_cap_read(common_id, op), + }, + PciCfgCapReg::PciData => { + // TODO: We actually need to handle this. + op.write_u32(0); + } + } + } + + fn pci_cfg_cap_write( + &self, + dev: &dyn VirtioDevice, + id: &PciCfgCapReg, + op: &mut WriteOp, + ) { + let _todo = (dev, op); + match id { + PciCfgCapReg::Common(common_id) => { + match common_id { + CommonCfgCapReg::Bar => { + // TODO: Store the bar + } + CommonCfgCapReg::Offset => { + // TODO: Store the offset + } + CommonCfgCapReg::Length => { + // TODO: Store the length + } + // Everything else is read-only for the driver. 
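+                        // (Per sec 4.1.4.9 of VirtIO 1.2, this capability is
+                        // an alternative access window: the driver programs
+                        // Bar/Offset/Length and then moves data through
+                        // `PciData` to reach BAR registers via config space;
+                        // hence the TODOs above to store those fields.)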
+ _ => {} + } + } + PciCfgCapReg::PciData => { + // TODO: Handle the write. + } + } + } + fn legacy_read( &self, dev: &dyn VirtioDevice, - id: &LegacyReg, + id: &LegacyConfigReg, ro: &mut ReadOp, ) { match id { - LegacyReg::FeatDevice => { - ro.write_u32(self.features_supported(dev)); + LegacyConfigReg::DeviceFeature => { + ro.write_u32(self.features_supported(dev) as u32); } - LegacyReg::FeatDriver => { + LegacyConfigReg::DriverFeature => { let state = self.state.lock().unwrap(); - ro.write_u32(state.nego_feat); + ro.write_u32(state.negotiated_features as u32); } - LegacyReg::QueuePfn => { + LegacyConfigReg::QueueAddress4k => { let state = self.state.lock().unwrap(); - if let Some(queue) = self.queues.get(state.queue_sel) { + if let Some(queue) = self.queues.get(state.queue_select) { let qs = queue.get_state(); let addr = qs.mapping.desc_addr; ro.write_u32((addr >> PAGE_SHIFT) as u32); @@ -305,71 +934,76 @@ impl PciVirtioState { ro.write_u32(0); } } - LegacyReg::QueueSize => { + LegacyConfigReg::QueueSize => { let state = self.state.lock().unwrap(); let sz = self .queues - .get(state.queue_sel) + .get(state.queue_select) .map(|vq| vq.size()) .unwrap_or(0); ro.write_u16(sz); } - LegacyReg::QueueSelect => { + LegacyConfigReg::QueueSelect => { let state = self.state.lock().unwrap(); - ro.write_u16(state.queue_sel); + ro.write_u16(state.queue_select); } - LegacyReg::QueueNotify => {} - LegacyReg::DeviceStatus => { + LegacyConfigReg::QueueNotify => {} + LegacyConfigReg::DeviceStatus => { let state = self.state.lock().unwrap(); ro.write_u8(state.status.bits()); } - LegacyReg::IsrStatus => { + LegacyConfigReg::IsrStatus => { // reading ISR Status clears it as well let isr = self.isr_state.read_clear(); ro.write_u8(isr); } - LegacyReg::MsixVectorConfig => { + LegacyConfigReg::ConfigMsixVector => { let state = self.state.lock().unwrap(); ro.write_u16(state.msix_cfg_vec); } - LegacyReg::MsixVectorQueue => { + LegacyConfigReg::QueueMsixVector => { let state = self.state.lock().unwrap(); let val = state .msix_queue_vec - .get(state.queue_sel as usize) - .unwrap_or(&VIRTIO_MSI_NO_VECTOR); - ro.write_u16(*val); + .get(state.queue_select as usize) + .map(|queue_sel| *queue_sel) + .unwrap_or(VIRTIO_MSI_NO_VECTOR); + ro.write_u16(val); } } } + fn legacy_write( &self, pci_state: &pci::DeviceState, dev: &dyn VirtioDevice, - id: &LegacyReg, + id: &LegacyConfigReg, wo: &mut WriteOp, ) { match id { - LegacyReg::FeatDriver => { - let nego = wo.read_u32() & self.features_supported(dev); + LegacyConfigReg::DriverFeature => { + let offered = u64::from(wo.read_u32()); + let negotiated = self.features_supported(dev) & offered; let mut state = self.state.lock().unwrap(); - match dev.set_features(nego) { + match dev.set_features(negotiated) { Ok(_) => { - state.nego_feat = nego; + state.negotiated_features = negotiated; } Err(_) => { self.needs_reset_locked(dev, &mut state); } } } - LegacyReg::QueuePfn => { + LegacyConfigReg::QueueAddress4k => { let mut state = self.state.lock().unwrap(); let pfn = wo.read_u32(); - if let Some(queue) = self.queues.get(state.queue_sel) { + if pfn == 0 { + return; + } + if let Some(queue) = self.queues.get(state.queue_select) { let qs_old = queue.get_state(); let new_addr = u64::from(pfn) << PAGE_SHIFT; queue.map_legacy(new_addr); - if qs_old.mapping.desc_addr != new_addr { if dev.queue_change(queue, VqChange::Address).is_err() { self.needs_reset_locked(dev, &mut state); @@ -377,75 +1011,58 @@ impl PciVirtioState { } } } - LegacyReg::QueueSelect => { - let mut state = 
self.state.lock().unwrap(); - state.queue_sel = wo.read_u16(); + LegacyConfigReg::QueueSelect => { + self.common_write( + pci_state, + dev, + &CommonConfigReg::QueueSelect, + wo, + ); } - LegacyReg::QueueNotify => { + LegacyConfigReg::QueueNotify => { self.queue_notify(dev, wo.read_u16()); } - LegacyReg::DeviceStatus => { + LegacyConfigReg::DeviceStatus => { self.set_status(dev, wo.read_u8()); } - LegacyReg::MsixVectorConfig => { + LegacyConfigReg::ConfigMsixVector => { let mut state = self.state.lock().unwrap(); state.msix_cfg_vec = wo.read_u16(); } - LegacyReg::MsixVectorQueue => { - let hdl = pci_state.msix_hdl().unwrap(); - let mut state = self.state.lock().unwrap(); - let sel = state.queue_sel as usize; - if let Some(queue) = self.queues.get(state.queue_sel) { - let val = wo.read_u16(); - - if state.intr_mode != IntrMode::Msi { - // Store the vector information for later - state.msix_queue_vec[sel] = val; - } else { - state = self - .state_cv - .wait_while(state, |s| s.intr_mode_updating) - .unwrap(); - state.intr_mode_updating = true; - state.msix_queue_vec[sel] = val; - - // State lock cannot be held while updating queue - // interrupt handlers due to deadlock possibility. - drop(state); - queue.set_intr(MsiIntr::new(hdl, val)); - state = self.state.lock().unwrap(); - - // With the MSI configuration updated for the virtqueue, - // notify the device of the change - if dev.queue_change(queue, VqChange::IntrCfg).is_err() { - self.needs_reset_locked(dev, &mut state); - } - - state.intr_mode_updating = false; - self.state_cv.notify_all(); - } - } + LegacyConfigReg::QueueMsixVector => { + self.common_write( + pci_state, + dev, + &CommonConfigReg::QueueMsixVector, + wo, + ); } - LegacyReg::FeatDevice - | LegacyReg::QueueSize - | LegacyReg::IsrStatus => { + LegacyConfigReg::DeviceFeature + | LegacyConfigReg::QueueSize + | LegacyConfigReg::IsrStatus => { // Read-only regs } } } - fn features_supported(&self, dev: &dyn VirtioDevice) -> u32 { - dev.get_features() | VIRTIO_F_RING_INDIRECT_DESC as u32 + fn features_supported(&self, dev: &dyn VirtioDevice) -> u64 { + dev.features() | queue::Features::transitional().bits() } - fn set_status(&self, dev: &dyn VirtioDevice, status: u8) { + + fn set_status(&self, dev: &dyn VirtioDevice, value: u8) { let mut state = self.state.lock().unwrap(); - let val = Status::from_bits_truncate(status); - if val == Status::RESET && state.status != Status::RESET { - self.virtio_reset(dev, state) + let status = Status::from_bits_truncate(value); + if status == Status::RESET && state.status != Status::RESET { + self.virtio_reset(dev, state); } else { // XXX: better device status FSM - state.status = val; + state.status = status; + if status.contains(Status::FEATURES_OK) { + if dev.set_features(state.negotiated_features).is_err() { + self.needs_reset_locked(dev, &mut state); + } + } } } @@ -474,8 +1091,10 @@ impl PciVirtioState { queue )); if let Some(vq) = self.queues.get(queue) { - vq.live.store(true, Ordering::Release); - dev.queue_notify(vq); + vq.arise(); + if self.mode() != virtio::Mode::Modern || vq.is_enabled() { + dev.queue_notify(vq); + } } } @@ -546,7 +1165,8 @@ impl PciVirtioState { state.intr_mode = new_mode; // Make sure the correct legacy register map is used - self.map_which.store(new_mode == IntrMode::Msi, Ordering::SeqCst); + self.legacy_map_use_msix + .store(new_mode == IntrMode::Msi, Ordering::SeqCst); match new_mode { IntrMode::IsrLintr => { self.isr_state.enable(is_import); @@ -554,8 +1174,10 @@ impl PciVirtioState { IntrMode::Msi => { let hdl = 
pci_state.msix_hdl().unwrap(); for vq in self.queues.iter() { - let vec = - *state.msix_queue_vec.get(vq.id as usize).unwrap(); + let vec = *state + .msix_queue_vec + .get(vq.id as usize) + .expect("msix for virtqueue is ok"); // State lock cannot be held while updating queue interrupt // handlers due to deadlock possibility. @@ -570,9 +1192,9 @@ impl PciVirtioState { self.state_cv.notify_all(); } - pub fn negotiated_features(&self) -> u32 { + pub fn negotiated_features(&self) -> u64 { let state = self.state.lock().unwrap(); - state.nego_feat + state.negotiated_features } } impl MigrateMulti for PciVirtioState { @@ -586,8 +1208,14 @@ impl MigrateMulti for PciVirtioState { let device = migrate::DeviceStateV1 { status: state.status.bits(), - queue_sel: state.queue_sel, - nego_feat: state.nego_feat, + queue_select: state.queue_select, + negotiated_features: state.negotiated_features, + device_feature_select: state.device_feature_select, + driver_feature_select: state.driver_feature_select, + config_generation: state.config_generation, + config_generation_seen: state.config_generation_seen, + device_config_size: state.device_config_size as u64, + mode: state.mode as u32, msix_cfg_vec: state.msix_cfg_vec, msix_queue_vec: state.msix_queue_vec.clone(), isr_queue, @@ -595,7 +1223,7 @@ impl MigrateMulti for PciVirtioState { }; drop(state); - let queues = self.queues.iter().map(|q| q.export()).collect(); + let queues = self.queues.export(); output.push(migrate::PciVirtioStateV1 { device, queues }.into()) } @@ -611,20 +1239,28 @@ impl MigrateMulti for PciVirtioState { let mut state = self.state.lock().unwrap(); state.status = Status::from_bits(dev.status).ok_or_else(|| { MigrateStateError::ImportFailed(format!( - "virtio status: failed to import saved value {:#x}", - state.status + "virtio status: failed to import saved value {status:#x}", + status = dev.status + )) + })?; + state.queue_select = dev.queue_select; + state.negotiated_features = dev.negotiated_features; + state.device_feature_select = dev.device_feature_select; + state.driver_feature_select = dev.driver_feature_select; + state.config_generation = dev.config_generation; + state.config_generation_seen = dev.config_generation_seen; + state.device_config_size = dev.device_config_size as usize; + state.mode = virtio::Mode::from_repr(dev.mode).ok_or_else(|| { + MigrateStateError::ImportFailed(format!( + "virtio mode: failed to import saved value {mode:#x}", + mode = dev.mode )) })?; - state.queue_sel = dev.queue_sel; - state.nego_feat = dev.nego_feat; state.msix_cfg_vec = dev.msix_cfg_vec; state.msix_queue_vec = dev.msix_queue_vec; self.isr_state.import(dev.isr_queue, dev.isr_cfg); - // VirtQueue state - for (vq, vq_input) in self.queues.iter().zip(input.queues.into_iter()) { - vq.import(vq_input)?; - } + self.queues.import(&input.queues)?; Ok(()) } @@ -659,11 +1295,16 @@ impl MigrateMulti for dyn PciVirtio { // to the VirtIO state. vs.set_intr_mode(ps, ps.get_intr_mode().into(), true); - // Perform a (potentially spurious) update notification for the BAR + // Perform a (potentially spurious) update notification for the BARs // containing the virtio registers. This ensures that anything - // interested in the placement of that BAR (such as the notify-port - // logic) is kept well aware - self.bar_update(ps.bar(pci::BarN::BAR0).unwrap()); + // interested in the placement of those BARs (such as the notify + // logic) is configured properly. 
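+    // Which BARs are present depends on the device mode: a legacy-only
+    // device has no BAR2, and a modern-only device has no BAR0, so each
+    // update below is conditional.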
+        if let Some(bar0) = ps.bar(pci::BarN::BAR0) {
+            self.bar_update(bar0);
+        }
+        if let Some(bar2) = ps.bar(pci::BarN::BAR2) {
+            self.bar_update(bar2);
+        }
 
         Ok(())
     }
@@ -830,43 +1471,271 @@ impl VirtioIntr for MsiIntr {
 }
 
 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
-enum VirtioTop {
-    LegacyConfig,
+enum VirtioConfigRegBlock {
+    Legacy,
+    Common,
     DeviceConfig,
+    Notify,
+    IsrStatus,
+    RazWi,
+}
+
+// Some of these sizes are drawn from the VirtIO specification: for example,
+// `COMMON_REG_SIZE` is the sum of the sizes of the fields that make up the
+// common configuration structure as defined in VirtIO 1.2.
+//
+// Others are somewhat arbitrary; the page offsets, for example, are of our
+// own choosing, defined so that a guest driver can map the different
+// register blocks in pages of their own (using 4 KiB page mappings). This
+// is not strictly necessary, however.
+const LEGACY_REG_SIZE: usize = 0x18;
+const LEGACY_REG_SIZE_NO_MSIX: usize = LEGACY_REG_SIZE - 2 * 2;
+const LEGACY_REG_QUEUE_NOTIFY_OFFSET: usize = 0x10;
+
+const COMMON_REG_OFFSET: usize = 0;
+const COMMON_REG_SIZE: usize =
+    4 + 4 + 4 + 4 + 2 + 2 + 1 + 1 + 2 + 2 + 2 + 2 + 2 + 8 + 8 + 8 + 2 + 2;
+const DEVICE_REG_OFFSET: usize = PAGE_SIZE;
+const NOTIFY_REG_OFFSET: usize = 2 * PAGE_SIZE;
+pub const NOTIFY_REG_SIZE: usize = 4;
+const ISR_STATUS_REG_OFFSET: usize = 3 * PAGE_SIZE;
+const ISR_STATUS_REG_SIZE: usize = 1;
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum CommonConfigReg {
+    // Configuration data for the device as a whole.
+    DeviceFeatureSelect,
+    DeviceFeature,
+    DriverFeatureSelect,
+    DriverFeature,
+    ConfigMsixVector,
+    NumQueues,
+    DeviceStatus,
+    ConfigGeneration,
+
+    // Configuration information for a specific queue.
+    QueueSelect,
+    QueueSize,
+    QueueMsixVector,
+    QueueEnable,
+    QueueNotifyOffset,
+    QueueDescAddr,
+    QueueDriverAddr,
+    QueueDeviceAddr,
+    QueueNotifyData,
+    QueueReset,
 }
 
-const LEGACY_REG_SZ: usize = 0x18;
-const LEGACY_REG_SZ_NO_MSIX: usize = 0x14;
-const LEGACY_REG_OFF_QUEUE_NOTIFY: usize = 0x10;
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum NotifyReg {
+    Notify,
+}
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum IsrStatusReg {
+    IsrStatus,
+}
 
 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
-enum LegacyReg {
-    FeatDevice,
-    FeatDriver,
-    QueuePfn,
+enum LegacyConfigReg {
+    DeviceFeature,
+    DriverFeature,
+    QueueAddress4k,
     QueueSize,
     QueueSelect,
     QueueNotify,
     DeviceStatus,
     IsrStatus,
-    MsixVectorConfig,
-    MsixVectorQueue,
+    ConfigMsixVector,
+    QueueMsixVector,
 }
+
 lazy_static! {
-    static ref LEGACY_REGS: RegMap<LegacyReg> = {
+    static ref COMMON_REGS: RegMap<CommonConfigReg> = {
+        let layout = [
+            // These refer to the device as a whole.
+            (CommonConfigReg::DeviceFeatureSelect, 4),
+            (CommonConfigReg::DeviceFeature, 4),
+            (CommonConfigReg::DriverFeatureSelect, 4),
+            (CommonConfigReg::DriverFeature, 4),
+            (CommonConfigReg::ConfigMsixVector, 2),
+            (CommonConfigReg::NumQueues, 2),
+            (CommonConfigReg::DeviceStatus, 1),
+            (CommonConfigReg::ConfigGeneration, 1),
+            // These are banked for specific virtqueues, distinguished
+            // via the "QueueSelect" register.
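+            // (Together with the device-level fields above, this layout
+            // matches `struct virtio_pci_common_cfg`, sec 4.1.4.3 of
+            // VirtIO 1.2.)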
+            (CommonConfigReg::QueueSelect, 2),
+            (CommonConfigReg::QueueSize, 2),
+            (CommonConfigReg::QueueMsixVector, 2),
+            (CommonConfigReg::QueueEnable, 2),
+            (CommonConfigReg::QueueNotifyOffset, 2),
+            (CommonConfigReg::QueueDescAddr, 8),
+            (CommonConfigReg::QueueDriverAddr, 8),
+            (CommonConfigReg::QueueDeviceAddr, 8),
+            (CommonConfigReg::QueueNotifyData, 2),
+            (CommonConfigReg::QueueReset, 2),
+        ];
+        RegMap::create_packed(COMMON_REG_SIZE, &layout, None)
+    };
+
+    static ref NOTIFY_REGS: RegMap<NotifyReg> = {
+        let layout = [
+            (NotifyReg::Notify, 4),
+        ];
+        RegMap::create_packed(NOTIFY_REG_SIZE, &layout, None)
+    };
+
+    static ref ISR_STATUS_REGS: RegMap<IsrStatusReg> = {
+        let layout = [
+            (IsrStatusReg::IsrStatus, 1),
+        ];
+        RegMap::create_packed(ISR_STATUS_REG_SIZE, &layout, None)
+    };
+
+    static ref LEGACY_REGS: RegMap<LegacyConfigReg> = {
+        let layout = [
+            (LegacyConfigReg::DeviceFeature, 4),
+            (LegacyConfigReg::DriverFeature, 4),
+            (LegacyConfigReg::QueueAddress4k, 4),
+            (LegacyConfigReg::QueueSize, 2),
+            (LegacyConfigReg::QueueSelect, 2),
+            (LegacyConfigReg::QueueNotify, 2),
+            (LegacyConfigReg::DeviceStatus, 1),
+            (LegacyConfigReg::IsrStatus, 1),
+            (LegacyConfigReg::ConfigMsixVector, 2),
+            (LegacyConfigReg::QueueMsixVector, 2),
+        ];
+        RegMap::create_packed(LEGACY_REG_SIZE, &layout, None)
+    };
+}
+
+/// VirtIO configuration capabilities.
+///
+/// These definitions come from the description of
+/// `cfg_type` in section 4.1.4 in VirtIO 1.2.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[repr(u32)]
+enum VirtioCfgCapTag {
+    Common = 1,
+    Notify = 2,
+    Isr = 3,
+    Device = 4,
+    Pci = 5,
+    SharedMemory = 8,
+    Vendor = 9,
+}
+
+impl From<VirtioCfgCapTag> for u32 {
+    fn from(tag: VirtioCfgCapTag) -> u32 {
+        tag as u32
+    }
+}
+
+impl TryFrom<u32> for VirtioCfgCapTag {
+    type Error = u32;
+    fn try_from(raw: u32) -> Result<Self, Self::Error> {
+        match raw {
+            1 => Ok(Self::Common),
+            2 => Ok(Self::Notify),
+            3 => Ok(Self::Isr),
+            4 => Ok(Self::Device),
+            5 => Ok(Self::Pci),
+            8 => Ok(Self::SharedMemory),
+            9 => Ok(Self::Vendor),
+            _ => Err(raw),
+        }
+    }
+}
+
+const COMMON_CFG_CAP_SIZE: u8 = 1 + 1 + 1 + 1 + 2 + 4 + 4;
+const NOTIFY_CFG_CAP_SIZE: u8 = COMMON_CFG_CAP_SIZE + 4;
+const PCI_CFG_CAP_SIZE: u8 = COMMON_CFG_CAP_SIZE + 4;
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum CommonCfgCapReg {
+    CapLen,
+    CfgType,
+    Bar,
+    Id,
+    Padding,
+    Offset,
+    Length,
+}
+
+lazy_static! {
+    /// The common configuration capability registers in config space. Note
+    /// that the capability type and next pointer are not included here, as
+    /// these are defined and consumed by the framework. So while the padding
+    /// field appears to pad to a 6-byte offset, it actually pads to an 8-byte
+    /// offset, as the entire register space is already offset by two bytes.
+    ///
+    /// This definition corresponds to `struct virtio_pci_cap` from sec 4.1.4
+    /// of VirtIO 1.2.
+    static ref COMMON_CFG_CAP_REGS: RegMap<CommonCfgCapReg> = {
+        let layout = [
+            (CommonCfgCapReg::CapLen, 1),
+            (CommonCfgCapReg::CfgType, 1),
+            (CommonCfgCapReg::Bar, 1),
+            (CommonCfgCapReg::Id, 1),
+            (CommonCfgCapReg::Padding, 2), // Note: includes type and next
+            (CommonCfgCapReg::Offset, 4),
+            (CommonCfgCapReg::Length, 4),
+        ];
+        RegMap::create_packed(COMMON_CFG_CAP_SIZE.into(), &layout, None)
+    };
+}
+
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+enum NotifyCfgCapReg {
+    Common(CommonCfgCapReg),
+    Multiplier,
+}
+
+lazy_static! {
+    /// The notification capability registers in config space.
+    ///
+    /// See the note around `COMMON_CFG_CAP_REGS` for details about
+    /// padding, offsets, and alignment.
This definition corresponds + /// to `struct virtio_pci_notify_cap` from sec 4.1.4.4 of VirtIO 1.2. + static ref NOTIFY_CFG_CAP_REGS: RegMap = { + let layout = [ + (NotifyCfgCapReg::Common(CommonCfgCapReg::CapLen), 1), + (NotifyCfgCapReg::Common(CommonCfgCapReg::CfgType), 1), + (NotifyCfgCapReg::Common(CommonCfgCapReg::Bar), 1), + (NotifyCfgCapReg::Common(CommonCfgCapReg::Id), 1), + (NotifyCfgCapReg::Common(CommonCfgCapReg::Padding), 2), + (NotifyCfgCapReg::Common(CommonCfgCapReg::Offset), 4), + (NotifyCfgCapReg::Common(CommonCfgCapReg::Length), 4), + (NotifyCfgCapReg::Multiplier, 4), + ]; + RegMap::create_packed(NOTIFY_CFG_CAP_SIZE.into(), &layout, None) + }; +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +enum PciCfgCapReg { + Common(CommonCfgCapReg), + PciData, +} + +lazy_static! { + /// The PCI configuration capability register in config space. + /// + /// See the note around `COMMON_CFG_CAP_REGS` for details about + /// padding, offsets, and alignment. This definition corresponds + /// to `struct virtio_pci_cfg_cap` from sec 4.1.4.9 of VirtIO 1.2. + static ref PCI_CFG_CAP_REGS: RegMap = { let layout = [ - (LegacyReg::FeatDevice, 4), - (LegacyReg::FeatDriver, 4), - (LegacyReg::QueuePfn, 4), - (LegacyReg::QueueSize, 2), - (LegacyReg::QueueSelect, 2), - (LegacyReg::QueueNotify, 2), - (LegacyReg::DeviceStatus, 1), - (LegacyReg::IsrStatus, 1), - (LegacyReg::MsixVectorConfig, 2), - (LegacyReg::MsixVectorQueue, 2), + (PciCfgCapReg::Common(CommonCfgCapReg::CapLen), 1), + (PciCfgCapReg::Common(CommonCfgCapReg::CfgType), 1), + (PciCfgCapReg::Common(CommonCfgCapReg::Bar), 1), + (PciCfgCapReg::Common(CommonCfgCapReg::Id), 1), + (PciCfgCapReg::Common(CommonCfgCapReg::Padding), 2), + (PciCfgCapReg::Common(CommonCfgCapReg::Offset), 4), + (PciCfgCapReg::Common(CommonCfgCapReg::Length), 4), + (PciCfgCapReg::PciData, 4), ]; - RegMap::create_packed(LEGACY_REG_SZ, &layout, None) + RegMap::create_packed(PCI_CFG_CAP_SIZE.into(), &layout, None) }; } @@ -878,8 +1747,14 @@ pub mod migrate { #[derive(Deserialize, Serialize)] pub struct DeviceStateV1 { pub status: u8, - pub queue_sel: u16, - pub nego_feat: u32, + pub queue_select: u16, + pub negotiated_features: u64, + pub device_feature_select: u32, + pub driver_feature_select: u32, + pub config_generation: u8, + pub config_generation_seen: bool, + pub device_config_size: u64, + pub mode: u32, pub msix_cfg_vec: u16, pub msix_queue_vec: Vec, pub isr_queue: bool, @@ -889,7 +1764,7 @@ pub mod migrate { #[derive(Deserialize, Serialize)] pub struct PciVirtioStateV1 { pub device: DeviceStateV1, - pub queues: Vec, + pub queues: queue::migrate::VirtQueuesV1, } impl Schema<'_> for PciVirtioStateV1 { fn id() -> SchemaId { diff --git a/lib/propolis/src/hw/virtio/queue.rs b/lib/propolis/src/hw/virtio/queue.rs index a478f617b..77a3c9d4f 100644 --- a/lib/propolis/src/hw/virtio/queue.rs +++ b/lib/propolis/src/hw/virtio/queue.rs @@ -4,12 +4,12 @@ use std::mem; use std::num::{NonZeroU16, Wrapping}; -use std::ops::Index; -use std::slice::SliceIndex; -use std::sync::atomic::{fence, AtomicBool, Ordering}; +use std::sync::atomic::{fence, AtomicBool, AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; -use super::bits::*; +use bitflags::bitflags; +use zerocopy::FromBytes; + use super::probes; use super::{VirtioIntr, VqIntr}; use crate::accessors::MemAccessor; @@ -17,7 +17,40 @@ use crate::common::*; use crate::migrate::MigrateStateError; use crate::vmm::MemCtx; -use zerocopy::FromBytes; +bitflags! { + /// Features supported by our implementation of virtqueues. 
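+    ///
+    /// `VERSION_1` (bit 32) is what pushes the feature words beyond 32
+    /// bits: the legacy interface can only ever see (and negotiate) the
+    /// low 32, while the common configuration exposes the full set as two
+    /// 32-bit words selected via the feature-select registers.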
+    pub struct Features: u64 {
+        const RING_INDIRECT_DESC = 1 << 28;
+        const RING_EVENT_IDX = 1 << 29;
+        const VERSION_1 = 1 << 32;
+    }
+
+    struct QueueFlags: u16 {
+        const DESC_NEXT = 1 << 0;
+        const DESC_WRITE = 1 << 1;
+        const DESC_INDIRECT = 1 << 2;
+    }
+
+    struct AvailFlags: u16 {
+        const NO_INTERRUPT = 1 << 0;
+    }
+
+    struct UsedFlags: u16 {
+        const NO_NOTIFY = 1 << 0;
+    }
+}
+
+impl Features {
+    /// Returns those features appropriate for a legacy queue.
+    pub fn legacy() -> Self {
+        Self::RING_INDIRECT_DESC
+    }
+
+    /// Returns those features appropriate for a transitional queue.
+    pub fn transitional() -> Self {
+        Self::legacy() | Self::VERSION_1
+    }
+}
 
 #[repr(C)]
 #[derive(Copy, Clone, FromBytes)]
@@ -51,6 +84,7 @@ pub struct VqAvail {
     gpa_desc: GuestAddr,
 }
+
 impl VqAvail {
     /// If there's a request ready, pop it off the queue and return the
     /// corresponding descriptor and available ring indicies.
@@ -73,6 +107,7 @@ impl VqAvail {
         }
         None
     }
+
     fn read_ring_descr(
         &self,
         id: u16,
@@ -83,6 +118,7 @@
         let addr = self.gpa_desc.offset::<VqdDesc>(id as usize);
         mem.read::<VqdDesc>(addr)
     }
+
     fn reset(&mut self) {
         self.valid = false;
         self.gpa_flags = GuestAddr(0);
@@ -91,6 +127,7 @@
         self.gpa_desc = GuestAddr(0);
         self.cur_avail_idx = Wrapping(0);
     }
+
     fn map_split(&mut self, desc_addr: u64, avail_addr: u64) {
         self.gpa_desc = GuestAddr(desc_addr);
         // 16-bit flags, followed by 16-bit idx, followed by avail desc ring
@@ -98,6 +135,20 @@
         self.gpa_idx = GuestAddr(avail_addr + 2);
         self.gpa_ring = GuestAddr(avail_addr + 4);
     }
+
+    /// Returns guest flags.
+    fn flags(&self, mem: &MemCtx) -> AvailFlags {
+        let value =
+            if self.valid { *mem.read(self.gpa_flags).unwrap() } else { 0 };
+        AvailFlags::from_bits_truncate(value)
+    }
+
+    /// Returns true iff interrupts are suppressed.
+    #[allow(dead_code)]
+    fn _intr_supressed(&self, mem: &MemCtx) -> bool {
+        let flags = self.flags(mem);
+        flags.contains(AvailFlags::NO_INTERRUPT)
+    }
 }
 
 pub struct VqUsed {
@@ -110,6 +161,7 @@ pub struct VqUsed {
     used_idx: Wrapping<u16>,
     interrupt: Option<Box<dyn VirtioIntr>>,
 }
+
 impl VqUsed {
     fn write_used(&mut self, id: u16, len: u32, rsize: u16, mem: &MemCtx) {
         // We do not expect used entries to be pushed into a virtqueue which has
@@ -126,10 +178,39 @@
         fence(Ordering::Release);
         mem.write(self.gpa_idx, &self.used_idx.0);
     }
-    fn intr_supressed(&self, mem: &MemCtx) -> bool {
-        let flags: u16 = *mem.read(self.gpa_flags).unwrap();
-        flags & VRING_AVAIL_F_NO_INTERRUPT != 0
+
+    /// Returns guest flags.
+    fn flags(&self, mem: &MemCtx) -> UsedFlags {
+        let value: u16 = *mem.read(self.gpa_flags).unwrap();
+        UsedFlags::from_bits_truncate(value)
+    }
+
+    /// Sets flags.
+    fn set_flags(&self, flags: UsedFlags, mem: &MemCtx) {
+        let value = flags.bits();
+        mem.write(self.gpa_flags, &value);
+    }
+
+    /// Disables notifications on this queue; returns the previous state.
+    fn disable_notify(&self, mem: &MemCtx) -> bool {
+        let flags = self.flags(mem);
+        let current = flags.contains(UsedFlags::NO_NOTIFY);
+        self.set_flags(flags | UsedFlags::NO_NOTIFY, mem);
+        current
     }
+
+    fn enable_notify(&self, mem: &MemCtx) {
+        let mut flags = self.flags(mem);
+        flags.remove(UsedFlags::NO_NOTIFY);
+        self.set_flags(flags, mem);
+    }
+
+    /// Returns true iff notifications are suppressed for this queue.
+ fn notify_supressed(&self, mem: &MemCtx) -> bool { + let flags = self.flags(mem); + flags.contains(UsedFlags::NO_NOTIFY) + } + fn reset(&mut self) { self.valid = false; self.gpa_flags = GuestAddr(0); @@ -145,8 +226,16 @@ impl VqUsed { } } -#[derive(Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct VqSize(NonZeroU16); +impl VqSize { + pub const fn new(size: u16) -> VqSize { + let size = NonZeroU16::new(size).expect("nonzero queue size"); + assert!(size.is_power_of_two()); + Self(size) + } +} + impl TryFrom for VqSize { type Error = VqSizeError; @@ -165,6 +254,7 @@ impl TryFrom for VqSize { NonZeroU16::try_from(value).or(Err(VqSizeError::IsZero))?.try_into() } } + impl Into for VqSize { fn into(self) -> u16 { self.0.get() @@ -181,25 +271,31 @@ pub enum VqSizeError { pub struct VirtQueue { pub id: u16, - pub size: VqSize, + pub size: Mutex, pub live: AtomicBool, + pub enabled: AtomicBool, + pub is_control: AtomicBool, + pub notify_data: u16, avail: Mutex, used: Mutex, pub acc_mem: MemAccessor, } -const LEGACY_QALIGN: u64 = PAGE_SIZE as u64; + const fn qalign(addr: u64, align: u64) -> u64 { assert!(align.is_power_of_two()); - let mask = align - 1; (addr + mask) & !mask } + impl VirtQueue { - pub fn new(size: VqSize) -> Self { + fn new(id: u16, size: VqSize) -> Self { Self { - id: 0, // to be populated when stashed in VirtQueues - size, + id, + size: Mutex::new(size), live: AtomicBool::new(false), + enabled: AtomicBool::new(false), + is_control: AtomicBool::new(false), + notify_data: id, avail: Mutex::new(VqAvail { valid: false, gpa_flags: GuestAddr(0), @@ -219,6 +315,7 @@ impl VirtQueue { acc_mem: MemAccessor::new_orphan(), } } + pub(super) fn reset(&self) { let mut avail = self.avail.lock().unwrap(); let mut used = self.used.lock().unwrap(); @@ -227,11 +324,58 @@ impl VirtQueue { avail.reset(); used.reset(); self.live.store(false, Ordering::Release); + self.enabled.store(false, Ordering::Release); + } + + pub(super) fn enable(&self) { + self.enabled.store(true, Ordering::Release); + } + + pub(super) fn is_enabled(&self) -> bool { + self.enabled.load(Ordering::Acquire) + } + + pub(super) fn arise(&self) { + self.live.store(true, Ordering::Release); + } + + pub(super) fn is_alive(&self) -> bool { + self.live.load(Ordering::Acquire) + } + + pub(super) fn is_control(&self) -> bool { + self.is_control.load(Ordering::Acquire) + } + + pub(super) fn set_control(&self) { + self.is_control.store(true, Ordering::Release); } #[inline(always)] pub fn size(&self) -> u16 { - self.size.into() + let size = *self.size.lock().unwrap(); + size.into() + } + + /// Attempt to establish area mappings for this virtqueue at specified + /// physical addresses. Using the terminology of VirtIO 1.2, we take the + /// addresses for the "Descriptor Area", "Driver Area", and "Device Area". + /// Previously, these were called the "Descriptor Table", "Available Ring", + /// and "Used Ring". However, section 2.7 of the version 1.2 specification + /// also refers to these using the older names, so we retain that + /// terminology. 
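+    ///
+    /// For example, a driver that placed each area in a page of its own
+    /// might program (illustrative addresses only):
+    ///
+    /// ```text
+    /// vq.map_virtqueue(0x8000_0000, 0x8000_1000, 0x8000_2000);
+    /// ```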
+    pub fn map_virtqueue(
+        &self,
+        desc_addr: u64,
+        avail_addr: u64,
+        used_addr: u64,
+    ) {
+        let mut avail = self.avail.lock().expect("avail is initialized");
+        let mut used = self.used.lock().expect("used is initialized");
+        avail.map_split(desc_addr, avail_addr);
+        used.map_split(used_addr);
+        avail.valid = true;
+        used.valid = true;
     }
 
     /// Attempt to establish ring mappings at a specified physical address,
@@ -239,7 +383,9 @@
     ///
     /// `addr` must be aligned to 4k per the legacy requirements
     pub fn map_legacy(&self, addr: u64) {
+        const LEGACY_QALIGN: u64 = PAGE_SIZE as u64;
         assert_eq!(addr & (LEGACY_QALIGN - 1), 0);
+        assert_ne!(addr, 0);
 
         let size = self.size() as usize;
 
@@ -252,13 +398,24 @@
         let used_addr = qalign(avail_addr + avail_len as u64, LEGACY_QALIGN);
         let _used_len = mem::size_of::<VqdUsed>() * size + 2 * 3;
 
-        let mut avail = self.avail.lock().unwrap();
-        let mut used = self.used.lock().unwrap();
-        avail.map_split(desc_addr, avail_addr);
-        used.map_split(used_addr);
-        avail.valid = true;
-        used.valid = true;
+        self.map_virtqueue(desc_addr, avail_addr, used_addr);
     }
+
+    /// Returns true iff there is a valid mapping for this queue in the
+    /// guest physical address space.
+    pub fn is_mapped(&self) -> bool {
+        self.avail.lock().unwrap().valid
+    }
+
+    /// Returns true if this queue is not mapped, or is empty.
+    pub fn avail_is_empty(&self, mem: &MemCtx) -> bool {
+        let avail = self.avail.lock().expect("not poisoned");
+        !avail.valid || {
+            let guest_idx: u16 = *mem.read(avail.gpa_idx).unwrap();
+            avail.cur_avail_idx == Wrapping(guest_idx)
+        }
+    }
+
     pub fn get_state(&self) -> Info {
         let avail = self.avail.lock().unwrap();
         let used = self.used.lock().unwrap();
@@ -274,6 +431,7 @@
             used_idx: used.used_idx.0,
         }
     }
+
     pub fn set_state(&self, info: &Info) {
         let mut avail = self.avail.lock().unwrap();
         let mut used = self.used.lock().unwrap();
@@ -285,6 +443,10 @@
         avail.cur_avail_idx = Wrapping(info.avail_idx);
         used.used_idx = Wrapping(info.used_idx);
     }
+
+    /// Accumulates a sequence of available descriptors into a `Chain`.
+    ///
+    /// VirtIO descriptors can be organized into a linked list of chained
+    /// buffers; this walks the chain, accumulating each descriptor into
+    /// the given `Chain`.
     pub fn pop_avail(
         &self,
         chain: &mut Chain,
@@ -369,6 +531,7 @@
         }
         Some((req.avail_idx, len))
     }
+
     pub fn push_used(&self, chain: &mut Chain, mem: &MemCtx) {
         assert!(chain.idx.is_some());
         let mut used = self.used.lock().unwrap();
@@ -377,7 +540,10 @@
         let len = chain.write_stat.bytes - chain.write_stat.bytes_remain;
         probes::virtio_vq_push!(|| (self as *const VirtQueue as u64, id, len));
         used.write_used(id, len, self.size(), mem);
-        if !used.intr_supressed(mem) {
+        // XXX: This is wrong. Interrupt notification is on the avail ring,
+        // not used.
+        #[allow(clippy::overly_complex_bool_expr)]
+        if true || !used.notify_supressed(mem) {
            if let Some(intr) = used.interrupt.as_ref() {
                 intr.notify();
             }
@@ -397,10 +563,25 @@
         used.interrupt.as_ref().map(|x| x.read())
     }
 
-    /// Send an interrupt for VQ
+    /// Disables interrupts (notifications) on the `Used` ring
+    pub(super) fn disable_intr(&self, mem: &MemCtx) -> bool {
+        let used = self.used.lock().unwrap();
+        used.disable_notify(mem)
+    }
+
+    /// Enables interrupts (notifications) on the `Used` ring
+    pub(super) fn enable_intr(&self, mem: &MemCtx) {
+        let used = self.used.lock().unwrap();
+        used.enable_notify(mem);
+    }
+
+    /// Send an interrupt for this virtual queue.
 pub(super) fn send_intr(&self, mem: &MemCtx) {
         let used = self.used.lock().unwrap();
-        if !used.intr_supressed(mem) {
+        // XXX: This is wrong. Interrupt notification is on the avail ring,
+        // not used.
+        #[allow(clippy::overly_complex_bool_expr)]
+        if true || !used.notify_supressed(mem) {
             if let Some(intr) = used.interrupt.as_ref() {
                 intr.notify();
             }
@@ -417,6 +598,9 @@
             descr_gpa: avail.gpa_desc.0,
             mapping_valid: avail.valid && used.valid,
             live: self.live.load(Ordering::Acquire),
+            enabled: self.enabled.load(Ordering::Acquire),
+            is_control: self.is_control.load(Ordering::Acquire),
+            notify_data: self.notify_data,
 
             // `flags` field is the first member for avail and used rings
             avail_gpa: avail.gpa_flags.0,
@@ -429,7 +613,7 @@
 
     pub fn import(
         &self,
-        state: migrate::VirtQueueV1,
+        state: &migrate::VirtQueueV1,
     ) -> Result<(), MigrateStateError> {
         let mut avail = self.avail.lock().unwrap();
         let mut used = self.used.lock().unwrap();
@@ -447,6 +631,13 @@
                 state.size,
             )));
         }
+        if self.notify_data != state.notify_data {
+            return Err(MigrateStateError::ImportFailed(format!(
+                "VirtQueue: mismatched notify data {} vs {}",
+                self.notify_data,
+                state.notify_data,
+            )));
+        }
 
         avail.map_split(state.descr_gpa, state.avail_gpa);
         avail.valid = state.mapping_valid;
@@ -455,7 +646,10 @@
         used.map_split(state.used_gpa);
         used.valid = state.mapping_valid;
         used.used_idx = Wrapping(state.used_idx);
+
         self.live.store(state.live, Ordering::Release);
+        self.enabled.store(state.enabled, Ordering::Release);
+        self.is_control.store(state.is_control, Ordering::Release);
 
         Ok(())
     }
@@ -464,9 +658,9 @@
 bitflags! {
     #[derive(Default)]
     pub struct DescFlag: u16 {
-        const NEXT = VIRTQ_DESC_F_NEXT;
-        const WRITE = VIRTQ_DESC_F_WRITE;
-        const INDIRECT = VIRTQ_DESC_F_INDIRECT;
+        const NEXT = 1 << 0;
+        const WRITE = 1 << 1;
+        const INDIRECT = 1 << 2;
     }
 }
 
@@ -729,6 +923,7 @@
     pub used_addr: u64,
     pub valid: bool,
 }
+
 #[derive(Debug)]
 pub struct Info {
     pub mapping: MapInfo,
@@ -737,55 +932,108 @@
 }
 
 pub struct VirtQueues {
+    len: AtomicUsize,
     queues: Vec<Arc<VirtQueue>>,
 }
+
+const MAX_QUEUES: usize = 65535;
+
 impl VirtQueues {
-    pub fn new(
-        queues: impl IntoIterator<Item = VirtQueue>,
-    ) -> Result<Self, VirtQueuesError> {
-        let queues = queues
+    pub fn new(sizes: &[VqSize]) -> Self {
+        assert!(
+            !sizes.is_empty() && sizes.len() <= MAX_QUEUES,
+            "queue count must be between 1 and 65535"
+        );
+        Self::new_with_len(sizes.len(), sizes)
+    }
+
+    pub fn new_with_len(len: usize, sizes: &[VqSize]) -> Self {
+        assert!(
+            0 < len && len <= sizes.len() && sizes.len() <= MAX_QUEUES,
+            "queue count must be between 1 and 65535, and `len` must not \
+             exceed the number of queue sizes given"
+        );
+        let queues = sizes
             .into_iter()
             .enumerate()
-            .map(|(id, mut vq)| {
-                vq.id = id as u16;
-                Arc::new(vq)
-            })
+            .map(|(id, size)| Arc::new(VirtQueue::new(id as u16, *size)))
            .collect::<Vec<_>>();
 
-        if !(0..(u16::MAX as usize)).contains(&queues.len()) {
-            return Err(VirtQueuesError::BadQueueCount(queues.len()));
-        }
+        let len = AtomicUsize::new(len);
+        Self { len, queues }
+    }
 
-        Ok(Self { queues })
+    pub fn set_len(&self, len: usize) {
+        assert!(0 < len && len <= self.max_capacity());
+        self.len.store(len, Ordering::Release);
     }
+
     pub fn count(&self) -> NonZeroU16 {
-        NonZeroU16::try_from(self.queues.len() as u16)
+        NonZeroU16::try_from(self.len() as u16)
             .expect("queue count already validated")
     }
+
+    pub fn len(&self) -> usize {
+        self.len.load(Ordering::Relaxed)
+    }
+
+    pub const fn max_capacity(&self) -> usize {
+        self.queues.len()
+    }
+
    pub fn
get(&self, qid: u16) -> Option<&Arc> { - self.queues.get(usize::from(qid)) + let len = self.len(); + let qid = usize::from(qid); + // XXX: This special case is for viona, which always puts the + // control queue at the end of queue vector. None of the other + // devices currently handle queues specially in this way, but we + // should come up with some better mechanism here. + if qid + 1 == len { + Some(self.get_control()) + } else { + self.queues[..len].get(qid) + } } - pub fn iter(&self) -> std::slice::Iter<'_, Arc> { - self.queues.iter() + + fn get_control(&self) -> &Arc { + &self.queues[self.max_capacity() - 1] } -} -impl]>> Index for VirtQueues { - type Output = S::Output; + pub fn iter( + &self, + ) -> std::iter::Chain< + std::slice::Iter<'_, Arc>, + std::array::IntoIter<&Arc, 1>, + > { + let len = self.len() - 1; + self.queues[..len].iter().chain([self.get_control()]) + } - fn index(&self, index: S) -> &Self::Output { - Index::index(&self.queues, index) + pub fn export(&self) -> migrate::VirtQueuesV1 { + let len = self.len() as u64; + let queues = self.queues.iter().map(|q| q.export()).collect(); + migrate::VirtQueuesV1 { len, queues } } -} -#[derive(Copy, Clone, Debug, thiserror::Error)] -pub enum VirtQueuesError { - #[error("queue count {0} must be nonzero and less than 65535")] - BadQueueCount(usize), + pub fn import( + &self, + state: &migrate::VirtQueuesV1, + ) -> Result<(), MigrateStateError> { + for (vq, vq_input) in self.queues.iter().zip(state.queues.iter()) { + vq.import(vq_input)?; + } + self.set_len(state.len as usize); + Ok(()) + } } pub mod migrate { use serde::{Deserialize, Serialize}; + #[derive(Deserialize, Serialize)] + pub struct VirtQueuesV1 { + pub len: u64, + pub queues: Vec, + } + #[derive(Deserialize, Serialize)] pub struct VirtQueueV1 { pub id: u16, @@ -793,6 +1041,9 @@ pub mod migrate { pub descr_gpa: u64, pub mapping_valid: bool, pub live: bool, + pub enabled: bool, + pub is_control: bool, + pub notify_data: u16, pub avail_gpa: u64, pub avail_cur_idx: u16, diff --git a/lib/propolis/src/hw/virtio/softnpu.rs b/lib/propolis/src/hw/virtio/softnpu.rs index 6438237f2..ad47bdd3f 100644 --- a/lib/propolis/src/hw/virtio/softnpu.rs +++ b/lib/propolis/src/hw/virtio/softnpu.rs @@ -14,7 +14,7 @@ use std::{ use crate::{ chardev::{Sink, Source}, common::*, - hw::{pci, uart::LpcUart}, + hw::{pci, uart::LpcUart, virtio}, migrate::Migrator, util::regmap::RegMap, vmm::MemCtx, @@ -23,7 +23,7 @@ use crate::{ use super::{ bits::*, pci::{PciVirtio, PciVirtioState}, - queue::{write_buf, Chain, VirtQueue, VirtQueues}, + queue::{write_buf, Chain, VirtQueue, VirtQueues, VqSize}, viona::bits::VIRTIO_NET_S_LINK_UP, VirtioDevice, }; @@ -144,19 +144,15 @@ pub struct PortVirtioState { impl PortVirtioState { fn new(queue_size: u16) -> Self { - let queue_size = queue_size.try_into().unwrap(); - let queues = VirtQueues::new( - // RX and TX queues - [VirtQueue::new(queue_size), VirtQueue::new(queue_size)], - ) - .unwrap(); + let rxq_size = VqSize::new(queue_size); + let txq_size = VqSize::new(queue_size); + let queues = VirtQueues::new(&[rxq_size, txq_size]); let msix_count = Some(2); - let (pci_virtio_state, pci_state) = PciVirtioState::create( + let (pci_virtio_state, pci_state) = PciVirtioState::new( + virtio::Mode::Legacy, queues, msix_count, - VIRTIO_DEV_NET, - VIRTIO_SUB_DEV_NET, - pci::bits::CLASS_NETWORK, + virtio::DeviceId::Network, VIRTIO_NET_CFG_SIZE, ); Self { pci_virtio_state, pci_state } @@ -424,7 +420,7 @@ impl PciVirtio for PciVirtioSoftNpuPort { } impl VirtioDevice for 
PciVirtioSoftNpuPort { - fn cfg_rw(&self, mut rwo: RWOp) { + fn rw_dev_config(&self, mut rwo: RWOp) { NET_DEV_REGS.process(&mut rwo, |id, rwo| match rwo { RWOp::Read(ro) => self.net_cfg_read(id, ro), RWOp::Write(_) => { @@ -433,11 +429,15 @@ impl VirtioDevice for PciVirtioSoftNpuPort { }); } - fn get_features(&self) -> u32 { + fn mode(&self) -> virtio::Mode { + virtio::Mode::Legacy + } + + fn features(&self) -> u64 { VIRTIO_NET_F_MAC } - fn set_features(&self, _feat: u32) -> std::result::Result<(), ()> { + fn set_features(&self, _feat: u64) -> std::result::Result<(), ()> { Ok(()) } @@ -616,7 +616,7 @@ impl PacketHandler { } }; let mut chain = Chain::with_capacity(1); - let vq = &virtio.pci_virtio_state.queues[0]; + let vq = virtio.pci_virtio_state.queues.get(0).expect("a queue"); if let None = vq.pop_avail(&mut chain, &mem) { return; } diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 2c803705e..0a49dc0f9 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -7,21 +7,26 @@ use std::io::{self, Error, ErrorKind}; use std::num::NonZeroU16; use std::os::unix::io::{AsRawFd, RawFd}; -use std::sync::atomic::Ordering; use std::sync::{Arc, Condvar, Mutex, Weak}; -use crate::common::*; +use crate::common::{RWOp, ReadOp}; use crate::hw::pci; -use crate::lifecycle::{self, IndicatedState}; -use crate::migrate::*; +use crate::hw::virtio; +use crate::hw::virtio::queue::Chain; +use crate::lifecycle::{self, IndicatedState, Lifecycle}; +use crate::migrate::{ + MigrateCtx, MigrateMulti, MigrateStateError, Migrator, PayloadOffers, + PayloadOutputs, +}; use crate::util::regmap::RegMap; -use crate::vmm::VmmHdl; +use crate::vmm::{MemCtx, VmmHdl}; use super::bits::*; use super::pci::{PciVirtio, PciVirtioState}; -use super::queue::{self, VirtQueue, VirtQueues}; +use super::queue::{self, VirtQueue, VirtQueues, VqSize}; use super::{VirtioDevice, VqChange, VqIntr}; +use bit_field::BitField; use lazy_static::lazy_static; use tokio::io::unix::AsyncFd; use tokio::io::Interest; @@ -31,10 +36,135 @@ use tokio::task::JoinHandle; // Re-export API versioning interface for convenience of propolis consumers pub use viona_api::{api_version, ApiVersion}; +pub const RX_QUEUE_SIZE: VqSize = VqSize::new(0x800); +pub const TX_QUEUE_SIZE: VqSize = VqSize::new(0x100); +pub const CTL_QUEUE_SIZE: VqSize = VqSize::new(32); + +pub const VIRTIO_MQ_MIN_QPAIRS: u16 = 1; +pub const VIRTIO_MQ_MAX_QPAIRS: u16 = 0x8000; + +pub const PROPOLIS_MAX_MQ_PAIRS: u16 = 8; + +pub const fn max_num_queues() -> usize { + PROPOLIS_MAX_MQ_PAIRS as usize * 2 +} + const ETHERADDRL: usize = 6; -/// Viona's in-kernel emulation of the device VirtQueues is performed in what it -/// calls "vrings". Since the userspace portion of the Viona emulation is +/// Types and so forth for supporting the control queue. +/// Note that these come from the VirtIO spec, section +/// 5.1.6.2 in VirtIO 1.2. +pub mod control { + use super::ETHERADDRL; + use std::convert::TryFrom; + + /// The control message header has two data: a u8 representing the "class" + /// of control message, which describes what the message applies to, and a + /// "command", which describes what action we should take in response to the + /// command. So for example, class Mq and command Set means to set the + /// number of multiqueue queue pairs. 
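+    ///
+    /// On the wire, each control request is laid out as the two header
+    /// bytes, a command-specific payload, and a trailing ack byte that the
+    /// device writes back. E.g., a "set queue pairs" request (class 4,
+    /// command 0) carries a u16 pair count as its payload; see `Mq` below.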
+    #[derive(Clone, Copy, Debug, Default)]
+    #[repr(C)]
+    pub struct Header {
+        class: u8,
+        command: u8,
+    }
+
+    #[derive(Clone, Copy, Debug)]
+    pub enum Command {
+        Rx(RxCmd),
+        Mac(MacCmd),
+        Vlan(VlanCmd),
+        Announce(AnnounceCmd),
+        Mq(MqCmd),
+    }
+
+    impl TryFrom<Header> for Command {
+        type Error = Header;
+        fn try_from(header: Header) -> Result<Self, Self::Error> {
+            match (header.class, header.command) {
+                (0, c) => Ok(Self::Rx(RxCmd::from_repr(c).ok_or(header)?)),
+                (1, c) => Ok(Self::Mac(MacCmd::from_repr(c).ok_or(header)?)),
+                (2, c) => Ok(Self::Vlan(VlanCmd::from_repr(c).ok_or(header)?)),
+                (3, c) => {
+                    Ok(Self::Announce(AnnounceCmd::from_repr(c).ok_or(header)?))
+                }
+                (4, c) => Ok(Self::Mq(MqCmd::from_repr(c).ok_or(header)?)),
+                _ => Err(header),
+            }
+        }
+    }
+
+    #[derive(Clone, Copy, Debug)]
+    pub enum Ack {
+        Ok = 0,
+        Err = 1,
+    }
+
+    #[derive(Clone, Copy, Debug, strum::FromRepr)]
+    #[repr(u8)]
+    pub enum RxCmd {
+        Promisc = 0,
+        AllMulticast = 1,
+        AllUnicast = 2,
+        NoMulticast = 3,
+        NoUnicast = 4,
+        NoBroadcast = 5,
+    }
+
+    #[derive(Clone, Copy, Debug, strum::FromRepr)]
+    #[repr(u8)]
+    pub enum MacCmd {
+        TableSet = 0,
+        AddrSet = 1,
+    }
+
+    #[derive(Clone, Copy, Debug, Default)]
+    #[repr(C)]
+    pub struct Mac {
+        entries: u32,
+        mac: [u8; ETHERADDRL],
+    }
+
+    #[derive(Clone, Copy, Debug, Default)]
+    #[repr(C)]
+    pub struct Mq {
+        pub npairs: u16,
+    }
+
+    #[derive(Clone, Copy, Debug, strum::FromRepr)]
+    #[repr(u8)]
+    pub enum MqCmd {
+        SetPairs = 0,
+        RssConfig = 1,
+        HashConfig = 2,
+    }
+
+    impl TryFrom<u8> for MqCmd {
+        type Error = u8;
+        fn try_from(value: u8) -> Result<Self, Self::Error> {
+            match value {
+                0 => Ok(Self::SetPairs),
+                v => Err(v),
+            }
+        }
+    }
+
+    #[derive(Clone, Copy, Debug, strum::FromRepr)]
+    #[repr(u8)]
+    pub enum VlanCmd {
+        FilterAdd = 0,
+        FilterDelete = 1,
+    }
+
+    #[derive(Clone, Copy, Debug, strum::FromRepr)]
+    #[repr(u8)]
+    pub enum AnnounceCmd {
+        Ack = 0,
+    }
+}
+
-/// Viona's in-kernel emulation of the device VirtQueues is performed in what it
-/// calls "vrings". Since the userspace portion of the Viona emulation is
+/// Viona's in-kernel emulation of the device VirtQueues is performed in what
+/// are called "vrings". Since the userspace portion of the Viona emulation is
 /// tasked with keeping the vring state in sync with the VirtQueue it
 /// represents, we must track its perceived state.
+
+/// Viona's in-kernel emulation of the device VirtQueues is performed in what
+/// are called "vrings". Since the userspace portion of the Viona emulation is
 /// tasked with keeping the vring state in sync with the VirtQueue it
 /// represents, we must track its perceived state.
 #[derive(Copy, Clone, Default, Eq, PartialEq)]
@@ -75,22 +205,22 @@ enum VRingState {
 struct Inner {
     poller: Option,
     iop_state: Option<NonZeroU16>,
-    vring_state: [VRingState; 2],
+    notify_mmio_addr: Option<u64>,
+    vring_state: Vec<VRingState>,
 }
 impl Inner {
-    fn new() -> Self {
-        Self {
-            poller: None,
-            iop_state: None,
-            vring_state: [Default::default(); 2],
-        }
+    fn new(max_queues: usize) -> Self {
+        let vring_state = vec![Default::default(); max_queues];
+        let poller = None;
+        let iop_state = None;
+        let notify_mmio_addr = None;
+        Self { poller, iop_state, notify_mmio_addr, vring_state }
     }
     /// Get the `VRingState` for a given VirtQueue
     fn for_vq(&mut self, vq: &VirtQueue) -> &mut VRingState {
         let id = vq.id as usize;
+        assert!(id < self.vring_state.len());
         &mut self.vring_state[id]
     }
 }
@@ -155,17 +285,34 @@
 pub struct PciVirtioViona {
     pci_state: pci::DeviceState,
     indicator: lifecycle::Indicator,
-    dev_features: u32,
     mac_addr: [u8; ETHERADDRL],
     mtu: Option<u16>,
     hdl: VionaHdl,
     inner: Mutex<Inner>,
 }
+
 impl PciVirtioViona {
     pub fn new(
         vnic_name: &str,
-        rx_queue_size: NonZeroU16,
-        tx_queue_size: NonZeroU16,
+        vm: &VmmHdl,
+        viona_params: Option,
+    ) -> io::Result<Arc<Self>> {
+        Self::new_with_queue_sizes(
+            vnic_name,
+            RX_QUEUE_SIZE,
+            TX_QUEUE_SIZE,
+            CTL_QUEUE_SIZE,
+            vm,
+            viona_params,
+        )
+    }
+
+    pub fn new_with_queue_sizes(
+        vnic_name: &str,
+        rx_queue_size: VqSize,
+        tx_queue_size: VqSize,
+        ctl_queue_size: VqSize,
         vm: &VmmHdl,
         viona_params: Option,
     ) -> io::Result<Arc<Self>> {
@@ -174,9 +321,7 @@
         let hdl = VionaHdl::new(info.link_id, vm.fd())?;
 
         #[cfg(feature = "falcon")]
-        if let Err(e) =
-            hdl.set_promisc(viona_api::viona_promisc_t::VIONA_PROMISC_ALL_VLAN)
-        {
+        if let Err(e) = hdl.set_promisc(viona_api::VIONA_PROMISC_ALL_VLAN) {
             // Until/unless this support is integrated into stlouis/illumos,
             // this is an expected failure. This is needed to use vlans,
             // but shouldn't affect any other use case.
@@ -187,10 +332,6 @@
             vp.set(&hdl)?;
         }
 
-        // interrupts for TX, RX, and device config
-        let msix_count = Some(3);
-        let dev_features = hdl.get_avail_features()?;
-
         // Do in-kernel configuration of device MTU
         if let Some(mtu) = info.mtu {
             if hdl.api_version().unwrap() >= viona_api::ApiVersion::V4 {
@@ -204,30 +345,40 @@
             }
         }
 
-        let queues = VirtQueues::new(
-            [rx_queue_size, tx_queue_size]
-                .map(|sz| VirtQueue::new(sz.try_into().unwrap())),
-        )
-        .unwrap();
-        let (virtio_state, pci_state) = PciVirtioState::create(
+        let queue_sizes = [rx_queue_size, tx_queue_size]
+            .into_iter()
+            .cycle()
+            .take(max_num_queues())
+            .chain([ctl_queue_size])
+            .collect::<Vec<_>>();
+        // The vector is sized for the maximum number of rings/queues, but
+        // until the driver negotiates multiqueue we only use the first
+        // queue pair plus the control queue.
+        let queues = VirtQueues::new_with_len(3, &queue_sizes);
+        if let Some(ctlq) = queues.get(2) {
+            ctlq.set_control();
+        }
+        let nqueues = queues.max_capacity();
+        hdl.set_pairs(1).unwrap();
+        // Add one for config space.
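+        // One MSI-X vector is allotted per queue, with the extra vector
+        // carrying device configuration-change notifications (the same
+        // convention the old fixed "TX, RX, and device config" allocation
+        // of three vectors followed).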
+        let msix_count = Some(1 + nqueues as u16);
+        let (virtio_state, pci_state) = PciVirtioState::new(
+            virtio::Mode::Transitional,
             queues,
             msix_count,
-            VIRTIO_DEV_NET,
-            VIRTIO_SUB_DEV_NET,
-            pci::bits::CLASS_NETWORK,
+            virtio::DeviceId::Network,
             VIRTIO_NET_CFG_SIZE,
         );
+        let dev_features = hdl.get_avail_features()?;
 
         let mut this = PciVirtioViona {
             virtio_state,
             pci_state,
             indicator: Default::default(),
-            dev_features,
             mac_addr: [0; ETHERADDRL],
             mtu: info.mtu,
             hdl,
-            inner: Mutex::new(Inner::new()),
+            inner: Mutex::new(Inner::new(nqueues)),
         };
         this.mac_addr.copy_from_slice(&info.mac_addr);
         let this = Arc::new(this);
@@ -249,14 +400,116 @@ impl PciVirtioViona {
     fn process_interrupts(&self) {
         if let Some(mem) = self.pci_state.acc_mem.access() {
             self.hdl
-                .intr_poll(|vq_idx| {
+                .intr_poll(self.virtio_state.queues.len() - 1, |vq_idx| {
                     self.hdl.ring_intr_clear(vq_idx).unwrap();
-                    self.virtio_state.queues[vq_idx as usize].send_intr(&mem);
+                    let vq = self.virtio_state.queues.get(vq_idx).unwrap();
+                    vq.send_intr(&mem);
                 })
                 .unwrap();
         }
     }
 
+    fn is_ctl_queue(&self, vq: &Arc<VirtQueue>) -> bool {
+        usize::from(vq.id) + 1 == self.virtio_state.queues.len()
+    }
+
+    fn ctl_queue_notify(&self, vq: &Arc<VirtQueue>) {
+        if let Some(mem) = self.pci_state.acc_mem.access() {
+            while !vq.avail_is_empty(&mem) {
+                let mut chain = Chain::with_capacity(4);
+                let intrs_en = vq.disable_intr(&mem);
+                while let Some((_idx, _len)) = vq.pop_avail(&mut chain, &mem) {
+                    let res = match self.ctl_msg(vq, &mut chain, &mem) {
+                        Ok(_) => control::Ack::Ok,
+                        Err(_) => control::Ack::Err,
+                    } as u8;
+                    chain.write(&res, &mem);
+                    vq.push_used(&mut chain, &mem);
+                }
+                if intrs_en {
+                    vq.enable_intr(&mem);
+                }
+            }
+        }
+    }
+
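To make the ack protocol concrete: per VirtIO 1.2 section 5.1.6, each control-queue element carries a device-readable header and payload followed by a single device-writable status byte, which the loop above fills in via `chain.write`. A purely illustrative guest-side layout for a queue-pair request (hypothetical helper, not code from the patch):

    // descriptor 0 (device-readable):  class = 4 (Mq), command = 0 (SetPairs)
    // descriptor 1 (device-readable):  le16 requested queue-pair count
    // descriptor 2 (device-writable):  one ack byte; 0 = OK, 1 = ERR
    #[repr(C, packed)]
    struct MqPairsSetMsg {
        class: u8,   // 4: network control class "Mq"
        command: u8, // 0: "SetPairs"
        npairs: u16, // little-endian on the wire
    }

    fn mq_set_pairs_request(npairs: u16) -> MqPairsSetMsg {
        MqPairsSetMsg { class: 4, command: 0, npairs: npairs.to_le() }
    }
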
+    fn ctl_msg(
+        &self,
+        vq: &Arc<VirtQueue>,
+        chain: &mut Chain,
+        mem: &MemCtx,
+    ) -> Result<(), ()> {
+        let mut header = control::Header::default();
+        if !chain.read(&mut header, &mem) {
+            return Err(());
+        }
+        use control::Command;
+        match Command::try_from(header).map_err(|_| ())? {
+            Command::Rx(cmd) => self.ctl_rx(cmd, vq, chain, mem),
+            Command::Mac(cmd) => self.ctl_mac(cmd, vq, chain, mem),
+            Command::Vlan(_) => Ok(()),
+            Command::Announce(_) => Ok(()),
+            Command::Mq(cmd) => self.ctl_mq(cmd, vq, chain, mem),
+        }
+    }
+
+    fn ctl_rx(
+        &self,
+        cmd: control::RxCmd,
+        vq: &Arc<VirtQueue>,
+        chain: &mut Chain,
+        mem: &MemCtx,
+    ) -> Result<(), ()> {
+        let _todo = (cmd, vq, chain, mem);
+        Err(())
+    }
+
+    fn ctl_mac(
+        &self,
+        cmd: control::MacCmd,
+        vq: &Arc<VirtQueue>,
+        chain: &mut Chain,
+        mem: &MemCtx,
+    ) -> Result<(), ()> {
+        let _todo = (cmd, vq, chain, mem);
+        Err(())
+    }
+
+    fn set_use_pairs(&self, requested: u16) -> Result<(), ()> {
+        if requested < 1 || PROPOLIS_MAX_MQ_PAIRS < requested {
+            return Err(());
+        }
+        let npairs = requested as usize;
+        if npairs * 2 + 1 == self.virtio_state.queues.len() {
+            return Ok(());
+        }
+        self.hdl.set_usepairs(requested).map_err(|_| ())?;
+        self.virtio_state.queues.set_len(npairs * 2 + 1);
+        Ok(())
+    }
+
+    fn ctl_mq(
+        &self,
+        cmd: control::MqCmd,
+        vq: &Arc<VirtQueue>,
+        chain: &mut Chain,
+        mem: &MemCtx,
+    ) -> Result<(), ()> {
+        use control::MqCmd;
+        let _todo = vq;
+        match cmd {
+            MqCmd::SetPairs => {
+                let mut msg = control::Mq::default();
+                if !chain.read(&mut msg, &mem) {
+                    return Err(());
+                }
+                self.set_use_pairs(msg.npairs)
+            }
+            MqCmd::RssConfig => Err(()),
+            MqCmd::HashConfig => Err(()),
+        }
+    }
+
     fn net_cfg_read(&self, id: &NetReg, ro: &mut ReadOp) {
         match id {
             NetReg::Mac => ro.write_bytes(&self.mac_addr),
@@ -265,8 +518,7 @@
                 ro.write_u16(VIRTIO_NET_S_LINK_UP);
             }
             NetReg::MaxVqPairs => {
-                // hard-wired to single vq pair for now
-                ro.write_u16(1);
+                ro.write_u16(PROPOLIS_MAX_MQ_PAIRS);
             }
             NetReg::Mtu => {
                 // Guests should not be asking for this value unless
@@ -274,6 +526,11 @@
                 // (return zero) than unwrap and panic here.
                 ro.write_u16(self.mtu.unwrap_or(0));
             }
+            NetReg::Speed
+            | NetReg::Duplex
+            | NetReg::RssMaxKeySize
+            | NetReg::RssMaxIndirectionTableLen
+            | NetReg::SupportedHashTypes => {}
         }
     }
 
@@ -282,23 +539,32 @@
     fn queues_sync(&self) {
         let mut inner = self.inner.lock().unwrap();
         for vq in self.virtio_state.queues.iter() {
-            if !vq.live.load(Ordering::Acquire) {
+            // If the queue is not alive, there's nothing to do here.
+            if !vq.is_alive() {
                 continue;
             }
             let rs = inner.for_vq(vq);
             match *rs {
                 VRingState::Ready | VRingState::Run | VRingState::Paused => {
+                    // A control queue has no in-kernel state to synchronize;
+                    // simply mark the ring paused and continue.
+                    if vq.is_control() {
+                        *rs = VRingState::Paused;
+                        continue;
+                    }
+
                     // Ensure the ring is paused for a consistent snapshot
                     if *rs != VRingState::Paused {
-                        if self.hdl.ring_pause(vq.id).is_err() {
+                        if self.hdl.ring_pause(vq).is_err() {
                             *rs = VRingState::Error;
                             continue;
                         }
                         *rs = VRingState::Paused;
                     }
-                    if let Ok(live) = self.hdl.ring_get_state(vq.id) {
+                    if let Ok(live) = self.hdl.ring_get_state(vq) {
                         let base = vq.get_state();
                         assert_eq!(
                             live.mapping.desc_addr,
@@ -330,7 +596,7 @@
                 // The existing state machine for vrings in Viona does not allow for
                 // a Paused -> Running transition, requiring instead that the vring
                 // be reset and reloaded with state in order to proceed again.
- if self.hdl.ring_reset(vq.id).is_err() { + if self.hdl.ring_reset(vq).is_err() { *rs = VRingState::Fatal; res = Err(()); // Although this fatal vring state means the device itself will @@ -340,24 +606,23 @@ impl PciVirtioViona { } *rs = VRingState::Init; - let info = vq.get_state(); - if info.mapping.valid { - if self.hdl.ring_set_state(vq.id, vq.size(), &info).is_err() { + if vq.is_mapped() { + if self.hdl.ring_set_state(vq.as_ref()).is_err() { *rs = VRingState::Error; continue; } if let Some(intr_cfg) = vq.read_intr() { - if self.hdl.ring_cfg_msi(vq.id, Some(intr_cfg)).is_err() { + if self.hdl.ring_cfg_msi(vq, Some(intr_cfg)).is_err() { *rs = VRingState::Error; continue; } } *rs = VRingState::Ready; - if vq.live.load(Ordering::Acquire) { + if vq.is_alive() { // If the ring was already running, kick it. - if self.hdl.ring_kick(vq.id).is_err() { + if self.hdl.ring_kick(vq).is_err() { *rs = VRingState::Error; continue; } @@ -381,7 +646,7 @@ impl PciVirtioViona { // No sense in attempting a reset } _ => { - if self.hdl.ring_reset(vq.id).is_err() { + if self.hdl.ring_reset(vq).is_err() { *rs = VRingState::Fatal; } else { *rs = VRingState::Init; @@ -419,16 +684,18 @@ impl PciVirtioViona { if self.queues_restart().is_err() { self.virtio_state.set_needs_reset(self); self.notify_port_update(None); + self.notify_mmio_addr_update(None); } else { // If all is well with the queue restart, attempt to wire up the // notification ioport again. let state = self.inner.lock().unwrap(); - let _ = self.hdl.set_notify_iop(state.iop_state); + let _ = self.hdl.set_notify_io_port(state.iop_state); + let _ = self.hdl.set_notify_mmio_addr(state.notify_mmio_addr); } } } impl VirtioDevice for PciVirtioViona { - fn cfg_rw(&self, mut rwo: RWOp) { + fn rw_dev_config(&self, mut rwo: RWOp) { NET_DEV_REGS.process(&mut rwo, |id, rwo| match rwo { RWOp::Read(ro) => self.net_cfg_read(id, ro), RWOp::Write(_) => { @@ -436,8 +703,15 @@ impl VirtioDevice for PciVirtioViona { } }); } - fn get_features(&self) -> u32 { - let mut feat = VIRTIO_NET_F_MAC; + fn mode(&self) -> virtio::Mode { + self.virtio_state.mode() + } + + fn features(&self) -> u64 { + let mut feat = VIRTIO_NET_F_MAC + | VIRTIO_NET_F_STATUS + | VIRTIO_NET_F_CTRL_VQ + | VIRTIO_NET_F_MQ; // We drop the "VIRTIO_NET_F_MTU" flag from feat if we are unable to // query it. This can happen when executing within a non-global Zone. 
         //
@@ -449,19 +723,29 @@
         feat
     }
 
-    fn set_features(&self, feat: u32) -> Result<(), ()> {
-        self.hdl.set_features(feat).map_err(|_| ())
+
+    fn set_features(&self, feat: u64) -> Result<(), ()> {
+        self.hdl.set_features(feat).map_err(|_| ())?;
+        if (feat & VIRTIO_NET_F_MQ) != 0 {
+            self.hdl.set_pairs(PROPOLIS_MAX_MQ_PAIRS).map_err(|_| ())?;
+            self.set_use_pairs(PROPOLIS_MAX_MQ_PAIRS)?;
+        }
+        Ok(())
     }
 
     fn queue_notify(&self, vq: &Arc<VirtQueue>) {
+        if self.is_ctl_queue(vq) {
+            self.ctl_queue_notify(vq);
+            return;
+        }
         let mut inner = self.inner.lock().unwrap();
-        let rs = inner.for_vq(vq);
-        match rs {
+        let ring_state = inner.for_vq(vq);
+        match ring_state {
             VRingState::Ready | VRingState::Run => {
-                if self.hdl.ring_kick(vq.id).is_err() {
-                    *rs = VRingState::Error;
+                if self.hdl.ring_kick(vq).is_err() {
+                    *ring_state = VRingState::Error;
                 } else {
-                    *rs = VRingState::Run;
+                    *ring_state = VRingState::Run;
                 }
             }
             _ => {}
@@ -477,7 +761,7 @@
 
         match change {
             VqChange::Reset => {
-                if self.hdl.ring_reset(vq.id).is_err() {
+                if self.hdl.ring_reset(vq).is_err() {
                     *rs = VRingState::Fatal;
                     return Err(());
                 }
@@ -491,7 +775,7 @@
             | VRingState::Paused
             | VRingState::Error => {
                 // Reset any vring not already in such a state
-                if self.hdl.ring_reset(vq.id).is_err() {
+                if self.hdl.ring_reset(vq).is_err() {
                     *rs = VRingState::Fatal;
                     return Err(());
                 }
@@ -502,16 +786,11 @@
                         return Err(());
                     }
                 }
-                let info = vq.get_state();
-                if !info.mapping.valid {
+                if !vq.is_mapped() {
                     return Ok(());
                 }
 
-                if self
-                    .hdl
-                    .ring_init(vq.id, vq.size(), info.mapping.desc_addr)
-                    .is_err()
-                {
+                if !vq.is_control() && self.hdl.ring_init(vq).is_err() {
                     // Bad virtqueue configuration is not fatal. While the
                     // vring will not transition to running, we will be content
                     // to wait for the guest to later provide a valid config.
@@ -520,7 +799,7 @@
                 }
 
                 if let Some(intr_cfg) = vq.read_intr() {
-                    if self.hdl.ring_cfg_msi(vq.id, Some(intr_cfg)).is_err() {
+                    if self.hdl.ring_cfg_msi(vq, Some(intr_cfg)).is_err() {
                         *rs = VRingState::Error;
                     }
                 }
@@ -528,7 +807,8 @@
             }
             VqChange::IntrCfg => {
                 if *rs != VRingState::Fatal {
-                    if self.hdl.ring_cfg_msi(vq.id, vq.read_intr()).is_err() {
+                    let intr = vq.read_intr();
+                    if self.hdl.ring_cfg_msi(vq, intr).is_err() {
                         *rs = VRingState::Error;
                     }
                 }
@@ -557,7 +837,8 @@ impl Lifecycle for PciVirtioViona {
         // reinitialization (as part of a reboot/reset), the notification ioport
         // binding must be torn down. Bhyve will emit failure of an attempted
         // reinitialization operation if any ioport hooks persist at that time.
-        let _ = self.hdl.set_notify_iop(None);
+        let _ = self.hdl.set_notify_io_port(None);
+        let _ = self.hdl.set_notify_mmio_addr(None);
 
         self.indicator.pause();
     }
@@ -585,16 +866,25 @@ impl PciVirtio for PciVirtioViona {
     fn pci_state(&self) -> &pci::DeviceState {
         &self.pci_state
     }
+
+    // The notification addresses (both port and MMIO) for the device can change
+    // due to guest action, or other administrative tasks within propolis.
     fn notify_port_update(&self, port: Option<NonZeroU16>) {
         let mut state = self.inner.lock().unwrap();
         state.iop_state = port;
-
-        // The notification ioport for the device can change due to guest
-        // action, or other administrative tasks within propolis. We want to
-        // update the in-kernel IO port hook only in the former case, when the
-        // device emulation is actually running.
+        // We want to update the in-kernel IO port hook when the address is
+        // updated due to guest action; that is, when the device emulation is
+        // actually running.
+        if self.indicator.state() == IndicatedState::Run {
+            let _ = self.hdl.set_notify_io_port(port);
+        }
+    }
+
+    fn notify_mmio_addr_update(&self, addr: Option<u64>) {
+        let mut state = self.inner.lock().unwrap();
+        state.notify_mmio_addr = addr;
+        // Only update the in-kernel address hook when changed by guest
+        // action, similarly to the port IO case above.
         if self.indicator.state() == IndicatedState::Run {
-            let _ = self.hdl.set_notify_iop(port);
+            let _ = self.hdl.set_notify_mmio_addr(addr);
         }
     }
 }
@@ -632,6 +922,11 @@ enum NetReg {
     Status,
     MaxVqPairs,
     Mtu,
+    Speed,
+    Duplex,
+    RssMaxKeySize,
+    RssMaxIndirectionTableLen,
+    SupportedHashTypes,
 }
 lazy_static! {
     static ref NET_DEV_REGS: RegMap<NetReg> = {
@@ -640,6 +935,11 @@
             (NetReg::Status, 2),
             (NetReg::MaxVqPairs, 2),
             (NetReg::Mtu, 2),
+            (NetReg::Speed, 4),
+            (NetReg::Duplex, 1),
+            (NetReg::RssMaxKeySize, 1),
+            (NetReg::RssMaxIndirectionTableLen, 2),
+            (NetReg::SupportedHashTypes, 4),
         ];
         RegMap::create_packed(VIRTIO_NET_CFG_SIZE, &layout, None)
     };
@@ -647,6 +947,44 @@
 
 use viona_api::VionaFd;
 
+impl From<&VirtQueue> for viona_api::vioc_ring_init_modern {
+    fn from(vq: &VirtQueue) -> viona_api::vioc_ring_init_modern {
+        let id = vq.id;
+        let size = vq.size();
+        let state = vq.get_state();
+        let desc_addr = state.mapping.desc_addr;
+        let avail_addr = state.mapping.avail_addr;
+        let used_addr = state.mapping.used_addr;
+        viona_api::vioc_ring_init_modern {
+            rim_index: id,
+            rim_qsize: size,
+            rim_qaddr_desc: desc_addr,
+            rim_qaddr_avail: avail_addr,
+            rim_qaddr_used: used_addr,
+            ..Default::default()
+        }
+    }
+}
+
+impl From<&VirtQueue> for viona_api::vioc_ring_state {
+    fn from(vq: &VirtQueue) -> viona_api::vioc_ring_state {
+        let id = vq.id;
+        let size = vq.size();
+        let state = vq.get_state();
+        let desc_addr = state.mapping.desc_addr;
+        let avail_addr = state.mapping.avail_addr;
+        let used_addr = state.mapping.used_addr;
+        viona_api::vioc_ring_state {
+            vrs_index: id,
+            vrs_qsize: size,
+            vrs_qaddr_desc: desc_addr,
+            vrs_qaddr_avail: avail_addr,
+            vrs_qaddr_used: used_addr,
+            vrs_avail_idx: state.avail_idx,
+            vrs_used_idx: state.used_idx,
+            ..Default::default()
+        }
+    }
+}
+
 struct VionaHdl(VionaFd);
 impl VionaHdl {
     fn new(link_id: u32, vm_fd: RawFd) -> io::Result<Self> {
@@ -658,109 +996,139 @@
         self.0.ioctl_usize(viona_api::VNA_IOC_DELETE, 0)?;
         Ok(())
     }
-    fn get_avail_features(&self) -> io::Result<u32> {
-        let mut value = 0;
+    fn get_avail_features(&self) -> io::Result<u64> {
+        let mut features = 0;
         unsafe {
-            self.0.ioctl(viona_api::VNA_IOC_GET_FEATURES, &mut value)?;
+            self.0.ioctl(viona_api::VNA_IOC_GET_FEATURES, &mut features)?;
         }
-        Ok(value)
+        Ok(features)
     }
-    fn set_features(&self, feat: u32) -> io::Result<()> {
-        let mut value = feat;
+    fn set_features(&self, mut features: u64) -> io::Result<()> {
         unsafe {
-            self.0.ioctl(viona_api::VNA_IOC_SET_FEATURES, &mut value)?;
+            self.0.ioctl(viona_api::VNA_IOC_SET_FEATURES, &mut features)?;
         }
         Ok(())
     }
-    fn ring_init(&self, idx: u16, size: u16, addr: u64) -> io::Result<()> {
-        let mut vna_ring_init = viona_api::vioc_ring_init {
-            ri_index: idx,
-            ri_qsize: size,
-            _pad: [0; 2],
-            ri_qaddr: addr,
-        };
-        unsafe {
-            self.0.ioctl(viona_api::VNA_IOC_RING_INIT, &mut vna_ring_init)?;
+    fn set_pairs(&self, npairs: u16) -> io::Result<()> {
+        self.0.ioctl_usize(viona_api::VNA_IOC_SET_PAIRS, npairs as usize)?;
+        Ok(())
+    }
+    fn set_usepairs(&self, npairs: u16) -> io::Result<()> {
+        self.0.ioctl_usize(viona_api::VNA_IOC_SET_USEPAIRS, npairs as usize)?;
+        Ok(())
+    }
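Note why the feature plumbing widens from u32 to u64 here: VirtIO 1.0's defining feature bit, VIRTIO_F_VERSION_1, is bit 32, beyond what the legacy 32-bit feature registers can express. A free-standing illustration (the constant's value comes from the VirtIO spec; the helper is hypothetical):

    // VIRTIO_F_VERSION_1 lives at bit 32 of the 64-bit feature space, so it
    // is only representable once feature words are carried as u64.
    const VIRTIO_F_VERSION_1: u64 = 1 << 32;

    fn negotiated_modern(features: u64) -> bool {
        features & VIRTIO_F_VERSION_1 != 0
    }
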
+    fn ring_init(&self, vq: &VirtQueue) -> io::Result<()> {
+        if !vq.is_control() {
+            let mut vna_ring_init = viona_api::vioc_ring_init_modern::from(vq);
+            unsafe {
+                self.0.ioctl(
+                    viona_api::VNA_IOC_RING_INIT_MODERN,
+                    &mut vna_ring_init,
+                )?;
+            }
         }
         Ok(())
     }
-    fn ring_reset(&self, idx: u16) -> io::Result<()> {
-        self.0.ioctl_usize(viona_api::VNA_IOC_RING_RESET, idx as usize)?;
+    fn ring_reset(&self, vq: &VirtQueue) -> io::Result<()> {
+        if !vq.is_control() {
+            let idx = vq.id as usize;
+            self.0.ioctl_usize(viona_api::VNA_IOC_RING_RESET, idx)?;
+        }
         Ok(())
     }
-    fn ring_kick(&self, idx: u16) -> io::Result<()> {
-        self.0.ioctl_usize(viona_api::VNA_IOC_RING_KICK, idx as usize)?;
+    fn ring_kick(&self, vq: &VirtQueue) -> io::Result<()> {
+        if !vq.is_control() {
+            let idx = vq.id as usize;
+            self.0.ioctl_usize(viona_api::VNA_IOC_RING_KICK, idx)?;
+        }
         Ok(())
     }
-    fn ring_pause(&self, idx: u16) -> io::Result<()> {
-        self.0.ioctl_usize(viona_api::VNA_IOC_RING_PAUSE, idx as usize)?;
+    fn ring_pause(&self, vq: &VirtQueue) -> io::Result<()> {
+        if !vq.is_control() {
+            let idx = vq.id as usize;
+            self.0.ioctl_usize(viona_api::VNA_IOC_RING_PAUSE, idx)?;
+        }
         Ok(())
     }
-    fn ring_set_state(
-        &self,
-        idx: u16,
-        size: u16,
-        info: &queue::Info,
-    ) -> io::Result<()> {
-        let mut cfg = viona_api::vioc_ring_state {
-            vrs_index: idx,
-            vrs_avail_idx: info.avail_idx,
-            vrs_used_idx: info.used_idx,
-            vrs_qsize: size,
-            vrs_qaddr: info.mapping.desc_addr,
-        };
-        unsafe {
-            self.0.ioctl(viona_api::VNA_IOC_RING_SET_STATE, &mut cfg)?;
+    fn ring_set_state(&self, vq: &VirtQueue) -> io::Result<()> {
+        if !vq.is_control() {
+            let mut cfg = viona_api::vioc_ring_state::from(vq);
+            unsafe {
+                self.0.ioctl(viona_api::VNA_IOC_RING_SET_STATE, &mut cfg)?;
+            }
         }
         Ok(())
     }
-    fn ring_get_state(&self, idx: u16) -> io::Result<queue::Info> {
-        let mut cfg =
-            viona_api::vioc_ring_state { vrs_index: idx, ..Default::default() };
-        unsafe {
-            self.0.ioctl(viona_api::VNA_IOC_RING_GET_STATE, &mut cfg)?;
+    fn ring_get_state(&self, vq: &VirtQueue) -> io::Result<queue::Info> {
+        let mut cfg = viona_api::vioc_ring_state {
+            vrs_index: vq.id,
+            ..Default::default()
+        };
+        if !vq.is_control() {
+            unsafe {
+                self.0.ioctl(viona_api::VNA_IOC_RING_GET_STATE, &mut cfg)?;
+            }
         }
         Ok(queue::Info {
             mapping: queue::MapInfo {
-                desc_addr: cfg.vrs_qaddr,
-                avail_addr: 0,
-                used_addr: 0,
+                desc_addr: cfg.vrs_qaddr_desc,
+                avail_addr: cfg.vrs_qaddr_avail,
+                used_addr: cfg.vrs_qaddr_used,
                 valid: true,
             },
            avail_idx: cfg.vrs_avail_idx,
            used_idx: cfg.vrs_used_idx,
        })
    }
-    fn ring_cfg_msi(&self, idx: u16, cfg: Option<VqIntr>) -> io::Result<()> {
-        let (addr, msg) = match cfg {
-            Some(VqIntr::Msi(a, m, masked)) if !masked => (a, m),
-            // If MSI is disabled, or the entry is masked (individually,
-            // or at the function level), then disable in-kernel
-            // acceleration of MSI delivery.
-            _ => (0, 0),
-        };
-
-        let mut vna_ring_msi = viona_api::vioc_ring_msi {
-            rm_index: idx,
-            _pad: [0; 3],
-            rm_addr: addr,
-            rm_msg: u64::from(msg),
-        };
-        unsafe {
-            self.0.ioctl(viona_api::VNA_IOC_RING_SET_MSI, &mut vna_ring_msi)?;
+    fn ring_cfg_msi(
+        &self,
+        vq: &VirtQueue,
+        cfg: Option<VqIntr>,
+    ) -> io::Result<()> {
+        if !vq.is_control() {
+            let (addr, msg) = match cfg {
+                Some(VqIntr::Msi(a, m, masked)) if !masked => (a, m),
+                // If MSI is disabled, or the entry is masked (individually,
+                // or at the function level), then disable in-kernel
+                // acceleration of MSI delivery.
+                _ => (0, 0),
+            };
+
+            let mut vna_ring_msi = viona_api::vioc_ring_msi {
+                rm_index: vq.id,
+                _pad: [0; 3],
+                rm_addr: addr,
+                rm_msg: u64::from(msg),
+            };
+            unsafe {
+                self.0.ioctl(
+                    viona_api::VNA_IOC_RING_SET_MSI,
+                    &mut vna_ring_msi,
+                )?;
+            }
         }
         Ok(())
     }
-    fn intr_poll(&self, mut f: impl FnMut(u16)) -> io::Result<()> {
-        let mut vna_ip = viona_api::vioc_intr_poll {
-            vip_status: [0; viona_api::VIONA_VQ_MAX as usize],
+    fn intr_poll(
+        &self,
+        max_intrs: usize,
+        mut f: impl FnMut(u16),
+    ) -> io::Result<()> {
+        let mut vna_ip = viona_api::vioc_intr_poll_mq::default();
+        vna_ip.vipm_nrings = max_intrs as u16;
+        let mut nintrs = unsafe {
+            self.0.ioctl(viona_api::VNA_IOC_INTR_POLL_MQ, &mut vna_ip)?
         };
-        unsafe {
-            self.0.ioctl(viona_api::VNA_IOC_INTR_POLL, &mut vna_ip)?;
-        }
-        for i in 0..viona_api::VIONA_VQ_MAX {
-            if vna_ip.vip_status[i as usize] != 0 {
-                f(i)
+        let nrings = vna_ip.vipm_nrings as usize;
+        for i in 0..nrings {
+            let k = i / 32;
+            let b = i % 32;
+            if vna_ip.vipm_status[k].get_bit(b) {
+                f(i as u16);
+                nintrs -= 1;
+                if nintrs == 0 {
+                    break;
+                }
             }
         }
         Ok(())
@@ -786,27 +1154,41 @@
     }
 
-    /// Set the IO port to which viona attaches for virtqueue notifications
+    /// Sets the address that viona recognizes for virtqueue notifications
     ///
-    /// The viona driver is able to install an IO port hook in the associated VM
-    /// at a specified address in order to process `out` operations which would
-    /// result in the in-kernel emulated virtqueues being notified of available
-    /// buffers.
+    /// Viona can install a hook in the associated VM at a specified address (in
+    /// either the guest port or physical address spaces) to recognize guest
+    /// writes that notify in-kernel emulated virtqueues of available buffers.
     ///
     /// With a non-zero argument, viona will attempt to attach such a hook,
-    /// replacing any currently in place.  When the argument is None, any
+    /// replacing any currently in place.  When the argument is None, any
     /// existing hook is torn down.
-    fn set_notify_iop(&self, port: Option<NonZeroU16>) -> io::Result<()> {
+    fn set_notify_io_port(&self, port: Option<NonZeroU16>) -> io::Result<()> {
         self.0.ioctl_usize(
             viona_api::VNA_IOC_SET_NOTIFY_IOP,
             port.map(|p| p.get()).unwrap_or(0) as usize,
         )?;
         Ok(())
     }
+    fn set_notify_mmio_addr(&self, addr: Option<u64>) -> io::Result<()> {
+        let mut vim = viona_api::vioc_notify_mmio::default();
+        let ptr = addr
+            .map(|vim_address| {
+                vim.vim_address = vim_address;
+                vim.vim_size = super::pci::NOTIFY_REG_SIZE as u32;
+                &raw mut vim
+            })
+            .unwrap_or(std::ptr::null_mut());
+        unsafe {
+            self.0.ioctl(viona_api::VNA_IOC_SET_NOTIFY_MMIO, ptr)?;
+        }
+        Ok(())
+    }
+
     ///
     /// Set the desired promiscuity level on this interface.
     #[cfg(feature = "falcon")]
-    fn set_promisc(&self, p: viona_api::viona_promisc_t) -> io::Result<()> {
+    fn set_promisc(&self, p: i32) -> io::Result<()> {
         self.0.ioctl_usize(viona_api::VNA_IOC_SET_PROMISC, p as usize)?;
         Ok(())
     }
@@ -1004,7 +1386,7 @@ pub(crate) mod bits {
     pub const VIRTIO_NET_S_LINK_UP: u16 = 1 << 0;
     pub const VIRTIO_NET_S_ANNOUNCE: u16 = 1 << 1;
 
-    pub const VIRTIO_NET_CFG_SIZE: usize = 0xc;
+    pub const VIRTIO_NET_CFG_SIZE: usize = 6 + 2 + 2 + 2 + 4 + 1 + 1 + 2 + 4;
 }
 use bits::*;
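The status decoding in `intr_poll` above packs one pending-interrupt flag per ring into an array of u32 words. A self-contained sketch of the same walk, with made-up values (`bit_field::BitField` supplies `get_bit`):

    use bit_field::BitField;

    fn pending_rings(status: &[u32], nrings: usize) -> Vec<u16> {
        let mut out = Vec::new();
        for i in 0..nrings {
            // Word k holds rings [32k, 32k + 31]; bit b of that word is
            // ring 32k + b.
            let (k, b) = (i / 32, i % 32);
            if status[k].get_bit(b) {
                out.push(i as u16);
            }
        }
        out
    }

    // Rings 1 and 33 pending: bit 1 of word 0 and bit 1 of word 1.
    assert_eq!(pending_rings(&[0b10, 0b10], 64), vec![1, 33]);
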
diff --git a/lib/propolis/src/util/aspace.rs b/lib/propolis/src/util/aspace.rs
index 57bc49104..ca41a0a19 100644
--- a/lib/propolis/src/util/aspace.rs
+++ b/lib/propolis/src/util/aspace.rs
@@ -50,7 +50,7 @@ impl<T> ASpace<T> {
     /// # Panics
     ///
     /// - Panics if start >= end.
-    pub fn new(start: usize, end: usize) -> ASpace<T> {
+    pub const fn new(start: usize, end: usize) -> ASpace<T> {
         assert!(start < end);
         Self { start, end, map: BTreeMap::new() }
     }
diff --git a/lib/propolis/src/util/regmap.rs b/lib/propolis/src/util/regmap.rs
index eddd0b8e8..9b6113923 100644
--- a/lib/propolis/src/util/regmap.rs
+++ b/lib/propolis/src/util/regmap.rs
@@ -42,7 +42,7 @@ struct RegXfer<'a, ID> {
 }
 
 impl<ID> RegMap<ID> {
-    pub fn new(len: usize) -> Self {
+    pub const fn new(len: usize) -> Self {
         Self { len, space: ASpace::new(0, len - 1) }
     }
 
@@ -328,7 +328,7 @@ mod test {
             &[('a', 1), ('b', 1), ('c', 2), ('d', 4), ('e', 8)],
             None,
         );
-        let expected = vec![
+        let expected = &[
             Xfer::read('a', 0, 1),
             Xfer::read('b', 0, 1),
             Xfer::read('c', 0, 2),
@@ -338,11 +338,11 @@
         // Each field individually
         let reads = [(0, 1), (1, 1), (2, 2), (4, 4), (8, 8)];
         let res = drive_reads(&reads, &map);
-        assert_eq!(res, expected);
+        assert_eq!(&res, expected);
 
         // One big op, covering all
         let reads = [(0, 0x10)];
         let res = drive_reads(&reads, &map);
-        assert_eq!(res, expected);
+        assert_eq!(&res, expected);
     }
 
     #[test]
     fn misaligned() {
@@ -353,7 +353,7 @@
             None,
         );
 
-        let expected = vec![
+        let expected = &[
            Xfer::read('a', 0, 6),
            Xfer::read('a', 0, 6),
            Xfer::read('b', 0, 2),
@@ -363,6 +363,6 @@
         // Each field individually with 4-byte reads
         let reads = [(0, 4), (4, 4), (8, 4)];
         let res = drive_reads(&reads, &map);
-        assert_eq!(res, expected);
+        assert_eq!(&res, expected);
     }
 }
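
A closing note on the `const fn` conversions in the last two hunks: they allow a register map or address space to be built in a const context, such as a `static`, rather than lazily through lazy_static at first use. A sketch of the idea, assuming the surrounding types' bounds permit this exact form (hypothetical usage, not code from the patch):

    // `ASpace::new` being `const fn` (with `BTreeMap::new` const since
    // Rust 1.66) permits compile-time construction of an empty map:
    static PORT_SPACE: ASpace<&'static str> = ASpace::new(0, 0xffff);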