Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions lib/propolis/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ ispf = { workspace = true, optional = true }
rand = { workspace = true, optional = true }
softnpu = { workspace = true, optional = true }
dlpi = { workspace = true, optional = true }
static_assertions = "1.1.0"

[dev-dependencies]
crossbeam-channel.workspace = true
Expand Down
5 changes: 4 additions & 1 deletion lib/propolis/src/block/attachment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ use thiserror::Error;
use tokio::sync::futures::Notified;
use tokio::sync::Notify;

/// Static for generating unique block [DeviceId]s with a process
/// Static for generating unique block [DeviceId]s within a process
///
/// Numbering across block devices means that a block `DeviceId` and the queue
/// ID in a block attachment are unique across a VM.
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);

pub const MAX_WORKERS: NonZeroUsize = NonZeroUsize::new(64).unwrap();
Expand Down
8 changes: 6 additions & 2 deletions lib/propolis/src/hw/nvme/admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use std::mem::size_of;

use crate::common::{GuestAddr, GuestRegion, PAGE_SIZE};
use crate::hw::nvme;
use crate::vmm::MemCtx;

use super::bits::*;
Expand All @@ -17,15 +18,16 @@ use super::{

#[usdt::provider(provider = "propolis")]
mod probes {
fn nvme_abort(cid: u16, sqid: u16) {}
fn nvme_abort(cid: u16, devsq_id: u64) {}
}

impl NvmeCtrl {
/// Abort command.
///
/// See NVMe 1.0e Section 5.1 Abort command
pub(super) fn acmd_abort(&self, cmd: &cmds::AbortCmd) -> cmds::Completion {
probes::nvme_abort!(|| (cmd.cid, cmd.sqid));
let devsq_id = nvme::devq_id(self.device_id, cmd.sqid);
probes::nvme_abort!(|| (cmd.cid, devsq_id));

// Verify the SQ in question currently exists
let sqid = cmd.sqid as usize;
Expand Down Expand Up @@ -74,6 +76,7 @@ impl NvmeCtrl {
match self.create_cq(
super::queue::CreateParams {
id: cmd.qid,
device_id: self.device_id,
base: GuestAddr(cmd.prp),
size: cmd.qsize,
},
Expand Down Expand Up @@ -119,6 +122,7 @@ impl NvmeCtrl {
match self.create_sq(
super::queue::CreateParams {
id: cmd.qid,
device_id: self.device_id,
base: GuestAddr(cmd.prp),
size: cmd.qsize,
},
Expand Down
69 changes: 62 additions & 7 deletions lib/propolis/src/hw/nvme/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use std::convert::TryInto;
use std::mem::size_of;
use std::num::NonZeroUsize;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, MutexGuard, Weak};

Expand All @@ -31,13 +32,40 @@ mod requests;
use bits::*;
use queue::{CompQueue, QueueId, SubQueue};

/// Static for generating unique NVMe device identifiers within a process.
///
/// Numbering across NVMe devices means that the combination of NVMe
/// `device_id` and queue ID identify either an NVMe submission or completion
/// queue across a VM.
///
/// Guests typically will configure submission queue N to complete into
/// completion queue N, but this is not a mandated device configuration.
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);

type DeviceId = u32;

#[usdt::provider(provider = "propolis")]
mod probes {
fn nvme_doorbell(off: u64, qid: u16, is_cq: u8, val: u16) {}
fn nvme_doorbell(off: u64, devq_id: u64, is_cq: u8, val: u16) {}
fn nvme_doorbell_admin_cq(val: u16) {}
fn nvme_doorbell_admin_sq(val: u16) {}
fn nvme_admin_cmd(opcode: u8, prp1: u64, prp2: u64) {}
fn nvme_block_notify(sqid: u16, block_qid: u16, occupied_hint: u16) {}
fn nvme_block_notify(devsq_id: u64, block_devqid: u64, occupied_hint: u16) {
}
}

/// Combine an NVMe device and queue ID into a single u64 for probes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... to make a globally unique ID?

Copy link
Member Author

@iximeow iximeow Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't want to go quite that far: the NVMe controller can be reset, which will destroy queues, abandon any in-flight work, and probably have queues with the same ID re-created when the guest sets up the controller again. but I'll adjust this to say a bit more.

pub(crate) fn devq_id(dev: DeviceId, queue: QueueId) -> u64 {
// We'll use the low 16 bits for the queue ID. Assert at compile time that
// queue IDs cannot go above that. Clippy helpfully asserts this is an
// absurd comparison, so silence that. If you see a rustc error here you
// must have changed the type of QueueId such this is no longer absurd!
#[allow(clippy::absurd_extreme_comparisons)]
{
static_assertions::const_assert!(QueueId::MAX <= u16::MAX);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Clippy's defense u16::MAX <= u16::MAX is pretty absurd...

}

((dev as u64) << 16) | (queue as u64)
}

/// The max number of MSI-X interrupts we support
Expand Down Expand Up @@ -154,6 +182,10 @@ const MAX_NUM_IO_QUEUES: usize = MAX_NUM_QUEUES - 1;

/// NVMe Controller
struct NvmeCtrl {
/// A distinguishing identifier for this NVMe controller across the VM.
/// Useful mostly to distinguish queues and commands as seen in probes.
device_id: DeviceId,

/// Internal NVMe Controller state
ctrl: CtrlState,

Expand Down Expand Up @@ -185,6 +217,7 @@ impl NvmeCtrl {
self.create_cq(
queue::CreateParams {
id: queue::ADMIN_QUEUE_ID,
device_id: self.device_id,
base: GuestAddr(self.ctrl.admin_cq_base),
// Convert from 0's based
size: u32::from(self.ctrl.aqa.acqs()) + 1,
Expand All @@ -196,6 +229,7 @@ impl NvmeCtrl {
self.create_sq(
queue::CreateParams {
id: queue::ADMIN_QUEUE_ID,
device_id: self.device_id,
base: GuestAddr(self.ctrl.admin_sq_base),
// Convert from 0's based
size: u32::from(self.ctrl.aqa.asqs()) + 1,
Expand Down Expand Up @@ -589,6 +623,7 @@ impl NvmeCtrl {
self.create_cq(
queue::CreateParams {
id: cqi.id,
device_id: self.device_id,
base: GuestAddr(cqi.base),
size: cqi.size,
},
Expand All @@ -611,6 +646,7 @@ impl NvmeCtrl {
.create_sq(
queue::CreateParams {
id: sqi.id,
device_id: self.device_id,
base: GuestAddr(sqi.base),
size: sqi.size,
},
Expand Down Expand Up @@ -742,6 +778,11 @@ pub struct PciNvme {
/// accesses without stacking them up behind one central lock.
is_enabled: AtomicBool,

/// Duplicate of the controller NVMe device ID, but not requiring locking
/// [NvmeCtrl] to read. This is used to provide additional context in
/// NVMe-related probes.
device_id: DeviceId,

/// Access to NVMe Submission and Completion queues.
///
/// These are protected with per-slot (queue ID) locks, so actions taken on
Expand Down Expand Up @@ -849,6 +890,7 @@ impl PciNvme {
let csts = Status(0);

let state = NvmeCtrl {
device_id: NEXT_DEVICE_ID.fetch_add(1, Ordering::Relaxed),
ctrl: CtrlState { cap, cc, csts, ..Default::default() },
doorbell_buf: None,
msix_hdl: None,
Expand Down Expand Up @@ -879,9 +921,13 @@ impl PciNvme {
}
}));

// Peel out device ID before we move it into the Mutex below.
let device_id = state.device_id;

PciNvme {
state: Mutex::new(state),
is_enabled: AtomicBool::new(false),
device_id,
pci_state,
queues: NvmeQueues::default(),
block_attach,
Expand Down Expand Up @@ -1110,9 +1156,12 @@ impl PciNvme {
// 32-bit register but ignore reserved top 16-bits
let val = wo.read_u32() as u16;

// Mix in the device ID for probe purposes
let devq_id = devq_id(self.device_id, qid);

probes::nvme_doorbell!(|| (
off as u64,
qid,
devq_id,
u8::from(is_cq),
val
));
Expand Down Expand Up @@ -1188,9 +1237,12 @@ impl PciNvme {
})
{
let block_qid = queue::sqid_to_block_qid(sqid);
let devsq_id = devq_id(self.device_id, sqid);
let block_devqid =
block::devq_id(self.block_attach.device_id(), block_qid);
probes::nvme_block_notify!(|| (
sqid,
u16::from(block_qid),
devsq_id,
block_devqid,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bringing block_qid -> block_devqid along here because otherwise we'd know which NVMe device notified a block queue ... somewhere ... with no way to find out which block attachment we'd just poked.

num_occupied
));
self.block_attach.notify(
Expand All @@ -1207,14 +1259,17 @@ impl PciNvme {
let sq = guard.as_ref().unwrap();

let num_occupied = sq.notify_tail(val)?;
let devsq_id = sq.devq_id();
// Notification to block layer cannot be issued with SQ lock held
drop(guard);

assert_ne!(qid, queue::ADMIN_QUEUE_ID);
let block_qid = queue::sqid_to_block_qid(qid);
let block_devqid =
block::devq_id(self.block_attach.device_id(), block_qid);
probes::nvme_block_notify!(|| (
qid,
u16::from(block_qid),
devsq_id,
block_devqid,
num_occupied
));
self.block_attach
Expand Down
Loading
Loading