Skip to content

Commit f39a608

Browse files
committed
distinguish probes across NVMe devices
NVMe commands are unique in a device across queue ID (`qid` in various parts of Propolis) and command ID (`cid` in various parts of Propolis), but are ambiguous when looking at an entire VM: two different NVMe devices will have a "queue 1", and their distinct "queue 1" may also have "command 1234" submitted at the same time. Pick a device ID for NVMe controllers, and plumb that through to distinguish queues similar to how we distinguish queues for block devices: a `devq_id` construction that makes a device and queue unique across the VM. In the NVMe case, a `devq_id` is not uniquely identifying across the life of a VM, as the queues can be destroyed, recreated, the controller reset, etc. But absent such an administrative operation, this helps untangle what's happening across a VM with many disks. Note that block attachment IDs and NVMe IDs are similar in name, and probably similar in practice, but are tracked distinctly. This is similar to the difference between block qeuue IDs and NVMe queue IDs: the architcture is similar, but the mapping between them is more arbitrary and may change as an implementation detail of Propolis.
1 parent fa5c1a8 commit f39a608

File tree

7 files changed

+177
-68
lines changed

7 files changed

+177
-68
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/propolis/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ ispf = { workspace = true, optional = true }
4545
rand = { workspace = true, optional = true }
4646
softnpu = { workspace = true, optional = true }
4747
dlpi = { workspace = true, optional = true }
48+
static_assertions = "1.1.0"
4849

4950
[dev-dependencies]
5051
crossbeam-channel.workspace = true

lib/propolis/src/block/attachment.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ use thiserror::Error;
3939
use tokio::sync::futures::Notified;
4040
use tokio::sync::Notify;
4141

42-
/// Static for generating unique block [DeviceId]s with a process
42+
/// Static for generating unique block [DeviceId]s within a process
43+
///
44+
/// Numbering across block devices means that a block `DeviceId` and the queue
45+
/// ID in a block attachment are unique across a VM.
4346
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);
4447

4548
pub const MAX_WORKERS: NonZeroUsize = NonZeroUsize::new(64).unwrap();

lib/propolis/src/hw/nvme/admin.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use std::mem::size_of;
66

77
use crate::common::{GuestAddr, GuestRegion, PAGE_SIZE};
8+
use crate::hw::nvme;
89
use crate::vmm::MemCtx;
910

1011
use super::bits::*;
@@ -17,15 +18,16 @@ use super::{
1718

1819
#[usdt::provider(provider = "propolis")]
1920
mod probes {
20-
fn nvme_abort(cid: u16, sqid: u16) {}
21+
fn nvme_abort(cid: u16, devsq_id: u64) {}
2122
}
2223

2324
impl NvmeCtrl {
2425
/// Abort command.
2526
///
2627
/// See NVMe 1.0e Section 5.1 Abort command
2728
pub(super) fn acmd_abort(&self, cmd: &cmds::AbortCmd) -> cmds::Completion {
28-
probes::nvme_abort!(|| (cmd.cid, cmd.sqid));
29+
let devsq_id = nvme::devq_id(self.device_id, cmd.sqid);
30+
probes::nvme_abort!(|| (cmd.cid, devsq_id));
2931

3032
// Verify the SQ in question currently exists
3133
let sqid = cmd.sqid as usize;
@@ -74,6 +76,7 @@ impl NvmeCtrl {
7476
match self.create_cq(
7577
super::queue::CreateParams {
7678
id: cmd.qid,
79+
device_id: self.device_id,
7780
base: GuestAddr(cmd.prp),
7881
size: cmd.qsize,
7982
},
@@ -119,6 +122,7 @@ impl NvmeCtrl {
119122
match self.create_sq(
120123
super::queue::CreateParams {
121124
id: cmd.qid,
125+
device_id: self.device_id,
122126
base: GuestAddr(cmd.prp),
123127
size: cmd.qsize,
124128
},

lib/propolis/src/hw/nvme/mod.rs

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use std::convert::TryInto;
66
use std::mem::size_of;
77
use std::num::NonZeroUsize;
8+
use std::sync::atomic::AtomicU32;
89
use std::sync::atomic::{AtomicBool, Ordering};
910
use std::sync::{Arc, Mutex, MutexGuard, Weak};
1011

@@ -31,13 +32,40 @@ mod requests;
3132
use bits::*;
3233
use queue::{CompQueue, QueueId, SubQueue};
3334

35+
/// Static for generating unique NVMe device identifiers within a process.
36+
///
37+
/// Numbering across NVMe devices means that the combination of NVMe
38+
/// `device_id` and queue ID identify either an NVMe submission or completion
39+
/// queue across a VM.
40+
///
41+
/// Guests typically will configure submission queue N to complete into
42+
/// completion queue N, but this is not a mandated device configuration.
43+
static NEXT_DEVICE_ID: AtomicU32 = AtomicU32::new(0);
44+
45+
type DeviceId = u32;
46+
3447
#[usdt::provider(provider = "propolis")]
3548
mod probes {
36-
fn nvme_doorbell(off: u64, qid: u16, is_cq: u8, val: u16) {}
49+
fn nvme_doorbell(off: u64, devq_id: u64, is_cq: u8, val: u16) {}
3750
fn nvme_doorbell_admin_cq(val: u16) {}
3851
fn nvme_doorbell_admin_sq(val: u16) {}
3952
fn nvme_admin_cmd(opcode: u8, prp1: u64, prp2: u64) {}
40-
fn nvme_block_notify(sqid: u16, block_qid: u16, occupied_hint: u16) {}
53+
fn nvme_block_notify(devsq_id: u64, block_devqid: u64, occupied_hint: u16) {
54+
}
55+
}
56+
57+
/// Combine an NVMe device and queue ID into a single u64 for probes
58+
pub(crate) fn devq_id(dev: DeviceId, queue: QueueId) -> u64 {
59+
// We'll use the low 16 bits for the queue ID. Assert at compile time that
60+
// queue IDs cannot go above that. Clippy helpfully asserts this is an
61+
// absurd comparison, so silence that. If you see a rustc error here you
62+
// must have changed the type of QueueId such this is no longer absurd!
63+
#[allow(clippy::absurd_extreme_comparisons)]
64+
{
65+
static_assertions::const_assert!(QueueId::MAX <= u16::MAX);
66+
}
67+
68+
((dev as u64) << 16) | (queue as u64)
4169
}
4270

4371
/// The max number of MSI-X interrupts we support
@@ -154,6 +182,10 @@ const MAX_NUM_IO_QUEUES: usize = MAX_NUM_QUEUES - 1;
154182

155183
/// NVMe Controller
156184
struct NvmeCtrl {
185+
/// A distinguishing identifier for this NVMe controller across the VM.
186+
/// Useful mostly to distinguish queues and commands as seen in probes.
187+
device_id: DeviceId,
188+
157189
/// Internal NVMe Controller state
158190
ctrl: CtrlState,
159191

@@ -185,6 +217,7 @@ impl NvmeCtrl {
185217
self.create_cq(
186218
queue::CreateParams {
187219
id: queue::ADMIN_QUEUE_ID,
220+
device_id: self.device_id,
188221
base: GuestAddr(self.ctrl.admin_cq_base),
189222
// Convert from 0's based
190223
size: u32::from(self.ctrl.aqa.acqs()) + 1,
@@ -196,6 +229,7 @@ impl NvmeCtrl {
196229
self.create_sq(
197230
queue::CreateParams {
198231
id: queue::ADMIN_QUEUE_ID,
232+
device_id: self.device_id,
199233
base: GuestAddr(self.ctrl.admin_sq_base),
200234
// Convert from 0's based
201235
size: u32::from(self.ctrl.aqa.asqs()) + 1,
@@ -589,6 +623,7 @@ impl NvmeCtrl {
589623
self.create_cq(
590624
queue::CreateParams {
591625
id: cqi.id,
626+
device_id: self.device_id,
592627
base: GuestAddr(cqi.base),
593628
size: cqi.size,
594629
},
@@ -611,6 +646,7 @@ impl NvmeCtrl {
611646
.create_sq(
612647
queue::CreateParams {
613648
id: sqi.id,
649+
device_id: self.device_id,
614650
base: GuestAddr(sqi.base),
615651
size: sqi.size,
616652
},
@@ -742,6 +778,11 @@ pub struct PciNvme {
742778
/// accesses without stacking them up behind one central lock.
743779
is_enabled: AtomicBool,
744780

781+
/// Duplicate of the controller NVMe device ID, but not requiring locking
782+
/// [NvmeCtrl] to read. This is used to provide additional context in
783+
/// NVMe-related probes.
784+
device_id: DeviceId,
785+
745786
/// Access to NVMe Submission and Completion queues.
746787
///
747788
/// These are protected with per-slot (queue ID) locks, so actions taken on
@@ -849,6 +890,7 @@ impl PciNvme {
849890
let csts = Status(0);
850891

851892
let state = NvmeCtrl {
893+
device_id: NEXT_DEVICE_ID.fetch_add(1, Ordering::Relaxed),
852894
ctrl: CtrlState { cap, cc, csts, ..Default::default() },
853895
doorbell_buf: None,
854896
msix_hdl: None,
@@ -879,9 +921,13 @@ impl PciNvme {
879921
}
880922
}));
881923

924+
// Peel out device ID before we move it into the Mutex below.
925+
let device_id = state.device_id;
926+
882927
PciNvme {
883928
state: Mutex::new(state),
884929
is_enabled: AtomicBool::new(false),
930+
device_id,
885931
pci_state,
886932
queues: NvmeQueues::default(),
887933
block_attach,
@@ -1110,9 +1156,12 @@ impl PciNvme {
11101156
// 32-bit register but ignore reserved top 16-bits
11111157
let val = wo.read_u32() as u16;
11121158

1159+
// Mix in the device ID for probe purposes
1160+
let devq_id = devq_id(self.device_id, qid);
1161+
11131162
probes::nvme_doorbell!(|| (
11141163
off as u64,
1115-
qid,
1164+
devq_id,
11161165
u8::from(is_cq),
11171166
val
11181167
));
@@ -1188,9 +1237,12 @@ impl PciNvme {
11881237
})
11891238
{
11901239
let block_qid = queue::sqid_to_block_qid(sqid);
1240+
let devsq_id = devq_id(self.device_id, sqid);
1241+
let block_devqid =
1242+
block::devq_id(self.block_attach.device_id(), block_qid);
11911243
probes::nvme_block_notify!(|| (
1192-
sqid,
1193-
u16::from(block_qid),
1244+
devsq_id,
1245+
block_devqid,
11941246
num_occupied
11951247
));
11961248
self.block_attach.notify(
@@ -1207,14 +1259,17 @@ impl PciNvme {
12071259
let sq = guard.as_ref().unwrap();
12081260

12091261
let num_occupied = sq.notify_tail(val)?;
1262+
let devsq_id = sq.devq_id();
12101263
// Notification to block layer cannot be issued with SQ lock held
12111264
drop(guard);
12121265

12131266
assert_ne!(qid, queue::ADMIN_QUEUE_ID);
12141267
let block_qid = queue::sqid_to_block_qid(qid);
1268+
let block_devqid =
1269+
block::devq_id(self.block_attach.device_id(), block_qid);
12151270
probes::nvme_block_notify!(|| (
1216-
qid,
1217-
u16::from(block_qid),
1271+
devsq_id,
1272+
block_devqid,
12181273
num_occupied
12191274
));
12201275
self.block_attach

0 commit comments

Comments
 (0)