Skip to content

Commit 5eadae8

Browse files
authored
Implement PyStackRef borrowed stack references for LOAD_FAST_BORROW (#7290)
* Implement PyStackRef borrowed stack references for LOAD_FAST_BORROW * Fix PyStackRef: use NonZeroUsize for niche optimization, revert borrowed refs - Change PyStackRef.bits from usize to NonZeroUsize so Option<PyStackRef> has the same size as Option<PyObjectRef> via niche optimization - Revert LoadFastBorrow to use clone instead of actual borrowed refs to avoid borrowed refs remaining on stack at yield points - Add static size assertions for Option<PyStackRef> - Add stackref, fastlocal to cspell dictionary - Remove debug eprintln statements - Fix clippy warning for unused push_borrowed * Add borrowed ref debug_assert to InstrumentedYieldValue, clean up comments
1 parent 58a0f74 commit 5eadae8

File tree

5 files changed

+259
-42
lines changed

5 files changed

+259
-42
lines changed

.cspell.dict/cpython.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ eofs
5454
evalloop
5555
excepthandler
5656
exceptiontable
57+
fastlocal
58+
fastlocals
5759
fblock
5860
fblocks
5961
fdescr
@@ -171,6 +173,7 @@ SMALLBUF
171173
SOABI
172174
SSLEOF
173175
stackdepth
176+
stackref
174177
staticbase
175178
stginfo
176179
storefast

crates/vm/src/frame.rs

Lines changed: 98 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#[cfg(feature = "flame")]
22
use crate::bytecode::InstructionMetadata;
33
use crate::{
4-
AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, VirtualMachine,
4+
AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, PyStackRef, TryFromObject,
5+
VirtualMachine,
56
builtins::{
67
PyBaseException, PyBaseExceptionRef, PyCode, PyCoroutine, PyDict, PyDictRef, PyGenerator,
78
PyInterpolation, PyList, PySet, PySlice, PyStr, PyStrInterned, PyTemplate, PyTraceback,
@@ -60,7 +61,7 @@ enum UnwindReason {
6061
struct FrameState {
6162
// We need 1 stack per frame
6263
/// The main data frame of the stack machine
63-
stack: BoxVec<Option<PyObjectRef>>,
64+
stack: BoxVec<Option<PyStackRef>>,
6465
/// Cell and free variable references (cellvars + freevars).
6566
cells_frees: Box<[PyCellRef]>,
6667
/// Previous line number for LINE event suppression.
@@ -1280,14 +1281,9 @@ impl ExecutingFrame<'_> {
12801281
// This is 1-indexed to match CPython
12811282
let idx = index.get(arg) as usize;
12821283
let stack_len = self.state.stack.len();
1283-
if stack_len < idx {
1284-
eprintln!("CopyItem ERROR: stack_len={}, idx={}", stack_len, idx);
1285-
eprintln!(" code: {}", self.code.obj_name);
1286-
eprintln!(" lasti: {}", self.lasti());
1287-
panic!("CopyItem: stack underflow");
1288-
}
1284+
debug_assert!(stack_len >= idx, "CopyItem: stack underflow");
12891285
let value = self.state.stack[stack_len - idx].clone();
1290-
self.push_value_opt(value);
1286+
self.push_stackref_opt(value);
12911287
Ok(None)
12921288
}
12931289
Instruction::CopyFreeVars { .. } => {
@@ -1868,8 +1864,9 @@ impl ExecutingFrame<'_> {
18681864
self.push_value(x2);
18691865
Ok(None)
18701866
}
1871-
// TODO: Implement true borrow optimization (skip Arc::clone).
1872-
// Currently this just clones like LoadFast.
1867+
// Borrow optimization not yet active; falls back to clone.
1868+
// push_borrowed() is available but disabled until stack
1869+
// lifetime issues at yield/exception points are resolved.
18731870
Instruction::LoadFastBorrow(idx) => {
18741871
let idx = idx.get(arg) as usize;
18751872
let x = unsafe { self.fastlocals.borrow() }[idx]
@@ -2502,6 +2499,14 @@ impl ExecutingFrame<'_> {
25022499
Ok(None)
25032500
}
25042501
Instruction::YieldValue { arg: oparg } => {
2502+
debug_assert!(
2503+
self.state
2504+
.stack
2505+
.iter()
2506+
.flatten()
2507+
.all(|sr| !sr.is_borrowed()),
2508+
"borrowed refs on stack at yield point"
2509+
);
25052510
let value = self.pop_value();
25062511
// arg=0: direct yield (wrapped for async generators)
25072512
// arg=1: yield from await/yield-from (NOT wrapped)
@@ -2681,6 +2686,14 @@ impl ExecutingFrame<'_> {
26812686
self.unwind_blocks(vm, UnwindReason::Returning { value })
26822687
}
26832688
Instruction::InstrumentedYieldValue => {
2689+
debug_assert!(
2690+
self.state
2691+
.stack
2692+
.iter()
2693+
.flatten()
2694+
.all(|sr| !sr.is_borrowed()),
2695+
"borrowed refs on stack at yield point"
2696+
);
26842697
let value = self.pop_value();
26852698
if self.monitoring_mask & monitoring::EVENT_PY_YIELD != 0 {
26862699
let offset = (self.lasti() - 1) * 2;
@@ -3604,18 +3617,24 @@ impl ExecutingFrame<'_> {
36043617

36053618
let mut elements = elements;
36063619
// Elements on stack from right-to-left:
3607-
self.state
3608-
.stack
3609-
.extend(elements.drain(before + middle..).rev().map(Some));
3620+
self.state.stack.extend(
3621+
elements
3622+
.drain(before + middle..)
3623+
.rev()
3624+
.map(|e| Some(PyStackRef::new_owned(e))),
3625+
);
36103626

36113627
let middle_elements = elements.drain(before..).collect();
36123628
let t = vm.ctx.new_list(middle_elements);
36133629
self.push_value(t.into());
36143630

36153631
// Lastly the first reversed values:
3616-
self.state
3617-
.stack
3618-
.extend(elements.into_iter().rev().map(Some));
3632+
self.state.stack.extend(
3633+
elements
3634+
.into_iter()
3635+
.rev()
3636+
.map(|e| Some(PyStackRef::new_owned(e))),
3637+
);
36193638

36203639
Ok(None)
36213640
}
@@ -3854,9 +3873,12 @@ impl ExecutingFrame<'_> {
38543873
if let Some(elements) = fast_elements {
38553874
return match elements.len().cmp(&size) {
38563875
core::cmp::Ordering::Equal => {
3857-
self.state
3858-
.stack
3859-
.extend(elements.into_iter().rev().map(Some));
3876+
self.state.stack.extend(
3877+
elements
3878+
.into_iter()
3879+
.rev()
3880+
.map(|e| Some(PyStackRef::new_owned(e))),
3881+
);
38603882
Ok(None)
38613883
}
38623884
core::cmp::Ordering::Greater => Err(vm.new_value_error(format!(
@@ -3922,9 +3944,12 @@ impl ExecutingFrame<'_> {
39223944
Err(vm.new_value_error(msg))
39233945
}
39243946
PyIterReturn::StopIteration(_) => {
3925-
self.state
3926-
.stack
3927-
.extend(elements.into_iter().rev().map(Some));
3947+
self.state.stack.extend(
3948+
elements
3949+
.into_iter()
3950+
.rev()
3951+
.map(|e| Some(PyStackRef::new_owned(e))),
3952+
);
39283953
Ok(None)
39293954
}
39303955
}
@@ -4057,43 +4082,75 @@ impl ExecutingFrame<'_> {
40574082
// Block stack functions removed - exception table handles all exception/cleanup
40584083

40594084
#[inline]
4060-
#[track_caller] // not a real track_caller but push_value is less useful for debugging
4061-
fn push_value_opt(&mut self, obj: Option<PyObjectRef>) {
4085+
#[track_caller]
4086+
fn push_stackref_opt(&mut self, obj: Option<PyStackRef>) {
40624087
match self.state.stack.try_push(obj) {
40634088
Ok(()) => {}
40644089
Err(_e) => self.fatal("tried to push value onto stack but overflowed max_stackdepth"),
40654090
}
40664091
}
40674092

4093+
#[inline]
4094+
#[track_caller] // not a real track_caller but push_value is less useful for debugging
4095+
fn push_value_opt(&mut self, obj: Option<PyObjectRef>) {
4096+
self.push_stackref_opt(obj.map(PyStackRef::new_owned));
4097+
}
4098+
40684099
#[inline]
40694100
#[track_caller]
40704101
fn push_value(&mut self, obj: PyObjectRef) {
4071-
self.push_value_opt(Some(obj));
4102+
self.push_stackref_opt(Some(PyStackRef::new_owned(obj)));
4103+
}
4104+
4105+
/// Push a borrowed reference onto the stack (no refcount increment).
4106+
///
4107+
/// # Safety
4108+
/// The object must remain alive until the borrowed ref is consumed.
4109+
/// The compiler guarantees consumption within the same basic block.
4110+
#[inline]
4111+
#[track_caller]
4112+
#[allow(dead_code)]
4113+
unsafe fn push_borrowed(&mut self, obj: &PyObject) {
4114+
self.push_stackref_opt(Some(unsafe { PyStackRef::new_borrowed(obj) }));
40724115
}
40734116

40744117
#[inline]
40754118
fn push_null(&mut self) {
4076-
self.push_value_opt(None);
4119+
self.push_stackref_opt(None);
40774120
}
40784121

4079-
/// Pop a value from the stack, returning None if the stack slot is NULL
4122+
/// Pop a raw stackref from the stack, returning None if the stack slot is NULL.
40804123
#[inline]
4081-
fn pop_value_opt(&mut self) -> Option<PyObjectRef> {
4124+
fn pop_stackref_opt(&mut self) -> Option<PyStackRef> {
40824125
match self.state.stack.pop() {
4083-
Some(slot) => slot, // slot is Option<PyObjectRef>
4126+
Some(slot) => slot,
40844127
None => self.fatal("tried to pop from empty stack"),
40854128
}
40864129
}
40874130

4131+
/// Pop a raw stackref from the stack. Panics if NULL.
40884132
#[inline]
40894133
#[track_caller]
4090-
fn pop_value(&mut self) -> PyObjectRef {
4134+
fn pop_stackref(&mut self) -> PyStackRef {
40914135
expect_unchecked(
4092-
self.pop_value_opt(),
4093-
"pop value but null found. This is a compiler bug.",
4136+
self.pop_stackref_opt(),
4137+
"pop stackref but null found. This is a compiler bug.",
40944138
)
40954139
}
40964140

4141+
/// Pop a value from the stack, returning None if the stack slot is NULL.
4142+
/// Automatically promotes borrowed refs to owned.
4143+
#[inline]
4144+
fn pop_value_opt(&mut self) -> Option<PyObjectRef> {
4145+
self.pop_stackref_opt().map(|sr| sr.to_pyobj())
4146+
}
4147+
4148+
#[inline]
4149+
#[track_caller]
4150+
fn pop_value(&mut self) -> PyObjectRef {
4151+
self.pop_stackref().to_pyobj()
4152+
}
4153+
40974154
fn call_intrinsic_1(
40984155
&mut self,
40994156
func: bytecode::IntrinsicFunction1,
@@ -4262,22 +4319,23 @@ impl ExecutingFrame<'_> {
42624319
);
42634320
}
42644321
self.state.stack.drain(stack_len - count..).map(|obj| {
4265-
expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.")
4322+
expect_unchecked(obj, "pop_multiple but null found. This is a compiler bug.").to_pyobj()
42664323
})
42674324
}
42684325

42694326
#[inline]
4270-
fn replace_top(&mut self, mut top: Option<PyObjectRef>) -> Option<PyObjectRef> {
4327+
fn replace_top(&mut self, top: Option<PyObjectRef>) -> Option<PyObjectRef> {
4328+
let mut slot = top.map(PyStackRef::new_owned);
42714329
let last = self.state.stack.last_mut().unwrap();
4272-
core::mem::swap(last, &mut top);
4273-
top
4330+
core::mem::swap(last, &mut slot);
4331+
slot.map(|sr| sr.to_pyobj())
42744332
}
42754333

42764334
#[inline]
42774335
#[track_caller]
42784336
fn top_value(&self) -> &PyObject {
42794337
match &*self.state.stack {
4280-
[.., Some(last)] => last,
4338+
[.., Some(last)] => last.as_object(),
42814339
[.., None] => self.fatal("tried to get top of stack but got NULL"),
42824340
[] => self.fatal("tried to get top of stack but stack is empty"),
42834341
}
@@ -4288,7 +4346,7 @@ impl ExecutingFrame<'_> {
42884346
fn nth_value(&self, depth: u32) -> &PyObject {
42894347
let stack = &self.state.stack;
42904348
match &stack[stack.len() - depth as usize - 1] {
4291-
Some(obj) => obj,
4349+
Some(obj) => obj.as_object(),
42924350
None => unsafe { core::hint::unreachable_unchecked() },
42934351
}
42944352
}
@@ -4415,7 +4473,7 @@ fn is_module_initializing(module: &PyObject, vm: &VirtualMachine) -> bool {
44154473
initializing_attr.try_to_bool(vm).unwrap_or(false)
44164474
}
44174475

4418-
fn expect_unchecked(optional: Option<PyObjectRef>, err_msg: &'static str) -> PyObjectRef {
4476+
fn expect_unchecked<T: fmt::Debug>(optional: Option<T>, err_msg: &'static str) -> T {
44194477
if cfg!(debug_assertions) {
44204478
optional.expect(err_msg)
44214479
} else {

crates/vm/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ pub mod windows;
9898
pub use self::convert::{TryFromBorrowedObject, TryFromObject};
9999
pub use self::object::{
100100
AsObject, Py, PyAtomicRef, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact,
101-
PyResult, PyWeakRef,
101+
PyResult, PyStackRef, PyWeakRef,
102102
};
103103
pub use self::vm::{Context, Interpreter, InterpreterBuilder, Settings, VirtualMachine};
104104

0 commit comments

Comments
 (0)