From fafd98e3f39177555cfd5a38aec6662f24992276 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier
Date: Fri, 15 May 2026 15:59:39 +0200
Subject: [PATCH] =?UTF-8?q?feat(riscv):=20i64=20Phase=201=20=E2=80=94=20ty?=
 =?UTF-8?q?ped=20vstack=20+=20i64=20arithmetic=20/=20logic=20/=20compares?=
 =?UTF-8?q?=20/=20loads=20/=20stores?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Refactors the RV32IMAC selector's virtual stack from `Vec<Reg>` to
`Vec<VstackVal>`, a typed enum, so i64 values can live on the stack as a
`(lo, hi)` register pair. RV32 doesn't require consecutive pairs (unlike
ARM's LDRD/STRD), so any two distinct temps work.

New typed helpers replace the untyped `push_val`/`pop_val`/`pop_pair`:

    push_i32 / push_i64 / pop_i32 / pop_i64 / pop_pair_i32 / pop_pair_i64

A new `SelectorError::StackTypeMismatch` variant surfaces selector-internal
type bugs (rather than silently mixing halves).

Phase 1 i64 ops implemented (selector-only — encoder already supports every
base instruction needed):

    I64Const:       two emit_load_imm sequences (lo + hi)
    I64Add:         add lo, sltu carry, add hi, add hi+carry
    I64Sub:         sltu borrow, sub lo, sub hi, sub hi-borrow
    I64And/Or/Xor:  pairwise on lo and hi
    I64Eq / I64Ne:  xor diffs, or them, then sltiu/sltu → i32 0/1
    I64Eqz:         or halves, sltiu → i32 0/1
    I64ExtendI32U:  hi = 0
    I64ExtendI32S:  hi = srai src, 31
    I32WrapI64:     zero-instruction; lo continues as i32
    I64Load:        lw lo @offset, lw hi @offset+4
    I64Store:       sw lo @offset, sw hi @offset+4

The return epilogue now handles both i32 (a0) and i64 (a0=lo, a1=hi) return
values, matching the RV32 psABI for 64-bit returns.

Out of scope (Phase 2): i64 mul/div/rem (runtime helpers), i64 shifts /
rotates / clz / ctz / popcnt (shamt branching at 32-bit boundary), i64
ordered compares (lt/le/gt/ge S+U — hi-then-lo ladder), i64 sign-extending
sub-word loads, I64Extend{8,16,32}S, sub-word i64 stores. These remain at
the existing `_ => Unsupported` arm and error cleanly.

Validation:
- cargo test --package synth-backend-riscv: 98 → 110 passing (+12 new)
- cargo clippy --package synth-backend-riscv -- -D warnings: clean
- cargo fmt --check: clean
- cargo build --workspace: clean

Co-Authored-By: Claude Opus 4.7
---
 crates/synth-backend-riscv/src/selector.rs | 787 +++++++++++++++++++--
 1 file changed, 740 insertions(+), 47 deletions(-)

diff --git a/crates/synth-backend-riscv/src/selector.rs b/crates/synth-backend-riscv/src/selector.rs
index 7541629..9cf2043 100644
--- a/crates/synth-backend-riscv/src/selector.rs
+++ b/crates/synth-backend-riscv/src/selector.rs
@@ -5,12 +5,20 @@
 //! flow (block / loop / if / br / br_if), and local variable access.
 //!
 //! Out of scope (see `select_simple` doc comments for the full list):
-//! - i64 (handled by `select_i64.rs` in a follow-up PR)
+//! - i64 multiply / divide / remainder / shifts / rotates / count-leading-or-
+//!   trailing-zeros / popcount / signed-and-unsigned compare ladders /
+//!   sign-extending sub-word loads (Phase 2 — needs runtime helpers and
+//!   shamt branching at the 32-bit boundary)
 //! - F32/F64 (RV32F/D — not yet wired)
 //! - br_table (lowered in B3 alongside jump tables)
 //! - Cross-function calls (need linker-resolvable Call ops + relocations)
 //! - Component Model lifting/lowering
 //!
+//! i64 representation: on RV32, an i64 value is held in a *register pair*
+//! `(lo, hi)` where `lo` is bits [31:0] and `hi` is bits [63:32]. The two
+//! halves don't need to be consecutive registers (unlike ARM's LDRD/STRD
+//! pair requirement) — any two distinct temporaries work.
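+//! For example, `i64.const 0x1_0000_0001` lowers to `lo = 1` and `hi = 1`,
+//! each half materialized with its own `emit_load_imm` sequence.
+//!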
 //! Memory model: the wasm linear-memory base lives in `s11` (x27) — chosen
 //! because it's callee-saved in the standard RV32 psABI calling convention,
 //! so leaf-style functions can rely on it without a re-load. The startup
@@ -37,6 +45,34 @@ pub enum SelectorError {
     #[error("br depth {depth} out of range (control stack height {height})")]
     BrOutOfRange { depth: u32, height: usize },
+
+    #[error("stack type mismatch at op {op:?}: expected {expected}, found {found} on top of stack")]
+    StackTypeMismatch {
+        op: WasmOp,
+        expected: &'static str,
+        found: &'static str,
+    },
+}
+
+/// A value sitting on the selector's virtual stack. RV32 is a 32-bit ISA so
+/// i32s map to a single register, while i64s require a `(lo, hi)` pair (see
+/// the module-level docs for the bit-ordering convention).
+#[derive(Debug, Clone, Copy)]
+enum VstackVal {
+    I32(Reg),
+    I64 { lo: Reg, hi: Reg },
+}
+
+/// An i64 register pair: `.0` is `lo` (bits [31:0]), `.1` is `hi` (bits [63:32]).
+type I64Pair = (Reg, Reg);
+
+impl VstackVal {
+    fn type_name(self) -> &'static str {
+        match self {
+            VstackVal::I32(_) => "i32",
+            VstackVal::I64 { .. } => "i64",
+        }
+    }
 }
 
 /// Output of the selector.
@@ -112,8 +148,9 @@ enum FrameKind {
 struct Selector {
     out: Vec<RiscVOp>,
-    /// Virtual stack of registers holding wasm values.
-    vstack: Vec<Reg>,
+    /// Virtual stack of values produced by lowering. i32s occupy a single
+    /// register; i64s occupy a `(lo, hi)` register pair.
+    vstack: Vec<VstackVal>,
     /// Control-flow frames; index 0 is the outermost.
     ctrl: Vec,
     /// Argument registers for the current function's params (a0..a7).
@@ -171,19 +208,69 @@ impl Selector {
         r
     }
 
-    fn push_val(&mut self, r: Reg) {
-        self.vstack.push(r);
+    fn push_i32(&mut self, r: Reg) {
+        self.vstack.push(VstackVal::I32(r));
     }
 
-    fn pop_val(&mut self, op: &WasmOp) -> Result<Reg, SelectorError> {
+    fn push_i64(&mut self, lo: Reg, hi: Reg) {
+        self.vstack.push(VstackVal::I64 { lo, hi });
+    }
+
+    fn pop_any(&mut self, op: &WasmOp) -> Result<VstackVal, SelectorError> {
         self.vstack
             .pop()
             .ok_or_else(|| SelectorError::StackUnderflow(op.clone()))
     }
 
-    fn pop_pair(&mut self, op: &WasmOp) -> Result<(Reg, Reg), SelectorError> {
-        let rhs = self.pop_val(op)?;
-        let lhs = self.pop_val(op)?;
+    /// Pop the top of stack and assert it's an i32. Errors with
+    /// `StackTypeMismatch` if the top is an i64 (caller's bug, never the
+    /// wasm input's bug — by this point the wasm has been validated).
+    fn pop_i32(&mut self, op: &WasmOp) -> Result<Reg, SelectorError> {
+        let v = self.pop_any(op)?;
+        match v {
+            VstackVal::I32(r) => Ok(r),
+            other => {
+                // Restore for diagnostic determinism — the function will
+                // bail out anyway, but we don't want subsequent code to
+                // see a different stack layout if someone catches the err.
+                self.vstack.push(other);
+                Err(SelectorError::StackTypeMismatch {
+                    op: op.clone(),
+                    expected: "i32",
+                    found: other.type_name(),
+                })
+            }
+        }
+    }
+
+    /// Pop the top of stack and assert it's an i64, returning `(lo, hi)`.
+    fn pop_i64(&mut self, op: &WasmOp) -> Result<I64Pair, SelectorError> {
+        let v = self.pop_any(op)?;
+        match v {
+            VstackVal::I64 { lo, hi } => Ok((lo, hi)),
+            other => {
+                self.vstack.push(other);
+                Err(SelectorError::StackTypeMismatch {
+                    op: op.clone(),
+                    expected: "i64",
+                    found: other.type_name(),
+                })
+            }
+        }
+    }
+
+    /// Pop two i32s; returns `(lhs, rhs)` in wasm push-order (lhs was pushed
+    /// first, rhs second — i.e. rhs is on top of stack).
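+    /// For example, `i32.sub` lowers via `let (lhs, rhs) = self.pop_pair_i32(op)?`
+    /// and emits `sub rd, lhs, rhs`; wasm's `a b i32.sub` computes `a - b`,
+    /// so the order matters for every non-commutative op.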
+ fn pop_pair_i32(&mut self, op: &WasmOp) -> Result<(Reg, Reg), SelectorError> { + let rhs = self.pop_i32(op)?; + let lhs = self.pop_i32(op)?; + Ok((lhs, rhs)) + } + + /// Pop two i64s; returns `((lhs_lo, lhs_hi), (rhs_lo, rhs_hi))`. + fn pop_pair_i64(&mut self, op: &WasmOp) -> Result<(I64Pair, I64Pair), SelectorError> { + let rhs = self.pop_i64(op)?; + let lhs = self.pop_i64(op)?; Ok((lhs, rhs)) } @@ -209,7 +296,14 @@ impl Selector { I32Const(v) => { let dst = self.alloc_temp(); emit_load_imm(&mut self.out, dst, *v); - self.push_val(dst); + self.push_i32(dst); + } + I64Const(v) => { + let lo = self.alloc_temp(); + let hi = self.alloc_temp(); + emit_load_imm(&mut self.out, lo, *v as i32); + emit_load_imm(&mut self.out, hi, (*v >> 32) as i32); + self.push_i64(lo, hi); } // ─── Arithmetic ───────────────────────────────────────────── @@ -248,6 +342,27 @@ impl Selector { I32LeU => self.lower_cmp_unsigned_ge(op, true)?, I32GeU => self.lower_cmp_unsigned_ge(op, false)?, + // ─── i64 arithmetic / logic ───────────────────────────────── + I64Add => self.lower_i64_add(op)?, + I64Sub => self.lower_i64_sub(op)?, + I64And => self.lower_i64_bitwise(op, |rd, rs1, rs2| RiscVOp::And { rd, rs1, rs2 })?, + I64Or => self.lower_i64_bitwise(op, |rd, rs1, rs2| RiscVOp::Or { rd, rs1, rs2 })?, + I64Xor => self.lower_i64_bitwise(op, |rd, rs1, rs2| RiscVOp::Xor { rd, rs1, rs2 })?, + + // ─── i64 comparisons (result is an i32 0/1) ───────────────── + I64Eq => self.lower_i64_eq(op, false)?, + I64Ne => self.lower_i64_eq(op, true)?, + I64Eqz => self.lower_i64_eqz(op)?, + + // ─── i64 / i32 conversions ────────────────────────────────── + I64ExtendI32U => self.lower_i64_extend_i32_u(op)?, + I64ExtendI32S => self.lower_i64_extend_i32_s(op)?, + I32WrapI64 => self.lower_i32_wrap_i64(op)?, + + // ─── i64 memory ───────────────────────────────────────────── + I64Load { offset, align: _ } => self.lower_i64_load(op, *offset)?, + I64Store { offset, align: _ } => self.lower_i64_store(op, *offset)?, + // ─── Memory ───────────────────────────────────────────────── I32Load { offset, align: _ } => self.lower_load_word(op, *offset)?, I32Load8S { offset, align: _ } => { @@ -268,7 +383,9 @@ impl Selector { // ─── Stack manipulation ───────────────────────────────────── Drop => { - self.pop_val(op)?; + // Drop is type-agnostic: pop whatever's on top (i32 or i64). + // Popping an i64 discards the whole pair in one shot. + self.pop_any(op)?; } Nop => {} Unreachable => { @@ -333,12 +450,14 @@ impl Selector { // we don't materialize the frame — emit a clear error instead. return Err(SelectorError::Unsupported(op.clone())); } - self.push_val(dst); + // Phase 1: locals are always treated as i32. i64 locals would need + // two arg-register slots and live outside the scope of this PR. + self.push_i32(dst); Ok(()) } fn lower_local_set(&mut self, idx: u32, op: &WasmOp) -> Result<(), SelectorError> { - let src = self.pop_val(op)?; + let src = self.pop_i32(op)?; if (idx as usize) < self.arg_regs.len() { // mv arg, src self.out.push(RiscVOp::Addi { @@ -353,11 +472,23 @@ impl Selector { } fn lower_local_tee(&mut self, idx: u32, op: &WasmOp) -> Result<(), SelectorError> { - // tee = set + get; the value remains on the stack. - let src = *self + // tee = set + get; the value remains on the stack. Phase 1 only + // handles i32 locals — see lower_local_set for the same restriction. 
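+        // e.g. `i32.const 7 ; local.tee 0` stores 7 into local 0 and also
+        // leaves the 7 on the stack for the next consumer.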
+ let top = self .vstack .last() + .copied() .ok_or_else(|| SelectorError::StackUnderflow(op.clone()))?; + let src = match top { + VstackVal::I32(r) => r, + other => { + return Err(SelectorError::StackTypeMismatch { + op: op.clone(), + expected: "i32", + found: other.type_name(), + }); + } + }; if (idx as usize) < self.arg_regs.len() { self.out.push(RiscVOp::Addi { rd: self.arg_regs[idx as usize], @@ -376,10 +507,10 @@ impl Selector { where F: FnOnce(Reg, Reg, Reg) -> RiscVOp, { - let (rs1, rs2) = self.pop_pair(op)?; + let (rs1, rs2) = self.pop_pair_i32(op)?; let rd = self.alloc_temp(); self.out.push(build(rd, rs1, rs2)); - self.push_val(rd); + self.push_i32(rd); Ok(()) } @@ -389,7 +520,7 @@ impl Selector { where F: FnOnce(Reg, Reg, Reg) -> RiscVOp, { - let (rs1, rs2) = self.pop_pair(op)?; + let (rs1, rs2) = self.pop_pair_i32(op)?; let rd = self.alloc_temp(); let ok_label = self.fresh_label("Ldiv_ok"); // bne rs2, zero, Ldiv_ok → skip trap when divisor != 0 @@ -404,14 +535,14 @@ impl Selector { name: ok_label.clone(), }); self.out.push(build(rd, rs1, rs2)); - self.push_val(rd); + self.push_i32(rd); Ok(()) } // ────────── Comparisons ────────── fn lower_eqz(&mut self, op: &WasmOp) -> Result<(), SelectorError> { - let src = self.pop_val(op)?; + let src = self.pop_i32(op)?; let dst = self.alloc_temp(); // sltiu dst, src, 1 → 1 iff src == 0 self.out.push(RiscVOp::Sltiu { @@ -419,12 +550,12 @@ impl Selector { rs1: src, imm: 1, }); - self.push_val(dst); + self.push_i32(dst); Ok(()) } fn lower_cmp_eq(&mut self, op: &WasmOp, invert: bool) -> Result<(), SelectorError> { - let (lhs, rhs) = self.pop_pair(op)?; + let (lhs, rhs) = self.pop_pair_i32(op)?; let diff = self.alloc_temp(); // xor diff, lhs, rhs → 0 iff equal self.out.push(RiscVOp::Xor { @@ -448,22 +579,22 @@ impl Selector { imm: 1, }); } - self.push_val(dst); + self.push_i32(dst); Ok(()) } fn lower_cmp_signed_lt(&mut self, op: &WasmOp, swap: bool) -> Result<(), SelectorError> { - let (a, b) = self.pop_pair(op)?; + let (a, b) = self.pop_pair_i32(op)?; let dst = self.alloc_temp(); let (rs1, rs2) = if swap { (b, a) } else { (a, b) }; self.out.push(RiscVOp::Slt { rd: dst, rs1, rs2 }); - self.push_val(dst); + self.push_i32(dst); Ok(()) } fn lower_cmp_signed_ge(&mut self, op: &WasmOp, swap: bool) -> Result<(), SelectorError> { // a >= b = !(a < b) ; le maps via swap. 
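+        // e.g. I32GeS pops (a, b) and emits `slt lt, a, b ; sltiu dst, lt, 1`;
+        // I32LeS swaps the operands first, since a <= b == !(b < a).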
- let (a, b) = self.pop_pair(op)?; + let (a, b) = self.pop_pair_i32(op)?; let lt = self.alloc_temp(); let dst = self.alloc_temp(); let (rs1, rs2) = if swap { (b, a) } else { (a, b) }; @@ -474,21 +605,21 @@ impl Selector { rs1: lt, imm: 1, }); - self.push_val(dst); + self.push_i32(dst); Ok(()) } fn lower_cmp_unsigned_lt(&mut self, op: &WasmOp, swap: bool) -> Result<(), SelectorError> { - let (a, b) = self.pop_pair(op)?; + let (a, b) = self.pop_pair_i32(op)?; let dst = self.alloc_temp(); let (rs1, rs2) = if swap { (b, a) } else { (a, b) }; self.out.push(RiscVOp::Sltu { rd: dst, rs1, rs2 }); - self.push_val(dst); + self.push_i32(dst); Ok(()) } fn lower_cmp_unsigned_ge(&mut self, op: &WasmOp, swap: bool) -> Result<(), SelectorError> { - let (a, b) = self.pop_pair(op)?; + let (a, b) = self.pop_pair_i32(op)?; let lt = self.alloc_temp(); let dst = self.alloc_temp(); let (rs1, rs2) = if swap { (b, a) } else { (a, b) }; @@ -498,14 +629,14 @@ impl Selector { rs1: lt, imm: 1, }); - self.push_val(dst); + self.push_i32(dst); Ok(()) } // ────────── Memory ────────── fn lower_load_word(&mut self, op: &WasmOp, offset: u32) -> Result<(), SelectorError> { - let addr = self.pop_val(op)?; + let addr = self.pop_i32(op)?; let dst = self.alloc_temp(); // tmp = base + addr let tmp = self.alloc_temp(); @@ -520,7 +651,7 @@ impl Selector { rs1: tmp, imm: offset_to_imm(offset)?, }); - self.push_val(dst); + self.push_i32(dst); Ok(()) } @@ -530,7 +661,7 @@ impl Selector { offset: u32, kind: LoadKind, ) -> Result<(), SelectorError> { - let addr = self.pop_val(op)?; + let addr = self.pop_i32(op)?; let dst = self.alloc_temp(); let tmp = self.alloc_temp(); self.out.push(RiscVOp::Add { @@ -562,7 +693,7 @@ impl Selector { }, }; self.out.push(op_built); - self.push_val(dst); + self.push_i32(dst); Ok(()) } @@ -572,8 +703,8 @@ impl Selector { offset: u32, kind: StoreKind, ) -> Result<(), SelectorError> { - let value = self.pop_val(op)?; - let addr = self.pop_val(op)?; + let value = self.pop_i32(op)?; + let addr = self.pop_i32(op)?; let tmp = self.alloc_temp(); self.out.push(RiscVOp::Add { rd: tmp, @@ -605,7 +736,7 @@ impl Selector { // ────────── Control flow ────────── fn lower_if(&mut self, op: &WasmOp) -> Result<(), SelectorError> { - let cond = self.pop_val(op)?; + let cond = self.pop_i32(op)?; let else_label = self.fresh_label("Lelse"); let end_label = self.fresh_label("Lif_end"); // beq cond, zero, Lelse → skip the then-branch when cond is false @@ -687,7 +818,7 @@ impl Selector { } fn lower_br_if(&mut self, depth: u32, op: &WasmOp) -> Result<(), SelectorError> { - let cond = self.pop_val(op)?; + let cond = self.pop_i32(op)?; let target_label = self.target_label_for_depth(depth)?; // bne cond, zero, target — branch when condition is true (non-zero) self.out.push(RiscVOp::Branch { @@ -721,15 +852,40 @@ impl Selector { } /// Emit `mv a0, top; ret` — the function epilogue. + /// + /// For i64 returns the wasm ABI puts the lo half in `a0` and the hi half + /// in `a1` (matches the RISC-V psABI for 64-bit return values on RV32). + /// We only emit the moves when the source isn't already in the target + /// return register, to avoid a redundant `addi`. 
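+    ///
+    /// e.g. a body ending in `i64.const 0x1_0000_0002` returns with
+    /// a0 = 2 (lo) and a1 = 1 (hi).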
 fn emit_return_epilogue(&mut self) {
-        if let Some(&top) = self.vstack.last()
-            && top != Reg::A0
-        {
-            self.out.push(RiscVOp::Addi {
-                rd: Reg::A0,
-                rs1: top,
-                imm: 0,
-            });
+        if let Some(top) = self.vstack.last().copied() {
+            match top {
+                VstackVal::I32(r) => {
+                    if r != Reg::A0 {
+                        self.out.push(RiscVOp::Addi {
+                            rd: Reg::A0,
+                            rs1: r,
+                            imm: 0,
+                        });
+                    }
+                }
+                VstackVal::I64 { lo, hi } => {
+                    if lo != Reg::A0 {
+                        self.out.push(RiscVOp::Addi {
+                            rd: Reg::A0,
+                            rs1: lo,
+                            imm: 0,
+                        });
+                    }
+                    if hi != Reg::A1 {
+                        self.out.push(RiscVOp::Addi {
+                            rd: Reg::A1,
+                            rs1: hi,
+                            imm: 0,
+                        });
+                    }
+                }
+            }
         }
         self.out.push(RiscVOp::Jalr {
             rd: Reg::ZERO,
@@ -737,6 +893,265 @@
             imm: 0,
         });
     }
+
+    // ────────── i64 lowerings (Phase 1) ──────────
+    //
+    // On RV32 an i64 lives in a register pair `(lo, hi)` per the convention
+    // in the module docs. Each operation pops one or two pairs from vstack,
+    // emits the equivalent sequence on the 32-bit halves, and pushes the
+    // result (either as another i64 pair or as an i32 0/1 for comparisons).
+
+    /// 64-bit add with carry: `lo = al+bl ; carry = (lo < bl) ; hi = ah+bh+carry`.
+    fn lower_i64_add(&mut self, op: &WasmOp) -> Result<(), SelectorError> {
+        let ((al, ah), (bl, bh)) = self.pop_pair_i64(op)?;
+        let lo = self.alloc_temp();
+        let carry = self.alloc_temp();
+        let hi_sum = self.alloc_temp();
+        let hi = self.alloc_temp();
+        // lo = al + bl
+        self.out.push(RiscVOp::Add {
+            rd: lo,
+            rs1: al,
+            rs2: bl,
+        });
+        // carry = (lo < bl): the 32-bit sum wrapped iff it ended up below
+        // one of its addends.
+        self.out.push(RiscVOp::Sltu {
+            rd: carry,
+            rs1: lo,
+            rs2: bl,
+        });
+        // hi_sum = ah + bh
+        self.out.push(RiscVOp::Add {
+            rd: hi_sum,
+            rs1: ah,
+            rs2: bh,
+        });
+        // hi = hi_sum + carry
+        self.out.push(RiscVOp::Add {
+            rd: hi,
+            rs1: hi_sum,
+            rs2: carry,
+        });
+        self.push_i64(lo, hi);
+        Ok(())
+    }
+
+    /// 64-bit sub with borrow: `borrow = (al < bl) ; lo = al-bl ; hi = ah-bh-borrow`.
+    fn lower_i64_sub(&mut self, op: &WasmOp) -> Result<(), SelectorError> {
+        let ((al, ah), (bl, bh)) = self.pop_pair_i64(op)?;
+        let borrow = self.alloc_temp();
+        let lo = self.alloc_temp();
+        let hi_diff = self.alloc_temp();
+        let hi = self.alloc_temp();
+        // borrow = (al < bl): the low subtraction will wrap iff al < bl.
+        self.out.push(RiscVOp::Sltu {
+            rd: borrow,
+            rs1: al,
+            rs2: bl,
+        });
+        // lo = al - bl
+        self.out.push(RiscVOp::Sub {
+            rd: lo,
+            rs1: al,
+            rs2: bl,
+        });
+        // hi_diff = ah - bh
+        self.out.push(RiscVOp::Sub {
+            rd: hi_diff,
+            rs1: ah,
+            rs2: bh,
+        });
+        // hi = hi_diff - borrow
+        self.out.push(RiscVOp::Sub {
+            rd: hi,
+            rs1: hi_diff,
+            rs2: borrow,
+        });
+        self.push_i64(lo, hi);
+        Ok(())
+    }
+
+    /// Pairwise bitwise op: `build` is applied once to the lo halves and
+    /// once to the hi halves.
+    fn lower_i64_bitwise<F>(&mut self, op: &WasmOp, build: F) -> Result<(), SelectorError>
+    where
+        F: Fn(Reg, Reg, Reg) -> RiscVOp,
+    {
+        let ((al, ah), (bl, bh)) = self.pop_pair_i64(op)?;
+        let lo = self.alloc_temp();
+        let hi = self.alloc_temp();
+        self.out.push(build(lo, al, bl));
+        self.out.push(build(hi, ah, bh));
+        self.push_i64(lo, hi);
+        Ok(())
+    }
+
+    /// i64 eq / ne — diff both halves, or them together, then sltiu / sltu.
+    /// Result is an i32 0/1 value.
+    fn lower_i64_eq(&mut self, op: &WasmOp, invert: bool) -> Result<(), SelectorError> {
+        let ((al, ah), (bl, bh)) = self.pop_pair_i64(op)?;
+        let d_lo = self.alloc_temp();
+        let d_hi = self.alloc_temp();
+        let d = self.alloc_temp();
+        let dst = self.alloc_temp();
+        // d_lo = al ^ bl ; d_hi = ah ^ bh ; d = d_lo | d_hi → 0 iff equal
+        self.out.push(RiscVOp::Xor {
+            rd: d_lo,
+            rs1: al,
+            rs2: bl,
+        });
+        self.out.push(RiscVOp::Xor {
+            rd: d_hi,
+            rs1: ah,
+            rs2: bh,
+        });
+        self.out.push(RiscVOp::Or {
+            rd: d,
+            rs1: d_lo,
+            rs2: d_hi,
+        });
+        if invert {
+            // ne: dst = (0 < d) → 1 iff any half differs
+            self.out.push(RiscVOp::Sltu {
+                rd: dst,
+                rs1: Reg::ZERO,
+                rs2: d,
+            });
+        } else {
+            // eq: dst = (d < 1) → 1 iff d == 0
+            self.out.push(RiscVOp::Sltiu {
+                rd: dst,
+                rs1: d,
+                imm: 1,
+            });
+        }
+        self.push_i32(dst);
+        Ok(())
+    }
+
+    /// i64 eqz: or the halves together, then sltiu → i32 0/1.
+    fn lower_i64_eqz(&mut self, op: &WasmOp) -> Result<(), SelectorError> {
+        let (lo, hi) = self.pop_i64(op)?;
+        let d = self.alloc_temp();
+        let dst = self.alloc_temp();
+        // d = lo | hi → 0 iff both halves are zero
+        self.out.push(RiscVOp::Or {
+            rd: d,
+            rs1: lo,
+            rs2: hi,
+        });
+        // dst = (d < 1) → 1 iff d == 0
+        self.out.push(RiscVOp::Sltiu {
+            rd: dst,
+            rs1: d,
+            imm: 1,
+        });
+        self.push_i32(dst);
+        Ok(())
+    }
+
+    /// Zero-extend i32 → i64: lo = src, hi = 0.
+    fn lower_i64_extend_i32_u(&mut self, op: &WasmOp) -> Result<(), SelectorError> {
+        let src = self.pop_i32(op)?;
+        let hi = self.alloc_temp();
+        // hi = 0 (via `addi hi, zero, 0` — no dedicated li-zero op).
+        self.out.push(RiscVOp::Addi {
+            rd: hi,
+            rs1: Reg::ZERO,
+            imm: 0,
+        });
+        self.push_i64(src, hi);
+        Ok(())
+    }
+
+    /// Sign-extend i32 → i64: lo = src, hi = sra(src, 31).
+    /// SRA by 31 produces all-ones when src is negative, all-zeros otherwise.
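+    /// e.g. src = 5 gives (lo, hi) = (5, 0); src = -5 gives
+    /// (lo, hi) = (0xFFFF_FFFB, 0xFFFF_FFFF), the i64 encoding of -5.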
+    fn lower_i64_extend_i32_s(&mut self, op: &WasmOp) -> Result<(), SelectorError> {
+        let src = self.pop_i32(op)?;
+        let hi = self.alloc_temp();
+        self.out.push(RiscVOp::Srai {
+            rd: hi,
+            rs1: src,
+            shamt: 31,
+        });
+        self.push_i64(src, hi);
+        Ok(())
+    }
+
+    /// Wrap i64 → i32: keep lo, drop hi. No instructions emitted — the lo
+    /// register simply continues to live on the value stack as an i32.
+    fn lower_i32_wrap_i64(&mut self, op: &WasmOp) -> Result<(), SelectorError> {
+        let (lo, _hi) = self.pop_i64(op)?;
+        self.push_i32(lo);
+        Ok(())
+    }
+
+    /// i64 load: two word-loads at `offset` and `offset+4`. Little-endian
+    /// memory layout, matching wasm's spec (lo at lower address).
+    fn lower_i64_load(&mut self, op: &WasmOp, offset: u32) -> Result<(), SelectorError> {
+        let addr = self.pop_i32(op)?;
+        let tmp = self.alloc_temp();
+        let lo = self.alloc_temp();
+        let hi = self.alloc_temp();
+        // Single address calculation reused for both word loads.
+        self.out.push(RiscVOp::Add {
+            rd: tmp,
+            rs1: LINEAR_MEM_BASE,
+            rs2: addr,
+        });
+        let imm_lo = offset_to_imm(offset)?;
+        // The high word lives at `offset + 4`; check the same imm12 range.
+        let imm_hi = offset_to_imm(offset.checked_add(4).ok_or(
+            SelectorError::ImmediateTooLarge {
+                value: offset as i64 + 4,
+                context: "i64 load high-word offset",
+            },
+        )?)?;
+        self.out.push(RiscVOp::Lw {
+            rd: lo,
+            rs1: tmp,
+            imm: imm_lo,
+        });
+        self.out.push(RiscVOp::Lw {
+            rd: hi,
+            rs1: tmp,
+            imm: imm_hi,
+        });
+        self.push_i64(lo, hi);
+        Ok(())
+    }
+
+    /// i64 store: pops value (i64) and addr (i32), then two `sw` at offset
+    /// and offset+4. Little-endian, matching wasm's spec.
+    fn lower_i64_store(&mut self, op: &WasmOp, offset: u32) -> Result<(), SelectorError> {
+        let (lo, hi) = self.pop_i64(op)?;
+        let addr = self.pop_i32(op)?;
+        let tmp = self.alloc_temp();
+        self.out.push(RiscVOp::Add {
+            rd: tmp,
+            rs1: LINEAR_MEM_BASE,
+            rs2: addr,
+        });
+        let imm_lo = offset_to_imm(offset)?;
+        let imm_hi = offset_to_imm(offset.checked_add(4).ok_or(
+            SelectorError::ImmediateTooLarge {
+                value: offset as i64 + 4,
+                context: "i64 store high-word offset",
+            },
+        )?)?;
+        self.out.push(RiscVOp::Sw {
+            rs1: tmp,
+            rs2: lo,
+            imm: imm_lo,
+        });
+        self.out.push(RiscVOp::Sw {
+            rs1: tmp,
+            rs2: hi,
+            imm: imm_hi,
+        });
+        Ok(())
+    }
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -1259,4 +1674,282 @@ mod tests {
         );
         assert!(matches!(r, Err(SelectorError::ImmediateTooLarge { .. })));
     }
+
+    // ──────────── i64 Phase-1 tests ────────────
+    //
+    // These tests assert the *shape* of the emitted sequence (op counts, kinds,
+    // and select fields), which is the right granularity for a selector — we
+    // don't want to over-pin register allocation choices.
+
+    /// Helper: build the op sequence for an i64 test scenario. Appends an
+    /// `End` so the function epilogue is emitted; tests work with the full
+    /// output. `num_params` controls how many arg registers are available
+    /// (use 1+ for sequences that contain `LocalGet`).
+    fn run_i64_with_params(seq: &[WasmOp], num_params: u32) -> Vec<RiscVOp> {
+        let mut full = seq.to_vec();
+        full.push(WasmOp::End);
+        s(&full, num_params)
+    }
+
+    /// Most i64 tests use only consts → no LocalGet → no arg regs needed.
+    fn run_i64(seq: &[WasmOp]) -> Vec<RiscVOp> {
+        run_i64_with_params(seq, 0)
+    }
+
+    #[test]
+    fn i64_const_emits_two_load_imm_sequences() {
+        // I64Const(0x1_0000_0001) → lo = 1, hi = 1. Each half goes through
+        // emit_load_imm (here both fit in the addi short path), giving 2
+        // Addi-from-ZERO ops.
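+        // In asm terms: `addi lo, zero, 1 ; addi hi, zero, 1` (both halves
+        // of 0x1_0000_0001 are 1, and both fit the 12-bit immediate).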
+        let out = run_i64(&[WasmOp::I64Const(0x1_0000_0001), WasmOp::Drop]);
+        let imm_loads = count(&out, |op| {
+            matches!(op, RiscVOp::Addi { rs1: Reg::ZERO, .. })
+        });
+        // 2 for the i64 const (lo + hi), nothing else (Drop emits no code).
+        assert_eq!(imm_loads, 2);
+    }
+
+    #[test]
+    fn i64_add_emits_add_sltu_add_add_pattern() {
+        let out = run_i64(&[
+            WasmOp::I64Const(1),
+            WasmOp::I64Const(2),
+            WasmOp::I64Add,
+            WasmOp::Drop,
+        ]);
+        // Skip past the const-materialization Addi/Lui ops; everything from
+        // the first Add onward is the I64Add sequence, with the epilogue's
+        // Jalr trailing it.
+        let from_add: Vec<&RiscVOp> = out
+            .iter()
+            .skip_while(|o| !matches!(o, RiscVOp::Add { .. }))
+            .collect();
+        // Expected: Add, Sltu, Add, Add, then function epilogue's Jalr.
+        assert!(matches!(from_add[0], RiscVOp::Add { .. }));
+        assert!(matches!(from_add[1], RiscVOp::Sltu { .. }));
+        assert!(matches!(from_add[2], RiscVOp::Add { .. }));
+        assert!(matches!(from_add[3], RiscVOp::Add { .. }));
+    }
+
+    #[test]
+    fn i64_sub_emits_borrow_pattern() {
+        let out = run_i64(&[
+            WasmOp::I64Const(10),
+            WasmOp::I64Const(3),
+            WasmOp::I64Sub,
+            WasmOp::Drop,
+        ]);
+        // Expected: Sltu (borrow), Sub (lo), Sub (hi diff), Sub (hi - borrow)
+        let from_sub: Vec<&RiscVOp> = out
+            .iter()
+            .skip_while(|o| !matches!(o, RiscVOp::Sltu { .. }))
+            .collect();
+        assert!(matches!(from_sub[0], RiscVOp::Sltu { .. }));
+        assert!(matches!(from_sub[1], RiscVOp::Sub { .. }));
+        assert!(matches!(from_sub[2], RiscVOp::Sub { .. }));
+        assert!(matches!(from_sub[3], RiscVOp::Sub { .. }));
+    }
+
+    #[test]
+    fn i64_and_or_xor_each_emit_two_ops() {
+        // I64And: two And ops on lo/hi
+        let out_and = run_i64(&[
+            WasmOp::I64Const(1),
+            WasmOp::I64Const(2),
+            WasmOp::I64And,
+            WasmOp::Drop,
+        ]);
+        assert_eq!(
+            count(&out_and, |op| matches!(op, RiscVOp::And { .. })),
+            2,
+            "I64And should emit 2 And ops"
+        );
+
+        let out_or = run_i64(&[
+            WasmOp::I64Const(1),
+            WasmOp::I64Const(2),
+            WasmOp::I64Or,
+            WasmOp::Drop,
+        ]);
+        assert_eq!(
+            count(&out_or, |op| matches!(op, RiscVOp::Or { .. })),
+            2,
+            "I64Or should emit 2 Or ops"
+        );
+
+        let out_xor = run_i64(&[
+            WasmOp::I64Const(1),
+            WasmOp::I64Const(2),
+            WasmOp::I64Xor,
+            WasmOp::Drop,
+        ]);
+        assert_eq!(
+            count(&out_xor, |op| matches!(op, RiscVOp::Xor { .. })),
+            2,
+            "I64Xor should emit 2 Xor ops"
+        );
+    }
+
+    #[test]
+    fn i64_eq_emits_xor_xor_or_sltiu() {
+        let out = run_i64(&[
+            WasmOp::I64Const(1),
+            WasmOp::I64Const(2),
+            WasmOp::I64Eq,
+            WasmOp::Drop,
+        ]);
+        assert_eq!(
+            count(&out, |op| matches!(op, RiscVOp::Xor { .. })),
+            2,
+            "I64Eq emits two Xors (one per half)"
+        );
+        assert_eq!(
+            count(&out, |op| matches!(op, RiscVOp::Or { .. })),
+            1,
+            "I64Eq ors the half-diffs together"
+        );
+        assert_eq!(
+            count(&out, |op| matches!(op, RiscVOp::Sltiu { imm: 1, .. })),
+            1,
+            "I64Eq compares the combined diff with 1 (sltiu)"
+        );
+    }
+
+    #[test]
+    fn i64_ne_emits_xor_xor_or_sltu() {
+        let out = run_i64(&[
+            WasmOp::I64Const(1),
+            WasmOp::I64Const(2),
+            WasmOp::I64Ne,
+            WasmOp::Drop,
+        ]);
+        assert_eq!(count(&out, |op| matches!(op, RiscVOp::Xor { .. })), 2);
+        assert_eq!(count(&out, |op| matches!(op, RiscVOp::Or { .. })), 1);
+        assert_eq!(
+            count(&out, |op| matches!(
+                op,
+                RiscVOp::Sltu { rs1: Reg::ZERO, .. }
+            )),
+            1,
+            "I64Ne uses sltu rd, zero, diff"
+        );
+    }
+
+    #[test]
+    fn i64_eqz_emits_or_sltiu() {
+        // I64Eqz pops i64 and pushes i32 — verify by checking we don't get a
+        // StackTypeMismatch on a subsequent i32 consumer.
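+        // Expected shape: `or d, lo, hi ; sltiu dst, d, 1` from the I64Eqz,
+        // then a second `sltiu` from the I32Eqz below.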
+        let out = run_i64(&[
+            WasmOp::I64Const(0),
+            WasmOp::I64Eqz,
+            // After Eqz the stack should hold an i32; an I32Eqz consumer
+            // confirms the type-state on the vstack.
+            WasmOp::I32Eqz,
+            WasmOp::Drop,
+        ]);
+        assert_eq!(
+            count(&out, |op| matches!(op, RiscVOp::Or { .. })),
+            1,
+            "I64Eqz emits a single Or"
+        );
+        // Two Sltiu(imm=1): one from I64Eqz, one from the I32Eqz that follows.
+        assert_eq!(
+            count(&out, |op| matches!(op, RiscVOp::Sltiu { imm: 1, .. })),
+            2,
+        );
+    }
+
+    #[test]
+    fn i64_extend_i32_u_pushes_zero_hi() {
+        // I64ExtendI32U: hi = 0 (via `addi rd, ZERO, 0`). The presence of an
+        // extra Addi-from-ZERO with imm=0 is the giveaway.
+        let out = run_i64(&[
+            WasmOp::I32Const(5), // small enough to take the addi short path
+            WasmOp::I64ExtendI32U,
+            WasmOp::Drop,
+        ]);
+        // We get: addi (i32const 5), addi (hi=0). Both have rs1=ZERO; the
+        // hi-zero load uses imm=0.
+        assert!(
+            out.iter().any(|op| matches!(
+                op,
+                RiscVOp::Addi {
+                    rs1: Reg::ZERO,
+                    imm: 0,
+                    ..
+                }
+            )),
+            "expected addi rd, zero, 0 to zero the hi half"
+        );
+    }
+
+    #[test]
+    fn i64_extend_i32_s_emits_sra_31() {
+        let out = run_i64(&[WasmOp::I32Const(5), WasmOp::I64ExtendI32S, WasmOp::Drop]);
+        assert_eq!(
+            count(&out, |op| matches!(op, RiscVOp::Srai { shamt: 31, .. })),
+            1,
+            "I64ExtendI32S uses srai by 31 to sign-extend"
+        );
+    }
+
+    #[test]
+    fn i32_wrap_i64_drops_hi() {
+        // I32WrapI64 emits zero new instructions; the lo half just continues
+        // to live on the value stack as an i32. We verify the op count is
+        // exactly what the surrounding const+drop emits, with no leftover.
+        let baseline = run_i64(&[WasmOp::I64Const(42), WasmOp::Drop]);
+        let with_wrap = run_i64(&[WasmOp::I64Const(42), WasmOp::I32WrapI64, WasmOp::Drop]);
+        assert_eq!(
+            baseline.len(),
+            with_wrap.len(),
+            "I32WrapI64 must not emit any instructions"
+        );
+    }
+
+    #[test]
+    fn i64_load_emits_two_lw_at_offset_and_offset_plus_4() {
+        let out = run_i64_with_params(
+            &[
+                WasmOp::LocalGet(0), // address (treated as i32)
+                WasmOp::I64Load {
+                    offset: 16,
+                    align: 3,
+                },
+                WasmOp::Drop,
+            ],
+            1,
+        );
+        // We expect two Lw ops with imms 16 and 20.
+        let lws: Vec<i32> = out
+            .iter()
+            .filter_map(|op| match op {
+                RiscVOp::Lw { imm, .. } => Some(*imm),
+                _ => None,
+            })
+            .collect();
+        assert_eq!(lws, vec![16, 20], "I64Load emits lw @offset and @offset+4");
+    }
+
+    #[test]
+    fn i64_store_emits_two_sw_at_offset_and_offset_plus_4() {
+        let out = run_i64_with_params(
+            &[
+                WasmOp::LocalGet(0), // address
+                WasmOp::I64Const(0xDEADBEEF_CAFEBABE_u64 as i64),
+                WasmOp::I64Store {
+                    offset: 8,
+                    align: 3,
+                },
+            ],
+            1,
+        );
+        let sws: Vec<i32> = out
+            .iter()
+            .filter_map(|op| match op {
+                RiscVOp::Sw { imm, .. } => Some(*imm),
+                _ => None,
+            })
+            .collect();
+        assert_eq!(sws, vec![8, 12], "I64Store emits sw @offset and @offset+4");
+    }
 }