Skip to content

Commit adf69f6

Browse files
committed
Adding I-cache
1 parent 587bdfd commit adf69f6

File tree

5 files changed

+231
-21
lines changed

5 files changed

+231
-21
lines changed

crates/libmwemu/src/emu/disassemble.rs

Lines changed: 195 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,201 @@
11
use iced_x86::{Decoder, DecoderOptions, Formatter as _, Instruction};
2-
2+
use serde::{Deserialize, Serialize};
33
use crate::emu::Emu;
44

5+
// Backing store for decoded instructions: 8192 blocks of up to 32 instructions.
// At roughly 40 bytes per instruction struct that is about 10 MB, which is
// intended to stay resident in the L3 cache (the struct could probably be
// made smaller).
const INSTRUCTION_ARRAY_SIZE: usize = 8192 * 32;

// Lookup table size: 2048 lines of `MAX_CACHE_PER_LINE` (16) entries each.
// NOTE(review): the intent is for the lookup table to fit in the L1 cache
// (under 40 KB) - confirm against the actual size of one table entry.
const CACHE_SIZE: usize = 2048 * 16;
// Only meaningful while CACHE_SIZE is a power of two (checked below).
const CACHE_MASK: usize = CACHE_SIZE - 1;
// Number of entries probed linearly within one cache line.
const MAX_CACHE_PER_LINE: usize = 16;
/// Address value that marks a lookup-table entry as empty.
pub const INVALID_LPF_ADDR: u64 = 0xffffffffffffffff;

// Compile-time guards for the assumptions the constants above rely on.
const _: () = assert!(CACHE_SIZE.is_power_of_two());
const _: () = assert!(CACHE_SIZE % MAX_CACHE_PER_LINE == 0);
16+
17+
/// Returns the 4 KiB page frame that contains `addr`, i.e. `addr` with its
/// low 12 bits cleared.
///
/// Function form of the `LPF_OF` macro. Note that the instruction cache in
/// this file calls `crate::maps::tlb::LPF_OF` rather than this copy.
#[allow(non_snake_case)] // macro-style name kept so existing callers keep compiling
pub const fn LPF_OF(addr: u64) -> u64 {
    addr & !0xfff
}
21+
22+
#[derive(Clone, Serialize, Deserialize)]
23+
struct CachedInstruction {
24+
pub lpf: u64,
25+
pub instruction_key : usize,
26+
pub instruction_len: usize,
27+
}
28+
29+
impl Default for CachedInstruction {
30+
fn default() -> Self {
31+
CachedInstruction {
32+
lpf: INVALID_LPF_ADDR,
33+
instruction_key: 0x0,
34+
instruction_len: 0x0,
35+
}
36+
}
37+
}
38+
39+
impl CachedInstruction {
40+
pub fn is_valid(&self) -> bool {
41+
self.lpf == INVALID_LPF_ADDR
42+
}
43+
}
44+
45+
#[derive(Clone, Serialize, Deserialize)]
46+
pub struct InstructionCache {
47+
cache_entries: Vec<CachedInstruction>,
48+
instructions: Vec<Instruction>,
49+
next_instruction_slot: usize,
50+
pub current_instruction_slot: usize,
51+
current_decode_len: usize,
52+
current_decode_idx: usize
53+
// probe_stats: ProbeStats,
54+
}
55+
56+
#[derive(Clone, Serialize, Deserialize, Default)]
57+
struct ProbeStats {
58+
hits: usize,
59+
misses: usize,
60+
collisions: usize,
61+
}
62+
63+
impl InstructionCache {
64+
pub fn new() -> Self {
65+
let mut cache = InstructionCache {
66+
cache_entries: vec![CachedInstruction::default(); CACHE_SIZE],
67+
instructions: vec![Instruction::default(); INSTRUCTION_ARRAY_SIZE],
68+
next_instruction_slot: 0,
69+
current_decode_len: 0,
70+
current_instruction_slot: 0,
71+
current_decode_idx: 0,
72+
// probe_stats: ProbeStats::default(),
73+
};
74+
75+
// Initialize all instructions to default state
76+
for inst in &mut cache.instructions {
77+
*inst = Instruction::default();
78+
}
79+
80+
cache
81+
}
82+
83+
#[inline(always)]
84+
pub fn get_index_of(&self, lpf: u64, len: u64) -> usize {
85+
const TLB_MASK: u32 = ((CACHE_SIZE - 1) << 12) as u32;
86+
(((lpf + len) & (TLB_MASK as u64)) >> 12) as usize
87+
}
88+
89+
#[inline]
90+
fn flush_cache_line(&mut self, idx: usize) {
91+
for i in 0..MAX_CACHE_PER_LINE {
92+
self.cache_entries[idx].lpf = INVALID_LPF_ADDR;
93+
}
94+
}
95+
96+
pub fn lookup_entry(&mut self, addr: u64, len: u64) -> bool {
97+
let lpf = crate::maps::tlb::LPF_OF(addr);
98+
let idx = self.get_index_of(lpf, len);
99+
100+
// do a linear probing for each cache line
101+
for i in 0..MAX_CACHE_PER_LINE {
102+
if self.cache_entries[idx+i].lpf == INVALID_LPF_ADDR {
103+
return false;
104+
}
105+
// found the instruction now do initialization and return true
106+
if self.cache_entries[idx+i].lpf == addr {
107+
let key = self.cache_entries[idx+i].instruction_key;
108+
self.current_instruction_slot = key;
109+
self.current_decode_len = self.cache_entries[idx+i].instruction_len;
110+
self.current_decode_idx = 0;
111+
return true;
112+
}
113+
}
114+
115+
// the cache_line is full now we flush all the cache line
116+
self.flush_cache_line(idx);
117+
true
118+
}
119+
120+
#[inline(always)]
121+
fn flush_cache(&mut self) {
122+
self.cache_entries = vec![CachedInstruction::default(); CACHE_SIZE];
123+
self.instructions = vec![Instruction::default(); INSTRUCTION_ARRAY_SIZE];
124+
self.next_instruction_slot = 0;
125+
}
126+
127+
pub fn insert_from_decoder(&mut self, decoder: &mut Decoder, addition: usize, rip_addr: u64) {
128+
let lpf = crate::maps::tlb::LPF_OF(rip_addr);
129+
let idx = self.get_index_of(lpf, 0);
130+
131+
// copy the instruction to the slot
132+
// now the case when instruction slot is full, instead of complex algorithm
133+
// we just fudge everything and rebuild from scratch can be a better way
134+
// but I think this is simple and good enough
135+
let slot = self.next_instruction_slot;
136+
137+
if self.next_instruction_slot >= INSTRUCTION_ARRAY_SIZE {
138+
self.flush_cache();
139+
}
140+
let mut count: usize = 0;
141+
while decoder.can_decode() && decoder.position() + addition <= decoder.max_position() {
142+
decoder.decode_out(&mut self.instructions[slot+count]);
143+
count += 1;
144+
}
145+
self.next_instruction_slot += count;
146+
147+
// insert to the cache
148+
for i in 0..MAX_CACHE_PER_LINE {
149+
if self.cache_entries[idx+i].lpf == INVALID_LPF_ADDR {
150+
self.cache_entries[idx+i].instruction_key = slot;
151+
self.cache_entries[idx+i].lpf = rip_addr;
152+
self.cache_entries[idx+i].instruction_len = count;
153+
break;
154+
}
155+
}
156+
157+
assert!(self.lookup_entry(rip_addr, 0), "Cache Insertion FAILED: There is support to be entry after insertion using insert_from_decoder");
158+
}
159+
160+
pub fn insert_instruction(&mut self, addr: u64, instrs: Vec<Instruction>) {
161+
let lpf = crate::maps::tlb::LPF_OF(addr);
162+
let idx = self.get_index_of(lpf, 0);
163+
164+
// copy the instruction to the slot
165+
// now the case when instruction slot is full, instead of complex algorithm
166+
// we just fudge everything and rebuild from scratch can be a better way
167+
// but I think this is simple and good enough
168+
let slot = self.next_instruction_slot;
169+
self.next_instruction_slot += instrs.len();
170+
if self.next_instruction_slot >= INSTRUCTION_ARRAY_SIZE {
171+
self.flush_cache();
172+
}
173+
174+
for i in 0..instrs.len() {
175+
self.instructions[slot+i] = instrs[i];
176+
}
177+
178+
// insert to the cache
179+
for i in 0..MAX_CACHE_PER_LINE {
180+
if self.cache_entries[idx+i].lpf == INVALID_LPF_ADDR {
181+
self.cache_entries[idx+i].instruction_key = slot;
182+
self.cache_entries[idx+i].lpf = addr;
183+
self.cache_entries[idx+i].instruction_len = instrs.len();
184+
break;
185+
}
186+
}
187+
}
188+
189+
pub fn decode_out(&mut self, instruction: &mut Instruction) {
190+
*instruction = self.instructions[self.current_instruction_slot + self.current_decode_idx];
191+
self.current_decode_idx += 1;
192+
}
193+
194+
pub fn can_decode(&self) -> bool {
195+
self.current_decode_idx < self.current_decode_len
196+
}
197+
}
198+
5199
impl Emu {
6200
/// Disassemble a number of instructions at a specified address.
7201
/// This not used on the emulation engine, just from console,

crates/libmwemu/src/emu/execution.rs

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::console::Console;
77
use crate::emu::Emu;
88
use crate::err::MwemuError;
99
use crate::{constants, engine, serialization};
10+
use crate::emu::disassemble::InstructionCache;
1011

1112
impl Emu {
1213
#[inline]
@@ -358,6 +359,8 @@ impl Emu {
358359
/// Automatically dispatches to single or multi-threaded execution based on cfg.enable_threading.
359360
#[allow(deprecated)]
360361
pub fn run(&mut self, end_addr: Option<u64>) -> Result<u64, MwemuError> {
362+
let mut instruction_cache = InstructionCache::new();
363+
self.instruction_cache = instruction_cache;
361364
if self.cfg.enable_threading && self.threads.len() > 1 {
362365
self.run_multi_threaded(end_addr)
363366
} else {
@@ -831,6 +834,7 @@ impl Emu {
831834
// the need of Reallocate everytime
832835
let mut block: Vec<u8> = Vec::with_capacity(constants::BLOCK_LEN + 1);
833836
block.resize(constants::BLOCK_LEN, 0x0);
837+
let mut instruction_cache = InstructionCache::new();
834838
loop {
835839
while self.is_running.load(atomic::Ordering::Relaxed) == 1 {
836840
//log::info!("reloading rip 0x{:x}", self.regs().rip);
@@ -848,28 +852,32 @@ impl Emu {
848852
}
849853
};
850854

851-
// we just need to read 0x300 bytes because x86 require that the instruction is 16 bytes long
852-
// reading anymore would be a waste of time
853-
let block_sz = constants::BLOCK_LEN;
854-
let block_temp = code.read_bytes(rip, block_sz);
855-
let block_temp_len = block_temp.len();
856-
if block_temp_len != block.len() {
857-
block.resize(block_temp_len, 0);
855+
if !self.instruction_cache.lookup_entry(rip, 0) {
856+
// we just need to read 0x300 bytes because x86 require that the instruction is 16 bytes long
857+
// reading anymore would be a waste of time
858+
let block_sz = constants::BLOCK_LEN;
859+
let block_temp = code.read_bytes(rip, block_sz);
860+
let block_temp_len = block_temp.len();
861+
if block_temp_len != block.len() {
862+
block.resize(block_temp_len, 0);
863+
}
864+
block.clone_from_slice(block_temp);
865+
if block.len() == 0 {
866+
return Err(MwemuError::new("cannot read code block, weird address."));
867+
}
868+
let mut decoder =
869+
Decoder::with_ip(arch, &block, self.regs().rip, DecoderOptions::NONE);
870+
871+
self.rep = None;
872+
let addition = if block_temp_len < 16 {block_temp_len} else {16};
873+
self.instruction_cache.insert_from_decoder(&mut decoder, addition, rip);
858874
}
859-
block.clone_from_slice(block_temp);
860-
if block.len() == 0 {
861-
return Err(MwemuError::new("cannot read code block, weird address."));
862-
}
863-
let mut decoder =
864-
Decoder::with_ip(arch, &block, self.regs().rip, DecoderOptions::NONE);
865-
let mut sz: usize = 0;
866-
let mut addr: u64 = 0;
867875

868-
self.rep = None;
869-
let addition = if block_temp_len < 16 {block_temp_len} else {16};
870-
while decoder.can_decode() && (decoder.position() + addition <= decoder.max_position()) {
876+
let mut sz = 0;
877+
let mut addr = 0;
878+
while self.instruction_cache.can_decode() {
871879
if self.rep.is_none() {
872-
decoder.decode_out(&mut ins);
880+
self.instruction_cache.decode_out(&mut ins);
873881
sz = ins.len();
874882
addr = ins.ip();
875883

@@ -883,7 +891,7 @@ impl Emu {
883891
}
884892

885893
self.instruction = Some(ins);
886-
self.decoder_position = decoder.position();
894+
self.decoder_position = self.instruction_cache.current_instruction_slot;
887895
self.memory_operations.clear();
888896
self.pos += 1;
889897

crates/libmwemu/src/emu/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::{cell::RefCell, fs::File, sync::{atomic::AtomicU32, Arc}, time::Instant
33
use iced_x86::{Instruction, IntelFormatter};
44

55
use crate::{banzai::Banzai, breakpoint::Breakpoints, colors::Colors, config::Config, global_locks::GlobalLocks, hooks::Hooks, maps::Maps, pe::pe32::PE32, pe::pe64::PE64, structures::MemoryOperation, thread_context::ThreadContext};
6+
use crate::emu::disassemble::InstructionCache;
67

78
mod operands;
89
mod display;
@@ -78,4 +79,5 @@ pub struct Emu {
7879
pub threads: Vec<ThreadContext>,
7980
pub current_thread_id: usize, // Index into threads vec
8081
pub global_locks: GlobalLocks, // Critical section lock tracking
82+
instruction_cache: InstructionCache
8183
}

crates/libmwemu/src/emu/operands.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,9 @@ impl Emu {
457457
0
458458
};
459459

460+
// now we flush the cacheline if it is written to executable memory and the cacheline exist
461+
let mem1 = self.maps.get_mem_by_addr(mem_addr).expect("The memory doesn't exists");
462+
460463
match sz {
461464
64 => {
462465
if !self.maps.write_qword(mem_addr, value2) {

crates/libmwemu/src/maps/mem64.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@ use std::io::Write;
1313
use bytemuck::cast_slice;
1414
use crate::emu_context;
1515

16+
17+
1618
#[derive(Clone, Serialize, Deserialize)]
1719
pub struct Mem64 {
1820
mem_name: String,
1921
base_addr: u64,
2022
bottom_addr: u64,
23+
permission:
2124
mem: Vec<u8>,
2225
}
2326

0 commit comments

Comments
 (0)