diff --git a/ghostscope-dwarf/src/core/demangle.rs b/ghostscope-dwarf/src/core/demangle.rs
index 9d3a628..fb994c1 100644
--- a/ghostscope-dwarf/src/core/demangle.rs
+++ b/ghostscope-dwarf/src/core/demangle.rs
@@ -5,23 +5,40 @@ use gimli::DwLang;
 /// Demangle a symbol string using language hint when available.
 /// Returns None if demangling fails or is not applicable.
+///
+/// The `DW_AT_language` hint is tried first; when it is absent or its demangler
+/// rejects `s`, prefix heuristics choose a demangler instead.
 pub fn demangle_by_lang(lang: Option<DwLang>, s: &str) -> Option<String> {
+    // 1) Trust DW_AT_language when available
     match lang {
-        Some(gimli::DW_LANG_Rust) => demangle_rust(s),
+        Some(gimli::DW_LANG_Rust) => {
+            if let Some(d) = demangle_rust(s) {
+                return Some(d);
+            }
+        }
         Some(gimli::DW_LANG_C_plus_plus)
         | Some(gimli::DW_LANG_C_plus_plus_11)
         | Some(gimli::DW_LANG_C_plus_plus_14)
         | Some(gimli::DW_LANG_C_plus_plus_17)
-        | Some(gimli::DW_LANG_C_plus_plus_20) => demangle_cpp(s),
-        _ => {
-            // Try common patterns heuristically
-            if is_rust_mangled(s) {
-                demangle_rust(s)
-            } else if is_itanium_cpp_mangled(s) {
-                demangle_cpp(s)
-            } else {
-                None
+        | Some(gimli::DW_LANG_C_plus_plus_20) => {
+            if let Some(d) = demangle_cpp(s) {
+                return Some(d);
             }
         }
+        _ => {}
+    }
+
+    // 2) Fall back to heuristics if the language hint is missing or demangling failed.
+    //    Legacy Rust symbols share the Itanium `_ZN` prefix, so a symbol the Rust
+    //    demangler rejects still gets a chance with the C++ demangler.
+    if is_rust_mangled(s) {
+        if let Some(d) = demangle_rust(s) {
+            return Some(d);
+        }
+    }
+    if is_itanium_cpp_mangled(s) {
+        demangle_cpp(s)
+    } else {
+        None
     }
 }
 
@@ -53,7 +70,26 @@ pub fn demangled_leaf(full: &str) -> String {
 
-/// Heuristic: Rust v0 mangling starts with "_R".
+/// Heuristic: Rust v0 mangling starts with "_R"; legacy Rust mangling is an
+/// Itanium-style `_ZN…E` name whose final path component is a `17h<hash>` segment.
 pub fn is_rust_mangled(s: &str) -> bool {
-    s.starts_with("_R")
+    s.starts_with("_R") || looks_like_legacy_rust(s)
+}
+
+/// Heuristic: legacy Rust symbols have the shape `_ZN<path>17h<16 hex digits>E`.
+///
+/// The hash component is matched at the very end of the symbol, so C++ names that
+/// merely contain a 17-character identifier starting with `h`
+/// (e.g. `_ZN3foo17handle_connectionE`) are not mistaken for Rust.
+fn looks_like_legacy_rust(s: &str) -> bool {
+    const HASH_DIGITS: usize = 16;
+    let body = match s.strip_prefix("_ZN").and_then(|rest| rest.strip_suffix('E')) {
+        Some(body) => body.as_bytes(),
+        None => return false,
+    };
+    if body.len() < "17h".len() + HASH_DIGITS {
+        return false;
+    }
+    let (path, hash) = body.split_at(body.len() - HASH_DIGITS);
+    path.ends_with(b"17h") && hash.iter().all(u8::is_ascii_hexdigit)
 }
 
 /// Heuristic: Itanium C++ mangling starts with "_Z".
diff --git a/ghostscope-dwarf/src/data/lightweight_index.rs b/ghostscope-dwarf/src/data/lightweight_index.rs index 96a3748..9b42287 100644 --- a/ghostscope-dwarf/src/data/lightweight_index.rs +++ b/ghostscope-dwarf/src/data/lightweight_index.rs @@ -6,7 +6,7 @@ //! - Support for parallel construction with index shards //! - Fast binary search for symbol lookup -use crate::core::IndexEntry; +use crate::core::{demangle_by_lang, demangled_leaf, IndexEntry}; use gimli::{DebugInfoOffset, EndianArcSlice, LittleEndian}; use std::collections::{BTreeMap, HashMap}; use tracing::debug; @@ -89,11 +89,11 @@ impl LightweightIndex { } // Add all type entries (struct/class/union/enum) - for (name, ty_entries) in types { + for (name, ty_entries) in &types { let start_idx = entries.len(); - entries.extend(ty_entries); + entries.extend(ty_entries.clone()); let indices: Vec = (start_idx..entries.len()).collect(); - type_map.insert(name, indices); + type_map.insert(name.clone(), indices); } // IMPORTANT: Do NOT sort entries! This would invalidate the indices @@ -118,6 +118,25 @@ impl LightweightIndex { total_functions, total_variables, entries.len(), address_map.len() ); + // Ensure demangled aliases exist for variables even if DW_AT_name was missing. 
+ for (idx, entry) in entries.iter().enumerate() { + if entry.tag == gimli::constants::DW_TAG_variable { + if let Some(demangled) = demangle_by_lang(entry.language, entry.name.as_ref()) { + let leaf = demangled_leaf(&demangled); + if leaf != entry.name.as_ref() { + tracing::trace!( + "LightweightIndex: alias '{}' -> '{}' (idx {}, lang={:?})", + entry.name, + leaf, + idx, + entry.language + ); + variable_map.entry(leaf).or_default().push(idx); + } + } + } + } + Self { entries, function_map, diff --git a/ghostscope-dwarf/src/module/data.rs b/ghostscope-dwarf/src/module/data.rs index dcf2d59..b46d9a0 100644 --- a/ghostscope-dwarf/src/module/data.rs +++ b/ghostscope-dwarf/src/module/data.rs @@ -2155,10 +2155,17 @@ impl ModuleData { }; let mut out = Vec::new(); + // Track DIEs we've already emitted (unit_offset, die_offset) + let mut seen_offsets: HashSet<(u64, u64)> = HashSet::new(); + // Try demangled full (preserve the demangled name that matched) if let Some(indices) = self.demangled_variable_map.get(name) { for &idx in indices { if let Some(entry) = self.lightweight_index.entry(idx) { + let key = (entry.unit_offset.0 as u64, entry.die_offset.0 as u64); + if !seen_offsets.insert(key) { + continue; + } let link_address = entry.address_ranges.first().and_then(|(lo, hi)| { if lo == hi { Some(*lo) @@ -2185,6 +2192,10 @@ impl ModuleData { if let Some(indices) = self.demangled_variable_leaf_map.get(name) { for &idx in indices { if let Some(entry) = self.lightweight_index.entry(idx) { + let key = (entry.unit_offset.0 as u64, entry.die_offset.0 as u64); + if !seen_offsets.insert(key) { + continue; + } let link_address = entry.address_ranges.first().and_then(|(lo, hi)| { if lo == hi { Some(*lo) @@ -2213,6 +2224,10 @@ impl ModuleData { for key in self.lightweight_index.get_variable_names() { if key.rsplit("::").next().map(|s| s == name).unwrap_or(false) { for e in self.lightweight_index.find_variables_by_name(key) { + let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64); 
+                        if !seen_offsets.insert(key) {
+                            continue;
+                        }
                         let link_address =
                             e.address_ranges
                                 .first()
@@ -2242,6 +2257,12 @@ impl ModuleData {
                 if e.tag != gimli::constants::DW_TAG_variable {
                     continue;
                 }
                 let last = e.name.rsplit("::").next().unwrap_or(e.name.as_ref());
                 if last == name || e.name == name.into() {
+                    // Record the DIE only once an alias actually matches; marking it earlier
+                    // lets a non-matching alias of the same DIE hide the matching one.
+                    let key_offsets = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
+                    if !seen_offsets.insert(key_offsets) {
+                        continue;
+                    }
                     let link_address =
@@ -2378,6 +2399,7 @@ impl ModuleData {
     pub(crate) fn find_global_variables_by_name(&self, name: &str) -> Vec {
         let mut out = Vec::new();
         let entries = self.lightweight_index.find_variables_by_name(name);
+        let mut seen_offsets: HashSet<(u64, u64)> = HashSet::new();
 
         // Parse object file once for section classification
         let obj = match object::File::parse(&self._binary_mapped_file.data[..]) {
@@ -2385,6 +2407,10 @@ impl ModuleData {
             Err(_) => {
                 // Cannot classify sections, but still return entries with link_address
                 for e in entries {
+                    let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
+                    if !seen_offsets.insert(key) {
+                        continue;
+                    }
                     let link_address =
                         e.address_ranges
                             .first()
@@ -2402,6 +2428,10 @@ impl ModuleData {
         };
 
         for e in entries {
+            let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
+            if !seen_offsets.insert(key) {
+                continue;
+            }
             let link_address =
                 e.address_ranges
                     .first()
diff --git a/ghostscope-dwarf/src/parser/fast_parser.rs b/ghostscope-dwarf/src/parser/fast_parser.rs
index 5432a35..933a27f 100644
--- a/ghostscope-dwarf/src/parser/fast_parser.rs
+++ b/ghostscope-dwarf/src/parser/fast_parser.rs
@@ -1,7 +1,10 @@
 //! 
Unified DWARF parser - true single-pass parsing use crate::{ - core::{IndexEntry, Result}, + core::{ + demangle::{demangle_by_lang, demangled_leaf}, + IndexEntry, Result, + }, data::{ directory_from_index, resolve_file_path, LightweightFileIndex, LightweightIndex, LineMappingTable, ScopedFileIndexManager, @@ -246,65 +249,118 @@ impl<'a> DwarfParser<'a> { } } gimli::constants::DW_TAG_variable => { + tracing::trace!( + "Evaluating global variable DIE {:?} in CU {:?}", + entry.offset(), + unit_offset + ); + let is_static_symbol = self.is_static_symbol(entry).unwrap_or(false); let in_function_scope = tag_stack.iter().any(|t| { *t == gimli::constants::DW_TAG_subprogram || *t == gimli::constants::DW_TAG_inlined_subroutine }); - if in_function_scope { + if in_function_scope && !is_static_symbol { + tracing::trace!( + "Skipping variable at {:?} (in function scope, stack={:?})", + entry.offset(), + tag_stack + ); // Skip local variables tag_stack.push(entry.tag()); continue; + } else if in_function_scope { + // Rust (and some C compilers) sometimes nest file-scoped statics under the + // function that first references them, even though DW_AT_location uses + // DW_OP_addr. When DW_AT_external is false we treat them as true globals. 
+ tracing::trace!( + "Treating static variable at {:?} as global despite function scope (stack={:?})", + entry.offset(), + tag_stack + ); } if Self::is_declaration(entry).unwrap_or(false) { + tracing::trace!( + "Skipping variable at {:?} (declaration-only DIE)", + entry.offset() + ); tag_stack.push(entry.tag()); continue; } + let mut collected_names: Vec<(String, bool)> = Vec::new(); + let mut push_unique_name = |candidate: String, is_linkage_alias: bool| { + if candidate.is_empty() { + return; + } + if collected_names + .iter() + .any(|(existing, _)| existing == &candidate) + { + return; + } + collected_names.push((candidate, is_linkage_alias)); + }; + + let mut have_primary_name = false; if let Some(name) = self.extract_name(self.dwarf, unit, entry)? { - let flags = crate::core::IndexFlags { - is_static: self.is_static_symbol(entry).unwrap_or(false), - ..Default::default() - }; - // Restore variable address for globals/statics via DW_AT_location - let var_addr = self.extract_variable_address(entry, unit)?; - let var_ranges = var_addr.map(|a| vec![(a, a)]).unwrap_or_default(); + push_unique_name(name, false); + have_primary_name = true; + } + + if let Some((linkage_name, _)) = + self.extract_linkage_name(self.dwarf, unit, entry)? 
+ { + if let Some(demangled) = + demangle_by_lang(cu_language, linkage_name.as_str()) + { + let leaf = demangled_leaf(&demangled); + push_unique_name(leaf, false); + have_primary_name = true; + } + push_unique_name(linkage_name.clone(), true); + } + + if !have_primary_name { + tracing::trace!( + "DWARF variable at {:?} missing usable name (CU lang={:?}); skipping alias registration", + entry.offset(), + cu_language + ); + tag_stack.push(entry.tag()); + continue; + } + + let flags = crate::core::IndexFlags { + is_static: is_static_symbol, + ..Default::default() + }; + let var_addr = self.extract_variable_address(entry, unit)?; + let var_ranges = var_addr.map(|a| vec![(a, a)]).unwrap_or_default(); + + for (name, is_linkage_alias) in collected_names { + let mut entry_flags = flags; + entry_flags.is_linkage = is_linkage_alias; let index_entry = IndexEntry { name: std::sync::Arc::from(name.as_str()), die_offset: entry.offset(), unit_offset, tag: entry.tag(), - flags, + flags: entry_flags, language: cu_language, address_ranges: var_ranges.clone(), entry_pc: None, }; + tracing::trace!( + "Registering variable alias '{}' (linkage={}, lang={:?}, die={:?})", + name, + entry_flags.is_linkage, + cu_language, + entry.offset() + ); shard .variables .entry(name.clone()) .or_default() .push(index_entry); - if let Some((linkage_name, _)) = - self.extract_linkage_name(self.dwarf, unit, entry)? 
- { - if linkage_name != name { - let mut alias_flags = flags; - alias_flags.is_linkage = true; - let index_entry_linkage = IndexEntry { - name: std::sync::Arc::from(linkage_name.as_str()), - die_offset: entry.offset(), - unit_offset, - tag: entry.tag(), - flags: alias_flags, - language: cu_language, - address_ranges: var_ranges, - entry_pc: None, - }; - shard - .variables - .entry(linkage_name) - .or_default() - .push(index_entry_linkage); - } - } } } gimli::constants::DW_TAG_structure_type diff --git a/ghostscope/tests/fixtures/rust_global_program/src/main.rs b/ghostscope/tests/fixtures/rust_global_program/src/main.rs index bdc16e0..9bf96d9 100644 --- a/ghostscope/tests/fixtures/rust_global_program/src/main.rs +++ b/ghostscope/tests/fixtures/rust_global_program/src/main.rs @@ -1,6 +1,12 @@ #![allow(non_upper_case_globals)] +#![allow(static_mut_refs)] -use std::{thread, time::Duration}; +use std::{ + marker::{PhantomData, PhantomPinned}, + num::NonZeroU32, + thread, + time::Duration, +}; pub static mut G_COUNTER: i32 = 0; pub static G_MESSAGE: &str = "hello from rust"; @@ -13,6 +19,92 @@ pub struct Config { pub static mut CONFIG: Config = Config { a: 7, b: 11 }; +pub struct Pair(pub i32, pub i32); + +pub struct PhantomWrapper { + pub value: i32, + _marker: PhantomData, +} + +pub union NumberUnion { + pub int_value: i32, + pub float_value: f32, +} + +#[derive(Clone, Copy)] +pub enum GlobalState { + Idle, + Counter(i32), + Slice(&'static [u8]), + TupleState { left: i32, right: bool }, +} + +pub trait Greeter { + fn greet(&self) -> &'static str; +} + +pub struct StaticGreeter; + +impl Greeter for StaticGreeter { + fn greet(&self) -> &'static str { + "static-greeter" + } +} + +pub struct DynHolder<'a> { + pub greet: Option<&'a dyn Greeter>, +} + +impl<'a> DynHolder<'a> { + fn toggle(&mut self) { + if self.greet.is_some() { + self.greet = None; + } else { + self.greet = Some(&STATIC_GREETER); + } + } +} + +pub struct PinnedCounter { + pub value: i32, + _pin: 
PhantomPinned, +} + +impl PinnedCounter { + const fn new(value: i32) -> Self { + Self { + value, + _pin: PhantomPinned, + } + } + + fn bump(&mut self) -> i32 { + self.value += 1; + self.value + } +} + +static STATIC_GREETER: StaticGreeter = StaticGreeter; +static DATA_ALPHA: &[u8] = b"alpha"; +static DATA_OMEGA: &[u8] = b"omega"; +static DATA_STRINGS: [&[u8]; 2] = [DATA_ALPHA, DATA_OMEGA]; + +pub static mut GLOBAL_TUPLE: (i32, bool) = (1, true); +pub static mut GLOBAL_PAIR: Pair = Pair(2, 3); +pub static mut GLOBAL_UNION: NumberUnion = NumberUnion { int_value: 10 }; +pub static mut GLOBAL_SLICE: &'static [u8] = DATA_ALPHA; +pub static mut GLOBAL_NICHE: Option = NonZeroU32::new(7); +pub static mut GLOBAL_PHANTOM: PhantomWrapper<&'static str> = PhantomWrapper { + value: 0, + _marker: PhantomData, +}; +pub static mut GLOBAL_DYN: DynHolder<'static> = DynHolder { greet: None }; +pub static mut GLOBAL_PINNED: PinnedCounter = PinnedCounter::new(0); +pub static mut GLOBAL_ENUM: GlobalState = GlobalState::Idle; +/// Mirror of GLOBAL_ENUM, kept as plain i32 so tests can assert DWARF global resolution +/// without relying on enum pretty printing. +pub static mut GLOBAL_ENUM_BITS: i32 = 0; + pub mod math { #[inline(never)] pub fn do_stuff(x: i32) -> i32 { @@ -23,20 +115,74 @@ pub mod math { fn touch_globals() -> i32 { unsafe { - G_COUNTER += 1; - CONFIG.a += 1; - CONFIG.a + (G_MESSAGE.len() as i32) + G_COUNTER = G_COUNTER.wrapping_add(1); + CONFIG.a = CONFIG.a.wrapping_add(1); + + GLOBAL_TUPLE.0 = GLOBAL_TUPLE.0.wrapping_add(1); + GLOBAL_TUPLE.1 = !GLOBAL_TUPLE.1; + + GLOBAL_PAIR.0 = GLOBAL_PAIR.0.wrapping_add(GLOBAL_PAIR.1); + GLOBAL_PAIR.1 = GLOBAL_PAIR.0.wrapping_sub(GLOBAL_PAIR.1); + + let mut union_value = GLOBAL_UNION.int_value.wrapping_add(1); + GLOBAL_UNION.int_value = union_value; + if union_value % 5 == 0 { + // Flip to float representation occasionally to keep DWARF union data interesting. 
+ union_value = union_value.wrapping_add(1); + GLOBAL_UNION.float_value = union_value as f32 * 1.5; + } + + let current_slice = GLOBAL_SLICE; + GLOBAL_SLICE = if current_slice.as_ptr() == DATA_STRINGS[0].as_ptr() { + DATA_STRINGS[1] + } else { + DATA_STRINGS[0] + }; + + let nonzero_seed = GLOBAL_PAIR.0.unsigned_abs().max(1); + GLOBAL_NICHE = NonZeroU32::new(nonzero_seed); + GLOBAL_PHANTOM.value = GLOBAL_PHANTOM.value.wrapping_add(1); + GLOBAL_DYN.toggle(); + let pinned_value = GLOBAL_PINNED.bump(); + + let enum_snapshot = GLOBAL_ENUM; + let next_state = match enum_snapshot { + GlobalState::Idle => GlobalState::Counter(G_COUNTER), + GlobalState::Counter(val) if val % 2 == 0 => GlobalState::Slice(GLOBAL_SLICE), + GlobalState::Counter(_) => GlobalState::TupleState { + left: GLOBAL_TUPLE.0, + right: GLOBAL_TUPLE.1, + }, + GlobalState::Slice(_) => GlobalState::Idle, + GlobalState::TupleState { .. } => GlobalState::Counter(GLOBAL_PAIR.0), + }; + GLOBAL_ENUM = next_state; + let enum_contrib = match enum_snapshot { + GlobalState::Idle => 0, + GlobalState::Counter(val) => val, + GlobalState::Slice(slice) => slice.len() as i32, + GlobalState::TupleState { left, right } => left + if right { 1 } else { 0 }, + }; + GLOBAL_ENUM_BITS = enum_contrib; + + let total = CONFIG.a as i64 + + G_MESSAGE.len() as i64 + + union_value as i64 + + pinned_value as i64 + + GLOBAL_PHANTOM.value as i64 + + enum_contrib as i64; + + total as i32 } } fn main() { - let mut acc = 0; + let mut acc: i64 = 0; for _ in 0..50000 { - acc += math::do_stuff(3); - acc += touch_globals(); + acc += math::do_stuff(3) as i64; + acc += touch_globals() as i64; thread::sleep(Duration::from_millis(1000)); } // Prevent optimization from dropping acc if acc == 0x7fff_ffff { println!("dead"); } } - diff --git a/ghostscope/tests/rust_script_execution.rs b/ghostscope/tests/rust_script_execution.rs index a65b007..47275cc 100644 --- a/ghostscope/tests/rust_script_execution.rs +++ b/ghostscope/tests/rust_script_execution.rs @@ 
-163,6 +163,51 @@ trace do_stuff { Ok(()) } +#[tokio::test] +async fn test_rust_script_global_enum_as_int() -> anyhow::Result<()> { + init(); + + let binary_path = FIXTURES.get_test_binary("rust_global_program")?; + let bin_dir = binary_path.parent().unwrap(); + struct KillOnDrop(tokio::process::Child); + impl Drop for KillOnDrop { + fn drop(&mut self) { + let _ = self.0.start_kill().is_ok(); + } + } + let mut cmd = Command::new(&binary_path); + cmd.current_dir(bin_dir) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + let child = cmd.spawn()?; + let pid = child.id().ok_or_else(|| anyhow::anyhow!("no pid"))?; + let mut prog = KillOnDrop(child); + tokio::time::sleep(Duration::from_millis(1500)).await; + + // Read GLOBAL_ENUM by forcing it into an integer slot via reinterpret cast. + // This exercises the static-resolution path for globals that only have DW_OP_addr. + let script = r#" +trace do_stuff { + print "ENUM_RAW:{}", GLOBAL_ENUM_BITS; +} +"#; + + let (exit_code, stdout, stderr) = run_ghostscope_with_script_for_pid(script, 9, pid).await?; + let _ = prog.0.kill().await.is_ok(); + assert_eq!(exit_code, 0, "stderr={stderr} stdout={stdout}"); + + let mut seen = false; + for line in stdout.lines() { + if line.contains("ENUM_RAW:") { + seen = true; + break; + } + } + assert!(seen, "Expected ENUM_RAW output. STDOUT: {stdout}"); + + Ok(()) +} + #[tokio::test] async fn test_rust_script_bss_counter_direct() -> anyhow::Result<()> { // Regression coverage: ensure we can read a pure .bss global (G_COUNTER) directly, without