Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions ghostscope-dwarf/src/core/demangle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,32 @@ use gimli::DwLang;
/// Demangle a symbol string using language hint when available.
/// Returns None if demangling fails or is not applicable.
pub fn demangle_by_lang(lang: Option<DwLang>, s: &str) -> Option<String> {
// 1) Trust DW_AT_language when available
match lang {
Some(gimli::DW_LANG_Rust) => demangle_rust(s),
Some(gimli::DW_LANG_Rust) => {
if let Some(d) = demangle_rust(s) {
return Some(d);
}
}
Some(gimli::DW_LANG_C_plus_plus)
| Some(gimli::DW_LANG_C_plus_plus_11)
| Some(gimli::DW_LANG_C_plus_plus_14)
| Some(gimli::DW_LANG_C_plus_plus_17)
| Some(gimli::DW_LANG_C_plus_plus_20) => demangle_cpp(s),
_ => {
// Try common patterns heuristically
if is_rust_mangled(s) {
demangle_rust(s)
} else if is_itanium_cpp_mangled(s) {
demangle_cpp(s)
} else {
None
| Some(gimli::DW_LANG_C_plus_plus_20) => {
if let Some(d) = demangle_cpp(s) {
return Some(d);
}
}
_ => {}
}

// 2) Fall back to heuristics if language hint missing or demangle failed
if is_rust_mangled(s) || looks_like_legacy_rust(s) {
demangle_rust(s)
} else if is_itanium_cpp_mangled(s) {
demangle_cpp(s)
} else {
None
}
}

Expand Down Expand Up @@ -53,7 +62,11 @@ pub fn demangled_leaf(full: &str) -> String {

/// Heuristic: Rust v0 mangling starts with "_R".
pub fn is_rust_mangled(s: &str) -> bool {
s.starts_with("_R")
s.starts_with("_R") || looks_like_legacy_rust(s)
}

/// Heuristic for legacy (pre-v0) Rust mangling: `_ZN<path>17h<16 hex digits>E`.
///
/// Legacy Rust symbols reuse the Itanium nested-name shape, so the only
/// distinguishing mark is the final path component: a 17-byte identifier made
/// of `h` followed by a 16-digit hexadecimal hash. Requiring that exact
/// trailing component (rather than "17h" anywhere in the symbol) avoids
/// claiming C++ symbols that merely contain a 17-character identifier
/// starting with `h`.
fn looks_like_legacy_rust(s: &str) -> bool {
    // "17" length prefix + 'h' + 16 hex digits.
    const HASH_COMPONENT_LEN: usize = 19;

    let body = match s
        .strip_prefix("_ZN")
        .and_then(|rest| rest.strip_suffix('E'))
    {
        Some(body) => body.as_bytes(),
        None => return false,
    };
    if body.len() < HASH_COMPONENT_LEN {
        return false;
    }

    let hash = &body[body.len() - HASH_COMPONENT_LEN..];
    hash.starts_with(b"17h") && hash[3..].iter().all(u8::is_ascii_hexdigit)
}

/// Heuristic: Itanium C++ mangling starts with "_Z".
Expand Down
27 changes: 23 additions & 4 deletions ghostscope-dwarf/src/data/lightweight_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//! - Support for parallel construction with index shards
//! - Fast binary search for symbol lookup

use crate::core::IndexEntry;
use crate::core::{demangle_by_lang, demangled_leaf, IndexEntry};
use gimli::{DebugInfoOffset, EndianArcSlice, LittleEndian};
use std::collections::{BTreeMap, HashMap};
use tracing::debug;
Expand Down Expand Up @@ -89,11 +89,11 @@ impl LightweightIndex {
}

// Add all type entries (struct/class/union/enum)
for (name, ty_entries) in types {
for (name, ty_entries) in &types {
let start_idx = entries.len();
entries.extend(ty_entries);
entries.extend(ty_entries.clone());
let indices: Vec<usize> = (start_idx..entries.len()).collect();
type_map.insert(name, indices);
type_map.insert(name.clone(), indices);
}

// IMPORTANT: Do NOT sort entries! This would invalidate the indices
Expand All @@ -118,6 +118,25 @@ impl LightweightIndex {
total_functions, total_variables, entries.len(), address_map.len()
);

// Ensure demangled aliases exist for variables even if DW_AT_name was missing.
for (idx, entry) in entries.iter().enumerate() {
if entry.tag == gimli::constants::DW_TAG_variable {
if let Some(demangled) = demangle_by_lang(entry.language, entry.name.as_ref()) {
let leaf = demangled_leaf(&demangled);
if leaf != entry.name.as_ref() {
tracing::trace!(
"LightweightIndex: alias '{}' -> '{}' (idx {}, lang={:?})",
entry.name,
leaf,
idx,
entry.language
);
variable_map.entry(leaf).or_default().push(idx);
}
}
}
}

Self {
entries,
function_map,
Expand Down
28 changes: 28 additions & 0 deletions ghostscope-dwarf/src/module/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2155,10 +2155,17 @@ impl ModuleData {
};

let mut out = Vec::new();
// Track DIEs we've already emitted (unit_offset, die_offset)
let mut seen_offsets: HashSet<(u64, u64)> = HashSet::new();

// Try demangled full (preserve the demangled name that matched)
if let Some(indices) = self.demangled_variable_map.get(name) {
for &idx in indices {
if let Some(entry) = self.lightweight_index.entry(idx) {
let key = (entry.unit_offset.0 as u64, entry.die_offset.0 as u64);
if !seen_offsets.insert(key) {
continue;
}
let link_address = entry.address_ranges.first().and_then(|(lo, hi)| {
if lo == hi {
Some(*lo)
Expand All @@ -2185,6 +2192,10 @@ impl ModuleData {
if let Some(indices) = self.demangled_variable_leaf_map.get(name) {
for &idx in indices {
if let Some(entry) = self.lightweight_index.entry(idx) {
let key = (entry.unit_offset.0 as u64, entry.die_offset.0 as u64);
if !seen_offsets.insert(key) {
continue;
}
let link_address = entry.address_ranges.first().and_then(|(lo, hi)| {
if lo == hi {
Some(*lo)
Expand Down Expand Up @@ -2213,6 +2224,10 @@ impl ModuleData {
for key in self.lightweight_index.get_variable_names() {
if key.rsplit("::").next().map(|s| s == name).unwrap_or(false) {
for e in self.lightweight_index.find_variables_by_name(key) {
let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
if !seen_offsets.insert(key) {
continue;
}
let link_address =
e.address_ranges
.first()
Expand Down Expand Up @@ -2242,6 +2257,10 @@ impl ModuleData {
if e.tag != gimli::constants::DW_TAG_variable {
continue;
}
let key_offsets = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
if !seen_offsets.insert(key_offsets) {
continue;
}
let last = e.name.rsplit("::").next().unwrap_or(e.name.as_ref());
if last == name || e.name == name.into() {
let link_address =
Expand Down Expand Up @@ -2378,13 +2397,18 @@ impl ModuleData {
pub(crate) fn find_global_variables_by_name(&self, name: &str) -> Vec<GlobalVariableInfo> {
let mut out = Vec::new();
let entries = self.lightweight_index.find_variables_by_name(name);
let mut seen_offsets: HashSet<(u64, u64)> = HashSet::new();

// Parse object file once for section classification
let obj = match object::File::parse(&self._binary_mapped_file.data[..]) {
Ok(f) => f,
Err(_) => {
// Cannot classify sections, but still return entries with link_address
for e in entries {
let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
if !seen_offsets.insert(key) {
continue;
}
let link_address =
e.address_ranges
.first()
Expand All @@ -2402,6 +2426,10 @@ impl ModuleData {
};

for e in entries {
let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
if !seen_offsets.insert(key) {
continue;
}
let link_address =
e.address_ranges
.first()
Expand Down
122 changes: 89 additions & 33 deletions ghostscope-dwarf/src/parser/fast_parser.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
//! Unified DWARF parser - true single-pass parsing

use crate::{
core::{IndexEntry, Result},
core::{
demangle::{demangle_by_lang, demangled_leaf},
IndexEntry, Result,
},
data::{
directory_from_index, resolve_file_path, LightweightFileIndex, LightweightIndex,
LineMappingTable, ScopedFileIndexManager,
Expand Down Expand Up @@ -246,65 +249,118 @@ impl<'a> DwarfParser<'a> {
}
}
gimli::constants::DW_TAG_variable => {
tracing::trace!(
"Evaluating global variable DIE {:?} in CU {:?}",
entry.offset(),
unit_offset
);
let is_static_symbol = self.is_static_symbol(entry).unwrap_or(false);
let in_function_scope = tag_stack.iter().any(|t| {
*t == gimli::constants::DW_TAG_subprogram
|| *t == gimli::constants::DW_TAG_inlined_subroutine
});
if in_function_scope {
if in_function_scope && !is_static_symbol {
tracing::trace!(
"Skipping variable at {:?} (in function scope, stack={:?})",
entry.offset(),
tag_stack
);
// Skip local variables
tag_stack.push(entry.tag());
continue;
} else if in_function_scope {
// Rust (and some C compilers) sometimes nest file-scoped statics under the
// function that first references them, even though DW_AT_location uses
// DW_OP_addr. When DW_AT_external is false we treat them as true globals.
tracing::trace!(
"Treating static variable at {:?} as global despite function scope (stack={:?})",
entry.offset(),
tag_stack
);
}
if Self::is_declaration(entry).unwrap_or(false) {
tracing::trace!(
"Skipping variable at {:?} (declaration-only DIE)",
entry.offset()
);
tag_stack.push(entry.tag());
continue;
}
let mut collected_names: Vec<(String, bool)> = Vec::new();
let mut push_unique_name = |candidate: String, is_linkage_alias: bool| {
if candidate.is_empty() {
return;
}
if collected_names
.iter()
.any(|(existing, _)| existing == &candidate)
{
return;
}
collected_names.push((candidate, is_linkage_alias));
};

let mut have_primary_name = false;
if let Some(name) = self.extract_name(self.dwarf, unit, entry)? {
let flags = crate::core::IndexFlags {
is_static: self.is_static_symbol(entry).unwrap_or(false),
..Default::default()
};
// Restore variable address for globals/statics via DW_AT_location
let var_addr = self.extract_variable_address(entry, unit)?;
let var_ranges = var_addr.map(|a| vec![(a, a)]).unwrap_or_default();
push_unique_name(name, false);
have_primary_name = true;
}

if let Some((linkage_name, _)) =
self.extract_linkage_name(self.dwarf, unit, entry)?
{
if let Some(demangled) =
demangle_by_lang(cu_language, linkage_name.as_str())
{
let leaf = demangled_leaf(&demangled);
push_unique_name(leaf, false);
have_primary_name = true;
}
push_unique_name(linkage_name.clone(), true);
}

if !have_primary_name {
tracing::trace!(
"DWARF variable at {:?} missing usable name (CU lang={:?}); skipping alias registration",
entry.offset(),
cu_language
);
tag_stack.push(entry.tag());
continue;
}

let flags = crate::core::IndexFlags {
is_static: is_static_symbol,
..Default::default()
};
let var_addr = self.extract_variable_address(entry, unit)?;
let var_ranges = var_addr.map(|a| vec![(a, a)]).unwrap_or_default();

for (name, is_linkage_alias) in collected_names {
let mut entry_flags = flags;
entry_flags.is_linkage = is_linkage_alias;
let index_entry = IndexEntry {
name: std::sync::Arc::from(name.as_str()),
die_offset: entry.offset(),
unit_offset,
tag: entry.tag(),
flags,
flags: entry_flags,
language: cu_language,
address_ranges: var_ranges.clone(),
entry_pc: None,
};
tracing::trace!(
"Registering variable alias '{}' (linkage={}, lang={:?}, die={:?})",
name,
entry_flags.is_linkage,
cu_language,
entry.offset()
);
shard
.variables
.entry(name.clone())
.or_default()
.push(index_entry);
if let Some((linkage_name, _)) =
self.extract_linkage_name(self.dwarf, unit, entry)?
{
if linkage_name != name {
let mut alias_flags = flags;
alias_flags.is_linkage = true;
let index_entry_linkage = IndexEntry {
name: std::sync::Arc::from(linkage_name.as_str()),
die_offset: entry.offset(),
unit_offset,
tag: entry.tag(),
flags: alias_flags,
language: cu_language,
address_ranges: var_ranges,
entry_pc: None,
};
shard
.variables
.entry(linkage_name)
.or_default()
.push(index_entry_linkage);
}
}
}
}
gimli::constants::DW_TAG_structure_type
Expand Down
Loading