diff --git a/libwild/src/args/elf.rs b/libwild/src/args/elf.rs index 5a8e6071a..ed4a81533 100644 --- a/libwild/src/args/elf.rs +++ b/libwild/src/args/elf.rs @@ -134,6 +134,8 @@ pub struct ElfArgs { experimental_sframe: bool, pub(crate) debug_compression_kind: Option, + + pub(crate) gdb_index: bool, } #[derive(Debug)] @@ -234,7 +236,6 @@ const SILENTLY_IGNORED_SHORT_FLAGS: &[&str] = &[ ]; const IGNORED_FLAGS: &[&str] = &[ - "gdb-index", "fix-cortex-a53-835769", "fix-cortex-a53-843419", "discard-all", @@ -334,6 +335,7 @@ impl Default for ElfArgs { experimental_sframe: false, debug_compression_kind: None, + gdb_index: false, } } } @@ -1098,6 +1100,24 @@ fn setup_argument_parser() -> ArgumentParser { Ok(()) }); + parser + .declare() + .long("gdb-index") + .help("Create .gdb_index section") + .execute(|args, _modifier_stack| { + args.gdb_index = true; + Ok(()) + }); + + parser + .declare() + .long("no-gdb-index") + .help("Don't create .gdb_index section") + .execute(|args, _modifier_stack| { + args.gdb_index = false; + Ok(()) + }); + parser .declare() .long("export-dynamic") @@ -2086,6 +2106,10 @@ impl platform::Args for ElfArgs { self.trace } + fn should_write_gdb_index(&self) -> bool { + self.gdb_index + } + fn relocation_model(&self) -> crate::args::RelocationModel { self.relocation_model } diff --git a/libwild/src/elf.rs b/libwild/src/elf.rs index 7faead799..7c74cb421 100644 --- a/libwild/src/elf.rs +++ b/libwild/src/elf.rs @@ -1882,6 +1882,7 @@ impl platform::Platform for Elf { builder.add_sections(&custom.bss); builder.add_sections(&custom.nonalloc); + builder.add_section(output_section_id::GDB_INDEX); builder.add_section(output_section_id::COMMENT); builder.add_section(output_section_id::RISCV_ATTRIBUTES); builder.add_section(output_section_id::SHSTRTAB); @@ -1994,6 +1995,12 @@ impl platform::Platform for Elf { total_sizes.merge(&extra_sizes); } + fn compute_gdb_index_size( + groups: &[crate::layout::GroupState], + ) -> crate::error::Result { + 
crate::gdb_index::compute_gdb_index_size(groups) + } + fn align_load_segment_start( _segment_def: Self::ProgramSegmentDef, segment_alignment: Alignment, @@ -4644,6 +4651,11 @@ const SECTION_DEFINITIONS: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = { kind: SectionKind::Secondary(output_section_id::SYMTAB_SHNDX_LOCAL), ..DEFAULT_DEFS }; + defs[output_section_id::GDB_INDEX.as_usize()] = BuiltInSectionDetails { + kind: SectionKind::Primary(SectionName(GDB_INDEX_SECTION_NAME)), + ty: sht::PROGBITS, + ..DEFAULT_DEFS + }; // Start of regular sections defs[output_section_id::RODATA.as_usize()] = BuiltInSectionDetails { kind: SectionKind::Primary(SectionName(RODATA_SECTION_NAME)), diff --git a/libwild/src/elf_writer.rs b/libwild/src/elf_writer.rs index d4b3af7d9..11cf055f5 100644 --- a/libwild/src/elf_writer.rs +++ b/libwild/src/elf_writer.rs @@ -185,6 +185,9 @@ pub(crate) fn write<'data, A: Arch>( crate::validation::validate_bytes(layout, &sized_output.out)?; } + // Write .gdb_index before splitting, since it needs to read .debug_info from the output. + write_gdb_index_section(&mut sized_output.out, layout)?; + let mut section_buffers = split_output_into_sections(layout, &mut sized_output.out).0; if layout.args().should_write_eh_frame_hdr { @@ -313,6 +316,24 @@ fn fill_padding(mut section_buffers: OutputSectionMap<&mut [u8]>) { }); } +fn write_gdb_index_section(output: &mut [u8], layout: &ElfLayout) -> Result { + use crate::platform::Args as _; + if !layout.args().should_write_gdb_index() { + return Ok(()); + } + let sl = layout.section_layouts.get(output_section_id::GDB_INDEX); + if sl.file_size == 0 { + return Ok(()); + } + timing_phase!("Write .gdb_index"); + let start = sl.file_offset; + // Split the output buffer so that the part before our section is readable (for .debug_info) + // and our section is writable. 
+ let (before, rest) = output.split_at_mut(start); + let gdb_buf = &mut rest[..sl.file_size]; + crate::gdb_index::write_gdb_index(gdb_buf, before, layout) +} + fn write_sframe_section(sframe_buffer: &mut [u8], layout: &ElfLayout) -> Result { if layout.args().discard_sframe || sframe_buffer.is_empty() { return Ok(()); diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs new file mode 100644 index 000000000..53138fafc --- /dev/null +++ b/libwild/src/gdb_index.rs @@ -0,0 +1,644 @@ +//! Generates a `.gdb_index` section. +//! +//! The `.gdb_index` section is an accelerator structure that lets GDB skip parsing all +//! `.debug_info` at startup. We emit version 9, which includes a shortcut table. +//! +//! Format reference: + +use crate::elf::Elf; +use crate::error::Context as _; +use crate::error::Result; +use crate::layout::FileLayout; +use crate::layout::FileLayoutState; +use crate::layout::GroupState; +use crate::layout::Layout; +use crate::output_section_id::SectionName; +use crate::platform::ObjectFile as _; +use crate::platform::SectionHeader as _; +use crate::resolution::SectionSlot; +use hashbrown::HashMap; +use itertools::Itertools as _; +use linker_utils::bit_misc::BitExtraction; +use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME; +use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME_STR; +use linker_utils::utils::u32_from_slice; +use linker_utils::utils::u64_from_slice; +use std::collections::BTreeSet; +use std::mem::size_of; +use zerocopy::FromBytes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; + +const GDB_INDEX_VERSION: u32 = 9; + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexHeader { + version: u32, + cu_list_offset: u32, + tu_list_offset: u32, + address_area_offset: u32, + symbol_table_offset: u32, + shortcut_table_offset: u32, + constant_pool_offset: u32, +} + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, 
KnownLayout)] +#[repr(C, packed)] +struct GdbIndexCuEntry { + cu_offset: u64, + cu_length: u64, +} + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexAddressEntry { + low_address: u64, + high_address: u64, + cu_index: u32, +} + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexShortcutTable { + language_of_main: u32, + name_of_main_offset: u32, +} + +const HEADER_SIZE: usize = size_of::(); +const CU_ENTRY_SIZE: usize = size_of::(); +const ADDRESS_ENTRY_SIZE: usize = size_of::(); +const SHORTCUT_TABLE_SIZE: usize = size_of::(); +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexHashSlot { + name_offset: u32, + cu_vector_offset: u32, +} + +const HASH_SLOT_SIZE: usize = size_of::(); + +/// The GDB index hash function. +fn gdb_hash(name: &[u8]) -> u32 { + let mut r: u32 = 0; + for &c in name { + r = r + .wrapping_mul(67) + .wrapping_add(u32::from(c.to_ascii_lowercase())) + .wrapping_sub(113); + } + r +} + +/// Encode a CU vector entry: bits 0-23 = CU index, bits 28-30 = kind, bit 31 = is_static. +/// +/// The attrs byte from `.debug_gnu_pubnames`/`.debug_gnu_pubtypes` packs kind in bits 4-6 +/// and is_static in bit 7. +fn encode_cu_vector_entry(cu_index: u32, attrs: u8) -> u32 { + let attrs = u64::from(attrs); + let kind = attrs.extract_bit_range(4..7) as u32; + let is_static = attrs.extract_bit_range(7..8) as u32; + (cu_index & 0x00FF_FFFF) | (kind << 28) | (is_static << 31) +} + +/// Number of hash table slots: next power of two >= 4/3 * n. +fn compute_hash_table_slots(num_symbols: usize) -> usize { + if num_symbols == 0 { + return 0; + } + (num_symbols * 4 / 3 + 1).next_power_of_two() +} + +struct CuBoundary { + offset: u64, + length: u64, +} + +/// Walk `.debug_info` bytes and return `(offset, total_length)` for each CU. 
+///
+/// Each CU starts with an initial length field (§7.5.1.1) encoded per §7.4: a 4-byte value, or
+/// `0xFFFF_FFFF` followed by an 8-byte length for DWARF-64.
+///
+/// Parsing stops at the first unit whose declared length does not fit in `usize` or runs past
+/// the end of `data`; units found before that point are still returned.
+fn parse_cu_boundaries(data: &[u8]) -> Vec<CuBoundary> {
+    // Initial-length escape value that introduces a 64-bit length.
+    const DWARF64_ESCAPE: u32 = 0xFFFF_FFFF;
+
+    let mut cus = Vec::new();
+    let mut offset = 0usize;
+    // `offset` only ever advances by a size that was checked against the remaining bytes, so it
+    // never exceeds `data.len()`.
+    while let Some(rest) = data.get(offset..).filter(|rest| rest.len() >= 4) {
+        let init_len = u32_from_slice(rest);
+        // Total unit size = the initial-length field itself + the length it declares. Checked
+        // arithmetic keeps a corrupt length from wrapping into a small, plausible-looking size.
+        let total = if init_len == DWARF64_ESCAPE {
+            if rest.len() < 12 {
+                break;
+            }
+            usize::try_from(u64_from_slice(&rest[4..]))
+                .ok()
+                .and_then(|len| len.checked_add(12))
+        } else {
+            usize::try_from(init_len)
+                .ok()
+                .and_then(|len| len.checked_add(4))
+        };
+        // Comparing against the remaining bytes (rather than `offset + total`) cannot overflow.
+        let Some(total) = total.filter(|&total| total <= rest.len()) else {
+            break;
+        };
+        cus.push(CuBoundary {
+            offset: offset as u64,
+            length: total as u64,
+        });
+        offset += total;
+    }
+    cus
+}
+
+struct PubnamesSet<'data> {
+    /// Offset of the CU in `.debug_info` that this set's header points to.
+    debug_info_offset: u64,
+    /// `(name, attrs_byte)` pairs, with names borrowed from the section data.
+    entries: Vec<(&'data [u8], u8)>,
+}
+
+/// Parse `.debug_gnu_pubnames` / `.debug_gnu_pubtypes` section data.
+///
+/// Each set has a header pointing to a CU in `.debug_info`, followed by
+/// (die_offset, attrs_byte, NUL-terminated name) entries.
+fn parse_pubnames_sets(data: &[u8]) -> Vec<PubnamesSet<'_>> {
+    // Initial-length escape value that introduces a 64-bit length.
+    const DWARF64_ESCAPE: u32 = 0xFFFF_FFFF;
+    // DWARF32: 4(len) + 2(ver) + 4(offset) + 4(size) = 14
+    const SET_HEADER_SIZE_32: usize = 14;
+    // DWARF64: 4 + 8(len) + 2(ver) + 8(offset) + 8(size) = 30
+    const SET_HEADER_SIZE_64: usize = 30;
+
+    let mut sets = Vec::new();
+    // Invariant: `pos <= data.len()`, because it only ever moves to a clamped `set_end`.
+    let mut pos = 0usize;
+    while data.len() - pos >= 4 {
+        let remaining = data.len() - pos;
+        let init_len = u32_from_slice(&data[pos..]);
+        let is_64 = init_len == DWARF64_ESCAPE;
+
+        // `declared_size` is the full size of the set including its initial-length field, or
+        // `None` if the declared length cannot be represented without overflow.
+        let (header_size, declared_size, debug_info_offset) = if is_64 {
+            if remaining < SET_HEADER_SIZE_64 {
+                break;
+            }
+            let len = u64_from_slice(&data[pos + 4..]);
+            let dio = u64_from_slice(&data[pos + 14..]);
+            let size = usize::try_from(len)
+                .ok()
+                .and_then(|len| len.checked_add(12));
+            (SET_HEADER_SIZE_64, size, dio)
+        } else {
+            if remaining < SET_HEADER_SIZE_32 {
+                break;
+            }
+            let dio = u64::from(u32_from_slice(&data[pos + 6..]));
+            let size = usize::try_from(init_len)
+                .ok()
+                .and_then(|len| len.checked_add(4));
+            (SET_HEADER_SIZE_32, size, dio)
+        };
+
+        // Clamp the set to the section. An overflowing length is treated as "runs to the end of
+        // the section". Since `declared_size >= 4`, `set_end > pos` always holds, so the outer
+        // loop is guaranteed to make progress even on corrupt input.
+        let set_end = declared_size.map_or(data.len(), |size| pos + size.min(remaining));
+
+        let offset_size = if is_64 { 8 } else { 4 };
+        let mut entries = Vec::new();
+        let mut ep = pos + header_size;
+
+        // `saturating_sub` also covers a set whose declared size is smaller than its own header.
+        while set_end.saturating_sub(ep) >= offset_size {
+            let die_offset = if is_64 {
+                u64_from_slice(&data[ep..])
+            } else {
+                u64::from(u32_from_slice(&data[ep..]))
+            };
+            ep += offset_size;
+            // A zero DIE offset terminates the entry list.
+            if die_offset == 0 {
+                break;
+            }
+            // The attrs byte and the NUL-terminated name must both lie inside the set; a
+            // truncated trailing entry is dropped.
+            let Some((&attrs, tail)) = data[ep..set_end].split_first() else {
+                break;
+            };
+            let Some(name_len) = tail.iter().position(|&b| b == 0) else {
+                break;
+            };
+            entries.push((&tail[..name_len], attrs));
+            // Skip the attrs byte, the name and its NUL terminator.
+            ep += 1 + name_len + 1;
+        }
+
+        sets.push(PubnamesSet {
+            debug_info_offset,
+            entries,
+        });
+        pos = set_end;
+    }
+    sets
+}
+
+/// Read raw section data from an input object by name.
+///
+/// Returns `Ok(None)` when the object has no section called `name`.
+fn raw_section_by_name<'data>(
+    object: &crate::elf::File<'data>,
+    name: &str,
+) -> Result<Option<&'data [u8]>> {
+    let Some((_index, header)) = object.section_by_name(name) else {
+        return Ok(None);
+    };
+    Ok(Some(object.raw_section_data(header)?))
+}
+
+/// Pre-scan all input objects to compute the `.gdb_index` section size.
+pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> Result { + let objects = groups.iter().flat_map(|g| g.files.iter()).filter_map(|f| { + let FileLayoutState::Object(obj) = f else { + return None; + }; + Some((obj.object, obj.sections.as_slice())) + }); + let scan = scan_objects_for_gdb_index(objects)?; + + if scan.total_cus == 0 { + return Ok(0); + } + + let mut cv_bytes = 0usize; + let mut str_bytes = 0usize; + for (name, sd) in &scan.sorted_symbols { + // 4 bytes for the entry count, then 4 bytes per entry. + cv_bytes += 4 + sd.cv_entries.len() * 4; + str_bytes += name.len() + 1; + } + + Ok((HEADER_SIZE + + scan.total_cus * CU_ENTRY_SIZE + + scan.total_addr_entries * ADDRESS_ENTRY_SIZE + + scan.ht_slots * HASH_SLOT_SIZE + + SHORTCUT_TABLE_SIZE + + cv_bytes + + str_bytes) as u64) +} + +/// Write the `.gdb_index` section into `buf`. +/// +/// Reads the output `.debug_info` (already written into `output_buf`) for the CU list, +/// and re-scans input objects for address ranges and pubnames/pubtypes symbols. +pub(crate) fn write_gdb_index( + buf: &mut [u8], + output_buf: &[u8], + layout: &Layout<'_, Elf>, +) -> Result { + if buf.is_empty() { + return Ok(()); + } + + let cu_entries = build_cu_list(output_buf, layout)?; + let objects = layout + .group_layouts + .iter() + .flat_map(|g| g.files.iter()) + .filter_map(|f| { + let FileLayout::Object(obj) = f else { + return None; + }; + Some((obj.object, obj.sections.as_slice())) + }); + let GdbIndexScanResult { + sorted_symbols: sorted_names, + ht_slots, + .. + } = scan_objects_for_gdb_index(objects)?; + if !cu_entries.is_empty() && sorted_names.is_empty() { + layout.symbol_db.warning( + "Objects lack .debug_gnu_pubnames/.debug_gnu_pubtypes sections, so the symbol table in .gdb_index will be empty. 
\ + Compile with -ggnu-pubnames to populate it.", + ); + } + let addr_entries = build_address_entries(layout)?; + + let cu_list_off = HEADER_SIZE as u32; + let tu_list_off = cu_list_off + (cu_entries.len() * CU_ENTRY_SIZE) as u32; + let addr_off = tu_list_off; + let sym_off = addr_off + (addr_entries.len() * ADDRESS_ENTRY_SIZE) as u32; + let short_off = sym_off + (ht_slots * HASH_SLOT_SIZE) as u32; + let cp_off = short_off + SHORTCUT_TABLE_SIZE as u32; + + // Write constant pool: CU vectors first, then name strings. + let mut cv_offsets = Vec::with_capacity(sorted_names.len()); + let mut off = cp_off as usize; + for (_, sd) in &sorted_names { + cv_offsets.push((off - cp_off as usize) as u32); + buf[off..off + 4].copy_from_slice(&(sd.cv_entries.len() as u32).to_le_bytes()); + off += 4; + for &e in &sd.cv_entries { + buf[off..off + 4].copy_from_slice(&e.to_le_bytes()); + off += 4; + } + } + let mut name_offsets = Vec::with_capacity(sorted_names.len()); + for (name, _) in &sorted_names { + name_offsets.push((off - cp_off as usize) as u32); + buf[off..off + name.len()].copy_from_slice(name); + off += name.len(); + buf[off] = 0; + off += 1; + } + + let hdr = GdbIndexHeader { + version: GDB_INDEX_VERSION, + cu_list_offset: cu_list_off, + tu_list_offset: tu_list_off, + address_area_offset: addr_off, + symbol_table_offset: sym_off, + shortcut_table_offset: short_off, + constant_pool_offset: cp_off, + }; + buf[..HEADER_SIZE].copy_from_slice(hdr.as_bytes()); + + let mut off = cu_list_off as usize; + for cu in &cu_entries { + buf[off..off + CU_ENTRY_SIZE].copy_from_slice(cu.as_bytes()); + off += CU_ENTRY_SIZE; + } + + off = addr_off as usize; + for a in &addr_entries { + buf[off..off + ADDRESS_ENTRY_SIZE].copy_from_slice(a.as_bytes()); + off += ADDRESS_ENTRY_SIZE; + } + + write_hash_table( + buf, + ht_slots, + sym_off as usize, + &sorted_names, + &name_offsets, + &cv_offsets, + )?; + + // The shortcut table lets GDB quickly determine the language of `main` without scanning 
the + // full index. Filling it requires looking up the DWARF language attribute of the main CU, which + // we don't currently do. GDB handles zeroed values here by falling back to its own lookup. + let so = short_off as usize; + let sc = GdbIndexShortcutTable { + language_of_main: 0, + name_of_main_offset: 0, + }; + buf[so..so + SHORTCUT_TABLE_SIZE].copy_from_slice(sc.as_bytes()); + Ok(()) +} + +/// Build the CU list from the already-written output `.debug_info`. +fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Result> { + let Some(id) = layout + .output_sections + .section_id_by_name(SectionName(DEBUG_INFO_SECTION_NAME)) + else { + return Ok(Vec::new()); + }; + let sl = layout.section_layouts.get(id); + let start = sl.file_offset; + let end = start + sl.file_size; + crate::ensure!( + end <= output_buf.len(), + ".debug_info layout extends beyond output buffer ({end} > {})", + output_buf.len() + ); + Ok(parse_cu_boundaries(&output_buf[start..end]) + .into_iter() + .map(|cu| GdbIndexCuEntry { + cu_offset: cu.offset, + cu_length: cu.length, + }) + .collect()) +} + +struct SymData { + cv_entries: BTreeSet, + hash: u32, +} + +struct GdbIndexScanResult<'data> { + total_cus: usize, + total_addr_entries: usize, + sorted_symbols: Vec<(&'data [u8], SymData)>, + ht_slots: usize, +} + +/// Scan input objects to build the symbol table and count CUs / address entries. +fn scan_objects_for_gdb_index<'data>( + objects: impl Iterator, &'data [SectionSlot])>, +) -> Result> { + let mut total_cus = 0usize; + let mut total_addr_entries = 0usize; + let mut sym_map: HashMap<&'data [u8], SymData> = HashMap::new(); + let mut cu_offset = 0u32; + + for (object, sections) in objects { + let boundaries = match raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR)? 
{ + Some(data) => parse_cu_boundaries(data), + None => continue, + }; + if boundaries.is_empty() { + continue; + } + + let base = cu_offset; + + let mut obj_addr_count = 0usize; + for (si, slot) in sections.iter().enumerate() { + let SectionSlot::Loaded(section) = slot else { + continue; + }; + if section.size == 0 { + continue; + } + let header = object.section(object::SectionIndex(si))?; + if header.is_alloc() && header.is_executable() { + obj_addr_count += 1; + } + } + + total_cus += boundaries.len(); + total_addr_entries += obj_addr_count; + + let mut offset_to_idx: HashMap = HashMap::with_capacity(boundaries.len()); + for (i, cu) in boundaries.iter().enumerate() { + offset_to_idx.insert(cu.offset, base + i as u32); + } + cu_offset += boundaries.len() as u32; + + for (name, entry) in collect_pubname_entries(object, &offset_to_idx)? { + let sd = sym_map.entry(name).or_insert_with(|| SymData { + cv_entries: BTreeSet::new(), + hash: gdb_hash(name), + }); + sd.cv_entries.insert(entry); + } + } + + let sorted: Vec<(&[u8], SymData)> = sym_map + .into_iter() + .sorted_unstable_by_key(|(name, _)| *name) + .collect(); + let ht_slots = compute_hash_table_slots(sorted.len()); + Ok(GdbIndexScanResult { + total_cus, + total_addr_entries, + sorted_symbols: sorted, + ht_slots, + }) +} + +/// Build address entries using resolved addresses from the final layout. +fn build_address_entries(layout: &Layout<'_, Elf>) -> Result> { + let mut entries = Vec::new(); + let mut cu_offset = 0u32; + + for group in &layout.group_layouts { + for file in &group.files { + let FileLayout::Object(obj) = file else { + continue; + }; + let object = obj.object; + + let obj_cu_count = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR)? 
+ .map_or(0, |data| parse_cu_boundaries(data).len() as u32); + if obj_cu_count == 0 { + continue; + } + let base_cu = cu_offset; + + for (si, slot) in obj.sections.iter().enumerate() { + let SectionSlot::Loaded(section) = slot else { + continue; + }; + if section.size == 0 { + continue; + } + let header = object.section(object::SectionIndex(si))?; + if !header.is_alloc() || !header.is_executable() { + continue; + } + if let Some(addr) = obj.section_resolutions[si].address() + && addr != 0 + { + entries.push(GdbIndexAddressEntry { + low_address: addr, + high_address: addr + section.size, + cu_index: base_cu, + }); + } + } + + cu_offset += obj_cu_count; + } + } + Ok(entries) +} + +/// Collect encoded pubname/pubtype entries from an object's `.debug_gnu_pubnames` +/// and `.debug_gnu_pubtypes` sections, returning `(name, encoded_cu_vector_entry)` pairs. +fn collect_pubname_entries<'data>( + object: &crate::elf::File<'data>, + offset_to_idx: &HashMap, +) -> Result> { + let mut entries = Vec::new(); + for section_name in [".debug_gnu_pubnames", ".debug_gnu_pubtypes"] { + let Some(data) = raw_section_by_name(object, section_name)? else { + continue; + }; + for set in parse_pubnames_sets(data) { + let Some(&cu_idx) = offset_to_idx.get(&set.debug_info_offset) else { + continue; + }; + for (name, attrs) in set.entries { + entries.push((name, encode_cu_vector_entry(cu_idx, attrs))); + } + } + } + Ok(entries) +} + +/// Insert symbols into the open-addressing hash table region of `buf`. 
+fn write_hash_table( + buf: &mut [u8], + ht_slots: usize, + ht_start: usize, + sorted: &[(&[u8], SymData)], + name_offsets: &[u32], + cv_offsets: &[u32], +) -> Result { + let ht_end = ht_start + ht_slots * HASH_SLOT_SIZE; + buf[ht_start..ht_end].fill(0); + + if ht_slots == 0 { + return Ok(()); + } + let mask = (ht_slots - 1) as u32; + for (i, (_, sd)) in sorted.iter().enumerate() { + let h = sd.hash; + let step = (h.wrapping_mul(17) & mask) | 1; + let mut slot = h & mask; + loop { + let so = ht_start + slot as usize * HASH_SLOT_SIZE; + let existing = GdbIndexHashSlot::read_from_bytes(&buf[so..so + HASH_SLOT_SIZE]) + .context("Failed to read .gdb_index hash table slot")?; + if existing.name_offset == 0 && existing.cu_vector_offset == 0 { + let new_slot = GdbIndexHashSlot { + name_offset: name_offsets[i], + cu_vector_offset: cv_offsets[i], + }; + buf[so..so + HASH_SLOT_SIZE].copy_from_slice(new_slot.as_bytes()); + break; + } + slot = (slot + step) & mask; + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_gdb_hash_case_insensitive() { + assert_eq!(gdb_hash(b"main"), gdb_hash(b"MAIN")); + assert_eq!(gdb_hash(b"main"), gdb_hash(b"Main")); + assert_ne!(gdb_hash(b"main"), gdb_hash(b"foo")); + } + + #[test] + fn test_hash_table_slots_power_of_two() { + assert_eq!(compute_hash_table_slots(0), 0); + assert_eq!(compute_hash_table_slots(1), 2); + for n in 1..100 { + let s = compute_hash_table_slots(n); + assert!(s.is_power_of_two()); + assert!(s >= n); + } + } + + #[test] + fn test_encode_cu_vector_entry() { + // Global function: kind=3 in bits 4-6, is_static=0 in bit 7 + let e = encode_cu_vector_entry(5, 0b0011_0000); + assert_eq!(e & 0x00FF_FFFF, 5); + assert_eq!((e >> 28) & 0x7, 3); + assert_eq!((e >> 31) & 0x1, 0); + + // Static function: kind=3, is_static=1 + let e2 = encode_cu_vector_entry(42, 0b1011_0000); + assert_eq!(e2 & 0x00FF_FFFF, 42); + assert_eq!((e2 >> 28) & 0x7, 3); + assert_eq!((e2 >> 31) & 0x1, 1); + } + + #[test] + fn 
test_parse_cu_boundaries() { + assert!(parse_cu_boundaries(&[]).is_empty()); + + // Single DWARF32 CU: init_length=8, total = 4 + 8 = 12 bytes. + let mut data = vec![0u8; 12]; + data[0..4].copy_from_slice(&8u32.to_le_bytes()); + let cus = parse_cu_boundaries(&data); + assert_eq!(cus.len(), 1); + assert_eq!(cus[0].offset, 0); + assert_eq!(cus[0].length, 12); + } + + #[test] + fn test_header_size() { + assert_eq!(HEADER_SIZE, 7 * 4); + } +} diff --git a/libwild/src/layout.rs b/libwild/src/layout.rs index 2089e1223..1948a26fd 100644 --- a/libwild/src/layout.rs +++ b/libwild/src/layout.rs @@ -1141,6 +1141,8 @@ impl<'data, P: Platform> CommonGroupState<'data, P> { *self.mem_sizes.get(part_id::SYMTAB_SHNDX_GLOBAL), ); + memory_offsets.increment(part_id::GDB_INDEX, *self.mem_sizes.get(part_id::GDB_INDEX)); + strtab_offset_start } @@ -1840,6 +1842,21 @@ fn compute_total_section_part_sizes<'data, P: Platform>( total_sizes.merge(&group_state.common.mem_sizes); } + // Compute and allocate the .gdb_index section size if --gdb-index is enabled. + let gdb_index_size = if resources.symbol_db.args.should_write_gdb_index() { + P::compute_gdb_index_size(group_states)? + } else { + 0 + }; + if gdb_index_size > 0 { + let first_group = group_states.first_mut().unwrap(); + first_group + .common + .mem_sizes + .increment(part_id::GDB_INDEX, gdb_index_size); + total_sizes.increment(part_id::GDB_INDEX, gdb_index_size); + } + // We need to apply late-stage adjustments for the epilogue before we do so for the prelude, // since the prelude needs to know if the .hash section will be written, which is decided by the // epilogue. 
diff --git a/libwild/src/lib.rs b/libwild/src/lib.rs index 053c5c575..df43064ec 100644 --- a/libwild/src/lib.rs +++ b/libwild/src/lib.rs @@ -22,6 +22,7 @@ pub(crate) mod file_kind; pub(crate) mod file_writer; pub(crate) mod fs; pub(crate) mod gc_stats; +pub(crate) mod gdb_index; pub(crate) mod glob_match; pub(crate) mod grouping; pub(crate) mod hash; diff --git a/libwild/src/output_section_id.rs b/libwild/src/output_section_id.rs index fe19fcded..3cc50a964 100644 --- a/libwild/src/output_section_id.rs +++ b/libwild/src/output_section_id.rs @@ -99,6 +99,7 @@ pub(crate) const SYMTAB_SHNDX_LOCAL: OutputSectionId = part_id::SYMTAB_SHNDX_LOCAL.output_section_id(); pub(crate) const SYMTAB_SHNDX_GLOBAL: OutputSectionId = part_id::SYMTAB_SHNDX_GLOBAL.output_section_id(); +pub(crate) const GDB_INDEX: OutputSectionId = part_id::GDB_INDEX.output_section_id(); // Mach-O specific sections pub(crate) const PAGEZERO_SEGMENT: OutputSectionId = part_id::PAGEZERO_SEGMENT.output_section_id(); diff --git a/libwild/src/part_id.rs b/libwild/src/part_id.rs index 490531c42..7cb87502b 100644 --- a/libwild/src/part_id.rs +++ b/libwild/src/part_id.rs @@ -51,34 +51,35 @@ pub(crate) const RELRO_PADDING: PartId = PartId(28); pub(crate) const RELR_DYN: PartId = PartId(29); pub(crate) const SYMTAB_SHNDX_LOCAL: PartId = PartId(30); pub(crate) const SYMTAB_SHNDX_GLOBAL: PartId = PartId(31); +pub(crate) const GDB_INDEX: PartId = PartId(32); // Mach-O specific sections -pub(crate) const PAGEZERO_SEGMENT: PartId = PartId(32); -pub(crate) const TEXT_SEGMENT: PartId = PartId(33); -pub(crate) const DATA_SEGMENT: PartId = PartId(34); -pub(crate) const LINK_EDIT_SEGMENT: PartId = PartId(35); -pub(crate) const ENTRY_POINT: PartId = PartId(36); -pub(crate) const DYLD_CHAINED_FIXUPS: PartId = PartId(37); -pub(crate) const CHAINED_FIXUP_TABLE: PartId = PartId(38); -pub(crate) const SYMTAB_COMMAND: PartId = PartId(39); -pub(crate) const CODE_SIGNATURE_COMMAND: PartId = PartId(40); -pub(crate) const 
CODE_SIGNATURE: PartId = PartId(41); +pub(crate) const PAGEZERO_SEGMENT: PartId = PartId(33); +pub(crate) const TEXT_SEGMENT: PartId = PartId(34); +pub(crate) const DATA_SEGMENT: PartId = PartId(35); +pub(crate) const LINK_EDIT_SEGMENT: PartId = PartId(36); +pub(crate) const ENTRY_POINT: PartId = PartId(37); +pub(crate) const DYLD_CHAINED_FIXUPS: PartId = PartId(38); +pub(crate) const CHAINED_FIXUP_TABLE: PartId = PartId(39); +pub(crate) const SYMTAB_COMMAND: PartId = PartId(40); +pub(crate) const CODE_SIGNATURE_COMMAND: PartId = PartId(41); +pub(crate) const CODE_SIGNATURE: PartId = PartId(42); // Wasm specific sections. Each one corresponds to a single standard Wasm section. -pub(crate) const WASM_TYPE: PartId = PartId(42); -pub(crate) const WASM_IMPORT: PartId = PartId(43); -pub(crate) const WASM_FUNCTION: PartId = PartId(44); -pub(crate) const WASM_TABLE: PartId = PartId(45); -pub(crate) const WASM_MEMORY: PartId = PartId(46); -pub(crate) const WASM_GLOBAL: PartId = PartId(47); -pub(crate) const WASM_EXPORT: PartId = PartId(48); -pub(crate) const WASM_START: PartId = PartId(49); -pub(crate) const WASM_ELEMENT: PartId = PartId(50); -pub(crate) const WASM_DATA_COUNT: PartId = PartId(51); -pub(crate) const WASM_CODE: PartId = PartId(52); -pub(crate) const WASM_DATA: PartId = PartId(53); - -pub(crate) const NUM_SINGLE_PART_SECTIONS: u32 = 54; +pub(crate) const WASM_TYPE: PartId = PartId(43); +pub(crate) const WASM_IMPORT: PartId = PartId(44); +pub(crate) const WASM_FUNCTION: PartId = PartId(45); +pub(crate) const WASM_TABLE: PartId = PartId(46); +pub(crate) const WASM_MEMORY: PartId = PartId(47); +pub(crate) const WASM_GLOBAL: PartId = PartId(48); +pub(crate) const WASM_EXPORT: PartId = PartId(49); +pub(crate) const WASM_START: PartId = PartId(50); +pub(crate) const WASM_ELEMENT: PartId = PartId(51); +pub(crate) const WASM_DATA_COUNT: PartId = PartId(52); +pub(crate) const WASM_CODE: PartId = PartId(53); +pub(crate) const WASM_DATA: PartId = PartId(54); + 
+pub(crate) const NUM_SINGLE_PART_SECTIONS: u32 = 55; #[cfg(test)] pub(crate) const NUM_BUILT_IN_PARTS: usize = NUM_SINGLE_PART_SECTIONS as usize diff --git a/libwild/src/platform.rs b/libwild/src/platform.rs index b1ec11c76..72870d96f 100644 --- a/libwild/src/platform.rs +++ b/libwild/src/platform.rs @@ -725,6 +725,13 @@ pub(crate) trait Platform: _total_sizes: &mut OutputSectionPartMap, ) { } + + /// Compute the size of the `.gdb_index` section, if applicable. + fn compute_gdb_index_size( + _groups: &[crate::layout::GroupState], + ) -> crate::error::Result { + Ok(0) + } } /// Abstracts over the different object file formats that we support (or may support). e.g. ELF. @@ -1290,6 +1297,10 @@ pub(crate) trait Args: std::fmt::Debug + Send + Sync + 'static { false } + fn should_write_gdb_index(&self) -> bool { + false + } + fn relocation_model(&self) -> crate::args::RelocationModel; fn should_output_executable(&self) -> bool; diff --git a/linker-utils/src/elf.rs b/linker-utils/src/elf.rs index ae5bff340..fd3381d77 100644 --- a/linker-utils/src/elf.rs +++ b/linker-utils/src/elf.rs @@ -326,6 +326,10 @@ pub mod secnames { pub const RELRO_PADDING_SECTION_NAME: &[u8] = RELRO_PADDING_SECTION_NAME_STR.as_bytes(); pub const SYMTAB_SHNDX_SECTION_NAME_STR: &str = ".symtab_shndx"; pub const SYMTAB_SHNDX_SECTION_NAME: &[u8] = SYMTAB_SHNDX_SECTION_NAME_STR.as_bytes(); + pub const DEBUG_INFO_SECTION_NAME_STR: &str = ".debug_info"; + pub const DEBUG_INFO_SECTION_NAME: &[u8] = DEBUG_INFO_SECTION_NAME_STR.as_bytes(); + pub const GDB_INDEX_SECTION_NAME_STR: &str = ".gdb_index"; + pub const GDB_INDEX_SECTION_NAME: &[u8] = GDB_INDEX_SECTION_NAME_STR.as_bytes(); pub const GNU_LTO_SYMTAB_PREFIX: &str = ".gnu.lto_.symtab"; } diff --git a/wild/tests/external_tests/mold_skip_tests.toml b/wild/tests/external_tests/mold_skip_tests.toml index 6c20cfc7c..fe6d6af96 100644 --- a/wild/tests/external_tests/mold_skip_tests.toml +++ b/wild/tests/external_tests/mold_skip_tests.toml @@ -16,6 +16,7 @@ 
tests = [ "execute-only.sh", "fatal-warnings.sh", # `-warn-common` and `-fatal-warnings` "filter.sh", + "gdb-index-compress-output.sh", # --compress-debug-sections=zlib-gabi "global-offset-table.sh", # `-defsym=foo=_GLOBAL_OFFSET_TABLE_` "icf-gcc-except-table.sh", # `--icf` "icf.sh", # `--icf` @@ -51,20 +52,6 @@ tests = [ "warn-once.sh", ] -[skipped_groups.gdb_index] -reason = "GDB index support" -tracking_issue = "https://github.com/wild-linker/wild/issues/811" -tests = [ - "gdb-index-compress-output.sh", - "gdb-index-dwarf2.sh", - "gdb-index-dwarf3.sh", - "gdb-index-dwarf4.sh", - "gdb-index-dwarf5.sh", - "gdb-index-dwarf64.sh", - "gdb-index-split-dwarf.sh", - "gdb-index-rnglistx.sh", -] - [skipped_groups.version_script] reason = "Version script support" tests = ["version-script15.sh"] diff --git a/wild/tests/integration_tests.rs b/wild/tests/integration_tests.rs index 69befee5c..85b4832fa 100644 --- a/wild/tests/integration_tests.rs +++ b/wild/tests/integration_tests.rs @@ -67,9 +67,19 @@ //! //! Contains:{string} Checks that the output binary does contain the specified string. //! +//! ExpectSection:{section_name} Checks that the specified section exists in the output binary. +//! +//! NoSection:{section_name} Checks that the specified section does not exist in the output binary. +//! //! ExpectSectionBytes:{section_name}=0x{hex_bytes} Checks that the specified section contains //! exactly the given bytes. //! +//! ExpectGdbIndexCuCount:{count} Checks that the `.gdb_index` section contains exactly the +//! specified number of CU entries. +//! +//! ExpectGdbIndexSymbol:{name} Checks that the `.gdb_index` symbol table contains an entry for the +//! specified symbol name. +//! //! Mode:{mode} Set linking mode to static (default), dynamic or unspecified. Cannot be used //! together with LinkerDriver. //! 
@@ -1181,7 +1191,11 @@ struct Assertions { expected_load_alignments: Vec, expected_dynamic_entries: Vec, absent_dynamic_entries: Vec, + expected_sections: Vec, + absent_sections: Vec, expected_section_bytes: Vec, + expected_gdb_index_cu_count: Option, + expected_gdb_index_symbols: Vec, output_file_matches: Vec, max_thunks: u64, expected_program_headers: Vec, @@ -1510,6 +1524,25 @@ fn process_directive( .assertions .contains_strings .push(arg.trim().to_owned()), + "ExpectSection" => config + .assertions + .expected_sections + .push(arg.trim().to_owned()), + "NoSection" => config + .assertions + .absent_sections + .push(arg.trim().to_owned()), + "ExpectGdbIndexCuCount" => { + config.assertions.expected_gdb_index_cu_count = Some( + arg.trim() + .parse::() + .with_context(|| format!("Invalid CU count: {arg}"))?, + ); + } + "ExpectGdbIndexSymbol" => config + .assertions + .expected_gdb_index_symbols + .push(arg.trim().to_owned()), "ExpectSectionBytes" => { let (section_name, hex_str) = arg.trim().split_once('=').with_context(|| { format!("ExpectSectionBytes requires section_name=0xhex_bytes, got `{arg}`") @@ -3498,7 +3531,11 @@ impl Assertions { "dynsym", )?; self.verify_symbols_absent(&self.no_sym, obj.symbols(), "symtab")?; + self.verify_expected_sections(&obj)?; + self.verify_absent_sections(&obj)?; self.verify_section_bytes(&obj)?; + self.verify_gdb_index_cu_count(&obj)?; + self.verify_gdb_index_symbols(&obj)?; self.verify_strings(&bytes)?; verify_no_overlapping_sections(&obj)?; verify_no_overlapping_segments(&obj)?; @@ -3544,6 +3581,88 @@ impl Assertions { Ok(()) } + fn verify_expected_sections(&self, obj: &object::File) -> Result { + for name in &self.expected_sections { + ensure!( + obj.section_by_name(name).is_some(), + "Expected section `{name}` not found" + ); + } + Ok(()) + } + + fn verify_absent_sections(&self, obj: &object::File) -> Result { + for name in &self.absent_sections { + ensure!( + obj.section_by_name(name).is_none(), + "Section `{name}` should 
not exist but was found" + ); + } + Ok(()) + } + + fn verify_gdb_index_cu_count(&self, obj: &object::File) -> Result { + let Some(expected) = self.expected_gdb_index_cu_count else { + return Ok(()); + }; + let data = gdb_index_section_data(obj, "ExpectGdbIndexCuCount")?; + let hdr = GdbIndexOffsets::parse(&data)?; + let cu_count = (hdr.tu_list - hdr.cu_list) / 16; + ensure!( + cu_count == expected, + "ExpectGdbIndexCuCount: expected {expected} CUs, got {cu_count}" + ); + Ok(()) + } + + fn verify_gdb_index_symbols(&self, obj: &object::File) -> Result { + if self.expected_gdb_index_symbols.is_empty() { + return Ok(()); + } + let data = gdb_index_section_data(obj, "ExpectGdbIndexSymbol")?; + let hdr = GdbIndexOffsets::parse(&data)?; + let num_slots = (hdr.constant_pool - hdr.symbol_table) / 8; + + // Walk the hash table to collect all indexed symbol names. + let mut found: HashSet<&str> = HashSet::new(); + for i in 0..num_slots { + let so = hdr.symbol_table + i * 8; + if so + 8 > data.len() { + break; + } + let name_rel = u32::from_le_bytes(data[so..so + 4].try_into().unwrap()) as usize; + let cv_rel = u32::from_le_bytes(data[so + 4..so + 8].try_into().unwrap()) as usize; + if name_rel == 0 && cv_rel == 0 { + continue; + } + let abs = hdr.constant_pool + name_rel; + if abs >= data.len() { + continue; + } + let end = data[abs..] 
+ .iter() + .position(|&b| b == 0) + .map_or(data.len(), |p| abs + p); + if let Ok(name) = std::str::from_utf8(&data[abs..end]) { + found.insert(name); + } + } + + for expected in &self.expected_gdb_index_symbols { + ensure!( + found.contains(expected.as_str()), + "ExpectGdbIndexSymbol: `{expected}` not found in .gdb_index.\n\ + Found symbols: {:?}", + { + let mut v: Vec<_> = found.iter().collect(); + v.sort(); + v + } + ); + } + Ok(()) + } + fn verify_section_bytes(&self, obj: &object::File) -> Result { for expected in &self.expected_section_bytes { let section = obj @@ -3807,6 +3926,52 @@ impl Assertions { } } +/// Parsed offsets from a `.gdb_index` header, version-agnostic. +struct GdbIndexOffsets { + cu_list: usize, + tu_list: usize, + symbol_table: usize, + constant_pool: usize, +} + +impl GdbIndexOffsets { + /// Parse from section data. Handles both the 6-field header (versions <= 7) + /// and the 7-field header (versions 8+, which adds a shortcut table offset). + fn parse(data: &[u8]) -> Result { + ensure!(data.len() >= 24, ".gdb_index too small for header"); + let version = u32::from_le_bytes(data[0..4].try_into().unwrap()); + let cu_list = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + let tu_list = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize; + let symbol_table = u32::from_le_bytes(data[16..20].try_into().unwrap()) as usize; + let constant_pool = if version >= 8 { + ensure!( + data.len() >= 28, + ".gdb_index v{version} too small for header" + ); + // Version 8+: header has a shortcut_table_offset between symbol_table and + // constant_pool. + u32::from_le_bytes(data[24..28].try_into().unwrap()) as usize + } else { + // Version <= 7: no shortcut table; constant_pool immediately follows + // symbol_table_offset. 
+ u32::from_le_bytes(data[20..24].try_into().unwrap()) as usize + }; + Ok(Self { + cu_list, + tu_list, + symbol_table, + constant_pool, + }) + } +} + +fn gdb_index_section_data(obj: &object::File, directive: &str) -> Result> { + let section = obj + .section_by_name(".gdb_index") + .with_context(|| format!("{directive}: .gdb_index section not found"))?; + Ok(section.data()?.to_vec()) +} + fn verify_no_overlapping_sections(obj: &object::File) -> Result { let mut previous_range = None; for section in obj.sections() { diff --git a/wild/tests/sources/elf/gdb-index/gdb-index.c b/wild/tests/sources/elf/gdb-index/gdb-index.c new file mode 100644 index 000000000..a168ec5f5 --- /dev/null +++ b/wild/tests/sources/elf/gdb-index/gdb-index.c @@ -0,0 +1,37 @@ +//#AbstractConfig:default +//#CompArgs:-g -ggnu-pubnames +//#Object:runtime.c +//#Object:gdb-index2.c +//#SkipLinker:ld +//#EnableLinker:lld + +//#Config:enabled:default +//#LinkArgs:--gdb-index +//#DiffIgnore:section.gdb_index +//#ExpectSection:.gdb_index +//#ExpectGdbIndexCuCount:3 +//#ExpectGdbIndexSymbol:compute +//#ExpectGdbIndexSymbol:_start +//#ExpectGdbIndexSymbol:foo + +//#Config:disabled:default +//#LinkArgs:--gdb-index --no-gdb-index +//#NoSection:.gdb_index + +#include "../common/runtime.h" + +int foo(int a, int b); + +int compute(int x) { return x + 1; } + +void _start(void) { + runtime_init(); + if (compute(41) != 42) { + exit_syscall(10); + } + if (foo(20, 22) != 42) { + exit_syscall(11); + } + + exit_syscall(42); +} diff --git a/wild/tests/sources/elf/gdb-index/gdb-index2.c b/wild/tests/sources/elf/gdb-index/gdb-index2.c new file mode 100644 index 000000000..a1f74bf24 --- /dev/null +++ b/wild/tests/sources/elf/gdb-index/gdb-index2.c @@ -0,0 +1 @@ +int foo(int a, int b) { return a + b; }