From cdf84ddd3ad988d3193e9d9d2d59bbc0b6bace71 Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 18:35:48 +0900 Subject: [PATCH 01/24] Add section name constant and part/section IDs --- libwild/src/output_section_id.rs | 1 + libwild/src/part_id.rs | 49 ++++++++++++++++---------------- linker-utils/src/elf.rs | 2 ++ 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/libwild/src/output_section_id.rs b/libwild/src/output_section_id.rs index fe19fcded..3cc50a964 100644 --- a/libwild/src/output_section_id.rs +++ b/libwild/src/output_section_id.rs @@ -99,6 +99,7 @@ pub(crate) const SYMTAB_SHNDX_LOCAL: OutputSectionId = part_id::SYMTAB_SHNDX_LOCAL.output_section_id(); pub(crate) const SYMTAB_SHNDX_GLOBAL: OutputSectionId = part_id::SYMTAB_SHNDX_GLOBAL.output_section_id(); +pub(crate) const GDB_INDEX: OutputSectionId = part_id::GDB_INDEX.output_section_id(); // Mach-O specific sections pub(crate) const PAGEZERO_SEGMENT: OutputSectionId = part_id::PAGEZERO_SEGMENT.output_section_id(); diff --git a/libwild/src/part_id.rs b/libwild/src/part_id.rs index 490531c42..7cb87502b 100644 --- a/libwild/src/part_id.rs +++ b/libwild/src/part_id.rs @@ -51,34 +51,35 @@ pub(crate) const RELRO_PADDING: PartId = PartId(28); pub(crate) const RELR_DYN: PartId = PartId(29); pub(crate) const SYMTAB_SHNDX_LOCAL: PartId = PartId(30); pub(crate) const SYMTAB_SHNDX_GLOBAL: PartId = PartId(31); +pub(crate) const GDB_INDEX: PartId = PartId(32); // Mach-O specific sections -pub(crate) const PAGEZERO_SEGMENT: PartId = PartId(32); -pub(crate) const TEXT_SEGMENT: PartId = PartId(33); -pub(crate) const DATA_SEGMENT: PartId = PartId(34); -pub(crate) const LINK_EDIT_SEGMENT: PartId = PartId(35); -pub(crate) const ENTRY_POINT: PartId = PartId(36); -pub(crate) const DYLD_CHAINED_FIXUPS: PartId = PartId(37); -pub(crate) const CHAINED_FIXUP_TABLE: PartId = PartId(38); -pub(crate) const SYMTAB_COMMAND: PartId = PartId(39); -pub(crate) const CODE_SIGNATURE_COMMAND: PartId = PartId(40); -pub(crate) const CODE_SIGNATURE: PartId = PartId(41); +pub(crate) const PAGEZERO_SEGMENT: PartId = PartId(33); +pub(crate) const TEXT_SEGMENT: PartId = PartId(34); +pub(crate) const DATA_SEGMENT: PartId = PartId(35); +pub(crate) const LINK_EDIT_SEGMENT: PartId = PartId(36); +pub(crate) const ENTRY_POINT: PartId = PartId(37); +pub(crate) const DYLD_CHAINED_FIXUPS: PartId = PartId(38); +pub(crate) const CHAINED_FIXUP_TABLE: PartId = PartId(39); +pub(crate) const SYMTAB_COMMAND: PartId = PartId(40); +pub(crate) const CODE_SIGNATURE_COMMAND: PartId = PartId(41); +pub(crate) const CODE_SIGNATURE: PartId = PartId(42); // Wasm specific sections. Each one corresponds to a single standard Wasm section. -pub(crate) const WASM_TYPE: PartId = PartId(42); -pub(crate) const WASM_IMPORT: PartId = PartId(43); -pub(crate) const WASM_FUNCTION: PartId = PartId(44); -pub(crate) const WASM_TABLE: PartId = PartId(45); -pub(crate) const WASM_MEMORY: PartId = PartId(46); -pub(crate) const WASM_GLOBAL: PartId = PartId(47); -pub(crate) const WASM_EXPORT: PartId = PartId(48); -pub(crate) const WASM_START: PartId = PartId(49); -pub(crate) const WASM_ELEMENT: PartId = PartId(50); -pub(crate) const WASM_DATA_COUNT: PartId = PartId(51); -pub(crate) const WASM_CODE: PartId = PartId(52); -pub(crate) const WASM_DATA: PartId = PartId(53); - -pub(crate) const NUM_SINGLE_PART_SECTIONS: u32 = 54; +pub(crate) const WASM_TYPE: PartId = PartId(43); +pub(crate) const WASM_IMPORT: PartId = PartId(44); +pub(crate) const WASM_FUNCTION: PartId = PartId(45); +pub(crate) const WASM_TABLE: PartId = PartId(46); +pub(crate) const WASM_MEMORY: PartId = PartId(47); +pub(crate) const WASM_GLOBAL: PartId = PartId(48); +pub(crate) const WASM_EXPORT: PartId = PartId(49); +pub(crate) const WASM_START: PartId = PartId(50); +pub(crate) const WASM_ELEMENT: PartId = PartId(51); +pub(crate) const WASM_DATA_COUNT: PartId = PartId(52); +pub(crate) const WASM_CODE: PartId = PartId(53); +pub(crate) const WASM_DATA: PartId = PartId(54); + +pub(crate) const NUM_SINGLE_PART_SECTIONS: u32 = 55; #[cfg(test)] pub(crate) const NUM_BUILT_IN_PARTS: usize = NUM_SINGLE_PART_SECTIONS as usize diff --git a/linker-utils/src/elf.rs b/linker-utils/src/elf.rs index ae5bff340..e24247936 100644 --- a/linker-utils/src/elf.rs +++ b/linker-utils/src/elf.rs @@ -326,6 +326,8 @@ pub mod secnames { pub const RELRO_PADDING_SECTION_NAME: &[u8] = RELRO_PADDING_SECTION_NAME_STR.as_bytes(); pub const SYMTAB_SHNDX_SECTION_NAME_STR: &str = ".symtab_shndx"; pub const SYMTAB_SHNDX_SECTION_NAME: &[u8] = SYMTAB_SHNDX_SECTION_NAME_STR.as_bytes(); + pub const GDB_INDEX_SECTION_NAME_STR: &str = ".gdb_index"; + pub const GDB_INDEX_SECTION_NAME: &[u8] = GDB_INDEX_SECTION_NAME_STR.as_bytes(); pub const GNU_LTO_SYMTAB_PREFIX: &str = ".gnu.lto_.symtab"; } From ea7a3024c6e17df22809cb26c3ce6d002b0899c0 Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 18:35:57 +0900 Subject: [PATCH 02/24] Add `--gdb-index` and `--no-gdb-index` --- libwild/src/args/elf.rs | 26 +++++++++++++++++++++++++- libwild/src/platform.rs | 9 +++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/libwild/src/args/elf.rs b/libwild/src/args/elf.rs index 5a8e6071a..ed4a81533 100644 --- a/libwild/src/args/elf.rs +++ b/libwild/src/args/elf.rs @@ -134,6 +134,8 @@ pub struct ElfArgs { experimental_sframe: bool, pub(crate) debug_compression_kind: Option, + + pub(crate) gdb_index: bool, } #[derive(Debug)] @@ -234,7 +236,6 @@ const SILENTLY_IGNORED_SHORT_FLAGS: &[&str] = &[ ]; const IGNORED_FLAGS: &[&str] = &[ - "gdb-index", "fix-cortex-a53-835769", "fix-cortex-a53-843419", "discard-all", @@ -334,6 +335,7 @@ impl Default for ElfArgs { experimental_sframe: false, debug_compression_kind: None, + gdb_index: false, } } } @@ -1098,6 +1100,24 @@ fn setup_argument_parser() -> ArgumentParser { Ok(()) }); + parser + .declare() + .long("gdb-index") + .help("Create .gdb_index section") + .execute(|args, _modifier_stack| { + args.gdb_index = true; + Ok(()) + }); + + parser + .declare() + .long("no-gdb-index") + .help("Don't create .gdb_index section") + .execute(|args, _modifier_stack| { + args.gdb_index = false; + Ok(()) + }); + parser .declare() .long("export-dynamic") @@ -2086,6 +2106,10 @@ impl platform::Args for ElfArgs { self.trace } + fn should_write_gdb_index(&self) -> bool { + self.gdb_index + } + fn relocation_model(&self) -> crate::args::RelocationModel { self.relocation_model } diff --git a/libwild/src/platform.rs b/libwild/src/platform.rs index b1ec11c76..5d7a761d4 100644 --- a/libwild/src/platform.rs +++ b/libwild/src/platform.rs @@ -725,6 +725,11 @@ pub(crate) trait Platform: _total_sizes: &mut OutputSectionPartMap, ) { } + + /// Compute the size of the `.gdb_index` section, if applicable. + fn compute_gdb_index_size(_groups: &[crate::layout::GroupState]) -> u64 { + 0 + } } /// Abstracts over the different object file formats that we support (or may support). e.g. ELF. @@ -1290,6 +1295,10 @@ pub(crate) trait Args: std::fmt::Debug + Send + Sync + 'static { false } + fn should_write_gdb_index(&self) -> bool { + false + } + fn relocation_model(&self) -> crate::args::RelocationModel; fn should_output_executable(&self) -> bool; From 0e2139c1ad82210bf48aedd87df4050db59878b5 Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 18:36:06 +0900 Subject: [PATCH 03/24] Implement `.gdb_index` section generation --- libwild/src/elf.rs | 10 + libwild/src/elf_writer.rs | 21 ++ libwild/src/gdb_index.rs | 633 ++++++++++++++++++++++++++++++++++++++ libwild/src/layout.rs | 17 + libwild/src/lib.rs | 1 + 5 files changed, 682 insertions(+) create mode 100644 libwild/src/gdb_index.rs diff --git a/libwild/src/elf.rs b/libwild/src/elf.rs index 7faead799..f26b91fde 100644 --- a/libwild/src/elf.rs +++ b/libwild/src/elf.rs @@ -1882,6 +1882,7 @@ impl platform::Platform for Elf { builder.add_sections(&custom.bss); builder.add_sections(&custom.nonalloc); + builder.add_section(output_section_id::GDB_INDEX); builder.add_section(output_section_id::COMMENT); builder.add_section(output_section_id::RISCV_ATTRIBUTES); builder.add_section(output_section_id::SHSTRTAB); @@ -1994,6 +1995,10 @@ impl platform::Platform for Elf { total_sizes.merge(&extra_sizes); } + fn compute_gdb_index_size(groups: &[crate::layout::GroupState]) -> u64 { + crate::gdb_index::compute_gdb_index_size(groups) + } + fn align_load_segment_start( _segment_def: Self::ProgramSegmentDef, segment_alignment: Alignment, @@ -4644,6 +4649,11 @@ const SECTION_DEFINITIONS: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = { kind: SectionKind::Secondary(output_section_id::SYMTAB_SHNDX_LOCAL), ..DEFAULT_DEFS }; + defs[output_section_id::GDB_INDEX.as_usize()] = BuiltInSectionDetails { + kind: SectionKind::Primary(SectionName(GDB_INDEX_SECTION_NAME)), + ty: sht::PROGBITS, + ..DEFAULT_DEFS + }; // Start of regular sections defs[output_section_id::RODATA.as_usize()] = BuiltInSectionDetails { kind: SectionKind::Primary(SectionName(RODATA_SECTION_NAME)), diff --git a/libwild/src/elf_writer.rs b/libwild/src/elf_writer.rs index d4b3af7d9..37ed2af75 100644 --- a/libwild/src/elf_writer.rs +++ b/libwild/src/elf_writer.rs @@ -185,6 +185,9 @@ pub(crate) fn write<'data, A: Arch>( crate::validation::validate_bytes(layout, &sized_output.out)?; } + // Write .gdb_index before splitting, since it needs to read .debug_info from the output. + write_gdb_index_section(&mut sized_output.out, layout); + let mut section_buffers = split_output_into_sections(layout, &mut sized_output.out).0; if layout.args().should_write_eh_frame_hdr { @@ -313,6 +316,24 @@ fn fill_padding(mut section_buffers: OutputSectionMap<&mut [u8]>) { }); } +fn write_gdb_index_section(output: &mut [u8], layout: &ElfLayout) { + use crate::platform::Args as _; + if !layout.args().should_write_gdb_index() { + return; + } + let sl = layout.section_layouts.get(output_section_id::GDB_INDEX); + if sl.file_size == 0 { + return; + } + timing_phase!("Write .gdb_index"); + let start = sl.file_offset; + // Split the output buffer so that the part before our section is readable (for .debug_info) + // and our section is writable. + let (before, rest) = output.split_at_mut(start); + let gdb_buf = &mut rest[..sl.file_size]; + crate::gdb_index::write_gdb_index(gdb_buf, before, layout); +} + fn write_sframe_section(sframe_buffer: &mut [u8], layout: &ElfLayout) -> Result { if layout.args().discard_sframe || sframe_buffer.is_empty() { return Ok(()); diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs new file mode 100644 index 000000000..d2ff8a713 --- /dev/null +++ b/libwild/src/gdb_index.rs @@ -0,0 +1,633 @@ +//! Generates a `.gdb_index` section. +//! +//! The `.gdb_index` section is an accelerator structure that lets GDB skip parsing all +//! `.debug_info` at startup. We emit version 9, which includes a shortcut table. +//! +//! Format reference: + +use crate::elf::Elf; +use crate::layout::FileLayout; +use crate::layout::FileLayoutState; +use crate::layout::GroupState; +use crate::layout::Layout; +use crate::output_section_id::SectionName; +use crate::platform::ObjectFile as _; +use crate::platform::SectionHeader as _; +use crate::resolution::SectionSlot; +use hashbrown::HashMap; +use std::mem::size_of; +use zerocopy::FromBytes; +use zerocopy::Immutable; +use zerocopy::IntoBytes; +use zerocopy::KnownLayout; + +const GDB_INDEX_VERSION: u32 = 9; + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexHeader { + version: u32, + cu_list_offset: u32, + tu_list_offset: u32, + address_area_offset: u32, + symbol_table_offset: u32, + shortcut_table_offset: u32, + constant_pool_offset: u32, +} + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexCuEntry { + cu_offset: u64, + cu_length: u64, +} + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexAddressEntry { + low_address: u64, + high_address: u64, + cu_index: u32, +} + +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexShortcutTable { + language_of_main: u32, + name_of_main_offset: u32, +} + +const HEADER_SIZE: usize = size_of::(); +const CU_ENTRY_SIZE: usize = size_of::(); +const ADDRESS_ENTRY_SIZE: usize = size_of::(); +const SHORTCUT_TABLE_SIZE: usize = size_of::(); +const HASH_SLOT_SIZE: usize = 8; // (name_offset, cu_vector_offset) pair + +/// The GDB index hash function. +fn gdb_hash(name: &[u8]) -> u32 { + let mut r: u32 = 0; + for &c in name { + r = r + .wrapping_mul(67) + .wrapping_add(u32::from(c.to_ascii_lowercase())) + .wrapping_sub(113); + } + r +} + +/// Encode a CU vector entry: bits 0-23 = CU index, bits 28-30 = kind, bit 31 = is_static. +/// +/// The attrs byte from `.debug_gnu_pubnames`/`.debug_gnu_pubtypes` packs kind in bits 4-6 +/// and is_static in bit 7. +fn encode_cu_vector_entry(cu_index: u32, attrs: u8) -> u32 { + let kind = u32::from((attrs >> 4) & 0x7); + let is_static = u32::from((attrs >> 7) & 0x1); + (cu_index & 0x00FF_FFFF) | (kind << 28) | (is_static << 31) +} + +/// Number of hash table slots: next power of two >= 4/3 * n. +fn compute_hash_table_slots(num_symbols: usize) -> usize { + if num_symbols == 0 { + return 0; + } + (num_symbols * 4 / 3 + 1).next_power_of_two() +} + +struct CuBoundary { + offset: u64, + length: u64, +} + +/// Walk `.debug_info` bytes and return `(offset, total_length)` for each CU. +fn parse_cu_boundaries(data: &[u8]) -> Vec { + let mut cus = Vec::new(); + let mut offset = 0usize; + while offset + 4 <= data.len() { + let init_len = u32::from_le_bytes(data[offset..offset + 4].try_into().unwrap()); + let total = if init_len == 0xFFFF_FFFF { + if offset + 12 > data.len() { + break; + } + let len = u64::from_le_bytes(data[offset + 4..offset + 12].try_into().unwrap()); + 12 + len as usize + } else { + 4 + init_len as usize + }; + if total == 0 || offset + total > data.len() { + break; + } + cus.push(CuBoundary { + offset: offset as u64, + length: total as u64, + }); + offset += total; + } + cus +} + +struct PubnamesSet<'data> { + debug_info_offset: u64, + entries: Vec<(&'data [u8], u8)>, +} + +/// Parse `.debug_gnu_pubnames` / `.debug_gnu_pubtypes` section data. +/// +/// Each set has a header pointing to a CU in `.debug_info`, followed by +/// (die_offset, attrs_byte, NUL-terminated name) entries. +fn parse_pubnames_sets(data: &[u8]) -> Vec> { + let mut sets = Vec::new(); + let mut pos = 0; + while pos + 4 <= data.len() { + let init_len = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()); + + let (header_size, set_end, debug_info_offset) = if init_len == 0xFFFF_FFFF { + // DWARF64: 4 + 8(len) + 2(ver) + 8(offset) + 8(size) = 30 + if pos + 30 > data.len() { + break; + } + let len = u64::from_le_bytes(data[pos + 4..pos + 12].try_into().unwrap()); + let dio = u64::from_le_bytes(data[pos + 14..pos + 22].try_into().unwrap()); + (30, pos + 12 + len as usize, dio) + } else { + // DWARF32: 4(len) + 2(ver) + 4(offset) + 4(size) = 14 + if pos + 14 > data.len() { + break; + } + let dio = u64::from(u32::from_le_bytes( + data[pos + 6..pos + 10].try_into().unwrap(), + )); + (14, pos + 4 + init_len as usize, dio) + }; + + let set_end = set_end.min(data.len()); + let mut ep = pos + header_size; + let mut entries = Vec::new(); + let is_64 = init_len == 0xFFFF_FFFF; + + while ep < set_end { + let die_offset = if is_64 { + if ep + 8 > set_end { + break; + } + let v = u64::from_le_bytes(data[ep..ep + 8].try_into().unwrap()); + ep += 8; + v + } else { + if ep + 4 > set_end { + break; + } + let v = u64::from(u32::from_le_bytes(data[ep..ep + 4].try_into().unwrap())); + ep += 4; + v + }; + if die_offset == 0 { + break; + } + if ep >= set_end { + break; + } + let attrs = data[ep]; + ep += 1; + let name_start = ep; + while ep < set_end && data[ep] != 0 { + ep += 1; + } + if ep >= set_end { + break; + } + entries.push((&data[name_start..ep], attrs)); + ep += 1; + } + + sets.push(PubnamesSet { + debug_info_offset, + entries, + }); + pos = set_end; + } + sets +} + +/// Read raw section data from an input object by name. +fn raw_section_by_name<'data>(object: &crate::elf::File<'data>, name: &str) -> Option<&'data [u8]> { + let (_index, header) = object.section_by_name(name)?; + object.raw_section_data(header).ok() +} + +/// Pre-scan all input objects to compute the `.gdb_index` section size. +pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { + let mut total_cus = 0usize; + let mut total_addr_entries = 0usize; + let mut symbol_map: HashMap<&[u8], Vec> = HashMap::new(); + let mut cu_index_base = 0u32; + + for group in groups { + for file in &group.files { + let FileLayoutState::Object(obj) = file else { + continue; + }; + let object = obj.object; + + let obj_cu_count = raw_section_by_name(object, ".debug_info") + .map_or(0, |data| parse_cu_boundaries(data).len()); + + if obj_cu_count == 0 { + continue; + } + + let mut obj_addr_count = 0usize; + for (si, slot) in obj.sections.iter().enumerate() { + let SectionSlot::Loaded(section) = slot else { + continue; + }; + if section.size == 0 { + continue; + } + let Ok(header) = object.section(object::SectionIndex(si)) else { + continue; + }; + if header.is_alloc() && header.is_executable() { + obj_addr_count += 1; + } + } + + total_cus += obj_cu_count; + total_addr_entries += obj_addr_count; + + let base_idx = cu_index_base; + let mut offset_to_idx: HashMap = HashMap::new(); + if let Some(di_data) = raw_section_by_name(object, ".debug_info") { + for (i, cu) in parse_cu_boundaries(di_data).iter().enumerate() { + offset_to_idx.insert(cu.offset, base_idx + i as u32); + } + } + cu_index_base += obj_cu_count as u32; + + collect_pubnames_symbols(object, &offset_to_idx, base_idx, &mut symbol_map); + } + } + + if total_cus == 0 { + return 0; + } + + let mut cv_bytes = 0usize; + let mut str_bytes = 0usize; + for (name, entries) in &mut symbol_map { + entries.sort_unstable(); + entries.dedup(); + cv_bytes += 4 + entries.len() * 4; + str_bytes += name.len() + 1; + } + + let ht_slots = compute_hash_table_slots(symbol_map.len()); + + (HEADER_SIZE + + total_cus * CU_ENTRY_SIZE + + total_addr_entries * ADDRESS_ENTRY_SIZE + + ht_slots * HASH_SLOT_SIZE + + SHORTCUT_TABLE_SIZE + + cv_bytes + + str_bytes) as u64 +} + +/// Write the `.gdb_index` section into `buf`. +/// +/// Reads the output `.debug_info` (already written into `output_buf`) for the CU list, +/// and re-scans input objects for address ranges and pubnames/pubtypes symbols. +pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout<'_, Elf>) { + if buf.is_empty() { + return; + } + + let cu_entries = build_cu_list(output_buf, layout); + let addr_entries = build_address_entries(layout); + let (sorted, ht_slots) = build_symbol_table(layout); + + let cu_list_off = HEADER_SIZE as u32; + let tu_list_off = cu_list_off + (cu_entries.len() * CU_ENTRY_SIZE) as u32; + let addr_off = tu_list_off; + let sym_off = addr_off + (addr_entries.len() * ADDRESS_ENTRY_SIZE) as u32; + let short_off = sym_off + (ht_slots * HASH_SLOT_SIZE) as u32; + let cp_off = short_off + SHORTCUT_TABLE_SIZE as u32; + + // Build constant pool: CU vectors first, then name strings. + let mut cv_data = Vec::new(); + let mut str_data = Vec::new(); + let mut cv_offsets = Vec::with_capacity(sorted.len()); + let mut name_offsets = Vec::with_capacity(sorted.len()); + + for (_, sd) in &sorted { + cv_offsets.push(cv_data.len() as u32); + cv_data.extend_from_slice(&(sd.cv_entries.len() as u32).to_le_bytes()); + for &e in &sd.cv_entries { + cv_data.extend_from_slice(&e.to_le_bytes()); + } + } + for (name, _) in &sorted { + name_offsets.push((cv_data.len() + str_data.len()) as u32); + str_data.extend_from_slice(name); + str_data.push(0); + } + + // Emit into the output buffer. + let total = cp_off as usize + cv_data.len() + str_data.len(); + let len = buf.len().min(total); + let buf = &mut buf[..len]; + + let hdr = GdbIndexHeader { + version: GDB_INDEX_VERSION, + cu_list_offset: cu_list_off, + tu_list_offset: tu_list_off, + address_area_offset: addr_off, + symbol_table_offset: sym_off, + shortcut_table_offset: short_off, + constant_pool_offset: cp_off, + }; + buf[..HEADER_SIZE].copy_from_slice(hdr.as_bytes()); + + let mut off = cu_list_off as usize; + for cu in &cu_entries { + buf[off..off + CU_ENTRY_SIZE].copy_from_slice(cu.as_bytes()); + off += CU_ENTRY_SIZE; + } + + off = addr_off as usize; + for a in &addr_entries { + buf[off..off + ADDRESS_ENTRY_SIZE].copy_from_slice(a.as_bytes()); + off += ADDRESS_ENTRY_SIZE; + } + + write_hash_table( + buf, + ht_slots, + sym_off as usize, + &sorted, + &name_offsets, + &cv_offsets, + ); + + let so = short_off as usize; + let sc = GdbIndexShortcutTable { + language_of_main: 0, + name_of_main_offset: 0, + }; + buf[so..so + SHORTCUT_TABLE_SIZE].copy_from_slice(sc.as_bytes()); + + let cpo = cp_off as usize; + buf[cpo..cpo + cv_data.len()].copy_from_slice(&cv_data); + buf[cpo + cv_data.len()..cpo + cv_data.len() + str_data.len()].copy_from_slice(&str_data); +} + +/// Build the CU list from the already-written output `.debug_info`. +fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Vec { + let Some(id) = layout + .output_sections + .section_id_by_name(SectionName(b".debug_info")) + else { + return Vec::new(); + }; + let sl = layout.section_layouts.get(id); + let start = sl.file_offset; + let end = start + sl.file_size; + if end > output_buf.len() { + return Vec::new(); + } + parse_cu_boundaries(&output_buf[start..end]) + .into_iter() + .map(|cu| GdbIndexCuEntry { + cu_offset: cu.offset, + cu_length: cu.length, + }) + .collect() +} + +/// Build address entries by mapping each executable section to its resolved address. +fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec { + let mut entries = Vec::new(); + let mut cu_offset = 0u32; + + for group in &layout.group_layouts { + for file in &group.files { + let FileLayout::Object(obj) = file else { + continue; + }; + let object = obj.object; + + let obj_cu_count = raw_section_by_name(object, ".debug_info") + .map_or(0, |data| parse_cu_boundaries(data).len() as u32); + if obj_cu_count == 0 { + continue; + } + let base_cu = cu_offset; + + for (si, slot) in obj.sections.iter().enumerate() { + let SectionSlot::Loaded(section) = slot else { + continue; + }; + if section.size == 0 { + continue; + } + let Ok(header) = object.section(object::SectionIndex(si)) else { + continue; + }; + if !header.is_alloc() || !header.is_executable() { + continue; + } + if let Some(addr) = obj.section_resolutions[si].address() + && addr != 0 + { + entries.push(GdbIndexAddressEntry { + low_address: addr, + high_address: addr + section.size, + cu_index: base_cu, + }); + } + } + + cu_offset += obj_cu_count; + } + } + entries +} + +struct SymData { + cv_entries: Vec, + hash: u32, +} + +/// Build the symbol table from `.debug_gnu_pubnames`/`.debug_gnu_pubtypes`, returning +/// the symbols sorted by name and the computed hash table slot count. +fn build_symbol_table<'data>( + layout: &'data Layout<'_, Elf>, +) -> (Vec<(&'data [u8], SymData)>, usize) { + let mut sym_map: HashMap<&'data [u8], SymData> = HashMap::new(); + let mut cu_offset = 0u32; + + for group in &layout.group_layouts { + for file in &group.files { + let FileLayout::Object(obj) = file else { + continue; + }; + let object = obj.object; + + let boundaries = raw_section_by_name(object, ".debug_info") + .map(parse_cu_boundaries) + .unwrap_or_default(); + if boundaries.is_empty() { + continue; + } + + let base = cu_offset; + let mut offset_to_idx: HashMap = HashMap::with_capacity(boundaries.len()); + for (i, cu) in boundaries.iter().enumerate() { + offset_to_idx.insert(cu.offset, base + i as u32); + } + cu_offset += boundaries.len() as u32; + + for section_name in [".debug_gnu_pubnames", ".debug_gnu_pubtypes"] { + let Some(data) = raw_section_by_name(object, section_name) else { + continue; + }; + for set in parse_pubnames_sets(data) { + let cu_idx = offset_to_idx + .get(&set.debug_info_offset) + .copied() + .unwrap_or(base); + for (name, attrs) in set.entries { + let entry = encode_cu_vector_entry(cu_idx, attrs); + let sd = sym_map.entry(name).or_insert_with(|| SymData { + cv_entries: Vec::new(), + hash: gdb_hash(name), + }); + sd.cv_entries.push(entry); + } + } + } + } + } + + for sd in sym_map.values_mut() { + sd.cv_entries.sort_unstable(); + sd.cv_entries.dedup(); + } + + let mut sorted: Vec<(&[u8], SymData)> = sym_map.into_iter().collect(); + sorted.sort_unstable_by_key(|(name, _)| *name); + let ht_slots = compute_hash_table_slots(sorted.len()); + (sorted, ht_slots) +} + +/// Collect pubnames/pubtypes symbols from an object into the global map. +fn collect_pubnames_symbols<'data>( + object: &crate::elf::File<'data>, + offset_to_idx: &HashMap, + fallback_cu: u32, + symbol_map: &mut HashMap<&'data [u8], Vec>, +) { + for section_name in [".debug_gnu_pubnames", ".debug_gnu_pubtypes"] { + let Some(data) = raw_section_by_name(object, section_name) else { + continue; + }; + for set in parse_pubnames_sets(data) { + let cu_idx = offset_to_idx + .get(&set.debug_info_offset) + .copied() + .unwrap_or(fallback_cu); + for (name, attrs) in set.entries { + let entry = encode_cu_vector_entry(cu_idx, attrs); + symbol_map.entry(name).or_default().push(entry); + } + } + } +} + +/// Insert symbols into the open-addressing hash table region of `buf`. +fn write_hash_table( + buf: &mut [u8], + ht_slots: usize, + ht_start: usize, + sorted: &[(&[u8], SymData)], + name_offsets: &[u32], + cv_offsets: &[u32], +) { + let ht_end = ht_start + ht_slots * HASH_SLOT_SIZE; + buf[ht_start..ht_end].fill(0); + + if ht_slots == 0 { + return; + } + let mask = (ht_slots - 1) as u32; + for (i, (_, sd)) in sorted.iter().enumerate() { + let h = sd.hash; + let step = ((h >> 3) & mask) | 1; + let mut slot = h & mask; + loop { + let so = ht_start + slot as usize * HASH_SLOT_SIZE; + let existing_name = u32::from_le_bytes(buf[so..so + 4].try_into().unwrap()); + let existing_vec = u32::from_le_bytes(buf[so + 4..so + 8].try_into().unwrap()); + if existing_name == 0 && existing_vec == 0 { + buf[so..so + 4].copy_from_slice(&name_offsets[i].to_le_bytes()); + buf[so + 4..so + 8].copy_from_slice(&cv_offsets[i].to_le_bytes()); + break; + } + slot = (slot + step) & mask; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_gdb_hash_case_insensitive() { + assert_eq!(gdb_hash(b"main"), gdb_hash(b"MAIN")); + assert_eq!(gdb_hash(b"main"), gdb_hash(b"Main")); + assert_ne!(gdb_hash(b"main"), gdb_hash(b"foo")); + } + + #[test] + fn test_hash_table_slots_power_of_two() { + assert_eq!(compute_hash_table_slots(0), 0); + assert_eq!(compute_hash_table_slots(1), 2); + for n in 1..100 { + let s = compute_hash_table_slots(n); + assert!(s.is_power_of_two()); + assert!(s >= n); + } + } + + #[test] + fn test_encode_cu_vector_entry() { + // Global function: kind=3 in bits 4-6, is_static=0 in bit 7 + let e = encode_cu_vector_entry(5, 0b0011_0000); + assert_eq!(e & 0x00FF_FFFF, 5); + assert_eq!((e >> 28) & 0x7, 3); + assert_eq!((e >> 31) & 0x1, 0); + + // Static function: kind=3, is_static=1 + let e2 = encode_cu_vector_entry(42, 0b1011_0000); + assert_eq!(e2 & 0x00FF_FFFF, 42); + assert_eq!((e2 >> 28) & 0x7, 3); + assert_eq!((e2 >> 31) & 0x1, 1); + } + + #[test] + fn test_parse_cu_boundaries() { + assert!(parse_cu_boundaries(&[]).is_empty()); + + // Single DWARF32 CU: init_length=8, total = 4 + 8 = 12 bytes. + let mut data = vec![0u8; 12]; + data[0..4].copy_from_slice(&8u32.to_le_bytes()); + let cus = parse_cu_boundaries(&data); + assert_eq!(cus.len(), 1); + assert_eq!(cus[0].offset, 0); + assert_eq!(cus[0].length, 12); + } + + #[test] + fn test_header_size() { + assert_eq!(HEADER_SIZE, 7 * 4); + } +} diff --git a/libwild/src/layout.rs b/libwild/src/layout.rs index 2089e1223..abbe49aa8 100644 --- a/libwild/src/layout.rs +++ b/libwild/src/layout.rs @@ -1141,6 +1141,8 @@ impl<'data, P: Platform> CommonGroupState<'data, P> { *self.mem_sizes.get(part_id::SYMTAB_SHNDX_GLOBAL), ); + memory_offsets.increment(part_id::GDB_INDEX, *self.mem_sizes.get(part_id::GDB_INDEX)); + strtab_offset_start } @@ -1840,6 +1842,21 @@ fn compute_total_section_part_sizes<'data, P: Platform>( total_sizes.merge(&group_state.common.mem_sizes); } + // Compute and allocate the .gdb_index section size if --gdb-index is enabled. + let gdb_index_size = if resources.symbol_db.args.should_write_gdb_index() { + P::compute_gdb_index_size(group_states) + } else { + 0 + }; + if gdb_index_size > 0 { + let first_group = group_states.first_mut().unwrap(); + first_group + .common + .mem_sizes + .increment(part_id::GDB_INDEX, gdb_index_size); + total_sizes.increment(part_id::GDB_INDEX, gdb_index_size); + } + // We need to apply late-stage adjustments for the epilogue before we do so for the prelude, // since the prelude needs to know if the .hash section will be written, which is decided by the // epilogue. diff --git a/libwild/src/lib.rs b/libwild/src/lib.rs index 053c5c575..df43064ec 100644 --- a/libwild/src/lib.rs +++ b/libwild/src/lib.rs @@ -22,6 +22,7 @@ pub(crate) mod file_kind; pub(crate) mod file_writer; pub(crate) mod fs; pub(crate) mod gc_stats; +pub(crate) mod gdb_index; pub(crate) mod glob_match; pub(crate) mod grouping; pub(crate) mod hash; From 4bcb28aab5d82c73e8effeeab3b37ee8fdb83b8f Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 18:36:13 +0900 Subject: [PATCH 04/24] Update skip tests list --- wild/tests/external_tests/mold_skip_tests.toml | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/wild/tests/external_tests/mold_skip_tests.toml b/wild/tests/external_tests/mold_skip_tests.toml index 6c20cfc7c..fe6d6af96 100644 --- a/wild/tests/external_tests/mold_skip_tests.toml +++ b/wild/tests/external_tests/mold_skip_tests.toml @@ -16,6 +16,7 @@ tests = [ "execute-only.sh", "fatal-warnings.sh", # `-warn-common` and `-fatal-warnings` "filter.sh", + "gdb-index-compress-output.sh", # --compress-debug-sections=zlib-gabi "global-offset-table.sh", # `-defsym=foo=_GLOBAL_OFFSET_TABLE_` "icf-gcc-except-table.sh", # `--icf` "icf.sh", # `--icf` @@ -51,20 +52,6 @@ tests = [ "warn-once.sh", ] -[skipped_groups.gdb_index] -reason = "GDB index support" -tracking_issue = "https://github.com/wild-linker/wild/issues/811" -tests = [ - "gdb-index-compress-output.sh", - "gdb-index-dwarf2.sh", - "gdb-index-dwarf3.sh", - "gdb-index-dwarf4.sh", - "gdb-index-dwarf5.sh", - "gdb-index-dwarf64.sh", - "gdb-index-split-dwarf.sh", - "gdb-index-rnglistx.sh", -] - [skipped_groups.version_script] reason = "Version script support" tests = ["version-script15.sh"] From 634c7ccbddcc47c47a2024c7dc8d96862d62de00 Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 18:36:21 +0900 Subject: [PATCH 05/24] Add integration tests --- wild/tests/integration_tests.rs | 165 ++++++++++++++++++ wild/tests/sources/elf/gdb-index/gdb-index.c | 37 ++++ wild/tests/sources/elf/gdb-index/gdb-index2.c | 1 + 3 files changed, 203 insertions(+) create mode 100644 wild/tests/sources/elf/gdb-index/gdb-index.c create mode 100644 wild/tests/sources/elf/gdb-index/gdb-index2.c diff --git a/wild/tests/integration_tests.rs b/wild/tests/integration_tests.rs index 69befee5c..2d7fb775e 100644 --- a/wild/tests/integration_tests.rs +++ b/wild/tests/integration_tests.rs @@ -67,9 +67,19 @@ //! //! Contains:{string} Checks that the output binary does contain the specified string. //! +//! ExpectSection:{section_name} Checks that the specified section exists in the output binary. +//! +//! NoSection:{section_name} Checks that the specified section does not exist in the output binary. +//! //! ExpectSectionBytes:{section_name}=0x{hex_bytes} Checks that the specified section contains //! exactly the given bytes. //! +//! ExpectGdbIndexCuCount:{count} Checks that the `.gdb_index` section contains exactly the +//! specified number of CU entries. +//! +//! ExpectGdbIndexSymbol:{name} Checks that the `.gdb_index` symbol table contains an entry for the +//! specified symbol name. +//! //! Mode:{mode} Set linking mode to static (default), dynamic or unspecified. Cannot be used //! together with LinkerDriver. //! @@ -1181,7 +1191,11 @@ struct Assertions { expected_load_alignments: Vec, expected_dynamic_entries: Vec, absent_dynamic_entries: Vec, + expected_sections: Vec, + absent_sections: Vec, expected_section_bytes: Vec, + expected_gdb_index_cu_count: Option, + expected_gdb_index_symbols: Vec, output_file_matches: Vec, max_thunks: u64, expected_program_headers: Vec, @@ -1510,6 +1524,25 @@ fn process_directive( .assertions .contains_strings .push(arg.trim().to_owned()), + "ExpectSection" => config + .assertions + .expected_sections + .push(arg.trim().to_owned()), + "NoSection" => config + .assertions + .absent_sections + .push(arg.trim().to_owned()), + "ExpectGdbIndexCuCount" => { + config.assertions.expected_gdb_index_cu_count = Some( + arg.trim() + .parse::() + .with_context(|| format!("Invalid CU count: {arg}"))?, + ); + } + "ExpectGdbIndexSymbol" => config + .assertions + .expected_gdb_index_symbols + .push(arg.trim().to_owned()), "ExpectSectionBytes" => { let (section_name, hex_str) = arg.trim().split_once('=').with_context(|| { format!("ExpectSectionBytes requires section_name=0xhex_bytes, got `{arg}`") @@ -3498,7 +3531,11 @@ impl Assertions { "dynsym", )?; self.verify_symbols_absent(&self.no_sym, obj.symbols(), "symtab")?; + self.verify_expected_sections(&obj)?; + self.verify_absent_sections(&obj)?; self.verify_section_bytes(&obj)?; + self.verify_gdb_index_cu_count(&obj)?; + self.verify_gdb_index_symbols(&obj)?; self.verify_strings(&bytes)?; verify_no_overlapping_sections(&obj)?; verify_no_overlapping_segments(&obj)?; @@ -3544,6 +3581,88 @@ impl Assertions { Ok(()) } + fn verify_expected_sections(&self, obj: &object::File) -> Result { + for name in &self.expected_sections { + ensure!( + obj.section_by_name(name).is_some(), + "Expected section `{name}` not found" + ); + } + Ok(()) + } + + fn verify_absent_sections(&self, obj: &object::File) -> Result { + for name in &self.absent_sections { + ensure!( + obj.section_by_name(name).is_none(), + "Section `{name}` should not exist but was found" + ); + } + Ok(()) + } + + fn verify_gdb_index_cu_count(&self, obj: &object::File) -> Result { + let Some(expected) = self.expected_gdb_index_cu_count else { + return Ok(()); + }; + let data = gdb_index_section_data(obj, "ExpectGdbIndexCuCount")?; + let hdr = GdbIndexOffsets::parse(&data)?; + let cu_count = (hdr.tu_list - hdr.cu_list) / 16; + ensure!( + cu_count == expected, + "ExpectGdbIndexCuCount: expected {expected} CUs, got {cu_count}" + ); + Ok(()) + } + + fn verify_gdb_index_symbols(&self, obj: &object::File) -> Result { + if self.expected_gdb_index_symbols.is_empty() { + return Ok(()); + } + let data = gdb_index_section_data(obj, "ExpectGdbIndexSymbol")?; + let hdr = GdbIndexOffsets::parse(&data)?; + let num_slots = (hdr.constant_pool - hdr.symbol_table) / 8; + + // Walk the hash table to collect all indexed symbol names. + let mut found: HashSet<&str> = HashSet::new(); + for i in 0..num_slots { + let so = hdr.symbol_table + i * 8; + if so + 8 > data.len() { + break; + } + let name_rel = u32::from_le_bytes(data[so..so + 4].try_into().unwrap()) as usize; + let cv_rel = u32::from_le_bytes(data[so + 4..so + 8].try_into().unwrap()) as usize; + if name_rel == 0 && cv_rel == 0 { + continue; + } + let abs = hdr.constant_pool + name_rel; + if abs >= data.len() { + continue; + } + let end = data[abs..] + .iter() + .position(|&b| b == 0) + .map_or(data.len(), |p| abs + p); + if let Ok(name) = std::str::from_utf8(&data[abs..end]) { + found.insert(name); + } + } + + for expected in &self.expected_gdb_index_symbols { + ensure!( + found.contains(expected.as_str()), + "ExpectGdbIndexSymbol: `{expected}` not found in .gdb_index.\n\ + Found symbols: {:?}", + { + let mut v: Vec<_> = found.iter().collect(); + v.sort(); + v + } + ); + } + Ok(()) + } + fn verify_section_bytes(&self, obj: &object::File) -> Result { for expected in &self.expected_section_bytes { let section = obj @@ -3807,6 +3926,52 @@ impl Assertions { } } +/// Parsed offsets from a `.gdb_index` header, version-agnostic. +struct GdbIndexOffsets { + cu_list: usize, + tu_list: usize, + symbol_table: usize, + constant_pool: usize, +} + +impl GdbIndexOffsets { + /// Parse from section data. Handles both the 6-field header (versions <= 7) + /// and the 7-field header (versions 8+, which adds a shortcut table offset). + fn parse(data: &[u8]) -> Result { + ensure!(data.len() >= 24, ".gdb_index too small for header"); + let version = u32::from_le_bytes(data[0..4].try_into().unwrap()); + let cu_list = u32::from_le_bytes(data[4..8].try_into().unwrap()) as usize; + let tu_list = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize; + let symbol_table = u32::from_le_bytes(data[16..20].try_into().unwrap()) as usize; + let constant_pool = if version >= 8 { + ensure!( + data.len() >= 28, + ".gdb_index v{version} too small for header" + ); + // Version 8+: header has a shortcut_table_offset between symbol_table and + // constant_pool. + u32::from_le_bytes(data[24..28].try_into().unwrap()) as usize + } else { + // Version <= 7: no shortcut table; constant_pool immediately follows + // symbol_table_offset. + u32::from_le_bytes(data[20..24].try_into().unwrap()) as usize + }; + Ok(Self { + cu_list, + tu_list, + symbol_table, + constant_pool, + }) + } +} + +fn gdb_index_section_data<'a>(obj: &'a object::File, directive: &str) -> Result> { + let section = obj + .section_by_name(".gdb_index") + .with_context(|| format!("{directive}: .gdb_index section not found"))?; + Ok(section.data()?.to_vec()) +} + fn verify_no_overlapping_sections(obj: &object::File) -> Result { let mut previous_range = None; for section in obj.sections() { diff --git a/wild/tests/sources/elf/gdb-index/gdb-index.c b/wild/tests/sources/elf/gdb-index/gdb-index.c new file mode 100644 index 000000000..a168ec5f5 --- /dev/null +++ b/wild/tests/sources/elf/gdb-index/gdb-index.c @@ -0,0 +1,37 @@ +//#AbstractConfig:default +//#CompArgs:-g -ggnu-pubnames +//#Object:runtime.c +//#Object:gdb-index2.c +//#SkipLinker:ld +//#EnableLinker:lld + +//#Config:enabled:default +//#LinkArgs:--gdb-index +//#DiffIgnore:section.gdb_index +//#ExpectSection:.gdb_index +//#ExpectGdbIndexCuCount:3 +//#ExpectGdbIndexSymbol:compute +//#ExpectGdbIndexSymbol:_start +//#ExpectGdbIndexSymbol:foo + +//#Config:disabled:default +//#LinkArgs:--gdb-index --no-gdb-index +//#NoSection:.gdb_index + +#include "../common/runtime.h" + +int foo(int a, int b); + +int compute(int x) { return x + 1; } + +void _start(void) { + runtime_init(); + if (compute(41) != 42) { + exit_syscall(10); + } + if (foo(20, 22) != 42) { + exit_syscall(11); + } + + exit_syscall(42); +} diff --git a/wild/tests/sources/elf/gdb-index/gdb-index2.c b/wild/tests/sources/elf/gdb-index/gdb-index2.c new file mode 100644 index 000000000..a1f74bf24 --- /dev/null +++ b/wild/tests/sources/elf/gdb-index/gdb-index2.c @@ -0,0 +1 @@ +int foo(int a, int b) { return a + b; } From 9534ac86cbe0c17d62ab860e49a725ff90ec79b7 Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 19:49:30 +0900 Subject: [PATCH 06/24] Eliminate duplicate parse_cu_boundaries in compute_gdb_index_size --- libwild/src/gdb_index.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index d2ff8a713..d924f9212 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -228,10 +228,10 @@ pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { }; let object = obj.object; - let obj_cu_count = raw_section_by_name(object, ".debug_info") - .map_or(0, |data| parse_cu_boundaries(data).len()); - - if obj_cu_count == 0 { + let boundaries = raw_section_by_name(object, ".debug_info") + .map(parse_cu_boundaries) + .unwrap_or_default(); + if boundaries.is_empty() { continue; } @@ -251,17 +251,16 @@ pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { } } - total_cus += obj_cu_count; + total_cus += boundaries.len(); total_addr_entries += obj_addr_count; let base_idx = cu_index_base; - let mut offset_to_idx: HashMap = HashMap::new(); - if let Some(di_data) = raw_section_by_name(object, ".debug_info") { - for (i, cu) in parse_cu_boundaries(di_data).iter().enumerate() { - offset_to_idx.insert(cu.offset, base_idx + i as u32); - } + let mut offset_to_idx: HashMap = + HashMap::with_capacity(boundaries.len()); + for (i, cu) in boundaries.iter().enumerate() { + offset_to_idx.insert(cu.offset, base_idx + i as u32); } - cu_index_base += obj_cu_count as u32; + cu_index_base += boundaries.len() as u32; collect_pubnames_symbols(object, &offset_to_idx, base_idx, &mut symbol_map); } From 36fae042891309679b7a30f6d04e64e85f5abeaa Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 19:51:21 +0900 Subject: [PATCH 07/24] Merge address and symbol table construction into single object scan --- libwild/src/gdb_index.rs | 86 ++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 48 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index d924f9212..3a4c0ba94 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -255,8 +255,7 @@ pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { total_addr_entries += obj_addr_count; let base_idx = cu_index_base; - let mut offset_to_idx: HashMap = - HashMap::with_capacity(boundaries.len()); + let mut offset_to_idx: HashMap = HashMap::with_capacity(boundaries.len()); for (i, cu) in boundaries.iter().enumerate() { offset_to_idx.insert(cu.offset, base_idx + i as u32); } @@ -300,8 +299,11 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout } let cu_entries = build_cu_list(output_buf, layout); - let addr_entries = build_address_entries(layout); - let (sorted, ht_slots) = build_symbol_table(layout); + let AddressAndSymbolData { + addr_entries, + sorted_symbols: sorted, + ht_slots, + } = build_address_and_symbol_tables(layout); let cu_list_off = HEADER_SIZE as u32; let tu_list_off = cu_list_off + (cu_entries.len() * CU_ENTRY_SIZE) as u32; @@ -401,9 +403,23 @@ fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Vec) -> Vec { - let mut entries = Vec::new(); +struct SymData { + cv_entries: Vec, + hash: u32, +} + +struct AddressAndSymbolData<'data> { + addr_entries: Vec, + sorted_symbols: Vec<(&'data [u8], SymData)>, + ht_slots: usize, +} + +/// Build address entries and symbol table in a single pass over input objects. +fn build_address_and_symbol_tables<'data>( + layout: &'data Layout<'_, Elf>, +) -> AddressAndSymbolData<'data> { + let mut addr_entries = Vec::new(); + let mut sym_map: HashMap<&'data [u8], SymData> = HashMap::new(); let mut cu_offset = 0u32; for group in &layout.group_layouts { @@ -413,13 +429,16 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec }; let object = obj.object; - let obj_cu_count = raw_section_by_name(object, ".debug_info") - .map_or(0, |data| parse_cu_boundaries(data).len() as u32); - if obj_cu_count == 0 { + let boundaries = raw_section_by_name(object, ".debug_info") + .map(parse_cu_boundaries) + .unwrap_or_default(); + if boundaries.is_empty() { continue; } - let base_cu = cu_offset; + let base = cu_offset; + + // Address entries: map each executable section to its resolved address. for (si, slot) in obj.sections.iter().enumerate() { let SectionSlot::Loaded(section) = slot else { continue; @@ -436,48 +455,15 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec if let Some(addr) = obj.section_resolutions[si].address() && addr != 0 { - entries.push(GdbIndexAddressEntry { + addr_entries.push(GdbIndexAddressEntry { low_address: addr, high_address: addr + section.size, - cu_index: base_cu, + cu_index: base, }); } } - cu_offset += obj_cu_count; - } - } - entries -} - -struct SymData { - cv_entries: Vec, - hash: u32, -} - -/// Build the symbol table from `.debug_gnu_pubnames`/`.debug_gnu_pubtypes`, returning -/// the symbols sorted by name and the computed hash table slot count. -fn build_symbol_table<'data>( - layout: &'data Layout<'_, Elf>, -) -> (Vec<(&'data [u8], SymData)>, usize) { - let mut sym_map: HashMap<&'data [u8], SymData> = HashMap::new(); - let mut cu_offset = 0u32; - - for group in &layout.group_layouts { - for file in &group.files { - let FileLayout::Object(obj) = file else { - continue; - }; - let object = obj.object; - - let boundaries = raw_section_by_name(object, ".debug_info") - .map(parse_cu_boundaries) - .unwrap_or_default(); - if boundaries.is_empty() { - continue; - } - - let base = cu_offset; + // Symbol table: collect from pubnames/pubtypes. let mut offset_to_idx: HashMap = HashMap::with_capacity(boundaries.len()); for (i, cu) in boundaries.iter().enumerate() { offset_to_idx.insert(cu.offset, base + i as u32); @@ -514,7 +500,11 @@ fn build_symbol_table<'data>( let mut sorted: Vec<(&[u8], SymData)> = sym_map.into_iter().collect(); sorted.sort_unstable_by_key(|(name, _)| *name); let ht_slots = compute_hash_table_slots(sorted.len()); - (sorted, ht_slots) + AddressAndSymbolData { + addr_entries, + sorted_symbols: sorted, + ht_slots, + } } /// Collect pubnames/pubtypes symbols from an object into the global map. From 66ffff80552dbbbde0dbde75b2f33e5b79b4b35a Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 20:27:06 +0900 Subject: [PATCH 08/24] clippy fix --- wild/tests/integration_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wild/tests/integration_tests.rs b/wild/tests/integration_tests.rs index 2d7fb775e..85b4832fa 100644 --- a/wild/tests/integration_tests.rs +++ b/wild/tests/integration_tests.rs @@ -3965,7 +3965,7 @@ impl GdbIndexOffsets { } } -fn gdb_index_section_data<'a>(obj: &'a object::File, directive: &str) -> Result> { +fn gdb_index_section_data(obj: &object::File, directive: &str) -> Result> { let section = obj .section_by_name(".gdb_index") .with_context(|| format!("{directive}: .gdb_index section not found"))?; From aa288f0d11fcff167fbfb4d547f65c72e2dc0a61 Mon Sep 17 00:00:00 2001 From: lapla Date: Wed, 27 May 2026 20:47:22 +0900 Subject: [PATCH 09/24] Extract shared pubnames/pubtypes iteration into a helper --- libwild/src/gdb_index.rs | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 3a4c0ba94..3c9cb13c9 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -261,7 +261,9 @@ pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { } cu_index_base += boundaries.len() as u32; - collect_pubnames_symbols(object, &offset_to_idx, base_idx, &mut symbol_map); + for_each_pubname_entry(object, &offset_to_idx, base_idx, |name, entry| { + symbol_map.entry(name).or_default().push(entry); + }); } } @@ -470,25 +472,13 @@ fn build_address_and_symbol_tables<'data>( } cu_offset += boundaries.len() as u32; - for section_name in [".debug_gnu_pubnames", ".debug_gnu_pubtypes"] { - let Some(data) = raw_section_by_name(object, section_name) else { - continue; - }; - for set in parse_pubnames_sets(data) { - let cu_idx = offset_to_idx - .get(&set.debug_info_offset) - .copied() - .unwrap_or(base); - for (name, attrs) in set.entries { - let entry = encode_cu_vector_entry(cu_idx, attrs); - let sd = sym_map.entry(name).or_insert_with(|| SymData { - cv_entries: Vec::new(), - hash: gdb_hash(name), - }); - sd.cv_entries.push(entry); - } - } - } + for_each_pubname_entry(object, &offset_to_idx, base, |name, entry| { + let sd = sym_map.entry(name).or_insert_with(|| SymData { + cv_entries: Vec::new(), + hash: gdb_hash(name), + }); + sd.cv_entries.push(entry); + }); } } @@ -507,12 +497,13 @@ fn build_address_and_symbol_tables<'data>( } } -/// Collect pubnames/pubtypes symbols from an object into the global map. -fn collect_pubnames_symbols<'data>( +/// Iterate over `.debug_gnu_pubnames` and `.debug_gnu_pubtypes` entries in an object, +/// calling `on_entry(name, encoded_entry)` for each symbol. +fn for_each_pubname_entry<'data>( object: &crate::elf::File<'data>, offset_to_idx: &HashMap, fallback_cu: u32, - symbol_map: &mut HashMap<&'data [u8], Vec>, + mut on_entry: impl FnMut(&'data [u8], u32), ) { for section_name in [".debug_gnu_pubnames", ".debug_gnu_pubtypes"] { let Some(data) = raw_section_by_name(object, section_name) else { @@ -524,8 +515,7 @@ fn collect_pubnames_symbols<'data>( .copied() .unwrap_or(fallback_cu); for (name, attrs) in set.entries { - let entry = encode_cu_vector_entry(cu_idx, attrs); - symbol_map.entry(name).or_default().push(entry); + on_entry(name, encode_cu_vector_entry(cu_idx, attrs)); } } } From e3cc626208cf06971dd06c0b6ed0657e9cafb5a1 Mon Sep 17 00:00:00 2001 From: lapla Date: Fri, 29 May 2026 11:42:51 +0900 Subject: [PATCH 10/24] Use more idiomatic APIs and fix step calc --- libwild/src/gdb_index.rs | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 3c9cb13c9..942f34c84 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -15,6 +15,9 @@ use crate::platform::ObjectFile as _; use crate::platform::SectionHeader as _; use crate::resolution::SectionSlot; use hashbrown::HashMap; +use linker_utils::bit_misc::BitExtraction; +use linker_utils::utils::u32_from_slice; +use linker_utils::utils::u64_from_slice; use std::mem::size_of; use zerocopy::FromBytes; use zerocopy::Immutable; @@ -80,8 +83,9 @@ fn gdb_hash(name: &[u8]) -> u32 { /// The attrs byte from `.debug_gnu_pubnames`/`.debug_gnu_pubtypes` packs kind in bits 4-6 /// and is_static in bit 7. fn encode_cu_vector_entry(cu_index: u32, attrs: u8) -> u32 { - let kind = u32::from((attrs >> 4) & 0x7); - let is_static = u32::from((attrs >> 7) & 0x1); + let attrs = u64::from(attrs); + let kind = attrs.extract_bit_range(4..7) as u32; + let is_static = attrs.extract_bit_range(7..8) as u32; (cu_index & 0x00FF_FFFF) | (kind << 28) | (is_static << 31) } @@ -103,12 +107,12 @@ fn parse_cu_boundaries(data: &[u8]) -> Vec { let mut cus = Vec::new(); let mut offset = 0usize; while offset + 4 <= data.len() { - let init_len = u32::from_le_bytes(data[offset..offset + 4].try_into().unwrap()); + let init_len = u32_from_slice(&data[offset..]); let total = if init_len == 0xFFFF_FFFF { if offset + 12 > data.len() { break; } - let len = u64::from_le_bytes(data[offset + 4..offset + 12].try_into().unwrap()); + let len = u64_from_slice(&data[offset + 4..]); 12 + len as usize } else { 4 + init_len as usize @@ -138,24 +142,22 @@ fn parse_pubnames_sets(data: &[u8]) -> Vec> { let mut sets = Vec::new(); let mut pos = 0; while pos + 4 <= data.len() { - let init_len = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()); + let init_len = u32_from_slice(&data[pos..]); let (header_size, set_end, debug_info_offset) = if init_len == 0xFFFF_FFFF { // DWARF64: 4 + 8(len) + 2(ver) + 8(offset) + 8(size) = 30 if pos + 30 > data.len() { break; } - let len = u64::from_le_bytes(data[pos + 4..pos + 12].try_into().unwrap()); - let dio = u64::from_le_bytes(data[pos + 14..pos + 22].try_into().unwrap()); + let len = u64_from_slice(&data[pos + 4..]); + let dio = u64_from_slice(&data[pos + 14..]); (30, pos + 12 + len as usize, dio) } else { // DWARF32: 4(len) + 2(ver) + 4(offset) + 4(size) = 14 if pos + 14 > data.len() { break; } - let dio = u64::from(u32::from_le_bytes( - data[pos + 6..pos + 10].try_into().unwrap(), - )); + let dio = u64::from(u32_from_slice(&data[pos + 6..])); (14, pos + 4 + init_len as usize, dio) }; @@ -169,14 +171,14 @@ fn parse_pubnames_sets(data: &[u8]) -> Vec> { if ep + 8 > set_end { break; } - let v = u64::from_le_bytes(data[ep..ep + 8].try_into().unwrap()); + let v = u64_from_slice(&data[ep..]); ep += 8; v } else { if ep + 4 > set_end { break; } - let v = u64::from(u32::from_le_bytes(data[ep..ep + 4].try_into().unwrap())); + let v = u64::from(u32_from_slice(&data[ep..])); ep += 4; v }; @@ -539,12 +541,12 @@ fn write_hash_table( let mask = (ht_slots - 1) as u32; for (i, (_, sd)) in sorted.iter().enumerate() { let h = sd.hash; - let step = ((h >> 3) & mask) | 1; + let step = (h.wrapping_mul(17) & mask) | 1; let mut slot = h & mask; loop { let so = ht_start + slot as usize * HASH_SLOT_SIZE; - let existing_name = u32::from_le_bytes(buf[so..so + 4].try_into().unwrap()); - let existing_vec = u32::from_le_bytes(buf[so + 4..so + 8].try_into().unwrap()); + let existing_name = u32_from_slice(&buf[so..]); + let existing_vec = u32_from_slice(&buf[so + 4..]); if existing_name == 0 && existing_vec == 0 { buf[so..so + 4].copy_from_slice(&name_offsets[i].to_le_bytes()); buf[so + 4..so + 8].copy_from_slice(&cv_offsets[i].to_le_bytes()); From 31ff30173de96d9fdc7a082f468cfb46a973f64f Mon Sep 17 00:00:00 2001 From: lapla Date: Fri, 29 May 2026 11:51:11 +0900 Subject: [PATCH 11/24] Calculate hashslots using zerocopy --- libwild/src/gdb_index.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 942f34c84..fc84b26b1 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -64,7 +64,14 @@ const HEADER_SIZE: usize = size_of::(); const CU_ENTRY_SIZE: usize = size_of::(); const ADDRESS_ENTRY_SIZE: usize = size_of::(); const SHORTCUT_TABLE_SIZE: usize = size_of::(); -const HASH_SLOT_SIZE: usize = 8; // (name_offset, cu_vector_offset) pair +#[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[repr(C, packed)] +struct GdbIndexHashSlot { + name_offset: u32, + cu_vector_offset: u32, +} + +const HASH_SLOT_SIZE: usize = size_of::(); /// The GDB index hash function. fn gdb_hash(name: &[u8]) -> u32 { @@ -545,11 +552,14 @@ fn write_hash_table( let mut slot = h & mask; loop { let so = ht_start + slot as usize * HASH_SLOT_SIZE; - let existing_name = u32_from_slice(&buf[so..]); - let existing_vec = u32_from_slice(&buf[so + 4..]); - if existing_name == 0 && existing_vec == 0 { - buf[so..so + 4].copy_from_slice(&name_offsets[i].to_le_bytes()); - buf[so + 4..so + 8].copy_from_slice(&cv_offsets[i].to_le_bytes()); + let existing = + GdbIndexHashSlot::read_from_bytes(&buf[so..so + HASH_SLOT_SIZE]).unwrap(); + if existing.name_offset == 0 && existing.cu_vector_offset == 0 { + let new_slot = GdbIndexHashSlot { + name_offset: name_offsets[i], + cu_vector_offset: cv_offsets[i], + }; + buf[so..so + HASH_SLOT_SIZE].copy_from_slice(new_slot.as_bytes()); break; } slot = (slot + step) & mask; From ac5bbc46973c82150ad39a097c6a25bc1ec98aad Mon Sep 17 00:00:00 2001 From: lapla Date: Fri, 29 May 2026 11:58:16 +0900 Subject: [PATCH 12/24] Add a comment for `cv_bytes` addition --- libwild/src/gdb_index.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index fc84b26b1..0b0349ba5 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -285,6 +285,7 @@ pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { for (name, entries) in &mut symbol_map { entries.sort_unstable(); entries.dedup(); + // 4 bytes for the entry count, then 4 bytes per entry. cv_bytes += 4 + entries.len() * 4; str_bytes += name.len() + 1; } From 121674b7c63f177acd9be561401500fd6fe0e1c3 Mon Sep 17 00:00:00 2001 From: lapla Date: Fri, 29 May 2026 12:24:36 +0900 Subject: [PATCH 13/24] Add consts for `.debug_info` --- libwild/src/gdb_index.rs | 8 +++++--- linker-utils/src/elf.rs | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 0b0349ba5..102217722 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -16,6 +16,8 @@ use crate::platform::SectionHeader as _; use crate::resolution::SectionSlot; use hashbrown::HashMap; use linker_utils::bit_misc::BitExtraction; +use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME; +use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME_STR; use linker_utils::utils::u32_from_slice; use linker_utils::utils::u64_from_slice; use std::mem::size_of; @@ -237,7 +239,7 @@ pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { }; let object = obj.object; - let boundaries = raw_section_by_name(object, ".debug_info") + let boundaries = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) .map(parse_cu_boundaries) .unwrap_or_default(); if boundaries.is_empty() { @@ -396,7 +398,7 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Vec { let Some(id) = layout .output_sections - .section_id_by_name(SectionName(b".debug_info")) + .section_id_by_name(SectionName(DEBUG_INFO_SECTION_NAME)) else { return Vec::new(); }; @@ -441,7 +443,7 @@ fn build_address_and_symbol_tables<'data>( }; let object = obj.object; - let boundaries = raw_section_by_name(object, ".debug_info") + let boundaries = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) .map(parse_cu_boundaries) .unwrap_or_default(); if boundaries.is_empty() { diff --git a/linker-utils/src/elf.rs b/linker-utils/src/elf.rs index e24247936..fd3381d77 100644 --- a/linker-utils/src/elf.rs +++ b/linker-utils/src/elf.rs @@ -326,6 +326,8 @@ pub mod secnames { pub const RELRO_PADDING_SECTION_NAME: &[u8] = RELRO_PADDING_SECTION_NAME_STR.as_bytes(); pub const SYMTAB_SHNDX_SECTION_NAME_STR: &str = ".symtab_shndx"; pub const SYMTAB_SHNDX_SECTION_NAME: &[u8] = SYMTAB_SHNDX_SECTION_NAME_STR.as_bytes(); + pub const DEBUG_INFO_SECTION_NAME_STR: &str = ".debug_info"; + pub const DEBUG_INFO_SECTION_NAME: &[u8] = DEBUG_INFO_SECTION_NAME_STR.as_bytes(); pub const GDB_INDEX_SECTION_NAME_STR: &str = ".gdb_index"; pub const GDB_INDEX_SECTION_NAME: &[u8] = GDB_INDEX_SECTION_NAME_STR.as_bytes(); From aef860f29257815d2aaf3b83e224e135a6cfe933 Mon Sep 17 00:00:00 2001 From: lapla Date: Fri, 29 May 2026 12:30:17 +0900 Subject: [PATCH 14/24] Write constant pool directly to output buffer --- libwild/src/gdb_index.rs | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 102217722..01ed08ca8 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -326,30 +326,27 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout let short_off = sym_off + (ht_slots * HASH_SLOT_SIZE) as u32; let cp_off = short_off + SHORTCUT_TABLE_SIZE as u32; - // Build constant pool: CU vectors first, then name strings. - let mut cv_data = Vec::new(); - let mut str_data = Vec::new(); + // Write constant pool: CU vectors first, then name strings. let mut cv_offsets = Vec::with_capacity(sorted.len()); - let mut name_offsets = Vec::with_capacity(sorted.len()); - + let mut off = cp_off as usize; for (_, sd) in &sorted { - cv_offsets.push(cv_data.len() as u32); - cv_data.extend_from_slice(&(sd.cv_entries.len() as u32).to_le_bytes()); + cv_offsets.push((off - cp_off as usize) as u32); + buf[off..off + 4].copy_from_slice(&(sd.cv_entries.len() as u32).to_le_bytes()); + off += 4; for &e in &sd.cv_entries { - cv_data.extend_from_slice(&e.to_le_bytes()); + buf[off..off + 4].copy_from_slice(&e.to_le_bytes()); + off += 4; } } + let mut name_offsets = Vec::with_capacity(sorted.len()); for (name, _) in &sorted { - name_offsets.push((cv_data.len() + str_data.len()) as u32); - str_data.extend_from_slice(name); - str_data.push(0); + name_offsets.push((off - cp_off as usize) as u32); + buf[off..off + name.len()].copy_from_slice(name); + off += name.len(); + buf[off] = 0; + off += 1; } - // Emit into the output buffer. - let total = cp_off as usize + cv_data.len() + str_data.len(); - let len = buf.len().min(total); - let buf = &mut buf[..len]; - let hdr = GdbIndexHeader { version: GDB_INDEX_VERSION, cu_list_offset: cu_list_off, @@ -388,10 +385,6 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout name_of_main_offset: 0, }; buf[so..so + SHORTCUT_TABLE_SIZE].copy_from_slice(sc.as_bytes()); - - let cpo = cp_off as usize; - buf[cpo..cpo + cv_data.len()].copy_from_slice(&cv_data); - buf[cpo + cv_data.len()..cpo + cv_data.len() + str_data.len()].copy_from_slice(&str_data); } /// Build the CU list from the already-written output `.debug_info`. From b8f4da76f5dc38a6e58efa37911ba818c62e1076 Mon Sep 17 00:00:00 2001 From: lapla Date: Fri, 29 May 2026 12:34:30 +0900 Subject: [PATCH 15/24] Use itertools `sorted_unstable_by_key()` --- libwild/src/gdb_index.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 01ed08ca8..dd284a981 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -15,6 +15,7 @@ use crate::platform::ObjectFile as _; use crate::platform::SectionHeader as _; use crate::resolution::SectionSlot; use hashbrown::HashMap; +use itertools::Itertools as _; use linker_utils::bit_misc::BitExtraction; use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME; use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME_STR; @@ -492,8 +493,10 @@ fn build_address_and_symbol_tables<'data>( sd.cv_entries.dedup(); } - let mut sorted: Vec<(&[u8], SymData)> = sym_map.into_iter().collect(); - sorted.sort_unstable_by_key(|(name, _)| *name); + let sorted: Vec<(&[u8], SymData)> = sym_map + .into_iter() + .sorted_unstable_by_key(|(name, _)| *name) + .collect(); let ht_slots = compute_hash_table_slots(sorted.len()); AddressAndSymbolData { addr_entries, From d9abf65a55344e5cf1d64401c8f03043d4c96753 Mon Sep 17 00:00:00 2001 From: lapla Date: Sat, 30 May 2026 01:06:52 +0900 Subject: [PATCH 16/24] Unify object scannig logic --- libwild/src/gdb_index.rs | 220 ++++++++++++++++++++------------------- 1 file changed, 111 insertions(+), 109 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index dd284a981..5db8a67c4 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -228,77 +228,30 @@ fn raw_section_by_name<'data>(object: &crate::elf::File<'data>, name: &str) -> O /// Pre-scan all input objects to compute the `.gdb_index` section size. pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { - let mut total_cus = 0usize; - let mut total_addr_entries = 0usize; - let mut symbol_map: HashMap<&[u8], Vec> = HashMap::new(); - let mut cu_index_base = 0u32; - - for group in groups { - for file in &group.files { - let FileLayoutState::Object(obj) = file else { - continue; - }; - let object = obj.object; - - let boundaries = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) - .map(parse_cu_boundaries) - .unwrap_or_default(); - if boundaries.is_empty() { - continue; - } - - let mut obj_addr_count = 0usize; - for (si, slot) in obj.sections.iter().enumerate() { - let SectionSlot::Loaded(section) = slot else { - continue; - }; - if section.size == 0 { - continue; - } - let Ok(header) = object.section(object::SectionIndex(si)) else { - continue; - }; - if header.is_alloc() && header.is_executable() { - obj_addr_count += 1; - } - } - - total_cus += boundaries.len(); - total_addr_entries += obj_addr_count; - - let base_idx = cu_index_base; - let mut offset_to_idx: HashMap = HashMap::with_capacity(boundaries.len()); - for (i, cu) in boundaries.iter().enumerate() { - offset_to_idx.insert(cu.offset, base_idx + i as u32); - } - cu_index_base += boundaries.len() as u32; - - for_each_pubname_entry(object, &offset_to_idx, base_idx, |name, entry| { - symbol_map.entry(name).or_default().push(entry); - }); - } - } + let objects = groups.iter().flat_map(|g| g.files.iter()).filter_map(|f| { + let FileLayoutState::Object(obj) = f else { + return None; + }; + Some((obj.object, obj.sections.as_slice())) + }); + let scan = scan_objects_for_gdb_index(objects); - if total_cus == 0 { + if scan.total_cus == 0 { return 0; } let mut cv_bytes = 0usize; let mut str_bytes = 0usize; - for (name, entries) in &mut symbol_map { - entries.sort_unstable(); - entries.dedup(); + for (name, sd) in &scan.sorted_symbols { // 4 bytes for the entry count, then 4 bytes per entry. - cv_bytes += 4 + entries.len() * 4; + cv_bytes += 4 + sd.cv_entries.len() * 4; str_bytes += name.len() + 1; } - let ht_slots = compute_hash_table_slots(symbol_map.len()); - (HEADER_SIZE - + total_cus * CU_ENTRY_SIZE - + total_addr_entries * ADDRESS_ENTRY_SIZE - + ht_slots * HASH_SLOT_SIZE + + scan.total_cus * CU_ENTRY_SIZE + + scan.total_addr_entries * ADDRESS_ENTRY_SIZE + + scan.ht_slots * HASH_SLOT_SIZE + SHORTCUT_TABLE_SIZE + cv_bytes + str_bytes) as u64 @@ -314,11 +267,22 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout } let cu_entries = build_cu_list(output_buf, layout); - let AddressAndSymbolData { - addr_entries, + let objects = layout + .group_layouts + .iter() + .flat_map(|g| g.files.iter()) + .filter_map(|f| { + let FileLayout::Object(obj) = f else { + return None; + }; + Some((obj.object, obj.sections.as_slice())) + }); + let GdbIndexScanResult { sorted_symbols: sorted, ht_slots, - } = build_address_and_symbol_tables(layout); + .. + } = scan_objects_for_gdb_index(objects); + let addr_entries = build_address_entries(layout); let cu_list_off = HEADER_SIZE as u32; let tu_list_off = cu_list_off + (cu_entries.len() * CU_ENTRY_SIZE) as u32; @@ -416,20 +380,89 @@ struct SymData { hash: u32, } -struct AddressAndSymbolData<'data> { - addr_entries: Vec, +struct GdbIndexScanResult<'data> { + total_cus: usize, + total_addr_entries: usize, sorted_symbols: Vec<(&'data [u8], SymData)>, ht_slots: usize, } -/// Build address entries and symbol table in a single pass over input objects. -fn build_address_and_symbol_tables<'data>( - layout: &'data Layout<'_, Elf>, -) -> AddressAndSymbolData<'data> { - let mut addr_entries = Vec::new(); +/// Scan input objects to build the symbol table and count CUs / address entries. +fn scan_objects_for_gdb_index<'data>( + objects: impl Iterator, &'data [SectionSlot])>, +) -> GdbIndexScanResult<'data> { + let mut total_cus = 0usize; + let mut total_addr_entries = 0usize; let mut sym_map: HashMap<&'data [u8], SymData> = HashMap::new(); let mut cu_offset = 0u32; + for (object, sections) in objects { + let boundaries = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) + .map(parse_cu_boundaries) + .unwrap_or_default(); + if boundaries.is_empty() { + continue; + } + + let base = cu_offset; + + let mut obj_addr_count = 0usize; + for (si, slot) in sections.iter().enumerate() { + let SectionSlot::Loaded(section) = slot else { + continue; + }; + if section.size == 0 { + continue; + } + let Ok(header) = object.section(object::SectionIndex(si)) else { + continue; + }; + if header.is_alloc() && header.is_executable() { + obj_addr_count += 1; + } + } + + total_cus += boundaries.len(); + total_addr_entries += obj_addr_count; + + let mut offset_to_idx: HashMap = HashMap::with_capacity(boundaries.len()); + for (i, cu) in boundaries.iter().enumerate() { + offset_to_idx.insert(cu.offset, base + i as u32); + } + cu_offset += boundaries.len() as u32; + + for_each_pubname_entry(object, &offset_to_idx, base, |name, entry| { + let sd = sym_map.entry(name).or_insert_with(|| SymData { + cv_entries: Vec::new(), + hash: gdb_hash(name), + }); + sd.cv_entries.push(entry); + }); + } + + for sd in sym_map.values_mut() { + sd.cv_entries.sort_unstable(); + sd.cv_entries.dedup(); + } + + let sorted: Vec<(&[u8], SymData)> = sym_map + .into_iter() + .sorted_unstable_by_key(|(name, _)| *name) + .collect(); + let ht_slots = compute_hash_table_slots(sorted.len()); + GdbIndexScanResult { + total_cus, + total_addr_entries, + sorted_symbols: sorted, + ht_slots, + } +} + +/// Build address entries using resolved addresses from the final layout. +fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec { + let mut entries = Vec::new(); + let mut cu_offset = 0u32; + for group in &layout.group_layouts { for file in &group.files { let FileLayout::Object(obj) = file else { @@ -437,16 +470,13 @@ fn build_address_and_symbol_tables<'data>( }; let object = obj.object; - let boundaries = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) - .map(parse_cu_boundaries) - .unwrap_or_default(); - if boundaries.is_empty() { + let obj_cu_count = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) + .map_or(0, |data| parse_cu_boundaries(data).len() as u32); + if obj_cu_count == 0 { continue; } + let base_cu = cu_offset; - let base = cu_offset; - - // Address entries: map each executable section to its resolved address. for (si, slot) in obj.sections.iter().enumerate() { let SectionSlot::Loaded(section) = slot else { continue; @@ -463,46 +493,18 @@ fn build_address_and_symbol_tables<'data>( if let Some(addr) = obj.section_resolutions[si].address() && addr != 0 { - addr_entries.push(GdbIndexAddressEntry { + entries.push(GdbIndexAddressEntry { low_address: addr, high_address: addr + section.size, - cu_index: base, + cu_index: base_cu, }); } } - // Symbol table: collect from pubnames/pubtypes. - let mut offset_to_idx: HashMap = HashMap::with_capacity(boundaries.len()); - for (i, cu) in boundaries.iter().enumerate() { - offset_to_idx.insert(cu.offset, base + i as u32); - } - cu_offset += boundaries.len() as u32; - - for_each_pubname_entry(object, &offset_to_idx, base, |name, entry| { - let sd = sym_map.entry(name).or_insert_with(|| SymData { - cv_entries: Vec::new(), - hash: gdb_hash(name), - }); - sd.cv_entries.push(entry); - }); + cu_offset += obj_cu_count; } } - - for sd in sym_map.values_mut() { - sd.cv_entries.sort_unstable(); - sd.cv_entries.dedup(); - } - - let sorted: Vec<(&[u8], SymData)> = sym_map - .into_iter() - .sorted_unstable_by_key(|(name, _)| *name) - .collect(); - let ht_slots = compute_hash_table_slots(sorted.len()); - AddressAndSymbolData { - addr_entries, - sorted_symbols: sorted, - ht_slots, - } + entries } /// Iterate over `.debug_gnu_pubnames` and `.debug_gnu_pubtypes` entries in an object, From 9d099f323fd8e11b7f1121c29a2a64dfc873a670 Mon Sep 17 00:00:00 2001 From: lapla Date: Sat, 30 May 2026 02:07:36 +0900 Subject: [PATCH 17/24] Refactor pubnames collection --- libwild/src/gdb_index.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 5db8a67c4..6653eedbd 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -431,13 +431,13 @@ fn scan_objects_for_gdb_index<'data>( } cu_offset += boundaries.len() as u32; - for_each_pubname_entry(object, &offset_to_idx, base, |name, entry| { + for (name, entry) in collect_pubname_entries(object, &offset_to_idx, base) { let sd = sym_map.entry(name).or_insert_with(|| SymData { cv_entries: Vec::new(), hash: gdb_hash(name), }); sd.cv_entries.push(entry); - }); + } } for sd in sym_map.values_mut() { @@ -507,14 +507,14 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec entries } -/// Iterate over `.debug_gnu_pubnames` and `.debug_gnu_pubtypes` entries in an object, -/// calling `on_entry(name, encoded_entry)` for each symbol. -fn for_each_pubname_entry<'data>( +/// Collect encoded pubname/pubtype entries from an object's `.debug_gnu_pubnames` +/// and `.debug_gnu_pubtypes` sections, returning `(name, encoded_cu_vector_entry)` pairs. +fn collect_pubname_entries<'data>( object: &crate::elf::File<'data>, offset_to_idx: &HashMap, fallback_cu: u32, - mut on_entry: impl FnMut(&'data [u8], u32), -) { +) -> Vec<(&'data [u8], u32)> { + let mut entries = Vec::new(); for section_name in [".debug_gnu_pubnames", ".debug_gnu_pubtypes"] { let Some(data) = raw_section_by_name(object, section_name) else { continue; @@ -525,10 +525,11 @@ fn for_each_pubname_entry<'data>( .copied() .unwrap_or(fallback_cu); for (name, attrs) in set.entries { - on_entry(name, encode_cu_vector_entry(cu_idx, attrs)); + entries.push((name, encode_cu_vector_entry(cu_idx, attrs))); } } } + entries } /// Insert symbols into the open-addressing hash table region of `buf`. From 21b9dca539f24fbd72beea9a9d19c127d396de7b Mon Sep 17 00:00:00 2001 From: lapla Date: Sat, 30 May 2026 02:15:21 +0900 Subject: [PATCH 18/24] Use `BTreeSet` for `cv_entries` --- libwild/src/gdb_index.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 6653eedbd..15fd9dd3a 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -21,6 +21,7 @@ use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME; use linker_utils::elf::secnames::DEBUG_INFO_SECTION_NAME_STR; use linker_utils::utils::u32_from_slice; use linker_utils::utils::u64_from_slice; +use std::collections::BTreeSet; use std::mem::size_of; use zerocopy::FromBytes; use zerocopy::Immutable; @@ -376,7 +377,7 @@ fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Vec, + cv_entries: BTreeSet, hash: u32, } @@ -433,18 +434,13 @@ fn scan_objects_for_gdb_index<'data>( for (name, entry) in collect_pubname_entries(object, &offset_to_idx, base) { let sd = sym_map.entry(name).or_insert_with(|| SymData { - cv_entries: Vec::new(), + cv_entries: BTreeSet::new(), hash: gdb_hash(name), }); - sd.cv_entries.push(entry); + sd.cv_entries.insert(entry); } } - for sd in sym_map.values_mut() { - sd.cv_entries.sort_unstable(); - sd.cv_entries.dedup(); - } - let sorted: Vec<(&[u8], SymData)> = sym_map .into_iter() .sorted_unstable_by_key(|(name, _)| *name) From 55610bc8cbf96f570b33911031d8c9e17a3e6dba Mon Sep 17 00:00:00 2001 From: lapla Date: Sun, 31 May 2026 11:08:21 +0900 Subject: [PATCH 19/24] Improve error handling --- libwild/src/elf.rs | 4 +- libwild/src/elf_writer.rs | 10 ++-- libwild/src/gdb_index.rs | 108 +++++++++++++++++++++----------------- libwild/src/layout.rs | 2 +- libwild/src/platform.rs | 6 ++- 5 files changed, 72 insertions(+), 58 deletions(-) diff --git a/libwild/src/elf.rs b/libwild/src/elf.rs index f26b91fde..7c74cb421 100644 --- a/libwild/src/elf.rs +++ b/libwild/src/elf.rs @@ -1995,7 +1995,9 @@ impl platform::Platform for Elf { total_sizes.merge(&extra_sizes); } - fn compute_gdb_index_size(groups: &[crate::layout::GroupState]) -> u64 { + fn compute_gdb_index_size( + groups: &[crate::layout::GroupState], + ) -> crate::error::Result { crate::gdb_index::compute_gdb_index_size(groups) } diff --git a/libwild/src/elf_writer.rs b/libwild/src/elf_writer.rs index 37ed2af75..11cf055f5 100644 --- a/libwild/src/elf_writer.rs +++ b/libwild/src/elf_writer.rs @@ -186,7 +186,7 @@ pub(crate) fn write<'data, A: Arch>( } // Write .gdb_index before splitting, since it needs to read .debug_info from the output. - write_gdb_index_section(&mut sized_output.out, layout); + write_gdb_index_section(&mut sized_output.out, layout)?; let mut section_buffers = split_output_into_sections(layout, &mut sized_output.out).0; @@ -316,14 +316,14 @@ fn fill_padding(mut section_buffers: OutputSectionMap<&mut [u8]>) { }); } -fn write_gdb_index_section(output: &mut [u8], layout: &ElfLayout) { +fn write_gdb_index_section(output: &mut [u8], layout: &ElfLayout) -> Result { use crate::platform::Args as _; if !layout.args().should_write_gdb_index() { - return; + return Ok(()); } let sl = layout.section_layouts.get(output_section_id::GDB_INDEX); if sl.file_size == 0 { - return; + return Ok(()); } timing_phase!("Write .gdb_index"); let start = sl.file_offset; @@ -331,7 +331,7 @@ fn write_gdb_index_section(output: &mut [u8], layout: &ElfLayout) { // and our section is writable. let (before, rest) = output.split_at_mut(start); let gdb_buf = &mut rest[..sl.file_size]; - crate::gdb_index::write_gdb_index(gdb_buf, before, layout); + crate::gdb_index::write_gdb_index(gdb_buf, before, layout) } fn write_sframe_section(sframe_buffer: &mut [u8], layout: &ElfLayout) -> Result { diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 15fd9dd3a..b5ca073c3 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -6,6 +6,8 @@ //! Format reference: use crate::elf::Elf; +use crate::error::Context as _; +use crate::error::Result; use crate::layout::FileLayout; use crate::layout::FileLayoutState; use crate::layout::GroupState; @@ -222,23 +224,28 @@ fn parse_pubnames_sets(data: &[u8]) -> Vec> { } /// Read raw section data from an input object by name. -fn raw_section_by_name<'data>(object: &crate::elf::File<'data>, name: &str) -> Option<&'data [u8]> { - let (_index, header) = object.section_by_name(name)?; - object.raw_section_data(header).ok() +fn raw_section_by_name<'data>( + object: &crate::elf::File<'data>, + name: &str, +) -> Result> { + let Some((_index, header)) = object.section_by_name(name) else { + return Ok(None); + }; + Ok(Some(object.raw_section_data(header)?)) } /// Pre-scan all input objects to compute the `.gdb_index` section size. -pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { +pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> Result { let objects = groups.iter().flat_map(|g| g.files.iter()).filter_map(|f| { let FileLayoutState::Object(obj) = f else { return None; }; Some((obj.object, obj.sections.as_slice())) }); - let scan = scan_objects_for_gdb_index(objects); + let scan = scan_objects_for_gdb_index(objects)?; if scan.total_cus == 0 { - return 0; + return Ok(0); } let mut cv_bytes = 0usize; @@ -249,25 +256,29 @@ pub(crate) fn compute_gdb_index_size(groups: &[GroupState<'_, Elf>]) -> u64 { str_bytes += name.len() + 1; } - (HEADER_SIZE + Ok((HEADER_SIZE + scan.total_cus * CU_ENTRY_SIZE + scan.total_addr_entries * ADDRESS_ENTRY_SIZE + scan.ht_slots * HASH_SLOT_SIZE + SHORTCUT_TABLE_SIZE + cv_bytes - + str_bytes) as u64 + + str_bytes) as u64) } /// Write the `.gdb_index` section into `buf`. /// /// Reads the output `.debug_info` (already written into `output_buf`) for the CU list, /// and re-scans input objects for address ranges and pubnames/pubtypes symbols. -pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout<'_, Elf>) { +pub(crate) fn write_gdb_index( + buf: &mut [u8], + output_buf: &[u8], + layout: &Layout<'_, Elf>, +) -> Result { if buf.is_empty() { - return; + return Ok(()); } - let cu_entries = build_cu_list(output_buf, layout); + let cu_entries = build_cu_list(output_buf, layout)?; let objects = layout .group_layouts .iter() @@ -282,8 +293,8 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout sorted_symbols: sorted, ht_slots, .. - } = scan_objects_for_gdb_index(objects); - let addr_entries = build_address_entries(layout); + } = scan_objects_for_gdb_index(objects)?; + let addr_entries = build_address_entries(layout)?; let cu_list_off = HEADER_SIZE as u32; let tu_list_off = cu_list_off + (cu_entries.len() * CU_ENTRY_SIZE) as u32; @@ -343,7 +354,7 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout &sorted, &name_offsets, &cv_offsets, - ); + )?; let so = short_off as usize; let sc = GdbIndexShortcutTable { @@ -351,29 +362,32 @@ pub(crate) fn write_gdb_index(buf: &mut [u8], output_buf: &[u8], layout: &Layout name_of_main_offset: 0, }; buf[so..so + SHORTCUT_TABLE_SIZE].copy_from_slice(sc.as_bytes()); + Ok(()) } /// Build the CU list from the already-written output `.debug_info`. -fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Vec { +fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Result> { let Some(id) = layout .output_sections .section_id_by_name(SectionName(DEBUG_INFO_SECTION_NAME)) else { - return Vec::new(); + return Ok(Vec::new()); }; let sl = layout.section_layouts.get(id); let start = sl.file_offset; let end = start + sl.file_size; - if end > output_buf.len() { - return Vec::new(); - } - parse_cu_boundaries(&output_buf[start..end]) + crate::ensure!( + end <= output_buf.len(), + ".debug_info layout extends beyond output buffer ({end} > {})", + output_buf.len() + ); + Ok(parse_cu_boundaries(&output_buf[start..end]) .into_iter() .map(|cu| GdbIndexCuEntry { cu_offset: cu.offset, cu_length: cu.length, }) - .collect() + .collect()) } struct SymData { @@ -391,16 +405,17 @@ struct GdbIndexScanResult<'data> { /// Scan input objects to build the symbol table and count CUs / address entries. fn scan_objects_for_gdb_index<'data>( objects: impl Iterator, &'data [SectionSlot])>, -) -> GdbIndexScanResult<'data> { +) -> Result> { let mut total_cus = 0usize; let mut total_addr_entries = 0usize; let mut sym_map: HashMap<&'data [u8], SymData> = HashMap::new(); let mut cu_offset = 0u32; for (object, sections) in objects { - let boundaries = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) - .map(parse_cu_boundaries) - .unwrap_or_default(); + let boundaries = match raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR)? { + Some(data) => parse_cu_boundaries(data), + None => continue, + }; if boundaries.is_empty() { continue; } @@ -415,9 +430,7 @@ fn scan_objects_for_gdb_index<'data>( if section.size == 0 { continue; } - let Ok(header) = object.section(object::SectionIndex(si)) else { - continue; - }; + let header = object.section(object::SectionIndex(si))?; if header.is_alloc() && header.is_executable() { obj_addr_count += 1; } @@ -432,7 +445,7 @@ fn scan_objects_for_gdb_index<'data>( } cu_offset += boundaries.len() as u32; - for (name, entry) in collect_pubname_entries(object, &offset_to_idx, base) { + for (name, entry) in collect_pubname_entries(object, &offset_to_idx)? { let sd = sym_map.entry(name).or_insert_with(|| SymData { cv_entries: BTreeSet::new(), hash: gdb_hash(name), @@ -446,16 +459,16 @@ fn scan_objects_for_gdb_index<'data>( .sorted_unstable_by_key(|(name, _)| *name) .collect(); let ht_slots = compute_hash_table_slots(sorted.len()); - GdbIndexScanResult { + Ok(GdbIndexScanResult { total_cus, total_addr_entries, sorted_symbols: sorted, ht_slots, - } + }) } /// Build address entries using resolved addresses from the final layout. -fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec { +fn build_address_entries(layout: &Layout<'_, Elf>) -> Result> { let mut entries = Vec::new(); let mut cu_offset = 0u32; @@ -466,7 +479,7 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec }; let object = obj.object; - let obj_cu_count = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR) + let obj_cu_count = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR)? .map_or(0, |data| parse_cu_boundaries(data).len() as u32); if obj_cu_count == 0 { continue; @@ -480,9 +493,7 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec if section.size == 0 { continue; } - let Ok(header) = object.section(object::SectionIndex(si)) else { - continue; - }; + let header = object.section(object::SectionIndex(si))?; if !header.is_alloc() || !header.is_executable() { continue; } @@ -500,7 +511,7 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec cu_offset += obj_cu_count; } } - entries + Ok(entries) } /// Collect encoded pubname/pubtype entries from an object's `.debug_gnu_pubnames` @@ -508,24 +519,22 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Vec fn collect_pubname_entries<'data>( object: &crate::elf::File<'data>, offset_to_idx: &HashMap, - fallback_cu: u32, -) -> Vec<(&'data [u8], u32)> { +) -> Result> { let mut entries = Vec::new(); for section_name in [".debug_gnu_pubnames", ".debug_gnu_pubtypes"] { - let Some(data) = raw_section_by_name(object, section_name) else { + let Some(data) = raw_section_by_name(object, section_name)? else { continue; }; for set in parse_pubnames_sets(data) { - let cu_idx = offset_to_idx - .get(&set.debug_info_offset) - .copied() - .unwrap_or(fallback_cu); + let Some(&cu_idx) = offset_to_idx.get(&set.debug_info_offset) else { + continue; + }; for (name, attrs) in set.entries { entries.push((name, encode_cu_vector_entry(cu_idx, attrs))); } } } - entries + Ok(entries) } /// Insert symbols into the open-addressing hash table region of `buf`. @@ -536,12 +545,12 @@ fn write_hash_table( sorted: &[(&[u8], SymData)], name_offsets: &[u32], cv_offsets: &[u32], -) { +) -> Result { let ht_end = ht_start + ht_slots * HASH_SLOT_SIZE; buf[ht_start..ht_end].fill(0); if ht_slots == 0 { - return; + return Ok(()); } let mask = (ht_slots - 1) as u32; for (i, (_, sd)) in sorted.iter().enumerate() { @@ -550,8 +559,8 @@ fn write_hash_table( let mut slot = h & mask; loop { let so = ht_start + slot as usize * HASH_SLOT_SIZE; - let existing = - GdbIndexHashSlot::read_from_bytes(&buf[so..so + HASH_SLOT_SIZE]).unwrap(); + let existing = GdbIndexHashSlot::read_from_bytes(&buf[so..so + HASH_SLOT_SIZE]) + .context("Failed to read .gdb_index hash table slot")?; if existing.name_offset == 0 && existing.cu_vector_offset == 0 { let new_slot = GdbIndexHashSlot { name_offset: name_offsets[i], @@ -563,6 +572,7 @@ fn write_hash_table( slot = (slot + step) & mask; } } + Ok(()) } #[cfg(test)] diff --git a/libwild/src/layout.rs b/libwild/src/layout.rs index abbe49aa8..1948a26fd 100644 --- a/libwild/src/layout.rs +++ b/libwild/src/layout.rs @@ -1844,7 +1844,7 @@ fn compute_total_section_part_sizes<'data, P: Platform>( // Compute and allocate the .gdb_index section size if --gdb-index is enabled. let gdb_index_size = if resources.symbol_db.args.should_write_gdb_index() { - P::compute_gdb_index_size(group_states) + P::compute_gdb_index_size(group_states)? } else { 0 }; diff --git a/libwild/src/platform.rs b/libwild/src/platform.rs index 5d7a761d4..72870d96f 100644 --- a/libwild/src/platform.rs +++ b/libwild/src/platform.rs @@ -727,8 +727,10 @@ pub(crate) trait Platform: } /// Compute the size of the `.gdb_index` section, if applicable. - fn compute_gdb_index_size(_groups: &[crate::layout::GroupState]) -> u64 { - 0 + fn compute_gdb_index_size( + _groups: &[crate::layout::GroupState], + ) -> crate::error::Result { + Ok(0) } } From 9de1cc65d65fd1e035fddfe91c58cfb7e485ef80 Mon Sep 17 00:00:00 2001 From: lapla Date: Sun, 31 May 2026 11:20:20 +0900 Subject: [PATCH 20/24] Use `sorted_names` rather than `sorted` as a variable name --- libwild/src/gdb_index.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index b5ca073c3..979bc19f7 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -290,7 +290,7 @@ pub(crate) fn write_gdb_index( Some((obj.object, obj.sections.as_slice())) }); let GdbIndexScanResult { - sorted_symbols: sorted, + sorted_symbols: sorted_names, ht_slots, .. } = scan_objects_for_gdb_index(objects)?; @@ -304,9 +304,9 @@ pub(crate) fn write_gdb_index( let cp_off = short_off + SHORTCUT_TABLE_SIZE as u32; // Write constant pool: CU vectors first, then name strings. - let mut cv_offsets = Vec::with_capacity(sorted.len()); + let mut cv_offsets = Vec::with_capacity(sorted_names.len()); let mut off = cp_off as usize; - for (_, sd) in &sorted { + for (_, sd) in &sorted_names { cv_offsets.push((off - cp_off as usize) as u32); buf[off..off + 4].copy_from_slice(&(sd.cv_entries.len() as u32).to_le_bytes()); off += 4; @@ -315,8 +315,8 @@ pub(crate) fn write_gdb_index( off += 4; } } - let mut name_offsets = Vec::with_capacity(sorted.len()); - for (name, _) in &sorted { + let mut name_offsets = Vec::with_capacity(sorted_names.len()); + for (name, _) in &sorted_names { name_offsets.push((off - cp_off as usize) as u32); buf[off..off + name.len()].copy_from_slice(name); off += name.len(); @@ -351,7 +351,7 @@ pub(crate) fn write_gdb_index( buf, ht_slots, sym_off as usize, - &sorted, + &sorted_names, &name_offsets, &cv_offsets, )?; From 638e55029aa25433291134d3ffd34cc531a0086d Mon Sep 17 00:00:00 2001 From: lapla Date: Mon, 1 Jun 2026 13:44:40 +0900 Subject: [PATCH 21/24] Emit errors when GDB indices are corrupted --- libwild/src/gdb_index.rs | 80 ++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 979bc19f7..12f0deacd 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -116,30 +116,32 @@ struct CuBoundary { } /// Walk `.debug_info` bytes and return `(offset, total_length)` for each CU. -fn parse_cu_boundaries(data: &[u8]) -> Vec { +fn parse_cu_boundaries(data: &[u8]) -> Result> { let mut cus = Vec::new(); let mut offset = 0usize; while offset + 4 <= data.len() { let init_len = u32_from_slice(&data[offset..]); let total = if init_len == 0xFFFF_FFFF { - if offset + 12 > data.len() { - break; - } + crate::ensure!( + offset + 12 <= data.len(), + "truncated DWARF64 initial length in .debug_info at offset {offset:#x}" + ); let len = u64_from_slice(&data[offset + 4..]); 12 + len as usize } else { 4 + init_len as usize }; - if total == 0 || offset + total > data.len() { - break; - } + crate::ensure!( + total > 0 && offset + total <= data.len(), + "invalid CU length {total} in .debug_info at offset {offset:#x}" + ); cus.push(CuBoundary { offset: offset as u64, length: total as u64, }); offset += total; } - cus + Ok(cus) } struct PubnamesSet<'data> { @@ -151,7 +153,7 @@ struct PubnamesSet<'data> { /// /// Each set has a header pointing to a CU in `.debug_info`, followed by /// (die_offset, attrs_byte, NUL-terminated name) entries. -fn parse_pubnames_sets(data: &[u8]) -> Vec> { +fn parse_pubnames_sets(data: &[u8]) -> Result>> { let mut sets = Vec::new(); let mut pos = 0; while pos + 4 <= data.len() { @@ -159,17 +161,19 @@ fn parse_pubnames_sets(data: &[u8]) -> Vec> { let (header_size, set_end, debug_info_offset) = if init_len == 0xFFFF_FFFF { // DWARF64: 4 + 8(len) + 2(ver) + 8(offset) + 8(size) = 30 - if pos + 30 > data.len() { - break; - } + crate::ensure!( + pos + 30 <= data.len(), + "truncated DWARF64 pubnames header at offset {pos:#x}" + ); let len = u64_from_slice(&data[pos + 4..]); let dio = u64_from_slice(&data[pos + 14..]); (30, pos + 12 + len as usize, dio) } else { // DWARF32: 4(len) + 2(ver) + 4(offset) + 4(size) = 14 - if pos + 14 > data.len() { - break; - } + crate::ensure!( + pos + 14 <= data.len(), + "truncated DWARF32 pubnames header at offset {pos:#x}" + ); let dio = u64::from(u32_from_slice(&data[pos + 6..])); (14, pos + 4 + init_len as usize, dio) }; @@ -181,16 +185,18 @@ fn parse_pubnames_sets(data: &[u8]) -> Vec> { while ep < set_end { let die_offset = if is_64 { - if ep + 8 > set_end { - break; - } + crate::ensure!( + ep + 8 <= set_end, + "truncated DWARF64 pubnames entry at offset {ep:#x}" + ); let v = u64_from_slice(&data[ep..]); ep += 8; v } else { - if ep + 4 > set_end { - break; - } + crate::ensure!( + ep + 4 <= set_end, + "truncated DWARF32 pubnames entry at offset {ep:#x}" + ); let v = u64::from(u32_from_slice(&data[ep..])); ep += 4; v @@ -198,18 +204,20 @@ fn parse_pubnames_sets(data: &[u8]) -> Vec> { if die_offset == 0 { break; } - if ep >= set_end { - break; - } + crate::ensure!( + ep < set_end, + "truncated pubnames entry (missing attrs/name) at offset {ep:#x}" + ); let attrs = data[ep]; ep += 1; let name_start = ep; while ep < set_end && data[ep] != 0 { ep += 1; } - if ep >= set_end { - break; - } + crate::ensure!( + ep < set_end, + "unterminated pubnames name string at offset {name_start:#x}" + ); entries.push((&data[name_start..ep], attrs)); ep += 1; } @@ -220,7 +228,7 @@ fn parse_pubnames_sets(data: &[u8]) -> Vec> { }); pos = set_end; } - sets + Ok(sets) } /// Read raw section data from an input object by name. @@ -381,7 +389,7 @@ fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Result {})", output_buf.len() ); - Ok(parse_cu_boundaries(&output_buf[start..end]) + Ok(parse_cu_boundaries(&output_buf[start..end])? .into_iter() .map(|cu| GdbIndexCuEntry { cu_offset: cu.offset, @@ -413,7 +421,7 @@ fn scan_objects_for_gdb_index<'data>( for (object, sections) in objects { let boundaries = match raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR)? { - Some(data) => parse_cu_boundaries(data), + Some(data) => parse_cu_boundaries(data)?, None => continue, }; if boundaries.is_empty() { @@ -479,8 +487,10 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Result parse_cu_boundaries(data)?.len() as u32, + None => 0, + }; if obj_cu_count == 0 { continue; } @@ -525,7 +535,7 @@ fn collect_pubname_entries<'data>( let Some(data) = raw_section_by_name(object, section_name)? else { continue; }; - for set in parse_pubnames_sets(data) { + for set in parse_pubnames_sets(data)? { let Some(&cu_idx) = offset_to_idx.get(&set.debug_info_offset) else { continue; }; @@ -614,12 +624,12 @@ mod tests { #[test] fn test_parse_cu_boundaries() { - assert!(parse_cu_boundaries(&[]).is_empty()); + assert!(parse_cu_boundaries(&[]).unwrap().is_empty()); // Single DWARF32 CU: init_length=8, total = 4 + 8 = 12 bytes. let mut data = vec![0u8; 12]; data[0..4].copy_from_slice(&8u32.to_le_bytes()); - let cus = parse_cu_boundaries(&data); + let cus = parse_cu_boundaries(&data).unwrap(); assert_eq!(cus.len(), 1); assert_eq!(cus[0].offset, 0); assert_eq!(cus[0].length, 12); From fac7c23770fcdd98209ff2a8c2998b272f4eec56 Mon Sep 17 00:00:00 2001 From: lapla Date: Mon, 1 Jun 2026 13:53:30 +0900 Subject: [PATCH 22/24] Add some comments and warnings --- libwild/src/gdb_index.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 12f0deacd..6241c246b 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -302,6 +302,12 @@ pub(crate) fn write_gdb_index( ht_slots, .. } = scan_objects_for_gdb_index(objects)?; + if !cu_entries.is_empty() && sorted_names.is_empty() { + layout.symbol_db.warning( + "Objects lack .debug_gnu_pubnames/.debug_gnu_pubtypes sections, so the symbol table in .gdb_index will be empty. \ + Compile with -ggnu-pubnames to populate it.", + ); + } let addr_entries = build_address_entries(layout)?; let cu_list_off = HEADER_SIZE as u32; @@ -364,6 +370,9 @@ pub(crate) fn write_gdb_index( &cv_offsets, )?; + // The shortcut table lets GDB quickly determine the language of `main` without scanning the + // full index. Filling it requires looking up the DWARF language attribute of the main CU, which + // we don't currently do. GDB handles zeroed values here by falling back to its own lookup. let so = short_off as usize; let sc = GdbIndexShortcutTable { language_of_main: 0, From 70df43a26cd9b58a281abd438e180e668095f6ef Mon Sep 17 00:00:00 2001 From: lapla Date: Mon, 1 Jun 2026 14:03:23 +0900 Subject: [PATCH 23/24] Revert "Emit errors when GDB indices are corrupted" This reverts commit 638e55029aa25433291134d3ffd34cc531a0086d. --- libwild/src/gdb_index.rs | 80 ++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 45 deletions(-) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 6241c246b..334637c6c 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -116,32 +116,30 @@ struct CuBoundary { } /// Walk `.debug_info` bytes and return `(offset, total_length)` for each CU. -fn parse_cu_boundaries(data: &[u8]) -> Result> { +fn parse_cu_boundaries(data: &[u8]) -> Vec { let mut cus = Vec::new(); let mut offset = 0usize; while offset + 4 <= data.len() { let init_len = u32_from_slice(&data[offset..]); let total = if init_len == 0xFFFF_FFFF { - crate::ensure!( - offset + 12 <= data.len(), - "truncated DWARF64 initial length in .debug_info at offset {offset:#x}" - ); + if offset + 12 > data.len() { + break; + } let len = u64_from_slice(&data[offset + 4..]); 12 + len as usize } else { 4 + init_len as usize }; - crate::ensure!( - total > 0 && offset + total <= data.len(), - "invalid CU length {total} in .debug_info at offset {offset:#x}" - ); + if total == 0 || offset + total > data.len() { + break; + } cus.push(CuBoundary { offset: offset as u64, length: total as u64, }); offset += total; } - Ok(cus) + cus } struct PubnamesSet<'data> { @@ -153,7 +151,7 @@ struct PubnamesSet<'data> { /// /// Each set has a header pointing to a CU in `.debug_info`, followed by /// (die_offset, attrs_byte, NUL-terminated name) entries. -fn parse_pubnames_sets(data: &[u8]) -> Result>> { +fn parse_pubnames_sets(data: &[u8]) -> Vec> { let mut sets = Vec::new(); let mut pos = 0; while pos + 4 <= data.len() { @@ -161,19 +159,17 @@ fn parse_pubnames_sets(data: &[u8]) -> Result>> { let (header_size, set_end, debug_info_offset) = if init_len == 0xFFFF_FFFF { // DWARF64: 4 + 8(len) + 2(ver) + 8(offset) + 8(size) = 30 - crate::ensure!( - pos + 30 <= data.len(), - "truncated DWARF64 pubnames header at offset {pos:#x}" - ); + if pos + 30 > data.len() { + break; + } let len = u64_from_slice(&data[pos + 4..]); let dio = u64_from_slice(&data[pos + 14..]); (30, pos + 12 + len as usize, dio) } else { // DWARF32: 4(len) + 2(ver) + 4(offset) + 4(size) = 14 - crate::ensure!( - pos + 14 <= data.len(), - "truncated DWARF32 pubnames header at offset {pos:#x}" - ); + if pos + 14 > data.len() { + break; + } let dio = u64::from(u32_from_slice(&data[pos + 6..])); (14, pos + 4 + init_len as usize, dio) }; @@ -185,18 +181,16 @@ fn parse_pubnames_sets(data: &[u8]) -> Result>> { while ep < set_end { let die_offset = if is_64 { - crate::ensure!( - ep + 8 <= set_end, - "truncated DWARF64 pubnames entry at offset {ep:#x}" - ); + if ep + 8 > set_end { + break; + } let v = u64_from_slice(&data[ep..]); ep += 8; v } else { - crate::ensure!( - ep + 4 <= set_end, - "truncated DWARF32 pubnames entry at offset {ep:#x}" - ); + if ep + 4 > set_end { + break; + } let v = u64::from(u32_from_slice(&data[ep..])); ep += 4; v @@ -204,20 +198,18 @@ fn parse_pubnames_sets(data: &[u8]) -> Result>> { if die_offset == 0 { break; } - crate::ensure!( - ep < set_end, - "truncated pubnames entry (missing attrs/name) at offset {ep:#x}" - ); + if ep >= set_end { + break; + } let attrs = data[ep]; ep += 1; let name_start = ep; while ep < set_end && data[ep] != 0 { ep += 1; } - crate::ensure!( - ep < set_end, - "unterminated pubnames name string at offset {name_start:#x}" - ); + if ep >= set_end { + break; + } entries.push((&data[name_start..ep], attrs)); ep += 1; } @@ -228,7 +220,7 @@ fn parse_pubnames_sets(data: &[u8]) -> Result>> { }); pos = set_end; } - Ok(sets) + sets } /// Read raw section data from an input object by name. @@ -398,7 +390,7 @@ fn build_cu_list(output_buf: &[u8], layout: &Layout<'_, Elf>) -> Result {})", output_buf.len() ); - Ok(parse_cu_boundaries(&output_buf[start..end])? + Ok(parse_cu_boundaries(&output_buf[start..end]) .into_iter() .map(|cu| GdbIndexCuEntry { cu_offset: cu.offset, @@ -430,7 +422,7 @@ fn scan_objects_for_gdb_index<'data>( for (object, sections) in objects { let boundaries = match raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR)? { - Some(data) => parse_cu_boundaries(data)?, + Some(data) => parse_cu_boundaries(data), None => continue, }; if boundaries.is_empty() { @@ -496,10 +488,8 @@ fn build_address_entries(layout: &Layout<'_, Elf>) -> Result parse_cu_boundaries(data)?.len() as u32, - None => 0, - }; + let obj_cu_count = raw_section_by_name(object, DEBUG_INFO_SECTION_NAME_STR)? + .map_or(0, |data| parse_cu_boundaries(data).len() as u32); if obj_cu_count == 0 { continue; } @@ -544,7 +534,7 @@ fn collect_pubname_entries<'data>( let Some(data) = raw_section_by_name(object, section_name)? else { continue; }; - for set in parse_pubnames_sets(data)? { + for set in parse_pubnames_sets(data) { let Some(&cu_idx) = offset_to_idx.get(&set.debug_info_offset) else { continue; }; @@ -633,12 +623,12 @@ mod tests { #[test] fn test_parse_cu_boundaries() { - assert!(parse_cu_boundaries(&[]).unwrap().is_empty()); + assert!(parse_cu_boundaries(&[]).is_empty()); // Single DWARF32 CU: init_length=8, total = 4 + 8 = 12 bytes. let mut data = vec![0u8; 12]; data[0..4].copy_from_slice(&8u32.to_le_bytes()); - let cus = parse_cu_boundaries(&data).unwrap(); + let cus = parse_cu_boundaries(&data); assert_eq!(cus.len(), 1); assert_eq!(cus[0].offset, 0); assert_eq!(cus[0].length, 12); From 1aa313a8e420275887328c83100ebd9bc993f80f Mon Sep 17 00:00:00 2001 From: lapla Date: Mon, 1 Jun 2026 14:11:55 +0900 Subject: [PATCH 24/24] Add a comment about CU --- libwild/src/gdb_index.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libwild/src/gdb_index.rs b/libwild/src/gdb_index.rs index 334637c6c..53138fafc 100644 --- a/libwild/src/gdb_index.rs +++ b/libwild/src/gdb_index.rs @@ -116,6 +116,9 @@ struct CuBoundary { } /// Walk `.debug_info` bytes and return `(offset, total_length)` for each CU. +/// +/// Each CU starts with an initial length field (§7.5.1.1) encoded per §7.4: a 4-byte value, or +/// `0xFFFF_FFFF` followed by an 8-byte length for DWARF-64. fn parse_cu_boundaries(data: &[u8]) -> Vec { let mut cus = Vec::new(); let mut offset = 0usize;