diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 011acd4d..0160ee24 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -454,6 +454,9 @@ def rebase(self, new_base): super().rebase(new_base) self.addr_to_line = SortedDict((addr + delta, value) for addr, value in self.addr_to_line.items()) + self.functions_debug_info = {addr + delta: value for addr, value in self.functions_debug_info.items()} + for f in self.functions_debug_info.values(): + f.rebase(delta) # # Private Methods @@ -698,20 +701,24 @@ def _load_line_info(self, dwarf): self.addr_to_line[relocated_addr].add((str(filename), line.state.line)) @staticmethod - def _load_ranges_form_die(die: DIE, aranges) -> list[tuple[int, int]] | None: + def _load_ranges_form_die(die: DIE, aranges, base_addr: int | None = None) -> list[tuple[int, int]] | None: if aranges is not None and "DW_AT_ranges" in die.attributes: - base_addr = 0 result = [] for entry in aranges.get_range_list_at_offset(die.attributes["DW_AT_ranges"].value, die.cu): if isinstance(entry, BaseAddressEntry): base_addr = entry.base_address elif isinstance(entry, RangeEntry): - result.append((base_addr + entry.begin_offset, base_addr + entry.end_offset)) + if entry.is_absolute: + result.append((entry.begin_offset, entry.end_offset)) + else: + if base_addr is None: + base_addr = die.cu.get_top_DIE().attributes["DW_AT_low_pc"].value + result.append((base_addr + entry.begin_offset, base_addr + entry.end_offset)) return result return None @staticmethod - def _load_low_high_pc_form_die(die: DIE) -> tuple[int | None, int | None]: + def _load_low_high_pc_form_die(die: DIE, base_addr: int = 0) -> tuple[int | None, int | None]: """ Load low and high pc from a DIE. @@ -723,7 +730,7 @@ def _load_low_high_pc_form_die(die: DIE) -> tuple[int | None, int | None]: lowpc = die.attributes["DW_AT_low_pc"].value if "DW_AT_high_pc" not in die.attributes: - return lowpc, None + return lowpc + base_addr, None # DWARF v4 in section 2.17 describes how to interpret the # DW_AT_high_pc attribute based on the class of its form. @@ -739,7 +746,7 @@ def _load_low_high_pc_form_die(die: DIE) -> tuple[int | None, int | None]: else: log.warning("Error: invalid DW_AT_high_pc class:%s", highpc_attr_class) return lowpc, None - return lowpc, highpc + return lowpc + base_addr, highpc + base_addr def _load_dies(self, dwarf: DWARFInfo): """ @@ -827,7 +834,7 @@ def _load_die_namespace( if sub_prog is not None: assert isinstance(sub_prog, Subprogram) cu_.functions[sub_prog.low_pc] = sub_prog - self.functions_debug_info[sub_prog.low_pc] = sub_prog + self.functions_debug_info[sub_prog.ranges[0][0]] = sub_prog elif die_child.tag == "DW_TAG_namespace": if "DW_AT_name" in die_child.attributes: new_namespace = namespace + [die_child.attributes["DW_AT_name"].value.decode("utf-8")] @@ -847,14 +854,48 @@ def _load_die_lex_block( subprogram, namespace: list[str] | None = None, ) -> LexicalBlock | None: + if "DW_AT_abstract_origin" in die.attributes: + origin = cu.get_DIE_from_refaddr(cu.cu_offset + die.attributes["DW_AT_abstract_origin"].value) + else: + origin = None + if "DW_AT_name" in die.attributes: name = "::".join((namespace or []) + [die.attributes["DW_AT_name"].value.decode("utf-8")]) - elif "DW_AT_abstract_origin" in die.attributes: - origin = die.get_DIE_from_attribute("DW_AT_abstract_origin") + elif origin is not None: name = self._dwarf_get_name_with_namespace(origin) else: name = None + if "DW_AT_decl_file" in die.attributes: + filename_idx = die.attributes["DW_AT_decl_file"].value + elif origin is not None and "DW_AT_decl_file" in origin.attributes: + filename_idx = origin.attributes["DW_AT_decl_file"].value + else: + filename_idx = None + + if filename_idx is not None: + debug_line = dwarf.line_program_for_CU(cu) + assert debug_line is not None + if debug_line.header.file_names is None: + assert filename_idx == 1 + filename = file_path + else: + basename = debug_line.header.file_names[filename_idx] + basename_str = basename.DW_LNCT_path.decode(errors="replace") + dirname_idx = basename.DW_LNCT_directory_index + dirname = debug_line.header.directories[dirname_idx] + dirname_str = dirname.DW_LNCT_path.decode(errors="replace") + filename = f"{dirname_str}/{basename_str}" + else: + filename = None + + if "DW_AT_decl_line" in die.attributes: + line = die.attributes["DW_AT_decl_line"].value + elif origin is not None and "DW_AT_decl_line" in origin.attributes: + line = origin.attributes["DW_AT_decl_line"].value + else: + line = None + low_pc, high_pc = self._load_low_high_pc_form_die(die) ranges = None if low_pc is None or high_pc is None: @@ -863,9 +904,9 @@ def _load_die_lex_block( return None if subprogram is None: - subprogram = block = Subprogram(name, low_pc, high_pc, ranges) + subprogram = block = Subprogram(name, low_pc, high_pc, ranges, filename, line) else: - block = LexicalBlock(low_pc, high_pc, ranges) + block = LexicalBlock(low_pc, high_pc, ranges, filename, line) for sub_die in cu.iter_DIE_children(die): if sub_die.tag in ["DW_TAG_variable", "DW_TAG_formal_parameter"]: @@ -880,8 +921,10 @@ def _load_die_lex_block( if sub_block is not None: block.child_blocks.append(sub_block) elif sub_die.tag == "DW_TAG_inlined_subroutine": - subr = InlinedFunction() + subr = InlinedFunction(sub_die.offset) low_pc, high_pc = self._load_low_high_pc_form_die(sub_die) + if "DW_AT_entry_pc" in sub_die.attributes: + subr.entry = sub_die.attributes["DW_AT_entry_pc"].value if low_pc is not None and high_pc is not None: subr.ranges.append((low_pc, high_pc)) elif "DW_AT_ranges" in sub_die.attributes: @@ -892,8 +935,22 @@ def _load_die_lex_block( if "DW_AT_abstract_origin" in sub_die.attributes: origin = sub_die.get_DIE_from_attribute("DW_AT_abstract_origin") subr.name = self._dwarf_get_name_with_namespace(origin) + if "DW_AT_external" in origin.attributes: + subr.extern = origin.attributes["DW_AT_external"].value + nargs = 0 + for arg_die in origin.iter_children(): + if arg_die.tag == "DW_TAG_formal_parameter": + nargs += 1 + subr.nargs = nargs + subprogram.inlined_functions.append(subr) + sub_block = self._load_die_lex_block( + sub_die, dwarf, aranges, expr_parser, type_list, cu, file_path, subprogram, namespace + ) + if sub_block is not None: + block.child_blocks.append(sub_block) + return block @staticmethod diff --git a/cle/backends/elf/subprogram.py b/cle/backends/elf/subprogram.py index 570c3db0..61750290 100644 --- a/cle/backends/elf/subprogram.py +++ b/cle/backends/elf/subprogram.py @@ -22,8 +22,16 @@ class LexicalBlock: :type child_blocks: List[LexicalBlock] """ - def __init__(self, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None) -> None: - self.ranges = ranges + def __init__( + self, + low_pc: int | None, + high_pc: int | None, + ranges: list[tuple[int, int]] | None = None, + source_file: str | None = None, + source_line: int | None = None, + ) -> None: + self.source_file = source_file + self.source_line = source_line if low_pc is None and high_pc is None: if ranges is not None: @@ -31,10 +39,20 @@ def __init__(self, low_pc: int | None, high_pc: int | None, ranges: list[tuple[i high_pc = max(x for _, x in ranges) if low_pc is None or high_pc is None: raise ValueError("Must provide low_pc/high_pc or ranges") + if ranges is None: + ranges = [(low_pc, high_pc)] + self.ranges = ranges self.low_pc = low_pc self.high_pc = high_pc self.child_blocks: list[LexicalBlock] = [] + def rebase(self, delta: int): + self.low_pc += delta + self.high_pc += delta + self.ranges = [(lo + delta, hi + delta) for lo, hi in self.ranges] + for blk in self.child_blocks: + blk.rebase(delta) + class Subprogram(LexicalBlock): """ @@ -52,11 +70,22 @@ class Subprogram(LexicalBlock): """ def __init__( - self, name: str | None, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None + self, + name: str | None, + low_pc: int | None, + high_pc: int | None, + ranges: list[tuple[int, int]] | None = None, + source_file: str | None = None, + source_line: int | None = None, ) -> None: # pass self as the super_block of this subprogram self.subprogram = self - super().__init__(low_pc, high_pc, ranges) + super().__init__(low_pc, high_pc, ranges, source_file, source_line) self.name = name self.local_variables: list[Variable] = [] self.inlined_functions: list[InlinedFunction] = [] + + def rebase(self, delta: int): + super().rebase(delta) + for inl in self.inlined_functions: + inl.rebase(delta) diff --git a/cle/backends/inlined_function.py b/cle/backends/inlined_function.py index f4dd86b1..4345c5aa 100644 --- a/cle/backends/inlined_function.py +++ b/cle/backends/inlined_function.py @@ -3,14 +3,18 @@ from dataclasses import dataclass, field -@dataclass +@dataclass(eq=False) class InlinedFunction: """ A representation of a piece of a function which is inlined from another function. """ + dwoffset: int name: str | None = None ranges: list[tuple[int, int]] = field(default_factory=list) + extern: bool = False + entry: int | None = None + nargs: int | None = None @property def low_pc(self): @@ -19,3 +23,6 @@ def low_pc(self): @property def high_pc(self): return max(x for _, x in self.ranges) + + def rebase(self, delta: int): + self.ranges = [(lo + delta, hi + delta) for lo, hi in self.ranges]