From 6938a6a7b6d8d111b9b8c4406c08e1632624bc58 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Mon, 7 Jul 2025 14:48:34 -0400 Subject: [PATCH 01/15] Fixed abstract origin address with form ref_addr (in contrast with the more typical ref4) --- cle/backends/elf/elf.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 9de20b5a..8c8f1c8d 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -833,10 +833,19 @@ def _load_die_lex_block( subprogram, namespace: list[str] | None = None, ) -> LexicalBlock | None: + def resolve_abstract_origin(die): + abstract_origin_attribute = die.attributes["DW_AT_abstract_origin"] + if abstract_origin_attribute.form == "DW_FORM_ref_addr": + abstract_origin = abstract_origin_attribute.value + origin_cu = dwarf.get_CU_containing(abstract_origin) + return origin_cu.get_DIE_from_refaddr(abstract_origin) + else: + return cu.get_DIE_from_refaddr(cu.cu_offset + die.attributes["DW_AT_abstract_origin"].value) + if "DW_AT_name" in die.attributes: name = "::".join((namespace or []) + [die.attributes["DW_AT_name"].value.decode("utf-8")]) elif "DW_AT_abstract_origin" in die.attributes: - origin = cu.get_DIE_from_refaddr(cu.cu_offset + die.attributes["DW_AT_abstract_origin"].value) + origin = resolve_abstract_origin(die) name = self._dwarf_get_name_with_namespace(origin) else: name = None @@ -876,7 +885,7 @@ def _load_die_lex_block( if ranges is not None: subr.ranges = ranges if "DW_AT_abstract_origin" in sub_die.attributes: - origin = cu.get_DIE_from_refaddr(cu.cu_offset + sub_die.attributes["DW_AT_abstract_origin"].value) + origin = resolve_abstract_origin(sub_die) subr.name = self._dwarf_get_name_with_namespace(origin) subprogram.inlined_functions.append(subr) From 16128ba9cc53dd1f7cfdb002d9835e76e3056227 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Mon, 7 Jul 2025 14:50:48 -0400 Subject: [PATCH 02/15] Code now loads parameter information from DWARF --- cle/backends/elf/elf.py | 8 ++++++-- cle/backends/elf/subprogram.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 8c8f1c8d..c61d14e5 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -858,16 +858,20 @@ def resolve_abstract_origin(die): return None if subprogram is None: - subprogram = block = Subprogram(name, low_pc, high_pc, ranges) + subprogram = block = Subprogram.from_die(die, self, name, low_pc, high_pc, ranges) else: block = LexicalBlock(low_pc, high_pc, ranges) for sub_die in cu.iter_DIE_children(die): - if sub_die.tag in ["DW_TAG_variable", "DW_TAG_formal_parameter"]: + is_variable = sub_die.tag == "DW_TAG_variable" + is_formal_parameter = sub_die.tag == "DW_TAG_formal_parameter" + if is_variable or is_formal_parameter: # load local variable var = Variable.from_die(sub_die, expr_parser, self, block) var.decl_file = file_path subprogram.local_variables.append(var) + if is_formal_parameter: + subprogram.parameters.append(var) elif sub_die.tag == "DW_TAG_lexical_block": sub_block = self._load_die_lex_block( sub_die, dwarf, aranges, expr_parser, type_list, cu, file_path, subprogram, namespace diff --git a/cle/backends/elf/subprogram.py b/cle/backends/elf/subprogram.py index 570c3db0..3f43800f 100644 --- a/cle/backends/elf/subprogram.py +++ b/cle/backends/elf/subprogram.py @@ -1,9 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +from elftools.dwarf.die import DIE + from cle.backends.inlined_function import InlinedFunction from .variable import Variable +from .variable_type import VariableType + +if TYPE_CHECKING: + from .elf import ELF class LexicalBlock: """ @@ -41,6 +49,7 @@ class Subprogram(LexicalBlock): DW_TAG_subprogram for DWARF. The behavior is mostly inherited from LexicalBlock to avoid redundancy. + :param elf_object: The ELF object containing the subprogram :param name: The name of the function/program :param low_pc: The relative start address of the subprogram :param high_pc: The relative end address of the subprogram @@ -52,11 +61,30 @@ class Subprogram(LexicalBlock): """ def __init__( - self, name: str | None, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None + self, elf_object: ELF, name: str | None, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None, ) -> None: + self._elf_object = elf_object # pass self as the super_block of this subprogram self.subprogram = self super().__init__(low_pc, high_pc, ranges) self.name = name + self.parameters: list[Variable] = [] self.local_variables: list[Variable] = [] self.inlined_functions: list[InlinedFunction] = [] + self._return_type_offset = None + + @staticmethod + def from_die(die: DIE, elf_object: ELF, name: str | None, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None): + sub_prg = Subprogram(elf_object, name, low_pc, high_pc, ranges=ranges) + if "DW_AT_type" in die.attributes: + sub_prg._return_type_offset = die.attributes["DW_AT_type"].value + die.cu.cu_offset + return sub_prg + + @property + def return_type(self) -> VariableType | None: + # Note that in DWARF an omitted return type typically means a void return type + try: + return self._elf_object.type_list[self._return_type_offset] + except KeyError: + return None + From 7ef5052317230de3287d146a6de43026e8fb09a8 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Tue, 8 Jul 2025 17:02:14 -0400 Subject: [PATCH 03/15] Changes to DWARF types: added encoding field to BaseType if available. Added count, lower_bound and upper_bound to ArrayType. --- cle/backends/elf/variable_type.py | 72 ++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 7 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 4cc7c17f..bfdbcf8d 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -1,5 +1,7 @@ from __future__ import annotations +from enum import Enum + from elftools.dwarf.die import DIE @@ -71,10 +73,14 @@ def read_from_die(cls, die: DIE, elf_object): read an entry of DW_TAG_pointer_type. return None when there is no byte_size or type attribute. """ - byte_size = die.attributes.get("DW_AT_byte_size", None) + byte_size_attr = die.attributes.get("DW_AT_byte_size", None) - if byte_size is None: - return None + if byte_size_attr is not None: + byte_size = byte_size_attr.value + else: + # In testing it looks like the Rust compiler does not emit a byte_size attribute + # Instead let's just say that the size of a pointer is given by the ELF's architecture + byte_size = elf_object.arch.bytes dw_at_type = die.attributes.get("DW_AT_type", None) if dw_at_type is None: @@ -82,7 +88,7 @@ def read_from_die(cls, die: DIE, elf_object): else: referenced_offset = dw_at_type.value + die.cu.cu_offset - return cls(byte_size.value, elf_object, referenced_offset) + return cls(byte_size, elf_object, referenced_offset) @property def referenced_type(self): @@ -94,12 +100,37 @@ def referenced_type(self): return type_list[self._referenced_offset] return None +class BaseTypeEncoding(Enum): + ADDRESS = 0x1 + BOOLEAN = 0x2 + COMPLEX_FLOAT = 0x3 + FLOAT = 0x4 + SIGNED = 0x5 + SIGNED_CHAR = 0x6 + UNSIGNED = 0x7 + UNSIGNED_CHAR = 0x8 + IMAGINARY_FLOAT = 0x9 + PACKED_DECIMAL = 0xa + NUMERIC_STRING = 0xb + EDITED = 0xc + SIGNED_FIXED = 0xd + UNSIGNED_FIXED = 0xe + DECIMAL_FLOAT = 0xf + UTF = 0x10 + UCS = 0x11 + ASCII = 0x12 + LO_USER = 0x80 + HI_USER = 0xff class BaseType(VariableType): """ Entry class for DW_TAG_base_type. It is inherited from VariableType """ + def __init__(self, name: str, byte_size: int, elf_object, encoding): + super().__init__(name, byte_size, elf_object) + self.encoding = encoding + # for __init__ see VariableType @classmethod @@ -111,9 +142,14 @@ def read_from_die(cls, die: DIE, elf_object): dw_at_name = die.attributes.get("DW_AT_name", None) byte_size = die.attributes.get("DW_AT_byte_size", None) + encoding_attr = die.attributes.get("DW_AT_encoding", None) + if encoding_attr is not None: + encoding = BaseTypeEncoding(encoding_attr.value) + else: + encoding = None if byte_size is None: return None - return cls(dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size.value, elf_object) + return cls(dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size.value, elf_object, encoding) class StructType(VariableType): @@ -227,9 +263,12 @@ class ArrayType(VariableType): :param element_offset: type of the array elements as offset in the compilation_unit """ - def __init__(self, byte_size, elf_object, element_offset): + def __init__(self, byte_size, elf_object, element_offset, count: int | None, lower_bound: int | None, upper_bound: int | None): super().__init__("array", byte_size, elf_object) self._element_offset = element_offset + self.count = count + self.lower_bound = lower_bound + self.upper_bound = upper_bound @classmethod def read_from_die(cls, die: DIE, elf_object): @@ -243,8 +282,27 @@ def read_from_die(cls, die: DIE, elf_object): dw_at_type = die.attributes.get("DW_AT_type", None) if dw_at_type is None: return None + + count = None + lower_bound = None + upper_bound = None + for child in die.iter_children(): + match child.tag: + case "DW_TAG_subrange_type": + count_attr = child.attributes.get("DW_AT_count", None) + if count_attr is not None: + count = count_attr.value + lower_bound_attr = child.attributes.get("DW_AT_lower_bound", None) + if lower_bound_attr is not None: + lower_bound = lower_bound_attr.value + upper_bound_attr = child.attributes.get("DW_AT_upper_bound", None) + if upper_bound_attr is not None: + upper_bound = upper_bound_attr.value + break + return cls( - dw_byte_size.value if dw_byte_size is not None else None, elf_object, dw_at_type.value + die.cu.cu_offset + dw_byte_size.value if dw_byte_size is not None else None, elf_object, dw_at_type.value + die.cu.cu_offset, + count, lower_bound, upper_bound ) @property From 19245474799b6c3a810cf8e3abd71daa31b3d4aa Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 9 Jul 2025 15:03:18 -0400 Subject: [PATCH 04/15] Added support for enumeration and subroutine DWARF types --- cle/backends/elf/variable_type.py | 114 ++++++++++++++++++++++++++++-- 1 file changed, 110 insertions(+), 4 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index bfdbcf8d..404c4dd7 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -4,7 +4,6 @@ from elftools.dwarf.die import DIE - class VariableType: """ Entry class for DW_TAG_xxx_type @@ -40,6 +39,10 @@ def read_from_die(die: DIE, elf_object): return TypedefType.read_from_die(die, elf_object) elif die.tag == "DW_TAG_union_type": return UnionType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_enumeration_type": + return EnumerationType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_subroutine_type": + return SubroutineType.read_from_die(die, elf_object) return None @staticmethod @@ -51,6 +54,8 @@ def supported_die(die: DIE) -> bool: "DW_TAG_array_type", "DW_TAG_typedef", "DW_TAG_union_type", + "DW_TAG_enumeration_type", + "DW_TAG_subroutine_type" ) @@ -63,8 +68,10 @@ class PointerType(VariableType): :param referenced_offset: type of the referenced as offset in the compilation_unit """ - def __init__(self, byte_size: int, elf_object, referenced_offset: int): - super().__init__("pointer", byte_size, elf_object) + def __init__(self, name: str | None, byte_size: int, elf_object, referenced_offset: int): + if name is None: + name = "pointer" + super().__init__(name, byte_size, elf_object) self._referenced_offset = referenced_offset @classmethod @@ -75,6 +82,9 @@ def read_from_die(cls, die: DIE, elf_object): """ byte_size_attr = die.attributes.get("DW_AT_byte_size", None) + name_attr = die.attributes.get("DW_AT_name", None) + name = None if name_attr is None else name_attr.value.decode() + if byte_size_attr is not None: byte_size = byte_size_attr.value else: @@ -88,7 +98,7 @@ def read_from_die(cls, die: DIE, elf_object): else: referenced_offset = dw_at_type.value + die.cu.cu_offset - return cls(byte_size, elf_object, referenced_offset) + return cls(name, byte_size, elf_object, referenced_offset) @property def referenced_type(self): @@ -352,3 +362,99 @@ def type(self): if self._type_offset in type_list.keys(): return type_list[self._type_offset] return None + +class EnumeratorValue: + def __init__(self, name: str, const_value): + self.name = name + self.const_value = const_value + + @classmethod + def read_from_die(cls, die: DIE, elf_object): + name_attr = die.attributes.get("DW_AT_name", None) + name = None if name_attr is None else name_attr.value.decode() + + const_value_attr = die.attributes.get("DW_AT_const_value", None) + const_value = None if const_value_attr is None else const_value_attr.value + + return cls(name, const_value) + +class EnumerationType(VariableType): + def __init__(self, name: str, byte_size: int, elf_object, enumerator_values: list[EnumeratorValue]): + super().__init__(name, byte_size, elf_object) + self.enumerator_values = enumerator_values + + def __len__(self): + return len(self.enumerator_values) + + def __iter__(self): + return iter(self.enumerator_values) + + @classmethod + def read_from_die(cls, die: DIE, elf_object): + """ + read an entry of DW_TAG_enumeration_type. + """ + + dw_byte_size = die.attributes.get("DW_AT_byte_size", None) + byte_size = dw_byte_size.value if dw_byte_size is not None else None + + name_attr = die.attributes.get("DW_AT_name", None) + name = None if name_attr is None else name_attr.value.decode() + + enumerators = [] + for child in die.iter_children(): + match child.tag: + case "DW_TAG_enumerator": + enumerators.append(EnumeratorValue.read_from_die(child, elf_object)) + + return cls(name, byte_size, elf_object, enumerators) + +class SubroutineType(VariableType): + def __init__(self, name: str, byte_size: int, elf_object, type_offset: int | None, parameter_offsets): + super().__init__(name, byte_size, elf_object) + self._type_offset = type_offset + self._parameter_offsets = parameter_offsets + + @property + def type(self): + """ + The return type of the subroutine, or None if the subroutine returns no value + """ + if self._type_offset is None: + return None + else: + return self._elf_object.type_list[self._type_offset] + + @property + def parameters(self): + """ + Iterates over the parameters of the subroutine + """ + type_list = self._elf_object.type_list + for offset in self._parameter_offsets: + yield type_list[offset] + + @classmethod + def read_from_die(cls, die: DIE, elf_object): + """ + read an entry of DW_TAG_subroutine_type + """ + + dw_byte_size = die.attributes.get("DW_AT_byte_size", None) + byte_size = dw_byte_size.value if dw_byte_size is not None else None + + name_attr = die.attributes.get("DW_AT_name", None) + name = None if name_attr is None else name_attr.value.decode() + + dw_at_type = die.attributes.get("DW_AT_type", None) + type_offset = None if dw_at_type is None else dw_at_type.value + die.cu.cu_offset + + parameter_offsets: list[int] = [] + for child in die.iter_children(): + match child.tag: + case "DW_TAG_formal_parameter": + param_type_attr = child.attributes.get("DW_AT_type", None) + param_type_offset = None if param_type_attr is None else param_type_attr.value + die.cu.cu_offset + parameter_offsets.append(param_type_offset) + + return cls(name, byte_size, elf_object, type_offset, parameter_offsets) \ No newline at end of file From 26be21d6592d6bf56a2c7f367742f8dc9da8e5a7 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 9 Jul 2025 16:21:52 -0400 Subject: [PATCH 05/15] EnumerationType now stores its underlying type --- cle/backends/elf/variable_type.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 404c4dd7..f3c7fb35 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -379,9 +379,10 @@ def read_from_die(cls, die: DIE, elf_object): return cls(name, const_value) class EnumerationType(VariableType): - def __init__(self, name: str, byte_size: int, elf_object, enumerator_values: list[EnumeratorValue]): + def __init__(self, name: str, byte_size: int, elf_object, type_offset, enumerator_values: list[EnumeratorValue]): super().__init__(name, byte_size, elf_object) self.enumerator_values = enumerator_values + self._type_offset = type_offset def __len__(self): return len(self.enumerator_values) @@ -389,6 +390,13 @@ def __len__(self): def __iter__(self): return iter(self.enumerator_values) + @property + def type(self): + """ + The underlying type of the enumeration + """ + return self._elf_object.type_list[self._type_offset] + @classmethod def read_from_die(cls, die: DIE, elf_object): """ @@ -401,13 +409,16 @@ def read_from_die(cls, die: DIE, elf_object): name_attr = die.attributes.get("DW_AT_name", None) name = None if name_attr is None else name_attr.value.decode() + dw_at_type = die.attributes.get("DW_AT_type", None) + type_offset = None if dw_at_type is None else dw_at_type.value + die.cu.cu_offset + enumerators = [] for child in die.iter_children(): match child.tag: case "DW_TAG_enumerator": enumerators.append(EnumeratorValue.read_from_die(child, elf_object)) - return cls(name, byte_size, elf_object, enumerators) + return cls(name, byte_size, elf_object, type_offset, enumerators) class SubroutineType(VariableType): def __init__(self, name: str, byte_size: int, elf_object, type_offset: int | None, parameter_offsets): From 23530d9013ad1027bc9c44b3a965814e4819637f Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Mon, 14 Jul 2025 12:52:38 -0400 Subject: [PATCH 06/15] Added VariantType --- cle/backends/elf/variable_type.py | 65 +++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index f3c7fb35..e9001b2c 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -182,6 +182,10 @@ def read_from_die(cls, die: DIE, elf_object): byte_size attribute. """ + for die_child in die.iter_children(): + if die_child.tag == "DW_TAG_variant_part": + return VariantType.read_from_die(die, elf_object) + dw_at_name = die.attributes.get("DW_AT_name", None) byte_size = die.attributes.get("DW_AT_byte_size", None) @@ -209,6 +213,67 @@ class UnionType(StructType): Entry class for DW_TAG_union_type. Inherits from StructType to make it trivial. """ +class VariantValue: + """ + This class represents one possible value for a variant. + + :param value: The discriminator/tag value used for indicating this variant + :param member: The member representing the layout of this particular variant + """ + def __init__(self, value: int | None, member: StructMember | None): + self.value = value + self.member = member + + @classmethod + def read_from_die(cls, die: DIE, elf_object): + value_attr = die.attributes.get("DW_AT_discr_value", None) + if value_attr is not None: + value = value_attr.value + else: + value = None + + member = None + for die_child in die.iter_children(): + if die_child.tag == "DW_TAG_member": + member = StructMember.read_from_die(die_child, elf_object) + break + + return cls(value, member) + +class VariantType(VariableType): + def __init__(self, name, byte_size, elf_object, discr: StructMember | None, discr_values: list[VariantValue]): + super().__init__(name, byte_size, elf_object) + self.discr = discr + self.discr_values = discr_values + + @classmethod + def read_from_die(cls, die: DIE, elf_object): + dw_at_name = die.attributes.get("DW_AT_name", None) + byte_size = die.attributes.get("DW_AT_byte_size", None) + + name = dw_at_name.value.decode() if dw_at_name is not None else "unknown" + + variant_part = None + for die_child in die.iter_children(): + if die_child.tag == "DW_TAG_variant_part": + variant_part = die_child + break + + discr_attr = variant_part.attributes.get("DW_AT_discr", None) + if discr_attr is not None: + discr_offset = discr_attr.value + die.cu.cu_offset + discr_die = die.cu.get_DIE_from_refaddr(discr_offset) + discr = StructMember.read_from_die(discr_die, elf_object) + else: + discr = None + + values = [] + if variant_part is not None: + for die_child in variant_part.iter_children(): + if die_child.tag == "DW_TAG_variant": + values.append(VariantValue.read_from_die(die_child, elf_object)) + + return cls(name, byte_size.value, elf_object, discr, values) class StructMember: """ From d789bfb3a6da30c00477282f3a2955c30617ddf1 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Mon, 14 Jul 2025 13:29:19 -0400 Subject: [PATCH 07/15] Moved DWARF reference resolving code to a more abstract function that can handle any attribute. An error is now thrown if the reference form is not one we can understand. --- cle/backends/elf/elf.py | 15 +++---------- cle/backends/elf/variable_type.py | 37 ++++++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 8c8f1c8d..9655ceec 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -39,7 +39,7 @@ from .subprogram import LexicalBlock, Subprogram from .symbol import ELFSymbol, Symbol, SymbolType from .variable import Variable -from .variable_type import VariableType +from .variable_type import VariableType, resolve_reference try: import pypcode @@ -833,19 +833,10 @@ def _load_die_lex_block( subprogram, namespace: list[str] | None = None, ) -> LexicalBlock | None: - def resolve_abstract_origin(die): - abstract_origin_attribute = die.attributes["DW_AT_abstract_origin"] - if abstract_origin_attribute.form == "DW_FORM_ref_addr": - abstract_origin = abstract_origin_attribute.value - origin_cu = dwarf.get_CU_containing(abstract_origin) - return origin_cu.get_DIE_from_refaddr(abstract_origin) - else: - return cu.get_DIE_from_refaddr(cu.cu_offset + die.attributes["DW_AT_abstract_origin"].value) - if "DW_AT_name" in die.attributes: name = "::".join((namespace or []) + [die.attributes["DW_AT_name"].value.decode("utf-8")]) elif "DW_AT_abstract_origin" in die.attributes: - origin = resolve_abstract_origin(die) + origin = resolve_reference(dwarf, cu, die.attributes["DW_AT_abstract_origin"]) name = self._dwarf_get_name_with_namespace(origin) else: name = None @@ -885,7 +876,7 @@ def resolve_abstract_origin(die): if ranges is not None: subr.ranges = ranges if "DW_AT_abstract_origin" in sub_die.attributes: - origin = resolve_abstract_origin(sub_die) + origin = resolve_reference(dwarf, cu, sub_die.attributes["DW_AT_abstract_origin"]) subr.name = self._dwarf_get_name_with_namespace(origin) subprogram.inlined_functions.append(subr) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 4cc7c17f..8542be9b 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -1,7 +1,34 @@ from __future__ import annotations -from elftools.dwarf.die import DIE +from elftools.dwarf.compileunit import CompileUnit +from elftools.dwarf.die import DIE, AttributeValue +from elftools.dwarf.dwarfinfo import DWARFInfo +def resolve_reference_addr(cu: CompileUnit, attr: AttributeValue) -> int: + """ + Resolves a reference attribute to the address of the underlying DWARF DIE + :param cu: The compilation unit that the attribute is located in + :param attr: The attribute to resolve + :return: The resolved absolute address, or throws ValueError if the reference is of a form that we cannot resolve + """ + if attr.form == "DW_FORM_ref_addr": + return attr.value + elif attr.form in ["DW_FORM_ref1", "DW_FORM_ref2", "DW_FORM_ref4", "DW_FORM_ref8"]: + return cu.cu_offset + attr.value + else: + raise ValueError(f"Unable to resolve DWARF reference with form {attr.form}. Support for this form is not currently implemented.") + +def resolve_reference(dwarf: DWARFInfo, cu: CompileUnit, attr: AttributeValue) -> DIE: + """ + Resolves a reference attribute to the underlying DWARF DIE + :param dwarf: The DWARF info + :param cu: The compilation unit that the attribute is located in + :param attr: The attribute to resolve + :return: The resolved DIE, or throws ValueError if the reference is of a form that we cannot resolve + """ + addr = resolve_reference_addr(cu, attr) + origin_cu = dwarf.get_CU_containing(addr) + return origin_cu.get_DIE_from_refaddr(addr) class VariableType: """ @@ -80,7 +107,7 @@ def read_from_die(cls, die: DIE, elf_object): if dw_at_type is None: referenced_offset = None else: - referenced_offset = dw_at_type.value + die.cu.cu_offset + referenced_offset = resolve_reference_addr(die.cu, dw_at_type) return cls(byte_size.value, elf_object, referenced_offset) @@ -194,7 +221,7 @@ def read_from_die(cls, die: DIE, elf_object): dw_at_type = die.attributes.get("DW_AT_type", None) dw_at_memloc = die.attributes.get("DW_AT_data_member_location", None) name = None if dw_at_name is None else dw_at_name.value.decode() - ty = None if dw_at_type is None else dw_at_type.value + die.cu.cu_offset + ty = None if dw_at_type is None else resolve_reference_addr(die.cu, dw_at_type) # From the DWARF5 manual, page 118: # The member entry corresponding to a data member that is defined in a structure, @@ -244,7 +271,7 @@ def read_from_die(cls, die: DIE, elf_object): if dw_at_type is None: return None return cls( - dw_byte_size.value if dw_byte_size is not None else None, elf_object, dw_at_type.value + die.cu.cu_offset + dw_byte_size.value if dw_byte_size is not None else None, elf_object, resolve_reference_addr(die.cu, dw_at_type) ) @property @@ -279,7 +306,7 @@ def read_from_die(cls, die: DIE, elf_object): dw_at_type = die.attributes.get("DW_AT_type", None) dw_at_byte_size = die.attributes.get("DW_AT_byte_size", None) name = None if dw_at_name is None else dw_at_name.value.decode() - type_offset = None if dw_at_type is None else dw_at_type.value + die.cu.cu_offset + type_offset = None if dw_at_type is None else resolve_reference_addr(die.cu, dw_at_type) byte_size = None if dw_at_byte_size is None else dw_at_byte_size.value return cls(name, byte_size, elf_object, type_offset) From 55928523a5d774840a7ab7394c3faa5a52ae8200 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 16 Jul 2025 13:28:48 -0400 Subject: [PATCH 08/15] Removed resolve_reference in favor of using die.get_DIE_from_attribute. Changed and simplified implementation of resolve_reference_addr to make use of this method. Cleaned up imports that are now unused. --- cle/backends/elf/elf.py | 6 ++--- cle/backends/elf/variable_type.py | 43 ++++++++++--------------------- 2 files changed, 16 insertions(+), 33 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 9655ceec..cfc050e1 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -39,7 +39,7 @@ from .subprogram import LexicalBlock, Subprogram from .symbol import ELFSymbol, Symbol, SymbolType from .variable import Variable -from .variable_type import VariableType, resolve_reference +from .variable_type import VariableType try: import pypcode @@ -836,7 +836,7 @@ def _load_die_lex_block( if "DW_AT_name" in die.attributes: name = "::".join((namespace or []) + [die.attributes["DW_AT_name"].value.decode("utf-8")]) elif "DW_AT_abstract_origin" in die.attributes: - origin = resolve_reference(dwarf, cu, die.attributes["DW_AT_abstract_origin"]) + origin = die.get_DIE_from_attribute("DW_AT_abstract_origin") name = self._dwarf_get_name_with_namespace(origin) else: name = None @@ -876,7 +876,7 @@ def _load_die_lex_block( if ranges is not None: subr.ranges = ranges if "DW_AT_abstract_origin" in sub_die.attributes: - origin = resolve_reference(dwarf, cu, sub_die.attributes["DW_AT_abstract_origin"]) + origin = sub_die.get_DIE_from_attribute("DW_AT_abstract_origin") subr.name = self._dwarf_get_name_with_namespace(origin) subprogram.inlined_functions.append(subr) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 8542be9b..77d3e658 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -1,34 +1,16 @@ from __future__ import annotations -from elftools.dwarf.compileunit import CompileUnit -from elftools.dwarf.die import DIE, AttributeValue -from elftools.dwarf.dwarfinfo import DWARFInfo +from elftools.dwarf.die import DIE -def resolve_reference_addr(cu: CompileUnit, attr: AttributeValue) -> int: +def resolve_reference_addr(die: DIE, attr_name: str) -> int: """ - Resolves a reference attribute to the address of the underlying DWARF DIE - :param cu: The compilation unit that the attribute is located in - :param attr: The attribute to resolve - :return: The resolved absolute address, or throws ValueError if the reference is of a form that we cannot resolve + Resolves a reference attribute to the underlying DIE location + :param die: The DIE containing the reference attribute + :param attr_name: The name of the attribute as a string + :return: The address of the DIE referred to by the reference """ - if attr.form == "DW_FORM_ref_addr": - return attr.value - elif attr.form in ["DW_FORM_ref1", "DW_FORM_ref2", "DW_FORM_ref4", "DW_FORM_ref8"]: - return cu.cu_offset + attr.value - else: - raise ValueError(f"Unable to resolve DWARF reference with form {attr.form}. Support for this form is not currently implemented.") - -def resolve_reference(dwarf: DWARFInfo, cu: CompileUnit, attr: AttributeValue) -> DIE: - """ - Resolves a reference attribute to the underlying DWARF DIE - :param dwarf: The DWARF info - :param cu: The compilation unit that the attribute is located in - :param attr: The attribute to resolve - :return: The resolved DIE, or throws ValueError if the reference is of a form that we cannot resolve - """ - addr = resolve_reference_addr(cu, attr) - origin_cu = dwarf.get_CU_containing(addr) - return origin_cu.get_DIE_from_refaddr(addr) + resolved_die = die.get_DIE_from_attribute(attr_name) + return resolved_die.offset + resolved_die.cu.cu_offset class VariableType: """ @@ -107,7 +89,7 @@ def read_from_die(cls, die: DIE, elf_object): if dw_at_type is None: referenced_offset = None else: - referenced_offset = resolve_reference_addr(die.cu, dw_at_type) + referenced_offset = resolve_reference_addr(die, "DW_AT_type") return cls(byte_size.value, elf_object, referenced_offset) @@ -221,7 +203,7 @@ def read_from_die(cls, die: DIE, elf_object): dw_at_type = die.attributes.get("DW_AT_type", None) dw_at_memloc = die.attributes.get("DW_AT_data_member_location", None) name = None if dw_at_name is None else dw_at_name.value.decode() - ty = None if dw_at_type is None else resolve_reference_addr(die.cu, dw_at_type) + ty = None if dw_at_type is None else resolve_reference_addr(die, "DW_AT_type") # From the DWARF5 manual, page 118: # The member entry corresponding to a data member that is defined in a structure, @@ -271,7 +253,8 @@ def read_from_die(cls, die: DIE, elf_object): if dw_at_type is None: return None return cls( - dw_byte_size.value if dw_byte_size is not None else None, elf_object, resolve_reference_addr(die.cu, dw_at_type) + dw_byte_size.value if dw_byte_size is not None else None, elf_object, + resolve_reference_addr(die, "DW_AT_type") ) @property @@ -306,7 +289,7 @@ def read_from_die(cls, die: DIE, elf_object): dw_at_type = die.attributes.get("DW_AT_type", None) dw_at_byte_size = die.attributes.get("DW_AT_byte_size", None) name = None if dw_at_name is None else dw_at_name.value.decode() - type_offset = None if dw_at_type is None else resolve_reference_addr(die.cu, dw_at_type) + type_offset = None if dw_at_type is None else resolve_reference_addr(die, "DW_AT_type") byte_size = None if dw_at_byte_size is None else dw_at_byte_size.value return cls(name, byte_size, elf_object, type_offset) From 50dda2e26de73330c5c726ed2a87f55804621e9b Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Fri, 18 Jul 2025 16:00:37 -0400 Subject: [PATCH 09/15] Renamed VariantValue to VariantCaseType. Renamed uses of discr to tag and discr_values to variant_cases. Added some more accessors. --- cle/backends/elf/variable_type.py | 64 +++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 97e40a1f..0bf8d10b 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -223,24 +223,24 @@ class UnionType(StructType): Entry class for DW_TAG_union_type. Inherits from StructType to make it trivial. """ -class VariantValue: +class VariantCaseType: """ This class represents one possible value for a variant. - :param value: The discriminator/tag value used for indicating this variant - :param member: The member representing the layout of this particular variant + :param tag: The discriminator/tag value used for indicating this variant case + :param member: The member representing the layout of this particular variant case """ - def __init__(self, value: int | None, member: StructMember | None): - self.value = value + def __init__(self, tag: int | None, member: StructMember | None): + self.tag = tag self.member = member @classmethod def read_from_die(cls, die: DIE, elf_object): - value_attr = die.attributes.get("DW_AT_discr_value", None) - if value_attr is not None: - value = value_attr.value + tag_attr = die.attributes.get("DW_AT_discr_value", None) + if tag_attr is not None: + tag = tag_attr.value else: - value = None + tag = None member = None for die_child in die.iter_children(): @@ -248,13 +248,36 @@ def read_from_die(cls, die: DIE, elf_object): member = StructMember.read_from_die(die_child, elf_object) break - return cls(value, member) + return cls(tag, member) + + @property + def name(self): + return self.member.name + + @property + def type(self): + return self.member.type class VariantType(VariableType): - def __init__(self, name, byte_size, elf_object, discr: StructMember | None, discr_values: list[VariantValue]): + def __init__(self, name, byte_size, elf_object, tag: StructMember | None, variant_cases: list[VariantCaseType]): super().__init__(name, byte_size, elf_object) - self.discr = discr - self.discr_values = discr_values + self.tag = tag + self.variant_cases = variant_cases + + def __getitem__(self, item: int | str) -> VariantCaseType: + if isinstance(item, int): + for v in self.variant_cases: + if v.tag == item: + return v + return self.variant_cases[item] + elif isinstance(item, str): + for v in self.variant_cases: + if v.name == item: + return v + raise KeyError(f"Unknown variant case {item}") + + def __getattr__(self, item: str) -> VariantCaseType: + return self[item] @classmethod def read_from_die(cls, die: DIE, elf_object): @@ -269,21 +292,20 @@ def read_from_die(cls, die: DIE, elf_object): variant_part = die_child break - discr_attr = variant_part.attributes.get("DW_AT_discr", None) - if discr_attr is not None: - discr_offset = discr_attr.value + die.cu.cu_offset - discr_die = die.cu.get_DIE_from_refaddr(discr_offset) - discr = StructMember.read_from_die(discr_die, elf_object) + tag_attr = variant_part.attributes.get("DW_AT_discr", None) + if tag_attr is not None: + tag_die = variant_part.get_DIE_from_attribute("DW_AT_discr") + tag = StructMember.read_from_die(tag_die, elf_object) else: - discr = None + tag = None values = [] if variant_part is not None: for die_child in variant_part.iter_children(): if die_child.tag == "DW_TAG_variant": - values.append(VariantValue.read_from_die(die_child, elf_object)) + values.append(VariantCaseType.read_from_die(die_child, elf_object)) - return cls(name, byte_size.value, elf_object, discr, values) + return cls(name, byte_size.value, elf_object, tag, values) class StructMember: """ From b65d5caa2ce2fa81587bd626603c42ddadc3f034 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Mon, 28 Jul 2025 16:12:15 -0400 Subject: [PATCH 10/15] Renamed tag to tag_value in VariantCaseType. Added some documentation for VariantType --- cle/backends/elf/variable_type.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 0bf8d10b..079f3032 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -227,11 +227,11 @@ class VariantCaseType: """ This class represents one possible value for a variant. - :param tag: The discriminator/tag value used for indicating this variant case + :param tag_value: The discriminator/tag value used for indicating this variant case :param member: The member representing the layout of this particular variant case """ - def __init__(self, tag: int | None, member: StructMember | None): - self.tag = tag + def __init__(self, tag_value: int | None, member: StructMember | None): + self.tag_value = tag_value self.member = member @classmethod @@ -259,19 +259,27 @@ def type(self): return self.member.type class VariantType(VariableType): - def __init__(self, name, byte_size, elf_object, tag: StructMember | None, variant_cases: list[VariantCaseType]): + """ + :param tag: The type of the variant tag. This value may be None if the tag is not present in the variant. This\ + can occur in situations where one or more of a variant case is unconstructable. For example in Rust the variant\ + Result has two cases: a non-error result and an error result. However, the Infallible type\ + is impossible to construct, so the compiler knows only one case of the variant is possible to construct. This\ + removes the need for the tag. + :param cases: The various arms of the variant. Note that the order of these cases is not relevant; if you\ + want to know the corresponding tag value for a VariantCaseType, access the specific tag_value in the VariantCaseType + """ + def __init__(self, name, byte_size, elf_object, tag: StructMember | None, cases: list[VariantCaseType]): super().__init__(name, byte_size, elf_object) self.tag = tag - self.variant_cases = variant_cases + self.cases = cases def __getitem__(self, item: int | str) -> VariantCaseType: if isinstance(item, int): - for v in self.variant_cases: - if v.tag == item: + for v in self.cases: + if v.tag_value == item: return v - return self.variant_cases[item] elif isinstance(item, str): - for v in self.variant_cases: + for v in self.cases: if v.name == item: return v raise KeyError(f"Unknown variant case {item}") From e2eecaa166ec38c74b7275578ac0f9d7784d383c Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Tue, 29 Jul 2025 14:56:40 -0400 Subject: [PATCH 11/15] Added support for getting alginment information from DWARF info --- cle/backends/elf/variable_type.py | 35 +++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 079f3032..5bb811fb 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -181,8 +181,9 @@ class StructType(VariableType): :param elf_object: elf object to reference to (useful for pointer,...) """ - def __init__(self, name: str, byte_size: int, elf_object, members): + def __init__(self, name: str, byte_size: int, elf_object, members, align: int | None=None): super().__init__(name, byte_size, elf_object) + self.align = align self.members = members @classmethod @@ -202,13 +203,20 @@ def read_from_die(cls, die: DIE, elf_object): if byte_size is None: return None + align_attr = die.attributes.get("DW_AT_alignment", None) + if align_attr is not None: + align_val = align_attr.value + else: + align_val = None + members = [] for die_child in die.iter_children(): if die_child.tag == "DW_TAG_member": members.append(StructMember.read_from_die(die_child, elf_object)) return cls( - dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size.value, elf_object, members + dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size.value, + elf_object, members, align=align_val ) def __getitem__(self, member_name): @@ -268,10 +276,12 @@ class VariantType(VariableType): :param cases: The various arms of the variant. Note that the order of these cases is not relevant; if you\ want to know the corresponding tag value for a VariantCaseType, access the specific tag_value in the VariantCaseType """ - def __init__(self, name, byte_size, elf_object, tag: StructMember | None, cases: list[VariantCaseType]): + def __init__(self, name, byte_size, elf_object, tag: StructMember | None, + cases: list[VariantCaseType], align: int | None=None): super().__init__(name, byte_size, elf_object) self.tag = tag self.cases = cases + self.align = align def __getitem__(self, item: int | str) -> VariantCaseType: if isinstance(item, int): @@ -307,13 +317,19 @@ def read_from_die(cls, die: DIE, elf_object): else: tag = None + align_attr = die.attributes.get("DW_AT_alignment", None) + if align_attr is not None: + align = align_attr.value + else: + align = None + values = [] if variant_part is not None: for die_child in variant_part.iter_children(): if die_child.tag == "DW_TAG_variant": values.append(VariantCaseType.read_from_die(die_child, elf_object)) - return cls(name, byte_size.value, elf_object, tag, values) + return cls(name, byte_size.value, elf_object, tag, values, align=align) class StructMember: """ @@ -328,11 +344,12 @@ class StructMember: :ivar name: name of the member """ - def __init__(self, name: str, addr_offset: int, type_offset, elf_object): + def __init__(self, name: str, addr_offset: int, type_offset, elf_object, align: int | None=None): self.name = name self.addr_offset = addr_offset self._elf_object = elf_object self._type_offset = type_offset + self.align = align @classmethod def read_from_die(cls, die: DIE, elf_object): @@ -347,6 +364,12 @@ def read_from_die(cls, die: DIE, elf_object): name = None if dw_at_name is None else dw_at_name.value.decode() ty = None if dw_at_type is None else resolve_reference_addr(die, "DW_AT_type") + align_attr = die.attributes.get("DW_AT_alignment", None) + if align_attr is not None: + align = align_attr.value + else: + align = None + # From the DWARF5 manual, page 118: # The member entry corresponding to a data member that is defined in a structure, # union or class may have either a DW_AT_data_member_location attribute or a @@ -355,7 +378,7 @@ def read_from_die(cls, die: DIE, elf_object): # TODO bit_offset addr_offset = 0 if dw_at_memloc is None else dw_at_memloc.value - return cls(name, addr_offset, ty, elf_object) + return cls(name, addr_offset, ty, elf_object, align=align) @property def type(self): From 6e6f52f4ad443fe0faab6cc1e2f28cb65202b932 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Wed, 19 Nov 2025 17:07:29 -0500 Subject: [PATCH 12/15] Bug fixes for getting SubprogramType working. Started adding stuff for namespaces --- cle/backends/elf/elf.py | 4 +- cle/backends/elf/subprogram.py | 9 +- cle/backends/elf/variable_type.py | 163 +++++++++++++++++++++++++----- 3 files changed, 146 insertions(+), 30 deletions(-) diff --git a/cle/backends/elf/elf.py b/cle/backends/elf/elf.py index 581690f9..17472507 100644 --- a/cle/backends/elf/elf.py +++ b/cle/backends/elf/elf.py @@ -39,7 +39,7 @@ from .subprogram import LexicalBlock, Subprogram from .symbol import ELFSymbol, Symbol, SymbolType from .variable import Variable -from .variable_type import VariableType +from .variable_type import VariableType, SubprogramType try: import pypcode @@ -750,7 +750,7 @@ def _load_dies(self, dwarf: DWARFInfo): if VariableType.supported_die(die): var_type = VariableType.read_from_die(die, self) if var_type is not None: - type_list[die.offset] = var_type + type_list[cu.cu_offset + die.offset] = var_type except KeyError: # pyelftools is not very resilient - we need to catch KeyErrors here continue diff --git a/cle/backends/elf/subprogram.py b/cle/backends/elf/subprogram.py index 3f43800f..154c7879 100644 --- a/cle/backends/elf/subprogram.py +++ b/cle/backends/elf/subprogram.py @@ -61,13 +61,15 @@ class Subprogram(LexicalBlock): """ def __init__( - self, elf_object: ELF, name: str | None, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None, + self, elf_object: ELF, name: str | None, linkage_name: str | None, low_pc: int | None, high_pc: int | None, + ranges: list[tuple[int, int]] | None = None, ) -> None: self._elf_object = elf_object # pass self as the super_block of this subprogram self.subprogram = self super().__init__(low_pc, high_pc, ranges) self.name = name + self.linkage_name = linkage_name self.parameters: list[Variable] = [] self.local_variables: list[Variable] = [] self.inlined_functions: list[InlinedFunction] = [] @@ -75,7 +77,10 @@ def __init__( @staticmethod def from_die(die: DIE, elf_object: ELF, name: str | None, low_pc: int | None, high_pc: int | None, ranges: list[tuple[int, int]] | None = None): - sub_prg = Subprogram(elf_object, name, low_pc, high_pc, ranges=ranges) + linkage_name_attr = die.attributes.get("DW_AT_linkage_name", None) + linkage_name = None if linkage_name_attr is None else linkage_name_attr.value.decode() + + sub_prg = Subprogram(elf_object, name, linkage_name, low_pc, high_pc, ranges=ranges) if "DW_AT_type" in die.attributes: sub_prg._return_type_offset = die.attributes["DW_AT_type"].value + die.cu.cu_offset return sub_prg diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 5bb811fb..6db44a70 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -32,6 +32,19 @@ def __init__(self, name: str, byte_size: int, elf_object): self.byte_size = byte_size self._elf_object = elf_object + @staticmethod + def find_namespace(die: DIE): + reverse_namespace = [] + current_die: DIE | None = die.get_parent() + while current_die is not None: + if "DW_AT_name" in current_die.attributes: + name = current_die.attributes["DW_AT_name"].value.decode() + reverse_namespace.append(name) + else: + break + current_die = current_die.get_parent() + return reversed(reverse_namespace) + @staticmethod def read_from_die(die: DIE, elf_object): """ @@ -53,6 +66,8 @@ def read_from_die(die: DIE, elf_object): return EnumerationType.read_from_die(die, elf_object) elif die.tag == "DW_TAG_subroutine_type": return SubroutineType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_subprogram": + return SubprogramType.read_from_die(die, elf_object) return None @staticmethod @@ -65,7 +80,8 @@ def supported_die(die: DIE) -> bool: "DW_TAG_typedef", "DW_TAG_union_type", "DW_TAG_enumeration_type", - "DW_TAG_subroutine_type" + "DW_TAG_subroutine_type", + "DW_TAG_subprogram" ) @@ -181,8 +197,9 @@ class StructType(VariableType): :param elf_object: elf object to reference to (useful for pointer,...) """ - def __init__(self, name: str, byte_size: int, elf_object, members, align: int | None=None): + def __init__(self, name: str, byte_size: int, elf_object, namespace, members, align: int | None=None): super().__init__(name, byte_size, elf_object) + self.namespace = namespace self.align = align self.members = members @@ -209,6 +226,8 @@ def read_from_die(cls, die: DIE, elf_object): else: align_val = None + namespace = VariableType.find_namespace(die) + members = [] for die_child in die.iter_children(): if die_child.tag == "DW_TAG_member": @@ -216,7 +235,7 @@ def read_from_die(cls, die: DIE, elf_object): return cls( dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size.value, - elf_object, members, align=align_val + elf_object, namespace, members, align=align_val ) def __getitem__(self, member_name): @@ -266,6 +285,10 @@ def name(self): def type(self): return self.member.type + @property + def align(self): + return self.member.align + class VariantType(VariableType): """ :param tag: The type of the variant tag. This value may be None if the tag is not present in the variant. This\ @@ -276,9 +299,11 @@ class VariantType(VariableType): :param cases: The various arms of the variant. Note that the order of these cases is not relevant; if you\ want to know the corresponding tag value for a VariantCaseType, access the specific tag_value in the VariantCaseType """ - def __init__(self, name, byte_size, elf_object, tag: StructMember | None, + def __init__(self, name, byte_size, elf_object, namespace, + tag: StructMember | None, cases: list[VariantCaseType], align: int | None=None): super().__init__(name, byte_size, elf_object) + self.namespace = namespace self.tag = tag self.cases = cases self.align = align @@ -304,6 +329,8 @@ def read_from_die(cls, die: DIE, elf_object): name = dw_at_name.value.decode() if dw_at_name is not None else "unknown" + namespace = VariableType.find_namespace(die) + variant_part = None for die_child in die.iter_children(): if die_child.tag == "DW_TAG_variant_part": @@ -329,7 +356,7 @@ def read_from_die(cls, die: DIE, elf_object): if die_child.tag == "DW_TAG_variant": values.append(VariantCaseType.read_from_die(die_child, elf_object)) - return cls(name, byte_size.value, elf_object, tag, values, align=align) + return cls(name, byte_size.value, elf_object, namespace, tag, values, align=align) class StructMember: """ @@ -401,7 +428,8 @@ class ArrayType(VariableType): :param element_offset: type of the array elements as offset in the compilation_unit """ - def __init__(self, byte_size, elf_object, element_offset, count: int | None, lower_bound: int | None, upper_bound: int | None): + def __init__(self, byte_size, elf_object, element_offset, count: int | None, + lower_bound: int | None, upper_bound: int | None): super().__init__("array", byte_size, elf_object) self._element_offset = element_offset self.count = count @@ -507,8 +535,9 @@ def read_from_die(cls, die: DIE, elf_object): return cls(name, const_value) class EnumerationType(VariableType): - def __init__(self, name: str, byte_size: int, elf_object, type_offset, enumerator_values: list[EnumeratorValue]): + def __init__(self, name: str, byte_size: int, elf_object, namespace, type_offset, enumerator_values: list[EnumeratorValue]): super().__init__(name, byte_size, elf_object) + self.namespace = namespace self.enumerator_values = enumerator_values self._type_offset = type_offset @@ -537,8 +566,11 @@ def read_from_die(cls, die: DIE, elf_object): name_attr = die.attributes.get("DW_AT_name", None) name = None if name_attr is None else name_attr.value.decode() + namespace = VariableType.find_namespace(die) + dw_at_type = die.attributes.get("DW_AT_type", None) - type_offset = None if dw_at_type is None else dw_at_type.value + die.cu.cu_offset + type_offset = None if dw_at_type is None else resolve_reference_addr(die, "DW_AT_type") + #type_offset = None if dw_at_type is None else dw_at_type.value + die.cu.cu_offset enumerators = [] for child in die.iter_children(): @@ -546,13 +578,24 @@ def read_from_die(cls, die: DIE, elf_object): case "DW_TAG_enumerator": enumerators.append(EnumeratorValue.read_from_die(child, elf_object)) - return cls(name, byte_size, elf_object, type_offset, enumerators) + return cls(name, byte_size, elf_object, namespace, type_offset, enumerators) + +class SubroutineParameter: + def __init__(self, name: str | None, type_offset: int, elf_object): + self.name = name + self._type_offset = type_offset + self._elf_object = elf_object + + @property + def type(self): + return self._elf_object.type_list[self._type_offset] class SubroutineType(VariableType): - def __init__(self, name: str, byte_size: int, elf_object, type_offset: int | None, parameter_offsets): + def __init__(self, name: str, byte_size: int, elf_object, type_offset: int | None, + parameters: list[SubroutineParameter]): super().__init__(name, byte_size, elf_object) self._type_offset = type_offset - self._parameter_offsets = parameter_offsets + self.parameters = parameters @property def type(self): @@ -564,14 +607,76 @@ def type(self): else: return self._elf_object.type_list[self._type_offset] + @classmethod + def read_from_die(cls, die: DIE, elf_object): + """ + read an entry of DW_TAG_subroutine_type + """ + + dw_byte_size = die.attributes.get("DW_AT_byte_size", None) + byte_size = dw_byte_size.value if dw_byte_size is not None else None + + name_attr = die.attributes.get("DW_AT_name", None) + name = None if name_attr is None else name_attr.value.decode() + + if "DW_AT_type" in die.attributes: + type_offset = resolve_reference_addr(die, "DW_AT_type") + else: + type_offset = None + + parameters: list[SubroutineParameter] = [] + for child in die.iter_children(): + match child.tag: + case "DW_TAG_formal_parameter": + param_type_offset = resolve_reference_addr(child, "DW_AT_type") + param_name_attr = child.attributes.get("DW_AT_name", None) + param_name = None if param_name_attr is None else param_name_attr.value.decode() + param = SubroutineParameter(param_name, param_type_offset, elf_object) + parameters.append(param) + + return cls(name, byte_size, elf_object, type_offset, parameters) + +class SubprogramParameter: + def __init__(self, name: str | None, type_offset: int, elf_object): + self.name = name + self._type_offset = type_offset + self._elf_object = elf_object + @property - def parameters(self): + def type(self): + return self._elf_object.type_list[self._type_offset] + + @classmethod + def read_from_die(cls, die: DIE, elf_object): + param_type_offset = resolve_reference_addr(die, "DW_AT_type") + + name_attr = die.attributes.get("DW_AT_name", None) + name = None if name_attr is None else name_attr.value.decode() + + location_attr = die.attributes.get("DW_AT_location", None) + + return cls(name, param_type_offset, elf_object) + +class SubprogramType: + def __init__(self, name: str, linkage_name: str | None, low_pc: int | None, high_pc: int | None, elf_object, + type_offset: int | None, parameters: list[SubprogramParameter]): + self.name = name + self.linkage_name = linkage_name + self._elf_object = elf_object + self._type_offset = type_offset + self.parameters = parameters + self.low_pc = low_pc + self.high_pc = high_pc + + @property + def type(self): """ - Iterates over the parameters of the subroutine + The return type of the subroutine, or None if the subroutine returns no value """ - type_list = self._elf_object.type_list - for offset in self._parameter_offsets: - yield type_list[offset] + if self._type_offset is None: + return None + else: + return self._elf_object.type_list[self._type_offset] @classmethod def read_from_die(cls, die: DIE, elf_object): @@ -579,21 +684,27 @@ def read_from_die(cls, die: DIE, elf_object): read an entry of DW_TAG_subroutine_type """ - dw_byte_size = die.attributes.get("DW_AT_byte_size", None) - byte_size = dw_byte_size.value if dw_byte_size is not None else None - name_attr = die.attributes.get("DW_AT_name", None) name = None if name_attr is None else name_attr.value.decode() - dw_at_type = die.attributes.get("DW_AT_type", None) - type_offset = None if dw_at_type is None else dw_at_type.value + die.cu.cu_offset + linkage_name_attr = die.attributes.get("DW_AT_linkage_name", None) + linkage_name = None if linkage_name_attr is None else linkage_name_attr.value.decode() + + if "DW_AT_type" in die.attributes: + type_offset = resolve_reference_addr(die, "DW_AT_type") + else: + type_offset = None + + low_pc_attr = die.attributes.get("DW_AT_low_pc", None) + low_pc = None if low_pc_attr is None else low_pc_attr.value + + high_pc_attr = die.attributes.get("DW_AT_high_pc", None) + high_pc = None if high_pc_attr is None else high_pc_attr.value - parameter_offsets: list[int] = [] + parameters: list[SubprogramParameter] = [] for child in die.iter_children(): match child.tag: case "DW_TAG_formal_parameter": - param_type_attr = child.attributes.get("DW_AT_type", None) - param_type_offset = None if param_type_attr is None else param_type_attr.value + die.cu.cu_offset - parameter_offsets.append(param_type_offset) + parameters.append(SubprogramParameter.read_from_die(child, elf_object)) - return cls(name, byte_size, elf_object, type_offset, parameter_offsets) \ No newline at end of file + return cls(name, linkage_name, low_pc, high_pc, elf_object, type_offset, parameters) \ No newline at end of file From 571e1c67ea6a02a1e2dcbd2ada7e7cd2453c0aa1 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Thu, 20 Nov 2025 15:47:56 -0500 Subject: [PATCH 13/15] Fixed namespace generation for DWARF DIEs --- cle/backends/elf/variable_type.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 6db44a70..f00fa5bf 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -37,13 +37,13 @@ def find_namespace(die: DIE): reverse_namespace = [] current_die: DIE | None = die.get_parent() while current_die is not None: - if "DW_AT_name" in current_die.attributes: + if "DW_AT_name" in current_die.attributes and current_die.tag not in ("DW_TAG_compile_unit",): name = current_die.attributes["DW_AT_name"].value.decode() reverse_namespace.append(name) else: break current_die = current_die.get_parent() - return reversed(reverse_namespace) + return list(reversed(reverse_namespace)) @staticmethod def read_from_die(die: DIE, elf_object): From 3570f3333660a318b3475fcc1666ae3123fce0c8 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Mon, 1 Dec 2025 14:29:20 -0500 Subject: [PATCH 14/15] Added support for DWARF const types to CLE. StructType is more resiliant to having byte_size missing. TypeDefType now knows what namespace it is in. Added more resilience to DWARF information about inline functions. --- cle/backends/elf/variable_type.py | 49 +++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index f00fa5bf..50397fd1 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -68,6 +68,8 @@ def read_from_die(die: DIE, elf_object): return SubroutineType.read_from_die(die, elf_object) elif die.tag == "DW_TAG_subprogram": return SubprogramType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_const_type": + return ConstType.read_from_die(die, elf_object) return None @staticmethod @@ -81,7 +83,8 @@ def supported_die(die: DIE) -> bool: "DW_TAG_union_type", "DW_TAG_enumeration_type", "DW_TAG_subroutine_type", - "DW_TAG_subprogram" + "DW_TAG_subprogram", + "DW_TAG_const_type" ) @@ -197,7 +200,7 @@ class StructType(VariableType): :param elf_object: elf object to reference to (useful for pointer,...) """ - def __init__(self, name: str, byte_size: int, elf_object, namespace, members, align: int | None=None): + def __init__(self, name: str, byte_size: int | None, elf_object, namespace, members, align: int | None=None): super().__init__(name, byte_size, elf_object) self.namespace = namespace self.align = align @@ -216,9 +219,7 @@ def read_from_die(cls, die: DIE, elf_object): dw_at_name = die.attributes.get("DW_AT_name", None) byte_size = die.attributes.get("DW_AT_byte_size", None) - - if byte_size is None: - return None + byte_size_val = byte_size.value if byte_size is not None else None align_attr = die.attributes.get("DW_AT_alignment", None) if align_attr is not None: @@ -234,7 +235,7 @@ def read_from_die(cls, die: DIE, elf_object): members.append(StructMember.read_from_die(die_child, elf_object)) return cls( - dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size.value, + dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size_val, elf_object, namespace, members, align=align_val ) @@ -488,9 +489,10 @@ class TypedefType(VariableType): :param type_offset: type as offset in the compilation_unit """ - def __init__(self, name: str, byte_size, elf_object, type_offset): + def __init__(self, name: str, byte_size, elf_object, namespace, type_offset): super().__init__(name, byte_size, elf_object) self._type_offset = type_offset + self.namespace = namespace @classmethod def read_from_die(cls, die: DIE, elf_object): @@ -506,7 +508,9 @@ def read_from_die(cls, die: DIE, elf_object): type_offset = None if dw_at_type is None else resolve_reference_addr(die, "DW_AT_type") byte_size = None if dw_at_byte_size is None else dw_at_byte_size.value - return cls(name, byte_size, elf_object, type_offset) + namespace = VariableType.find_namespace(die) + + return cls(name, byte_size, elf_object, namespace, type_offset) @property def type(self): @@ -684,6 +688,10 @@ def read_from_die(cls, die: DIE, elf_object): read an entry of DW_TAG_subroutine_type """ + if "DW_AT_abstract_origin" in die.attributes: + # abstract_origin seems to be used for inline functions. Let's just ignore these for now... + return None + name_attr = die.attributes.get("DW_AT_name", None) name = None if name_attr is None else name_attr.value.decode() @@ -707,4 +715,27 @@ def read_from_die(cls, die: DIE, elf_object): case "DW_TAG_formal_parameter": parameters.append(SubprogramParameter.read_from_die(child, elf_object)) - return cls(name, linkage_name, low_pc, high_pc, elf_object, type_offset, parameters) \ No newline at end of file + return cls(name, linkage_name, low_pc, high_pc, elf_object, type_offset, parameters) + +class ConstType(VariableType): + def __init__(self, elf_object, type_offset): + super().__init__("const", None, elf_object) + self._type_offset = type_offset + + @property + def type(self): + """ + The underlying type of the const + """ + return self._elf_object.type_list[self._type_offset] + + @classmethod + def read_from_die(cls, die: DIE, elf_object): + """ + read an entry of DW_TAG_const_type. + """ + + dw_at_type = die.attributes.get("DW_AT_type", None) + type_offset = None if dw_at_type is None else resolve_reference_addr(die, "DW_AT_type") + + return cls(elf_object, type_offset) From 77589be0c60e281f6ba73ef2630e30009ec01b31 Mon Sep 17 00:00:00 2001 From: Caleb Helbling Date: Tue, 2 Dec 2025 17:01:32 -0500 Subject: [PATCH 15/15] Added support for DWARF atomic, immutable, packed, reference, restrict, rvalue reference, shared and volatile types. Accomplished this by making a new TypeModifier superclass. ConstType now inherits from this new superclass. --- cle/backends/elf/variable_type.py | 58 ++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/cle/backends/elf/variable_type.py b/cle/backends/elf/variable_type.py index 50397fd1..1d2a7873 100644 --- a/cle/backends/elf/variable_type.py +++ b/cle/backends/elf/variable_type.py @@ -68,8 +68,24 @@ def read_from_die(die: DIE, elf_object): return SubroutineType.read_from_die(die, elf_object) elif die.tag == "DW_TAG_subprogram": return SubprogramType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_atomic_type": + return AtomicType.read_from_die(die, elf_object) elif die.tag == "DW_TAG_const_type": return ConstType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_immutable_type": + return ImmutableType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_packed_type": + return PackedType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_reference_type": + return ReferenceType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_restrict_type": + return RestrictType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_rvalue_reference_type": + return RValueReferenceType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_shared_type": + return SharedType.read_from_die(die, elf_object) + elif die.tag == "DW_TAG_volatile_type": + return VolatileType.read_from_die(die, elf_object) return None @staticmethod @@ -84,7 +100,14 @@ def supported_die(die: DIE) -> bool: "DW_TAG_enumeration_type", "DW_TAG_subroutine_type", "DW_TAG_subprogram", - "DW_TAG_const_type" + "DW_TAG_const_type", + "DW_TAG_immutable_type", + "DW_TAG_packed_type", + "DW_TAG_reference_type", + "DW_TAG_restrict_type", + "DW_TAG_rvalue_reference_type", + "DW_TAG_shared_type", + "DW_TAG_volatile_type" ) @@ -717,9 +740,9 @@ def read_from_die(cls, die: DIE, elf_object): return cls(name, linkage_name, low_pc, high_pc, elf_object, type_offset, parameters) -class ConstType(VariableType): +class TypeModifier(VariableType): def __init__(self, elf_object, type_offset): - super().__init__("const", None, elf_object) + super().__init__(self.name, None, elf_object) self._type_offset = type_offset @property @@ -727,7 +750,7 @@ def type(self): """ The underlying type of the const """ - return self._elf_object.type_list[self._type_offset] + return self._elf_object.type_list.get(self._type_offset, None) @classmethod def read_from_die(cls, die: DIE, elf_object): @@ -739,3 +762,30 @@ def read_from_die(cls, die: DIE, elf_object): type_offset = None if dw_at_type is None else resolve_reference_addr(die, "DW_AT_type") return cls(elf_object, type_offset) + +class AtomicType(TypeModifier): + name = "atomic" + +class ConstType(TypeModifier): + name = "const" + +class ImmutableType(TypeModifier): + name = "immutable" + +class PackedType(TypeModifier): + name = "packed" + +class ReferenceType(TypeModifier): + name = "reference" + +class RestrictType(TypeModifier): + name = "restrict" + +class RValueReferenceType(TypeModifier): + name = "r_value_reference" + +class SharedType(TypeModifier): + name = "shared" + +class VolatileType(TypeModifier): + name = "volatile"