Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 31 additions & 35 deletions src/stirling/obj_tools/dwarf_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ StatusOr<std::unique_ptr<DwarfReader>> DwarfReader::CreateWithoutIndexing(
auto dwarf_reader = std::unique_ptr<DwarfReader>(
new DwarfReader(std::move(buffer), DWARFContext::create(*obj_file)));

PX_RETURN_IF_ERROR(dwarf_reader->DetectSourceLanguage());

return dwarf_reader;
}

Expand Down Expand Up @@ -154,36 +152,29 @@ bool IsNamespace(llvm::dwarf::Tag tag) { return tag == llvm::dwarf::DW_TAG_names

} // namespace

Status DwarfReader::DetectSourceLanguage() {
for (size_t i = 0; i < dwarf_context_->getNumCompileUnits(); ++i) {
const auto& unit_die = dwarf_context_->getUnitAtIndex(i)->getUnitDIE();
if (unit_die.getTag() != llvm::dwarf::DW_TAG_compile_unit) {
// Skip over DW_TAG_partial_unit, and potentially other tags.
continue;
}

PX_ASSIGN_OR(const DWARFFormValue& lang_attr,
GetAttribute(unit_die, llvm::dwarf::DW_AT_language), continue);
source_language_ =
static_cast<llvm::dwarf::SourceLanguage>(lang_attr.getAsUnsignedConstant().getValue());

const DWARFFormValue& producer_attr =
GetAttribute(unit_die, llvm::dwarf::DW_AT_producer).ValueOr({});

auto s = producer_attr.getAsCString();
// Reads the source language (DW_AT_language) and the producer/compiler string (DW_AT_producer)
// from a single unit DIE and returns them as a (language, compiler) pair.
// Returns NotFound when unit_die is not a DW_TAG_compile_unit, and propagates the GetAttribute
// error when DW_AT_language is missing. The producer is optional: the compiler string is left
// empty when it cannot be read.
StatusOr<std::pair<llvm::dwarf::SourceLanguage, std::string>>
DwarfReader::DetectSourceLanguageFromCUDIE(const llvm::DWARFDie& unit_die) {
if (unit_die.getTag() != llvm::dwarf::DW_TAG_compile_unit) {
// Only DW_TAG_compile_unit is accepted; DW_TAG_partial_unit and any other tag is reported
// to the caller as NotFound instead of being silently skipped.
return error::NotFound("Expected DW_TAG_compile_unit, but got DW_TAG=$0 for unit DIE: $1",
magic_enum::enum_name(unit_die.getTag()), Dump(unit_die));
}
// A missing DW_AT_producer is tolerated: ValueOr({}) substitutes a default-constructed form value.
const DWARFFormValue& producer_attr =
GetAttribute(unit_die, llvm::dwarf::DW_AT_producer).ValueOr({});
auto s = producer_attr.getAsCString();
std::string compiler;
// The return type of getAsCString() differs across LLVM versions, hence the two extraction paths.
#if LLVM_VERSION_MAJOR >= 14
if (!s.takeError()) {
compiler_ = s.get();
}
if (!s.takeError()) {
compiler = s.get();
}
#else
compiler_ = s.getValueOr("");
compiler = s.getValueOr("");
#endif

return Status::OK();
}
return error::Internal(
"Could not determine the source language of the DWARF info. DW_AT_language not found on "
"any compilation unit.");
// Unlike the producer, the language attribute is mandatory: a failed lookup returns early.
PX_ASSIGN_OR_RETURN(const DWARFFormValue& lang_attr,
GetAttribute(unit_die, llvm::dwarf::DW_AT_language));
// NOTE(review): getValue() is called without checking that getAsUnsignedConstant() produced a
// value -- confirm DW_AT_language is always encoded as an unsigned constant.
auto source_language =
static_cast<llvm::dwarf::SourceLanguage>(lang_attr.getAsUnsignedConstant().getValue());
return std::make_pair(source_language, compiler);
}

void DwarfReader::IndexDIEs(
Expand Down Expand Up @@ -923,16 +914,21 @@ StatusOr<std::map<std::string, ArgInfo>> DwarfReader::GetFunctionArgInfo(
// but DW_AT_location has been found to be blank in some cases, making it unreliable.
// Instead, we use a FunctionArgTracker that tries to reverse engineer the calling convention.

ABI abi = LanguageToABI(source_language_, compiler_);
PX_ASSIGN_OR_RETURN(const DWARFDie& function_die,
GetMatchingDIE(function_symbol_name, llvm::dwarf::DW_TAG_subprogram));
// Certain binaries can have DW_TAG_compile_units with different source languages. When compiling
// programs with ASAN/TSAN enabled this is common.
llvm::DWARFUnit* cu = function_die.getDwarfUnit();
llvm::DWARFDie unit_die = cu->getUnitDIE();
PX_ASSIGN_OR_RETURN(auto source_lang_info, DetectSourceLanguageFromCUDIE(unit_die));
auto [source_lang, compiler] = source_lang_info;
ABI abi = LanguageToABI(source_lang, compiler);
if (abi == ABI::kUnknown) {
return error::Unimplemented("Unable to determine ABI from language: $0",
magic_enum::enum_name(source_language_));
magic_enum::enum_name(source_lang));
}
std::unique_ptr<ABICallingConventionModel> arg_tracker = ABICallingConventionModel::Create(abi);

PX_ASSIGN_OR_RETURN(const DWARFDie& function_die,
GetMatchingDIE(function_symbol_name, llvm::dwarf::DW_TAG_subprogram));

// If function has a return value, process that first.
// This is important, because in some ABIs (e.g. SystemV ABI),
// if the return value is not able to be passed back in the available registers,
Expand Down Expand Up @@ -968,7 +964,7 @@ StatusOr<std::map<std::string, ArgInfo>> DwarfReader::GetFunctionArgInfo(
PX_ASSIGN_OR_RETURN(const DWARFDie type_die, GetTypeDie(die));
PX_ASSIGN_OR_RETURN(arg.type_info, GetTypeInfo(die, type_die));

if (source_language_ == llvm::dwarf::DW_LANG_Go) {
if (source_lang == llvm::dwarf::DW_LANG_Go) {
arg.retarg = IsGolangRetArg(die).ValueOr(false);
}

Expand Down
18 changes: 6 additions & 12 deletions src/stirling/obj_tools/dwarf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@ class DwarfReader {
StatusOr<llvm::DWARFDie> GetMatchingDIE(std::string_view name,
std::optional<llvm::dwarf::Tag> type = {});

/**
 * Detects the source language and compiler from a DW_TAG_compile_unit's attributes.
 *
 * @param die The unit DIE to inspect. It must have the tag DW_TAG_compile_unit; any other tag
 *            results in a NotFound error.
 * @return A pair of (source language read from DW_AT_language, compiler string read from
 *         DW_AT_producer). The compiler string is empty if the producer could not be read.
 *         An error is returned if DW_AT_language is not present on the DIE.
 */
StatusOr<std::pair<llvm::dwarf::SourceLanguage, std::string>> DetectSourceLanguageFromCUDIE(
const llvm::DWARFDie& die);

/**
* Return the size of a struct.
*/
Expand Down Expand Up @@ -291,16 +297,10 @@ class DwarfReader {

bool IsValid() const { return dwarf_context_->getNumCompileUnits() != 0; }

const llvm::dwarf::SourceLanguage& source_language() const { return source_language_; }
const std::string& compiler() const { return compiler_; }

private:
DwarfReader(std::unique_ptr<llvm::MemoryBuffer> buffer,
std::unique_ptr<llvm::DWARFContext> dwarf_context);

// Detects the source language of the dwarf content being read.
Status DetectSourceLanguage();

// Builds an index for certain commonly used DIE types (e.g. structs and functions).
// When making multiple DwarfReader calls, this speeds up the process at the cost of some memory.
//
Expand All @@ -317,12 +317,6 @@ class DwarfReader {
void InsertToDIEMap(std::string name, llvm::dwarf::Tag tag, llvm::DWARFDie die);
std::optional<llvm::DWARFDie> FindInDIEMap(const std::string& name, llvm::dwarf::Tag tag) const;

// Records the source language of the DWARF information.
llvm::dwarf::SourceLanguage source_language_;

// Records the name of the compiler that produces this file.
std::string compiler_;

std::unique_ptr<llvm::MemoryBuffer> memory_buffer_;
std::unique_ptr<llvm::DWARFContext> dwarf_context_;

Expand Down
25 changes: 17 additions & 8 deletions src/stirling/obj_tools/dwarf_reader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,23 +125,32 @@ TEST_P(CppDwarfReaderTest, NonExistentPath) {

// Verifies the language and compiler reported for the compile unit that owns a known C++ function.
TEST_P(CppDwarfReaderTest, SourceLanguage) {
{
// Check that source language detection works for the compile unit that owns an individual DIE.
ASSERT_OK_AND_ASSIGN(
auto die, dwarf_reader->GetMatchingDIE("CanYouFindThis", llvm::dwarf::DW_TAG_subprogram));
llvm::DWARFUnit* cu = die.getDwarfUnit();
llvm::DWARFDie unit_die = cu->getUnitDIE();
ASSERT_OK_AND_ASSIGN(auto p, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die));
// We use C++17, but the dwarf shows 14.
EXPECT_EQ(dwarf_reader->source_language(), llvm::dwarf::DW_LANG_C_plus_plus_14);
EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("clang"));
EXPECT_EQ(p.first, llvm::dwarf::DW_LANG_C_plus_plus_14);
EXPECT_THAT(p.second, ::testing::HasSubstr("clang"));
}
}

// Verifies the language and compiler reported for the compile unit that owns a known Go function.
TEST_P(GolangDwarfReaderTest, SourceLanguage) {
{
EXPECT_EQ(dwarf_reader->source_language(), llvm::dwarf::DW_LANG_Go);
EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("go"));

// Check that source language detection works for the compile unit that owns an individual DIE.
ASSERT_OK_AND_ASSIGN(const bool uses_regabi, UsesRegABI());

ASSERT_OK_AND_ASSIGN(auto die, dwarf_reader->GetMatchingDIE("main.(*Vertex).Scale",
llvm::dwarf::DW_TAG_subprogram));
llvm::DWARFUnit* cu = die.getDwarfUnit();
llvm::DWARFDie unit_die = cu->getUnitDIE();
ASSERT_OK_AND_ASSIGN(auto p, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die));
EXPECT_EQ(p.first, llvm::dwarf::DW_LANG_Go);
// The compiler (producer) string is expected to mention "regabi" exactly when UsesRegABI()
// reports that the test binary uses it.
if (uses_regabi) {
EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("regabi"));
EXPECT_THAT(p.second, ::testing::HasSubstr("regabi"));
} else {
EXPECT_THAT(dwarf_reader->compiler(), ::testing::Not(::testing::HasSubstr("regabi")));
EXPECT_THAT(p.second, ::testing::Not(::testing::HasSubstr("regabi")));
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/stirling/obj_tools/elf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class ElfReader {
const std::string& binary_path, const std::filesystem::path& debug_file_dir = kDebugFileDir);

std::filesystem::path& debug_symbols_path() { return debug_symbols_path_; }
// Read-only access to the binary path held by this reader (binary_path_).
const std::string& binary_path() const { return binary_path_; }

struct SymbolInfo {
std::string name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ StatusOr<ir::shared::Language> TransformSourceLanguage(
return ir::shared::Language::GOLANG;
case llvm::dwarf::DW_LANG_C:
case llvm::dwarf::DW_LANG_C99:
case llvm::dwarf::DW_LANG_C11:
case llvm::dwarf::DW_LANG_C_plus_plus:
case llvm::dwarf::DW_LANG_C_plus_plus_03:
case llvm::dwarf::DW_LANG_C_plus_plus_11:
Expand All @@ -53,42 +54,49 @@ StatusOr<ir::shared::Language> TransformSourceLanguage(
}
}

// Determines the language of the function named `symbol_name` from DWARF info.
// The function's DW_TAG_subprogram DIE is looked up, the language is read from the unit DIE of
// the DWARF unit that contains it, and the result is mapped through TransformSourceLanguage.
// An error from any of these steps is returned to the caller unchanged.
StatusOr<ir::shared::Language> TryDetectSourceLanguageFromDwarf(
    obj_tools::DwarfReader* dwarf_reader, const std::string& symbol_name) {
  PX_ASSIGN_OR_RETURN(const auto& subprogram_die,
                      dwarf_reader->GetMatchingDIE(symbol_name, llvm::dwarf::DW_TAG_subprogram));

  // The language is an attribute of the enclosing unit DIE, not of the function DIE itself.
  const llvm::DWARFDie cu_die = subprogram_die.getDwarfUnit()->getUnitDIE();
  PX_ASSIGN_OR_RETURN(const auto lang_and_compiler,
                      dwarf_reader->DetectSourceLanguageFromCUDIE(cu_die));

  // Only the language half of the pair is needed here; the compiler string is ignored.
  return TransformSourceLanguage(lang_and_compiler.first);
}

} // namespace

// Detects the source language for a symbol and records it on the tracepoint program.
// DWARF info is used when a DwarfReader is supplied; otherwise an ELF-based check for Go is used.
// When no language is detected, only a warning is logged and the language field is not written.
void DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader,
ir::logical::TracepointDeployment* input_program) {
ir::logical::TracepointSpec* program, const std::string& symbol_name) {
ir::shared::Language detected_language = ir::shared::Language::LANG_UNKNOWN;

// Primary detection mechanism is DWARF info, when available.
if (dwarf_reader != nullptr) {
detected_language = TransformSourceLanguage(dwarf_reader->source_language())
.ConsumeValueOr(ir::shared::Language::LANG_UNKNOWN);
// A failed DWARF lookup is not an error here: detected_language simply stays LANG_UNKNOWN.
auto result = TryDetectSourceLanguageFromDwarf(dwarf_reader, symbol_name);
if (result.ok()) {
detected_language = result.ConsumeValueOrDie();
}
} else {
// NOTE(review): this fallback runs only when dwarf_reader is null; it is not attempted when a
// DwarfReader exists but detection through it fails -- confirm that this is intended.
// Back-up detection policy looks for certain language-specific symbols
if (IsGoExecutable(elf_reader)) {
detected_language = ir::shared::Language::GOLANG;
}

// TODO(oazizi): Make this stronger by adding more elf-based tests.
}

if (detected_language != ir::shared::Language::LANG_UNKNOWN) {
LOG(INFO) << absl::Substitute("Using language $0 for object $1 and others",
magic_enum::enum_name(dwarf_reader->source_language()),
input_program->deployment_spec().path_list().paths(0));

// Since we only support tracing of a single object, all tracepoints have the same language.
for (auto& tracepoint : *input_program->mutable_tracepoints()) {
tracepoint.mutable_program()->set_language(detected_language);
}
program->set_language(detected_language);
LOG(INFO) << absl::Substitute("Using language $0 for object $1 and symbol $2",
magic_enum::enum_name(detected_language),
elf_reader->binary_path(), symbol_name);
} else {
// For now, just print a warning, and let the probe proceed.
// This is so we can use things like function argument tracing even when other features may not
// work.
// Fall back to a warning and assume the C/C++ ABI; the language field is not modified here.
LOG(WARNING) << absl::Substitute(
"Language for object $0 and others is unknown or unsupported, so assuming C/C++ ABI. "
"Some dynamic tracing features may not work, or may produce unexpected results.",
input_program->deployment_spec().path_list().paths(0));
elf_reader->binary_path());
}
}
namespace {
Expand All @@ -110,8 +118,9 @@ bool IsWholeWordSuffix(std::string_view name, std::string_view suffix) {

} // namespace

Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader,
ir::logical::TracepointDeployment* input_program) {
Status ResolveProbeSymbolAndLanguage(obj_tools::ElfReader* elf_reader,
obj_tools::DwarfReader* dwarf_reader,
ir::logical::TracepointDeployment* input_program) {
// Expand symbol
for (auto& t : *input_program->mutable_tracepoints()) {
for (auto& probe : *t.mutable_program()->mutable_probes()) {
Expand Down Expand Up @@ -156,7 +165,9 @@ Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader,
return error::Internal("Could not find valid symbol match");
}

*probe.mutable_tracepoint()->mutable_symbol() = *symbol_name;
auto tracepoint = probe.mutable_tracepoint();
*tracepoint->mutable_symbol() = *symbol_name;
DetectSourceLanguage(elf_reader, dwarf_reader, t.mutable_program(), *symbol_name);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#pragma once

#include <string>

#include "src/common/base/base.h"
#include "src/stirling/obj_tools/dwarf_reader.h"
#include "src/stirling/obj_tools/elf_reader.h"
Expand All @@ -31,18 +33,22 @@ namespace dynamic_tracing {
/**
 * Uses ELF or DWARF information to detect the source language.
 * DWARF information is consulted when dwarf_reader is non-null; otherwise an ELF-based check
 * for Go executables is used.
 * Populates the tracepoint program's language field when a language is detected.
 * If the language cannot be determined, a warning is logged, the language field is left
 * unmodified, and a C/C++ language ABI is assumed.
 */
void DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader,
ir::logical::TracepointDeployment* input_program);
ir::logical::TracepointSpec* program, const std::string& symbol_name);

/**
 * Uses ELF information to check if the provided symbol exists.
 * If it does not exist, it checks whether it is a short-hand (suffix) of a full symbol.
 * If it is a short-hand reference to a symbol, the symbol is replaced with the full-form.
 * Also detects the source language for each resolved symbol using DWARF or ELF information.
 * Potentially modifies each tracepoint's symbol field and program's language field in
 * input_program.
 */
Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader,
ir::logical::TracepointDeployment* input_program);
Status ResolveProbeSymbolAndLanguage(obj_tools::ElfReader* elf_reader,
obj_tools::DwarfReader* dwarf_reader,
ir::logical::TracepointDeployment* input_program);

/**
* If any tracepoint in input_program contains no fields to trace, this function uses DWARF info
Expand Down
Loading
Loading