diff --git a/src/stirling/obj_tools/dwarf_reader.cc b/src/stirling/obj_tools/dwarf_reader.cc index 3e344b3c4de..c240f87b21f 100644 --- a/src/stirling/obj_tools/dwarf_reader.cc +++ b/src/stirling/obj_tools/dwarf_reader.cc @@ -92,8 +92,6 @@ StatusOr> DwarfReader::CreateWithoutIndexing( auto dwarf_reader = std::unique_ptr( new DwarfReader(std::move(buffer), DWARFContext::create(*obj_file))); - PX_RETURN_IF_ERROR(dwarf_reader->DetectSourceLanguage()); - return dwarf_reader; } @@ -154,36 +152,29 @@ bool IsNamespace(llvm::dwarf::Tag tag) { return tag == llvm::dwarf::DW_TAG_names } // namespace -Status DwarfReader::DetectSourceLanguage() { - for (size_t i = 0; i < dwarf_context_->getNumCompileUnits(); ++i) { - const auto& unit_die = dwarf_context_->getUnitAtIndex(i)->getUnitDIE(); - if (unit_die.getTag() != llvm::dwarf::DW_TAG_compile_unit) { - // Skip over DW_TAG_partial_unit, and potentially other tags. - continue; - } - - PX_ASSIGN_OR(const DWARFFormValue& lang_attr, - GetAttribute(unit_die, llvm::dwarf::DW_AT_language), continue); - source_language_ = - static_cast(lang_attr.getAsUnsignedConstant().getValue()); - - const DWARFFormValue& producer_attr = - GetAttribute(unit_die, llvm::dwarf::DW_AT_producer).ValueOr({}); - - auto s = producer_attr.getAsCString(); +StatusOr> +DwarfReader::DetectSourceLanguageFromCUDIE(const llvm::DWARFDie& unit_die) { + if (unit_die.getTag() != llvm::dwarf::DW_TAG_compile_unit) { + // Only DW_TAG_compile_unit is handled; DW_TAG_partial_unit and other tags are rejected.
+ return error::NotFound("Expected DW_TAG_compile_unit, but got DW_TAG=$0 for unit DIE: $1", + magic_enum::enum_name(unit_die.getTag()), Dump(unit_die)); + } + const DWARFFormValue& producer_attr = + GetAttribute(unit_die, llvm::dwarf::DW_AT_producer).ValueOr({}); + auto s = producer_attr.getAsCString(); + std::string compiler; #if LLVM_VERSION_MAJOR >= 14 - if (!s.takeError()) { - compiler_ = s.get(); - } + if (!s.takeError()) { + compiler = s.get(); + } #else - compiler_ = s.getValueOr(""); + compiler = s.getValueOr(""); #endif - - return Status::OK(); - } - return error::Internal( - "Could not determine the source language of the DWARF info. DW_AT_language not found on " - "any compilation unit."); + PX_ASSIGN_OR_RETURN(const DWARFFormValue& lang_attr, + GetAttribute(unit_die, llvm::dwarf::DW_AT_language)); + auto source_language = + static_cast(lang_attr.getAsUnsignedConstant().getValue()); + return std::make_pair(source_language, compiler); } void DwarfReader::IndexDIEs( @@ -923,16 +914,21 @@ StatusOr> DwarfReader::GetFunctionArgInfo( // but DW_AT_location has been found to be blank in some cases, making it unreliable. // Instead, we use a FunctionArgTracker that tries to reverse engineer the calling convention. - ABI abi = LanguageToABI(source_language_, compiler_); + PX_ASSIGN_OR_RETURN(const DWARFDie& function_die, + GetMatchingDIE(function_symbol_name, llvm::dwarf::DW_TAG_subprogram)); + // Certain binaries can have DW_TAG_compile_units with different source languages. When compiling + // programs with ASAN/TSAN enabled this is common. 
+ llvm::DWARFUnit* cu = function_die.getDwarfUnit(); + llvm::DWARFDie unit_die = cu->getUnitDIE(); + PX_ASSIGN_OR_RETURN(auto source_lang_info, DetectSourceLanguageFromCUDIE(unit_die)); + auto [source_lang, compiler] = source_lang_info; + ABI abi = LanguageToABI(source_lang, compiler); if (abi == ABI::kUnknown) { return error::Unimplemented("Unable to determine ABI from language: $0", - magic_enum::enum_name(source_language_)); + magic_enum::enum_name(source_lang)); } std::unique_ptr arg_tracker = ABICallingConventionModel::Create(abi); - PX_ASSIGN_OR_RETURN(const DWARFDie& function_die, - GetMatchingDIE(function_symbol_name, llvm::dwarf::DW_TAG_subprogram)); - // If function has a return value, process that first. // This is important, because in some ABIs (e.g. SystemV ABI), // if the return value is not able to be passed back in the available registers, @@ -968,7 +964,7 @@ StatusOr> DwarfReader::GetFunctionArgInfo( PX_ASSIGN_OR_RETURN(const DWARFDie type_die, GetTypeDie(die)); PX_ASSIGN_OR_RETURN(arg.type_info, GetTypeInfo(die, type_die)); - if (source_language_ == llvm::dwarf::DW_LANG_Go) { + if (source_lang == llvm::dwarf::DW_LANG_Go) { arg.retarg = IsGolangRetArg(die).ValueOr(false); } diff --git a/src/stirling/obj_tools/dwarf_reader.h b/src/stirling/obj_tools/dwarf_reader.h index 3e14e636370..8c9d848562f 100644 --- a/src/stirling/obj_tools/dwarf_reader.h +++ b/src/stirling/obj_tools/dwarf_reader.h @@ -178,6 +178,12 @@ class DwarfReader { StatusOr GetMatchingDIE(std::string_view name, std::optional type = {}); + /** + * Detects the source language and compiler from a DW_TAG_compile_unit's attributes. + */ + StatusOr> DetectSourceLanguageFromCUDIE( + const llvm::DWARFDie& die); + /** * Return the size of a struct. 
*/ @@ -291,16 +297,10 @@ class DwarfReader { bool IsValid() const { return dwarf_context_->getNumCompileUnits() != 0; } - const llvm::dwarf::SourceLanguage& source_language() const { return source_language_; } - const std::string& compiler() const { return compiler_; } - private: DwarfReader(std::unique_ptr buffer, std::unique_ptr dwarf_context); - // Detects the source language of the dwarf content being read. - Status DetectSourceLanguage(); - // Builds an index for certain commonly used DIE types (e.g. structs and functions). // When making multiple DwarfReader calls, this speeds up the process at the cost of some memory. // @@ -317,12 +317,6 @@ class DwarfReader { void InsertToDIEMap(std::string name, llvm::dwarf::Tag tag, llvm::DWARFDie die); std::optional FindInDIEMap(const std::string& name, llvm::dwarf::Tag tag) const; - // Records the source language of the DWARF information. - llvm::dwarf::SourceLanguage source_language_; - - // Records the name of the compiler that produces this file. - std::string compiler_; - std::unique_ptr memory_buffer_; std::unique_ptr dwarf_context_; diff --git a/src/stirling/obj_tools/dwarf_reader_test.cc b/src/stirling/obj_tools/dwarf_reader_test.cc index 253718ae503..1b4a6fd3d89 100644 --- a/src/stirling/obj_tools/dwarf_reader_test.cc +++ b/src/stirling/obj_tools/dwarf_reader_test.cc @@ -125,23 +125,32 @@ TEST_P(CppDwarfReaderTest, NonExistentPath) { TEST_P(CppDwarfReaderTest, SourceLanguage) { { + // Check that source language detection for individual DIEs works. + ASSERT_OK_AND_ASSIGN( + auto die, dwarf_reader->GetMatchingDIE("CanYouFindThis", llvm::dwarf::DW_TAG_subprogram)); + llvm::DWARFUnit* cu = die.getDwarfUnit(); + llvm::DWARFDie unit_die = cu->getUnitDIE(); + ASSERT_OK_AND_ASSIGN(auto p, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die)); // We use C++17, but the dwarf shows 14.
- EXPECT_EQ(dwarf_reader->source_language(), llvm::dwarf::DW_LANG_C_plus_plus_14); - EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("clang")); + EXPECT_EQ(p.first, llvm::dwarf::DW_LANG_C_plus_plus_14); + EXPECT_THAT(p.second, ::testing::HasSubstr("clang")); } } TEST_P(GolangDwarfReaderTest, SourceLanguage) { { - EXPECT_EQ(dwarf_reader->source_language(), llvm::dwarf::DW_LANG_Go); - EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("go")); - + // Check that source language detection for individual DIEs works. ASSERT_OK_AND_ASSIGN(const bool uses_regabi, UsesRegABI()); - + ASSERT_OK_AND_ASSIGN(auto die, dwarf_reader->GetMatchingDIE("main.(*Vertex).Scale", + llvm::dwarf::DW_TAG_subprogram)); + llvm::DWARFUnit* cu = die.getDwarfUnit(); + llvm::DWARFDie unit_die = cu->getUnitDIE(); + ASSERT_OK_AND_ASSIGN(auto p, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die)); + EXPECT_EQ(p.first, llvm::dwarf::DW_LANG_Go); if (uses_regabi) { - EXPECT_THAT(dwarf_reader->compiler(), ::testing::HasSubstr("regabi")); + EXPECT_THAT(p.second, ::testing::HasSubstr("regabi")); } else { - EXPECT_THAT(dwarf_reader->compiler(), ::testing::Not(::testing::HasSubstr("regabi"))); + EXPECT_THAT(p.second, ::testing::Not(::testing::HasSubstr("regabi"))); } } } diff --git a/src/stirling/obj_tools/elf_reader.h b/src/stirling/obj_tools/elf_reader.h index 67dba7c3ef8..8655b4e6fd3 100644 --- a/src/stirling/obj_tools/elf_reader.h +++ b/src/stirling/obj_tools/elf_reader.h @@ -62,6 +62,7 @@ class ElfReader { const std::string& binary_path, const std::filesystem::path& debug_file_dir = kDebugFileDir); std::filesystem::path& debug_symbols_path() { return debug_symbols_path_; } + const std::string& binary_path() const { return binary_path_; } struct SymbolInfo { std::string name; diff --git a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.cc b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.cc index b984f1fdbaf..0cfad75e7d9 100644 --- 
a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.cc +++ b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.cc @@ -42,6 +42,7 @@ StatusOr TransformSourceLanguage( return ir::shared::Language::GOLANG; case llvm::dwarf::DW_LANG_C: case llvm::dwarf::DW_LANG_C99: + case llvm::dwarf::DW_LANG_C11: case llvm::dwarf::DW_LANG_C_plus_plus: case llvm::dwarf::DW_LANG_C_plus_plus_03: case llvm::dwarf::DW_LANG_C_plus_plus_11: @@ -53,42 +54,49 @@ StatusOr TransformSourceLanguage( } } +StatusOr TryDetectSourceLanguageFromDwarf( + obj_tools::DwarfReader* dwarf_reader, const std::string& symbol_name) { + PX_ASSIGN_OR_RETURN(const auto& function_die, + dwarf_reader->GetMatchingDIE(symbol_name, llvm::dwarf::DW_TAG_subprogram)); + llvm::DWARFUnit* cu = function_die.getDwarfUnit(); + llvm::DWARFDie unit_die = cu->getUnitDIE(); + + PX_ASSIGN_OR_RETURN(auto lang_pair, dwarf_reader->DetectSourceLanguageFromCUDIE(unit_die)); + llvm::dwarf::SourceLanguage source_lang = lang_pair.first; + PX_ASSIGN_OR_RETURN(auto detected_language, TransformSourceLanguage(source_lang)); + + return detected_language; +} + } // namespace void DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader, - ir::logical::TracepointDeployment* input_program) { + ir::logical::TracepointSpec* program, const std::string& symbol_name) { ir::shared::Language detected_language = ir::shared::Language::LANG_UNKNOWN; - // Primary detection mechanism is DWARF info, when available. 
if (dwarf_reader != nullptr) { - detected_language = TransformSourceLanguage(dwarf_reader->source_language()) - .ConsumeValueOr(ir::shared::Language::LANG_UNKNOWN); + auto result = TryDetectSourceLanguageFromDwarf(dwarf_reader, symbol_name); + if (result.ok()) { + detected_language = result.ConsumeValueOrDie(); + } } else { // Back-up detection policy looks for certain language-specific symbols if (IsGoExecutable(elf_reader)) { detected_language = ir::shared::Language::GOLANG; } - - // TODO(oazizi): Make this stronger by adding more elf-based tests. } if (detected_language != ir::shared::Language::LANG_UNKNOWN) { - LOG(INFO) << absl::Substitute("Using language $0 for object $1 and others", - magic_enum::enum_name(dwarf_reader->source_language()), - input_program->deployment_spec().path_list().paths(0)); - - // Since we only support tracing of a single object, all tracepoints have the same language. - for (auto& tracepoint : *input_program->mutable_tracepoints()) { - tracepoint.mutable_program()->set_language(detected_language); - } + program->set_language(detected_language); + LOG(INFO) << absl::Substitute("Using language $0 for object $1 and symbol $2", + magic_enum::enum_name(detected_language), + elf_reader->binary_path(), symbol_name); } else { - // For now, just print a warning, and let the probe proceed. - // This is so we can use things like function argument tracing even when other features may not - // work. + // Fall back to warning and assume C/C++ ABI LOG(WARNING) << absl::Substitute( "Language for object $0 and others is unknown or unsupported, so assuming C/C++ ABI. 
" "Some dynamic tracing features may not work, or may produce unexpected results.", - input_program->deployment_spec().path_list().paths(0)); + elf_reader->binary_path()); } } namespace { @@ -110,8 +118,9 @@ bool IsWholeWordSuffix(std::string_view name, std::string_view suffix) { } // namespace -Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader, - ir::logical::TracepointDeployment* input_program) { +Status ResolveProbeSymbolAndLanguage(obj_tools::ElfReader* elf_reader, + obj_tools::DwarfReader* dwarf_reader, + ir::logical::TracepointDeployment* input_program) { // Expand symbol for (auto& t : *input_program->mutable_tracepoints()) { for (auto& probe : *t.mutable_program()->mutable_probes()) { @@ -156,7 +165,9 @@ Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader, return error::Internal("Could not find valid symbol match"); } - *probe.mutable_tracepoint()->mutable_symbol() = *symbol_name; + auto tracepoint = probe.mutable_tracepoint(); + *tracepoint->mutable_symbol() = *symbol_name; + DetectSourceLanguage(elf_reader, dwarf_reader, t.mutable_program(), *symbol_name); } } diff --git a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.h b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.h index 50be88e1f93..0777636638e 100644 --- a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.h +++ b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen.h @@ -18,6 +18,8 @@ #pragma once +#include + #include "src/common/base/base.h" #include "src/stirling/obj_tools/dwarf_reader.h" #include "src/stirling/obj_tools/elf_reader.h" @@ -31,18 +33,22 @@ namespace dynamic_tracing { /** * Uses ELF or DWARF information to detect the source language. * Populates the tracepoint program's language field in input_program. + * If the language cannot be determined, it assumes a C/C++ language ABI. 
*/ void DetectSourceLanguage(obj_tools::ElfReader* elf_reader, obj_tools::DwarfReader* dwarf_reader, - ir::logical::TracepointDeployment* input_program); + ir::logical::TracepointSpec* program, const std::string& symbol_name); /** * Uses ELF information to check if the provided symbol exists. * If it does not exist, it checks whether it is a short-hand (suffix) of a full symbol. * If it is a short-hand reference to a symbol, the symbol is replaced with the full-form. - * Potentially modifies each tracepoint's symbol field in input_program. + * Also detects the source language for each resolved symbol using DWARF or ELF information. + * Potentially modifies each tracepoint's symbol field and program's language field in + * input_program. */ -Status ResolveProbeSymbol(obj_tools::ElfReader* elf_reader, - ir::logical::TracepointDeployment* input_program); +Status ResolveProbeSymbolAndLanguage(obj_tools::ElfReader* elf_reader, + obj_tools::DwarfReader* dwarf_reader, + ir::logical::TracepointDeployment* input_program); /** * If any tracepoint in input_program contains no fields to trace, this function uses DWARF info diff --git a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen_test.cc b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen_test.cc index dbf07f112a1..dfc8833e45d 100644 --- a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen_test.cc +++ b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/autogen_test.cc @@ -66,7 +66,7 @@ tracepoints { probes { name: "probe0" tracepoint { - symbol: "MixedArgTypes" + symbol: "main.MixedArgTypes" type: LOGICAL } } @@ -82,7 +82,6 @@ deployment_spec { } tracepoints { program { - language: GOLANG probes { name: "probe0" tracepoint { @@ -212,12 +211,15 @@ TEST_P(DetectSourceLanguageTest, Transform) { std::string expected_output = absl::Substitute(p.expected_output, binary_path_); - DetectSourceLanguage(elf_reader_.get(), dwarf_reader_.get(), &program); + 
DetectSourceLanguage(elf_reader_.get(), dwarf_reader_.get(), + program.mutable_tracepoints(0)->mutable_program(), + program.tracepoints(0).program().probes(0).tracepoint().symbol()); ASSERT_THAT(program, EqualsProto(expected_output)); } INSTANTIATE_TEST_SUITE_P(DetectSourceLanguageTestSuite, DetectSourceLanguageTest, - ::testing::Values(ProbeGenTestParam{kInputProgram, kProgramWithLanguage})); + ::testing::Values(ProbeGenTestParam{kProgramWithSymbol, + kProgramWithLanguage})); //------------------------------------- // ResolveProbeSymbol Tests @@ -233,14 +235,13 @@ TEST_P(ResolveProbeSymbolTest, Transform) { std::string expected_output = absl::Substitute(p.expected_output, binary_path_); - ASSERT_OK(ResolveProbeSymbol(elf_reader_.get(), &program)); + ASSERT_OK(ResolveProbeSymbolAndLanguage(elf_reader_.get(), dwarf_reader_.get(), &program)); ASSERT_THAT(program, EqualsProto(expected_output)); } INSTANTIATE_TEST_SUITE_P(ResolveProbeSymbolTestSuite, ResolveProbeSymbolTest, - ::testing::Values(ProbeGenTestParam{kProgramWithLanguage, - kProgramWithSymbol})); + ::testing::Values(ProbeGenTestParam{kInputProgram, kProgramWithLanguage})); TEST_F(ResolveProbeSymbolTest, IncompleteSymbol) { constexpr std::string_view kInputProgramWithIncompleteSymbol = R"( @@ -265,7 +266,7 @@ tracepoints { ir::logical::TracepointDeployment program; ASSERT_NO_FATAL_FAILURE(PrepareInput(kInputProgramWithIncompleteSymbol, &program)); - ASSERT_NOT_OK(ResolveProbeSymbol(elf_reader_.get(), &program)); + ASSERT_NOT_OK(ResolveProbeSymbolAndLanguage(elf_reader_.get(), dwarf_reader_.get(), &program)); } TEST_F(ResolveProbeSymbolTest, AmbiguousSymbol) { @@ -291,7 +292,7 @@ tracepoints { ir::logical::TracepointDeployment program; ASSERT_NO_FATAL_FAILURE(PrepareInput(kInputProgramWithAmbiguousSymbol, &program)); - Status result = ResolveProbeSymbol(elf_reader_.get(), &program); + Status result = ResolveProbeSymbolAndLanguage(elf_reader_.get(), dwarf_reader_.get(), &program); ASSERT_NOT_OK(result); 
ASSERT_THAT(result.ToString(), HasSubstr("Symbol is ambiguous. Found at least 2 possible matches")); @@ -317,7 +318,7 @@ TEST_P(AutoTraceExpansionTest, Transform) { } INSTANTIATE_TEST_SUITE_P(AutoTraceExpansionTestSuite, AutoTraceExpansionTest, - ::testing::Values(ProbeGenTestParam{kProgramWithSymbol, + ::testing::Values(ProbeGenTestParam{kProgramWithLanguage, kAutoTraceExpansionOutput})); } // namespace dynamic_tracing diff --git a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/dynamic_tracer.cc b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/dynamic_tracer.cc index 814676479e9..301b1ef45f0 100644 --- a/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/dynamic_tracer.cc +++ b/src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/dynamic_tracer.cc @@ -141,11 +141,9 @@ StatusOr CompileProgram(ir::logical::TracepointDeployment* input_pro // Pre-processing pipeline // -------------------------- - // Populate source language. - DetectSourceLanguage(obj_info.elf_reader.get(), obj_info.dwarf_reader.get(), input_program); - - // Expand symbol. - PX_RETURN_IF_ERROR(ResolveProbeSymbol(obj_info.elf_reader.get(), input_program)); + // Expand symbols and populate source language. 
+ PX_RETURN_IF_ERROR(ResolveProbeSymbolAndLanguage(obj_info.elf_reader.get(), + obj_info.dwarf_reader.get(), input_program)); LOG_IF(INFO, FLAGS_debug_dt_pipeline) << input_program->DebugString(); diff --git a/src/stirling/source_connectors/socket_tracer/BUILD.bazel b/src/stirling/source_connectors/socket_tracer/BUILD.bazel index 35be00e993f..10d20e5e03f 100644 --- a/src/stirling/source_connectors/socket_tracer/BUILD.bazel +++ b/src/stirling/source_connectors/socket_tracer/BUILD.bazel @@ -70,7 +70,7 @@ pl_cc_test( name = "uprobe_symaddrs_test", srcs = ["uprobe_symaddrs_test.cc"], data = [ - "//src/stirling/testing/demo_apps/go_grpc_tls_pl/server:golang_1_19_grpc_tls_server_binary", + "//src/stirling/testing/demo_apps/go_grpc_tls_pl/server:golang_1_24_grpc_tls_server_binary", "//src/stirling/testing/demo_apps/node:node_debug", ], deps = [ diff --git a/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs_test.cc b/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs_test.cc index 12f7688c8c9..9c165d03bf4 100644 --- a/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs_test.cc +++ b/src/stirling/source_connectors/socket_tracer/uprobe_symaddrs_test.cc @@ -44,7 +44,7 @@ class UprobeSymaddrsTest : public ::testing::Test { } static inline constexpr std::string_view kGoGRPCServer = - "src/stirling/testing/demo_apps/go_grpc_tls_pl/server/golang_1_19_grpc_tls_server_binary"; + "src/stirling/testing/demo_apps/go_grpc_tls_pl/server/golang_1_24_grpc_tls_server_binary"; std::unique_ptr dwarf_reader_; std::unique_ptr elf_reader_; @@ -63,7 +63,7 @@ TEST_F(UprobeSymaddrsTest, GoCommonSymAddrs) { // If the test breaks because of that, just update the numbers here. EXPECT_EQ(symaddrs.FD_Sysfd_offset, 16); EXPECT_EQ(symaddrs.tlsConn_conn_offset, 0); - EXPECT_EQ(symaddrs.g_goid_offset, 152); + EXPECT_EQ(symaddrs.g_goid_offset, 160); } TEST_F(UprobeSymaddrsTest, GoHTTP2SymAddrs) {