diff --git a/BUILD.bazel b/BUILD.bazel index 874f7e13e5e..bf5810028a5 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -60,6 +60,7 @@ gazelle( # gazelle:resolve go px.dev/pixie/src/carnot/planner/compilerpb //src/carnot/planner/compilerpb:compiler_status_pl_go_proto # gazelle:resolve go px.dev/pixie/src/carnot/planner/distributedpb //src/carnot/planner/distributedpb:distributed_plan_pl_go_proto # gazelle:resolve go px.dev/pixie/src/carnot/planner/file_source/ir //src/carnot/planner/file_source/ir:logical_pl_go_proto +# gazelle:resolve go px.dev/pixie/src/carnot/planner/tetragon/ir //src/carnot/planner/tetragon/ir:logical_pl_go_proto # gazelle:resolve go px.dev/pixie/src/carnot/planner/dynamic_tracing/ir/logicalpb //src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_go_proto # gazelle:resolve go px.dev/pixie/src/carnot/planner/plannerpb //src/carnot/planner/plannerpb:service_pl_go_proto # gazelle:resolve go px.dev/pixie/src/carnot/planpb //src/carnot/planpb:plan_pl_go_proto diff --git a/src/carnot/planner/compiler/BUILD.bazel b/src/carnot/planner/compiler/BUILD.bazel index 359d3518227..4137b4932a5 100644 --- a/src/carnot/planner/compiler/BUILD.bazel +++ b/src/carnot/planner/compiler/BUILD.bazel @@ -41,6 +41,7 @@ pl_cc_library( "//src/carnot/planner/compiler_error_context:cc_library", "//src/carnot/planner/compiler_state:cc_library", "//src/carnot/planner/file_source:cc_library", + "//src/carnot/planner/tetragon:cc_library", "//src/carnot/planner/ir:cc_library", "//src/carnot/planner/metadata:cc_library", "//src/carnot/planner/objects:cc_library", diff --git a/src/carnot/planner/compiler/ast_visitor.cc b/src/carnot/planner/compiler/ast_visitor.cc index a4bfe1eb071..90f87bea786 100644 --- a/src/carnot/planner/compiler/ast_visitor.cc +++ b/src/carnot/planner/compiler/ast_visitor.cc @@ -106,6 +106,8 @@ Status ASTVisitorImpl::SetupModules( TraceModule::Create(mutations_, this)); PX_ASSIGN_OR_RETURN((*module_handler_)[LogModule::kLogModuleObjName], 
LogModule::Create(mutations_, this)); + PX_ASSIGN_OR_RETURN((*module_handler_)[TetragonModule::kTetragonModuleObjName], + TetragonModule::Create(mutations_, this)); PX_ASSIGN_OR_RETURN((*module_handler_)[ConfigModule::kConfigModuleObjName], ConfigModule::Create(mutations_, this)); for (const auto& [module_name, module_text] : module_name_to_pxl_map) { diff --git a/src/carnot/planner/compiler/ast_visitor.h b/src/carnot/planner/compiler/ast_visitor.h index 7984698ecb5..5e5f12d5f6b 100644 --- a/src/carnot/planner/compiler/ast_visitor.h +++ b/src/carnot/planner/compiler/ast_visitor.h @@ -34,6 +34,7 @@ #include "src/carnot/planner/ast/ast_visitor.h" #include "src/carnot/planner/compiler_state/compiler_state.h" #include "src/carnot/planner/file_source/log_module.h" +#include "src/carnot/planner/tetragon/tetragon_module.h" #include "src/carnot/planner/ir/ast_utils.h" #include "src/carnot/planner/ir/ir.h" #include "src/carnot/planner/objects/dataframe.h" diff --git a/src/carnot/planner/plannerpb/BUILD.bazel b/src/carnot/planner/plannerpb/BUILD.bazel index 8fb5c37e0e4..8b445c2601b 100644 --- a/src/carnot/planner/plannerpb/BUILD.bazel +++ b/src/carnot/planner/plannerpb/BUILD.bazel @@ -29,6 +29,7 @@ pl_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_proto", "//src/carnot/planner/file_source/ir:logical_pl_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_proto", "//src/carnot/planpb:plan_pl_proto", "//src/common/base/statuspb:status_pl_proto", "//src/shared/types/typespb:types_pl_proto", @@ -44,6 +45,7 @@ pl_cc_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_cc_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_cc_proto", "//src/carnot/planner/file_source/ir:logical_pl_cc_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_cc_proto", "//src/carnot/planpb:plan_pl_cc_proto", "//src/common/base/statuspb:status_pl_cc_proto", 
"//src/shared/types/typespb/wrapper:cc_library", @@ -59,6 +61,7 @@ pl_go_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_go_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_go_proto", "//src/carnot/planner/file_source/ir:logical_pl_go_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_go_proto", "//src/carnot/planpb:plan_pl_go_proto", "//src/common/base/statuspb:status_pl_go_proto", "//src/shared/types/typespb:types_pl_go_proto", diff --git a/src/carnot/planner/plannerpb/service.proto b/src/carnot/planner/plannerpb/service.proto index 0c3b63aad94..e8a673466ee 100644 --- a/src/carnot/planner/plannerpb/service.proto +++ b/src/carnot/planner/plannerpb/service.proto @@ -24,6 +24,7 @@ option go_package = "plannerpb"; import "src/carnot/planner/dynamic_tracing/ir/logicalpb/logical.proto"; import "src/carnot/planner/file_source/ir/logical.proto"; +import "src/carnot/planner/tetragon/ir/logical.proto"; import "src/common/base/statuspb/status.proto"; import "gogoproto/gogo.proto"; import "src/carnot/planner/distributedpb/distributed_plan.proto"; @@ -135,6 +136,11 @@ message DeleteFileSource { string glob_pattern = 1; } +message DeleteTetragon { + // The glob pattern to use to find files to read. Also doubles as the name of the tetragon log file. + string glob_pattern = 1; +} + // The definition of a mutation to perfom on Vizier. Mutations include operations // that add and delete tables to the database. 
message CompileMutation { @@ -150,6 +156,9 @@ message CompileMutation { carnot.planner.file_source.ir.FileSourceDeployment file_source = 5; // Mutation that deletes a file source/poller DeleteFileSource delete_file_source = 6; + carnot.planner.tetragon.ir.TetragonDeployment tetragon = 7; + // Mutation that deletes a tetragon/poller + DeleteTetragon delete_tetragon = 8; } } diff --git a/src/carnot/planner/probes/BUILD.bazel b/src/carnot/planner/probes/BUILD.bazel index bd98b0fb8d6..396632910a2 100644 --- a/src/carnot/planner/probes/BUILD.bazel +++ b/src/carnot/planner/probes/BUILD.bazel @@ -38,6 +38,7 @@ pl_cc_library( deps = [ "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_cc_proto", "//src/carnot/planner/file_source/ir:logical_pl_cc_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_cc_proto", "//src/carnot/planner/objects:cc_library", "//src/common/uuid:cc_library", ], diff --git a/src/carnot/planner/probes/probes.cc b/src/carnot/planner/probes/probes.cc index 942abab414c..1467ad70199 100644 --- a/src/carnot/planner/probes/probes.cc +++ b/src/carnot/planner/probes/probes.cc @@ -312,6 +312,14 @@ Status MutationsIR::ToProto(plannerpb::CompileMutationsResponse* pb) { *(pb->add_mutations()->mutable_file_source()) = file_source; } + for (const auto& tetragon_to_delete : TetragonsToDelete()) { + pb->add_mutations()->mutable_delete_tetragon()->set_glob_pattern(tetragon_to_delete); + } + + for (const auto& tetragon : tetragon_deployments_) { + *(pb->add_mutations()->mutable_tetragon()) = tetragon; + } + return Status::OK(); } @@ -329,6 +337,26 @@ void MutationsIR::CreateFileSourceDeployment(const std::string& glob_pattern, file_source_deployments_.push_back(file_source); } +std::vector MutationsIR::TetragonDeployments() { + std::vector tetragon_deployments; + for (size_t i = 0; i < tetragon_deployments_.size(); i++) { + tetragon_deployments.push_back(tetragon_deployments_[i]); + } + return tetragon_deployments; +} + +void 
MutationsIR::CreateTetragonDeployment(const std::string& glob_pattern, + const std::string& table_name, int64_t ttl_ns) { + tetragon::ir::TetragonDeployment tetragon; + tetragon.set_name(glob_pattern); + tetragon.set_glob_pattern(glob_pattern); + tetragon.set_table_name(table_name); + auto one_sec = std::chrono::duration_cast(std::chrono::seconds(1)); + tetragon.mutable_ttl()->set_seconds(ttl_ns / one_sec.count()); + tetragon.mutable_ttl()->set_nanos(ttl_ns % one_sec.count()); + tetragon_deployments_.push_back(tetragon); +} + } // namespace compiler } // namespace planner } // namespace carnot diff --git a/src/carnot/planner/probes/probes.h b/src/carnot/planner/probes/probes.h index 3d90992402b..55fc8d65527 100644 --- a/src/carnot/planner/probes/probes.h +++ b/src/carnot/planner/probes/probes.h @@ -24,6 +24,7 @@ #include "src/carnot/planner/dynamic_tracing/ir/logicalpb/logical.pb.h" #include "src/carnot/planner/file_source/ir/logical.pb.h" +#include "src/carnot/planner/tetragon/ir/logical.pb.h" #include "src/carnot/planner/objects/funcobject.h" #include "src/carnot/planner/plannerpb/service.pb.h" #include "src/carnot/planner/probes/label_selector_target.h" @@ -181,6 +182,20 @@ class FileSourceDeployment { int64_t ttl_ns_; }; +class TetragonDeployment { + public: + TetragonDeployment(const std::string& glob_pattern, const std::string& table_name, + int64_t ttl_ns) + : glob_pattern_(glob_pattern), table_name_(table_name), ttl_ns_(ttl_ns) {} + + Status ToProto(tetragon::ir::TetragonDeployment pb) const; + + private: + std::string glob_pattern_; + std::string table_name_; + int64_t ttl_ns_; +}; + class TracepointDeployment { public: TracepointDeployment(const std::string& trace_name, int64_t ttl_ns) @@ -244,6 +259,12 @@ class MutationsIR { int64_t ttl_ns); void CreateDeleteFileSource(const std::string& glob_pattern); + + void CreateTetragonDeployment(const std::string& glob_pattern, const std::string& table_name, + int64_t ttl_ns); + + void CreateDeleteTetragon(const 
std::string& glob_pattern); + /** * @brief Create a TraceProgram for the MutationsIR w/ the specified UPID. * @@ -363,7 +384,20 @@ class MutationsIR { const std::vector& FileSourcesToDelete() { return file_sources_to_delete_; } - private: + std::vector TetragonDeployments(); + + /** + * @brief Deletes the tetragon log passed in. + * + * @param tetragon_to_delete + */ + void DeleteTetragon(const std::string& tetragon_to_delete) { + tetragons_to_delete_.push_back(tetragon_to_delete); + } + + const std::vector& TetragonsToDelete() { return tetragons_to_delete_; } + + private: // All the new tracepoints added as part of this mutation. DeploymentSpecs are protobufs because // we only modify these upon inserting the new tracepoint, while the Tracepoint definition is // still modified aftered adding the tracepoint. @@ -383,6 +417,9 @@ class MutationsIR { std::vector file_source_deployments_; std::vector file_sources_to_delete_; + + std::vector tetragon_deployments_; + std::vector tetragons_to_delete_; }; } // namespace compiler diff --git a/src/carnot/planner/probes/tracepoint_generator.cc b/src/carnot/planner/probes/tracepoint_generator.cc index 3dc23bd2c66..60c24f2f855 100644 --- a/src/carnot/planner/probes/tracepoint_generator.cc +++ b/src/carnot/planner/probes/tracepoint_generator.cc @@ -29,6 +29,7 @@ #include "src/carnot/planner/dynamic_tracing/ir/logicalpb/logical.pb.h" #include "src/carnot/planner/file_source/ir/logical.pb.h" +#include "src/carnot/planner/tetragon/ir/logical.pb.h" namespace px { namespace carnot { @@ -83,6 +84,11 @@ StatusOr CompileFileSource(std::string_vi return pb.mutations()[0].file_source(); } +StatusOr CompileTetragon(std::string_view query) { + PX_ASSIGN_OR_RETURN(auto pb, CompileMutations(query)); + return pb.mutations()[0].tetragon(); +} + } // namespace compiler } // namespace planner } // namespace carnot diff --git a/src/carnot/planner/probes/tracepoint_generator.h b/src/carnot/planner/probes/tracepoint_generator.h index 
0894d92bbec..3e34694b2fa 100644 --- a/src/carnot/planner/probes/tracepoint_generator.h +++ b/src/carnot/planner/probes/tracepoint_generator.h @@ -39,6 +39,12 @@ StatusOr Co */ StatusOr CompileFileSource(std::string_view query); +/** + * Take a tetragon log specification in PXL format, and compiles it to a logical tetragon log + * deployment. + */ +StatusOr CompileTetragon(std::string_view query); + } // namespace compiler } // namespace planner } // namespace carnot diff --git a/src/carnot/planner/tetragon/BUILD.bazel b/src/carnot/planner/tetragon/BUILD.bazel new file mode 100644 index 00000000000..1b4c67542da --- /dev/null +++ b/src/carnot/planner/tetragon/BUILD.bazel @@ -0,0 +1,52 @@ +# Copyright 2018- The Pixie Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +load("//bazel:pl_build_system.bzl", "pl_cc_binary", "pl_cc_library", "pl_cc_test") + +package(default_visibility = [ + "//src/carnot:__subpackages__", + "//src/experimental/standalone_pem:__subpackages__", # TODO(ddelnano): Is this needed? 
+]) + +pl_cc_library( + name = "cc_library", + srcs = glob( + [ + "*.cc", + "*.h", + ], + exclude = [ + "**/*_test.cc", + "**/*_test_utils.h", + ], + ), + hdrs = ["tetragon.h"], + deps = [ + "//src/carnot/planner/objects:cc_library", + "//src/carnot/planner/probes:cc_library", + "//src/common/uuid:cc_library", # TODO(ddelnano): This may not be needed + ], +) + +pl_cc_test( + name = "tetragon_test", + srcs = ["tetragon_test.cc"], + deps = [ + ":cc_library", + "//src/carnot/planner:test_utils", + "//src/carnot/planner/compiler:cc_library", + ], +) diff --git a/src/carnot/planner/tetragon/ir/BUILD.bazel b/src/carnot/planner/tetragon/ir/BUILD.bazel new file mode 100644 index 00000000000..0a6533ffc2d --- /dev/null +++ b/src/carnot/planner/tetragon/ir/BUILD.bazel @@ -0,0 +1,41 @@ +# Copyright 2018- The Pixie Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 + +load("//bazel:proto_compile.bzl", "pl_cc_proto_library", "pl_go_proto_library", "pl_proto_library") + +package(default_visibility = ["//src:__subpackages__"]) + +pl_proto_library( + name = "logical_pl_proto", + srcs = ["logical.proto"], + deps = [ + "@gogo_grpc_proto//gogoproto:gogo_pl_proto", + ], +) + +pl_cc_proto_library( + name = "logical_pl_cc_proto", + proto = ":logical_pl_proto", + deps = [ + "@gogo_grpc_proto//gogoproto:gogo_pl_cc_proto", + ], +) + +pl_go_proto_library( + name = "logical_pl_go_proto", + importpath = "px.dev/pixie/src/carnot/planner/tetragon/ir", + proto = ":logical_pl_proto", +) diff --git a/src/carnot/planner/tetragon/ir/logical.pb.go b/src/carnot/planner/tetragon/ir/logical.pb.go new file mode 100755 index 00000000000..f424f8ec525 --- /dev/null +++ b/src/carnot/planner/tetragon/ir/logical.pb.go @@ -0,0 +1,567 @@ +// Code generated by protoc-gen-gogo. DO NOT EDIT. +// source: src/carnot/planner/file_source/ir/logical.proto + +package ir + +import ( + fmt "fmt" + _ "github.com/gogo/protobuf/gogoproto" + proto "github.com/gogo/protobuf/proto" + types "github.com/gogo/protobuf/types" + io "io" + math "math" + math_bits "math/bits" + reflect "reflect" + strings "strings" +) + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package + +type FileSourceDeployment struct { + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + GlobPattern string `protobuf:"bytes,2,opt,name=glob_pattern,json=globPattern,proto3" json:"glob_pattern,omitempty"` + TableName string `protobuf:"bytes,3,opt,name=table_name,json=tableName,proto3" json:"table_name,omitempty"` + TTL *types.Duration `protobuf:"bytes,4,opt,name=ttl,proto3" json:"ttl,omitempty"` +} + +func (m *FileSourceDeployment) Reset() { *m = FileSourceDeployment{} } +func (*FileSourceDeployment) ProtoMessage() {} +func (*FileSourceDeployment) Descriptor() ([]byte, []int) { + return fileDescriptor_452b4826b1190f86, []int{0} +} +func (m *FileSourceDeployment) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *FileSourceDeployment) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_FileSourceDeployment.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *FileSourceDeployment) XXX_Merge(src proto.Message) { + xxx_messageInfo_FileSourceDeployment.Merge(m, src) +} +func (m *FileSourceDeployment) XXX_Size() int { + return m.Size() +} +func (m *FileSourceDeployment) XXX_DiscardUnknown() { + xxx_messageInfo_FileSourceDeployment.DiscardUnknown(m) +} + +var xxx_messageInfo_FileSourceDeployment proto.InternalMessageInfo + +func (m *FileSourceDeployment) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +func (m *FileSourceDeployment) GetGlobPattern() string { + if m != nil { + return m.GlobPattern + } + return "" +} + +func (m *FileSourceDeployment) GetTableName() string { + if m != nil { + return m.TableName + } + return "" +} + +func (m *FileSourceDeployment) GetTTL() *types.Duration { + if m != nil { + return m.TTL + } + return nil +} + +func init() { 
+ proto.RegisterType((*FileSourceDeployment)(nil), "px.carnot.planner.file_source.ir.FileSourceDeployment") +} + +func init() { + proto.RegisterFile("src/carnot/planner/file_source/ir/logical.proto", fileDescriptor_452b4826b1190f86) +} + +var fileDescriptor_452b4826b1190f86 = []byte{ + // 302 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x4c, 0x8e, 0xb1, 0x4e, 0x42, 0x31, + 0x18, 0x85, 0x6f, 0x81, 0x68, 0x28, 0x4e, 0x37, 0x0c, 0x48, 0xe2, 0x2f, 0x3a, 0x31, 0xb5, 0x89, + 0x3a, 0x38, 0x13, 0xe2, 0x64, 0x8c, 0x41, 0x26, 0x17, 0xd2, 0x7b, 0x2d, 0x4d, 0x93, 0xd2, 0xff, + 0xa6, 0x94, 0x44, 0x37, 0x1f, 0xc1, 0x67, 0x70, 0xf2, 0x51, 0x1c, 0x19, 0x99, 0x8c, 0xf4, 0x2e, + 0x8e, 0x3c, 0x82, 0xb9, 0xbd, 0x98, 0xb8, 0xfd, 0xff, 0x39, 0xdf, 0x39, 0x39, 0x94, 0x2f, 0x5d, + 0xce, 0x73, 0xe1, 0x2c, 0x7a, 0x5e, 0x18, 0x61, 0xad, 0x74, 0x7c, 0xae, 0x8d, 0x9c, 0x2d, 0x71, + 0xe5, 0x72, 0xc9, 0xb5, 0xe3, 0x06, 0x95, 0xce, 0x85, 0x61, 0x85, 0x43, 0x8f, 0xe9, 0xa0, 0x78, + 0x66, 0x35, 0xcf, 0xf6, 0x3c, 0xfb, 0xc7, 0x33, 0xed, 0xfa, 0x5d, 0x85, 0x0a, 0x23, 0xcc, 0xab, + 0xab, 0xce, 0xf5, 0x41, 0x21, 0x2a, 0x23, 0x79, 0xfc, 0xb2, 0xd5, 0x9c, 0x3f, 0xad, 0x9c, 0xf0, + 0x1a, 0x6d, 0xed, 0x9f, 0xbf, 0x13, 0xda, 0xbd, 0xd1, 0x46, 0x3e, 0xc4, 0x9e, 0xb1, 0x2c, 0x0c, + 0xbe, 0x2c, 0xa4, 0xf5, 0x69, 0x4a, 0x5b, 0x56, 0x2c, 0x64, 0x8f, 0x0c, 0xc8, 0xb0, 0x3d, 0x89, + 0x77, 0x7a, 0x46, 0x8f, 0x94, 0xc1, 0x6c, 0x56, 0x08, 0xef, 0xa5, 0xb3, 0xbd, 0x46, 0xf4, 0x3a, + 0x95, 0x76, 0x5f, 0x4b, 0xe9, 0x09, 0xa5, 0x5e, 0x64, 0x46, 0xce, 0x62, 0xb8, 0x19, 0x81, 0x76, + 0x54, 0xee, 0xaa, 0x86, 0x2b, 0xda, 0xf4, 0xde, 0xf4, 0x5a, 0x03, 0x32, 0xec, 0x5c, 0x1c, 0xb3, + 0x7a, 0x1c, 0xfb, 0x1b, 0xc7, 0xc6, 0xfb, 0x71, 0xa3, 0xc3, 0xf0, 0x75, 0xda, 0x9c, 0x4e, 0x6f, + 0x27, 0x15, 0x3e, 0xba, 0x5e, 0x6f, 0x21, 0xd9, 0x6c, 0x21, 0xd9, 0x6d, 0x81, 0xbc, 0x06, 0x20, + 0x1f, 0x01, 0xc8, 0x67, 0x00, 0xb2, 0x0e, 0x40, 0xbe, 0x03, 0x90, 0x9f, 0x00, 0xc9, 0x2e, 0x00, 
+ 0x79, 0x2b, 0x21, 0x59, 0x97, 0x90, 0x6c, 0x4a, 0x48, 0x1e, 0x1b, 0xda, 0x65, 0x07, 0xb1, 0xfa, + 0xf2, 0x37, 0x00, 0x00, 0xff, 0xff, 0x0b, 0x07, 0x40, 0x1c, 0x70, 0x01, 0x00, 0x00, +} + +func (this *FileSourceDeployment) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + that1, ok := that.(*FileSourceDeployment) + if !ok { + that2, ok := that.(FileSourceDeployment) + if ok { + that1 = &that2 + } else { + return false + } + } + if that1 == nil { + return this == nil + } else if this == nil { + return false + } + if this.Name != that1.Name { + return false + } + if this.GlobPattern != that1.GlobPattern { + return false + } + if this.TableName != that1.TableName { + return false + } + if !this.TTL.Equal(that1.TTL) { + return false + } + return true +} +func (this *FileSourceDeployment) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 8) + s = append(s, "&ir.FileSourceDeployment{") + s = append(s, "Name: "+fmt.Sprintf("%#v", this.Name)+",\n") + s = append(s, "GlobPattern: "+fmt.Sprintf("%#v", this.GlobPattern)+",\n") + s = append(s, "TableName: "+fmt.Sprintf("%#v", this.TableName)+",\n") + if this.TTL != nil { + s = append(s, "TTL: "+fmt.Sprintf("%#v", this.TTL)+",\n") + } + s = append(s, "}") + return strings.Join(s, "") +} +func valueToGoStringLogical(v interface{}, typ string) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv) +} +func (m *FileSourceDeployment) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *FileSourceDeployment) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *FileSourceDeployment) MarshalToSizedBuffer(dAtA []byte) (int, 
error) { + i := len(dAtA) + _ = i + var l int + _ = l + if m.TTL != nil { + { + size, err := m.TTL.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintLogical(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x22 + } + if len(m.TableName) > 0 { + i -= len(m.TableName) + copy(dAtA[i:], m.TableName) + i = encodeVarintLogical(dAtA, i, uint64(len(m.TableName))) + i-- + dAtA[i] = 0x1a + } + if len(m.GlobPattern) > 0 { + i -= len(m.GlobPattern) + copy(dAtA[i:], m.GlobPattern) + i = encodeVarintLogical(dAtA, i, uint64(len(m.GlobPattern))) + i-- + dAtA[i] = 0x12 + } + if len(m.Name) > 0 { + i -= len(m.Name) + copy(dAtA[i:], m.Name) + i = encodeVarintLogical(dAtA, i, uint64(len(m.Name))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func encodeVarintLogical(dAtA []byte, offset int, v uint64) int { + offset -= sovLogical(v) + base := offset + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return base +} +func (m *FileSourceDeployment) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.Name) + if l > 0 { + n += 1 + l + sovLogical(uint64(l)) + } + l = len(m.GlobPattern) + if l > 0 { + n += 1 + l + sovLogical(uint64(l)) + } + l = len(m.TableName) + if l > 0 { + n += 1 + l + sovLogical(uint64(l)) + } + if m.TTL != nil { + l = m.TTL.Size() + n += 1 + l + sovLogical(uint64(l)) + } + return n +} + +func sovLogical(x uint64) (n int) { + return (math_bits.Len64(x|1) + 6) / 7 +} +func sozLogical(x uint64) (n int) { + return sovLogical(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *FileSourceDeployment) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&FileSourceDeployment{`, + `Name:` + fmt.Sprintf("%v", this.Name) + `,`, + `GlobPattern:` + fmt.Sprintf("%v", this.GlobPattern) + `,`, + `TableName:` + fmt.Sprintf("%v", this.TableName) + `,`, + `TTL:` + strings.Replace(fmt.Sprintf("%v", 
this.TTL), "Duration", "types.Duration", 1) + `,`, + `}`, + }, "") + return s +} +func valueToStringLogical(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} +func (m *FileSourceDeployment) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowLogical + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: FileSourceDeployment: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: FileSourceDeployment: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Name", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowLogical + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthLogical + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthLogical + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Name = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field GlobPattern", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowLogical + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << 
shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthLogical + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthLogical + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.GlobPattern = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field TableName", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowLogical + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthLogical + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return ErrInvalidLengthLogical + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.TableName = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field TTL", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowLogical + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthLogical + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthLogical + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.TTL == nil { + m.TTL = &types.Duration{} + } + if err := m.TTL.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipLogical(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthLogical + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + 
} + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipLogical(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + depth := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowLogical + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowLogical + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + case 1: + iNdEx += 8 + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowLogical + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if length < 0 { + return 0, ErrInvalidLengthLogical + } + iNdEx += length + case 3: + depth++ + case 4: + if depth == 0 { + return 0, ErrUnexpectedEndOfGroupLogical + } + depth-- + case 5: + iNdEx += 4 + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + if iNdEx < 0 { + return 0, ErrInvalidLengthLogical + } + if depth == 0 { + return iNdEx, nil + } + } + return 0, io.ErrUnexpectedEOF +} + +var ( + ErrInvalidLengthLogical = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowLogical = fmt.Errorf("proto: integer overflow") + ErrUnexpectedEndOfGroupLogical = fmt.Errorf("proto: unexpected end of group") +) diff --git a/src/carnot/planner/tetragon/ir/logical.proto b/src/carnot/planner/tetragon/ir/logical.proto new file mode 100644 index 00000000000..8b6dcb6bd75 --- /dev/null +++ b/src/carnot/planner/tetragon/ir/logical.proto @@ -0,0 +1,39 @@ +/* + * Copyright 2018- The Pixie Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +syntax = "proto3"; + +package px.carnot.planner.tetragon.ir; + +option go_package = "ir2"; + +import "gogoproto/gogo.proto"; +import "google/protobuf/duration.proto"; + +// A logical tetragon deployment +message TetragonDeployment { + // For now this is the same as glob_pattern, but in the future may provide a logical name for the + // tetragon. + string name = 1; + // The glob pattern to use to find files to read. + string glob_pattern = 2; + // The table name to write the data to. + string table_name = 3; + // The ttl to run the tetragon for. -1 indicates that the tetragon should run indefinitely. + google.protobuf.Duration ttl = 4 [ (gogoproto.customname) = "TTL" ]; +} diff --git a/src/carnot/planner/tetragon/tetragon.cc b/src/carnot/planner/tetragon/tetragon.cc new file mode 100644 index 00000000000..21bca05c385 --- /dev/null +++ b/src/carnot/planner/tetragon/tetragon.cc @@ -0,0 +1,27 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "src/carnot/planner/tetragon/tetragon.h" + +namespace px { +namespace carnot { +namespace planner { +namespace compiler {} // namespace compiler +} // namespace planner +} // namespace carnot +} // namespace px diff --git a/src/carnot/planner/tetragon/tetragon.h b/src/carnot/planner/tetragon/tetragon.h new file mode 100644 index 00000000000..18ed8948c57 --- /dev/null +++ b/src/carnot/planner/tetragon/tetragon.h @@ -0,0 +1,37 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include "src/carnot/planner/objects/funcobject.h"
+
+namespace px {
+namespace carnot {
+namespace planner {
+namespace compiler {
+
+// Placeholder type: it currently declares no members. The public/private section markers
+// below are commented out until members are added.
+class TetragonIR {
+  /* public: */
+
+  /* private: */
+};
+
+}  // namespace compiler
+}  // namespace planner
+}  // namespace carnot
+}  // namespace px
diff --git a/src/carnot/planner/tetragon/tetragon_module.cc b/src/carnot/planner/tetragon/tetragon_module.cc
new file mode 100644
index 00000000000..d157279792f
--- /dev/null
+++ b/src/carnot/planner/tetragon/tetragon_module.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2018- The Pixie Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "src/carnot/planner/tetragon/tetragon_module.h" + +namespace px { +namespace carnot { +namespace planner { +namespace compiler { + +class TetragonHandler { + public: + static StatusOr Eval(MutationsIR* mutations_ir, const pypa::AstPtr& ast, + const ParsedArgs& args, ASTVisitor* visitor); +}; + +class DeleteTetragonHandler { + public: + static StatusOr Eval(MutationsIR* mutations_ir, const pypa::AstPtr& ast, + const ParsedArgs& args, ASTVisitor* visitor); +}; + +StatusOr> TetragonModule::Create(MutationsIR* mutations_ir, + ASTVisitor* ast_visitor) { + auto tracing_module = std::shared_ptr(new TetragonModule(mutations_ir, ast_visitor)); + PX_RETURN_IF_ERROR(tracing_module->Init()); + return tracing_module; +} + +Status TetragonModule::Init() { + PX_ASSIGN_OR_RETURN( + std::shared_ptr upsert_fn, + FuncObject::Create(kTetragonID, {"glob_pattern", "table_name", "ttl"}, {}, + /* has_variable_len_args */ false, + /* has_variable_len_kwargs */ false, + std::bind(TetragonHandler::Eval, mutations_ir_, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3), + ast_visitor())); + PX_RETURN_IF_ERROR(upsert_fn->SetDocString(kTetragonDocstring)); + AddMethod(kTetragonID, upsert_fn); + + PX_ASSIGN_OR_RETURN(std::shared_ptr delete_fn, + FuncObject::Create(kTetragonID, {"name"}, {}, + /* has_variable_len_args */ false, + /* has_variable_len_kwargs */ false, + std::bind(DeleteTetragonHandler::Eval, mutations_ir_, + std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3), + ast_visitor())); + PX_RETURN_IF_ERROR(upsert_fn->SetDocString(kDeleteTetragonDocstring)); + AddMethod(kDeleteTetragonID, delete_fn); + + return Status::OK(); +} + +StatusOr TetragonHandler::Eval(MutationsIR* mutations_ir, const pypa::AstPtr& ast, + const ParsedArgs& args, ASTVisitor* visitor) { + DCHECK(mutations_ir); + + PX_ASSIGN_OR_RETURN(auto glob_pattern_ir, GetArgAs(ast, args, "glob_pattern")); + 
PX_ASSIGN_OR_RETURN(auto table_name_ir, GetArgAs(ast, args, "table_name")); + PX_ASSIGN_OR_RETURN(auto ttl_ir, GetArgAs(ast, args, "ttl")); + + const std::string& glob_pattern_str = glob_pattern_ir->str(); + const std::string& table_name_str = table_name_ir->str(); + PX_ASSIGN_OR_RETURN(int64_t ttl_ns, StringToTimeInt(ttl_ir->str())); + + mutations_ir->CreateTetragonDeployment(glob_pattern_str, table_name_str, ttl_ns); + + return std::static_pointer_cast(std::make_shared(ast, visitor)); +} + +StatusOr DeleteTetragonHandler::Eval(MutationsIR* mutations_ir, + const pypa::AstPtr& ast, const ParsedArgs& args, + ASTVisitor* visitor) { + DCHECK(mutations_ir); + + PX_ASSIGN_OR_RETURN(auto glob_pattern_ir, GetArgAs(ast, args, "name")); + const std::string& glob_pattern_str = glob_pattern_ir->str(); + + mutations_ir->DeleteTetragon(glob_pattern_str); + + return std::static_pointer_cast(std::make_shared(ast, visitor)); +} + +} // namespace compiler +} // namespace planner +} // namespace carnot +} // namespace px diff --git a/src/carnot/planner/tetragon/tetragon_module.h b/src/carnot/planner/tetragon/tetragon_module.h new file mode 100644 index 00000000000..c02446af03b --- /dev/null +++ b/src/carnot/planner/tetragon/tetragon_module.h @@ -0,0 +1,69 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+#include
+#include
+#include
+#include
+
+#include "src/carnot/planner/compiler_state/compiler_state.h"
+#include "src/carnot/planner/objects/funcobject.h"
+#include "src/carnot/planner/objects/none_object.h"
+#include "src/carnot/planner/probes/probes.h"
+
+namespace px {
+namespace carnot {
+namespace planner {
+namespace compiler {
+
+// QLObject that exposes the Tetragon upsert and delete functions to scripts. Instances are
+// built through Create(), which also runs Init().
+class TetragonModule : public QLObject {
+ public:
+  // NOTE(review): the descriptor is named LogModuleType and uses "pxlog" together with
+  // QLObjectType::kLogModule; this looks copied from the log module -- confirm whether a
+  // Tetragon-specific name/type is intended.
+  static constexpr TypeDescriptor LogModuleType = {
+      /* name */ "pxlog",
+      /* type */ QLObjectType::kLogModule,
+  };
+  // Creates the module and initializes it.
+  static StatusOr> Create(MutationsIR* mutations_ir,
+                           ASTVisitor* ast_visitor);
+
+  // Constant for the modules.
+  // NOTE(review): this is the key the module is stored under in the module handler. If
+  // LogModule::kLogModuleObjName is also "pxlog" (not visible here), the two registrations
+  // would target the same key -- verify.
+  inline static constexpr char kTetragonModuleObjName[] = "pxlog";
+
+  // Method name and docstring of the upsert function.
+  inline static constexpr char kTetragonID[] = "Tetragon";
+  inline static constexpr char kTetragonDocstring[] = R"doc(
+  TBD
+  )doc";
+
+  // Method name and docstring of the delete function.
+  inline static constexpr char kDeleteTetragonID[] = "DeleteTetragon";
+  inline static constexpr char kDeleteTetragonDocstring[] = R"doc(
+  TBD
+  )doc";
+
+ protected:
+  explicit TetragonModule(MutationsIR* mutations_ir, ASTVisitor* ast_visitor)
+      : QLObject(LogModuleType, ast_visitor), mutations_ir_(mutations_ir) {}
+  // Registers the module's methods.
+  Status Init();
+
+ private:
+  // Not owned; stored as a raw pointer and handed to the method handlers.
+  MutationsIR* mutations_ir_;
+};
+
+}  // namespace compiler
+}  // namespace planner
+}  // namespace carnot
+}  // namespace px
diff --git a/src/carnot/planner/tetragon/tetragon_test.cc b/src/carnot/planner/tetragon/tetragon_test.cc
new file mode 100644
index 00000000000..a12973d8241
--- /dev/null
+++ b/src/carnot/planner/tetragon/tetragon_test.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2018- The Pixie Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "src/carnot/planner/compiler/ast_visitor.h"
+#include "src/carnot/planner/compiler/test_utils.h"
+#include "src/carnot/planner/probes/probes.h"
+
+namespace px {
+namespace carnot {
+namespace planner {
+namespace compiler {
+using ::testing::ContainsRegex;
+using ::testing::Not;
+using ::testing::UnorderedElementsAre;
+
+// PxL script that deploys a single Tetragon source with a 5 minute ttl.
+constexpr char kSingleTetragon[] = R"pxl(
+import pxlog
+
+glob_pattern = 'test.json'
+pxlog.Tetragon(glob_pattern, 'test_table', '5m')
+)pxl";
+
+// Expected text-format proto for kSingleTetragon ('5m' is expected as 300 seconds).
+constexpr char kSingleTetragonProgramPb[] = R"pxl(
+glob_pattern: "test.json"
+table_name: "test_table"
+ttl {
+  seconds: 300
+}
+)pxl";
+
+// Fixture that parses a script and walks it with ASTVisitorImpl, returning the MutationsIR
+// the walk populated.
+class TetragonCompilerTest : public ASTVisitorTest {
+ protected:
+  // Parses `query`, processes the module node and, when `exec_funcs` is non-empty, the exec
+  // funcs as well. Returns the mutation IR, or the first error hit.
+  StatusOr> CompileTetragonScript(
+      std::string_view query, const ExecFuncs& exec_funcs = {}) {
+    // The output table prefixes of the exec funcs are passed on as reserved names.
+    absl::flat_hash_set reserved_names;
+    for (const auto& func : exec_funcs) {
+      reserved_names.insert(func.output_table_prefix());
+    }
+    auto func_based_exec = exec_funcs.size() > 0;
+
+    Parser parser;
+    PX_ASSIGN_OR_RETURN(auto ast, parser.Parse(query));
+
+    std::shared_ptr ir = std::make_shared();
+    std::shared_ptr mutation_ir = std::make_shared();
+
+    ModuleHandler module_handler;
+    PX_ASSIGN_OR_RETURN(auto ast_walker, compiler::ASTVisitorImpl::Create(
+                                             ir.get(), mutation_ir.get(), compiler_state_.get(),
+                                             &module_handler, func_based_exec, reserved_names, {}));
+
+    PX_RETURN_IF_ERROR(ast_walker->ProcessModuleNode(ast));
+    if (func_based_exec) {
+      PX_RETURN_IF_ERROR(ast_walker->ProcessExecFuncs(exec_funcs));
+    }
+    return mutation_ir;
+  }
+};
+
+// TODO(ddelnano): Add test that verifies missing arguments provides a compiler error
+// instead of the "Query should not be empty" error. There seems to be a bug where default
+// arguments are not being handled correctly.
+
+// Compiles kSingleTetragon and checks that exactly one mutation is emitted and that its
+// tetragon field equals kSingleTetragonProgramPb.
+TEST_F(TetragonCompilerTest, parse_single_tetragon) {
+  ASSERT_OK_AND_ASSIGN(auto mutation_ir, CompileTetragonScript(kSingleTetragon));
+  plannerpb::CompileMutationsResponse pb;
+  EXPECT_OK(mutation_ir->ToProto(&pb));
+  ASSERT_EQ(pb.mutations_size(), 1);
+  EXPECT_THAT(pb.mutations()[0].tetragon(),
+              testing::proto::EqualsProto(kSingleTetragonProgramPb));
+}
+
+}  // namespace compiler
+}  // namespace planner
+}  // namespace carnot
+}  // namespace px
diff --git a/src/experimental/standalone_pem/standalone_pem_manager.cc b/src/experimental/standalone_pem/standalone_pem_manager.cc
index 312b189d327..162a53e2af7 100644
--- a/src/experimental/standalone_pem/standalone_pem_manager.cc
+++ b/src/experimental/standalone_pem/standalone_pem_manager.cc
@@ -73,7 +73,7 @@ StandalonePEMManager::StandalonePEMManager(sole::uuid agent_id, std::string_view
       dispatcher_(api_->AllocateDispatcher("manager")),
       table_store_(std::make_shared()),
       func_context_(this, /* mds_stub= */ nullptr, /* mdtp_stub= */ nullptr,
-                    /* mdfs_stub= */ nullptr,
+                    /* mdfs_stub= */ nullptr, /* mdtt_stub= */ nullptr,
                     /* cronscript_stub= */ nullptr, table_store_, [](grpc::ClientContext*) {}),
       stirling_(px::stirling::Stirling::Create(px::stirling::CreateSourceRegistryFromFlag())),
       results_sink_server_(std::make_unique()) {
@@ -113,6 +113,8 @@ StandalonePEMManager::StandalonePEMManager(sole::uuid agent_id, std::string_view
       std::make_unique(dispatcher_.get(), stirling_.get(), table_store_.get());
   file_source_manager_ =
       std::make_unique(dispatcher_.get(), stirling_.get(), table_store_.get());
+  tetragon_manager_ =
+      std::make_unique(dispatcher_.get(), stirling_.get(), table_store_.get());
   // Force Metadata Update.
ECHECK_OK(mds_manager_->PerformMetadataStateUpdate()); } @@ -154,7 +156,7 @@ Status StandalonePEMManager::Init() { vizier_grpc_server_ = std::make_unique( port_, carnot_.get(), results_sink_server_.get(), carnot_->GetEngineState(), - tracepoint_manager_.get(), file_source_manager_.get()); + tracepoint_manager_.get(), file_source_manager_.get(), tetragon_manager_.get()); return Status::OK(); } diff --git a/src/experimental/standalone_pem/standalone_pem_manager.h b/src/experimental/standalone_pem/standalone_pem_manager.h index 99af95bddbc..0da9c79e6d4 100644 --- a/src/experimental/standalone_pem/standalone_pem_manager.h +++ b/src/experimental/standalone_pem/standalone_pem_manager.h @@ -24,6 +24,7 @@ #include "src/carnot/carnot.h" #include "src/common/event/event.h" #include "src/experimental/standalone_pem/file_source_manager.h" +#include "src/experimental/standalone_pem/tetragon_manager.h" #include "src/experimental/standalone_pem/sink_server.h" #include "src/experimental/standalone_pem/tracepoint_manager.h" #include "src/experimental/standalone_pem/vizier_server.h" @@ -91,6 +92,9 @@ class StandalonePEMManager : public BaseManager { // FileSource manager std::unique_ptr file_source_manager_; + + // Tetragon manager + std::unique_ptr tetragon_manager_; }; } // namespace agent diff --git a/src/experimental/standalone_pem/tetragon_manager.cc b/src/experimental/standalone_pem/tetragon_manager.cc new file mode 100644 index 00000000000..281c48020b8 --- /dev/null +++ b/src/experimental/standalone_pem/tetragon_manager.cc @@ -0,0 +1,195 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+
+#include "src/common/base/base.h"
+#include "src/experimental/standalone_pem/tetragon_manager.h"
+
+// Period between two runs of TetragonManager::Monitor().
+constexpr auto kUpdateInterval = std::chrono::seconds(2);
+
+namespace px {
+namespace vizier {
+namespace agent {
+
+// Stores the (non-owned) collaborators and starts the periodic monitor timer.
+TetragonManager::TetragonManager(px::event::Dispatcher* dispatcher,
+                                 stirling::Stirling* stirling,
+                                 table_store::TableStore* table_store)
+    : dispatcher_(dispatcher), stirling_(stirling), table_store_(table_store) {
+  tetragon_monitor_timer_ =
+      dispatcher_->CreateTimer(std::bind(&TetragonManager::Monitor, this));
+  // Kick off the background monitor.
+  tetragon_monitor_timer_->EnableTimer(kUpdateInterval);
+}
+
+// Returns a tab-separated table with one row per registered Tetragon source: id, name,
+// current state, expected state, and the seconds elapsed since last_updated_at.
+std::string TetragonManager::DebugString() const {
+  std::lock_guard lock(mu_);
+  std::stringstream ss;
+  auto now = std::chrono::steady_clock::now();
+  ss << absl::Substitute("Tetragon Manager Debug State:\n");
+  ss << absl::Substitute("ID\tNAME\tCURRENT_STATE\tEXPECTED_STATE\tlast_updated\n");
+  for (const auto& [id, tetragon] : tetragons_) {
+    ss << absl::Substitute(
+        "$0\t$1\t$2\t$3\t$4 seconds\n", id.str(), tetragon.name,
+        statuspb::LifeCycleState_Name(tetragon.current_state),
+        statuspb::LifeCycleState_Name(tetragon.expected_state),
+        std::chrono::duration_cast(now - tetragon.last_updated_at)
+            .count());
+  }
+  return ss.str();
+}
+
+// Registers `file_name` with Stirling under `id` and records it as PENDING with an expected
+// state of RUNNING. Always returns OK.
+Status TetragonManager::HandleRegisterTetragonRequest(sole::uuid id, std::string file_name) {
+  LOG(INFO) << "Registering tetragon: " << file_name;
+
+  TetragonInfo info;
+  info.name = file_name;
+  info.id = id;
+  info.expected_state = statuspb::RUNNING_STATE;
+  info.current_state = statuspb::PENDING_STATE;
+  info.last_updated_at = dispatcher_->GetTimeSource().MonotonicTime();
+  // The Stirling call is made before mu_ is taken; only the map updates are locked.
+  stirling_->RegisterTetragon(id, file_name);
+  {
+    std::lock_guard lock(mu_);
+    tetragons_[id] = std::move(info);
+    tetragon_name_map_[file_name] = id;
+  }
+  return Status::OK();
+}
+
+// Marks the source `id` as expected-TERMINATED and asks Stirling to remove it. Returns
+// NotFound when `id` is unknown; otherwise returns Stirling's status.
+Status TetragonManager::HandleRemoveTetragonRequest(
+    sole::uuid id, const messages::TetragonMessage& /*msg*/) {
+  std::lock_guard lock(mu_);
+  auto it = tetragons_.find(id);
+  if (it == tetragons_.end()) {
+    return error::NotFound("Tetragon with ID: $0, not found", id.str());
+  }
+
+  it->second.expected_state = statuspb::TERMINATED_STATE;
+  return stirling_->RemoveTetragon(id);
+}
+
+// Timer callback: polls Stirling for every registered source, derives its current lifecycle
+// state, applies the schema when a source becomes RUNNING, then re-arms the timer.
+void TetragonManager::Monitor() {
+  std::lock_guard lock(mu_);
+
+  for (auto& [id, tetragon] : tetragons_) {
+    auto s_or_publish = stirling_->GetTetragonInfo(id);
+    statuspb::LifeCycleState current_state;
+    // Get the latest current state according to stirling.
+    if (s_or_publish.ok()) {
+      current_state = statuspb::RUNNING_STATE;
+    } else {
+      switch (s_or_publish.code()) {
+        case statuspb::FAILED_PRECONDITION:
+          // Means the binary has not been found.
+          current_state = statuspb::FAILED_STATE;
+          break;
+        case statuspb::RESOURCE_UNAVAILABLE:
+          current_state = statuspb::PENDING_STATE;
+          break;
+        case statuspb::NOT_FOUND:
+          // Means we didn't actually find the probe. If we requested termination,
+          // it's because the probe has been removed.
+          current_state = (tetragon.expected_state == statuspb::TERMINATED_STATE)
+                              ? statuspb::TERMINATED_STATE
+                              : statuspb::UNKNOWN_STATE;
+          break;
+        default:
+          current_state = statuspb::FAILED_STATE;
+          break;
+      }
+    }
+
+    // Once termination was requested, every non-running state is reported as TERMINATED.
+    if (current_state != statuspb::RUNNING_STATE &&
+        tetragon.expected_state == statuspb::TERMINATED_STATE) {
+      current_state = statuspb::TERMINATED_STATE;
+    }
+
+    if (current_state == tetragon.current_state) {
+      // No state transition, nothing to do.
+      continue;
+    }
+
+    // The following transitions are legal:
+    //   1. Pending -> Terminated: Probe is stopped before starting.
+    //   2. Pending -> Running : Probe starts up.
+    //   3. Running -> Terminated: Probe is stopped.
+    //   4. Running -> Failed: Probe got detached because binary died.
+    //   5. Failed -> Running: Probe started up because binary came back to life.
+    //
+    // In all cases we basically inform the MDS.
+    // In the cases where we transition to running, we need to update the schemas.
+
+    // NOTE(review): probe_status is assigned below but never read in this function, so a
+    // failure is only visible through the resulting state -- confirm whether it should be
+    // logged or reported.
+    Status probe_status = Status::OK();
+    LOG(INFO) << absl::Substitute("Tetragon[$0]::$1 has transitioned $2 -> $3", id.str(),
+                                  tetragon.name,
+                                  statuspb::LifeCycleState_Name(tetragon.current_state),
+                                  statuspb::LifeCycleState_Name(current_state));
+    // Check if running now, then update the schema.
+    if (current_state == statuspb::RUNNING_STATE) {
+      // We must have just transitioned into running. We try to apply the new schema.
+      // If it fails we will trigger an error and report that to MDS.
+      auto publish_pb = s_or_publish.ConsumeValueOrDie();
+      auto s = UpdateSchema(publish_pb);
+      if (!s.ok()) {
+        current_state = statuspb::FAILED_STATE;
+        probe_status = s;
+      }
+    } else {
+      probe_status = s_or_publish.status();
+    }
+
+    // NOTE(review): last_updated_at is not refreshed on a transition, so DebugString()
+    // reports time since registration -- confirm that this is intended.
+    tetragon.current_state = current_state;
+  }
+  tetragon_monitor_timer_->EnableTimer(kUpdateInterval);
+}
+
+// Adds one hot-only table to the table store for every relation derived from `publish_pb`.
+// Always returns OK.
+Status TetragonManager::UpdateSchema(const stirling::stirlingpb::Publish& publish_pb) {
+  LOG(INFO) << "Updating schema for tetragon";
+  auto relation_info_vec = ConvertPublishPBToRelationInfo(publish_pb);
+
+  // TODO(ddelnano): Failure here can lead to an inconsistent schema state. We should
+  // figure out how to handle this as part of the data model refactor project.
+  for (const auto& relation_info : relation_info_vec) {
+    LOG(INFO) << absl::Substitute("Adding table: $0", relation_info.name);
+    table_store_->AddTable(
+        table_store::HotOnlyTable::Create(relation_info.name, relation_info.relation),
+        relation_info.name, relation_info.id);
+  }
+  return Status::OK();
+}
+
+// Looks up a source by name (name -> id -> info). Returns nullptr when either lookup misses.
+// NOTE(review): the returned pointer refers to an entry of tetragons_ and is handed out
+// after mu_ is released, while Monitor() mutates entries under mu_ -- verify that callers
+// cannot race with it.
+TetragonInfo* TetragonManager::GetTetragonInfo(std::string name) {
+  std::lock_guard lock(mu_);
+  auto pair = tetragon_name_map_.find(name);
+  if (pair == tetragon_name_map_.end()) {
+    return nullptr;
+  }
+
+  auto id_pair = tetragons_.find(pair->second);
+  if (id_pair == tetragons_.end()) {
+    return nullptr;
+  }
+
+  return &id_pair->second;
+}
+
+}  // namespace agent
+}  // namespace vizier
+}  // namespace px
diff --git a/src/experimental/standalone_pem/tetragon_manager.h b/src/experimental/standalone_pem/tetragon_manager.h
new file mode 100644
index 00000000000..7390a137d57
--- /dev/null
+++ b/src/experimental/standalone_pem/tetragon_manager.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2018- The Pixie Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include
+#include
+
+#include
+
+#include "src/stirling/stirling.h"
+#include "src/vizier/services/agent/shared/manager/manager.h"
+
+namespace px {
+namespace vizier {
+namespace agent {
+
+// Bookkeeping record for one registered Tetragon source.
+struct TetragonInfo {
+  std::string name;
+  sole::uuid id;
+  // State the source is supposed to reach.
+  statuspb::LifeCycleState expected_state;
+  // State most recently recorded for the source.
+  statuspb::LifeCycleState current_state;
+  std::chrono::time_point last_updated_at;
+};
+
+// Tracks Tetragon sources registered with Stirling and periodically reconciles their state.
+class TetragonManager {
+ public:
+  TetragonManager() = delete;
+  TetragonManager(px::event::Dispatcher* dispatcher, stirling::Stirling* stirling,
+                  table_store::TableStore* table_store);
+
+  // Human-readable dump of all tracked sources.
+  std::string DebugString() const;
+  // Registers `file_name` under `id`.
+  Status HandleRegisterTetragonRequest(sole::uuid id, std::string file_name);
+  // Requests removal of the source `id`.
+  Status HandleRemoveTetragonRequest(sole::uuid id, const messages::TetragonMessage& req);
+  // Returns the record registered under `name`, or nullptr when there is none.
+  TetragonInfo* GetTetragonInfo(std::string name);
+
+ private:
+  // The Tetragon monitor that is responsible for watching and updating the state of
+  // registered Tetragon sources.
+  void Monitor();
+  Status UpdateSchema(const stirling::stirlingpb::Publish& publish_proto);
+
+  px::event::Dispatcher* dispatcher_;
+  stirling::Stirling* stirling_;
+  table_store::TableStore* table_store_;
+
+  event::TimerUPtr tetragon_monitor_timer_;
+  // Guards tetragons_ and tetragon_name_map_.
+  mutable std::mutex mu_;
+  absl::flat_hash_map tetragons_;
+  // Tetragon name to UUID.
+  absl::flat_hash_map tetragon_name_map_;
+};
+
+}  // namespace agent
+}  // namespace vizier
+}  // namespace px
diff --git a/src/experimental/standalone_pem/vizier_server.h b/src/experimental/standalone_pem/vizier_server.h
index 1968e0fe96d..4b3ce1c1469 100644
--- a/src/experimental/standalone_pem/vizier_server.h
+++ b/src/experimental/standalone_pem/vizier_server.h
@@ -51,12 +51,13 @@ class VizierServer final : public api::vizierpb::VizierService::Service {
   VizierServer() = delete;
   VizierServer(carnot::Carnot* carnot, px::vizier::agent::StandaloneGRPCResultSinkServer* svr,
                px::carnot::EngineState* engine_state, TracepointManager* tp_manager,
-               FileSourceManager* file_source_manager) {
+               FileSourceManager* file_source_manager, TetragonManager* tetragon_manager) {
     carnot_ = carnot;
     sink_server_ = svr;
     engine_state_ = engine_state;
     tp_manager_ = tp_manager;
     file_source_manager_ = file_source_manager;
+    tetragon_manager_ = tetragon_manager;
   }
 
   ::grpc::Status ExecuteScript(
@@ -83,6 +84,7 @@ class VizierServer final : public api::vizierpb::VizierService::Service {
       auto mutations = mutations_or_s.ConsumeValueOrDie();
       auto deployments = mutations->Deployments();
       auto file_source_deployments = mutations->FileSourceDeployments();
+      auto tetragon_deployments = mutations->TetragonDeployments();
 
       bool tracepoints_running = true;
       auto ntp_info = TracepointInfo{};
@@ -146,6 +148,32 @@ class VizierServer final : public api::vizierpb::VizierService::Service {
         response->Write(mutation_resp);
         return ::grpc::Status::CANCELLED;
       }
+
+      auto tetragons_running = true;
+      auto ntetragon_info = TetragonInfo{};
+      for (size_t i = 0; i < tetragon_deployments.size(); i++) {
+        auto tetragon = tetragon_deployments[i];
+        auto tetragon_info = tetragon_manager_->GetTetragonInfo(tetragon.glob_pattern());
+        if (tetragon_info == nullptr) {
+          auto s = tetragon_manager_->HandleRegisterTetragonRequest(
+              sole::uuid4(), tetragon.glob_pattern());
+          if (!s.ok()) {
+            return ::grpc::Status(grpc::StatusCode::INTERNAL, "Failed to register tetragon");
+          }
+          ntetragon_info.name = tetragon.glob_pattern();
+          ntetragon_info.current_state = statuspb::PENDING_STATE;
+          tetragon_info = &ntetragon_info;
+        }
+        if (tetragon_info->current_state != statuspb::RUNNING_STATE) {
+          tetragons_running = false;
+        }
+      }
+      if (!tetragons_running) {
+        auto m_info = mutation_resp.mutable_mutation_info();
+        m_info->mutable_status()->set_code(grpc::StatusCode::UNAVAILABLE);
+        response->Write(mutation_resp);
+        return ::grpc::Status::CANCELLED;
+      }
       /* auto m_info = mutation_resp.mutable_mutation_info(); */
       /* m_info->mutable_status()->set_code(0); */
       /* response->Write(mutation_resp); */
@@ -231,6 +259,7 @@ class VizierServer final : public api::vizierpb::VizierService::Service {
   px::carnot::EngineState* engine_state_;
   TracepointManager* tp_manager_;
   FileSourceManager* file_source_manager_;
+  TetragonManager* tetragon_manager_;
 };
 
 class VizierGRPCServer {
@@ -239,9 +268,9 @@ class VizierGRPCServer {
   VizierGRPCServer(int port, carnot::Carnot* carnot,
                    px::vizier::agent::StandaloneGRPCResultSinkServer* svr,
                    carnot::EngineState* engine_state, TracepointManager* tp_manager,
-                   FileSourceManager* file_source_manager)
+                   FileSourceManager* file_source_manager, TetragonManager* tetragon_manager)
       : vizier_server_(std::make_unique(carnot, svr, engine_state, tp_manager,
-                                       file_source_manager)) {
+                                       file_source_manager, tetragon_manager)) {
     grpc::ServerBuilder builder;
 
     std::string uri = absl::Substitute("0.0.0.0:$0", port);
diff --git a/src/stirling/BUILD.bazel b/src/stirling/BUILD.bazel
index ec3e9a759e4..d99d72b279a 100644
--- a/src/stirling/BUILD.bazel
+++ b/src/stirling/BUILD.bazel
@@ -50,6 +50,7 @@ pl_cc_library(
         "//src/stirling/source_connectors/dynamic_bpftrace:cc_library",
         "//src/stirling/source_connectors/dynamic_tracer:cc_library",
         "//src/stirling/source_connectors/file_source:cc_library",
+        "//src/stirling/source_connectors/tetragon:cc_library",
         "//src/stirling/source_connectors/jvm_stats:cc_library",
         "//src/stirling/source_connectors/network_stats:cc_library",
         "//src/stirling/source_connectors/perf_profiler:cc_library",
diff --git a/src/stirling/core/BUILD.bazel b/src/stirling/core/BUILD.bazel
index 587f46b427c..4e9ffa5147e 100644
--- a/src/stirling/core/BUILD.bazel
+++ b/src/stirling/core/BUILD.bazel
@@ -33,6 +33,7 @@ pl_cc_library(
         "//src/stirling/source_connectors/dynamic_bpftrace:__pkg__",
         "//src/stirling/source_connectors/dynamic_tracer:__pkg__",
         "//src/stirling/source_connectors/file_source:__pkg__",
+        "//src/stirling/source_connectors/tetragon:__pkg__",
         "//src/stirling/source_connectors/jvm_stats:__pkg__",
         "//src/stirling/source_connectors/network_stats:__pkg__",
         "//src/stirling/source_connectors/perf_profiler:__pkg__",
diff --git a/src/stirling/source_connectors/stirling_error/stirling_error_bpf_test.cc b/src/stirling/source_connectors/stirling_error/stirling_error_bpf_test.cc
index df3b567982b..3887785169d 100644
--- a/src/stirling/source_connectors/stirling_error/stirling_error_bpf_test.cc
+++ b/src/stirling/source_connectors/stirling_error/stirling_error_bpf_test.cc
@@ -231,6 +231,25 @@ class StirlingErrorTest : public ::testing::Test {
     return id;
   }
 
+  StatusOr DeployTetragon(const std::string& program_text) {
+    // Compile the Tetragon program text.
+    PX_ASSIGN_OR_RETURN(auto compiled_tetragon,
+                        px::carnot::planner::compiler::CompileTetragon(program_text));
+
+    // Register the compiled glob pattern with Stirling under a fresh id.
+    sole::uuid id = sole::uuid4();
+    stirling_->RegisterTetragon(id, std::move(compiled_tetragon.glob_pattern()));
+
+    // Poll until Stirling stops answering RESOURCE_UNAVAILABLE for this id.
+    StatusOr s;
+    do {
+      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      s = stirling_->GetTetragonInfo(id);
+    } while (!s.ok() && s.code() == px::statuspb::Code::RESOURCE_UNAVAILABLE);
+
+    return id;
+  }
+
   Status AppendData(uint64_t table_id, types::TabletID tablet_id,
                     std::unique_ptr record_batch) {
     PX_UNUSED(tablet_id);
diff --git a/src/stirling/source_connectors/tetragon/BUILD.bazel b/src/stirling/source_connectors/tetragon/BUILD.bazel
new file mode 100644
index 00000000000..74caa305caa
--- /dev/null
+++ b/src/stirling/source_connectors/tetragon/BUILD.bazel
@@ -0,0 +1,60 @@
+# Copyright 2018- The Pixie Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# NOTE(review): pl_cc_bpf_test is loaded but no target in this file uses it.
+load("//bazel:pl_build_system.bzl", "pl_cc_bpf_test", "pl_cc_library", "pl_cc_test")
+
+package(default_visibility = ["//src/stirling:__subpackages__"])
+
+# Library built from every .cc/.h file in this package except the *_test.cc files.
+pl_cc_library(
+    name = "cc_library",
+    srcs = glob(
+        ["*.cc"],
+        exclude = [
+            "**/*_test.cc",
+        ],
+    ),
+    hdrs = glob(["*.h"]),
+    deps = [
+        "//src/stirling/core:cc_library",
+        "//src/stirling/utils:cc_library",
+        "@com_github_tencent_rapidjson//:rapidjson",
+    ],
+)
+
+# Test of the connector; it depends only on this package's library.
+pl_cc_test(
+    name = "tetragon_connector_test",
+    srcs = ["tetragon_connector_test.cc"],
+    data = [
+        "testdata/test.json",
+        "testdata/unsupported.json",
+    ],
+    deps = [
+        ":cc_library",
+    ],
+)
+
+# Test that additionally links the full Stirling library.
+pl_cc_test(
+    name = "stirling_tt_test",
+    srcs = ["stirling_tt_test.cc"],
+    data = [
+        "testdata/test.json",
+        "testdata/unsupported.json",
+    ],
+    deps = [
+        ":cc_library",
+        "//src/stirling:cc_library",
+    ],
+)
diff --git a/src/stirling/source_connectors/tetragon/stirling_tt_test.cc b/src/stirling/source_connectors/tetragon/stirling_tt_test.cc
new file mode 100644
index 00000000000..e5ead0e13da
--- /dev/null
+++ b/src/stirling/source_connectors/tetragon/stirling_tt_test.cc
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2018- The Pixie Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include + +#include "src/common/base/base.h" +#include "src/common/testing/testing.h" +#include "src/stirling/core/source_registry.h" +#include "src/stirling/core/types.h" +#include "src/stirling/stirling.h" + +namespace px { +namespace stirling { + +using ::px::testing::BazelRunfilePath; +using ::testing::SizeIs; +using ::testing::StrEq; + +//----------------------------------------------------------------------------- +// Test fixture and shared code +//----------------------------------------------------------------------------- + +class StirlingTetragonTest : public ::testing::Test { + protected: + void SetUp() override { + std::unique_ptr registry = std::make_unique(); + stirling_ = Stirling::Create(std::move(registry)); + + // Set function to call on data pushes. + stirling_->RegisterDataPushCallback( + absl::bind_front(&StirlingTetragonTest::AppendData, this)); + } + + Status AppendData(uint64_t /*table_id*/, types::TabletID /*tablet_id*/, + std::unique_ptr record_batch) { + record_batches_.push_back(std::move(record_batch)); + return Status::OK(); + } + + StatusOr WaitForStatus(sole::uuid trace_id) { + StatusOr s; + do { + s = stirling_->GetTetragonInfo(trace_id); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } while (!s.ok() && s.code() == px::statuspb::Code::RESOURCE_UNAVAILABLE); + + return s; + } + + std::optional FindFieldIndex(const stirlingpb::TableSchema& schema, + std::string_view field_name) { + int idx = 0; + for (const auto& e : schema.elements()) { + if (e.name() == field_name) { + return idx; + } + ++idx; + } + return {}; + } + + void DeployTetragon(std::string file_name, bool trigger_stop = true) { + sole::uuid id = sole::uuid4(); + stirling_->RegisterTetragon(id, file_name); + + // Should deploy. + stirlingpb::Publish publication; + ASSERT_OK_AND_ASSIGN(publication, WaitForStatus(id)); + + // Check the incremental publication change. 
+ ASSERT_EQ(publication.published_info_classes_size(), 1); + info_class_ = publication.published_info_classes(0); + + // Run Stirling data collector. + ASSERT_OK(stirling_->RunAsThread()); + + // Wait to capture some data. + while (record_batches_.empty()) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + + if (trigger_stop) { + ASSERT_OK(stirling_->RemoveTetragon(id)); + + // Should get removed. + EXPECT_EQ(WaitForStatus(id).code(), px::statuspb::Code::NOT_FOUND); + + stirling_->Stop(); + } + } + + std::unique_ptr stirling_; + std::vector> record_batches_; + stirlingpb::InfoClass info_class_; +}; + +class TetragonJSONTest : public StirlingTetragonTest { + protected: + const std::string kFilePath = + BazelRunfilePath("src/stirling/source_connectors/tetragon/testdata/test.json"); +}; + +TEST_F(TetragonJSONTest, ParsesJSONFile) { + DeployTetragon(kFilePath); + EXPECT_THAT(record_batches_, SizeIs(1)); + auto& rb = record_batches_[0]; + // Expect there to be 8 columns. time_ and the 4 cols from the JSON file. + EXPECT_EQ(rb->size(), 8); + + for (size_t i = 0; i < rb->size(); ++i) { + auto col_wrapper = rb->at(i); + // The JSON file has 10 lines. + EXPECT_EQ(col_wrapper->Size(), 10); + } +} + +TEST_F(TetragonJSONTest, ContinuesReadingAfterEOFReached) { + std::string file_name = "./test.json"; + std::ofstream ofs(file_name, std::ios::app); + if (!ofs) { + LOG(FATAL) << absl::Substitute("Failed to open file= $0 received error=$1", kFilePath, strerror(errno)); + } + // TetragonConnector parses the first line to infer the file's schema, an empty file will cause an error. + ofs << R"({"id": 0, "active": false, "score": 6.28, "name": "item0", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]})" << std::endl; + + DeployTetragon(file_name, false); + EXPECT_THAT(record_batches_, SizeIs(1)); + auto& rb = record_batches_[0]; + // Expect there to be 8 columns. time_ and the 4 cols from the JSON file. 
+ EXPECT_EQ(rb->size(), 8); + + for (size_t i = 0; i < rb->size(); ++i) { + auto col_wrapper = rb->at(i); + // TODO(ddelnano): Clean up these log messages and add better assertions for uint128 case + if (i == 1) { + LOG(INFO) << col_wrapper->Get(0).val; + LOG(INFO) << col_wrapper->Get(1).val; + } else if (i == 6) { + LOG(INFO) << col_wrapper->Get(0); + EXPECT_EQ(col_wrapper->Get(0), R"({"a":1,"b":2})"); + } else if (i == 7) { + LOG(INFO) << col_wrapper->Get(0); + EXPECT_EQ(col_wrapper->Get(0), R"([0,1,2])"); + } + // The file's first row batch has 1 line + EXPECT_EQ(col_wrapper->Size(), 1); + } + + ofs << R"({"id": 1, "active": false, "score": 6.28, "name": "item1", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]})" << std::endl; + ofs.flush(); + ofs.close(); + + while (record_batches_.size() < 2) { + std::this_thread::sleep_for(std::chrono::seconds(3)); + LOG(INFO) << "Waiting for more data..."; + } + + auto& rb2 = record_batches_[1]; + for (size_t i = 0; i < rb2->size(); ++i) { + auto col_wrapper = rb2->at(i); + // The file's second row batch has 1 line + EXPECT_EQ(col_wrapper->Size(), 1); + } +} + +TEST_F(TetragonJSONTest, ContinuesReadingAfterFileRotation) { + std::string file_name = "./test2.json"; + std::ofstream ofs(file_name, std::ios::app); + if (!ofs) { + LOG(FATAL) << absl::Substitute("Failed to open file= $0 received error=$1", kFilePath, strerror(errno)); + } + // TetragonConnector parses the first line to infer the file's schema, an empty file will cause an error. + ofs << R"({"id": 0, "active": false, "score": 6.28, "name": "item0", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]})" << std::endl; + ofs << R"({"id": 1, "active": false, "score": 6.28, "name": "item1", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]})" << std::endl; + + DeployTetragon(file_name, false); + EXPECT_THAT(record_batches_, SizeIs(1)); + auto& rb = record_batches_[0]; + // Expect there to be 8 columns. time_ and the 4 cols from the JSON file. 
+ EXPECT_EQ(rb->size(), 8); + + for (size_t i = 0; i < rb->size(); ++i) { + auto col_wrapper = rb->at(i); + // The file's first row batch has 2 lines + EXPECT_EQ(col_wrapper->Size(), 2); + } + + std::ofstream ofs2(file_name, std::ios::trunc); + ofs2 << R"({"id": 2, "active": false, "score": 6.28, "name": "item2", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]})" << std::endl; + ofs2.flush(); + ofs.close(); + + while (record_batches_.size() < 2) { + std::this_thread::sleep_for(std::chrono::seconds(3)); + LOG(INFO) << "Waiting for more data..."; + } + + auto& rb2 = record_batches_[1]; + for (size_t i = 0; i < rb2->size(); ++i) { + auto col_wrapper = rb2->at(i); + // The file's second row batch has 1 line + EXPECT_EQ(col_wrapper->Size(), 1); + } +} + +} // namespace stirling +} // namespace px diff --git a/src/stirling/source_connectors/tetragon/testdata/kern.log b/src/stirling/source_connectors/tetragon/testdata/kern.log new file mode 100644 index 00000000000..fed434d43a4 --- /dev/null +++ b/src/stirling/source_connectors/tetragon/testdata/kern.log @@ -0,0 +1,5 @@ +2025-03-05T22:30:12.313406+00:00 dev-vm kernel: ll header: 00000000: ff ff ff ff ff ff 42 01 0a 81 00 01 08 06 +2025-03-05T22:30:18.313309+00:00 dev-vm kernel: IPv4: martian source 10.129.0.8 from 10.129.0.1, on dev ens4 +2025-03-05T22:30:18.313333+00:00 dev-vm kernel: ll header: 00000000: ff ff ff ff ff ff 42 01 0a 81 00 01 08 06 +2025-03-05T22:30:24.313240+00:00 dev-vm kernel: IPv4: martian source 10.129.0.8 from 10.129.0.1, on dev ens4 +2025-03-05T22:30:24.313268+00:00 dev-vm kernel: ll header: 00000000: ff ff ff ff ff ff 42 01 0a 81 00 01 08 06 diff --git a/src/stirling/source_connectors/tetragon/testdata/test.json b/src/stirling/source_connectors/tetragon/testdata/test.json new file mode 100644 index 00000000000..f65c3fabafb --- /dev/null +++ b/src/stirling/source_connectors/tetragon/testdata/test.json @@ -0,0 +1,10 @@ +{"id": 1, "active": true, "score": 3.14, "name": "item1", "object": {"a": 1, "b": 
2}, "arr": [0, 1, 2]} +{"id": 2, "active": false, "score": 2.71, "name": "item2", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 3, "active": true, "score": 1.41, "name": "item3", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 4, "active": false, "score": 1.73, "name": "item4", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 5, "active": true, "score": 0.99, "name": "item5", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 6, "active": false, "score": 2.18, "name": "item6", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 7, "active": true, "score": 3.67, "name": "item7", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 8, "active": false, "score": 4.56, "name": "item8", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 9, "active": true, "score": 5.32, "name": "item9", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} +{"id": 10, "active": false, "score": 6.28, "name": "item10", "object": {"a": 1, "b": 2}, "arr": [0, 1, 2]} diff --git a/src/stirling/source_connectors/tetragon/testdata/unsupported.json b/src/stirling/source_connectors/tetragon/testdata/unsupported.json new file mode 100644 index 00000000000..455064ea679 --- /dev/null +++ b/src/stirling/source_connectors/tetragon/testdata/unsupported.json @@ -0,0 +1 @@ +{"id": 1, "active": true, "score": 3.14, "name": "item1", "unsupported": {"a": 1, "b": 2}} diff --git a/src/stirling/source_connectors/tetragon/tetragon_connector.cc b/src/stirling/source_connectors/tetragon/tetragon_connector.cc new file mode 100644 index 00000000000..0df9cd646d7 --- /dev/null +++ b/src/stirling/source_connectors/tetragon/tetragon_connector.cc @@ -0,0 +1,217 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "src/stirling/source_connectors/tetragon/tetragon_connector.h" + +#include +#include + +#include +#include + +//DeployTetragonConnector + +using px::StatusOr; +using px::utils::RapidJSONTypeToString; + +constexpr size_t kMaxStringBytes = std::numeric_limits::max(); + +namespace px { +namespace stirling { + +namespace { + +StatusOr> DataElementsFromFile( + const std::filesystem::path& file_name) { + auto f = std::ifstream(file_name.string()); + if (!f.is_open()) { + return error::Internal("Failed to open file: $0 with error=$1", file_name.string(), + strerror(errno)); + } + + BackedDataElements data_elements; + PX_ASSIGN_OR_RETURN(data_elements, DataElementsForTetragonFile()); + f.seekg(0, std::ios::beg); + return std::make_pair(std::move(data_elements), std::move(f)); +} + +} // namespace + +StatusOr> TetragonConnector::Create( + std::string_view source_name, + const std::filesystem::path input_file_name) +{ + // get the file extension of the files + auto inputExtension = input_file_name.extension().string(); + if (inputExtension != ".log") + { + return error::InvalidArgument("Input file has not *.log extension"); + } + auto in_host_path = px::system::Config::GetInstance().ToHostPath(input_file_name); + PX_ASSIGN_OR_RETURN(auto data_elements_and_file, DataElementsFromFile(in_host_path)); + auto& [data_elements, file] = data_elements_and_file; + + // Get just the filename and extension + auto name = in_host_path.filename().string(); + std::unique_ptr table_schema = + 
DynamicDataTableSchema::Create(name, "", std::move(data_elements)); + return std::unique_ptr(new TetragonConnector( + source_name, std::move(in_host_path), std::move(file), std::move(table_schema))); +} + +TetragonConnector::TetragonConnector( + std::string_view source_name, + const std::filesystem::path& input_file_name, + std::ifstream file, + std::unique_ptr table_schema) + : SourceConnector(source_name, ArrayView(&table_schema->Get(), 1)), + name_(source_name), + file_name_(input_file_name), + file_(std::move(file)), + table_schema_(std::move(table_schema)), + transfer_specs_({ + {".log", {&TetragonConnector::TransferTetragonData}}, + }){} + + + +StatusOr DataElementsForTetragonFile() { + BackedDataElements data_elements(4); + data_elements.emplace_back("time_", "", types::DataType::STRING); + data_elements.emplace_back("node_name", "", types::DataType::STRING); + data_elements.emplace_back("type", "", types::DataType::STRING); + data_elements.emplace_back("payload", "", types::DataType::STRING); + return data_elements; +} + +Status TetragonConnector::InitImpl() { + sampling_freq_mgr_.set_period(kSamplingPeriod); + push_freq_mgr_.set_period(kPushPeriod); + return Status::OK(); +} + +Status TetragonConnector::StopImpl() { + file_.close(); + return Status::OK(); +} + +constexpr int kMaxLines = 1000; + + +void TetragonConnector::TransferTetragonData(DataTable::DynamicRecordBuilder* /*r*/, + const std::string& line) { + DataTable::DynamicRecordBuilder r(data_tables_[0]); + rapidjson::Document tetragon_data; + tetragon_data.Parse(line.c_str()); + if (tetragon_data.HasParseError()) { + LOG(ERROR) << "Error parsing JSON string:" << line; + return; + } + + // Extract common fields + for (const auto& entry : tetragon_data.GetArray()) { + if (entry.HasMember("time")) { + if (entry["time"].IsString()) { + std::string timeStr = entry["time"].GetString(); // Extract the string + r.Append(0, types::StringValue(timeStr), kMaxStringBytes); // Convert to StringValue + } else if 
(entry["time"].IsNull()) { + r.Append(0, types::StringValue("empty"), kMaxStringBytes); + } else { + LOG(ERROR) << "Key \"time\" is present but its value is not a string."; + } + } else { + LOG(ERROR) << "Key \"time\" is not present in json."; + } + + if (entry.HasMember("node_name")) { + if (entry["node_name"].IsString()) { + std::string nodeNameStr = entry["node_name"].GetString(); + r.Append(1, types::StringValue(nodeNameStr), kMaxStringBytes); + } else if (entry["node_name"].IsNull()) { + r.Append(1, types::StringValue("empty"), kMaxStringBytes); + } else { + LOG(ERROR) << "Key \"node_name\" is present but its value is not a string."; + } + } else { + LOG(ERROR) << "Key \"node_name\" is not present in json."; + } + } + + // Find the first key to use as the type + if (!tetragon_data.ObjectEmpty()) { + auto itr = tetragon_data.MemberBegin(); + std::string type = itr->name.GetString(); + r.Append(2, types::StringValue(type), kMaxStringBytes); + + // Add the payload (content of the first key) + if (tetragon_data[type.c_str()].IsString()) { + std::string payloadStr = tetragon_data[type.c_str()].GetString(); + r.Append(3, types::StringValue(payloadStr), kMaxStringBytes); + } else if (tetragon_data[type.c_str()].IsNull()) { + r.Append(3, types::StringValue("empty"), kMaxStringBytes); + } else { + LOG(ERROR) << "Key " << type << " is present but its value is not a string."; + } + } else { + LOG(ERROR) << "Error: JSON object is empty."; + return; + } +} + +void TetragonConnector::TransferDataImpl(ConnectorContext* /* ctx */) { + DCHECK_EQ(data_tables_.size(), 1U) << "Only one table is allowed per TetragonConnector."; + int i = 0; + auto extension = file_name_.extension().string(); + auto transfer_fn = transfer_specs_.at(extension).transfer_fn; + + auto before_pos = file_.tellg(); + while (i < kMaxLines) { + std::string line; + std::getline(file_, line); + + if (file_.eof() || line.empty()) { + file_.clear(); + auto after_pos = file_.tellg(); + if (after_pos == 
last_pos_) { + LOG_EVERY_N(INFO, 100) << absl::Substitute("Reached EOF for file=$0 eof count=$1 pos=", + file_name_.string(), eof_count_) + << after_pos; + eof_count_++; + + // TODO(ddlenano): Using a file's inode is a better way to detect file rotation. For now, + // this will suffice. + std::ifstream s(file_name_, std::ios::ate | std::ios::binary); + if (s.tellg() < after_pos) { + LOG(INFO) << "Detected file rotation, resetting file position"; + file_.close(); + file_.open(file_name_, std::ios::in); + } + } + break; + } + + transfer_fn(*this, nullptr, line); + i++; + } + auto after_pos = file_.tellg(); + last_pos_ = after_pos; + monitor_.AppendStreamStatusRecord(file_name_, after_pos - before_pos, ""); +} + +} // namespace stirling +} // namespace px diff --git a/src/stirling/source_connectors/tetragon/tetragon_connector.h b/src/stirling/source_connectors/tetragon/tetragon_connector.h new file mode 100644 index 00000000000..4ff67232ddc --- /dev/null +++ b/src/stirling/source_connectors/tetragon/tetragon_connector.h @@ -0,0 +1,81 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "src/stirling/core/source_connector.h" +#include "src/stirling/utils/monitor.h" + +namespace px { +namespace stirling { + +// Source connector that reads a Tetragon log file line by line and writes the parsed lines into a single data table. +class TetragonConnector : public SourceConnector { + using pos_type = std::ifstream::pos_type; + + public: + static constexpr auto kSamplingPeriod = std::chrono::milliseconds{100}; + // Set this high enough to avoid the following error: + // F20250129 00:05:30.980778 2527479 source_connector.cc:64] Failed to push data. Message = + // Table_id 1 doesn't exist. + // + // This occurs when the Stirling data table has data but the table store hasn't received its + // schema yet. I'm not sure why the dynamic tracer doesn't experience this case. + static constexpr auto kPushPeriod = std::chrono::milliseconds{7000}; + + // Creates a connector named `source_name` that reads `input_file_name`; returns an error status if the file is rejected or cannot be opened. + static StatusOr > Create(std::string_view source_name, + const std::filesystem::path input_file_name); + + TetragonConnector() = delete; + ~TetragonConnector() = default; + Status InitImpl() override; + Status StopImpl() override; + void TransferDataImpl(ConnectorContext* ctx) override; + + protected: + // Use Create() instead; the constructor takes ownership of the opened stream and of the table schema. + explicit TetragonConnector(std::string_view source_name, const std::filesystem::path& input_file_name, + std::ifstream file, + std::unique_ptr table_schema); + + private: + // Converts one line of the input file into a record of the data table. + void TransferTetragonData(DataTable::DynamicRecordBuilder* builder, const std::string& line); + struct TetragonTransferSpec { + std::function + transfer_fn; + }; + + std::string name_; + const std::filesystem::path file_name_; + std::ifstream file_; + std::unique_ptr table_schema_; + // Keyed by file extension; selects the per-line transfer function. + absl::flat_hash_map transfer_specs_; + // Number of times EOF was reached without the read position having advanced. + int eof_count_ = 0; + // Stream position recorded at the end of the previous TransferDataImpl() call. + pos_type last_pos_ = 0; + StirlingMonitor& monitor_ = *StirlingMonitor::GetInstance(); +}; + +// Returns the fixed column schema used for Tetragon files. +StatusOr DataElementsForTetragonFile(); + +} // namespace stirling +} // namespace px diff --git a/src/stirling/source_connectors/tetragon/tetragon_connector_test.cc
b/src/stirling/source_connectors/tetragon/tetragon_connector_test.cc new file mode 100644 index 00000000000..5e26aef6ed8 --- /dev/null +++ b/src/stirling/source_connectors/tetragon/tetragon_connector_test.cc @@ -0,0 +1,30 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include "src/common/testing/testing.h" +#include "src/stirling/source_connectors/tetragon/tetragon_connector.h" + +namespace px { +namespace stirling { + + +} // namespace stirling +} // namespace px diff --git a/src/stirling/stirling.cc b/src/stirling/stirling.cc index fd35286854e..fabe2ee6358 100644 --- a/src/stirling/stirling.cc +++ b/src/stirling/stirling.cc @@ -58,6 +58,7 @@ #include "src/stirling/source_connectors/seq_gen/seq_gen_connector.h" #include "src/stirling/source_connectors/socket_tracer/socket_trace_connector.h" #include "src/stirling/source_connectors/stirling_error/stirling_error_connector.h" +#include "src/stirling/source_connectors/tetragon/tetragon_connector.h" #include "src/stirling/source_connectors/dynamic_tracer/dynamic_tracing/dynamic_tracer.h" #include "src/stirling/source_connectors/tcp_stats/tcp_stats_connector.h" @@ -230,6 +231,11 @@ class StirlingImpl final : public Stirling { void UpdateFileSourceStatus(const sole::uuid& uuid, const StatusOr& status); + void RegisterTetragon(sole::uuid id, std::string file_name) override; + void 
UpdateTetragonStatus(const sole::uuid& uuid, const StatusOr& status); + Status RemoveTetragon(sole::uuid trace_id) override; + StatusOr GetTetragonInfo(sole::uuid trace_id) override; + private: // Adds a source to Stirling, and updates all state accordingly. Status AddSource(std::unique_ptr source, @@ -251,6 +257,11 @@ class StirlingImpl final : public Stirling { void DestroyFileSourceConnector(sole::uuid id); + // Creates and deploys tetragon connector + void DeployTetragonConnector(sole::uuid trace_id, std::string file_name); + + void DestroyTetragonConnector(sole::uuid id); + // Main run implementation. void RunCore(); @@ -293,6 +304,9 @@ class StirlingImpl final : public Stirling { absl::flat_hash_map> file_source_status_map_ ABSL_GUARDED_BY(file_source_status_map_lock_); + absl::base_internal::SpinLock tetragon_status_map_lock_; + absl::flat_hash_map> tetragon_status_map_ + ABSL_GUARDED_BY(tetragon_status_map_lock_); StirlingMonitor& monitor_ = *StirlingMonitor::GetInstance(); struct DynamicTraceInfo { @@ -313,6 +327,15 @@ class StirlingImpl final : public Stirling { absl::flat_hash_map file_source_info_map_ ABSL_GUARDED_BY(file_source_status_map_lock_); + struct TetragonInfo { + std::string source_connector; + std::string file_name; + std::string output_table; + }; + + absl::flat_hash_map tetragon_info_map_ + ABSL_GUARDED_BY(tetragon_status_map_lock_); + // RunCoreStats tracks how much work is accomplished in each run core iteration, // and it also keeps a histogram of sleep durations. 
RunCoreStats run_core_stats_; @@ -529,6 +552,7 @@ namespace { constexpr char kDynTraceSourcePrefix[] = "DT_"; constexpr char kFileSourcePrefix[] = "LOG_"; +constexpr char kTetragonPrefix[] = "LOG_"; StatusOr> CreateFileSourceConnector(sole::uuid id, std::string file_name) { @@ -729,6 +753,145 @@ void StirlingImpl::DestroyFileSourceConnector(sole::uuid trace_id) { } } +void StirlingImpl::RegisterTetragon(sole::uuid id, std::string file_name) { + // Record the source's metadata before deployment starts, so that later status updates can be + // attributed to it when they are logged to the Stirling Monitor. + // NOTE(review): no check that the file exists is performed here; a failure to open it is + // reported asynchronously through the status map by DeployTetragonConnector(). + { + absl::base_internal::SpinLockHolder lock(&tetragon_status_map_lock_); + std::string source_connector = "tetragon"; + tetragon_info_map_[id] = {.source_connector = std::move(source_connector), + .file_name = file_name, + .output_table = ""}; + } + + // Initialize the status of this Tetragon source to pending. + { + absl::base_internal::SpinLockHolder lock(&tetragon_status_map_lock_); + tetragon_status_map_[id] = error::ResourceUnavailable("Waiting for file polling to start."); + } + + auto t = std::thread(&StirlingImpl::DeployTetragonConnector, this, id, file_name); + t.detach(); +} + + void StirlingImpl::UpdateTetragonStatus(const sole::uuid& id, + const StatusOr& s) { + absl::base_internal::SpinLockHolder lock(&tetragon_status_map_lock_); + tetragon_status_map_[id] = s; + + // Look up the source's info and log the Tetragon status update to Stirling Monitor. + auto it = tetragon_info_map_.find(id); + if (it != tetragon_info_map_.end()) { +//TODO: Update with Tetragon source + TetragonInfo& tetragon_info = it->second; + + // Build info JSON with trace_id and output_table.
+ ::px::utils::JSONObjectBuilder builder; + builder.WriteKV("trace_id", id.str()); + if (s.ok()) { + builder.WriteKV("output_table", tetragon_info.output_table); + } + + monitor_.AppendSourceStatusRecord(tetragon_info.source_connector, s.status(), + builder.GetString()); + + // Clean up map if status is not ok. When status is RESOURCE_UNAVAILABLE, either deployment + // or removal is pending, so don't clean up. + if (!s.ok() && s.code() != statuspb::Code::RESOURCE_UNAVAILABLE) { + tetragon_info_map_.erase(id); + } + } +} + +Status StirlingImpl::RemoveTetragon(sole::uuid trace_id) { + // Change the status of this Tetragon source to pending while we delete it. + UpdateTetragonStatus(trace_id, error::ResourceUnavailable("Tetragon removal in progress.")); + + auto t = std::thread(&StirlingImpl::DestroyTetragonConnector, this, trace_id); + t.detach(); + + return Status::OK(); +} + +StatusOr> CreateTetragonConnector(sole::uuid id, + std::string file_name) { + auto name = absl::StrCat(kTetragonPrefix, id.str()); + return TetragonConnector::Create(name, file_name); +} + +void StirlingImpl::DeployTetragonConnector(sole::uuid id, std::string file_name) { + auto timer = ElapsedTimer(); + timer.Start(); + + // Try creating the TetragonConnector for the given file. + // On failure, set status and exit. + auto source_or_s = CreateTetragonConnector(id, file_name); + if (!source_or_s.ok()) { + Status ret_status(px::statuspb::Code::INTERNAL, source_or_s.msg()); + UpdateTetragonStatus(id, ret_status); + LOG(INFO) << ret_status.ToString(); + return; + } + auto source = source_or_s.ConsumeValueOrDie(); + + LOG(INFO) << absl::Substitute("TetragonConnector [$0] created in $1 ms.", source->name(), + timer.ElapsedTime_us() / 1000.0); + + // Cache table schema name as source will be moved below.
+ std::string output_name(source->table_schemas()[0].name()); + + { + absl::base_internal::SpinLockHolder lock(&tetragon_status_map_lock_); + auto it = tetragon_info_map_.find(id); + if (it != tetragon_info_map_.end()) { + tetragon_info_map_[id].output_table = output_name; + } + } + + timer.Start(); + auto s = AddSource(std::move(source), id.str()); + if (!s.ok()) { + UpdateTetragonStatus(id, s); + LOG(INFO) << s.ToString(); + return; + } + LOG(INFO) << absl::Substitute("TetragonConnector [$0] created in $1 ms.", id.str(), + timer.ElapsedTime_us() / 1000.0); + + stirlingpb::Publish publication; + { + absl::base_internal::SpinLockHolder lock(&info_class_mgrs_lock_); + PopulatePublishProto(&publication, info_class_mgrs_, output_name); + } + + UpdateTetragonStatus(id, publication); +} + +void StirlingImpl::DestroyTetragonConnector(sole::uuid trace_id) { + auto timer = ElapsedTimer(); + timer.Start(); + + // Remove from stirling. + auto s = RemoveSource(kTetragonPrefix + trace_id.str()); + if (!s.ok()) { + UpdateTetragonStatus(trace_id, s); + LOG(INFO) << s.ToString(); + return; + } + + LOG(INFO) << absl::Substitute("Tetragon [$0]: Removed file polling $1 ms.", trace_id.str(), + timer.ElapsedTime_us() / 1000.0); + + // Remove from map. 
+ { + absl::base_internal::SpinLockHolder lock(&tetragon_status_map_lock_); + tetragon_status_map_.erase(trace_id); + tetragon_info_map_.erase(trace_id); + } +} + #undef RETURN_ERROR #undef RETURN_IF_ERROR #undef ASSIGN_OR_RETURN @@ -834,6 +997,18 @@ StatusOr StirlingImpl::GetFileSourceInfo(sole::uuid trace_i return s; } +StatusOr StirlingImpl::GetTetragonInfo(sole::uuid trace_id) { + absl::base_internal::SpinLockHolder lock(&tetragon_status_map_lock_); + + auto iter = tetragon_status_map_.find(trace_id); + if (iter == tetragon_status_map_.end()) { + return error::NotFound("Tetragon $0 not found.", trace_id.str()); + } + + StatusOr s = iter->second; + return s; +} + Status StirlingImpl::RemoveTracepoint(sole::uuid trace_id) { // Change the status of this trace to pending while we delete it. UpdateDynamicTraceStatus(trace_id, error::ResourceUnavailable("Probe removal in progress.")); diff --git a/src/stirling/stirling.h b/src/stirling/stirling.h index 86231e05193..45591c3f8fa 100644 --- a/src/stirling/stirling.h +++ b/src/stirling/stirling.h @@ -123,10 +123,14 @@ class Stirling : public NotCopyable { */ virtual StatusOr GetTracepointInfo(sole::uuid trace_id) = 0; virtual StatusOr GetFileSourceInfo(sole::uuid trace_id) = 0; + virtual StatusOr GetTetragonInfo(sole::uuid trace_id) = 0; virtual void RegisterFileSource(sole::uuid id, std::string file_name) = 0; virtual Status RemoveFileSource(sole::uuid id) = 0; + virtual void RegisterTetragon(sole::uuid id, std::string file_name) = 0; + virtual Status RemoveTetragon(sole::uuid id) = 0; + /** * Remove a dynamically created tracepoint. 
*/ diff --git a/src/vizier/funcs/context/vizier_context.h b/src/vizier/funcs/context/vizier_context.h index 6820ac738f3..402a663ad9e 100644 --- a/src/vizier/funcs/context/vizier_context.h +++ b/src/vizier/funcs/context/vizier_context.h @@ -43,11 +43,13 @@ class VizierFuncFactoryContext : public NotCopyable { using MDSStub = services::metadata::MetadataService::Stub; using MDTPStub = services::metadata::MetadataTracepointService::Stub; using MDFSStub = services::metadata::MetadataFileSourceService::Stub; + using MDTTStub = services::metadata::MetadataTetragonService::Stub; VizierFuncFactoryContext() = default; VizierFuncFactoryContext( const agent::BaseManager* agent_manager, const std::shared_ptr& mds_stub, const std::shared_ptr& mdtp_stub, const std::shared_ptr& mdfs_stub, + const std::shared_ptr& mdtt_stub, const std::shared_ptr& cronscript_stub, std::shared_ptr<::px::table_store::TableStore> table_store, std::function add_grpc_auth) @@ -55,6 +57,7 @@ class VizierFuncFactoryContext : public NotCopyable { mds_stub_(mds_stub), mdtp_stub_(mdtp_stub), mdfs_stub_(mdfs_stub), + mdtt_stub_(mdtt_stub), cronscript_stub_(cronscript_stub), table_store_(table_store), add_auth_to_grpc_context_func_(add_grpc_auth) {} @@ -78,6 +81,10 @@ class VizierFuncFactoryContext : public NotCopyable { CHECK(mdfs_stub_ != nullptr); return mdfs_stub_; } + std::shared_ptr mdtt_stub() const { + CHECK(mdtt_stub_ != nullptr); + return mdtt_stub_; + } std::shared_ptr cronscript_stub() const { CHECK(cronscript_stub_ != nullptr); return cronscript_stub_; @@ -95,6 +102,7 @@ class VizierFuncFactoryContext : public NotCopyable { std::shared_ptr mds_stub_ = nullptr; std::shared_ptr mdtp_stub_ = nullptr; std::shared_ptr mdfs_stub_ = nullptr; + std::shared_ptr mdtt_stub_ = nullptr; std::shared_ptr cronscript_stub_ = nullptr; std::shared_ptr<::px::table_store::TableStore> table_store_ = nullptr; std::function add_auth_to_grpc_context_func_; diff --git a/src/vizier/funcs/md_udtfs/md_udtfs.cc 
b/src/vizier/funcs/md_udtfs/md_udtfs.cc index 9ebc5c75eed..8569c7020d6 100644 --- a/src/vizier/funcs/md_udtfs/md_udtfs.cc +++ b/src/vizier/funcs/md_udtfs/md_udtfs.cc @@ -57,6 +57,8 @@ void RegisterFuncsOrDie(const VizierFuncFactoryContext& ctx, carnot::udf::Regist "GetTracepointStatus", ctx); registry->RegisterFactoryOrDie>( "GetFileSourceStatus", ctx); + registry->RegisterFactoryOrDie>( + "GetTetragonStatus", ctx); registry ->RegisterFactoryOrDie>( "GetCronScriptHistory", ctx); diff --git a/src/vizier/funcs/md_udtfs/md_udtfs_impl.h b/src/vizier/funcs/md_udtfs/md_udtfs_impl.h index 55bc5492d0f..e4d579a74be 100644 --- a/src/vizier/funcs/md_udtfs/md_udtfs_impl.h +++ b/src/vizier/funcs/md_udtfs/md_udtfs_impl.h @@ -89,6 +89,20 @@ class UDTFWithMDFSFactory : public carnot::udf::UDTFFactory { const VizierFuncFactoryContext& ctx_; }; +// Factory that builds UDTFs from the context's mdtt_stub() and gRPC auth function. +template +class UDTFWithMDTTFactory : public carnot::udf::UDTFFactory { + public: + UDTFWithMDTTFactory() = delete; + explicit UDTFWithMDTTFactory(const VizierFuncFactoryContext& ctx) : ctx_(ctx) {} + + std::unique_ptr Make() override { + return std::make_unique(ctx_.mdtt_stub(), ctx_.add_auth_to_grpc_context_func()); + } + + private: + const VizierFuncFactoryContext& ctx_; +}; + template class UDTFWithCronscriptFactory : public carnot::udf::UDTFFactory { public: @@ -1130,6 +1144,130 @@ class GetFileSourceStatus final : public carnot::udf::UDTF std::function add_context_authentication_func_; }; +/** + * This UDTF fetches information about Tetragon sources from MDS.
+ */ +class GetTetragonStatus final : public carnot::udf::UDTF { + public: + using MDTTStub = vizier::services::metadata::MetadataTetragonService::Stub; + using TetragonResponse = vizier::services::metadata::GetTetragonInfoResponse; + GetTetragonStatus() = delete; + explicit GetTetragonStatus(std::shared_ptr stub, + std::function add_context_authentication) + : idx_(0), stub_(stub), add_context_authentication_func_(add_context_authentication) {} + + static constexpr auto Executor() { return carnot::udfspb::UDTFSourceExecutor::UDTF_ONE_KELVIN; } + + static constexpr auto OutputRelation() { + // TODO(ddelnano): Change the tetragon_id column to a UINT128 once the pxl lookup from + // px/pipeline_flow_graph works. That script has a UINT128 stored as a string and needs to + // be joined with this column + return MakeArray(ColInfo("tetragon_id", types::DataType::STRING, + types::PatternType::GENERAL, "The id of the tetragon"), + ColInfo("name", types::DataType::STRING, types::PatternType::GENERAL, + "The name of the tetragon"), + ColInfo("state", types::DataType::STRING, types::PatternType::GENERAL, + "The state of the tetragon"), + ColInfo("status", types::DataType::STRING, types::PatternType::GENERAL, + "The status message if not healthy"), + ColInfo("output_tables", types::DataType::STRING, types::PatternType::GENERAL, + "A list of tables output by the tetragon")); + // TODO(ddelnano): Add in the create time, and TTL in here after we add those attributes to the + // GetTetragonInfo RPC call in MDS. 
+ } + + Status Init(FunctionContext*) { + px::vizier::services::metadata::GetTetragonInfoRequest req; + resp_ = std::make_unique(); + + grpc::ClientContext ctx; + add_context_authentication_func_(&ctx); + auto s = stub_->GetTetragonInfo(&ctx, req, resp_.get()); + if (!s.ok()) { + return error::Internal("Failed to make RPC call to GetTetragonStatus: $0", + s.error_message()); + } + return Status::OK(); + } + + bool NextRecord(FunctionContext*, RecordWriter* rw) { + if (resp_->tetragons_size() == 0) { + return false; + } + const auto& tetragon_info = resp_->tetragons(idx_); + + auto u_or_s = ParseUUID(tetragon_info.id()); + sole::uuid u; + if (u_or_s.ok()) { + u = u_or_s.ConsumeValueOrDie(); + } + + auto actual = tetragon_info.state(); + auto expected = tetragon_info.expected_state(); + std::string state; + + switch (actual) { + case statuspb::PENDING_STATE: { + state = "pending"; + break; + } + case statuspb::RUNNING_STATE: { + state = "running"; + break; + } + case statuspb::FAILED_STATE: { + state = "failed"; + break; + } + case statuspb::TERMINATED_STATE: { + if (actual != expected) { + state = "terminating"; + } else { + state = "terminated"; + } + break; + } + default: + state = "unknown"; + } + + rapidjson::Document tables; + tables.SetArray(); + for (const auto& table : tetragon_info.schema_names()) { + tables.PushBack(internal::StringRef(table), tables.GetAllocator()); + } + + rapidjson::StringBuffer tables_sb; + rapidjson::Writer tables_writer(tables_sb); + tables.Accept(tables_writer); + + rw->Append(u.str()); + rw->Append(tetragon_info.name()); + rw->Append(state); + + rapidjson::Document statuses; + statuses.SetArray(); + for (const auto& status : tetragon_info.statuses()) { + statuses.PushBack(internal::StringRef(status.msg()), statuses.GetAllocator()); + } + rapidjson::StringBuffer statuses_sb; + rapidjson::Writer statuses_writer(statuses_sb); + statuses.Accept(statuses_writer); + rw->Append(statuses_sb.GetString()); + + 
rw->Append(tables_sb.GetString()); + + ++idx_; + return idx_ < resp_->tetragons_size(); + } + + private: + int idx_ = 0; + std::unique_ptr resp_; + std::shared_ptr stub_; + std::function add_context_authentication_func_; +}; + class GetCronScriptHistory final : public carnot::udf::UDTF { public: using CronScriptStoreStub = vizier::services::metadata::CronScriptStoreService::Stub; diff --git a/src/vizier/messages/messagespb/BUILD.bazel b/src/vizier/messages/messagespb/BUILD.bazel index 902b666b693..7b098be2b4a 100644 --- a/src/vizier/messages/messagespb/BUILD.bazel +++ b/src/vizier/messages/messagespb/BUILD.bazel @@ -25,6 +25,7 @@ pl_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_proto", "//src/carnot/planner/file_source/ir:logical_pl_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_proto", "//src/carnot/planpb:plan_pl_proto", "//src/common/base/statuspb:status_pl_proto", "//src/shared/bloomfilterpb:bloomfilter_pl_proto", @@ -46,6 +47,7 @@ pl_cc_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_cc_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_cc_proto", "//src/carnot/planner/file_source/ir:logical_pl_cc_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_cc_proto", "//src/carnot/planpb:plan_pl_cc_proto", "//src/common/base/statuspb:status_pl_cc_proto", "//src/shared/bloomfilterpb:bloomfilter_pl_cc_proto", @@ -68,6 +70,7 @@ pl_go_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_go_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_go_proto", "//src/carnot/planner/file_source/ir:logical_pl_go_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_go_proto", "//src/carnot/planpb:plan_pl_go_proto", "//src/common/base/statuspb:status_pl_go_proto", "//src/shared/bloomfilterpb:bloomfilter_pl_go_proto", diff --git a/src/vizier/messages/messagespb/messages.proto 
b/src/vizier/messages/messagespb/messages.proto index 32e61d92dba..81ae7a617f5 100644 --- a/src/vizier/messages/messagespb/messages.proto +++ b/src/vizier/messages/messagespb/messages.proto @@ -27,6 +27,7 @@ import "src/api/proto/uuidpb/uuid.proto"; import "src/carnot/planner/distributedpb/distributed_plan.proto"; import "src/carnot/planner/dynamic_tracing/ir/logicalpb/logical.proto"; import "src/carnot/planner/file_source/ir/logical.proto"; +import "src/carnot/planner/tetragon/ir/logical.proto"; import "src/carnot/planpb/plan.proto"; import "src/common/base/statuspb/status.proto"; import "src/shared/k8s/metadatapb/metadata.proto"; @@ -46,6 +47,7 @@ message VizierMessage { ConfigUpdateMessage config_update_message = 11; K8sMetadataMessage k8s_metadata_message = 12; FileSourceMessage file_source_message = 13; + TetragonMessage tetragon_message = 14; } // DEPRECATED: Formerly used for UpdateAgentRequest. reserved 3; @@ -71,6 +73,15 @@ message FileSourceMessage { } } +// A wrapper around all tetragon-related messages that can be sent over the message bus. +message TetragonMessage { + oneof msg { + TetragonInfoUpdate tetragon_info_update = 1; + RemoveTetragonRequest remove_tetragon_request = 2; + RegisterTetragonRequest register_tetragon_request = 3; + } +} + // A wrapper around all PEM-config-related messages that can be sent over the message bus. message ConfigUpdateMessage { oneof msg { @@ -204,6 +215,27 @@ message RemoveFileSourceRequest { uuidpb.UUID id = 1 [ (gogoproto.customname) = "ID" ]; } +// The request to register tetragons on a PEM. +message RegisterTetragonRequest { + px.carnot.planner.tetragon.ir.TetragonDeployment tetragon_deployment = 1; + uuidpb.UUID id = 2 [ (gogoproto.customname) = "ID" ]; +} + +// An update message sent when a tetragon's status changes. +message TetragonInfoUpdate { + uuidpb.UUID id = 1 [ (gogoproto.customname) = "ID" ]; + // The state of the tetragon. 
+ px.statuspb.LifeCycleState state = 2; + // The status of the tetragon, specified if the state of the tetragon is not healthy. + px.statuspb.Status status = 3; + // The ID of the agent sending the update. + uuidpb.UUID agent_id = 4 [ (gogoproto.customname) = "AgentID" ]; +} + +message RemoveTetragonRequest { + uuidpb.UUID id = 1 [ (gogoproto.customname) = "ID" ]; +} + // A request to update a config setting on a PEM. message ConfigUpdateRequest { // The key of the setting that should be updated. diff --git a/src/vizier/services/agent/pem/pem_manager.cc b/src/vizier/services/agent/pem/pem_manager.cc index c73444b9b6c..9c375da3e57 100644 --- a/src/vizier/services/agent/pem/pem_manager.cc +++ b/src/vizier/services/agent/pem/pem_manager.cc @@ -83,6 +83,11 @@ Status PEMManager::PostRegisterHookImpl() { stirling_.get(), table_store(), relation_info_manager()); PX_RETURN_IF_ERROR(RegisterMessageHandler(messages::VizierMessage::MsgCase::kFileSourceMessage, file_source_manager_)); + tetragon_manager_ = + std::make_shared(dispatcher(), info(), agent_nats_connector(), + stirling_.get(), table_store(), relation_info_manager()); + PX_RETURN_IF_ERROR(RegisterMessageHandler(messages::VizierMessage::MsgCase::kTetragonMessage, + tetragon_manager_)); return Status::OK(); } diff --git a/src/vizier/services/agent/pem/pem_manager.h b/src/vizier/services/agent/pem/pem_manager.h index d9c138355d9..e05703e5967 100644 --- a/src/vizier/services/agent/pem/pem_manager.h +++ b/src/vizier/services/agent/pem/pem_manager.h @@ -29,6 +29,7 @@ #include "src/common/system/kernel_version.h" #include "src/stirling/stirling.h" #include "src/vizier/services/agent/pem/file_source_manager.h" +#include "src/vizier/services/agent/pem/tetragon_manager.h" #include "src/vizier/services/agent/pem/tracepoint_manager.h" #include "src/vizier/services/agent/shared/manager/manager.h" @@ -106,6 +107,7 @@ class PEMManager : public Manager { std::unique_ptr stirling_; std::shared_ptr tracepoint_manager_; 
std::shared_ptr file_source_manager_; + std::shared_ptr tetragon_manager_; // Timer for triggering ClockConverter polls. px::event::TimerUPtr clock_converter_timer_; diff --git a/src/vizier/services/agent/pem/tetragon_manager.cc b/src/vizier/services/agent/pem/tetragon_manager.cc new file mode 100644 index 00000000000..a62239c9993 --- /dev/null +++ b/src/vizier/services/agent/pem/tetragon_manager.cc @@ -0,0 +1,234 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include "src/common/base/base.h" +#include "src/vizier/services/agent/pem/tetragon_manager.h" + +constexpr auto kUpdateInterval = std::chrono::seconds(2); + +namespace px { +namespace vizier { +namespace agent { + +TetragonManager::TetragonManager(px::event::Dispatcher* dispatcher, Info* agent_info, + Manager::VizierNATSConnector* nats_conn, + stirling::Stirling* stirling, + table_store::TableStore* table_store, + RelationInfoManager* relation_info_manager) + : MessageHandler(dispatcher, agent_info, nats_conn), + dispatcher_(dispatcher), + nats_conn_(nats_conn), + stirling_(stirling), + table_store_(table_store), + relation_info_manager_(relation_info_manager) { + tetragon_monitor_timer_ = + dispatcher_->CreateTimer(std::bind(&TetragonManager::Monitor, this)); + // Kick off the background monitor. 
+ tetragon_monitor_timer_->EnableTimer(kUpdateInterval); +} + +Status TetragonManager::HandleMessage(std::unique_ptr msg) { + // The main purpose of handle message is to update the local state based on updates + // from the MDS. + if (!msg->has_tetragon_message()) { + return error::InvalidArgument("Can only handle tetragon requests"); + } + + const messages::TetragonMessage& tetragon = msg->tetragon_message(); + switch (tetragon.msg_case()) { + case messages::TetragonMessage::kRegisterTetragonRequest: { + return HandleRegisterTetragonRequest(tetragon.register_tetragon_request()); + } + case messages::TetragonMessage::kRemoveTetragonRequest: { + return HandleRemoveTetragonRequest(tetragon.remove_tetragon_request()); + } + default: + LOG(ERROR) << "Unknown message type: " << tetragon.msg_case() << " skipping"; + } + return Status::OK(); +} + +std::string TetragonManager::DebugString() const { + std::lock_guard lock(mu_); + std::stringstream ss; + auto now = std::chrono::steady_clock::now(); + ss << absl::Substitute("Tetragon Manager Debug State:\n"); + ss << absl::Substitute("ID\tNAME\tCURRENT_STATE\tEXPECTED_STATE\tlast_updated\n"); + for (const auto& [id, tetragon] : tetragons_) { + ss << absl::Substitute( + "$0\t$1\t$2\t$3\t$4 seconds\n", id.str(), tetragon.name, + statuspb::LifeCycleState_Name(tetragon.current_state), + statuspb::LifeCycleState_Name(tetragon.expected_state), + std::chrono::duration_cast(now - tetragon.last_updated_at) + .count()); + } + return ss.str(); +} + +Status TetragonManager::HandleRegisterTetragonRequest( + const messages::RegisterTetragonRequest& req) { + auto glob_pattern = req.tetragon_deployment().glob_pattern(); + PX_ASSIGN_OR_RETURN(auto id, ParseUUID(req.id())); + LOG(INFO) << "Registering tetragon: " << glob_pattern << " uuid string=" << id.str(); + + TetragonInfo info; + info.name = glob_pattern; + info.id = id; + info.expected_state = statuspb::RUNNING_STATE; + info.current_state = statuspb::PENDING_STATE; + info.last_updated_at 
= dispatcher_->GetTimeSource().MonotonicTime(); + stirling_->RegisterTetragon(id, glob_pattern); + { + std::lock_guard lock(mu_); + tetragons_[id] = std::move(info); + } + return Status::OK(); +} + +Status TetragonManager::HandleRemoveTetragonRequest( + const messages::RemoveTetragonRequest& req) { + PX_ASSIGN_OR_RETURN(auto id, ParseUUID(req.id())); + std::lock_guard lock(mu_); + auto it = tetragons_.find(id); + if (it == tetragons_.end()) { + return error::NotFound("Tetragon with ID: $0, not found", id.str()); + } + + it->second.expected_state = statuspb::TERMINATED_STATE; + return stirling_->RemoveTetragon(id); +} + +void TetragonManager::Monitor() { + std::lock_guard lock(mu_); + + for (auto& [id, tetragon] : tetragons_) { + auto s_or_publish = stirling_->GetTetragonInfo(id); + statuspb::LifeCycleState current_state; + // Get the latest current state according to stirling. + if (s_or_publish.ok()) { + current_state = statuspb::RUNNING_STATE; + } else { + switch (s_or_publish.code()) { + case statuspb::FAILED_PRECONDITION: + // Means the binary has not been found. + current_state = statuspb::FAILED_STATE; + break; + case statuspb::RESOURCE_UNAVAILABLE: + current_state = statuspb::PENDING_STATE; + break; + case statuspb::NOT_FOUND: + // Means we didn't actually find the probe. If we requested termination, + // it's because the probe has been removed. + current_state = (tetragon.expected_state == statuspb::TERMINATED_STATE) + ? statuspb::TERMINATED_STATE + : statuspb::UNKNOWN_STATE; + break; + default: + current_state = statuspb::FAILED_STATE; + break; + } + } + + if (current_state != statuspb::RUNNING_STATE && + tetragon.expected_state == statuspb::TERMINATED_STATE) { + current_state = statuspb::TERMINATED_STATE; + } + + if (current_state == tetragon.current_state) { + // No state transition, nothing to do. + continue; + } + + // The following transitions are legal: + // 1. Pending -> Terminated: Probe is stopped before starting. + // 2. 
Pending -> Running : Probe starts up. + // 3. Running -> Terminated: Probe is stopped. + // 4. Running -> Failed: Probe got detached because binary died. + // 5. Failed -> Running: Probe started up because binary came back to life. + // + // In all cases we basically inform the MDS. + // In the cases where we transition to running, we need to update the schemas. + + Status probe_status = Status::OK(); + LOG(INFO) << absl::Substitute("Tetragon[$0]::$1 has transitioned $2 -> $3", id.str(), + tetragon.name, + statuspb::LifeCycleState_Name(tetragon.current_state), + statuspb::LifeCycleState_Name(current_state)); + // Check if running now, then update the schema. + if (current_state == statuspb::RUNNING_STATE) { + // We must have just transitioned into running. We try to apply the new schema. + // If it fails we will trigger an error and report that to MDS. + auto publish_pb = s_or_publish.ConsumeValueOrDie(); + auto s = UpdateSchema(publish_pb); + if (!s.ok()) { + current_state = statuspb::FAILED_STATE; + probe_status = s; + } + } else { + probe_status = s_or_publish.status(); + } + + tetragon.current_state = current_state; + + // Update MDS with the latest status. 
+ px::vizier::messages::VizierMessage msg; + auto tetragon_msg = msg.mutable_tetragon_message(); + auto update_msg = tetragon_msg->mutable_tetragon_info_update(); + ToProto(agent_info()->agent_id, update_msg->mutable_agent_id()); + ToProto(id, update_msg->mutable_id()); + update_msg->set_state(tetragon.current_state); + probe_status.ToProto(update_msg->mutable_status()); + VLOG(1) << "Sending tetragon info update message: " << msg.DebugString(); + auto s = nats_conn_->Publish(msg); + if (!s.ok()) { + LOG(ERROR) << "Failed to update nats"; + } + } + tetragon_monitor_timer_->EnableTimer(kUpdateInterval); +} + +Status TetragonManager::UpdateSchema(const stirling::stirlingpb::Publish& publish_pb) { + LOG(INFO) << "Updating schema for tetragon"; + auto relation_info_vec = ConvertPublishPBToRelationInfo(publish_pb); + // TODO(zasgar): Failure here can lead to an inconsistent schema state. We should + // figure out how to handle this as part of the data model refactor project. + for (const auto& relation_info : relation_info_vec) { + if (!relation_info_manager_->HasRelation(relation_info.name)) { + table_store_->AddTable( + table_store::HotColdTable::Create(relation_info.name, relation_info.relation), + relation_info.name, relation_info.id); + PX_RETURN_IF_ERROR(relation_info_manager_->AddRelationInfo(relation_info)); + } else { + if (relation_info.relation != table_store_->GetTable(relation_info.name)->GetRelation()) { + return error::Internal( + "Tetragon is not compatible with the schema of the specified output table. 
" + "[table_name=$0]", + relation_info.name); + } + PX_RETURN_IF_ERROR(table_store_->AddTableAlias(relation_info.id, relation_info.name)); + } + } + return Status::OK(); +} + +} // namespace agent +} // namespace vizier +} // namespace px diff --git a/src/vizier/services/agent/pem/tetragon_manager.h b/src/vizier/services/agent/pem/tetragon_manager.h new file mode 100644 index 00000000000..5c1b236e3f4 --- /dev/null +++ b/src/vizier/services/agent/pem/tetragon_manager.h @@ -0,0 +1,73 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include + +#include + +#include "src/stirling/stirling.h" +#include "src/vizier/services/agent/shared/manager/manager.h" + +namespace px { +namespace vizier { +namespace agent { + +struct TetragonInfo { + std::string name; + sole::uuid id; + statuspb::LifeCycleState expected_state; + statuspb::LifeCycleState current_state; + std::chrono::time_point last_updated_at; +}; + +class TetragonManager : public Manager::MessageHandler { + public: + TetragonManager() = delete; + TetragonManager(px::event::Dispatcher* dispatcher, Info* agent_info, + Manager::VizierNATSConnector* nats_conn, stirling::Stirling* stirling, + table_store::TableStore* table_store, + RelationInfoManager* relation_info_manager); + + Status HandleMessage(std::unique_ptr msg) override; + std::string DebugString() const; + Status HandleRegisterTetragonRequest(const messages::RegisterTetragonRequest& req); + Status HandleRemoveTetragonRequest(const messages::RemoveTetragonRequest& req); + + private: + // The tetragon Monitor that is responsible for watching and updating the state of + // active tetragons. 
+ void Monitor(); + Status UpdateSchema(const stirling::stirlingpb::Publish& publish_proto); + + px::event::Dispatcher* dispatcher_; + Manager::VizierNATSConnector* nats_conn_; + stirling::Stirling* stirling_; + table_store::TableStore* table_store_; + RelationInfoManager* relation_info_manager_; + + event::TimerUPtr tetragon_monitor_timer_; + mutable std::mutex mu_; + absl::flat_hash_map tetragons_; +}; + +} // namespace agent +} // namespace vizier +} // namespace px diff --git a/src/vizier/services/agent/shared/manager/manager.cc b/src/vizier/services/agent/shared/manager/manager.cc index 01efe60044b..980485a27a3 100644 --- a/src/vizier/services/agent/shared/manager/manager.cc +++ b/src/vizier/services/agent/shared/manager/manager.cc @@ -94,6 +94,13 @@ Manager::MDFSServiceSPtr CreateMDFSStub(const std::shared_ptr& ch return std::make_shared(chan); } +Manager::MDTTServiceSPtr CreateMDTTStub(const std::shared_ptr& chan) { + if (chan == nullptr) { + return nullptr; + } + return std::make_shared(chan); +} + std::shared_ptr CreateCronScriptStub( const std::shared_ptr& chan) { if (chan == nullptr) { @@ -115,7 +122,8 @@ Manager::Manager(sole::uuid agent_id, std::string_view pod_name, std::string_vie relation_info_manager_(std::make_unique()), mds_channel_(grpc::CreateChannel(std::string(mds_url), grpc_channel_creds_)), func_context_(this, CreateMDSStub(mds_channel_), CreateMDTPStub(mds_channel_), - CreateMDFSStub(mds_channel_), CreateCronScriptStub(mds_channel_), table_store_, + CreateMDFSStub(mds_channel_), CreateMDTTStub(mds_channel_), + CreateCronScriptStub(mds_channel_), table_store_, [](grpc::ClientContext* ctx) { AddServiceTokenToClientContext(ctx); }), memory_metrics_(&GetMetricsRegistry(), "agent_id", agent_id.str()) { // Register Vizier specific and carnot builtin functions. 
diff --git a/src/vizier/services/agent/shared/manager/manager.h b/src/vizier/services/agent/shared/manager/manager.h index af2cd912a5a..f2508294cd3 100644 --- a/src/vizier/services/agent/shared/manager/manager.h +++ b/src/vizier/services/agent/shared/manager/manager.h @@ -93,7 +93,9 @@ class Manager : public BaseManager { using MDTPService = services::metadata::MetadataTracepointService; using MDTPServiceSPtr = std::shared_ptr; using MDFSService = services::metadata::MetadataFileSourceService; + using MDTTService = services::metadata::MetadataTetragonService; using MDFSServiceSPtr = std::shared_ptr; + using MDTTServiceSPtr = std::shared_ptr; using ResultSinkStub = px::carnotpb::ResultSinkService::StubInterface; Manager() = delete; diff --git a/src/vizier/services/metadata/BUILD.bazel b/src/vizier/services/metadata/BUILD.bazel index f885bd1c777..f12eb593db8 100644 --- a/src/vizier/services/metadata/BUILD.bazel +++ b/src/vizier/services/metadata/BUILD.bazel @@ -34,6 +34,7 @@ go_library( "//src/vizier/services/metadata/controllers/agent", "//src/vizier/services/metadata/controllers/cronscript", "//src/vizier/services/metadata/controllers/file_source", + "//src/vizier/services/metadata/controllers/tetragon", "//src/vizier/services/metadata/controllers/k8smeta", "//src/vizier/services/metadata/controllers/tracepoint", "//src/vizier/services/metadata/metadataenv", diff --git a/src/vizier/services/metadata/controllers/tetragon/BUILD.bazel b/src/vizier/services/metadata/controllers/tetragon/BUILD.bazel new file mode 100644 index 00000000000..69c043bd2f3 --- /dev/null +++ b/src/vizier/services/metadata/controllers/tetragon/BUILD.bazel @@ -0,0 +1,74 @@ +# Copyright 2018- The Pixie Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("//bazel:pl_build_system.bzl", "pl_go_test") + +go_library( + name = "tetragon", + srcs = [ + "tetragon.go", + "tetragon_store.go", + ], + importpath = "px.dev/pixie/src/vizier/services/metadata/controllers/tetragon", + visibility = ["//src/vizier:__subpackages__"], + deps = [ + "//src/api/proto/uuidpb:uuid_pl_go_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_go_proto", + "//src/common/base/statuspb:status_pl_go_proto", + "//src/utils", + "//src/vizier/messages/messagespb:messages_pl_go_proto", + "//src/vizier/services/metadata/storepb:store_pl_go_proto", + "//src/vizier/services/shared/agentpb:agent_pl_go_proto", + "//src/vizier/utils/datastore", + "@com_github_gofrs_uuid//:uuid", + "@com_github_gogo_protobuf//proto", + "@com_github_gogo_protobuf//types", + "@com_github_sirupsen_logrus//:logrus", + "@org_golang_google_grpc//codes", + "@org_golang_google_grpc//status", + "@org_golang_x_sync//errgroup", + ], +) + +pl_go_test( + name = "tetragon_test", + srcs = [ + "tetragon_store_test.go", + "tetragon_test.go", + ], + embed = [":tetragon"], + deps = [ + "//src/api/proto/uuidpb:uuid_pl_go_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_go_proto", + "//src/common/base/statuspb:status_pl_go_proto", + "//src/utils", + "//src/vizier/messages/messagespb:messages_pl_go_proto", + "//src/vizier/services/metadata/controllers/agent/mock", + "//src/vizier/services/metadata/controllers/tetragon/mock", + 
"//src/vizier/services/metadata/storepb:store_pl_go_proto", + "//src/vizier/services/shared/agentpb:agent_pl_go_proto", + "//src/vizier/utils/datastore/pebbledb", + "@com_github_cockroachdb_pebble//:pebble", + "@com_github_cockroachdb_pebble//vfs", + "@com_github_gofrs_uuid//:uuid", + "@com_github_gogo_protobuf//proto", + "@com_github_gogo_protobuf//types", + "@com_github_golang_mock//gomock", + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/src/vizier/services/metadata/controllers/tetragon/mock.go b/src/vizier/services/metadata/controllers/tetragon/mock.go new file mode 100644 index 00000000000..33877b4bd00 --- /dev/null +++ b/src/vizier/services/metadata/controllers/tetragon/mock.go @@ -0,0 +1,21 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +package tetragon + +//go:generate mockgen -source=tetragon.go -destination=mock/mock_tetragon.gen.go Store diff --git a/src/vizier/services/metadata/controllers/tetragon/mock/BUILD.bazel b/src/vizier/services/metadata/controllers/tetragon/mock/BUILD.bazel new file mode 100644 index 00000000000..2bed53eead7 --- /dev/null +++ b/src/vizier/services/metadata/controllers/tetragon/mock/BUILD.bazel @@ -0,0 +1,29 @@ +# Copyright 2018- The Pixie Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "mock", + srcs = ["mock_tetragon.gen.go"], + importpath = "px.dev/pixie/src/vizier/services/metadata/controllers/tetragon/mock", + visibility = ["//src/vizier:__subpackages__"], + deps = [ + "//src/vizier/services/metadata/storepb:store_pl_go_proto", + "@com_github_gofrs_uuid//:uuid", + "@com_github_golang_mock//gomock", + ], +) diff --git a/src/vizier/services/metadata/controllers/tetragon/tetragon.go b/src/vizier/services/metadata/controllers/tetragon/tetragon.go new file mode 100644 index 00000000000..ca190f347f0 --- /dev/null +++ b/src/vizier/services/metadata/controllers/tetragon/tetragon.go @@ -0,0 +1,375 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package tetragon + +import ( + "errors" + "fmt" + "sync" + "time" + + "github.com/gofrs/uuid" + "github.com/gogo/protobuf/proto" + "github.com/gogo/protobuf/types" + log "github.com/sirupsen/logrus" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "px.dev/pixie/src/api/proto/uuidpb" + "px.dev/pixie/src/carnot/planner/tetragon/ir" + "px.dev/pixie/src/common/base/statuspb" + "px.dev/pixie/src/utils" + "px.dev/pixie/src/vizier/messages/messagespb" + "px.dev/pixie/src/vizier/services/metadata/storepb" + "px.dev/pixie/src/vizier/services/shared/agentpb" +) + +var ( + // ErrTetragonAlreadyExists is produced if a tetragon already exists with the given name + // and does not have a matching schema. + ErrTetragonAlreadyExists = errors.New("Tetragon already exists") +) + +// agentMessenger is a controller that lets us message all agents and all active agents. +type agentMessenger interface { + MessageAgents(agentIDs []uuid.UUID, msg []byte) error + MessageActiveAgents(msg []byte) error +} + +// Store is a datastore which can store, update, and retrieve information about tetragons. +type Store interface { + UpsertTetragon(uuid.UUID, *storepb.TetragonInfo) error + GetTetragon(uuid.UUID) (*storepb.TetragonInfo, error) + GetTetragons() ([]*storepb.TetragonInfo, error) + UpdateTetragonState(*storepb.AgentTetragonStatus) error + GetTetragonStates(uuid.UUID) ([]*storepb.AgentTetragonStatus, error) + SetTetragonWithName(string, uuid.UUID) error + GetTetragonsWithNames([]string) ([]*uuid.UUID, error) + GetTetragonsForIDs([]uuid.UUID) ([]*storepb.TetragonInfo, error) + SetTetragonTTL(uuid.UUID, time.Duration) error + DeleteTetragonTTLs([]uuid.UUID) error + DeleteTetragon(uuid.UUID) error + DeleteTetragonsForAgent(uuid.UUID) error + GetTetragonTTLs() ([]uuid.UUID, []time.Time, error) +} + +// Manager manages the tetragons deployed in the cluster. 
+type Manager struct { + ts Store + agtMgr agentMessenger + + done chan struct{} + once sync.Once +} + +// NewManager creates a new tetragon manager. +func NewManager(ts Store, agtMgr agentMessenger, ttlReaperDuration time.Duration) *Manager { + tm := &Manager{ + ts: ts, + agtMgr: agtMgr, + done: make(chan struct{}), + } + + go tm.watchForTetragonExpiry(ttlReaperDuration) + return tm +} + +func (m *Manager) watchForTetragonExpiry(ttlReaperDuration time.Duration) { + ticker := time.NewTicker(ttlReaperDuration) + defer ticker.Stop() + for { + select { + case <-m.done: + return + case <-ticker.C: + m.terminateExpiredTetragons() + } + } +} + +func (m *Manager) terminateExpiredTetragons() { + fss, err := m.ts.GetTetragons() + if err != nil { + log.WithError(err).Warn("error encountered when trying to terminating expired tetragons") + return + } + + ttlKeys, ttlVals, err := m.ts.GetTetragonTTLs() + if err != nil { + log.WithError(err).Warn("error encountered when trying to terminating expired tetragons") + return + } + + now := time.Now() + + // Lookup for tetragons that still have an active ttl + fsActive := make(map[uuid.UUID]bool) + for i, fs := range ttlKeys { + fsActive[fs] = ttlVals[i].After(now) + } + + for _, fs := range fss { + fsID := utils.UUIDFromProtoOrNil(fs.ID) + if fsActive[fsID] { + // Tetragon TTL exists and is in the future + continue + } + if fs.ExpectedState == statuspb.TERMINATED_STATE { + // Tetragon is already in terminated state + continue + } + err = m.terminateTetragon(fsID) + if err != nil { + log.WithError(err).Warn("error encountered when trying to terminating expired tetragons") + } + } +} + +func (m *Manager) terminateTetragon(id uuid.UUID) error { + // Update state in datastore to terminated. 
+ fs, err := m.ts.GetTetragon(id) + if err != nil { + return err + } + + if fs == nil { + return nil + } + + fs.ExpectedState = statuspb.TERMINATED_STATE + err = m.ts.UpsertTetragon(id, fs) + if err != nil { + return err + } + + // Send termination messages to PEMs. + tetragonReq := messagespb.VizierMessage{ + Msg: &messagespb.VizierMessage_TetragonMessage{ + TetragonMessage: &messagespb.TetragonMessage{ + Msg: &messagespb.TetragonMessage_RemoveTetragonRequest{ + RemoveTetragonRequest: &messagespb.RemoveTetragonRequest{ + ID: utils.ProtoFromUUID(id), + }, + }, + }, + }, + } + msg, err := tetragonReq.Marshal() + if err != nil { + return err + } + + return m.agtMgr.MessageActiveAgents(msg) +} + +func (m *Manager) deleteTetragon(id uuid.UUID) error { + return m.ts.DeleteTetragon(id) +} + +// CreateTetragon creates and stores info about the given tetragon. +func (m *Manager) CreateTetragon(tetragonName string, tetragonDeployment *ir2.TetragonDeployment) (*uuid.UUID, error) { + // Check to see if a tetragon with the matching name already exists. + resp, err := m.ts.GetTetragonsWithNames([]string{tetragonName}) + if err != nil { + return nil, err + } + + if len(resp) != 1 { + return nil, errors.New("Could not fetch tetragon") + } + prevTetragonID := resp[0] + + ttl, err := types.DurationFromProto(tetragonDeployment.TTL) + if err != nil { + return nil, status.Error(codes.Internal, fmt.Sprintf("Failed to parse duration: %+v", err)) + } + + if prevTetragonID != nil { // Existing tetragon already exists. + prevTetragon, err := m.ts.GetTetragon(*prevTetragonID) + if err != nil { + return nil, err + } + if prevTetragon != nil && prevTetragon.ExpectedState != statuspb.TERMINATED_STATE { + // If everything is exactly the same, no need to redeploy + // - return prevTetragonID, ErrTetragonAlreadyExists + // If anything inside tetragons has changed + // - delete old tetragons, and insert new tetragons. + + // Check if the tetragons are exactly the same. 
+ allFsSame := true + if !proto.Equal(prevTetragon.Tetragon, tetragonDeployment) { + allFsSame = false + } + + if allFsSame { + err = m.ts.SetTetragonTTL(*prevTetragonID, ttl) + if err != nil { + return nil, err + } + return prevTetragonID, ErrTetragonAlreadyExists + } + + // Something has changed, so trigger termination of the old tetragon. + err = m.ts.DeleteTetragonTTLs([]uuid.UUID{*prevTetragonID}) + if err != nil { + return nil, err + } + } + } + + fsID, err := uuid.NewV4() + if err != nil { + return nil, err + } + newTetragon := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(fsID), + Name: tetragonName, + Tetragon: tetragonDeployment, + ExpectedState: statuspb.RUNNING_STATE, + } + err = m.ts.UpsertTetragon(fsID, newTetragon) + if err != nil { + return nil, err + } + err = m.ts.SetTetragonTTL(fsID, ttl) + if err != nil { + return nil, err + } + err = m.ts.SetTetragonWithName(tetragonName, fsID) + if err != nil { + return nil, err + } + return &fsID, nil +} + +// GetAllTetragons gets all the tetragons currently tracked by the metadata service. +func (m *Manager) GetAllTetragons() ([]*storepb.TetragonInfo, error) { + return m.ts.GetTetragons() +} + +// UpdateAgentTetragonStatus updates the tetragon info with the new agent tetragon status. +func (m *Manager) UpdateAgentTetragonStatus(tetragonID *uuidpb.UUID, agentID *uuidpb.UUID, state statuspb.LifeCycleState, status *statuspb.Status) error { + if state == statuspb.TERMINATED_STATE { // If all agent tetragon statuses are now terminated, we can finally delete the tetragon from the datastore. 
+ tID := utils.UUIDFromProtoOrNil(tetragonID) + states, err := m.GetTetragonStates(tID) + if err != nil { + return err + } + allTerminated := true + for _, s := range states { + if s.State != statuspb.TERMINATED_STATE && !s.AgentID.Equal(agentID) { + allTerminated = false + break + } + } + + if allTerminated { + return m.deleteTetragon(tID) + } + } + + tetragonState := &storepb.AgentTetragonStatus{ + State: state, + Status: status, + ID: tetragonID, + AgentID: agentID, + } + + return m.ts.UpdateTetragonState(tetragonState) +} + +// RegisterTetragon sends requests to the given agents to register the specified tetragon. +func (m *Manager) RegisterTetragon(agents []*agentpb.Agent, tetragonID uuid.UUID, tetragonDeployment *ir2.TetragonDeployment) error { + agentIDs := make([]uuid.UUID, len(agents)) + tetragonReq := messagespb.VizierMessage{ + Msg: &messagespb.VizierMessage_TetragonMessage{ + TetragonMessage: &messagespb.TetragonMessage{ + Msg: &messagespb.TetragonMessage_RegisterTetragonRequest{ + RegisterTetragonRequest: &messagespb.RegisterTetragonRequest{ + TetragonDeployment: tetragonDeployment, + ID: utils.ProtoFromUUID(tetragonID), + }, + }, + }, + }, + } + msg, err := tetragonReq.Marshal() + if err != nil { + return err + } + for i, agt := range agents { + agentIDs[i] = utils.UUIDFromProtoOrNil(agt.Info.AgentID) + } + + err = m.agtMgr.MessageAgents(agentIDs, msg) + + if err != nil { + return err + } + + return nil +} + +// GetTetragonInfo gets the status for the tetragon with the given ID. +func (m *Manager) GetTetragonInfo(tetragonID uuid.UUID) (*storepb.TetragonInfo, error) { + return m.ts.GetTetragon(tetragonID) +} + +// GetTetragonStates gets all the known agent states for the given tetragon. +func (m *Manager) GetTetragonStates(tetragonID uuid.UUID) ([]*storepb.AgentTetragonStatus, error) { + return m.ts.GetTetragonStates(tetragonID) +} + +// GetTetragonsForIDs gets all the tetragon infos for the given ids. 
+func (m *Manager) GetTetragonsForIDs(ids []uuid.UUID) ([]*storepb.TetragonInfo, error) { + return m.ts.GetTetragonsForIDs(ids) +} + +// RemoveTetragons starts the termination process for the tetragons with the given names. +func (m *Manager) RemoveTetragons(names []string) error { + fsIDs, err := m.ts.GetTetragonsWithNames(names) + if err != nil { + return err + } + + ids := make([]uuid.UUID, len(fsIDs)) + + for i, id := range fsIDs { + if id == nil { + return fmt.Errorf("Could not find tetragon for given name: %s", names[i]) + } + ids[i] = *id + } + + return m.ts.DeleteTetragonTTLs(ids) +} + +// DeleteAgent deletes tetragons on the given agent. +func (m *Manager) DeleteAgent(agentID uuid.UUID) error { + return m.ts.DeleteTetragonsForAgent(agentID) +} + +// Close cleans up the goroutines created and renders this no longer useable. +func (m *Manager) Close() { + m.once.Do(func() { + close(m.done) + }) + m.ts = nil + m.agtMgr = nil +} diff --git a/src/vizier/services/metadata/controllers/tetragon/tetragon_store.go b/src/vizier/services/metadata/controllers/tetragon/tetragon_store.go new file mode 100644 index 00000000000..02079cc1c53 --- /dev/null +++ b/src/vizier/services/metadata/controllers/tetragon/tetragon_store.go @@ -0,0 +1,309 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package tetragon + +import ( + "path" + "strings" + "time" + + "github.com/gofrs/uuid" + "github.com/gogo/protobuf/proto" + "golang.org/x/sync/errgroup" + + "px.dev/pixie/src/api/proto/uuidpb" + "px.dev/pixie/src/utils" + "px.dev/pixie/src/vizier/services/metadata/storepb" + "px.dev/pixie/src/vizier/utils/datastore" +) + +const ( + tetragonsPrefix = "/tetragon/" + tetragonStatesPrefix = "/tetragonStates/" + tetragonTTLsPrefix = "/tetragonTTL/" + tetragonNamesPrefix = "/tetragonName/" +) + +// Datastore implements the TetragonStore interface on a given Datastore. +type Datastore struct { + ds datastore.MultiGetterSetterDeleterCloser +} + +// NewDatastore wraps the datastore in a tetragon store +func NewDatastore(ds datastore.MultiGetterSetterDeleterCloser) *Datastore { + return &Datastore{ds: ds} +} + +func getTetragonWithNameKey(tetragonName string) string { + return path.Join(tetragonNamesPrefix, tetragonName) +} + +func getTetragonKey(tetragonID uuid.UUID) string { + return path.Join(tetragonsPrefix, tetragonID.String()) +} + +func getTetragonStatesKey(tetragonID uuid.UUID) string { + return path.Join(tetragonStatesPrefix, tetragonID.String()) +} + +func getTetragonStateKey(tetragonID uuid.UUID, agentID uuid.UUID) string { + return path.Join(tetragonStatesPrefix, tetragonID.String(), agentID.String()) +} + +func getTetragonTTLKey(tetragonID uuid.UUID) string { + return path.Join(tetragonTTLsPrefix, tetragonID.String()) +} + +// GetTetragonsWithNames gets which tetragon is associated with the given name. 
+func (t *Datastore) GetTetragonsWithNames(tetragonNames []string) ([]*uuid.UUID, error) { + eg := errgroup.Group{} + ids := make([]*uuid.UUID, len(tetragonNames)) + for i := 0; i < len(tetragonNames); i++ { + i := i // Closure for goroutine + eg.Go(func() error { + val, err := t.ds.Get(getTetragonWithNameKey(tetragonNames[i])) + if err != nil { + return err + } + if val == nil { + return nil + } + uuidPB := &uuidpb.UUID{} + err = proto.Unmarshal(val, uuidPB) + if err != nil { + return err + } + id := utils.UUIDFromProtoOrNil(uuidPB) + ids[i] = &id + return nil + }) + } + err := eg.Wait() + if err != nil { + return nil, err + } + + return ids, nil +} + +// SetTetragonWithName associates the tetragone with the given name with the one with the provided ID. +func (t *Datastore) SetTetragonWithName(tetragonName string, tetragonID uuid.UUID) error { + tetragonIDpb := utils.ProtoFromUUID(tetragonID) + val, err := tetragonIDpb.Marshal() + if err != nil { + return err + } + + return t.ds.Set(getTetragonWithNameKey(tetragonName), string(val)) +} + +// UpsertTetragon updates or creates a new tetragon entry in the store. +func (t *Datastore) UpsertTetragon(tetragonID uuid.UUID, tetragonInfo *storepb.TetragonInfo) error { + val, err := tetragonInfo.Marshal() + if err != nil { + return err + } + + return t.ds.Set(getTetragonKey(tetragonID), string(val)) +} + +// DeleteTetragon deletes the tetragon from the store. +func (t *Datastore) DeleteTetragon(tetragonID uuid.UUID) error { + err := t.ds.DeleteAll([]string{getTetragonKey(tetragonID)}) + if err != nil { + return err + } + + return t.ds.DeleteWithPrefix(getTetragonStatesKey(tetragonID)) +} + +// GetTetragon gets the tetragon info from the store, if it exists. 
+func (t *Datastore) GetTetragon(tetragonID uuid.UUID) (*storepb.TetragonInfo, error) { + resp, err := t.ds.Get(getTetragonKey(tetragonID)) + if err != nil { + return nil, err + } + if resp == nil { + return nil, nil + } + + tetragonPb := &storepb.TetragonInfo{} + err = proto.Unmarshal(resp, tetragonPb) + if err != nil { + return nil, err + } + return tetragonPb, nil +} + +// GetTetragons gets all of the tetragon s in the store. +func (t *Datastore) GetTetragons() ([]*storepb.TetragonInfo, error) { + _, vals, err := t.ds.GetWithPrefix(tetragonsPrefix) + if err != nil { + return nil, err + } + + tetragons := make([]*storepb.TetragonInfo, len(vals)) + for i, val := range vals { + pb := &storepb.TetragonInfo{} + err := proto.Unmarshal(val, pb) + if err != nil { + continue + } + tetragons[i] = pb + } + return tetragons, nil +} + +// GetTetragonsForIDs gets all of the tetragon s with the given it.ds. +func (t *Datastore) GetTetragonsForIDs(ids []uuid.UUID) ([]*storepb.TetragonInfo, error) { + eg := errgroup.Group{} + tetragons := make([]*storepb.TetragonInfo, len(ids)) + for i := 0; i < len(ids); i++ { + i := i // Closure for goroutine + eg.Go(func() error { + val, err := t.ds.Get(getTetragonKey(ids[i])) + if err != nil { + return err + } + if val == nil { + return nil + } + fs := &storepb.TetragonInfo{} + err = proto.Unmarshal(val, fs) + if err != nil { + return err + } + tetragons[i] = fs + return nil + }) + } + + err := eg.Wait() + if err != nil { + return nil, err + } + + return tetragons, nil +} + +// UpdateTetragonState updates the agent tetragon state in the store. +func (t *Datastore) UpdateTetragonState(state *storepb.AgentTetragonStatus) error { + val, err := state.Marshal() + if err != nil { + return err + } + + fsID := utils.UUIDFromProtoOrNil(state.ID) + + return t.ds.Set(getTetragonStateKey(fsID, utils.UUIDFromProtoOrNil(state.AgentID)), string(val)) +} + +// GetTetragonStates gets all the agentTetragon states for the given tetragon . 
+func (t *Datastore) GetTetragonStates(tetragonID uuid.UUID) ([]*storepb.AgentTetragonStatus, error) { + _, vals, err := t.ds.GetWithPrefix(getTetragonStatesKey(tetragonID)) + if err != nil { + return nil, err + } + + tetragons := make([]*storepb.AgentTetragonStatus, len(vals)) + for i, val := range vals { + pb := &storepb.AgentTetragonStatus{} + err := proto.Unmarshal(val, pb) + if err != nil { + continue + } + tetragons[i] = pb + } + return tetragons, nil +} + +// SetTetragonTTL creates a key in the datastore with the given TTL. This represents the amount of time +// that the given tetragon should be persisted before terminating. +func (t *Datastore) SetTetragonTTL(tetragonID uuid.UUID, ttl time.Duration) error { + expiresAt := time.Now().Add(ttl) + encodedExpiry, err := expiresAt.MarshalBinary() + if err != nil { + return err + } + return t.ds.SetWithTTL(getTetragonTTLKey(tetragonID), string(encodedExpiry), ttl) +} + +// DeleteTetragonTTLs deletes the key in the datastore for the given tetragon TTLs. +// This is done as a single transaction, so if any deletes fail, they all fail. +func (t *Datastore) DeleteTetragonTTLs(ids []uuid.UUID) error { + keys := make([]string, len(ids)) + for i, id := range ids { + keys[i] = getTetragonTTLKey(id) + } + + return t.ds.DeleteAll(keys) +} + +// DeleteTetragonsForAgent deletes the tetragon s for a given agent. +// Note this only purges the combo tetragon ID+agentID keys. Said +// tetragon s might still be valid and deployed on other agents. +func (t *Datastore) DeleteTetragonsForAgent(agentID uuid.UUID) error { + fss, err := t.GetTetragons() + if err != nil { + return err + } + + delKeys := make([]string, len(fss)) + for i, fs := range fss { + delKeys[i] = getTetragonStateKey(utils.UUIDFromProtoOrNil(fs.ID), agentID) + } + + return t.ds.DeleteAll(delKeys) +} + +// GetTetragonTTLs gets the tetragon s which still have existing TTLs. 
+func (t *Datastore) GetTetragonTTLs() ([]uuid.UUID, []time.Time, error) { + keys, vals, err := t.ds.GetWithPrefix(tetragonTTLsPrefix) + if err != nil { + return nil, nil, err + } + + var ids []uuid.UUID + var expirations []time.Time + + for i, k := range keys { + keyParts := strings.Split(k, "/") + if len(keyParts) != 3 { + continue + } + id, err := uuid.FromString(keyParts[2]) + if err != nil { + continue + } + var expiresAt time.Time + err = expiresAt.UnmarshalBinary(vals[i]) + if err != nil { + // This shouldn't happen for new keys, but we might have added TTLs + // in the past without a value. So just pick some time sufficiently + // in the future. + // This value is only used to determine what tetragon s are expired + // as of _NOW_ so this is "safe". + expiresAt = time.Now().Add(30 * 24 * time.Hour) + } + ids = append(ids, id) + expirations = append(expirations, expiresAt) + } + + return ids, expirations, nil +} diff --git a/src/vizier/services/metadata/controllers/tetragon/tetragon_store_test.go b/src/vizier/services/metadata/controllers/tetragon/tetragon_store_test.go new file mode 100644 index 00000000000..3eea21a79f3 --- /dev/null +++ b/src/vizier/services/metadata/controllers/tetragon/tetragon_store_test.go @@ -0,0 +1,364 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package tetragon + +import ( + "os" + "testing" + "time" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/vfs" + "github.com/gofrs/uuid" + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "px.dev/pixie/src/api/proto/uuidpb" + "px.dev/pixie/src/common/base/statuspb" + "px.dev/pixie/src/utils" + "px.dev/pixie/src/vizier/services/metadata/storepb" + "px.dev/pixie/src/vizier/utils/datastore/pebbledb" +) + +func setupTest(t *testing.T) (*pebbledb.DataStore, *Datastore, func()) { + memFS := vfs.NewMem() + c, err := pebble.Open("test", &pebble.Options{ + FS: memFS, + }) + if err != nil { + t.Fatal("failed to initialize a pebbledb") + os.Exit(1) + } + + db := pebbledb.New(c, 3*time.Second) + ts := NewDatastore(db) + cleanup := func() { + err := db.Close() + if err != nil { + t.Fatal("Failed to close db") + } + } + + return db, ts, cleanup +} + +func TestTetragonStore_UpsertTetragon(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + tpID := uuid.Must(uuid.NewV4()) + // Create tetragon. + s1 := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(tpID), + } + + err := ts.UpsertTetragon(tpID, s1) + require.NoError(t, err) + + savedTetragon, err := db.Get("/tetragon/" + tpID.String()) + require.NoError(t, err) + savedTetragonPb := &storepb.TetragonInfo{} + err = proto.Unmarshal(savedTetragon, savedTetragonPb) + require.NoError(t, err) + assert.Equal(t, s1, savedTetragonPb) +} + +func TestTetragonStore_GetTetragon(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + tpID := uuid.Must(uuid.NewV4()) + // Create tetragon. 
+ s1 := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(tpID), + } + s1Text, err := s1.Marshal() + if err != nil { + t.Fatal("Unable to marshal tetragon pb") + } + + err = db.Set("/tetragon/"+tpID.String(), string(s1Text)) + require.NoError(t, err) + + Tetragon, err := ts.GetTetragon(tpID) + require.NoError(t, err) + assert.NotNil(t, tetragon) + + assert.Equal(t, s1.ID, tetragon.ID) +} + +func TestTetragonStore_GetTetragons(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + // Create tetragons. + s1ID := uuid.FromStringOrNil("8ba7b810-9dad-11d1-80b4-00c04fd430c8") + s1 := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(s1ID), + } + s1Text, err := s1.Marshal() + if err != nil { + t.Fatal("Unable to marshal tetragon pb") + } + + s2ID := uuid.FromStringOrNil("8ba7b810-9dad-11d1-80b4-00c04fd430c9") + s2 := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(s2ID), + } + s2Text, err := s2.Marshal() + if err != nil { + t.Fatal("Unable to marshal tetragon pb") + } + + err = db.Set("/tetragon/"+s1ID.String(), string(s1Text)) + require.NoError(t, err) + err = db.Set("/tetragon/"+s2ID.String(), string(s2Text)) + require.NoError(t, err) + + tetragons, err := ts.GetTetragons() + require.NoError(t, err) + assert.Equal(t, 2, len(tetragons)) + + ids := make([]string, len(tetragons)) + for i, tp := range tetragons { + ids[i] = utils.ProtoToUUIDStr(tp.ID) + } + + assert.Contains(t, ids, utils.ProtoToUUIDStr(s1.ID)) + assert.Contains(t, ids, utils.ProtoToUUIDStr(s2.ID)) +} + +func TestTetragonStore_GetTetragonsForIDs(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + // Create tetragons. 
+ s1ID := uuid.FromStringOrNil("8ba7b810-9dad-11d1-80b4-00c04fd430c8") + s1 := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(s1ID), + } + s1Text, err := s1.Marshal() + if err != nil { + t.Fatal("Unable to marshal tetragon pb") + } + + s2ID := uuid.FromStringOrNil("8ba7b810-9dad-11d1-80b4-00c04fd430c9") + s2 := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(s2ID), + } + s2Text, err := s2.Marshal() + if err != nil { + t.Fatal("Unable to marshal tetragon pb") + } + + s3ID := uuid.FromStringOrNil("8ba7b810-9dad-11d1-80b4-00c04fd430c7") + + err = db.Set("/tetragon/"+s1ID.String(), string(s1Text)) + require.NoError(t, err) + err = db.Set("/tetragon/"+s2ID.String(), string(s2Text)) + require.NoError(t, err) + + tetragons, err := ts.GetTetragonsForIDs([]uuid.UUID{s1ID, s2ID, s3ID}) + require.NoError(t, err) + assert.Equal(t, 3, len(tetragons)) + + ids := make([]string, len(tetragons)) + for i, tp := range tetragons { + if tp == nil || tp.ID == nil { + continue + } + ids[i] = utils.ProtoToUUIDStr(tp.ID) + } + + assert.Contains(t, ids, utils.ProtoToUUIDStr(s1.ID)) + assert.Contains(t, ids, utils.ProtoToUUIDStr(s2.ID)) +} + +func TestTetragonStore_UpdateTetragonState(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + agentID := uuid.Must(uuid.NewV4()) + tpID := uuid.Must(uuid.NewV4()) + // Create tetragon state + s1 := &storepb.AgentTetragonStatus{ + ID: utils.ProtoFromUUID(tpID), + AgentID: utils.ProtoFromUUID(agentID), + State: statuspb.RUNNING_STATE, + } + + err := ts.UpdateTetragonState(s1) + require.NoError(t, err) + + savedTetragon, err := db.Get("/tetragonStates/" + tpID.String() + "/" + agentID.String()) + require.NoError(t, err) + savedTetragonPb := &storepb.AgentTetragonStatus{} + err = proto.Unmarshal(savedTetragon, savedTetragonPb) + require.NoError(t, err) + assert.Equal(t, s1, savedTetragonPb) +} + +func TestTetragonStore_GetTetragonStates(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + tpID := 
uuid.Must(uuid.NewV4()) + + agentID1 := uuid.FromStringOrNil("6ba7b810-9dad-11d1-80b4-00c04fd430c8") + agentID2 := uuid.FromStringOrNil("6ba7b810-9dad-11d1-80b4-00c04fd430c9") + + // Create tetragons. + s1 := &storepb.AgentTetragonStatus{ + ID: utils.ProtoFromUUID(tpID), + AgentID: utils.ProtoFromUUID(agentID1), + State: statuspb.RUNNING_STATE, + } + s1Text, err := s1.Marshal() + if err != nil { + t.Fatal("Unable to marshal tetragon pb") + } + + s2 := &storepb.AgentTetragonStatus{ + ID: utils.ProtoFromUUID(tpID), + AgentID: utils.ProtoFromUUID(agentID2), + State: statuspb.PENDING_STATE, + } + s2Text, err := s2.Marshal() + if err != nil { + t.Fatal("Unable to marshal tetragon pb") + } + + err = db.Set("/tetragonStates/"+tpID.String()+"/"+agentID1.String(), string(s1Text)) + require.NoError(t, err) + err = db.Set("/tetragonStates/"+tpID.String()+"/"+agentID2.String(), string(s2Text)) + require.NoError(t, err) + + tetragons, err := ts.GetTetragonStates(tpID) + require.NoError(t, err) + assert.Equal(t, 2, len(tetragons)) + + agentIDs := make([]string, len(tetragons)) + for i, tp := range tetragons { + agentIDs[i] = utils.ProtoToUUIDStr(tp.AgentID) + } + + assert.Contains(t, agentIDs, utils.ProtoToUUIDStr(s1.AgentID)) + assert.Contains(t, agentIDs, utils.ProtoToUUIDStr(s2.AgentID)) +} + +func TestTetragonStore_SetTetragonWithName(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + tpID := uuid.Must(uuid.NewV4()) + + err := ts.SetTetragonWithName("test", tpID) + require.NoError(t, err) + + savedTetragon, err := db.Get("/tetragonName/test") + require.NoError(t, err) + savedTetragonPb := &uuidpb.UUID{} + err = proto.Unmarshal(savedTetragon, savedTetragonPb) + require.NoError(t, err) + assert.Equal(t, tpID, utils.UUIDFromProtoOrNil(savedTetragonPb)) +} + +func TestTetragonStore_GetTetragonsWithNames(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + tpID := uuid.Must(uuid.NewV4()) + TetragonIDpb := utils.ProtoFromUUID(tpID) + val, 
err := tetragonIDpb.Marshal() + require.NoError(t, err) + + tpID2 := uuid.Must(uuid.NewV4()) + tetragonIDpb2 := utils.ProtoFromUUID(tpID2) + val2, err := tetragonIDpb2.Marshal() + require.NoError(t, err) + + err = db.Set("/tetragonName/test", string(val)) + require.NoError(t, err) + err = db.Set("/tetragonName/test2", string(val2)) + require.NoError(t, err) + + tetragons, err := ts.GetTetragonsWithNames([]string{"test", "test2"}) + require.NoError(t, err) + assert.Equal(t, 2, len(tetragons)) + + tps := make([]string, len(tetragons)) + for i, tp := range tetragons { + tps[i] = tp.String() + } + + assert.Contains(t, tps, tpID.String()) + assert.Contains(t, tps, tpID2.String()) +} + +func TestTetragonStore_DeleteTetragon(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + tpID := uuid.Must(uuid.NewV4()) + + err := db.Set("/tetragon/"+tpID.String(), "test") + require.NoError(t, err) + + err = ts.DeleteTetragon(tpID) + require.NoError(t, err) + + val, err := db.Get("/tetragon/" + tpID.String()) + require.NoError(t, err) + assert.Nil(t, val) +} + +func TestTetragonStore_DeleteTetragonTTLs(t *testing.T) { + _, ts, cleanup := setupTest(t) + defer cleanup() + + tpID := uuid.Must(uuid.NewV4()) + tpID2 := uuid.Must(uuid.NewV4()) + + err := ts.DeleteTetragonTTLs([]uuid.UUID{tpID, tpID2}) + require.NoError(t, err) +} + +func TestTetragonStore_GetTetragonTTLs(t *testing.T) { + db, ts, cleanup := setupTest(t) + defer cleanup() + + // Create tetragons. 
+ s1ID := uuid.FromStringOrNil("8ba7b810-9dad-11d1-80b4-00c04fd430c8") + s2ID := uuid.FromStringOrNil("8ba7b810-9dad-11d1-80b4-00c04fd430c9") + + err := db.Set("/tetragonTTL/"+s1ID.String(), "") + require.NoError(t, err) + err = db.Set("/tetragonTTL/"+s2ID.String(), "") + require.NoError(t, err) + err = db.Set("/tetragonTTL/invalid", "") + require.NoError(t, err) + + tetragons, _, err := ts.GetTetragonTTLs() + require.NoError(t, err) + assert.Equal(t, 2, len(tetragons)) + + assert.Contains(t, tetragons, s1ID) + assert.Contains(t, tetragons, s2ID) +} diff --git a/src/vizier/services/metadata/controllers/tetragon/tetragon_test.go b/src/vizier/services/metadata/controllers/tetragon/tetragon_test.go new file mode 100644 index 00000000000..80178161ce5 --- /dev/null +++ b/src/vizier/services/metadata/controllers/tetragon/tetragon_test.go @@ -0,0 +1,528 @@ +/* + * Copyright 2018- The Pixie Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +package tetragon_test + +import ( + "sync" + "testing" + "time" + + "github.com/gofrs/uuid" + "github.com/gogo/protobuf/proto" + "github.com/gogo/protobuf/types" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "px.dev/pixie/src/carnot/planner/tetragon/ir" + "px.dev/pixie/src/common/base/statuspb" + "px.dev/pixie/src/utils" + "px.dev/pixie/src/vizier/messages/messagespb" + mock_agent "px.dev/pixie/src/vizier/services/metadata/controllers/agent/mock" + "px.dev/pixie/src/vizier/services/metadata/controllers/tetragon" + mock_tetragon "px.dev/pixie/src/vizier/services/metadata/controllers/tetragon/mock" + "px.dev/pixie/src/vizier/services/metadata/storepb" + "px.dev/pixie/src/vizier/services/shared/agentpb" +) + +func TestCreateTetragon(t *testing.T) { + tests := []struct { + name string + originalTetragon *ir2.TetragonDeployment + originalTetragonState statuspb.LifeCycleState + newTetragon *ir2.TetragonDeployment + expectError bool + expectOldUpdated bool + expectTTLUpdateOnly bool + }{ + { + name: "test_tetragon", + originalTetragon: nil, + newTetragon: &ir2.TetragonDeployment{ + GlobPattern: "/tmp/test", + TableName: "/tmp/test", + TTL: &types.Duration{ + Seconds: 5, + }, + }, + expectError: false, + }, + { + name: "existing tetragon match", + originalTetragon: &ir2.TetragonDeployment{ + GlobPattern: "/tmp/test", + TableName: "/tmp/test", + TTL: &types.Duration{ + Seconds: 5, + }, + }, + originalTetragonState: statuspb.RUNNING_STATE, + newTetragon: &ir2.TetragonDeployment{ + GlobPattern: "/tmp/test", + TableName: "/tmp/test", + TTL: &types.Duration{ + Seconds: 5, + }, + }, + expectTTLUpdateOnly: true, + }, + { + name: "existing tetragon, not exactly the same (1)", + originalTetragon: &ir2.TetragonDeployment{ + GlobPattern: "/tmp/test", + TableName: "/tmp/test", + TTL: &types.Duration{ + Seconds: 5, + }, + }, + originalTetragonState: 
statuspb.RUNNING_STATE, + newTetragon: &ir2.TetragonDeployment{ + GlobPattern: "/tmp/test.json", + TableName: "/tmp/test", + TTL: &types.Duration{ + Seconds: 5, + }, + }, + expectOldUpdated: true, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Set up mock. + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + origID := uuid.Must(uuid.NewV4()) + + if test.originalTetragon == nil { + mockTetragonStore. + EXPECT(). + GetTetragonsWithNames([]string{"test_tetragon"}). + Return([]*uuid.UUID{nil}, nil) + } else { + mockTetragonStore. + EXPECT(). + GetTetragonsWithNames([]string{"test_tetragon"}). + Return([]*uuid.UUID{&origID}, nil) + mockTetragonStore. + EXPECT(). + GetTetragon(origID). + Return(&storepb.TetragonInfo{ + ExpectedState: test.originalTetragonState, + Tetragon: test.originalTetragon, + }, nil) + } + + if test.expectTTLUpdateOnly { + mockTetragonStore. + EXPECT(). + SetTetragonTTL(origID, time.Second*5) + } + + if test.expectOldUpdated { + mockTetragonStore. + EXPECT(). + DeleteTetragonTTLs([]uuid.UUID{origID}). + Return(nil) + } + + var newID uuid.UUID + + if !test.expectError && !test.expectTTLUpdateOnly { + mockTetragonStore. + EXPECT(). + UpsertTetragon(gomock.Any(), gomock.Any()). + DoAndReturn(func(id uuid.UUID, tpInfo *storepb.TetragonInfo) error { + newID = id + assert.Equal(t, &storepb.TetragonInfo{ + Tetragon: test.newTetragon, + Name: "test_tetragon", + ID: utils.ProtoFromUUID(id), + ExpectedState: statuspb.RUNNING_STATE, + }, tpInfo) + return nil + }) + + mockTetragonStore. + EXPECT(). + SetTetragonWithName("test_tetragon", gomock.Any()). + DoAndReturn(func(name string, id uuid.UUID) error { + assert.Equal(t, newID, id) + return nil + }) + + mockTetragonStore. + EXPECT(). + SetTetragonTTL(gomock.Any(), time.Second*5). 
+ DoAndReturn(func(id uuid.UUID, ttl time.Duration) error { + assert.Equal(t, newID, id) + return nil + }) + } + + mockAgtMgr := mock_agent.NewMockManager(ctrl) + TetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer TetragonMgr.Close() + + actualFsID, err := TetragonMgr.CreateTetragon("test_tetragon", test.newTetragon) + if test.expectError || test.expectTTLUpdateOnly { + assert.Equal(t, tetragon.ErrTetragonAlreadyExists, err) + } else { + require.NoError(t, err) + assert.Equal(t, &newID, actualFsID) + } + }) + } +} + +func TestGetTetragons(t *testing.T) { + // Set up mock. + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + TetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer TetragonMgr.Close() + + tID1 := uuid.Must(uuid.NewV4()) + tID2 := uuid.Must(uuid.NewV4()) + expectedTetragonInfo := []*storepb.TetragonInfo{ + { + ID: utils.ProtoFromUUID(tID1), + }, + { + ID: utils.ProtoFromUUID(tID2), + }, + } + + mockTetragonStore. + EXPECT(). + GetTetragons(). + Return(expectedTetragonInfo, nil) + + tetragons, err := TetragonMgr.GetAllTetragons() + require.NoError(t, err) + assert.Equal(t, expectedTetragonInfo, tetragons) +} + +func TestGetTetragonInfo(t *testing.T) { + // Set up mock. + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + tetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer tetragonMgr.Close() + + fsID1 := uuid.Must(uuid.NewV4()) + expectedTetragonInfo := &storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(fsID1), + } + + mockTetragonStore. + EXPECT(). + GetTetragon(fsID1). 
+ Return(expectedTetragonInfo, nil) + + tetragons, err := tetragonMgr.GetTetragonInfo(fsID1) + require.NoError(t, err) + assert.Equal(t, expectedTetragonInfo, tetragons) +} + +func TestGetTetragonStates(t *testing.T) { + // Set up mock. + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + tetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer tetragonMgr.Close() + + agentUUID1 := uuid.Must(uuid.NewV4()) + tID1 := uuid.Must(uuid.NewV4()) + expectedTetragonStatus1 := &storepb.AgentTetragonStatus{ + ID: utils.ProtoFromUUID(tID1), + AgentID: utils.ProtoFromUUID(agentUUID1), + State: statuspb.RUNNING_STATE, + } + + agentUUID2 := uuid.Must(uuid.NewV4()) + expectedTetragonStatus2 := &storepb.AgentTetragonStatus{ + ID: utils.ProtoFromUUID(tID1), + AgentID: utils.ProtoFromUUID(agentUUID2), + State: statuspb.PENDING_STATE, + } + + mockTetragonStore. + EXPECT(). + GetTetragonStates(tID1). + Return([]*storepb.AgentTetragonStatus{expectedTetragonStatus1, expectedTetragonStatus2}, nil) + + tetragons, err := tetragonMgr.GetTetragonStates(tID1) + require.NoError(t, err) + assert.Equal(t, expectedTetragonStatus1, tetragons[0]) + assert.Equal(t, expectedTetragonStatus2, tetragons[1]) +} + +func TestRegisterTetragon(t *testing.T) { + // Set up mock. 
+ ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + tetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer tetragonMgr.Close() + + agentUUID1 := uuid.Must(uuid.NewV4()) + agentUUID2 := uuid.Must(uuid.NewV4()) + upb1 := utils.ProtoFromUUID(agentUUID1) + upb2 := utils.ProtoFromUUID(agentUUID2) + mockAgents := []*agentpb.Agent{ + // Should match programUpTo5.18.0 and programFrom5.10.0To5.18.0 + { + Info: &agentpb.AgentInfo{ + AgentID: upb1, + }, + }, + { + Info: &agentpb.AgentInfo{ + AgentID: upb2, + }, + }, + } + + tetragonID := uuid.Must(uuid.NewV4()) + tetragonDeployment := &ir2.TetragonDeployment{} + expectedTetragonReq := messagespb.VizierMessage{ + Msg: &messagespb.VizierMessage_TetragonMessage{ + TetragonMessage: &messagespb.TetragonMessage{ + Msg: &messagespb.TetragonMessage_RegisterTetragonRequest{ + RegisterTetragonRequest: &messagespb.RegisterTetragonRequest{ + TetragonDeployment: tetragonDeployment, + ID: utils.ProtoFromUUID(tetragonID), + }, + }, + }, + }, + } + // Serialize tetragon request proto into byte slice to compare with the actual message sent to agents. + msg1, err := expectedTetragonReq.Marshal() + if err != nil { + t.Fatal(err) + } + + mockAgtMgr. + EXPECT(). + MessageAgents([]uuid.UUID{agentUUID1, agentUUID2}, msg1). + Return(nil) + + err = tetragonMgr.RegisterTetragon(mockAgents, tetragonID, tetragonDeployment) + require.NoError(t, err) +} + +func TestUpdateAgentTetragonStatus(t *testing.T) { + // Set up mock. 
+ ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + tetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer tetragonMgr.Close() + + agentUUID1 := uuid.Must(uuid.NewV4()) + fsID := uuid.Must(uuid.NewV4()) + expectedTetragonState := &storepb.AgentTetragonStatus{ + ID: utils.ProtoFromUUID(fsID), + AgentID: utils.ProtoFromUUID(agentUUID1), + State: statuspb.RUNNING_STATE, + } + + mockTetragonStore. + EXPECT(). + UpdateTetragonState(expectedTetragonState). + Return(nil) + + err := tetragonMgr.UpdateAgentTetragonStatus(utils.ProtoFromUUID(fsID), utils.ProtoFromUUID(agentUUID1), statuspb.RUNNING_STATE, nil) + require.NoError(t, err) +} + +func TestUpdateAgentTetragonStatus_Terminated(t *testing.T) { + // Set up mock. + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + tetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer tetragonMgr.Close() + agentUUID1 := uuid.Must(uuid.NewV4()) + fsID := uuid.Must(uuid.NewV4()) + agentUUID2 := uuid.Must(uuid.NewV4()) + + mockTetragonStore. + EXPECT(). + GetTetragonStates(fsID). + Return([]*storepb.AgentTetragonStatus{ + {AgentID: utils.ProtoFromUUID(agentUUID1), State: statuspb.TERMINATED_STATE}, + {AgentID: utils.ProtoFromUUID(agentUUID2), State: statuspb.RUNNING_STATE}, + }, nil) + + mockTetragonStore. + EXPECT(). + DeleteTetragon(fsID). + Return(nil) + + err := tetragonMgr.UpdateAgentTetragonStatus(utils.ProtoFromUUID(fsID), utils.ProtoFromUUID(agentUUID2), statuspb.TERMINATED_STATE, nil) + require.NoError(t, err) +} + +func TestTTLExpiration(t *testing.T) { + // Set up mock. 
+ ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + tetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 5*time.Second) + defer tetragonMgr.Close() + + agentUUID1 := uuid.Must(uuid.NewV4()) + fsID := uuid.Must(uuid.NewV4()) + agentUUID2 := uuid.Must(uuid.NewV4()) + + mockTetragonStore. + EXPECT(). + GetTetragonStates(fsID). + Return([]*storepb.AgentTetragonStatus{ + {AgentID: utils.ProtoFromUUID(agentUUID1), State: statuspb.TERMINATED_STATE}, + {AgentID: utils.ProtoFromUUID(agentUUID2), State: statuspb.RUNNING_STATE}, + }, nil) + + mockTetragonStore. + EXPECT(). + DeleteTetragon(fsID). + Return(nil) + + err := tetragonMgr.UpdateAgentTetragonStatus(utils.ProtoFromUUID(fsID), utils.ProtoFromUUID(agentUUID2), statuspb.TERMINATED_STATE, nil) + require.NoError(t, err) +} + +func TestUpdateAgentTetragonStatus_RemoveTetragons(t *testing.T) { + // Set up mock. + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockAgtMgr := mock_agent.NewMockManager(ctrl) + mockTetragonStore := mock_tetragon.NewMockStore(ctrl) + + fsID1 := uuid.Must(uuid.NewV4()) + fsID2 := uuid.Must(uuid.NewV4()) + fsID3 := uuid.Must(uuid.NewV4()) + fsID4 := uuid.Must(uuid.NewV4()) + + mockTetragonStore. + EXPECT(). + GetTetragons(). + Return([]*storepb.TetragonInfo{ + { + ID: utils.ProtoFromUUID(fsID1), + }, + { + ID: utils.ProtoFromUUID(fsID2), + }, + { + ID: utils.ProtoFromUUID(fsID3), + }, + { + ID: utils.ProtoFromUUID(fsID4), + ExpectedState: statuspb.TERMINATED_STATE, + }, + }, nil) + + mockTetragonStore. + EXPECT(). + GetTetragonTTLs(). + Return([]uuid.UUID{ + fsID1, + fsID3, + fsID4, + }, []time.Time{ + time.Now().Add(1 * time.Hour), + time.Now().Add(-1 * time.Minute), + time.Now().Add(-1 * time.Hour), + }, nil) + + mockTetragonStore. + EXPECT(). + GetTetragon(fsID2). + Return(&storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(fsID2), + }, nil) + + mockTetragonStore. 
+ EXPECT(). + GetTetragon(fsID3). + Return(&storepb.TetragonInfo{ + ID: utils.ProtoFromUUID(fsID3), + }, nil) + + mockTetragonStore. + EXPECT(). + UpsertTetragon(fsID2, &storepb.TetragonInfo{ID: utils.ProtoFromUUID(fsID2), ExpectedState: statuspb.TERMINATED_STATE}). + Return(nil) + + mockTetragonStore. + EXPECT(). + UpsertTetragon(fsID3, &storepb.TetragonInfo{ID: utils.ProtoFromUUID(fsID3), ExpectedState: statuspb.TERMINATED_STATE}). + Return(nil) + + var wg sync.WaitGroup + wg.Add(2) + + var seenDeletions []string + msgHandler := func(msg []byte) error { + vzMsg := &messagespb.VizierMessage{} + err := proto.Unmarshal(msg, vzMsg) + require.NoError(t, err) + req := vzMsg.GetTetragonMessage().GetRemoveTetragonRequest() + assert.NotNil(t, req) + seenDeletions = append(seenDeletions, utils.ProtoToUUIDStr(req.ID)) + + wg.Done() + return nil + } + + mockAgtMgr. + EXPECT(). + MessageActiveAgents(gomock.Any()). + Times(2). + DoAndReturn(msgHandler) + + tetragonMgr := tetragon.NewManager(mockTetragonStore, mockAgtMgr, 25*time.Millisecond) + defer tetragonMgr.Close() + + wg.Wait() + assert.Contains(t, seenDeletions, fsID2.String()) + assert.Contains(t, seenDeletions, fsID3.String()) +} diff --git a/src/vizier/services/metadata/metadatapb/BUILD.bazel b/src/vizier/services/metadata/metadatapb/BUILD.bazel index c1b4c41ae1b..d310bccbeeb 100644 --- a/src/vizier/services/metadata/metadatapb/BUILD.bazel +++ b/src/vizier/services/metadata/metadatapb/BUILD.bazel @@ -25,6 +25,7 @@ pl_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_proto", "//src/carnot/planner/file_source/ir:logical_pl_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_proto", "//src/common/base/statuspb:status_pl_proto", "//src/shared/cvmsgspb:cvmsgs_pl_proto", "//src/shared/types/typespb:types_pl_proto", @@ -44,6 +45,7 @@ pl_cc_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_cc_proto", 
"//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_cc_proto", "//src/carnot/planner/file_source/ir:logical_pl_cc_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_cc_proto", "//src/common/base/statuspb:status_pl_cc_proto", "//src/shared/cvmsgspb:cvmsgs_pl_cc_proto", "//src/shared/types/typespb/wrapper:cc_library", @@ -64,6 +66,7 @@ pl_go_proto_library( "//src/carnot/planner/distributedpb:distributed_plan_pl_go_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_go_proto", "//src/carnot/planner/file_source/ir:logical_pl_go_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_go_proto", "//src/common/base/statuspb:status_pl_go_proto", "//src/shared/cvmsgspb:cvmsgs_pl_go_proto", "//src/shared/types/typespb:types_pl_go_proto", diff --git a/src/vizier/services/metadata/metadatapb/service.proto b/src/vizier/services/metadata/metadatapb/service.proto index 1b5dd699660..64ec5be07af 100644 --- a/src/vizier/services/metadata/metadatapb/service.proto +++ b/src/vizier/services/metadata/metadatapb/service.proto @@ -29,6 +29,7 @@ import "src/api/proto/uuidpb/uuid.proto"; import "src/carnot/planner/distributedpb/distributed_plan.proto"; import "src/carnot/planner/dynamic_tracing/ir/logicalpb/logical.proto"; import "src/carnot/planner/file_source/ir/logical.proto"; +import "src/carnot/planner/tetragon/ir/logical.proto"; import "src/common/base/statuspb/status.proto"; import "src/table_store/schemapb/schema.proto"; import "src/vizier/messages/messagespb/messages.proto"; @@ -52,6 +53,12 @@ service MetadataFileSourceService { rpc RemoveFileSource(RemoveFileSourceRequest) returns (RemoveFileSourceResponse); } +service MetadataTetragonService { + rpc RegisterTetragon(RegisterTetragonRequest) returns (RegisterTetragonResponse); + rpc GetTetragonInfo(GetTetragonInfoRequest) returns (GetTetragonInfoResponse); + rpc RemoveTetragon(RemoveTetragonRequest) returns (RemoveTetragonResponse); +} + service MetadataTracepointService { rpc 
RegisterTracepoint(RegisterTracepointRequest) returns (RegisterTracepointResponse); rpc GetTracepointInfo(GetTracepointInfoRequest) returns (GetTracepointInfoResponse); @@ -226,6 +233,63 @@ message RemoveFileSourceResponse { px.statuspb.Status status = 1; } +message RegisterTetragonRequest { + repeated px.carnot.planner.tetragon.ir.TetragonDeployment requests = 1; +} + +// The response to a RegisterTetragonRequest. +message RegisterTetragonResponse { + message TetragonStatus { + px.statuspb.Status status = 1; // TODO(ddelnano): Is this necessary? + // The ID of the tetragon. This should be the user-specified name for the tetragon. + uuidpb.UUID id = 2 [ (gogoproto.customname) = "ID" ]; + string name = 3; + } + repeated TetragonStatus tetragons = 1; + // Overall status of whether tetragon registration requests were initiated with/without + // errors. + px.statuspb.Status status = 2; +} + +// The request to check the status for the tetragons with the given IDs. +message GetTetragonInfoRequest { + // The tetragon IDs to get the info for. If empty, fetches the info for all known + // tetragons. + repeated uuidpb.UUID ids = 1 [ (gogoproto.customname) = "IDs" ]; +} + +// The status of whether the tetragon has successfully registered or not. +message GetTetragonInfoResponse { + message TetragonState { + // The tetragon ID. + uuidpb.UUID id = 1 [ (gogoproto.customname) = "ID" ]; + // The state of the tetragon. + px.statuspb.LifeCycleState state = 2; + // The status of the tetragon, specified if the state of the tetragon is not healthy. + repeated px.statuspb.Status statuses = 3; + string name = 4; + // The desired state for the tetragon. This can be used to determine whether + // the tetragon is just starting up or in the process of terminating. + px.statuspb.LifeCycleState expected_state = 5; + repeated string schema_names = 6; + } + // List of tetragon states. + repeated TetragonState tetragons = 1; +} + +// The request to evict a tetragon.
This will normally happen via the tetragon's TTL, but +// can be initiated via request as well. +message RemoveTetragonRequest { + // The names of the tetragons to remove. + repeated string names = 1; +} + +// The response to the tetragon removal. +message RemoveTetragonResponse { + // Status of whether the tetragon removal request was initiated with/without errors. + px.statuspb.Status status = 1; +} + // The request to register tracepoints on all PEMs. message RegisterTracepointRequest { message TracepointRequest { diff --git a/src/vizier/services/metadata/storepb/BUILD.bazel b/src/vizier/services/metadata/storepb/BUILD.bazel index f0a1ba5db8d..ce0f4ddb322 100644 --- a/src/vizier/services/metadata/storepb/BUILD.bazel +++ b/src/vizier/services/metadata/storepb/BUILD.bazel @@ -24,6 +24,7 @@ pl_proto_library( "//src/api/proto/uuidpb:uuid_pl_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_proto", "//src/carnot/planner/file_source/ir:logical_pl_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_proto", "//src/common/base/statuspb:status_pl_proto", "//src/shared/k8s/metadatapb:metadata_pl_proto", "//src/shared/types/typespb:types_pl_proto", @@ -39,6 +40,7 @@ pl_cc_proto_library( "//src/api/proto/uuidpb:uuid_pl_cc_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_cc_proto", "//src/carnot/planner/file_source/ir:logical_pl_cc_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_cc_proto", "//src/common/base/statuspb:status_pl_cc_proto", "//src/shared/k8s/metadatapb:metadata_pl_cc_proto", "//src/shared/types/typespb/wrapper:cc_library", @@ -55,6 +57,7 @@ pl_go_proto_library( "//src/api/proto/uuidpb:uuid_pl_go_proto", "//src/carnot/planner/dynamic_tracing/ir/logicalpb:logical_pl_go_proto", "//src/carnot/planner/file_source/ir:logical_pl_go_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_go_proto", "//src/common/base/statuspb:status_pl_go_proto", "//src/shared/k8s/metadatapb:metadata_pl_go_proto",
"//src/shared/types/typespb:types_pl_go_proto", diff --git a/src/vizier/services/metadata/storepb/store.proto b/src/vizier/services/metadata/storepb/store.proto index 4e1144a497f..983300ca982 100644 --- a/src/vizier/services/metadata/storepb/store.proto +++ b/src/vizier/services/metadata/storepb/store.proto @@ -27,6 +27,7 @@ import "google/protobuf/timestamp.proto"; import "src/api/proto/uuidpb/uuid.proto"; import "src/carnot/planner/dynamic_tracing/ir/logicalpb/logical.proto"; import "src/carnot/planner/file_source/ir/logical.proto"; +import "src/carnot/planner/tetragon/ir/logical.proto"; import "src/common/base/statuspb/status.proto"; import "src/shared/k8s/metadatapb/metadata.proto"; import "src/shared/types/typespb/types.proto"; @@ -79,6 +80,28 @@ message AgentFileSourceStatus { uuidpb.UUID agent_id = 4 [ (gogoproto.customname) = "AgentID" ]; } +// Information about the status of a specific tetragon +message TetragonInfo { + uuidpb.UUID id = 1 [ (gogoproto.customname) = "ID" ]; + // The tetragon deployment. + px.carnot.planner.tetragon.ir.TetragonDeployment tetragon = 2; + // The name of the tetragon, not unique. + string name = 3; + // The desired state of the tetragon, either running or terminated. The actual + // state of the tetragon is derived by the states of the individual agent tetragons. + px.statuspb.LifeCycleState expected_state = 4; +} + +// The agent's registration status for a particular tetragon. +message AgentTetragonStatus { + // The state of the tetragon. + px.statuspb.LifeCycleState state = 1; + // The status of the tetragon, specified if the state of the tetragon is not healthy. + px.statuspb.Status status = 2; + uuidpb.UUID id = 3 [ (gogoproto.customname) = "ID" ]; + uuidpb.UUID agent_id = 4 [ (gogoproto.customname) = "AgentID" ]; +} + // TableInfo contains info about the table in Vizier. message TableInfo { // Name of the table. 
diff --git a/src/vizier/services/query_broker/controllers/BUILD.bazel b/src/vizier/services/query_broker/controllers/BUILD.bazel index 662397ac614..e51eabdeda9 100644 --- a/src/vizier/services/query_broker/controllers/BUILD.bazel +++ b/src/vizier/services/query_broker/controllers/BUILD.bazel @@ -47,6 +47,7 @@ go_library( "//src/carnot/planner/compilerpb:compiler_status_pl_go_proto", "//src/carnot/planner/distributedpb:distributed_plan_pl_go_proto", "//src/carnot/planner/file_source/ir:logical_pl_go_proto", + "//src/carnot/planner/tetragon/ir:logical_pl_go_proto", "//src/carnot/planner/plannerpb:service_pl_go_proto", "//src/carnot/planpb:plan_pl_go_proto", "//src/carnot/queryresultspb:query_results_pl_go_proto", diff --git a/src/vizier/services/query_broker/controllers/errors.go b/src/vizier/services/query_broker/controllers/errors.go index 3ce6c74bd1b..92fc8704c55 100644 --- a/src/vizier/services/query_broker/controllers/errors.go +++ b/src/vizier/services/query_broker/controllers/errors.go @@ -33,4 +33,8 @@ var ( ErrFileSourceRegistrationFailed = errors.New("failed to register file sources") // ErrFileSourceDeletionFailed failed to delete file source. ErrFileSourceDeletionFailed = errors.New("failed to delete file sources") + // ErrTetragonRegistrationFailed failed to register tetragon to an agent. + ErrTetragonRegistrationFailed = errors.New("failed to register tetragons") + // ErrTetragonDeletionFailed failed to delete tetragon.
+ ErrTetragonDeletionFailed = errors.New("failed to delete tetragon") ) diff --git a/src/vizier/services/query_broker/controllers/mutation_executor.go b/src/vizier/services/query_broker/controllers/mutation_executor.go index abd0e45a267..e73e89e8ee7 100644 --- a/src/vizier/services/query_broker/controllers/mutation_executor.go +++ b/src/vizier/services/query_broker/controllers/mutation_executor.go @@ -31,6 +31,7 @@ import ( "px.dev/pixie/src/api/proto/vizierpb" "px.dev/pixie/src/carnot/planner/distributedpb" "px.dev/pixie/src/carnot/planner/file_source/ir" "px.dev/pixie/src/carnot/planner/plannerpb" + ir2 "px.dev/pixie/src/carnot/planner/tetragon/ir" "px.dev/pixie/src/carnot/planpb" "px.dev/pixie/src/common/base/statuspb" @@ -42,6 +43,7 @@ import ( // TracepointMap stores a map from the name to tracepoint info. type TracepointMap map[string]*TracepointInfo type FileSourceMap map[string]*FileSourceInfo +type TetragonMap map[string]*TetragonInfo // MutationExecutor is the interface for running script mutations. type MutationExecutor interface { @@ -54,9 +56,11 @@ type MutationExecutorImpl struct { planner Planner mdtp metadatapb.MetadataTracepointServiceClient mdfs metadatapb.MetadataFileSourceServiceClient + mdtt metadatapb.MetadataTetragonServiceClient mdconf metadatapb.MetadataConfigServiceClient activeTracepoints TracepointMap activeFileSources FileSourceMap + activeTetragons TetragonMap outputTables []string distributedState *distributedpb.DistributedState } @@ -75,11 +79,19 @@ type FileSourceInfo struct { Status *statuspb.Status } +type TetragonInfo struct { + GlobPattern string + TableName string + ID uuid.UUID + Status *statuspb.Status +} + // NewMutationExecutor creates a new mutation executor.
func NewMutationExecutor( planner Planner, mdtp metadatapb.MetadataTracepointServiceClient, mdfs metadatapb.MetadataFileSourceServiceClient, + mdtt metadatapb.MetadataTetragonServiceClient, mdconf metadatapb.MetadataConfigServiceClient, distributedState *distributedpb.DistributedState, ) MutationExecutor { @@ -87,10 +99,12 @@ func NewMutationExecutor( planner: planner, mdtp: mdtp, mdfs: mdfs, + mdtt: mdtt, mdconf: mdconf, distributedState: distributedState, activeTracepoints: make(TracepointMap), activeFileSources: make(FileSourceMap), + activeTetragons: make(TetragonMap), } } @@ -156,6 +170,12 @@ func (m *MutationExecutorImpl) Execute(ctx context.Context, req *vizierpb.Execut deleteFileSourcesReq := &metadatapb.RemoveFileSourceRequest{ Names: make([]string, 0), } + tetragonReqs := &metadatapb.RegisterTetragonRequest{ + Requests: make([]*ir2.TetragonDeployment, 0), + } + deleteTetragonsReq := &metadatapb.RemoveTetragonRequest{ + Names: make([]string, 0), + } outputTablesMap := make(map[string]bool) // TODO(zasgar): We should make sure that we don't simultaneously add and delete the tracepoint. @@ -225,6 +245,34 @@ func (m *MutationExecutorImpl) Execute(ctx context.Context, req *vizierpb.Execut { deleteFileSourcesReq.Names = append(deleteFileSourcesReq.Names, mut.DeleteFileSource.GlobPattern) } + case *plannerpb.CompileMutation_Tetragon: + { + name := mut.Tetragon.GlobPattern + tableName := mut.Tetragon.TableName + tetragonReqs.Requests = append(tetragonReqs.Requests, &ir2.TetragonDeployment{ + Name: name, + GlobPattern: name, + TableName: tableName, + TTL: mut.Tetragon.TTL, + }) + if _, ok := m.activeTetragons[name]; ok { + return nil, fmt.Errorf("tetragon with name '%s', already used", name) + } + // TODO(ddelnano): Add unit tests that would have caught the bug with the + // file source output table issue.
The line that caused the bug is left commented below: + // outputTablesMap[name] = true + outputTablesMap[tableName] = true + + m.activeTetragons[name] = &TetragonInfo{ + GlobPattern: mut.Tetragon.GlobPattern, + TableName: mut.Tetragon.TableName, + ID: uuid.Nil, + } + } + case *plannerpb.CompileMutation_DeleteTetragon: + { + deleteTetragonsReq.Names = append(deleteTetragonsReq.Names, mut.DeleteTetragon.GlobPattern) + } } } @@ -314,6 +362,44 @@ func (m *MutationExecutorImpl) Execute(ctx context.Context, req *vizierpb.Execut } } + if len(tetragonReqs.Requests) > 0 { + resp, err := m.mdtt.RegisterTetragon(ctx, tetragonReqs) + if err != nil { + log.WithError(err). + Errorf("Failed to register tetragons") + return nil, ErrTetragonRegistrationFailed + } + if resp.Status != nil && resp.Status.ErrCode != statuspb.OK { + log.WithField("status", resp.Status.String()). + Errorf("Failed to register tetragons with bad status") + return resp.Status, ErrTetragonRegistrationFailed + } + + // Update the internal state of the tetragons. + for _, tt := range resp.Tetragons { + if info, ok := m.activeTetragons[tt.Name]; ok { // Unknown names would index to a nil *TetragonInfo; skip them. + info.ID, info.Status = utils.UUIDFromProtoOrNil(tt.ID), tt.Status + } + } + } + if len(deleteTetragonsReq.Names) > 0 { + delResp, err := m.mdtt.RemoveTetragon(ctx, deleteTetragonsReq) + if err != nil { + log.WithError(err). + Errorf("Failed to delete tetragons") + return nil, ErrTetragonDeletionFailed + } + if delResp.Status != nil && delResp.Status.ErrCode != statuspb.OK { + log.WithField("status", delResp.Status.String()). + Errorf("Failed to delete tetragons with bad status") + return delResp.Status, ErrTetragonDeletionFailed + } + // Remove the tetragons we considered deleted.
+ for _, ttName := range deleteTetragonsReq.Names { + delete(m.activeTetragons, ttName) + } + } + m.outputTables = make([]string, 0) for k := range outputTablesMap { m.outputTables = append(m.outputTables, k) @@ -336,6 +422,12 @@ func (m *MutationExecutorImpl) MutationInfo(ctx context.Context) (*vizierpb.Muta for _, fs := range m.activeFileSources { fsReq.IDs = append(fsReq.IDs, utils.ProtoFromUUID(fs.ID)) } + ttReq := &metadatapb.GetTetragonInfoRequest{ + IDs: make([]*uuidpb.UUID, 0), + } + for _, tt := range m.activeTetragons { + ttReq.IDs = append(ttReq.IDs, utils.ProtoFromUUID(tt.ID)) + } aCtx, err := authcontext.FromContext(ctx) if err != nil { return nil, err @@ -350,10 +442,14 @@ func (m *MutationExecutorImpl) MutationInfo(ctx context.Context) (*vizierpb.Muta if err != nil { return nil, err } + ttResp, err := m.mdtt.GetTetragonInfo(ctx, ttReq) + if err != nil { + return nil, err + } tps := len(tpResp.Tracepoints) mutationInfo := &vizierpb.MutationInfo{ Status: &vizierpb.Status{Code: 0}, - States: make([]*vizierpb.MutationInfo_MutationState, tps+len(fsResp.FileSources)), + States: make([]*vizierpb.MutationInfo_MutationState, tps+len(fsResp.FileSources)+len(ttResp.Tetragons)), } tpReady := true @@ -380,6 +476,18 @@ func (m *MutationExecutorImpl) MutationInfo(ctx context.Context) (*vizierpb.Muta } } + ttReady := true + for idx, tt := range ttResp.Tetragons { + mutationInfo.States[idx+tps+len(fsResp.FileSources)] = &vizierpb.MutationInfo_MutationState{ // Tetragon entries go after the tracepoint and file source slots sized above. + ID: utils.UUIDFromProtoOrNil(tt.ID).String(), + State: convertLifeCycleStateToVizierLifeCycleState(tt.State), + Name: tt.Name, + } + if tt.State != statuspb.RUNNING_STATE { + ttReady = false + } + } + if !tpReady { mutationInfo.Status = &vizierpb.Status{ Code: int32(codes.Unavailable), @@ -396,6 +504,14 @@ func (m *MutationExecutorImpl) MutationInfo(ctx context.Context) (*vizierpb.Muta return mutationInfo, nil } + if !ttReady { + mutationInfo.Status = &vizierpb.Status{ + Code: int32(codes.Unavailable), + Message: "tetragon installation
in progress", + } + return mutationInfo, nil + } + if !m.isSchemaReady() { mutationInfo.Status = &vizierpb.Status{ Code: int32(codes.Unavailable), diff --git a/src/vizier/services/query_broker/controllers/query_executor.go b/src/vizier/services/query_broker/controllers/query_executor.go index 8897034bcaa..c60a641bc12 100644 --- a/src/vizier/services/query_broker/controllers/query_executor.go +++ b/src/vizier/services/query_broker/controllers/query_executor.go @@ -90,6 +90,7 @@ type DataPrivacy interface { type MutationExecFactory func(Planner, metadatapb.MetadataTracepointServiceClient, metadatapb.MetadataFileSourceServiceClient, + metadatapb.MetadataTetragonServiceClient, metadatapb.MetadataConfigServiceClient, *distributedpb.DistributedState) MutationExecutor @@ -102,6 +103,7 @@ type QueryExecutorImpl struct { natsConn *nats.Conn mdtp metadatapb.MetadataTracepointServiceClient mdfs metadatapb.MetadataFileSourceServiceClient + mdtt metadatapb.MetadataTetragonServiceClient mdconf metadatapb.MetadataConfigServiceClient resultForwarder QueryResultForwarder planner Planner @@ -130,6 +132,7 @@ func NewQueryExecutorFromServer(s *Server, mutExecFactory MutationExecFactory) Q s.natsConn, s.mdtp, s.mdfs, + s.mdtt, s.mdconf, s.resultForwarder, s.planner, @@ -146,6 +149,7 @@ func NewQueryExecutor( natsConn *nats.Conn, mdtp metadatapb.MetadataTracepointServiceClient, mdfs metadatapb.MetadataFileSourceServiceClient, + mdtt metadatapb.MetadataTetragonServiceClient, mdconf metadatapb.MetadataConfigServiceClient, resultForwarder QueryResultForwarder, planner Planner, @@ -159,6 +163,7 @@ func NewQueryExecutor( natsConn: natsConn, mdtp: mdtp, mdfs: mdfs, + mdtt: mdtt, mdconf: mdconf, resultForwarder: resultForwarder, planner: planner, @@ -297,7 +302,7 @@ func (q *QueryExecutorImpl) getPlanOpts(queryStr string) (*planpb.PlanOptions, e } func (q *QueryExecutorImpl) runMutation(ctx context.Context, resultCh chan<- *vizierpb.ExecuteScriptResponse, req *vizierpb.ExecuteScriptRequest, 
planOpts *planpb.PlanOptions, distributedState *distributedpb.DistributedState) error { - mutationExec := q.mutationExecFactory(q.planner, q.mdtp, q.mdfs, q.mdconf, distributedState) + mutationExec := q.mutationExecFactory(q.planner, q.mdtp, q.mdfs, q.mdtt, q.mdconf, distributedState) s, err := mutationExec.Execute(ctx, req, planOpts) if err != nil { diff --git a/src/vizier/services/query_broker/controllers/server.go b/src/vizier/services/query_broker/controllers/server.go index fae5d15ad91..68236fa1138 100644 --- a/src/vizier/services/query_broker/controllers/server.go +++ b/src/vizier/services/query_broker/controllers/server.go @@ -83,6 +83,7 @@ type Server struct { mdtp metadatapb.MetadataTracepointServiceClient mdfs metadatapb.MetadataFileSourceServiceClient + mdtt metadatapb.MetadataTetragonServiceClient mdconf metadatapb.MetadataConfigServiceClient resultForwarder QueryResultForwarder @@ -96,7 +97,8 @@ type QueryExecutorFactory func(*Server, MutationExecFactory) QueryExecutor // NewServer creates GRPC handlers. 
func NewServer(env querybrokerenv.QueryBrokerEnv, agentsTracker AgentsTracker, dataPrivacy DataPrivacy, - mds metadatapb.MetadataTracepointServiceClient, mdfs metadatapb.MetadataFileSourceServiceClient, mdconf metadatapb.MetadataConfigServiceClient, + mds metadatapb.MetadataTracepointServiceClient, mdfs metadatapb.MetadataFileSourceServiceClient, + mdtt metadatapb.MetadataTetragonServiceClient, mdconf metadatapb.MetadataConfigServiceClient, natsConn *nats.Conn, queryExecFactory QueryExecutorFactory) (*Server, error) { var udfInfo udfspb.UDFInfo if err := loadUDFInfo(&udfInfo); err != nil { @@ -107,7 +109,7 @@ func NewServer(env querybrokerenv.QueryBrokerEnv, agentsTracker AgentsTracker, d return nil, err } - return NewServerWithForwarderAndPlanner(env, agentsTracker, dataPrivacy, NewQueryResultForwarder(), mds, mdfs, mdconf, + return NewServerWithForwarderAndPlanner(env, agentsTracker, dataPrivacy, NewQueryResultForwarder(), mds, mdfs, mdtt, mdconf, natsConn, c, queryExecFactory) } @@ -118,6 +120,7 @@ func NewServerWithForwarderAndPlanner(env querybrokerenv.QueryBrokerEnv, resultForwarder QueryResultForwarder, mds metadatapb.MetadataTracepointServiceClient, mdfs metadatapb.MetadataFileSourceServiceClient, + mdtt metadatapb.MetadataTetragonServiceClient, mdconf metadatapb.MetadataConfigServiceClient, natsConn *nats.Conn, planner Planner, @@ -131,6 +134,7 @@ func NewServerWithForwarderAndPlanner(env querybrokerenv.QueryBrokerEnv, natsConn: natsConn, mdtp: mds, mdfs: mdfs, + mdtt: mdtt, mdconf: mdconf, planner: planner, queryExecFactory: queryExecFactory, diff --git a/src/vizier/services/query_broker/query_broker_server.go b/src/vizier/services/query_broker/query_broker_server.go index 5bc9b74c6ec..c1895d9344e 100644 --- a/src/vizier/services/query_broker/query_broker_server.go +++ b/src/vizier/services/query_broker/query_broker_server.go @@ -142,6 +142,7 @@ func main() { mdsClient := metadatapb.NewMetadataServiceClient(mdsConn) mdtpClient := 
metadatapb.NewMetadataTracepointServiceClient(mdsConn) mdfsClient := metadatapb.NewMetadataFileSourceServiceClient(mdsConn) + mdttClient := metadatapb.NewMetadataTetragonServiceClient(mdsConn) mdconfClient := metadatapb.NewMetadataConfigServiceClient(mdsConn) csClient := metadatapb.NewCronScriptStoreServiceClient(mdsConn) @@ -171,7 +172,7 @@ func main() { agentTracker := tracker.NewAgents(mdsClient, viper.GetString("jwt_signing_key")) agentTracker.Start() defer agentTracker.Stop() - svr, err := controllers.NewServer(env, agentTracker, dataPrivacy, mdtpClient, mdfsClient, mdconfClient, natsConn, controllers.NewQueryExecutorFromServer) + svr, err := controllers.NewServer(env, agentTracker, dataPrivacy, mdtpClient, mdfsClient, mdttClient, mdconfClient, natsConn, controllers.NewQueryExecutorFromServer) if err != nil { log.WithError(err).Fatal("Failed to initialize GRPC server funcs.") }