-
Notifications
You must be signed in to change notification settings - Fork 91
feat(clp-s::log_converter): Add support for compressing converted KV-IR files; make compressing outputs the default behaviour. #2240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3669348
d2e90b1
7230434
b73d0fc
ad2bf27
2562865
dc6ab26
12ee9b6
de12ffb
73e6d50
4b772fa
991416e
513617f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,16 +2,20 @@ | |
| #define CLP_S_LOG_CONVERTER_LOGSERIALIZER_HPP | ||
|
|
||
| #include <cstddef> | ||
| #include <memory> | ||
| #include <string_view> | ||
| #include <utility> | ||
| #include <vector> | ||
|
|
||
| #include <ystdlib/error_handling/Result.hpp> | ||
|
|
||
| #include "../../clp/ffi/ir_stream/protocol_constants.hpp" | ||
| #include "../../clp/ffi/ir_stream/Serializer.hpp" | ||
| #include "../../clp/ir/types.hpp" | ||
| #include "../../clp/type_utils.hpp" | ||
| #include "../FileWriter.hpp" | ||
| #include <clp/ffi/ir_stream/protocol_constants.hpp> | ||
| #include <clp/ffi/ir_stream/Serializer.hpp> | ||
| #include <clp/FileWriter.hpp> | ||
| #include <clp/ir/types.hpp> | ||
| #include <clp/streaming_compression/Compressor.hpp> | ||
| #include <clp/type_utils.hpp> | ||
| #include <clp/WriterInterface.hpp> | ||
|
|
||
| namespace clp_s::log_converter { | ||
| /** | ||
|
|
@@ -24,14 +28,18 @@ class LogSerializer { | |
| * Creates an instance of `LogSerializer`. | ||
| * @param output_dir The destination directory for generated KV-IR. | ||
| * @param original_file_path The original path for the file being converted to KV-IR. | ||
| * @param compress_with_zstd Whether the output KV-IR should be zstd-compressed. | ||
| * @return A result containing a `LogSerializer` on success, or an error code indicating the | ||
| * failure: | ||
| * - std::errc::no_such_file_or_directory if a `clp_s::FileWriter` fails to open an output file. | ||
| * - std::errc::no_such_file_or_directory if a `clp::FileWriter` fails to open an output file. | ||
| * - std::errc::protocol_error if a `clp::zstd::Compressor` fails to open a compression stream. | ||
| * - Forwards `clp::ffi::ir_stream::Serializer<>::create()`'s return values. | ||
| */ | ||
| [[nodiscard]] static auto | ||
| create(std::string_view output_dir, std::string_view original_file_path) | ||
| -> ystdlib::error_handling::Result<LogSerializer>; | ||
| [[nodiscard]] static auto create( | ||
| std::string_view output_dir, | ||
| std::string_view original_file_path, | ||
| bool compress_with_zstd | ||
| ) -> ystdlib::error_handling::Result<LogSerializer>; | ||
|
|
||
| // Constructors | ||
| // Delete copy constructor and assignment operator | ||
|
|
@@ -74,8 +82,18 @@ class LogSerializer { | |
| */ | ||
| void close() { | ||
| flush_buffer(); | ||
| m_writer.write_numeric_value(clp::ffi::ir_stream::cProtocol::Eof); | ||
| m_writer.close(); | ||
| m_nested_writers.back()->write_numeric_value(clp::ffi::ir_stream::cProtocol::Eof); | ||
| for (auto it{m_nested_writers.rbegin()}; it != m_nested_writers.rend(); ++it) { | ||
| if (auto compressor{dynamic_cast<clp::streaming_compression::Compressor*>(it->get())}; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we open an issue about this? If one already exists, can we link it in the PR description? |
||
| nullptr != compressor) | ||
| { | ||
| compressor->close(); | ||
| } else if (auto file_writer{dynamic_cast<clp::FileWriter*>(it->get())}; | ||
| nullptr != file_writer) | ||
| { | ||
| file_writer->close(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private: | ||
|
|
@@ -88,26 +106,30 @@ class LogSerializer { | |
| // Constructors | ||
| explicit LogSerializer( | ||
| clp::ffi::ir_stream::Serializer<clp::ir::eight_byte_encoded_variable_t>&& serializer, | ||
| clp_s::FileWriter&& writer | ||
| std::vector<std::unique_ptr<clp::WriterInterface>>&& nested_writers | ||
| ) | ||
| : m_serializer{std::move(serializer)}, | ||
| m_writer{std::move(writer)} {} | ||
| m_nested_writers{std::move(nested_writers)} {} | ||
|
|
||
| // Methods | ||
| /** | ||
| * Flushes the buffer from the serializer to the output file. | ||
| */ | ||
| void flush_buffer() { | ||
| auto const buffer{m_serializer.get_ir_buf_view()}; | ||
| m_writer.write( | ||
| m_nested_writers.back()->write( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a bit confused on the order of the component in the pipeline. Looks like the Furthermore, could we define the order of the expected writer chain at the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I can update the docs to be clear about the expectation for nested writers. The intention is that the writers are in an order like: The semantic of the |
||
| clp::size_checked_pointer_cast<char const>(buffer.data()), | ||
| buffer.size_bytes() | ||
| ); | ||
| m_serializer.clear_ir_buf(); | ||
| } | ||
|
|
||
| clp::ffi::ir_stream::Serializer<clp::ir::eight_byte_encoded_variable_t> m_serializer; | ||
| clp_s::FileWriter m_writer; | ||
| // Nested writers are ordered from closest to furthest from output sink. Typically, this will | ||
| // look like `FileWriter` <- `Compressor`. | ||
|
gibber9809 marked this conversation as resolved.
|
||
| // NOTE: This class depends on there being at least one writer in `m_nested_writers` at all | ||
| // times. | ||
| std::vector<std::unique_ptr<clp::WriterInterface>> m_nested_writers; | ||
| }; | ||
| } // namespace clp_s::log_converter | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧹 Nitpick | 🔵 Trivial | 💤 Low value
Redundant
.get()when dereferencing aunique_ptr.nested_writers.back()already yields astd::unique_ptr<...>&, which can be dereferenced directly. The.get()call is unnecessary.♻️ Proposed tweak
🤖 Prompt for AI Agents