From 7096e615a6288a331af11d0e5950806610325f48 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 18 Mar 2026 15:16:16 -0400 Subject: [PATCH 1/4] Migrate `.metapack` headers into a memory format Signed-off-by: Juan Cruz Viotti --- CMakeLists.txt | 2 + contrib/file2metapack/CMakeLists.txt | 2 +- contrib/file2metapack/file2metapack.cc | 10 +- src/index/CMakeLists.txt | 1 + src/index/explorer.h | 173 +++++++-- src/index/generators.h | 97 ++--- src/index/index.cc | 6 +- src/metapack/CMakeLists.txt | 7 + .../include/sourcemeta/one/metapack.h | 315 ++++++++++++++++ src/metapack/metapack.cc | 356 ++++++++++++++++++ src/resolver/CMakeLists.txt | 2 + src/resolver/resolver.cc | 4 +- src/server/CMakeLists.txt | 1 + src/server/action_jsonschema_evaluate.h | 5 +- src/server/action_schema_search.h | 38 +- src/server/action_serve_metapack_file.h | 44 ++- src/shared/CMakeLists.txt | 10 +- src/shared/include/sourcemeta/one/shared.h | 1 - .../include/sourcemeta/one/shared_metapack.h | 78 ---- src/shared/metapack.cc | 223 ----------- src/web/CMakeLists.txt | 1 + src/web/pages/directory.cc | 12 +- src/web/pages/index.cc | 12 +- src/web/pages/not_found.cc | 10 +- src/web/pages/schema.cc | 14 +- test/unit/metapack/CMakeLists.txt | 5 + test/unit/metapack/metapack_test.cc | 189 ++++++++++ 27 files changed, 1160 insertions(+), 458 deletions(-) create mode 100644 src/metapack/CMakeLists.txt create mode 100644 src/metapack/include/sourcemeta/one/metapack.h create mode 100644 src/metapack/metapack.cc delete mode 100644 src/shared/include/sourcemeta/one/shared_metapack.h delete mode 100644 src/shared/metapack.cc create mode 100644 test/unit/metapack/CMakeLists.txt create mode 100644 test/unit/metapack/metapack_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index d7575d803..d2631478b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,7 @@ file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/VERSION" "${ONE_VERSION}") if(ONE_INDEX OR ONE_SERVER) add_subdirectory(src/shared) add_subdirectory(src/gzip) + add_subdirectory(src/metapack) endif() if(ONE_INDEX) @@ -121,6 +122,7 @@ if(ONE_TESTS) if(ONE_INDEX OR ONE_SERVER) add_subdirectory(test/unit/gzip) + add_subdirectory(test/unit/metapack) endif() if(ONE_INDEX) diff --git a/contrib/file2metapack/CMakeLists.txt b/contrib/file2metapack/CMakeLists.txt index fd69252b6..91dd91ff6 100644 --- a/contrib/file2metapack/CMakeLists.txt +++ b/contrib/file2metapack/CMakeLists.txt @@ -2,7 +2,7 @@ sourcemeta_executable(NAMESPACE sourcemeta PROJECT one NAME file2metapack SOURCES file2metapack.cc) target_link_libraries(sourcemeta_one_file2metapack - PRIVATE sourcemeta::one::shared) + PRIVATE sourcemeta::one::metapack) target_link_libraries(sourcemeta_one_file2metapack PRIVATE sourcemeta::core::options) diff --git a/contrib/file2metapack/file2metapack.cc b/contrib/file2metapack/file2metapack.cc index 228d326fd..4c74f81b1 100644 --- a/contrib/file2metapack/file2metapack.cc +++ b/contrib/file2metapack/file2metapack.cc @@ -1,5 +1,5 @@ #include -#include +#include #include // EXIT_FAILURE, EXIT_SUCCESS #include // std::exception @@ -18,11 +18,11 @@ auto main(int argc, char *argv[]) noexcept -> int { const std::filesystem::path output{app.positional().at(2)}; std::filesystem::create_directories(output.parent_path()); - sourcemeta::one::write_file( + sourcemeta::one::metapack_write_file( output, app.positional().at(0), std::string{app.positional().at(1)}, - app.contains("gzip") ? sourcemeta::one::Encoding::GZIP - : sourcemeta::one::Encoding::Identity, - sourcemeta::core::JSON{nullptr}, std::chrono::milliseconds::zero()); + app.contains("gzip") ? sourcemeta::one::MetapackEncoding::GZIP + : sourcemeta::one::MetapackEncoding::Identity, + {}, std::chrono::milliseconds::zero()); return EXIT_SUCCESS; } catch (const std::exception &error) { std::cerr << "unexpected error: " << error.what() << "\n"; diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt index 699ef5c03..6c8b88803 100644 --- a/src/index/CMakeLists.txt +++ b/src/index/CMakeLists.txt @@ -13,6 +13,7 @@ endif() target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::resolver) target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::shared) +target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::metapack) target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::configuration) target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::web) diff --git a/src/index/explorer.h b/src/index/explorer.h index 07fea7210..a087fe904 100644 --- a/src/index/explorer.h +++ b/src/index/explorer.h @@ -2,6 +2,7 @@ #define SOURCEMETA_ONE_INDEX_EXPLORER_H_ #include +#include #include #include @@ -116,44 +117,48 @@ struct GENERATE_EXPLORER_SCHEMA_METADATA { const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; const auto &resolver_entry{resolver.entry(action.data)}; - const auto schema{ - sourcemeta::one::read_json_with_metadata(action.dependencies.front())}; + // Read the schema to get data and bytes + sourcemeta::core::FileView schema_view{action.dependencies.front()}; + const auto schema_info{sourcemeta::one::metapack_info(schema_view)}; + const auto schema_data{ + sourcemeta::one::metapack_read_json(action.dependencies.front())}; const auto id{sourcemeta::core::identify( - schema.data, [&callback, &resolver](const auto identifier) { + schema_data, [&callback, &resolver](const auto identifier) { return resolver(identifier, callback); })}; assert(!id.empty()); auto result{sourcemeta::core::JSON::make_object()}; - result.assign("bytes", sourcemeta::core::JSON{schema.bytes}); + result.assign("bytes", sourcemeta::core::JSON{static_cast( + schema_info.content_bytes)}); result.assign("identifier", sourcemeta::core::JSON{std::string{id}}); result.assign("path", sourcemeta::core::JSON{ "/" + resolver_entry.relative_path.string()}); const auto base_dialect{sourcemeta::core::base_dialect( - schema.data, [&callback, &resolver](const auto identifier) { + schema_data, [&callback, &resolver](const auto identifier) { return resolver(identifier, callback); })}; assert(base_dialect.has_value()); result.assign("baseDialect", sourcemeta::core::JSON{std::string{ sourcemeta::core::to_string(base_dialect.value())}}); - const auto dialect{sourcemeta::core::dialect(schema.data)}; + const auto dialect{sourcemeta::core::dialect(schema_data)}; assert(!dialect.empty()); result.assign("dialect", sourcemeta::core::JSON{std::string{dialect}}); - if (schema.data.is_object()) { - const auto title{schema.data.try_at("title")}; + if (schema_data.is_object()) { + const auto title{schema_data.try_at("title")}; if (title && title->is_string()) { result.assign("title", sourcemeta::core::JSON{title->trim()}); } - const auto description{schema.data.try_at("description")}; + const auto description{schema_data.try_at("description")}; if (description && description->is_string()) { result.assign("description", sourcemeta::core::JSON{description->trim()}); } auto examples_array{sourcemeta::core::JSON::make_array()}; - const auto *examples{schema.data.try_at("examples")}; + const auto *examples{schema_data.try_at("examples")}; if (examples && examples->is_array() && !examples->empty()) { const auto vocabularies{sourcemeta::core::vocabularies( [&callback, &resolver](const auto identifier) { @@ -178,11 +183,12 @@ struct GENERATE_EXPLORER_SCHEMA_METADATA { result.assign("examples", std::move(examples_array)); } - const auto health{sourcemeta::one::read_json(action.dependencies.at(1))}; + const auto health{ + sourcemeta::one::metapack_read_json(action.dependencies.at(1))}; result.assign("health", health.at("score")); const auto schema_dependencies{ - sourcemeta::one::read_json(action.dependencies.at(2))}; + sourcemeta::one::metapack_read_json(action.dependencies.at(2))}; result.assign("dependencies", sourcemeta::core::to_json(schema_dependencies.size())); @@ -208,10 +214,29 @@ struct GENERATE_EXPLORER_SCHEMA_METADATA { const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + // Build the binary extension with all fields needed by + // search index and directory listing handlers + const auto extension_bytes{ + sourcemeta::one::metapack_make_explorer_schema_extension( + result.at("health").to_integer(), + static_cast(schema_info.content_bytes), + result.at("dependencies").to_integer(), + result.at("path").to_string(), result.at("identifier").to_string(), + result.at("baseDialect").to_string(), + result.at("dialect").to_string(), + result.defines("title") ? result.at("title").to_string() : "", + result.defines("description") ? result.at("description").to_string() + : "", + result.at("alert").is_string() ? result.at("alert").to_string() + : "", + result.at("provenance").is_string() + ? result.at("provenance").to_string() + : "")}; + + sourcemeta::one::metapack_write_pretty_json( action.destination, result, "application/json", - sourcemeta::one::Encoding::GZIP, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::GZIP, + std::span{extension_bytes}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -230,18 +255,32 @@ struct GENERATE_EXPLORER_SEARCH_INDEX { result.reserve(action.dependencies.size()); for (const auto &dependency : action.dependencies) { - auto metadata_json{sourcemeta::one::read_json(dependency)}; - if (!sourcemeta::core::is_schema(metadata_json)) { + // Read only the binary extension header — no gzip decompression + sourcemeta::core::FileView dep_view{dependency}; + const auto extension_offset{ + sourcemeta::one::metapack_extension_offset(dep_view)}; + if (extension_offset == 0) { continue; } + const auto *extension{sourcemeta::one::metapack_extension< + sourcemeta::one::MetapackExplorerSchemaExtension>(dep_view)}; + if (extension == nullptr) { + continue; + } + + const auto *extension_base{dep_view.as(extension_offset)}; + const auto path{ + sourcemeta::one::metapack_explorer_path(extension, extension_base)}; + const auto title{ + sourcemeta::one::metapack_explorer_title(extension, extension_base)}; + const auto description{sourcemeta::one::metapack_explorer_description( + extension, extension_base)}; + auto entry{sourcemeta::core::JSON::make_array()}; - entry.push_back( - sourcemeta::core::JSON{metadata_json.at("path").to_string()}); - // TODO: Can we move these? - entry.push_back(metadata_json.at_or("title", sourcemeta::core::JSON{""})); - entry.push_back( - metadata_json.at_or("description", sourcemeta::core::JSON{""})); + entry.push_back(sourcemeta::core::JSON{std::string{path}}); + entry.push_back(sourcemeta::core::JSON{std::string{title}}); + entry.push_back(sourcemeta::core::JSON{std::string{description}}); result.push_back(std::move(entry)); } @@ -272,12 +311,11 @@ struct GENERATE_EXPLORER_SEARCH_INDEX { const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_jsonl( + sourcemeta::one::metapack_write_jsonl( action.destination, result, "application/jsonl", // We don't want to compress this one so we can // quickly skim through it while streaming it - sourcemeta::one::Encoding::Identity, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::Identity, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -313,7 +351,7 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { dependency.parent_path().parent_path().filename().string()}; if (filename == "directory.metapack") { - auto directory_json{sourcemeta::one::read_json(dependency)}; + auto directory_json{sourcemeta::one::metapack_read_json(dependency)}; assert(directory_json.is_object()); assert(directory_json.defines("health")); assert(directory_json.at("health").is_integer()); @@ -344,22 +382,76 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { } entries.push_back(std::move(entry_json)); } else if (filename == "schema.metapack") { - auto nav{sourcemeta::one::read_json(dependency)}; + // Read from binary extension — no gzip decompression needed + sourcemeta::core::FileView dep_view{dependency}; + const auto ext_offset{ + sourcemeta::one::metapack_extension_offset(dep_view)}; + const auto *extension{sourcemeta::one::metapack_extension< + sourcemeta::one::MetapackExplorerSchemaExtension>(dep_view)}; + + if (extension == nullptr || ext_offset == 0) { + continue; + } + + const auto *ext_base{dep_view.as(ext_offset)}; auto entry_json{sourcemeta::core::JSON::make_object()}; entry_json.assign("name", sourcemeta::core::JSON{child_name}); - entry_json.merge(nav.as_object()); - assert(!entry_json.defines("entries")); - entry_json.erase("breadcrumb"); - entry_json.erase("examples"); entry_json.assign("type", sourcemeta::core::JSON{"schema"}); - assert(entry_json.defines("path")); - std::filesystem::path url{entry_json.at("path").to_string()}; - entry_json.at("path").into(sourcemeta::core::JSON{url}); + const auto schema_path{ + sourcemeta::one::metapack_explorer_path(extension, ext_base)}; + entry_json.assign("path", sourcemeta::core::JSON{std::filesystem::path{ + std::string{schema_path}}}); + entry_json.assign("identifier", + sourcemeta::core::JSON{std::string{ + sourcemeta::one::metapack_explorer_identifier( + extension, ext_base)}}); + entry_json.assign("bytes", sourcemeta::core::JSON{extension->bytes}); + entry_json.assign("baseDialect", + sourcemeta::core::JSON{std::string{ + sourcemeta::one::metapack_explorer_base_dialect( + extension, ext_base)}}); + entry_json.assign("dialect", + sourcemeta::core::JSON{std::string{ + sourcemeta::one::metapack_explorer_dialect( + extension, ext_base)}}); + entry_json.assign("health", sourcemeta::core::JSON{extension->health}); + entry_json.assign("dependencies", + sourcemeta::core::JSON{extension->dependencies}); + + const auto title{ + sourcemeta::one::metapack_explorer_title(extension, ext_base)}; + if (!title.empty()) { + entry_json.assign("title", + sourcemeta::core::JSON{std::string{title}}); + } + + const auto description{sourcemeta::one::metapack_explorer_description( + extension, ext_base)}; + if (!description.empty()) { + entry_json.assign("description", + sourcemeta::core::JSON{std::string{description}}); + } + + const auto alert{ + sourcemeta::one::metapack_explorer_alert(extension, ext_base)}; + if (!alert.empty()) { + entry_json.assign("alert", + sourcemeta::core::JSON{std::string{alert}}); + } else { + entry_json.assign("alert", sourcemeta::core::JSON{nullptr}); + } + + const auto provenance{ + sourcemeta::one::metapack_explorer_provenance(extension, ext_base)}; + if (!provenance.empty()) { + entry_json.assign("provenance", + sourcemeta::core::JSON{std::string{provenance}}); + } else { + entry_json.assign("provenance", sourcemeta::core::JSON{nullptr}); + } - assert(entry_json.defines("health")); - assert(entry_json.at("health").is_integer()); - scores.emplace_back(entry_json.at("health").to_integer()); + scores.emplace_back(extension->health); entries.push_back(std::move(entry_json)); } } @@ -435,10 +527,9 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { } const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + sourcemeta::one::metapack_write_pretty_json( action.destination, meta, "application/json", - sourcemeta::one::Encoding::GZIP, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; diff --git a/src/index/generators.h b/src/index/generators.h index 379acdf5e..965391e62 100644 --- a/src/index/generators.h +++ b/src/index/generators.h @@ -4,6 +4,7 @@ #include "error.h" #include +#include #include #include @@ -118,11 +119,12 @@ struct GENERATE_MATERIALISED_SCHEMA { dialect_identifier); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + const auto extension_bytes{ + sourcemeta::one::metapack_make_dialect_extension(dialect_identifier)}; + sourcemeta::one::metapack_write_pretty_json( action.destination, schema.value(), "application/schema+json", - sourcemeta::one::Encoding::GZIP, - sourcemeta::core::JSON{std::string{dialect_identifier}}, + sourcemeta::one::MetapackEncoding::GZIP, + std::span{extension_bytes}, std::chrono::duration_cast(timestamp_end - timestamp_start)); resolver.cache_path(action.data, action.destination); @@ -160,14 +162,17 @@ struct GENERATE_POINTER_POSITIONS { const sourcemeta::one::Configuration &, const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; + const auto schema{ + sourcemeta::one::metapack_read_json(action.dependencies.front())}; + std::ostringstream schema_stream; + sourcemeta::core::prettify(schema, schema_stream); sourcemeta::core::PointerPositionTracker tracker; - sourcemeta::one::read_json(action.dependencies.front(), std::ref(tracker)); + sourcemeta::core::parse_json(schema_stream.str(), std::ref(tracker)); const auto result{sourcemeta::core::to_json(tracker)}; const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + sourcemeta::one::metapack_write_pretty_json( action.destination, result, "application/json", - sourcemeta::one::Encoding::GZIP, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -182,9 +187,12 @@ struct GENERATE_FRAME_LOCATIONS { const sourcemeta::one::Configuration &, const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; + const auto contents{ + sourcemeta::one::metapack_read_json(action.dependencies.front())}; + std::ostringstream contents_stream; + sourcemeta::core::prettify(contents, contents_stream); sourcemeta::core::PointerPositionTracker tracker; - const auto contents{sourcemeta::one::read_json(action.dependencies.front(), - std::ref(tracker))}; + sourcemeta::core::parse_json(contents_stream.str(), std::ref(tracker)); sourcemeta::core::SchemaFrame frame{ sourcemeta::core::SchemaFrame::Mode::Locations}; frame.analyse(contents, sourcemeta::core::schema_walker, @@ -193,10 +201,9 @@ struct GENERATE_FRAME_LOCATIONS { }); const auto result{frame.to_json(tracker).at("locations")}; const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + sourcemeta::one::metapack_write_pretty_json( action.destination, result, "application/json", - sourcemeta::one::Encoding::GZIP, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -212,7 +219,7 @@ struct GENERATE_DEPENDENCIES { const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; const auto contents{ - sourcemeta::one::read_json(action.dependencies.front())}; + sourcemeta::one::metapack_read_json(action.dependencies.front())}; auto result{sourcemeta::core::JSON::make_array()}; sourcemeta::core::dependencies( contents, sourcemeta::core::schema_walker, @@ -231,10 +238,9 @@ struct GENERATE_DEPENDENCIES { assert(result.unique()); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + sourcemeta::one::metapack_write_pretty_json( action.destination, result, "application/json", - sourcemeta::one::Encoding::GZIP, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -267,7 +273,7 @@ struct GENERATE_DEPENDENTS { std::unordered_set>; DirectMap direct; for (const auto &dependency : action.dependencies) { - const auto contents{sourcemeta::one::read_json(dependency)}; + const auto contents{sourcemeta::one::metapack_read_json(dependency)}; assert(contents.is_array()); for (const auto &entry : contents.as_array()) { direct[entry.at("to").to_string()].emplace( @@ -316,10 +322,9 @@ struct GENERATE_DEPENDENTS { const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + sourcemeta::one::metapack_write_pretty_json( action.destination, result, "application/json", - sourcemeta::one::Encoding::GZIP, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -335,7 +340,7 @@ struct GENERATE_HEALTH { const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; const auto contents{ - sourcemeta::one::read_json(action.dependencies.front())}; + sourcemeta::one::metapack_read_json(action.dependencies.front())}; const auto &collection{*resolver.entry(action.data).collection}; auto &cache_entry{bundle_for(collection, resolver, callback)}; auto errors{sourcemeta::core::JSON::make_array()}; @@ -374,10 +379,9 @@ struct GENERATE_HEALTH { report.assign("errors", std::move(errors)); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + sourcemeta::one::metapack_write_pretty_json( action.destination, report, "application/json", - sourcemeta::one::Encoding::GZIP, sourcemeta::core::JSON{nullptr}, + sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -433,7 +437,8 @@ struct GENERATE_BUNDLE { const sourcemeta::one::Configuration &, const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; - auto schema{sourcemeta::one::read_json(action.dependencies.front())}; + auto schema{ + sourcemeta::one::metapack_read_json(action.dependencies.front())}; sourcemeta::core::bundle(schema, sourcemeta::core::schema_walker, [&callback, &resolver](const auto identifier) { return resolver(identifier, callback); @@ -448,11 +453,12 @@ struct GENERATE_BUNDLE { dialect_identifier); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + const auto extension_bytes{ + sourcemeta::one::metapack_make_dialect_extension(dialect_identifier)}; + sourcemeta::one::metapack_write_pretty_json( action.destination, schema, "application/schema+json", - sourcemeta::one::Encoding::GZIP, - sourcemeta::core::JSON{std::string{dialect_identifier}}, + sourcemeta::one::MetapackEncoding::GZIP, + std::span{extension_bytes}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -467,7 +473,8 @@ struct GENERATE_EDITOR { const sourcemeta::one::Configuration &, const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; - auto schema{sourcemeta::one::read_json(action.dependencies.front())}; + auto schema{ + sourcemeta::one::metapack_read_json(action.dependencies.front())}; sourcemeta::core::for_editor(schema, sourcemeta::core::schema_walker, [&callback, &resolver](const auto identifier) { return resolver(identifier, callback); @@ -482,11 +489,12 @@ struct GENERATE_EDITOR { dialect_identifier); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + const auto extension_bytes{ + sourcemeta::one::metapack_make_dialect_extension(dialect_identifier)}; + sourcemeta::one::metapack_write_pretty_json( action.destination, schema, "application/schema+json", - sourcemeta::one::Encoding::GZIP, - sourcemeta::core::JSON{std::string{dialect_identifier}}, + sourcemeta::one::MetapackEncoding::GZIP, + std::span{extension_bytes}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; @@ -498,7 +506,8 @@ static auto generate_blaze_template( const sourcemeta::one::BuildPlan::Action::Dependencies &dependencies, const sourcemeta::blaze::Mode mode) -> void { const auto timestamp_start{std::chrono::steady_clock::now()}; - const auto contents{sourcemeta::one::read_json(dependencies.front())}; + const auto contents{ + sourcemeta::one::metapack_read_json(dependencies.front())}; sourcemeta::core::SchemaFrame frame{ sourcemeta::core::SchemaFrame::Mode::References}; frame.analyse(contents, sourcemeta::core::schema_walker, @@ -509,10 +518,9 @@ static auto generate_blaze_template( sourcemeta::blaze::default_schema_compiler, frame, frame.root(), mode)}; const auto result{sourcemeta::blaze::to_json(schema_template)}; const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(destination.parent_path()); - sourcemeta::one::write_json( - destination, result, "application/json", sourcemeta::one::Encoding::GZIP, - sourcemeta::core::JSON{nullptr}, + sourcemeta::one::metapack_write_json( + destination, result, "application/json", + sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); } @@ -551,7 +559,8 @@ struct GENERATE_STATS { const sourcemeta::one::Configuration &, const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; - const auto schema{sourcemeta::one::read_json(action.dependencies.front())}; + const auto schema{ + sourcemeta::one::metapack_read_json(action.dependencies.front())}; std::map> result; @@ -577,11 +586,9 @@ struct GENERATE_STATS { } const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - sourcemeta::one::write_pretty_json( + sourcemeta::one::metapack_write_pretty_json( action.destination, sourcemeta::core::to_json(result), - "application/json", sourcemeta::one::Encoding::GZIP, - sourcemeta::core::JSON{nullptr}, + "application/json", sourcemeta::one::MetapackEncoding::GZIP, {}, std::chrono::duration_cast(timestamp_end - timestamp_start)); return true; diff --git a/src/index/index.cc b/src/index/index.cc index 5bd0fbbe1..2326fc837 100644 --- a/src/index/index.cc +++ b/src/index/index.cc @@ -427,9 +427,9 @@ static auto index_main(const std::string_view &program, std::filesystem::recursive_directory_iterator{canonical_output}) { if (entry.is_regular_file() && entry.path().extension() == ".metapack") { try { - const auto file{sourcemeta::one::read_stream_raw(entry.path())}; - assert(file.has_value()); - durations.emplace_back(entry.path(), file.value().duration); + sourcemeta::core::FileView file_view{entry.path()}; + const auto file_info{sourcemeta::one::metapack_info(file_view)}; + durations.emplace_back(entry.path(), file_info.duration); } catch (...) { std::cerr << "Could not profile file: " << entry.path() << "\n"; throw; diff --git a/src/metapack/CMakeLists.txt b/src/metapack/CMakeLists.txt new file mode 100644 index 000000000..8f79d0f74 --- /dev/null +++ b/src/metapack/CMakeLists.txt @@ -0,0 +1,7 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT one NAME metapack + SOURCES metapack.cc) + +target_link_libraries(sourcemeta_one_metapack PUBLIC sourcemeta::core::json) +target_link_libraries(sourcemeta_one_metapack PUBLIC sourcemeta::core::io) +target_link_libraries(sourcemeta_one_metapack PRIVATE sourcemeta::core::crypto) +target_link_libraries(sourcemeta_one_metapack PRIVATE sourcemeta::one::gzip) diff --git a/src/metapack/include/sourcemeta/one/metapack.h b/src/metapack/include/sourcemeta/one/metapack.h new file mode 100644 index 000000000..653d8a39f --- /dev/null +++ b/src/metapack/include/sourcemeta/one/metapack.h @@ -0,0 +1,315 @@ +#ifndef SOURCEMETA_ONE_METAPACK_H_ +#define SOURCEMETA_ONE_METAPACK_H_ + +#ifndef SOURCEMETA_ONE_METAPACK_EXPORT +#include +#endif + +#include +#include + +#include // std::array +#include // std::chrono +#include // std::uint8_t, std::uint16_t, std::uint32_t, etc. +#include // std::filesystem::path +#include // std::ostream +#include // std::span +#include // std::string_view +#include // std::vector + +namespace sourcemeta::one { + +/// The magic number at the start of every metapack file: "META" +static constexpr std::uint32_t METAPACK_MAGIC{0x4154454D}; + +/// The current binary format version +static constexpr std::uint16_t METAPACK_VERSION{1}; + +/// Payload encoding +enum class MetapackEncoding : std::uint8_t { Identity = 0, GZIP = 1 }; + +/// The fixed portion of the binary header. All multi-byte fields are +/// little-endian (native on x86/ARM). +#pragma pack(push, 1) +struct MetapackHeader { + std::uint32_t magic; + std::uint16_t format_version; + MetapackEncoding encoding; + std::uint8_t reserved; + std::int64_t last_modified; // nanoseconds since epoch + std::uint64_t content_bytes; // uncompressed payload size + std::int64_t duration; // build duration in milliseconds + // SHA-256 checksum (32 raw bytes) + std::array checksum; + std::uint16_t mime_length; + // Followed by: + // mime (mime_length bytes, UTF-8) + // extension_size (4 bytes, uint32) + // extension (extension_size bytes, opaque) + // payload data +}; +#pragma pack(pop) + +/// Write a metapack file with JSON payload (compact serialization) +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_write_json(const std::filesystem::path &destination, + const sourcemeta::core::JSON &document, + const std::string_view mime, MetapackEncoding encoding, + std::span extension, + std::chrono::milliseconds duration) -> void; + +/// Write a metapack file with JSON payload (pretty serialization) +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_write_pretty_json(const std::filesystem::path &destination, + const sourcemeta::core::JSON &document, + const std::string_view mime, + MetapackEncoding encoding, + std::span extension, + std::chrono::milliseconds duration) -> void; + +/// Write a metapack file with text payload +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_write_text(const std::filesystem::path &destination, + std::string_view contents, const std::string_view mime, + MetapackEncoding encoding, + std::span extension, + std::chrono::milliseconds duration) -> void; + +/// Write a metapack file with JSONL payload +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_write_jsonl(const std::filesystem::path &destination, + const std::vector &entries, + const std::string_view mime, + MetapackEncoding encoding, + std::span extension, + std::chrono::milliseconds duration) -> void; + +/// Write a metapack file with a raw file as payload +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_write_file(const std::filesystem::path &destination, + const std::filesystem::path &source, + const std::string_view mime, MetapackEncoding encoding, + std::span extension, + std::chrono::milliseconds duration) -> void; + +// --------------------------------------------------------------------------- +// Extension structs for specific file types +// --------------------------------------------------------------------------- + +/// Extension for schemas/.../schema.metapack, bundle.metapack, editor.metapack. +/// Contains the dialect URI for the JSON Schema Link header. +#pragma pack(push, 1) +struct MetapackDialectExtension { + std::uint16_t dialect_length; + // Followed by dialect_length bytes of UTF-8 dialect URI string +}; +#pragma pack(pop) + +/// Extension for explorer/.../schema.metapack (SchemaMetadata). +/// Contains all fields needed by search index and directory listing handlers. +#pragma pack(push, 1) +struct MetapackExplorerSchemaExtension { + std::int64_t health; + std::int64_t bytes; + std::int64_t dependencies; + std::uint16_t path_length; + std::uint16_t identifier_length; + std::uint16_t base_dialect_length; + std::uint16_t dialect_length; + std::uint16_t title_length; + std::uint16_t description_length; + std::uint16_t alert_length; + std::uint16_t provenance_length; + // Followed by string data in order: + // path, identifier, base_dialect, dialect, + // title, description, alert, provenance +}; +#pragma pack(pop) + +/// Helper to read string fields from a MetapackExplorerSchemaExtension +/// in mmap'd memory. The `base` pointer must point to the start of the +/// extension data in the FileView. Strings are contiguous after the struct. +inline auto metapack_explorer_schema_string( + const MetapackExplorerSchemaExtension *, const std::uint8_t *base, + const std::size_t field_offset, const std::size_t field_length) + -> std::string_view { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return {reinterpret_cast( + base + sizeof(MetapackExplorerSchemaExtension) + field_offset), + field_length}; +} + +inline auto +metapack_explorer_path(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + return metapack_explorer_schema_string(extension, base, 0, + extension->path_length); +} + +inline auto +metapack_explorer_identifier(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{extension->path_length}; + return metapack_explorer_schema_string(extension, base, offset, + extension->identifier_length); +} + +inline auto +metapack_explorer_base_dialect(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length}; + return metapack_explorer_schema_string(extension, base, offset, + extension->base_dialect_length); +} + +inline auto +metapack_explorer_dialect(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length}; + return metapack_explorer_schema_string(extension, base, offset, + extension->dialect_length); +} + +inline auto +metapack_explorer_title(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length + + extension->dialect_length}; + return metapack_explorer_schema_string(extension, base, offset, + extension->title_length); +} + +inline auto +metapack_explorer_description(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length + + extension->dialect_length + extension->title_length}; + return metapack_explorer_schema_string(extension, base, offset, + extension->description_length); +} + +inline auto +metapack_explorer_alert(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length + + extension->dialect_length + extension->title_length + + extension->description_length}; + return metapack_explorer_schema_string(extension, base, offset, + extension->alert_length); +} + +inline auto +metapack_explorer_provenance(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{ + static_cast(extension->path_length) + + extension->identifier_length + extension->base_dialect_length + + extension->dialect_length + extension->title_length + + extension->description_length + extension->alert_length}; + return metapack_explorer_schema_string(extension, base, offset, + extension->provenance_length); +} + +/// Build a MetapackDialectExtension as a byte vector +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_make_dialect_extension(std::string_view dialect) + -> std::vector; + +/// Build a MetapackExplorerSchemaExtension as a byte vector +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_make_explorer_schema_extension( + std::int64_t health, std::int64_t bytes, std::int64_t dependencies, + std::string_view path, std::string_view identifier, + std::string_view base_dialect, std::string_view dialect, + std::string_view title, std::string_view description, + std::string_view alert, std::string_view provenance) + -> std::vector; + +// --------------------------------------------------------------------------- +// Read functions +// --------------------------------------------------------------------------- + +/// Read the full JSON payload from a metapack file (decompresses if needed) +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_read_json(const std::filesystem::path &path) + -> sourcemeta::core::JSON; + +/// Header metadata extracted from a metapack file +struct MetapackInfo { + std::string checksum_hex; + std::chrono::system_clock::time_point last_modified; + std::string mime; + MetapackEncoding encoding; + std::uint64_t content_bytes; + std::chrono::milliseconds duration; +}; + +/// Read only the header metadata from a memory-mapped metapack file +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_info(const sourcemeta::core::FileView &view) -> MetapackInfo; + +/// Compute the byte offset where the payload data begins +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_payload_offset(const sourcemeta::core::FileView &view) + -> std::size_t; + +/// Compute the byte offset where the extension data begins within +/// a memory-mapped metapack file. Returns 0 if the extension is empty. +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_extension_offset(const sourcemeta::core::FileView &view) + -> std::size_t; + +/// Compute the size of the extension data within a memory-mapped +/// metapack file. Returns 0 if there is no extension. +SOURCEMETA_ONE_METAPACK_EXPORT +auto metapack_extension_size(const sourcemeta::core::FileView &view) + -> std::uint32_t; + +/// Read the typed extension from a memory-mapped metapack file. +/// The returned pointer points directly into the mmap'd memory. +/// The FileView must outlive the returned pointer. +/// Returns nullptr if the file has no extension or extension_size +/// is smaller than sizeof(T). +template +auto metapack_extension(const sourcemeta::core::FileView &view) -> const T * { + const auto offset{metapack_extension_offset(view)}; + if (offset == 0) { + return nullptr; + } + + const auto size{metapack_extension_size(view)}; + if (size < sizeof(T)) { + return nullptr; + } + + return view.as(offset); +} + +/// Helper: read a string_view from a dialect extension in mmap'd memory. +/// The FileView must outlive the returned string_view. +inline auto metapack_dialect_string(const sourcemeta::core::FileView &view) + -> std::string_view { + const auto *extension{metapack_extension(view)}; + if (extension == nullptr || extension->dialect_length == 0) { + return {}; + } + + const auto string_offset{metapack_extension_offset(view) + + sizeof(MetapackDialectExtension)}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return {reinterpret_cast(view.as(string_offset)), + extension->dialect_length}; +} + +} // namespace sourcemeta::one + +#endif diff --git a/src/metapack/metapack.cc b/src/metapack/metapack.cc new file mode 100644 index 000000000..67fe909b6 --- /dev/null +++ b/src/metapack/metapack.cc @@ -0,0 +1,356 @@ +#include + +#include +#include + +// TODO: Remove this dependency once we handle gzip internally +#include + +#include // assert +#include // std::memcpy +#include // std::ofstream +#include // std::ostringstream, std::stringstream +#include // std::runtime_error +#include // std::string +#include // std::string_view + +namespace sourcemeta::one { + +static auto write_binary_header(std::ostream &output, + const std::string_view mime, + const MetapackEncoding encoding, + const std::span extension, + const std::chrono::milliseconds duration, + const std::string_view payload, + const std::size_t uncompressed_size) -> void { + MetapackHeader header{}; + header.magic = METAPACK_MAGIC; + header.format_version = METAPACK_VERSION; + header.encoding = encoding; + header.reserved = 0; + + const auto now{std::chrono::system_clock::now()}; + header.last_modified = std::chrono::duration_cast( + now.time_since_epoch()) + .count(); + header.content_bytes = uncompressed_size; + header.duration = duration.count(); + + // Compute SHA-256 checksum of the uncompressed payload + std::ostringstream checksum_hex; + sourcemeta::core::sha256(payload, checksum_hex); + const auto hex_string{checksum_hex.str()}; + // Convert hex string to raw bytes + for (std::size_t index{0}; index < 32 && index * 2 + 1 < hex_string.size(); + index++) { + const auto byte_string{hex_string.substr(index * 2, 2)}; + header.checksum[index] = + static_cast(std::stoul(byte_string, nullptr, 16)); + } + + assert(mime.size() <= UINT16_MAX); + header.mime_length = static_cast(mime.size()); + + // Write the fixed header + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + output.write(reinterpret_cast(&header), sizeof(MetapackHeader)); + + // Write mime string + output.write(mime.data(), static_cast(mime.size())); + + // Write extension size and data + const auto extension_size{static_cast(extension.size())}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + output.write(reinterpret_cast(&extension_size), + sizeof(extension_size)); + if (!extension.empty()) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + output.write(reinterpret_cast(extension.data()), + static_cast(extension.size())); + } +} + +static auto write_metapack(const std::filesystem::path &destination, + const std::string_view mime, + const MetapackEncoding encoding, + const std::span extension, + const std::chrono::milliseconds duration, + const std::string &content) -> void { + std::ofstream output{destination, std::ios::binary}; + assert(!output.fail()); + + write_binary_header(output, mime, encoding, extension, duration, content, + content.size()); + + if (encoding == MetapackEncoding::GZIP) { + std::istringstream input_stream{content}; + sourcemeta::one::gzip(input_stream, output); + } else { + output.write(content.data(), static_cast(content.size())); + } + + output.flush(); +} + +auto metapack_write_json(const std::filesystem::path &destination, + const sourcemeta::core::JSON &document, + const std::string_view mime, + const MetapackEncoding encoding, + const std::span extension, + const std::chrono::milliseconds duration) -> void { + std::ostringstream buffer; + sourcemeta::core::stringify(document, buffer); + std::filesystem::create_directories(destination.parent_path()); + write_metapack(destination, mime, encoding, extension, duration, + buffer.str()); +} + +auto metapack_write_pretty_json(const std::filesystem::path &destination, + const sourcemeta::core::JSON &document, + const std::string_view mime, + const MetapackEncoding encoding, + const std::span extension, + const std::chrono::milliseconds duration) + -> void { + std::ostringstream buffer; + sourcemeta::core::prettify(document, buffer); + std::filesystem::create_directories(destination.parent_path()); + write_metapack(destination, mime, encoding, extension, duration, + buffer.str()); +} + +auto metapack_write_text(const std::filesystem::path &destination, + const std::string_view contents, + const std::string_view mime, + const MetapackEncoding encoding, + const std::span extension, + const std::chrono::milliseconds duration) -> void { + std::string content{contents}; + content += '\n'; + std::filesystem::create_directories(destination.parent_path()); + write_metapack(destination, mime, encoding, extension, duration, content); +} + +auto metapack_write_jsonl(const std::filesystem::path &destination, + const std::vector &entries, + const std::string_view mime, + const MetapackEncoding encoding, + const std::span extension, + const std::chrono::milliseconds duration) -> void { + std::ostringstream buffer; + for (const auto &entry : entries) { + sourcemeta::core::stringify(entry, buffer); + buffer << '\n'; + } + + std::filesystem::create_directories(destination.parent_path()); + write_metapack(destination, mime, encoding, extension, duration, + buffer.str()); +} + +auto metapack_write_file(const std::filesystem::path &destination, + const std::filesystem::path &source, + const std::string_view mime, + const MetapackEncoding encoding, + const std::span extension, + const std::chrono::milliseconds duration) -> void { + auto stream{sourcemeta::core::read_file(source)}; + std::ostringstream buffer; + buffer << stream.rdbuf(); + std::filesystem::create_directories(destination.parent_path()); + write_metapack(destination, mime, encoding, extension, duration, + buffer.str()); +} + +auto metapack_extension_offset(const sourcemeta::core::FileView &view) + -> std::size_t { + if (view.size() < sizeof(MetapackHeader) + sizeof(std::uint32_t)) { + return 0; + } + + const auto *header{view.as()}; + assert(header->magic == METAPACK_MAGIC); + assert(header->format_version == METAPACK_VERSION); + + const auto offset_of_extension_size{sizeof(MetapackHeader) + + header->mime_length}; + if (offset_of_extension_size + sizeof(std::uint32_t) > view.size()) { + return 0; + } + + const auto *extension_size_pointer{ + view.as(offset_of_extension_size)}; + if (*extension_size_pointer == 0) { + return 0; + } + + return offset_of_extension_size + sizeof(std::uint32_t); +} + +auto metapack_extension_size(const sourcemeta::core::FileView &view) + -> std::uint32_t { + if (view.size() < sizeof(MetapackHeader) + sizeof(std::uint32_t)) { + return 0; + } + + const auto *header{view.as()}; + assert(header->magic == METAPACK_MAGIC); + + const auto offset_of_extension_size{sizeof(MetapackHeader) + + header->mime_length}; + if (offset_of_extension_size + sizeof(std::uint32_t) > view.size()) { + return 0; + } + + return *view.as(offset_of_extension_size); +} + +auto metapack_read_json(const std::filesystem::path &path) + -> sourcemeta::core::JSON { + sourcemeta::core::FileView view{path}; + if (view.size() < sizeof(MetapackHeader) + sizeof(std::uint32_t)) { + throw std::runtime_error("Metapack file too small"); + } + + const auto *header{view.as()}; + if (header->magic != METAPACK_MAGIC) { + throw std::runtime_error("Invalid metapack magic"); + } + + if (header->format_version != METAPACK_VERSION) { + throw std::runtime_error("Unsupported metapack version"); + } + + // Skip past: fixed header + mime + extension_size + extension + auto payload_offset{sizeof(MetapackHeader) + header->mime_length}; + const auto *extension_size{view.as(payload_offset)}; + payload_offset += sizeof(std::uint32_t) + *extension_size; + + if (payload_offset > view.size()) { + throw std::runtime_error("Metapack header extends past file end"); + } + + const auto payload_data_size{view.size() - payload_offset}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const auto *payload_data{ + reinterpret_cast(view.as(payload_offset))}; + + if (header->encoding == MetapackEncoding::GZIP) { + std::string compressed{payload_data, payload_data_size}; + std::istringstream compressed_stream{compressed}; + std::ostringstream decompressed; + sourcemeta::one::gunzip(compressed_stream, decompressed); + return sourcemeta::core::parse_json(decompressed.str()); + } + + const std::string payload_string{payload_data, payload_data_size}; + return sourcemeta::core::parse_json(payload_string); +} + +auto metapack_make_dialect_extension(const std::string_view dialect) + -> std::vector { + std::vector result; + result.resize(sizeof(MetapackDialectExtension) + dialect.size()); + + MetapackDialectExtension header{}; + header.dialect_length = static_cast(dialect.size()); + std::memcpy(result.data(), &header, sizeof(header)); + std::memcpy(result.data() + sizeof(header), dialect.data(), dialect.size()); + + return result; +} + +auto metapack_make_explorer_schema_extension( + const std::int64_t health, const std::int64_t bytes, + const std::int64_t dependencies, const std::string_view path, + const std::string_view identifier, const std::string_view base_dialect, + const std::string_view dialect, const std::string_view title, + const std::string_view description, const std::string_view alert, + const std::string_view provenance) -> std::vector { + const auto strings_size{ + path.size() + identifier.size() + base_dialect.size() + dialect.size() + + title.size() + description.size() + alert.size() + provenance.size()}; + std::vector result; + result.resize(sizeof(MetapackExplorerSchemaExtension) + strings_size); + + MetapackExplorerSchemaExtension header{}; + header.health = health; + header.bytes = bytes; + header.dependencies = dependencies; + header.path_length = static_cast(path.size()); + header.identifier_length = static_cast(identifier.size()); + header.base_dialect_length = static_cast(base_dialect.size()); + header.dialect_length = static_cast(dialect.size()); + header.title_length = static_cast(title.size()); + header.description_length = static_cast(description.size()); + header.alert_length = static_cast(alert.size()); + header.provenance_length = static_cast(provenance.size()); + + auto *cursor{result.data()}; + std::memcpy(cursor, &header, sizeof(header)); + cursor += sizeof(header); + + const auto append = [&cursor](const std::string_view string) { + std::memcpy(cursor, string.data(), string.size()); + cursor += string.size(); + }; + + append(path); + append(identifier); + append(base_dialect); + append(dialect); + append(title); + append(description); + append(alert); + append(provenance); + + return result; +} + +auto metapack_info(const sourcemeta::core::FileView &view) -> MetapackInfo { + assert(view.size() >= sizeof(MetapackHeader) + sizeof(std::uint32_t)); + const auto *header{view.as()}; + assert(header->magic == METAPACK_MAGIC); + assert(header->format_version == METAPACK_VERSION); + + // Convert raw checksum bytes to hex string + std::string checksum_hex; + checksum_hex.reserve(64); + static constexpr const char *hex_chars = "0123456789abcdef"; + for (const auto byte : header->checksum) { + checksum_hex += hex_chars[(byte >> 4) & 0x0F]; + checksum_hex += hex_chars[byte & 0x0F]; + } + + // Convert nanoseconds since epoch to time_point + const auto nanos{std::chrono::nanoseconds{header->last_modified}}; + const auto time_point{std::chrono::system_clock::time_point{ + std::chrono::duration_cast(nanos)}}; + + // Read mime string + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const auto *mime_data{reinterpret_cast( + view.as(sizeof(MetapackHeader)))}; + + return MetapackInfo{.checksum_hex = std::move(checksum_hex), + .last_modified = time_point, + .mime = std::string{mime_data, header->mime_length}, + .encoding = header->encoding, + .content_bytes = header->content_bytes, + .duration = std::chrono::milliseconds{header->duration}}; +} + +auto metapack_payload_offset(const sourcemeta::core::FileView &view) + -> std::size_t { + assert(view.size() >= sizeof(MetapackHeader) + sizeof(std::uint32_t)); + const auto *header{view.as()}; + assert(header->magic == METAPACK_MAGIC); + + auto offset{sizeof(MetapackHeader) + header->mime_length}; + const auto *extension_size{view.as(offset)}; + offset += sizeof(std::uint32_t) + *extension_size; + + return offset; +} + +} // namespace sourcemeta::one diff --git a/src/resolver/CMakeLists.txt b/src/resolver/CMakeLists.txt index 402498f5c..2981bee6d 100644 --- a/src/resolver/CMakeLists.txt +++ b/src/resolver/CMakeLists.txt @@ -14,3 +14,5 @@ target_link_libraries(sourcemeta_one_resolver PRIVATE sourcemeta::core::yaml) target_link_libraries(sourcemeta_one_resolver PRIVATE sourcemeta::one::shared) +target_link_libraries(sourcemeta_one_resolver PRIVATE + sourcemeta::one::metapack) diff --git a/src/resolver/resolver.cc b/src/resolver/resolver.cc index 6f519273b..061fb9cec 100644 --- a/src/resolver/resolver.cc +++ b/src/resolver/resolver.cc @@ -1,3 +1,4 @@ +#include #include #include @@ -120,7 +121,8 @@ auto Resolver::operator()( if (result->second.cache_path.has_value()) { // We can guarantee the cached outcome is JSON, so we don't need to try // reading as YAML - auto schema{sourcemeta::one::read_json(result->second.cache_path.value())}; + auto schema{ + sourcemeta::one::metapack_read_json(result->second.cache_path.value())}; assert(sourcemeta::core::is_schema(schema)); if (callback) { callback(result->second.cache_path.value()); diff --git a/src/server/CMakeLists.txt b/src/server/CMakeLists.txt index a18685b85..b8a624099 100644 --- a/src/server/CMakeLists.txt +++ b/src/server/CMakeLists.txt @@ -22,6 +22,7 @@ target_link_libraries(sourcemeta_one_server PRIVATE uNetworking::uSockets) target_link_libraries(sourcemeta_one_server PRIVATE uNetworking::uWebSockets) target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::gzip) target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::shared) +target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::metapack) target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::blaze::evaluator) target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::blaze::output) diff --git a/src/server/action_jsonschema_evaluate.h b/src/server/action_jsonschema_evaluate.h index c3097629b..b88dc9ed6 100644 --- a/src/server/action_jsonschema_evaluate.h +++ b/src/server/action_jsonschema_evaluate.h @@ -7,6 +7,7 @@ #include #include +#include #include #include "helpers.h" @@ -32,7 +33,7 @@ auto trace(sourcemeta::blaze::Evaluator &evaluator, auto locations_path{template_path.parent_path() / "locations.metapack"}; // TODO: Cache this across runs? - const auto locations{sourcemeta::one::read_json(locations_path)}; + const auto locations{sourcemeta::one::metapack_read_json(locations_path)}; if (!locations.is_object() || !locations.defines("static")) { throw std::runtime_error("Failed to read schema locations metadata"); } @@ -143,7 +144,7 @@ auto evaluate(const std::filesystem::path &template_path, // TODO: Cache this conversion across runs, potentially using the schema file // "checksum" as the cache key. This is important as the template might be // compressed - const auto template_json{read_json(template_path)}; + const auto template_json{metapack_read_json(template_path)}; const auto schema_template{sourcemeta::blaze::from_json(template_json)}; if (!schema_template.has_value()) { throw std::runtime_error("Failed to parse schema template"); diff --git a/src/server/action_schema_search.h b/src/server/action_schema_search.h index 93ccbd568..5378cc8fb 100644 --- a/src/server/action_schema_search.h +++ b/src/server/action_schema_search.h @@ -1,8 +1,10 @@ #ifndef SOURCEMETA_ONE_SERVER_ACTION_SCHEMA_SEARCH_H #define SOURCEMETA_ONE_SERVER_ACTION_SCHEMA_SEARCH_H +#include #include +#include #include #include "helpers.h" @@ -14,7 +16,7 @@ #include // std::filesystem #include // std::ostringstream #include // std::runtime_error -#include // std::string, std::getline +#include // std::string #include // std::string_view namespace sourcemeta::one { @@ -27,18 +29,29 @@ static auto search(const std::filesystem::path &search_index, assert(search_index.is_absolute()); - auto file{read_stream_raw(search_index)}; - if (!file.has_value()) { - throw std::runtime_error("Failed to read search index"); - } + sourcemeta::core::FileView view{search_index}; + const auto payload_start{metapack_payload_offset(view)}; + const auto payload_size{view.size() - payload_start}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string_view payload{ + reinterpret_cast(view.as(payload_start)), + payload_size}; auto result{sourcemeta::core::JSON::make_array()}; - // TODO: Extend the Core JSONL iterators to be able - // to access the stringified contents of the current entry - // BEFORE parsing it as JSON, letting the client decide - // whether to parse or not. - std::string line; - while (std::getline(file.value().data, line)) { + std::size_t line_start{0}; + while (line_start < payload.size()) { + auto line_end{payload.find('\n', line_start)}; + if (line_end == std::string_view::npos) { + line_end = payload.size(); + } + + const auto line{payload.substr(line_start, line_end - line_start)}; + line_start = line_end + 1; + + if (line.empty()) { + continue; + } + if (std::search(line.cbegin(), line.cend(), query.begin(), query.end(), [](const auto left, const auto right) { return std::tolower(left) == std::tolower(right); @@ -47,7 +60,8 @@ static auto search(const std::filesystem::path &search_index, } auto entry{sourcemeta::core::JSON::make_object()}; - auto line_json{sourcemeta::core::parse_json(line)}; + const std::string line_string{line}; + auto line_json{sourcemeta::core::parse_json(line_string)}; entry.assign("path", std::move(line_json.at(0))); entry.assign("title", std::move(line_json.at(1))); entry.assign("description", std::move(line_json.at(2))); diff --git a/src/server/action_serve_metapack_file.h b/src/server/action_serve_metapack_file.h index c1fe3af8b..5e4b436a4 100644 --- a/src/server/action_serve_metapack_file.h +++ b/src/server/action_serve_metapack_file.h @@ -1,8 +1,10 @@ #ifndef SOURCEMETA_ONE_SERVER_ACTION_SERVE_METAPACK_FILE_H #define SOURCEMETA_ONE_SERVER_ACTION_SERVE_METAPACK_FILE_H +#include #include +#include #include #include "helpers.h" @@ -29,13 +31,22 @@ static auto action_serve_metapack_file( return; } - auto file{sourcemeta::one::read_stream_raw(absolute_path)}; - if (!file.has_value()) { + if (!std::filesystem::exists(absolute_path)) { json_error(request, response, sourcemeta::one::STATUS_NOT_FOUND, "not-found", "There is nothing at this URL"); return; } + sourcemeta::core::FileView view{absolute_path}; + if (view.size() < + sizeof(sourcemeta::one::MetapackHeader) + sizeof(std::uint32_t)) { + json_error(request, response, sourcemeta::one::STATUS_NOT_FOUND, + "not-found", "There is nothing at this URL"); + return; + } + + const auto info{sourcemeta::one::metapack_info(view)}; + // Note that `If-Modified-Since` can only be used with a `GET` or `HEAD`. // See // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since @@ -44,7 +55,7 @@ static auto action_serve_metapack_file( // to more consistent behavior. if (if_modified_since.has_value() && (if_modified_since.value() + std::chrono::seconds(1)) >= - file.value().last_modified) { + info.last_modified) { response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED); if (enable_cors) { response.write_header("Access-Control-Allow-Origin", "*"); @@ -54,7 +65,7 @@ static auto action_serve_metapack_file( return; } - const auto &checksum{file.value().checksum}; + const auto &checksum{info.checksum_hex}; std::ostringstream etag_value_strong; std::ostringstream etag_value_weak; etag_value_strong << '"' << checksum << '"'; @@ -83,11 +94,11 @@ static auto action_serve_metapack_file( if (mime.has_value()) { response.write_header("Content-Type", mime.value()); } else { - response.write_header("Content-Type", file.value().mime); + response.write_header("Content-Type", info.mime); } response.write_header("Last-Modified", - sourcemeta::core::to_gmt(file.value().last_modified)); + sourcemeta::core::to_gmt(info.last_modified)); std::ostringstream etag; etag << '"' << checksum << '"'; @@ -98,20 +109,25 @@ static auto action_serve_metapack_file( if (link.has_value()) { write_link_header(response, link.value()); } else { - const auto &dialect{file.value().extension}; - if (dialect.is_string()) { - write_link_header(response, dialect.to_string()); + const auto dialect{sourcemeta::one::metapack_dialect_string(view)}; + if (!dialect.empty()) { + write_link_header(response, std::string{dialect}); } } - std::ostringstream contents; - contents << file.value().data.rdbuf(); + // Read the raw payload data from the mmap'd view + const auto payload_start{sourcemeta::one::metapack_payload_offset(view)}; + const auto payload_size{view.size() - payload_start}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string contents{ + reinterpret_cast(view.as(payload_start)), + payload_size}; - if (file.value().encoding == sourcemeta::one::Encoding::GZIP) { - send_response(code, request, response, contents.str(), + if (info.encoding == sourcemeta::one::MetapackEncoding::GZIP) { + send_response(code, request, response, contents, sourcemeta::one::Encoding::GZIP); } else { - send_response(code, request, response, contents.str(), + send_response(code, request, response, contents, sourcemeta::one::Encoding::Identity); } } diff --git a/src/shared/CMakeLists.txt b/src/shared/CMakeLists.txt index daf8f4458..4d0ebb418 100644 --- a/src/shared/CMakeLists.txt +++ b/src/shared/CMakeLists.txt @@ -1,12 +1,6 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT one NAME shared - PRIVATE_HEADERS metapack.h encoding.h version.h - SOURCES metapack.cc version.cc configure.h.in) - -target_link_libraries(sourcemeta_one_shared PUBLIC sourcemeta::core::json) -target_link_libraries(sourcemeta_one_shared PRIVATE sourcemeta::core::io) -target_link_libraries(sourcemeta_one_shared PRIVATE sourcemeta::core::crypto) -target_link_libraries(sourcemeta_one_shared PRIVATE sourcemeta::core::time) -target_link_libraries(sourcemeta_one_shared PRIVATE sourcemeta::one::gzip) + PRIVATE_HEADERS encoding.h version.h + SOURCES version.cc configure.h.in) if(ONE_ENTERPRISE) target_compile_definitions(sourcemeta_one_shared diff --git a/src/shared/include/sourcemeta/one/shared.h b/src/shared/include/sourcemeta/one/shared.h index 0f52d2531..1356aac64 100644 --- a/src/shared/include/sourcemeta/one/shared.h +++ b/src/shared/include/sourcemeta/one/shared.h @@ -5,7 +5,6 @@ // between the indexer and the server #include -#include #include namespace sourcemeta::one { diff --git a/src/shared/include/sourcemeta/one/shared_metapack.h b/src/shared/include/sourcemeta/one/shared_metapack.h deleted file mode 100644 index ce556c11e..000000000 --- a/src/shared/include/sourcemeta/one/shared_metapack.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef SOURCEMETA_ONE_SHARED_METAPACK_H -#define SOURCEMETA_ONE_SHARED_METAPACK_H - -#include - -#include - -#include // std::chrono -#include // std::uint64_t -#include // std::filesystem -#include // std::ifstream -#include // std::optional -#include // std::vector - -namespace sourcemeta::one { - -template struct File { - T data; - std::uint64_t version; - sourcemeta::core::JSON::String checksum; - std::chrono::system_clock::time_point last_modified; - sourcemeta::core::JSON::String mime; - std::size_t bytes; - std::chrono::milliseconds duration; - Encoding encoding; - sourcemeta::core::JSON extension; -}; - -auto read_stream_raw(const std::filesystem::path &path) - -> std::optional>; - -auto read_json(const std::filesystem::path &path, - const sourcemeta::core::JSON::ParseCallback &callback = nullptr) - -> sourcemeta::core::JSON; - -auto read_json_with_metadata( - const std::filesystem::path &path, - const sourcemeta::core::JSON::ParseCallback &callback = nullptr) - -> File; - -auto write_json(const std::filesystem::path &destination, - const sourcemeta::core::JSON &document, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void; - -auto write_pretty_json(const std::filesystem::path &destination, - const sourcemeta::core::JSON &document, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void; - -auto write_text(const std::filesystem::path &destination, - const std::string_view contents, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void; - -auto write_file(const std::filesystem::path &destination, - const std::filesystem::path &source, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void; - -auto write_jsonl(const std::filesystem::path &destination, - const std::vector &entries, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void; - -} // namespace sourcemeta::one - -#endif diff --git a/src/shared/metapack.cc b/src/shared/metapack.cc deleted file mode 100644 index fa5caebf2..000000000 --- a/src/shared/metapack.cc +++ /dev/null @@ -1,223 +0,0 @@ -#include -#include - -#include -#include -#include - -#include // assert -#include // std::chrono::system_clock::time_point -#include // std::functional -#include // std::ostream -#include // std::ostringstream -#include // std::runtime_error -#include // std::move - -// TODO: There are lots of opportunities to optimise this file -// and avoid temporary buffers, etc - -namespace { - -auto write_stream(const std::filesystem::path &path, - const sourcemeta::core::JSON::String &mime, - const sourcemeta::one::Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration, - const std::function &callback) -> void { - // TODO: Ideally we wouldn't write the file all at once first - std::stringstream buffer; - callback(buffer); - - auto metadata{sourcemeta::core::JSON::make_object()}; - metadata.assign("version", sourcemeta::core::JSON{1}); - std::ostringstream checksum; - // TODO: Have a shorthand version that doesn't require an intermediary stream - sourcemeta::core::sha256(buffer.str(), checksum); - metadata.assign("checksum", sourcemeta::core::JSON{checksum.str()}); - metadata.assign("lastModified", - sourcemeta::core::JSON{sourcemeta::core::to_gmt( - std::chrono::system_clock::now())}); - metadata.assign("mime", sourcemeta::core::JSON{mime}); - metadata.assign("bytes", sourcemeta::core::JSON{buffer.tellp()}); - metadata.assign("duration", sourcemeta::core::JSON{duration.count()}); - - switch (encoding) { - case sourcemeta::one::Encoding::Identity: - metadata.assign("encoding", sourcemeta::core::JSON{"identity"}); - break; - case sourcemeta::one::Encoding::GZIP: - metadata.assign("encoding", sourcemeta::core::JSON{"gzip"}); - break; - default: - assert(false); - break; - } - - if (!extension.is_null()) { - metadata.assign("extension", extension); - } - - std::ofstream output{path}; - assert(!output.fail()); - sourcemeta::core::stringify(metadata, output); - if (encoding == sourcemeta::one::Encoding::GZIP) { - sourcemeta::one::gzip(buffer, output); - } else { - output << buffer.str(); - } - - output.flush(); -} - -} // namespace - -namespace sourcemeta::one { - -auto read_stream_raw(const std::filesystem::path &path) - -> std::optional> { - assert(path.is_absolute()); - if (!std::filesystem::exists(path)) { - return std::nullopt; - } - - auto stream{sourcemeta::core::read_file(path)}; - auto metadata{sourcemeta::core::parse_json(stream)}; - if (!metadata.is_object() || !metadata.defines("version") || - !metadata.defines("checksum") || !metadata.defines("lastModified") || - !metadata.defines("mime") || !metadata.defines("bytes") || - !metadata.defines("duration") || !metadata.defines("encoding")) { - throw std::runtime_error("The file metadata is missing required fields"); - } - - if (!metadata.at("version").is_integer() || - !metadata.at("version").is_positive() || - !metadata.at("checksum").is_string() || - !metadata.at("lastModified").is_string() || - !metadata.at("mime").is_string() || !metadata.at("bytes").is_integer() || - !metadata.at("bytes").is_positive() || - !metadata.at("duration").is_integer() || - !metadata.at("duration").is_positive() || - !metadata.at("encoding").is_string()) { - throw std::runtime_error( - "The file metadata has fields with unexpected types"); - } - - Encoding encoding{Encoding::Identity}; - if (metadata.at("encoding").to_string() == "gzip") { - encoding = Encoding::GZIP; - } else if (metadata.at("encoding").to_string() != "identity") { - throw std::runtime_error("Failed to determine file encoding"); - } - - return File{ - .data = std::move(stream), - .version = - static_cast(metadata.at("version").to_integer()), - .checksum = metadata.at("checksum").to_string(), - .last_modified = - sourcemeta::core::from_gmt(metadata.at("lastModified").to_string()), - .mime = metadata.at("mime").to_string(), - .bytes = static_cast(metadata.at("bytes").to_integer()), - .duration = static_cast( - metadata.at("duration").to_integer()), - .encoding = encoding, - .extension = std::move(metadata).at_or("extension", - sourcemeta::core::JSON{nullptr})}; -} - -auto read_json(const std::filesystem::path &path, - const sourcemeta::core::JSON::ParseCallback &callback) - -> sourcemeta::core::JSON { - return read_json_with_metadata(path, callback).data; -} - -auto read_json_with_metadata( - const std::filesystem::path &path, - const sourcemeta::core::JSON::ParseCallback &callback) - -> File { - auto file{read_stream_raw(path)}; - if (!file.has_value()) { - throw std::runtime_error("Failed to read file"); - } - std::ostringstream buffer; - if (file.value().encoding == Encoding::GZIP) { - sourcemeta::one::gunzip(file.value().data, buffer); - } else { - buffer << file.value().data.rdbuf(); - } - - return File{.data = sourcemeta::core::parse_json(buffer.str(), callback), - .version = file.value().version, - .checksum = file.value().checksum, - .last_modified = file.value().last_modified, - .mime = std::move(file.value().mime), - .bytes = file.value().bytes, - .duration = file.value().duration, - .encoding = file.value().encoding, - .extension = file.value().extension}; -} - -auto write_json(const std::filesystem::path &destination, - const sourcemeta::core::JSON &document, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void { - write_stream(destination, mime, encoding, extension, duration, - [&document](auto &stream) { - sourcemeta::core::stringify(document, stream); - }); -} - -auto write_pretty_json(const std::filesystem::path &destination, - const sourcemeta::core::JSON &document, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void { - write_stream(destination, mime, encoding, extension, duration, - [&document](auto &stream) { - sourcemeta::core::prettify(document, stream); - }); -} - -auto write_text(const std::filesystem::path &destination, - const std::string_view contents, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void { - write_stream(destination, mime, encoding, extension, duration, - [&contents](auto &stream) { - stream << contents; - stream << "\n"; - }); -} - -auto write_file(const std::filesystem::path &destination, - const std::filesystem::path &source, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void { - auto stream{sourcemeta::core::read_file(source)}; - write_stream(destination, mime, encoding, extension, duration, - [&stream](auto &target) { target << stream.rdbuf(); }); -} - -auto write_jsonl(const std::filesystem::path &destination, - const std::vector &entries, - const sourcemeta::core::JSON::String &mime, - const Encoding encoding, - const sourcemeta::core::JSON &extension, - const std::chrono::milliseconds duration) -> void { - write_stream(destination, mime, encoding, extension, duration, - [&entries](auto &stream) { - for (const auto &entry : entries) { - sourcemeta::core::stringify(entry, stream); - stream << "\n"; - } - }); -} - -} // namespace sourcemeta::one diff --git a/src/web/CMakeLists.txt b/src/web/CMakeLists.txt index d10f64a57..3e86058f2 100644 --- a/src/web/CMakeLists.txt +++ b/src/web/CMakeLists.txt @@ -11,6 +11,7 @@ target_link_libraries(sourcemeta_one_web PRIVATE sourcemeta::core::html) target_link_libraries(sourcemeta_one_web PUBLIC sourcemeta::one::configuration) target_link_libraries(sourcemeta_one_web PUBLIC sourcemeta::one::resolver) target_link_libraries(sourcemeta_one_web PRIVATE sourcemeta::one::shared) +target_link_libraries(sourcemeta_one_web PRIVATE sourcemeta::one::metapack) sourcemeta_esbuild_bundle( ENTRYPOINT "${CMAKE_CURRENT_SOURCE_DIR}/scripts/main.js" diff --git a/src/web/pages/directory.cc b/src/web/pages/directory.cc index d65bcd921..fca65f369 100644 --- a/src/web/pages/directory.cc +++ b/src/web/pages/directory.cc @@ -4,6 +4,7 @@ #include "../page.h" #include +#include #include #include // std::chrono @@ -19,7 +20,7 @@ auto GENERATE_WEB_DIRECTORY::handler( const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; - const auto directory{read_json(action.dependencies.front())}; + const auto directory{metapack_read_json(action.dependencies.front())}; const auto &canonical{directory.at("url").to_string()}; const auto &title{directory.defines("title") ? directory.at("title").to_string() @@ -37,11 +38,10 @@ auto GENERATE_WEB_DIRECTORY::handler( html::make_file_manager(directory)); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - write_text(action.destination, html_content.str(), "text/html", - Encoding::GZIP, sourcemeta::core::JSON{nullptr}, - std::chrono::duration_cast( - timestamp_end - timestamp_start)); + metapack_write_text(action.destination, html_content.str(), "text/html", + MetapackEncoding::GZIP, {}, + std::chrono::duration_cast( + timestamp_end - timestamp_start)); return true; } diff --git a/src/web/pages/index.cc b/src/web/pages/index.cc index 049452d69..7bcde3cb1 100644 --- a/src/web/pages/index.cc +++ b/src/web/pages/index.cc @@ -4,6 +4,7 @@ #include "../page.h" #include +#include #include #include // std::chrono @@ -36,7 +37,7 @@ auto GENERATE_WEB_INDEX::handler( const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; - const auto directory{read_json(action.dependencies.front())}; + const auto directory{metapack_read_json(action.dependencies.front())}; const auto &canonical{directory.at("url").to_string()}; const auto title{configuration.html->name + " Schemas"}; const auto &description{configuration.html->description}; @@ -47,11 +48,10 @@ auto GENERATE_WEB_INDEX::handler( html::make_file_manager(directory)); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - write_text(action.destination, html_content.str(), "text/html", - Encoding::GZIP, sourcemeta::core::JSON{nullptr}, - std::chrono::duration_cast( - timestamp_end - timestamp_start)); + metapack_write_text(action.destination, html_content.str(), "text/html", + MetapackEncoding::GZIP, {}, + std::chrono::duration_cast( + timestamp_end - timestamp_start)); return true; } diff --git a/src/web/pages/not_found.cc b/src/web/pages/not_found.cc index dd9732cb9..83780b4fd 100644 --- a/src/web/pages/not_found.cc +++ b/src/web/pages/not_found.cc @@ -4,6 +4,7 @@ #include "../page.h" #include +#include #include #include // std::chrono @@ -36,11 +37,10 @@ auto GENERATE_WEB_NOT_FOUND::handler( html::a({{"href", "/"}}, "Get back to the home page"))); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - write_text(action.destination, html_content.str(), "text/html", - Encoding::GZIP, sourcemeta::core::JSON{nullptr}, - std::chrono::duration_cast( - timestamp_end - timestamp_start)); + metapack_write_text(action.destination, html_content.str(), "text/html", + MetapackEncoding::GZIP, {}, + std::chrono::duration_cast( + timestamp_end - timestamp_start)); return true; } diff --git a/src/web/pages/schema.cc b/src/web/pages/schema.cc index 2e7621d4b..2c93e7752 100644 --- a/src/web/pages/schema.cc +++ b/src/web/pages/schema.cc @@ -4,6 +4,7 @@ #include "../page.h" #include +#include #include #include // assert @@ -22,7 +23,7 @@ auto GENERATE_WEB_SCHEMA::handler( const sourcemeta::core::JSON &) -> bool { const auto timestamp_start{std::chrono::steady_clock::now()}; - const auto meta{read_json(action.dependencies.front())}; + const auto meta{metapack_read_json(action.dependencies.front())}; const auto &canonical{meta.at("identifier").to_string()}; const auto &title{meta.defines("title") ? meta.at("title").to_string() : meta.at("path").to_string()}; @@ -195,7 +196,7 @@ auto GENERATE_WEB_SCHEMA::handler( {"data-sourcemeta-ui-editor-language", "json"}}, "Loading schema...")); - const auto health{read_json(action.dependencies.at(1))}; + const auto health{metapack_read_json(action.dependencies.at(1))}; assert(health.is_object()); assert(health.defines("errors")); @@ -348,11 +349,10 @@ auto GENERATE_WEB_SCHEMA::handler( container_children)); const auto timestamp_end{std::chrono::steady_clock::now()}; - std::filesystem::create_directories(action.destination.parent_path()); - write_text(action.destination, html_content.str(), "text/html", - Encoding::GZIP, sourcemeta::core::JSON{nullptr}, - std::chrono::duration_cast( - timestamp_end - timestamp_start)); + metapack_write_text(action.destination, html_content.str(), "text/html", + MetapackEncoding::GZIP, {}, + std::chrono::duration_cast( + timestamp_end - timestamp_start)); return true; } diff --git a/test/unit/metapack/CMakeLists.txt b/test/unit/metapack/CMakeLists.txt new file mode 100644 index 000000000..20d5d96b9 --- /dev/null +++ b/test/unit/metapack/CMakeLists.txt @@ -0,0 +1,5 @@ +sourcemeta_googletest(NAMESPACE sourcemeta PROJECT one NAME metapack + SOURCES metapack_test.cc) + +target_link_libraries(sourcemeta_one_metapack_unit + PRIVATE sourcemeta::one::metapack) diff --git a/test/unit/metapack/metapack_test.cc b/test/unit/metapack/metapack_test.cc new file mode 100644 index 000000000..bb9a363ab --- /dev/null +++ b/test/unit/metapack/metapack_test.cc @@ -0,0 +1,189 @@ +#include + +#include +#include + +#include + +#include // std::chrono +#include // std::memcmp +#include // std::filesystem +#include // std::span +#include // std::string +#include // std::vector + +static auto temp_path(const std::string &name) -> std::filesystem::path { + return std::filesystem::temp_directory_path() / ("metapack_test_" + name); +} + +class MetapackTest : public ::testing::Test { +protected: + void TearDown() override { + for (const auto &path : cleanup_paths) { + std::filesystem::remove_all(path); + } + } + + auto make_temp(const std::string &name) -> std::filesystem::path { + auto path{temp_path(name)}; + cleanup_paths.push_back(path); + return path; + } + +private: + std::vector cleanup_paths; +}; + +TEST_F(MetapackTest, write_and_read_json_identity) { + const auto path{make_temp("identity.metapack")}; + auto document{sourcemeta::core::JSON::make_object()}; + document.assign("hello", sourcemeta::core::JSON{"world"}); + + sourcemeta::one::metapack_write_json( + path, document, "application/json", + sourcemeta::one::MetapackEncoding::Identity, {}, + std::chrono::milliseconds{5}); + + const auto result{sourcemeta::one::metapack_read_json(path)}; + EXPECT_TRUE(result.is_object()); + EXPECT_EQ(result.at("hello").to_string(), "world"); +} + +TEST_F(MetapackTest, write_and_read_json_gzip) { + const auto path{make_temp("gzip.metapack")}; + auto document{sourcemeta::core::JSON::make_object()}; + document.assign("foo", sourcemeta::core::JSON{42}); + + sourcemeta::one::metapack_write_json(path, document, "application/json", + sourcemeta::one::MetapackEncoding::GZIP, + {}, std::chrono::milliseconds{10}); + + const auto result{sourcemeta::one::metapack_read_json(path)}; + EXPECT_TRUE(result.is_object()); + EXPECT_TRUE(result.at("foo").is_integer()); + EXPECT_EQ(result.at("foo").to_integer(), 42); +} + +TEST_F(MetapackTest, write_and_read_pretty_json) { + const auto path{make_temp("pretty.metapack")}; + auto document{sourcemeta::core::JSON::make_object()}; + document.assign("key", sourcemeta::core::JSON{"value"}); + + sourcemeta::one::metapack_write_pretty_json( + path, document, "application/schema+json", + sourcemeta::one::MetapackEncoding::GZIP, {}, + std::chrono::milliseconds{3}); + + const auto result{sourcemeta::one::metapack_read_json(path)}; + EXPECT_EQ(result.at("key").to_string(), "value"); +} + +TEST_F(MetapackTest, binary_header_magic_and_version) { + const auto path{make_temp("header.metapack")}; + auto document{sourcemeta::core::JSON::make_object()}; + + sourcemeta::one::metapack_write_json( + path, document, "application/json", + sourcemeta::one::MetapackEncoding::Identity, {}, + std::chrono::milliseconds{0}); + + sourcemeta::core::FileView view{path}; + EXPECT_GE(view.size(), sizeof(sourcemeta::one::MetapackHeader)); + const auto *header{view.as()}; + EXPECT_EQ(header->magic, sourcemeta::one::METAPACK_MAGIC); + EXPECT_EQ(header->format_version, sourcemeta::one::METAPACK_VERSION); + EXPECT_EQ(header->encoding, sourcemeta::one::MetapackEncoding::Identity); +} + +TEST_F(MetapackTest, no_extension) { + const auto path{make_temp("no_ext.metapack")}; + auto document{sourcemeta::core::JSON::make_object()}; + + sourcemeta::one::metapack_write_json( + path, document, "application/json", + sourcemeta::one::MetapackEncoding::Identity, {}, + std::chrono::milliseconds{0}); + + sourcemeta::core::FileView view{path}; + EXPECT_EQ(sourcemeta::one::metapack_extension_size(view), 0); + EXPECT_EQ(sourcemeta::one::metapack_extension_offset(view), 0); +} + +#pragma pack(push, 1) +struct TestExtension { + std::int32_t value; + std::uint16_t name_length; + // followed by name bytes +}; +#pragma pack(pop) + +TEST_F(MetapackTest, write_and_read_extension) { + const auto path{make_temp("with_ext.metapack")}; + auto document{sourcemeta::core::JSON::make_object()}; + + // Build a test extension: {value=42, name="hello"} + std::vector extension_bytes; + TestExtension extension_header{}; + extension_header.value = 42; + const std::string name{"hello"}; + extension_header.name_length = static_cast(name.size()); + + // Serialize the struct + const auto *header_bytes{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast(&extension_header)}; + extension_bytes.insert(extension_bytes.end(), header_bytes, + header_bytes + sizeof(TestExtension)); + // Append the name string + extension_bytes.insert(extension_bytes.end(), name.begin(), name.end()); + + sourcemeta::one::metapack_write_json( + path, document, "application/json", + sourcemeta::one::MetapackEncoding::GZIP, + std::span{extension_bytes}, + std::chrono::milliseconds{7}); + + // Read back via mmap + sourcemeta::core::FileView view{path}; + EXPECT_EQ(sourcemeta::one::metapack_extension_size(view), + extension_bytes.size()); + + const auto *read_extension{ + sourcemeta::one::metapack_extension(view)}; + ASSERT_NE(read_extension, nullptr); + EXPECT_EQ(read_extension->value, 42); + EXPECT_EQ(read_extension->name_length, 5); + + // Read the name string from the extension data + const auto extension_start{sourcemeta::one::metapack_extension_offset(view)}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const auto *name_data{reinterpret_cast( + view.as(extension_start + sizeof(TestExtension)))}; + EXPECT_EQ(std::string_view(name_data, read_extension->name_length), "hello"); + + // Also verify the JSON payload is still readable + const auto result{sourcemeta::one::metapack_read_json(path)}; + EXPECT_TRUE(result.is_object()); +} + +TEST_F(MetapackTest, extension_nullptr_when_too_small) { + const auto path{make_temp("small_ext.metapack")}; + auto document{sourcemeta::core::JSON::make_object()}; + + // Write a 2-byte extension + std::vector small_extension{0x01, 0x02}; + + sourcemeta::one::metapack_write_json( + path, document, "application/json", + sourcemeta::one::MetapackEncoding::Identity, + std::span{small_extension}, + std::chrono::milliseconds{0}); + + sourcemeta::core::FileView view{path}; + EXPECT_EQ(sourcemeta::one::metapack_extension_size(view), 2); + + // TestExtension is larger than 2 bytes, so this should return nullptr + const auto *read_extension{ + sourcemeta::one::metapack_extension(view)}; + EXPECT_EQ(read_extension, nullptr); +} From f8081a99188dadb6b3846e70d7a24ea10fca2f4a Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 18 Mar 2026 15:28:45 -0400 Subject: [PATCH 2/4] Fix Signed-off-by: Juan Cruz Viotti --- src/server/action_serve_metapack_file.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server/action_serve_metapack_file.h b/src/server/action_serve_metapack_file.h index 5e4b436a4..fa173acc3 100644 --- a/src/server/action_serve_metapack_file.h +++ b/src/server/action_serve_metapack_file.h @@ -115,7 +115,6 @@ static auto action_serve_metapack_file( } } - // Read the raw payload data from the mmap'd view const auto payload_start{sourcemeta::one::metapack_payload_offset(view)}; const auto payload_size{view.size() - payload_start}; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) From 5b7f1865b4e7a7f6b8df1ec72b2aca89964745b0 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 18 Mar 2026 15:41:15 -0400 Subject: [PATCH 3/4] Simpler Signed-off-by: Juan Cruz Viotti --- src/index/explorer.h | 242 ++++++++++++++---- src/index/generators.h | 27 +- .../include/sourcemeta/one/metapack.h | 230 ++--------------- src/metapack/metapack.cc | 71 ----- src/server/action_serve_metapack_file.h | 18 +- 5 files changed, 250 insertions(+), 338 deletions(-) diff --git a/src/index/explorer.h b/src/index/explorer.h index a087fe904..7fd767cff 100644 --- a/src/index/explorer.h +++ b/src/index/explorer.h @@ -15,6 +15,7 @@ #include // assert #include // std::chrono #include // std::lround +#include // std::memcpy #include // std::filesystem #include // std::accumulate #include // std::optional @@ -108,6 +109,159 @@ inflate_metadata(const sourcemeta::one::Configuration &configuration, namespace sourcemeta::one { +#pragma pack(push, 1) +struct MetapackExplorerSchemaExtension { + std::int64_t health; + std::int64_t bytes; + std::int64_t dependencies; + std::uint16_t path_length; + std::uint16_t identifier_length; + std::uint16_t base_dialect_length; + std::uint16_t dialect_length; + std::uint16_t title_length; + std::uint16_t description_length; + std::uint16_t alert_length; + std::uint16_t provenance_length; +}; +#pragma pack(pop) + +inline auto explorer_extension_string(const MetapackExplorerSchemaExtension *, + const std::uint8_t *base, + const std::size_t field_offset, + const std::size_t field_length) + -> std::string_view { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return {reinterpret_cast( + base + sizeof(MetapackExplorerSchemaExtension) + field_offset), + field_length}; +} + +inline auto +explorer_extension_path(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + return explorer_extension_string(extension, base, 0, extension->path_length); +} + +inline auto +explorer_extension_identifier(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{extension->path_length}; + return explorer_extension_string(extension, base, offset, + extension->identifier_length); +} + +inline auto explorer_extension_base_dialect( + const MetapackExplorerSchemaExtension *extension, const std::uint8_t *base) + -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length}; + return explorer_extension_string(extension, base, offset, + extension->base_dialect_length); +} + +inline auto +explorer_extension_dialect(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length}; + return explorer_extension_string(extension, base, offset, + extension->dialect_length); +} + +inline auto +explorer_extension_title(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length + + extension->dialect_length}; + return explorer_extension_string(extension, base, offset, + extension->title_length); +} + +inline auto +explorer_extension_description(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length + + extension->dialect_length + extension->title_length}; + return explorer_extension_string(extension, base, offset, + extension->description_length); +} + +inline auto +explorer_extension_alert(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{static_cast(extension->path_length) + + extension->identifier_length + + extension->base_dialect_length + + extension->dialect_length + extension->title_length + + extension->description_length}; + return explorer_extension_string(extension, base, offset, + extension->alert_length); +} + +inline auto +explorer_extension_provenance(const MetapackExplorerSchemaExtension *extension, + const std::uint8_t *base) -> std::string_view { + const std::size_t offset{ + static_cast(extension->path_length) + + extension->identifier_length + extension->base_dialect_length + + extension->dialect_length + extension->title_length + + extension->description_length + extension->alert_length}; + return explorer_extension_string(extension, base, offset, + extension->provenance_length); +} + +static auto make_explorer_schema_extension( + const std::int64_t health, const std::int64_t bytes, + const std::int64_t dependencies, const std::string_view path, + const std::string_view identifier, const std::string_view base_dialect, + const std::string_view dialect, const std::string_view title, + const std::string_view description, const std::string_view alert, + const std::string_view provenance) -> std::vector { + const auto strings_size{ + path.size() + identifier.size() + base_dialect.size() + dialect.size() + + title.size() + description.size() + alert.size() + provenance.size()}; + std::vector result; + result.resize(sizeof(MetapackExplorerSchemaExtension) + strings_size); + + MetapackExplorerSchemaExtension header{}; + header.health = health; + header.bytes = bytes; + header.dependencies = dependencies; + header.path_length = static_cast(path.size()); + header.identifier_length = static_cast(identifier.size()); + header.base_dialect_length = static_cast(base_dialect.size()); + header.dialect_length = static_cast(dialect.size()); + header.title_length = static_cast(title.size()); + header.description_length = static_cast(description.size()); + header.alert_length = static_cast(alert.size()); + header.provenance_length = static_cast(provenance.size()); + + auto *cursor{result.data()}; + std::memcpy(cursor, &header, sizeof(header)); + cursor += sizeof(header); + + const auto append = [&cursor](const std::string_view string) { + std::memcpy(cursor, string.data(), string.size()); + cursor += string.size(); + }; + + append(path); + append(identifier); + append(base_dialect); + append(dialect); + append(title); + append(description); + append(alert); + append(provenance); + + return result; +} + struct GENERATE_EXPLORER_SCHEMA_METADATA { static auto handler(const sourcemeta::one::BuildState &, const sourcemeta::one::BuildPlan::Action &action, @@ -214,24 +368,19 @@ struct GENERATE_EXPLORER_SCHEMA_METADATA { const auto timestamp_end{std::chrono::steady_clock::now()}; - // Build the binary extension with all fields needed by - // search index and directory listing handlers - const auto extension_bytes{ - sourcemeta::one::metapack_make_explorer_schema_extension( - result.at("health").to_integer(), - static_cast(schema_info.content_bytes), - result.at("dependencies").to_integer(), - result.at("path").to_string(), result.at("identifier").to_string(), - result.at("baseDialect").to_string(), - result.at("dialect").to_string(), - result.defines("title") ? result.at("title").to_string() : "", - result.defines("description") ? result.at("description").to_string() - : "", - result.at("alert").is_string() ? result.at("alert").to_string() - : "", - result.at("provenance").is_string() - ? result.at("provenance").to_string() - : "")}; + const auto extension_bytes{make_explorer_schema_extension( + result.at("health").to_integer(), + static_cast(schema_info.content_bytes), + result.at("dependencies").to_integer(), result.at("path").to_string(), + result.at("identifier").to_string(), + result.at("baseDialect").to_string(), result.at("dialect").to_string(), + result.defines("title") ? result.at("title").to_string() : "", + result.defines("description") ? result.at("description").to_string() + : "", + result.at("alert").is_string() ? result.at("alert").to_string() : "", + result.at("provenance").is_string() + ? result.at("provenance").to_string() + : "")}; sourcemeta::one::metapack_write_pretty_json( action.destination, result, "application/json", @@ -255,7 +404,6 @@ struct GENERATE_EXPLORER_SEARCH_INDEX { result.reserve(action.dependencies.size()); for (const auto &dependency : action.dependencies) { - // Read only the binary extension header — no gzip decompression sourcemeta::core::FileView dep_view{dependency}; const auto extension_offset{ sourcemeta::one::metapack_extension_offset(dep_view)}; @@ -263,19 +411,18 @@ struct GENERATE_EXPLORER_SEARCH_INDEX { continue; } - const auto *extension{sourcemeta::one::metapack_extension< - sourcemeta::one::MetapackExplorerSchemaExtension>(dep_view)}; + const auto *extension{ + sourcemeta::one::metapack_extension( + dep_view)}; if (extension == nullptr) { continue; } const auto *extension_base{dep_view.as(extension_offset)}; - const auto path{ - sourcemeta::one::metapack_explorer_path(extension, extension_base)}; - const auto title{ - sourcemeta::one::metapack_explorer_title(extension, extension_base)}; - const auto description{sourcemeta::one::metapack_explorer_description( - extension, extension_base)}; + const auto path{explorer_extension_path(extension, extension_base)}; + const auto title{explorer_extension_title(extension, extension_base)}; + const auto description{ + explorer_extension_description(extension, extension_base)}; auto entry{sourcemeta::core::JSON::make_array()}; entry.push_back(sourcemeta::core::JSON{std::string{path}}); @@ -382,12 +529,11 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { } entries.push_back(std::move(entry_json)); } else if (filename == "schema.metapack") { - // Read from binary extension — no gzip decompression needed sourcemeta::core::FileView dep_view{dependency}; const auto ext_offset{ sourcemeta::one::metapack_extension_offset(dep_view)}; const auto *extension{sourcemeta::one::metapack_extension< - sourcemeta::one::MetapackExplorerSchemaExtension>(dep_view)}; + MetapackExplorerSchemaExtension>(dep_view)}; if (extension == nullptr || ext_offset == 0) { continue; @@ -398,43 +544,39 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { entry_json.assign("name", sourcemeta::core::JSON{child_name}); entry_json.assign("type", sourcemeta::core::JSON{"schema"}); - const auto schema_path{ - sourcemeta::one::metapack_explorer_path(extension, ext_base)}; + const auto schema_path{explorer_extension_path(extension, ext_base)}; entry_json.assign("path", sourcemeta::core::JSON{std::filesystem::path{ std::string{schema_path}}}); - entry_json.assign("identifier", - sourcemeta::core::JSON{std::string{ - sourcemeta::one::metapack_explorer_identifier( - extension, ext_base)}}); + entry_json.assign( + "identifier", + sourcemeta::core::JSON{std::string{ + explorer_extension_identifier(extension, ext_base)}}); entry_json.assign("bytes", sourcemeta::core::JSON{extension->bytes}); - entry_json.assign("baseDialect", - sourcemeta::core::JSON{std::string{ - sourcemeta::one::metapack_explorer_base_dialect( - extension, ext_base)}}); - entry_json.assign("dialect", - sourcemeta::core::JSON{std::string{ - sourcemeta::one::metapack_explorer_dialect( - extension, ext_base)}}); + entry_json.assign( + "baseDialect", + sourcemeta::core::JSON{std::string{ + explorer_extension_base_dialect(extension, ext_base)}}); + entry_json.assign( + "dialect", sourcemeta::core::JSON{std::string{ + explorer_extension_dialect(extension, ext_base)}}); entry_json.assign("health", sourcemeta::core::JSON{extension->health}); entry_json.assign("dependencies", sourcemeta::core::JSON{extension->dependencies}); - const auto title{ - sourcemeta::one::metapack_explorer_title(extension, ext_base)}; + const auto title{explorer_extension_title(extension, ext_base)}; if (!title.empty()) { entry_json.assign("title", sourcemeta::core::JSON{std::string{title}}); } - const auto description{sourcemeta::one::metapack_explorer_description( - extension, ext_base)}; + const auto description{ + explorer_extension_description(extension, ext_base)}; if (!description.empty()) { entry_json.assign("description", sourcemeta::core::JSON{std::string{description}}); } - const auto alert{ - sourcemeta::one::metapack_explorer_alert(extension, ext_base)}; + const auto alert{explorer_extension_alert(extension, ext_base)}; if (!alert.empty()) { entry_json.assign("alert", sourcemeta::core::JSON{std::string{alert}}); @@ -443,7 +585,7 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { } const auto provenance{ - sourcemeta::one::metapack_explorer_provenance(extension, ext_base)}; + explorer_extension_provenance(extension, ext_base)}; if (!provenance.empty()) { entry_json.assign("provenance", sourcemeta::core::JSON{std::string{provenance}}); diff --git a/src/index/generators.h b/src/index/generators.h index 965391e62..633510a1a 100644 --- a/src/index/generators.h +++ b/src/index/generators.h @@ -25,6 +25,7 @@ #endif #include // assert +#include // std::memcpy #include // std::filesystem #include // std::ofstream #include // std::unique_ptr @@ -38,6 +39,23 @@ namespace sourcemeta::one { +#pragma pack(push, 1) +struct MetapackDialectExtension { + std::uint16_t dialect_length; +}; +#pragma pack(pop) + +static auto make_dialect_extension(const std::string_view dialect) + -> std::vector { + std::vector result; + result.resize(sizeof(MetapackDialectExtension) + dialect.size()); + MetapackDialectExtension header{}; + header.dialect_length = static_cast(dialect.size()); + std::memcpy(result.data(), &header, sizeof(header)); + std::memcpy(result.data() + sizeof(header), dialect.data(), dialect.size()); + return result; +} + struct GENERATE_VERSION { static auto handler(const sourcemeta::one::BuildState &, const sourcemeta::one::BuildPlan::Action &action, @@ -119,8 +137,7 @@ struct GENERATE_MATERIALISED_SCHEMA { dialect_identifier); const auto timestamp_end{std::chrono::steady_clock::now()}; - const auto extension_bytes{ - sourcemeta::one::metapack_make_dialect_extension(dialect_identifier)}; + const auto extension_bytes{make_dialect_extension(dialect_identifier)}; sourcemeta::one::metapack_write_pretty_json( action.destination, schema.value(), "application/schema+json", sourcemeta::one::MetapackEncoding::GZIP, @@ -453,8 +470,7 @@ struct GENERATE_BUNDLE { dialect_identifier); const auto timestamp_end{std::chrono::steady_clock::now()}; - const auto extension_bytes{ - sourcemeta::one::metapack_make_dialect_extension(dialect_identifier)}; + const auto extension_bytes{make_dialect_extension(dialect_identifier)}; sourcemeta::one::metapack_write_pretty_json( action.destination, schema, "application/schema+json", sourcemeta::one::MetapackEncoding::GZIP, @@ -489,8 +505,7 @@ struct GENERATE_EDITOR { dialect_identifier); const auto timestamp_end{std::chrono::steady_clock::now()}; - const auto extension_bytes{ - sourcemeta::one::metapack_make_dialect_extension(dialect_identifier)}; + const auto extension_bytes{make_dialect_extension(dialect_identifier)}; sourcemeta::one::metapack_write_pretty_json( action.destination, schema, "application/schema+json", sourcemeta::one::MetapackEncoding::GZIP, diff --git a/src/metapack/include/sourcemeta/one/metapack.h b/src/metapack/include/sourcemeta/one/metapack.h index 653d8a39f..ae352f28e 100644 --- a/src/metapack/include/sourcemeta/one/metapack.h +++ b/src/metapack/include/sourcemeta/one/metapack.h @@ -12,273 +12,99 @@ #include // std::chrono #include // std::uint8_t, std::uint16_t, std::uint32_t, etc. #include // std::filesystem::path -#include // std::ostream #include // std::span #include // std::string_view #include // std::vector namespace sourcemeta::one { -/// The magic number at the start of every metapack file: "META" static constexpr std::uint32_t METAPACK_MAGIC{0x4154454D}; - -/// The current binary format version static constexpr std::uint16_t METAPACK_VERSION{1}; -/// Payload encoding enum class MetapackEncoding : std::uint8_t { Identity = 0, GZIP = 1 }; -/// The fixed portion of the binary header. All multi-byte fields are -/// little-endian (native on x86/ARM). #pragma pack(push, 1) struct MetapackHeader { std::uint32_t magic; std::uint16_t format_version; MetapackEncoding encoding; std::uint8_t reserved; - std::int64_t last_modified; // nanoseconds since epoch - std::uint64_t content_bytes; // uncompressed payload size - std::int64_t duration; // build duration in milliseconds - // SHA-256 checksum (32 raw bytes) + std::int64_t last_modified; + std::uint64_t content_bytes; + std::int64_t duration; std::array checksum; std::uint16_t mime_length; - // Followed by: - // mime (mime_length bytes, UTF-8) - // extension_size (4 bytes, uint32) - // extension (extension_size bytes, opaque) - // payload data }; #pragma pack(pop) -/// Write a metapack file with JSON payload (compact serialization) +struct MetapackInfo { + std::string checksum_hex; + std::chrono::system_clock::time_point last_modified; + std::string mime; + MetapackEncoding encoding; + std::uint64_t content_bytes; + std::chrono::milliseconds duration; +}; + +// Writers + SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_write_json(const std::filesystem::path &destination, const sourcemeta::core::JSON &document, - const std::string_view mime, MetapackEncoding encoding, + std::string_view mime, MetapackEncoding encoding, std::span extension, std::chrono::milliseconds duration) -> void; -/// Write a metapack file with JSON payload (pretty serialization) SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_write_pretty_json(const std::filesystem::path &destination, const sourcemeta::core::JSON &document, - const std::string_view mime, + std::string_view mime, MetapackEncoding encoding, std::span extension, std::chrono::milliseconds duration) -> void; -/// Write a metapack file with text payload SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_write_text(const std::filesystem::path &destination, - std::string_view contents, const std::string_view mime, + std::string_view contents, std::string_view mime, MetapackEncoding encoding, std::span extension, std::chrono::milliseconds duration) -> void; -/// Write a metapack file with JSONL payload SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_write_jsonl(const std::filesystem::path &destination, const std::vector &entries, - const std::string_view mime, - MetapackEncoding encoding, + std::string_view mime, MetapackEncoding encoding, std::span extension, std::chrono::milliseconds duration) -> void; -/// Write a metapack file with a raw file as payload SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_write_file(const std::filesystem::path &destination, const std::filesystem::path &source, - const std::string_view mime, MetapackEncoding encoding, + std::string_view mime, MetapackEncoding encoding, std::span extension, std::chrono::milliseconds duration) -> void; -// --------------------------------------------------------------------------- -// Extension structs for specific file types -// --------------------------------------------------------------------------- - -/// Extension for schemas/.../schema.metapack, bundle.metapack, editor.metapack. -/// Contains the dialect URI for the JSON Schema Link header. -#pragma pack(push, 1) -struct MetapackDialectExtension { - std::uint16_t dialect_length; - // Followed by dialect_length bytes of UTF-8 dialect URI string -}; -#pragma pack(pop) - -/// Extension for explorer/.../schema.metapack (SchemaMetadata). -/// Contains all fields needed by search index and directory listing handlers. -#pragma pack(push, 1) -struct MetapackExplorerSchemaExtension { - std::int64_t health; - std::int64_t bytes; - std::int64_t dependencies; - std::uint16_t path_length; - std::uint16_t identifier_length; - std::uint16_t base_dialect_length; - std::uint16_t dialect_length; - std::uint16_t title_length; - std::uint16_t description_length; - std::uint16_t alert_length; - std::uint16_t provenance_length; - // Followed by string data in order: - // path, identifier, base_dialect, dialect, - // title, description, alert, provenance -}; -#pragma pack(pop) - -/// Helper to read string fields from a MetapackExplorerSchemaExtension -/// in mmap'd memory. The `base` pointer must point to the start of the -/// extension data in the FileView. Strings are contiguous after the struct. -inline auto metapack_explorer_schema_string( - const MetapackExplorerSchemaExtension *, const std::uint8_t *base, - const std::size_t field_offset, const std::size_t field_length) - -> std::string_view { - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return {reinterpret_cast( - base + sizeof(MetapackExplorerSchemaExtension) + field_offset), - field_length}; -} - -inline auto -metapack_explorer_path(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - return metapack_explorer_schema_string(extension, base, 0, - extension->path_length); -} - -inline auto -metapack_explorer_identifier(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - const std::size_t offset{extension->path_length}; - return metapack_explorer_schema_string(extension, base, offset, - extension->identifier_length); -} - -inline auto -metapack_explorer_base_dialect(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - const std::size_t offset{static_cast(extension->path_length) + - extension->identifier_length}; - return metapack_explorer_schema_string(extension, base, offset, - extension->base_dialect_length); -} - -inline auto -metapack_explorer_dialect(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - const std::size_t offset{static_cast(extension->path_length) + - extension->identifier_length + - extension->base_dialect_length}; - return metapack_explorer_schema_string(extension, base, offset, - extension->dialect_length); -} - -inline auto -metapack_explorer_title(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - const std::size_t offset{static_cast(extension->path_length) + - extension->identifier_length + - extension->base_dialect_length + - extension->dialect_length}; - return metapack_explorer_schema_string(extension, base, offset, - extension->title_length); -} - -inline auto -metapack_explorer_description(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - const std::size_t offset{static_cast(extension->path_length) + - extension->identifier_length + - extension->base_dialect_length + - extension->dialect_length + extension->title_length}; - return metapack_explorer_schema_string(extension, base, offset, - extension->description_length); -} - -inline auto -metapack_explorer_alert(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - const std::size_t offset{static_cast(extension->path_length) + - extension->identifier_length + - extension->base_dialect_length + - extension->dialect_length + extension->title_length + - extension->description_length}; - return metapack_explorer_schema_string(extension, base, offset, - extension->alert_length); -} - -inline auto -metapack_explorer_provenance(const MetapackExplorerSchemaExtension *extension, - const std::uint8_t *base) -> std::string_view { - const std::size_t offset{ - static_cast(extension->path_length) + - extension->identifier_length + extension->base_dialect_length + - extension->dialect_length + extension->title_length + - extension->description_length + extension->alert_length}; - return metapack_explorer_schema_string(extension, base, offset, - extension->provenance_length); -} - -/// Build a MetapackDialectExtension as a byte vector -SOURCEMETA_ONE_METAPACK_EXPORT -auto metapack_make_dialect_extension(std::string_view dialect) - -> std::vector; - -/// Build a MetapackExplorerSchemaExtension as a byte vector -SOURCEMETA_ONE_METAPACK_EXPORT -auto metapack_make_explorer_schema_extension( - std::int64_t health, std::int64_t bytes, std::int64_t dependencies, - std::string_view path, std::string_view identifier, - std::string_view base_dialect, std::string_view dialect, - std::string_view title, std::string_view description, - std::string_view alert, std::string_view provenance) - -> std::vector; - -// --------------------------------------------------------------------------- -// Read functions -// --------------------------------------------------------------------------- +// Readers -/// Read the full JSON payload from a metapack file (decompresses if needed) SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_read_json(const std::filesystem::path &path) -> sourcemeta::core::JSON; -/// Header metadata extracted from a metapack file -struct MetapackInfo { - std::string checksum_hex; - std::chrono::system_clock::time_point last_modified; - std::string mime; - MetapackEncoding encoding; - std::uint64_t content_bytes; - std::chrono::milliseconds duration; -}; - -/// Read only the header metadata from a memory-mapped metapack file SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_info(const sourcemeta::core::FileView &view) -> MetapackInfo; -/// Compute the byte offset where the payload data begins SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_payload_offset(const sourcemeta::core::FileView &view) -> std::size_t; -/// Compute the byte offset where the extension data begins within -/// a memory-mapped metapack file. Returns 0 if the extension is empty. SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_extension_offset(const sourcemeta::core::FileView &view) -> std::size_t; -/// Compute the size of the extension data within a memory-mapped -/// metapack file. Returns 0 if there is no extension. SOURCEMETA_ONE_METAPACK_EXPORT auto metapack_extension_size(const sourcemeta::core::FileView &view) -> std::uint32_t; -/// Read the typed extension from a memory-mapped metapack file. -/// The returned pointer points directly into the mmap'd memory. -/// The FileView must outlive the returned pointer. -/// Returns nullptr if the file has no extension or extension_size -/// is smaller than sizeof(T). template auto metapack_extension(const sourcemeta::core::FileView &view) -> const T * { const auto offset{metapack_extension_offset(view)}; @@ -294,22 +120,6 @@ auto metapack_extension(const sourcemeta::core::FileView &view) -> const T * { return view.as(offset); } -/// Helper: read a string_view from a dialect extension in mmap'd memory. -/// The FileView must outlive the returned string_view. -inline auto metapack_dialect_string(const sourcemeta::core::FileView &view) - -> std::string_view { - const auto *extension{metapack_extension(view)}; - if (extension == nullptr || extension->dialect_length == 0) { - return {}; - } - - const auto string_offset{metapack_extension_offset(view) + - sizeof(MetapackDialectExtension)}; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return {reinterpret_cast(view.as(string_offset)), - extension->dialect_length}; -} - } // namespace sourcemeta::one #endif diff --git a/src/metapack/metapack.cc b/src/metapack/metapack.cc index 67fe909b6..618542aac 100644 --- a/src/metapack/metapack.cc +++ b/src/metapack/metapack.cc @@ -3,7 +3,6 @@ #include #include -// TODO: Remove this dependency once we handle gzip internally #include #include // assert @@ -36,11 +35,9 @@ static auto write_binary_header(std::ostream &output, header.content_bytes = uncompressed_size; header.duration = duration.count(); - // Compute SHA-256 checksum of the uncompressed payload std::ostringstream checksum_hex; sourcemeta::core::sha256(payload, checksum_hex); const auto hex_string{checksum_hex.str()}; - // Convert hex string to raw bytes for (std::size_t index{0}; index < 32 && index * 2 + 1 < hex_string.size(); index++) { const auto byte_string{hex_string.substr(index * 2, 2)}; @@ -51,14 +48,10 @@ static auto write_binary_header(std::ostream &output, assert(mime.size() <= UINT16_MAX); header.mime_length = static_cast(mime.size()); - // Write the fixed header // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) output.write(reinterpret_cast(&header), sizeof(MetapackHeader)); - - // Write mime string output.write(mime.data(), static_cast(mime.size())); - // Write extension size and data const auto extension_size{static_cast(extension.size())}; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) output.write(reinterpret_cast(&extension_size), @@ -221,7 +214,6 @@ auto metapack_read_json(const std::filesystem::path &path) throw std::runtime_error("Unsupported metapack version"); } - // Skip past: fixed header + mime + extension_size + extension auto payload_offset{sizeof(MetapackHeader) + header->mime_length}; const auto *extension_size{view.as(payload_offset)}; payload_offset += sizeof(std::uint32_t) + *extension_size; @@ -247,73 +239,12 @@ auto metapack_read_json(const std::filesystem::path &path) return sourcemeta::core::parse_json(payload_string); } -auto metapack_make_dialect_extension(const std::string_view dialect) - -> std::vector { - std::vector result; - result.resize(sizeof(MetapackDialectExtension) + dialect.size()); - - MetapackDialectExtension header{}; - header.dialect_length = static_cast(dialect.size()); - std::memcpy(result.data(), &header, sizeof(header)); - std::memcpy(result.data() + sizeof(header), dialect.data(), dialect.size()); - - return result; -} - -auto metapack_make_explorer_schema_extension( - const std::int64_t health, const std::int64_t bytes, - const std::int64_t dependencies, const std::string_view path, - const std::string_view identifier, const std::string_view base_dialect, - const std::string_view dialect, const std::string_view title, - const std::string_view description, const std::string_view alert, - const std::string_view provenance) -> std::vector { - const auto strings_size{ - path.size() + identifier.size() + base_dialect.size() + dialect.size() + - title.size() + description.size() + alert.size() + provenance.size()}; - std::vector result; - result.resize(sizeof(MetapackExplorerSchemaExtension) + strings_size); - - MetapackExplorerSchemaExtension header{}; - header.health = health; - header.bytes = bytes; - header.dependencies = dependencies; - header.path_length = static_cast(path.size()); - header.identifier_length = static_cast(identifier.size()); - header.base_dialect_length = static_cast(base_dialect.size()); - header.dialect_length = static_cast(dialect.size()); - header.title_length = static_cast(title.size()); - header.description_length = static_cast(description.size()); - header.alert_length = static_cast(alert.size()); - header.provenance_length = static_cast(provenance.size()); - - auto *cursor{result.data()}; - std::memcpy(cursor, &header, sizeof(header)); - cursor += sizeof(header); - - const auto append = [&cursor](const std::string_view string) { - std::memcpy(cursor, string.data(), string.size()); - cursor += string.size(); - }; - - append(path); - append(identifier); - append(base_dialect); - append(dialect); - append(title); - append(description); - append(alert); - append(provenance); - - return result; -} - auto metapack_info(const sourcemeta::core::FileView &view) -> MetapackInfo { assert(view.size() >= sizeof(MetapackHeader) + sizeof(std::uint32_t)); const auto *header{view.as()}; assert(header->magic == METAPACK_MAGIC); assert(header->format_version == METAPACK_VERSION); - // Convert raw checksum bytes to hex string std::string checksum_hex; checksum_hex.reserve(64); static constexpr const char *hex_chars = "0123456789abcdef"; @@ -322,12 +253,10 @@ auto metapack_info(const sourcemeta::core::FileView &view) -> MetapackInfo { checksum_hex += hex_chars[byte & 0x0F]; } - // Convert nanoseconds since epoch to time_point const auto nanos{std::chrono::nanoseconds{header->last_modified}}; const auto time_point{std::chrono::system_clock::time_point{ std::chrono::duration_cast(nanos)}}; - // Read mime string // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) const auto *mime_data{reinterpret_cast( view.as(sizeof(MetapackHeader)))}; diff --git a/src/server/action_serve_metapack_file.h b/src/server/action_serve_metapack_file.h index fa173acc3..671454054 100644 --- a/src/server/action_serve_metapack_file.h +++ b/src/server/action_serve_metapack_file.h @@ -17,6 +17,12 @@ #include // std::ostringstream #include // std::string +#pragma pack(push, 1) +struct MetapackDialectExtension { + std::uint16_t dialect_length; +}; +#pragma pack(pop) + static auto action_serve_metapack_file( const sourcemeta::one::HTTPRequest &request, sourcemeta::one::HTTPResponse &response, @@ -109,7 +115,17 @@ static auto action_serve_metapack_file( if (link.has_value()) { write_link_header(response, link.value()); } else { - const auto dialect{sourcemeta::one::metapack_dialect_string(view)}; + const auto *dialect_ext{ + sourcemeta::one::metapack_extension(view)}; + const std::string_view dialect = + (dialect_ext != nullptr && dialect_ext->dialect_length > 0) + ? std::string_view{reinterpret_cast< + const char *>(view.as( + sourcemeta::one::metapack_extension_offset( + view) + + sizeof(MetapackDialectExtension))), + dialect_ext->dialect_length} + : std::string_view{}; if (!dialect.empty()) { write_link_header(response, std::string{dialect}); } From 5124c39eb700ebf1fe447c6c6f48791de4f00f5f Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 18 Mar 2026 15:48:29 -0400 Subject: [PATCH 4/4] More review Signed-off-by: Juan Cruz Viotti --- src/build/delta.cc | 44 +++++++++---------- src/index/explorer.h | 43 +++++++++--------- test/unit/metapack/CMakeLists.txt | 3 ++ test/unit/metapack/metapack_test.cc | 67 +++++++++-------------------- 4 files changed, 68 insertions(+), 89 deletions(-) diff --git a/src/build/delta.cc b/src/build/delta.cc index f0f0be794..a3d425cb1 100644 --- a/src/build/delta.cc +++ b/src/build/delta.cc @@ -321,43 +321,42 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, // the dependencies.metapack keys that reference it. This is a // single O(keys) pass instead of O(affected × keys). std::unordered_map> - reverse_dep_index; - for (const auto dep_key : entries.keys()) { - if (!dep_key.ends_with("/%/dependencies.metapack")) { + reverse_dependency_index; + for (const auto dependency_key : entries.keys()) { + if (!dependency_key.ends_with("/%/dependencies.metapack")) { continue; } - const auto *dep_entry{entries.entry(std::string{dep_key})}; - if (dep_entry == nullptr) { + const auto *dependency_entry{ + entries.entry(std::string{dependency_key})}; + if (dependency_entry == nullptr) { continue; } - for (const auto &dependency : dep_entry->dependencies) { - const auto &dep_path{dependency.native()}; - if (!dep_path.starts_with(schemas_prefix)) { + for (const auto &dependency : dependency_entry->dependencies) { + const auto &dependency_path{dependency.native()}; + if (!dependency_path.starts_with(schemas_prefix)) { continue; } - const auto sentinel_pos{dep_path.find("/%/", owner_start)}; + const auto sentinel_pos{dependency_path.find("/%/", owner_start)}; if (sentinel_pos == std::string::npos) { continue; } auto referenced_schema{ - dep_path.substr(owner_start, sentinel_pos - owner_start)}; + dependency_path.substr(owner_start, sentinel_pos - owner_start)}; if (affected_schemas.contains(referenced_schema)) { - reverse_dep_index[std::move(referenced_schema)].emplace_back( - dep_key); + reverse_dependency_index[std::move(referenced_schema)].emplace_back( + dependency_key); } } } - // Deduplicate: a single dep_key may reference the same schema - // through multiple dependency paths - for (auto &[schema, dep_keys] : reverse_dep_index) { - std::ranges::sort(dep_keys); - const auto [first, last] = std::ranges::unique(dep_keys); - dep_keys.erase(first, last); + for (auto &[schema, dependency_keys] : reverse_dependency_index) { + std::ranges::sort(dependency_keys); + const auto [first, last] = std::ranges::unique(dependency_keys); + dependency_keys.erase(first, last); } std::vector dependents_wave; @@ -373,11 +372,12 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, append_filename(schema_base, "dependents.metapack")}}; BuildPlan::Action::Dependencies action_dependencies; - const auto reverse_iterator{reverse_dep_index.find(relative_string)}; - if (reverse_iterator != reverse_dep_index.end()) { + const auto reverse_iterator{ + reverse_dependency_index.find(relative_string)}; + if (reverse_iterator != reverse_dependency_index.end()) { action_dependencies.reserve(reverse_iterator->second.size()); - for (const auto &dep_key : reverse_iterator->second) { - action_dependencies.emplace_back(dep_key); + for (const auto &dependency_key : reverse_iterator->second) { + action_dependencies.emplace_back(dependency_key); } } diff --git a/src/index/explorer.h b/src/index/explorer.h index 7fd767cff..4c7d7cc88 100644 --- a/src/index/explorer.h +++ b/src/index/explorer.h @@ -404,21 +404,22 @@ struct GENERATE_EXPLORER_SEARCH_INDEX { result.reserve(action.dependencies.size()); for (const auto &dependency : action.dependencies) { - sourcemeta::core::FileView dep_view{dependency}; + sourcemeta::core::FileView dependency_view{dependency}; const auto extension_offset{ - sourcemeta::one::metapack_extension_offset(dep_view)}; + sourcemeta::one::metapack_extension_offset(dependency_view)}; if (extension_offset == 0) { continue; } const auto *extension{ sourcemeta::one::metapack_extension( - dep_view)}; + dependency_view)}; if (extension == nullptr) { continue; } - const auto *extension_base{dep_view.as(extension_offset)}; + const auto *extension_base{ + dependency_view.as(extension_offset)}; const auto path{explorer_extension_path(extension, extension_base)}; const auto title{explorer_extension_title(extension, extension_base)}; const auto description{ @@ -529,54 +530,56 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { } entries.push_back(std::move(entry_json)); } else if (filename == "schema.metapack") { - sourcemeta::core::FileView dep_view{dependency}; - const auto ext_offset{ - sourcemeta::one::metapack_extension_offset(dep_view)}; + sourcemeta::core::FileView dependency_view{dependency}; + const auto extension_offset{ + sourcemeta::one::metapack_extension_offset(dependency_view)}; const auto *extension{sourcemeta::one::metapack_extension< - MetapackExplorerSchemaExtension>(dep_view)}; + MetapackExplorerSchemaExtension>(dependency_view)}; - if (extension == nullptr || ext_offset == 0) { + if (extension == nullptr || extension_offset == 0) { continue; } - const auto *ext_base{dep_view.as(ext_offset)}; + const auto *extension_base{ + dependency_view.as(extension_offset)}; auto entry_json{sourcemeta::core::JSON::make_object()}; entry_json.assign("name", sourcemeta::core::JSON{child_name}); entry_json.assign("type", sourcemeta::core::JSON{"schema"}); - const auto schema_path{explorer_extension_path(extension, ext_base)}; + const auto schema_path{ + explorer_extension_path(extension, extension_base)}; entry_json.assign("path", sourcemeta::core::JSON{std::filesystem::path{ std::string{schema_path}}}); entry_json.assign( "identifier", sourcemeta::core::JSON{std::string{ - explorer_extension_identifier(extension, ext_base)}}); + explorer_extension_identifier(extension, extension_base)}}); entry_json.assign("bytes", sourcemeta::core::JSON{extension->bytes}); entry_json.assign( "baseDialect", sourcemeta::core::JSON{std::string{ - explorer_extension_base_dialect(extension, ext_base)}}); - entry_json.assign( - "dialect", sourcemeta::core::JSON{std::string{ - explorer_extension_dialect(extension, ext_base)}}); + explorer_extension_base_dialect(extension, extension_base)}}); + entry_json.assign("dialect", sourcemeta::core::JSON{ + std::string{explorer_extension_dialect( + extension, extension_base)}}); entry_json.assign("health", sourcemeta::core::JSON{extension->health}); entry_json.assign("dependencies", sourcemeta::core::JSON{extension->dependencies}); - const auto title{explorer_extension_title(extension, ext_base)}; + const auto title{explorer_extension_title(extension, extension_base)}; if (!title.empty()) { entry_json.assign("title", sourcemeta::core::JSON{std::string{title}}); } const auto description{ - explorer_extension_description(extension, ext_base)}; + explorer_extension_description(extension, extension_base)}; if (!description.empty()) { entry_json.assign("description", sourcemeta::core::JSON{std::string{description}}); } - const auto alert{explorer_extension_alert(extension, ext_base)}; + const auto alert{explorer_extension_alert(extension, extension_base)}; if (!alert.empty()) { entry_json.assign("alert", sourcemeta::core::JSON{std::string{alert}}); @@ -585,7 +588,7 @@ struct GENERATE_EXPLORER_DIRECTORY_LIST { } const auto provenance{ - explorer_extension_provenance(extension, ext_base)}; + explorer_extension_provenance(extension, extension_base)}; if (!provenance.empty()) { entry_json.assign("provenance", sourcemeta::core::JSON{std::string{provenance}}); diff --git a/test/unit/metapack/CMakeLists.txt b/test/unit/metapack/CMakeLists.txt index 20d5d96b9..b92ae3c3e 100644 --- a/test/unit/metapack/CMakeLists.txt +++ b/test/unit/metapack/CMakeLists.txt @@ -3,3 +3,6 @@ sourcemeta_googletest(NAMESPACE sourcemeta PROJECT one NAME metapack target_link_libraries(sourcemeta_one_metapack_unit PRIVATE sourcemeta::one::metapack) + +target_compile_definitions(sourcemeta_one_metapack_unit + PRIVATE METAPACK_TEST_DIRECTORY="${CMAKE_CURRENT_BINARY_DIR}") diff --git a/test/unit/metapack/metapack_test.cc b/test/unit/metapack/metapack_test.cc index bb9a363ab..e9c394a8d 100644 --- a/test/unit/metapack/metapack_test.cc +++ b/test/unit/metapack/metapack_test.cc @@ -6,36 +6,18 @@ #include #include // std::chrono -#include // std::memcmp +#include // std::memcpy #include // std::filesystem #include // std::span #include // std::string #include // std::vector -static auto temp_path(const std::string &name) -> std::filesystem::path { - return std::filesystem::temp_directory_path() / ("metapack_test_" + name); +static auto test_path(const std::string &name) -> std::filesystem::path { + return std::filesystem::path{METAPACK_TEST_DIRECTORY} / name; } -class MetapackTest : public ::testing::Test { -protected: - void TearDown() override { - for (const auto &path : cleanup_paths) { - std::filesystem::remove_all(path); - } - } - - auto make_temp(const std::string &name) -> std::filesystem::path { - auto path{temp_path(name)}; - cleanup_paths.push_back(path); - return path; - } - -private: - std::vector cleanup_paths; -}; - -TEST_F(MetapackTest, write_and_read_json_identity) { - const auto path{make_temp("identity.metapack")}; +TEST(Metapack, write_and_read_json_identity) { + const auto path{test_path("identity.metapack")}; auto document{sourcemeta::core::JSON::make_object()}; document.assign("hello", sourcemeta::core::JSON{"world"}); @@ -49,8 +31,8 @@ TEST_F(MetapackTest, write_and_read_json_identity) { EXPECT_EQ(result.at("hello").to_string(), "world"); } -TEST_F(MetapackTest, write_and_read_json_gzip) { - const auto path{make_temp("gzip.metapack")}; +TEST(Metapack, write_and_read_json_gzip) { + const auto path{test_path("gzip.metapack")}; auto document{sourcemeta::core::JSON::make_object()}; document.assign("foo", sourcemeta::core::JSON{42}); @@ -64,8 +46,8 @@ TEST_F(MetapackTest, write_and_read_json_gzip) { EXPECT_EQ(result.at("foo").to_integer(), 42); } -TEST_F(MetapackTest, write_and_read_pretty_json) { - const auto path{make_temp("pretty.metapack")}; +TEST(Metapack, write_and_read_pretty_json) { + const auto path{test_path("pretty.metapack")}; auto document{sourcemeta::core::JSON::make_object()}; document.assign("key", sourcemeta::core::JSON{"value"}); @@ -78,8 +60,8 @@ TEST_F(MetapackTest, write_and_read_pretty_json) { EXPECT_EQ(result.at("key").to_string(), "value"); } -TEST_F(MetapackTest, binary_header_magic_and_version) { - const auto path{make_temp("header.metapack")}; +TEST(Metapack, binary_header_magic_and_version) { + const auto path{test_path("header.metapack")}; auto document{sourcemeta::core::JSON::make_object()}; sourcemeta::one::metapack_write_json( @@ -88,15 +70,15 @@ TEST_F(MetapackTest, binary_header_magic_and_version) { std::chrono::milliseconds{0}); sourcemeta::core::FileView view{path}; - EXPECT_GE(view.size(), sizeof(sourcemeta::one::MetapackHeader)); + EXPECT_TRUE(view.size() >= sizeof(sourcemeta::one::MetapackHeader)); const auto *header{view.as()}; EXPECT_EQ(header->magic, sourcemeta::one::METAPACK_MAGIC); EXPECT_EQ(header->format_version, sourcemeta::one::METAPACK_VERSION); EXPECT_EQ(header->encoding, sourcemeta::one::MetapackEncoding::Identity); } -TEST_F(MetapackTest, no_extension) { - const auto path{make_temp("no_ext.metapack")}; +TEST(Metapack, no_extension) { + const auto path{test_path("no_ext.metapack")}; auto document{sourcemeta::core::JSON::make_object()}; sourcemeta::one::metapack_write_json( @@ -113,28 +95,24 @@ TEST_F(MetapackTest, no_extension) { struct TestExtension { std::int32_t value; std::uint16_t name_length; - // followed by name bytes }; #pragma pack(pop) -TEST_F(MetapackTest, write_and_read_extension) { - const auto path{make_temp("with_ext.metapack")}; +TEST(Metapack, write_and_read_extension) { + const auto path{test_path("with_ext.metapack")}; auto document{sourcemeta::core::JSON::make_object()}; - // Build a test extension: {value=42, name="hello"} std::vector extension_bytes; TestExtension extension_header{}; extension_header.value = 42; const std::string name{"hello"}; extension_header.name_length = static_cast(name.size()); - // Serialize the struct + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) const auto *header_bytes{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) reinterpret_cast(&extension_header)}; extension_bytes.insert(extension_bytes.end(), header_bytes, header_bytes + sizeof(TestExtension)); - // Append the name string extension_bytes.insert(extension_bytes.end(), name.begin(), name.end()); sourcemeta::one::metapack_write_json( @@ -143,34 +121,30 @@ TEST_F(MetapackTest, write_and_read_extension) { std::span{extension_bytes}, std::chrono::milliseconds{7}); - // Read back via mmap sourcemeta::core::FileView view{path}; EXPECT_EQ(sourcemeta::one::metapack_extension_size(view), extension_bytes.size()); const auto *read_extension{ sourcemeta::one::metapack_extension(view)}; - ASSERT_NE(read_extension, nullptr); + EXPECT_NE(read_extension, nullptr); EXPECT_EQ(read_extension->value, 42); EXPECT_EQ(read_extension->name_length, 5); - // Read the name string from the extension data const auto extension_start{sourcemeta::one::metapack_extension_offset(view)}; // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) const auto *name_data{reinterpret_cast( view.as(extension_start + sizeof(TestExtension)))}; EXPECT_EQ(std::string_view(name_data, read_extension->name_length), "hello"); - // Also verify the JSON payload is still readable const auto result{sourcemeta::one::metapack_read_json(path)}; EXPECT_TRUE(result.is_object()); } -TEST_F(MetapackTest, extension_nullptr_when_too_small) { - const auto path{make_temp("small_ext.metapack")}; +TEST(Metapack, extension_nullptr_when_too_small) { + const auto path{test_path("small_ext.metapack")}; auto document{sourcemeta::core::JSON::make_object()}; - // Write a 2-byte extension std::vector small_extension{0x01, 0x02}; sourcemeta::one::metapack_write_json( @@ -182,7 +156,6 @@ TEST_F(MetapackTest, extension_nullptr_when_too_small) { sourcemeta::core::FileView view{path}; EXPECT_EQ(sourcemeta::one::metapack_extension_size(view), 2); - // TestExtension is larger than 2 bytes, so this should return nullptr const auto *read_extension{ sourcemeta::one::metapack_extension(view)}; EXPECT_EQ(read_extension, nullptr);