From 3c2a83f1d73d4bbc9535e52d5437724579e7902f Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 18 Mar 2026 10:00:41 -0400 Subject: [PATCH 1/2] Fix potential inconsistencies when removing schemas with references Signed-off-by: Juan Cruz Viotti --- src/build/delta.cc | 148 +++++++++++------- .../include/sourcemeta/one/build_state.h | 7 +- src/build/state.cc | 14 ++ test/cli/CMakeLists.txt | 2 + .../dependents-remove-referenced-schema.sh | 57 +++++++ .../dependents-remove-referencing-schema.sh | 128 +++++++++++++++ 6 files changed, 296 insertions(+), 60 deletions(-) create mode 100755 test/cli/index/common/dependents-remove-referenced-schema.sh create mode 100755 test/cli/index/common/dependents-remove-referencing-schema.sh diff --git a/src/build/delta.cc b/src/build/delta.cc index b78b4477..bc81e91f 100644 --- a/src/build/delta.cc +++ b/src/build/delta.cc @@ -212,6 +212,39 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, const auto &output_string{output.native()}; const auto schemas_prefix{output_string + "/" + SCHEMAS_DIRECTORY + "/"}; + auto extract_cross_schema_refs{ + [&schemas_prefix]( + const BuildState::Entry *state_entry, + std::string_view owner_base) -> std::unordered_set { + std::unordered_set result; + if (state_entry == nullptr) { + return result; + } + + for (const auto &dependency : state_entry->dependencies) { + const auto &dependency_path{dependency.native()}; + if (!dependency_path.starts_with(schemas_prefix)) { + continue; + } + + const auto sentinel_position{dependency_path.find("/%/")}; + if (sentinel_position == std::string::npos) { + continue; + } + + const auto relative{dependency_path.substr( + schemas_prefix.size(), + sentinel_position - schemas_prefix.size())}; + if (relative != owner_base) { + result.insert(std::string{relative}); + } + } + + return result; + }}; + + const auto owner_start{schemas_prefix.size()}; + std::unordered_set affected_schemas; for (const auto key : entries.keys()) { if (!key.ends_with("/%/dependencies.metapack")) { @@ -225,38 +258,6 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, const auto *new_entry{entries.entry(std::string{key})}; const auto *old_entry{entries.disk_entry(std::string{key})}; - auto extract_cross_schema_refs{ - [&schemas_prefix]( - const BuildState::Entry *state_entry, - std::string_view owner_base) -> std::unordered_set { - std::unordered_set result; - if (state_entry == nullptr) { - return result; - } - - for (const auto &dependency : state_entry->dependencies) { - const auto &dependency_path{dependency.native()}; - if (!dependency_path.starts_with(schemas_prefix)) { - continue; - } - - const auto sentinel_position{dependency_path.find("/%/")}; - if (sentinel_position == std::string::npos) { - continue; - } - - const auto relative{dependency_path.substr( - schemas_prefix.size(), - sentinel_position - schemas_prefix.size())}; - if (relative != owner_base) { - result.insert(std::string{relative}); - } - } - - return result; - }}; - - const auto owner_start{schemas_prefix.size()}; const auto owner_sentinel{key.find("/%/", owner_start)}; if (owner_sentinel == std::string_view::npos) { continue; @@ -284,6 +285,33 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, } } + for (const auto &deleted_key : entries.deleted_keys()) { + if (!deleted_key.ends_with("/%/dependencies.metapack")) { + continue; + } + + if (!deleted_key.starts_with(schemas_prefix)) { + continue; + } + + const auto *old_entry{entries.raw_disk_entry(deleted_key)}; + if (old_entry == nullptr) { + continue; + } + + const auto owner_sentinel{deleted_key.find("/%/", owner_start)}; + if (owner_sentinel == std::string::npos) { + continue; + } + + const auto owner_base{ + deleted_key.substr(owner_start, owner_sentinel - owner_start)}; + for (const auto &reference : + extract_cross_schema_refs(old_entry, owner_base)) { + affected_schemas.insert(reference); + } + } + BuildPlan plan; plan.output = output; plan.type = build_type; @@ -568,6 +596,32 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, } } + std::unordered_set current_schema_bases; + current_schema_bases.reserve(active_schemas.size()); + for (const auto &schema : active_schemas) { + current_schema_bases.insert(schema.schema_base); + } + + std::unordered_set removed_entries; + { + const auto schemas_prefix{output_string + '/' + SCHEMAS_DIRECTORY + '/'}; + for (const auto entry_path : entries.keys()) { + if (!entry_path.starts_with(schemas_prefix)) { + continue; + } + + const auto sentinel_pos{entry_path.find("/%/")}; + if (sentinel_pos == std::string_view::npos) { + continue; + } + + if (!current_schema_bases.contains( + entry_path.substr(0, sentinel_pos + 2))) { + removed_entries.emplace(entry_path); + } + } + } + std::unordered_set dirty_set; { for (const auto &schema : active_schemas) { @@ -618,7 +672,8 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, } for (const auto &dep : state_entry->dependencies) { - if (dirty_set.contains(dep.native())) { + if (dirty_set.contains(dep.native()) || + removed_entries.contains(dep.native())) { dirty_set.insert(target_path); propagation_changed = true; break; @@ -692,32 +747,7 @@ auto delta(const BuildPhase phase, const BuildPlan::Type build_type, } } - bool has_potential_stale{false}; - { - std::unordered_set current_schema_bases; - current_schema_bases.reserve(schemas.size()); - for (const auto &schema : active_schemas) { - current_schema_bases.insert(schema.schema_base); - } - - const auto schemas_prefix{output_string + '/' + SCHEMAS_DIRECTORY + '/'}; - for (const auto entry_path : entries.keys()) { - if (!entry_path.starts_with(schemas_prefix)) { - continue; - } - - const auto sentinel_pos{entry_path.find("/%/")}; - if (sentinel_pos == std::string_view::npos) { - continue; - } - - if (!current_schema_bases.contains( - entry_path.substr(0, sentinel_pos + 2))) { - has_potential_stale = true; - break; - } - } - } + const auto has_potential_stale{!removed_entries.empty()}; if (!is_full && dirty_set.empty() && removed_uris.empty() && !has_missing_web && !has_stale_web && !has_potential_stale) { diff --git a/src/build/include/sourcemeta/one/build_state.h b/src/build/include/sourcemeta/one/build_state.h index bd051000..33ab6fee 100644 --- a/src/build/include/sourcemeta/one/build_state.h +++ b/src/build/include/sourcemeta/one/build_state.h @@ -68,8 +68,9 @@ class SOURCEMETA_ONE_BUILD_EXPORT BuildState { [[nodiscard]] auto in_overlay(const std::string &key) const -> bool; [[nodiscard]] auto disk_entry(const std::string &key) const -> const Entry *; + [[nodiscard]] auto raw_disk_entry(const std::string &key) const + -> const Entry *; -private: struct TransparentHash { using is_transparent = void; auto operator()(std::string_view value) const noexcept -> std::size_t { @@ -85,6 +86,10 @@ class SOURCEMETA_ONE_BUILD_EXPORT BuildState { } }; + [[nodiscard]] auto deleted_keys() const -> const + std::unordered_set &; + +private: auto probe_slot(std::string_view key, std::uint8_t kind) const -> const std::uint8_t *; auto parse_slot_entry(const std::uint8_t *slot) const -> const Entry &; diff --git a/src/build/state.cc b/src/build/state.cc index 001460ad..8c4c3905 100644 --- a/src/build/state.cc +++ b/src/build/state.cc @@ -454,6 +454,20 @@ auto BuildState::disk_entry(const std::string &key) const -> const Entry * { return &this->parse_slot_entry(slot); } +auto BuildState::raw_disk_entry(const std::string &key) const -> const Entry * { + const auto *slot{this->probe_slot(key, KIND_OUTPUT)}; + if (slot == nullptr) { + return nullptr; + } + + return &this->parse_slot_entry(slot); +} + +auto BuildState::deleted_keys() const -> const + std::unordered_set & { + return this->deleted; +} + auto BuildState::save(const std::filesystem::path &path) const -> void { if (!this->dirty && this->view && path == this->loaded_path) { return; diff --git a/test/cli/CMakeLists.txt b/test/cli/CMakeLists.txt index cfe0bfce..b3c9cb32 100644 --- a/test/cli/CMakeLists.txt +++ b/test/cli/CMakeLists.txt @@ -67,6 +67,8 @@ if(ONE_INDEX) sourcemeta_one_test_cli(common index dependents-modify-transitive) sourcemeta_one_test_cli(common index dependents-add-schema) sourcemeta_one_test_cli(common index dependents-modify-add-ref) + sourcemeta_one_test_cli(common index dependents-remove-referenced-schema) + sourcemeta_one_test_cli(common index dependents-remove-referencing-schema) if(ONE_ENTERPRISE) sourcemeta_one_test_cli(enterprise index no-options) diff --git a/test/cli/index/common/dependents-remove-referenced-schema.sh b/test/cli/index/common/dependents-remove-referenced-schema.sh new file mode 100755 index 00000000..404eb0ec --- /dev/null +++ b/test/cli/index/common/dependents-remove-referenced-schema.sh @@ -0,0 +1,57 @@ +#!/bin/sh + +# Removing a schema that is still referenced by another schema should +# cause the indexer to fail, as the reference cannot be resolved. + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << EOF > "$TMP/one.json" +{ + "url": "https://sourcemeta.com/", + "contents": { + "example": { + "contents": { + "schemas": { + "baseUri": "https://example.com/", + "path": "./schemas" + } + } + } + } +} +EOF + +mkdir "$TMP/schemas" + +cat << 'EOF' > "$TMP/schemas/a.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/a" +} +EOF + +cat << 'EOF' > "$TMP/schemas/b.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/b", + "properties": { + "foo": { "$ref": "https://example.com/a" } + } +} +EOF + +# Run 1: full build from scratch +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 \ + > /dev/null 2>&1 + +# Run 2: remove schema A (which is referenced by B) +rm "$TMP/schemas/a.json" + +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 \ + 2> "$TMP/output.txt" && CODE="$?" || CODE="$?" +test "$CODE" = "1" || exit 1 diff --git a/test/cli/index/common/dependents-remove-referencing-schema.sh b/test/cli/index/common/dependents-remove-referencing-schema.sh new file mode 100755 index 00000000..50316e83 --- /dev/null +++ b/test/cli/index/common/dependents-remove-referencing-schema.sh @@ -0,0 +1,128 @@ +#!/bin/sh + +# When removing a schema that references another schema, the referenced +# schema's dependents.metapack must be rebuilt in the Combine phase to +# reflect that the removed schema is no longer a dependent. + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << EOF > "$TMP/one.json" +{ + "url": "https://sourcemeta.com/", + "contents": { + "example": { + "contents": { + "schemas": { + "baseUri": "https://example.com/", + "path": "./schemas" + } + } + } + } +} +EOF + +mkdir "$TMP/schemas" + +cat << 'EOF' > "$TMP/schemas/a.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/a" +} +EOF + +cat << 'EOF' > "$TMP/schemas/b.json" +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://example.com/b", + "properties": { + "foo": { "$ref": "https://example.com/a" } + } +} +EOF + +remove_threads_information() { + expr='s/ \[[^]]*[^a-z-][^]]*\]//g' + if [ "$(uname -s)" = "Darwin" ]; then + sed -i '' "$expr" "$1" + else + sed -i "$expr" "$1" + fi +} + +# Run 1: full build from scratch +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 \ + > /dev/null 2>&1 + +# Run 2: remove schema B (which references A) +rm "$TMP/schemas/b.json" + +"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 \ + 2> "$TMP/output.txt" +remove_threads_information "$TMP/output.txt" + +cat << EOF > "$TMP/expected.txt" +Writing output to: $(realpath "$TMP")/output +Using configuration: $(realpath "$TMP")/one.json +Detecting: $(realpath "$TMP")/schemas/a.json (#1) +( 20%) Producing: explorer/%/directory.metapack +( 40%) Producing: explorer/%/search.metapack +( 60%) Producing: explorer/%/directory-html.metapack +( 80%) Disposing: explorer/example/schemas/b +(100%) Disposing: schemas/example/schemas/b +(100%) Combining: schemas/example/schemas/a/%/dependents.metapack +EOF +diff "$TMP/output.txt" "$TMP/expected.txt" + +# Verify final manifest +cd "$TMP/output" +find . -mindepth 1 | LC_ALL=C sort > "$TMP/manifest.txt" +cd - > /dev/null + +cat << 'EOF' > "$TMP/expected_manifest.txt" +./configuration.json +./explorer +./explorer/% +./explorer/%/404.metapack +./explorer/%/directory-html.metapack +./explorer/%/directory.metapack +./explorer/%/search.metapack +./explorer/example +./explorer/example/% +./explorer/example/%/directory-html.metapack +./explorer/example/%/directory.metapack +./explorer/example/schemas +./explorer/example/schemas/% +./explorer/example/schemas/%/directory-html.metapack +./explorer/example/schemas/%/directory.metapack +./explorer/example/schemas/a +./explorer/example/schemas/a/% +./explorer/example/schemas/a/%/schema-html.metapack +./explorer/example/schemas/a/%/schema.metapack +./routes.bin +./schemas +./schemas/example +./schemas/example/schemas +./schemas/example/schemas/a +./schemas/example/schemas/a/% +./schemas/example/schemas/a/%/blaze-exhaustive.metapack +./schemas/example/schemas/a/%/blaze-fast.metapack +./schemas/example/schemas/a/%/bundle.metapack +./schemas/example/schemas/a/%/dependencies.metapack +./schemas/example/schemas/a/%/dependents.metapack +./schemas/example/schemas/a/%/editor.metapack +./schemas/example/schemas/a/%/health.metapack +./schemas/example/schemas/a/%/locations.metapack +./schemas/example/schemas/a/%/positions.metapack +./schemas/example/schemas/a/%/schema.metapack +./schemas/example/schemas/a/%/stats.metapack +./state.bin +./version.json +EOF + +diff "$TMP/manifest.txt" "$TMP/expected_manifest.txt" From 076bbbed88c9edc6c2f3b45fb333779e50f1fde7 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 18 Mar 2026 10:15:37 -0400 Subject: [PATCH 2/2] Fixes Signed-off-by: Juan Cruz Viotti --- .../dependents-remove-referenced-schema.sh | 23 +++++++++++++++++++ .../dependents-remove-referencing-schema.sh | 4 ++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/test/cli/index/common/dependents-remove-referenced-schema.sh b/test/cli/index/common/dependents-remove-referenced-schema.sh index 404eb0ec..aeb13e77 100755 --- a/test/cli/index/common/dependents-remove-referenced-schema.sh +++ b/test/cli/index/common/dependents-remove-referenced-schema.sh @@ -45,6 +45,15 @@ cat << 'EOF' > "$TMP/schemas/b.json" } EOF +remove_threads_information() { + expr='s/ \[[^]]*[^a-z-][^]]*\]//g' + if [ "$(uname -s)" = "Darwin" ]; then + sed -i '' "$expr" "$1" + else + sed -i "$expr" "$1" + fi +} + # Run 1: full build from scratch "$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 \ > /dev/null 2>&1 @@ -55,3 +64,17 @@ rm "$TMP/schemas/a.json" "$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 \ 2> "$TMP/output.txt" && CODE="$?" || CODE="$?" test "$CODE" = "1" || exit 1 +remove_threads_information "$TMP/output.txt" + +cat << EOF > "$TMP/expected.txt" +Writing output to: $(realpath "$TMP")/output +Using configuration: $(realpath "$TMP")/one.json +Detecting: $(realpath "$TMP")/schemas/b.json (#1) +( 7%) Producing: explorer/%/directory.metapack +( 15%) Producing: schemas/example/schemas/b/%/dependencies.metapack +error: Could not resolve the reference to an external schema + https://sourcemeta.com/example/schemas/a + +Did you forget to register a schema with such URI in the one? +EOF +diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/cli/index/common/dependents-remove-referencing-schema.sh b/test/cli/index/common/dependents-remove-referencing-schema.sh index 50316e83..ec0a1390 100755 --- a/test/cli/index/common/dependents-remove-referencing-schema.sh +++ b/test/cli/index/common/dependents-remove-referencing-schema.sh @@ -1,8 +1,8 @@ #!/bin/sh # When removing a schema that references another schema, the referenced -# schema's dependents.metapack must be rebuilt in the Combine phase to -# reflect that the removed schema is no longer a dependent. +# schema's dependents must be rebuilt in phase to reflect that the removed +# schema is no longer a dependent. set -o errexit set -o nounset