diff --git a/src/index/generators.h b/src/index/generators.h
index 79d48c64..b2b6ac82 100644
--- a/src/index/generators.h
+++ b/src/index/generators.h
@@ -117,6 +117,35 @@ struct GENERATE_MATERIALISED_SCHEMA {
     const auto metaschema{resolver(dialect_identifier)};
     assert(metaschema.has_value());
 
+    // If this schema declares $vocabulary (i.e. it is a metaschema in a
+    // vocabulary-aware dialect), reject any unknown required vocabularies.
+    // Pre-vocabulary dialects (draft-4, etc.) may have $vocabulary as a
+    // regular property, so only check 2020-12 and 2019-09 based schemas.
+    const auto schema_base_dialect{
+        sourcemeta::core::to_base_dialect(dialect_identifier)};
+    if (schema_base_dialect.has_value() &&
+        (schema_base_dialect.value() ==
+             sourcemeta::core::SchemaBaseDialect::JSON_Schema_2020_12 ||
+         schema_base_dialect.value() ==
+             sourcemeta::core::SchemaBaseDialect::JSON_Schema_2019_09) &&
+        schema->is_object() && schema->defines("$vocabulary") &&
+        schema->at("$vocabulary").is_object()) {
+      sourcemeta::core::Vocabularies declared_vocabularies;
+      for (const auto &entry : schema->at("$vocabulary").as_object()) {
+        // Only boolean values can be read with to_boolean(), so skip any
+        // other entry instead of converting it blindly.
+        // NOTE(review): presumably the metaschema validation below reports
+        // such malformed entries - confirm
+        if (!entry.second.is_boolean()) {
+          continue;
+        }
+
+        declared_vocabularies.insert(entry.first, entry.second.to_boolean());
+      }
+      declared_vocabularies.throw_if_any_unknown_required(
+          "The metaschema requires an unrecognised vocabulary");
+    }
+
     // Validate the schemas against their meta-schemas
     sourcemeta::blaze::SimpleOutput output{schema.value()};
     sourcemeta::blaze::Evaluator evaluator;
diff --git a/src/index/index.cc b/src/index/index.cc
index 71d6c77a..bc7403da 100644
--- a/src/index/index.cc
+++ b/src/index/index.cc
@@ -183,6 +183,18 @@ static auto execute_plan(std::mutex &mutex,
             sourcemeta::core::SchemaReferenceObjectResourceError>(
             entry->path, error.identifier());
       }
 
+      throw;
+    } catch (const sourcemeta::core::SchemaVocabularyError &error) {
+      // Attach the path of the entry that this action refers to, if any,
+      // so that the error can be reported against a file
+      const auto *entry{
+          action.data.empty() ? nullptr : &resolver.entry(action.data)};
+      if (entry) {
+        throw sourcemeta::core::FileError<
+            sourcemeta::core::SchemaVocabularyError>(
+            entry->path, error.uri(), error.what());
+      }
+
       throw;
     }
 
@@ -670,6 +682,18 @@ auto main(int argc, char *argv[]) noexcept -> int {
   } catch (const sourcemeta::blaze::LinterMissingNameError &error) {
     std::cerr << "error: " << error.what() << "\n";
     return EXIT_FAILURE;
+  } catch (
+      const sourcemeta::core::FileError<sourcemeta::core::SchemaVocabularyError>
+          &error) {
+    // A vocabulary error raised for a schema with a known file path
+    std::cerr << "error: " << error.what() << "\n at vocabulary "
+              << error.uri() << "\n at path " << error.path().string() << "\n";
+    return EXIT_FAILURE;
+  } catch (const sourcemeta::core::SchemaVocabularyError &error) {
+    // The same kind of error, without any file information to report
+    std::cerr << "error: " << error.what() << "\n at vocabulary "
+              << error.uri() << "\n";
+    return EXIT_FAILURE;
   } catch (const sourcemeta::core::FileError<
            sourcemeta::core::SchemaUnknownBaseDialectError> &error) {
     std::cerr << "error: " << error.what() << "\n at path "
diff --git a/test/cli/CMakeLists.txt b/test/cli/CMakeLists.txt
index b731e79f..c5961523 100644
--- a/test/cli/CMakeLists.txt
+++ b/test/cli/CMakeLists.txt
@@ -58,6 +58,9 @@ if(ONE_INDEX)
   sourcemeta_one_test_cli(common index fail-schema-self-metaschema)
   sourcemeta_one_test_cli(common index fail-self-referencing-metaschema)
   sourcemeta_one_test_cli(common index fail-unknown-dialect)
+  sourcemeta_one_test_cli(common index fail-unknown-required-vocabulary)
+  sourcemeta_one_test_cli(common index fail-unknown-required-vocabulary-standalone)
+  sourcemeta_one_test_cli(common index fail-no-evaluate-unknown-required-vocabulary)
   sourcemeta_one_test_cli(common index fail-unknown-option)
   sourcemeta_one_test_cli(common index fail-vocabulary-not-object)
   sourcemeta_one_test_cli(common index draft4-ignore-vocabulary)
diff --git a/test/cli/index/common/fail-no-evaluate-unknown-required-vocabulary.sh b/test/cli/index/common/fail-no-evaluate-unknown-required-vocabulary.sh
new file mode 100755
index 00000000..97357b80
--- /dev/null
+++ b/test/cli/index/common/fail-no-evaluate-unknown-required-vocabulary.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << EOF > "$TMP/one.json"
+{
+  "url": "http://localhost:8000",
+  "html": false,
+  "contents": {
+    "example": {
+      "baseUri": "https://example.com",
+      "path": "./schemas",
+      "x-sourcemeta-one:evaluate": false
+    }
+  }
+}
+EOF
+
+mkdir "$TMP/schemas"
+
+# A custom metaschema that requires an unknown vocabulary
+cat << 'EOF' > "$TMP/schemas/custom-meta.json"
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://example.com/custom-meta",
+  "$vocabulary": {
+    "https://json-schema.org/draft/2020-12/vocab/core": true,
+    "https://json-schema.org/draft/2020-12/vocab/applicator": true,
+    "https://json-schema.org/draft/2020-12/vocab/validation": true,
+    "https://example.com/vocab/totally-unknown": true
+  }
+}
+EOF
+
+# A schema that uses the custom metaschema
+cat << 'EOF' > "$TMP/schemas/test.json"
+{
+  "$schema": "https://example.com/custom-meta",
+  "$id": "https://example.com/test",
+  "type": "string"
+}
+EOF
+
+remove_threads_information() {
+  expr='s/ \[[^]]*[^a-z-][^]]*\]//g'
+  if [ "$(uname -s)" = "Darwin" ]; then
+    sed -i '' "$expr" "$1"
+  else
+    sed -i "$expr" "$1"
+  fi
+}
+
+"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 2> "$TMP/output.txt" && CODE="$?" || CODE="$?"
+test "$CODE" = "1" || exit 1
+remove_threads_information "$TMP/output.txt"
+
+cat << EOF > "$TMP/expected1.txt"
+Writing output to: $(realpath "$TMP")/output
+Using configuration: $(realpath "$TMP")/one.json
+Detecting: $(realpath "$TMP")/schemas/custom-meta.json (#1)
+Detecting: $(realpath "$TMP")/schemas/test.json (#2)
+( 50%) Resolving: custom-meta.json
+(100%) Resolving: test.json
+( 4%) Producing: configuration.json
+( 8%) Producing: version.json
+( 12%) Producing: schemas/example/custom-meta/%/schema.metapack
+error: The metaschema requires an unrecognised vocabulary
+ at vocabulary https://example.com/vocab/totally-unknown
+ at path $(realpath "$TMP")/schemas/custom-meta.json
+EOF
+
+cat << EOF > "$TMP/expected2.txt"
+Writing output to: $(realpath "$TMP")/output
+Using configuration: $(realpath "$TMP")/one.json
+Detecting: $(realpath "$TMP")/schemas/test.json (#1)
+Detecting: $(realpath "$TMP")/schemas/custom-meta.json (#2)
+( 50%) Resolving: test.json
+(100%) Resolving: custom-meta.json
+( 4%) Producing: configuration.json
+( 8%) Producing: version.json
+( 12%) Producing: schemas/example/custom-meta/%/schema.metapack
+error: The metaschema requires an unrecognised vocabulary
+ at vocabulary https://example.com/vocab/totally-unknown
+ at path $(realpath "$TMP")/schemas/custom-meta.json
+EOF
+
+# Accept either detection order of the two schemas
+diff "$TMP/output.txt" "$TMP/expected1.txt" || diff "$TMP/output.txt" "$TMP/expected2.txt"
diff --git a/test/cli/index/common/fail-unknown-required-vocabulary-standalone.sh b/test/cli/index/common/fail-unknown-required-vocabulary-standalone.sh
new file mode 100755
index 00000000..f5751e7c
--- /dev/null
+++ b/test/cli/index/common/fail-unknown-required-vocabulary-standalone.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << EOF > "$TMP/one.json"
+{
+  "url": "https://sourcemeta.com/",
+  "contents": {
+    "example": {
+      "contents": {
+        "schemas": {
+          "baseUri": "https://example.com/",
+          "path": "./schemas"
+        }
+      }
+    }
+  }
+}
+EOF
+
+mkdir "$TMP/schemas"
+
+# A custom metaschema that requires an unknown vocabulary,
+# with no other schema using it
+cat << 'EOF' > "$TMP/schemas/custom-meta.json"
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://example.com/custom-meta",
+  "$vocabulary": {
+    "https://json-schema.org/draft/2020-12/vocab/core": true,
+    "https://json-schema.org/draft/2020-12/vocab/applicator": true,
+    "https://json-schema.org/draft/2020-12/vocab/validation": true,
+    "https://example.com/vocab/totally-unknown": true
+  }
+}
+EOF
+
+"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 2> "$TMP/output.txt" && CODE="$?" || CODE="$?"
+test "$CODE" = "1" || exit 1
+
+# Remove thread information
+if [ "$(uname)" = "Darwin" ]
+then
+  sed -i '' 's/ \[.*\]//g' "$TMP/output.txt"
+else
+  sed -i 's/ \[.*\]//g' "$TMP/output.txt"
+fi
+
+cat << EOF > "$TMP/expected.txt"
+Writing output to: $(realpath "$TMP")/output
+Using configuration: $(realpath "$TMP")/one.json
+Detecting: $(realpath "$TMP")/schemas/custom-meta.json (#1)
+(100%) Resolving: custom-meta.json
+( 4%) Producing: configuration.json
+( 8%) Producing: version.json
+( 13%) Producing: explorer/%/404.metapack
+( 17%) Producing: schemas/example/schemas/custom-meta/%/schema.metapack
+error: The metaschema requires an unrecognised vocabulary
+ at vocabulary https://example.com/vocab/totally-unknown
+ at path $(realpath "$TMP")/schemas/custom-meta.json
+EOF
+
+diff "$TMP/output.txt" "$TMP/expected.txt"
diff --git a/test/cli/index/common/fail-unknown-required-vocabulary.sh b/test/cli/index/common/fail-unknown-required-vocabulary.sh
new file mode 100755
index 00000000..22a91636
--- /dev/null
+++ b/test/cli/index/common/fail-unknown-required-vocabulary.sh
@@ -0,0 +1,97 @@
+#!/bin/sh
+
+set -o errexit
+set -o nounset
+
+TMP="$(mktemp -d)"
+clean() { rm -rf "$TMP"; }
+trap clean EXIT
+
+cat << EOF > "$TMP/one.json"
+{
+  "url": "https://sourcemeta.com/",
+  "contents": {
+    "example": {
+      "contents": {
+        "schemas": {
+          "baseUri": "https://example.com/",
+          "path": "./schemas"
+        }
+      }
+    }
+  }
+}
+EOF
+
+mkdir "$TMP/schemas"
+
+# A custom metaschema that requires an unknown vocabulary
+cat << 'EOF' > "$TMP/schemas/custom-meta.json"
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://example.com/custom-meta",
+  "$vocabulary": {
+    "https://json-schema.org/draft/2020-12/vocab/core": true,
+    "https://json-schema.org/draft/2020-12/vocab/applicator": true,
+    "https://json-schema.org/draft/2020-12/vocab/validation": true,
+    "https://example.com/vocab/totally-unknown": true
+  }
+}
+EOF
+
+# A schema that uses the custom metaschema
+cat << 'EOF' > "$TMP/schemas/test.json"
+{
+  "$schema": "https://example.com/custom-meta",
+  "$id": "https://example.com/test",
+  "type": "string"
+}
+EOF
+
+remove_threads_information() {
+  expr='s/ \[[^]]*[^a-z-][^]]*\]//g'
+  if [ "$(uname -s)" = "Darwin" ]; then
+    sed -i '' "$expr" "$1"
+  else
+    sed -i "$expr" "$1"
+  fi
+}
+
+"$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 2> "$TMP/output.txt" && CODE="$?" || CODE="$?"
+test "$CODE" = "1" || exit 1
+remove_threads_information "$TMP/output.txt"
+
+cat << EOF > "$TMP/expected1.txt"
+Writing output to: $(realpath "$TMP")/output
+Using configuration: $(realpath "$TMP")/one.json
+Detecting: $(realpath "$TMP")/schemas/custom-meta.json (#1)
+Detecting: $(realpath "$TMP")/schemas/test.json (#2)
+( 50%) Resolving: custom-meta.json
+(100%) Resolving: test.json
+( 2%) Producing: configuration.json
+( 5%) Producing: version.json
+( 8%) Producing: explorer/%/404.metapack
+( 11%) Producing: schemas/example/schemas/custom-meta/%/schema.metapack
+error: The metaschema requires an unrecognised vocabulary
+ at vocabulary https://example.com/vocab/totally-unknown
+ at path $(realpath "$TMP")/schemas/custom-meta.json
+EOF
+
+cat << EOF > "$TMP/expected2.txt"
+Writing output to: $(realpath "$TMP")/output
+Using configuration: $(realpath "$TMP")/one.json
+Detecting: $(realpath "$TMP")/schemas/test.json (#1)
+Detecting: $(realpath "$TMP")/schemas/custom-meta.json (#2)
+( 50%) Resolving: test.json
+(100%) Resolving: custom-meta.json
+( 2%) Producing: configuration.json
+( 5%) Producing: version.json
+( 8%) Producing: explorer/%/404.metapack
+( 11%) Producing: schemas/example/schemas/custom-meta/%/schema.metapack
+error: The metaschema requires an unrecognised vocabulary
+ at vocabulary https://example.com/vocab/totally-unknown
+ at path $(realpath "$TMP")/schemas/custom-meta.json
+EOF
+
+# Accept either detection order of the two schemas
+diff "$TMP/output.txt" "$TMP/expected1.txt" || diff "$TMP/output.txt" "$TMP/expected2.txt"