From b5b9d6564e8f5fea6e610cc4f09ccc4aa20d2e97 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 1 Jun 2026 16:07:59 -0400 Subject: [PATCH 1/2] WIP Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 4 +- vendor/blaze/DEPENDENCIES | 4 +- vendor/blaze/config.cmake.in | 4 +- ...mascript-regex-for-ES2018-lookbehind.patch | 40 --- vendor/blaze/src/alterschema/alterschema.cc | 4 + .../linter/pattern_non_ecma_regex.h | 46 ++++ .../pattern_properties_non_ecma_regex.h | 48 ++++ vendor/blaze/src/compiler/default_compiler.cc | 3 + .../src/compiler/default_compiler_draft3.h | 141 +++++++++-- vendor/blaze/src/evaluator/CMakeLists.txt | 2 + .../blaze/src/evaluator/evaluator_describe.cc | 12 + .../sourcemeta/blaze/evaluator_dispatch.h | 13 + .../sourcemeta/blaze/evaluator_value.h | 6 +- vendor/core/CMakeLists.txt | 9 + vendor/core/cmake/FindPCRE2.cmake | 2 + .../core/cmake/common/compiler/options.cmake | 26 +- vendor/core/config.cmake.in | 6 + vendor/core/src/core/css/CMakeLists.txt | 9 + vendor/core/src/core/css/css2_color.cc | 235 ++++++++++++++++++ .../core/css/include/sourcemeta/core/css.h | 128 ++++++++++ vendor/core/src/core/dns/hostname.cc | 16 ++ .../jsonrpc/include/sourcemeta/core/jsonrpc.h | 31 +++ vendor/core/src/core/jsonrpc/jsonrpc.cc | 8 + .../core/mcp/include/sourcemeta/core/mcp.h | 15 +- vendor/core/src/core/mcp/mcp.cc | 6 +- vendor/core/src/core/time/CMakeLists.txt | 2 +- .../core/time/include/sourcemeta/core/time.h | 27 ++ .../time/rfc3339_partialtime_no_secfrac.cc | 59 +++++ vendor/core/src/core/uri/CMakeLists.txt | 2 + .../core/uri/include/sourcemeta/core/uri.h | 46 +++- vendor/core/src/core/uri/parse.cc | 182 ++++++++++---- .../core/uritemplate/uritemplate_router.cc | 158 ++++++------ .../uritemplate/uritemplate_router_view.cc | 75 ++---- .../include/sourcemeta/core/numeric_util.h | 21 ++ .../lang/text/include/sourcemeta/core/text.h | 55 +++- vendor/core/src/lang/text/text.cc | 12 +- 36 files changed, 1194 insertions(+), 263 deletions(-) delete mode 100644 vendor/blaze/patches/jsonschema-test-suite/0001-Update-draft3-ecmascript-regex-for-ES2018-lookbehind.patch create mode 100644 vendor/blaze/src/alterschema/linter/pattern_non_ecma_regex.h create mode 100644 vendor/blaze/src/alterschema/linter/pattern_properties_non_ecma_regex.h create mode 100644 vendor/core/src/core/css/CMakeLists.txt create mode 100644 vendor/core/src/core/css/css2_color.cc create mode 100644 vendor/core/src/core/css/include/sourcemeta/core/css.h create mode 100644 vendor/core/src/core/time/rfc3339_partialtime_no_secfrac.cc diff --git a/DEPENDENCIES b/DEPENDENCIES index f263d5209..2ba35ceee 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,7 +1,7 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core ed4b82bd917e75041962358c5e9461ea5b2aa37e +core https://github.com/sourcemeta/core 5d187a796444fef1b5e044c659800085a4be7ef4 jsonbinpack https://github.com/sourcemeta/jsonbinpack 40293a182cabf15678ca0b59cddaf108e7862787 -blaze https://github.com/sourcemeta/blaze 9c1bae4dc45b9f04585c4520021bc8298cda2f51 +blaze https://github.com/sourcemeta/blaze 47ecf3c751d9e0e101e99567fa1d778d56e42432 mbedtls https://github.com/Mbed-TLS/mbedtls v3.6.6 curl https://github.com/curl/curl curl-8_20_0 nghttp2 https://github.com/nghttp2/nghttp2 v1.67.1 diff --git a/vendor/blaze/DEPENDENCIES b/vendor/blaze/DEPENDENCIES index 12c20e352..9ae6e513f 100644 --- a/vendor/blaze/DEPENDENCIES +++ b/vendor/blaze/DEPENDENCIES @@ -1,6 +1,6 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core cd56ace324a42f067b4b8f651f73b9aa0313ca2a -jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite aa77c9d343a2da809d4c8734f473c4d5b5d80f14 +core https://github.com/sourcemeta/core 5d187a796444fef1b5e044c659800085a4be7ef4 +jsonschema-test-suite https://github.com/json-schema-org/JSON-Schema-Test-Suite 60755c1097769e313fae3ec4d63bcc9d49b5d2d5 jsonschema-2020-12 https://github.com/json-schema-org/json-schema-spec 769daad75a9553562333a8937a187741cb708c72 jsonschema-2019-09 https://github.com/json-schema-org/json-schema-spec 41014ea723120ce70b314d72f863c6929d9f3cfd jsonschema-draft7 https://github.com/json-schema-org/json-schema-spec 567f768506aaa33a38e552c85bf0586029ef1b32 diff --git a/vendor/blaze/config.cmake.in b/vendor/blaze/config.cmake.in index 499cd43c2..cd9163c56 100644 --- a/vendor/blaze/config.cmake.in +++ b/vendor/blaze/config.cmake.in @@ -20,7 +20,9 @@ if(NOT BLAZE_COMPONENTS) endif() include(CMakeFindDependencyMacro) -find_dependency(Core COMPONENTS regex uri uritemplate json jsonpointer io yaml crypto html email ip dns time) +find_dependency(Core COMPONENTS + unicode punycode idna regex uri uritemplate json + jsonpointer io yaml crypto html email ip dns time css) foreach(component ${BLAZE_COMPONENTS}) if(component STREQUAL "foundation") diff --git a/vendor/blaze/patches/jsonschema-test-suite/0001-Update-draft3-ecmascript-regex-for-ES2018-lookbehind.patch b/vendor/blaze/patches/jsonschema-test-suite/0001-Update-draft3-ecmascript-regex-for-ES2018-lookbehind.patch deleted file mode 100644 index fae5d9965..000000000 --- a/vendor/blaze/patches/jsonschema-test-suite/0001-Update-draft3-ecmascript-regex-for-ES2018-lookbehind.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 8a0745f18ef6c2fc281c62ff3fbea712c72de76f Mon Sep 17 00:00:00 2001 -From: Juan Cruz Viotti -Date: Thu, 28 May 2026 10:27:12 -0400 -Subject: [PATCH] Update draft3 ecmascript-regex for ES2018 lookbehind - -Lookbehind assertions were added to ECMA-262 in ES2018 and are -accepted by every modern JavaScript engine, so "(?<=foo)bar" is in -fact a valid ECMA 262 regex. The existing test asserting otherwise -predates that addition and no longer reflects the specification. - -Flip the existing lookbehind case to valid and refresh its -description. Add a new case using Python's "(?Px)" named-group -syntax which remains genuinely outside ECMA 262, since ECMA uses -"(?x)" without the leading P. ---- - tests/draft3/optional/format/ecmascript-regex.json | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/tests/draft3/optional/format/ecmascript-regex.json b/tests/draft3/optional/format/ecmascript-regex.json -index 03fe977..2ffc929 100644 ---- a/tests/draft3/optional/format/ecmascript-regex.json -+++ b/tests/draft3/optional/format/ecmascript-regex.json -@@ -9,8 +9,13 @@ - "valid": true - }, - { -- "description": "ECMA 262 has no support for lookbehind", -+ "description": "ECMA 262 supports lookbehind since ES2018", - "data": "(?<=foo)bar", -+ "valid": true -+ }, -+ { -+ "description": "ECMA 262 does not support Python-style named groups", -+ "data": "(?Px)", - "valid": false - } - ] --- -2.54.0 - diff --git a/vendor/blaze/src/alterschema/alterschema.cc b/vendor/blaze/src/alterschema/alterschema.cc index 2f1a6cb76..4e511019c 100644 --- a/vendor/blaze/src/alterschema/alterschema.cc +++ b/vendor/blaze/src/alterschema/alterschema.cc @@ -253,7 +253,9 @@ auto WALK_UP_IN_PLACE_APPLICATORS(const JSON &root, const SchemaFrame &frame, #include "linter/items_array_default.h" #include "linter/items_schema_default.h" #include "linter/multiple_of_default.h" +#include "linter/pattern_non_ecma_regex.h" #include "linter/pattern_properties_default.h" +#include "linter/pattern_properties_non_ecma_regex.h" #include "linter/portable_anchor_names.h" #include "linter/properties_default.h" #include "linter/property_names_default.h" @@ -459,6 +461,8 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void { bundle.add(); bundle.add(); bundle.add(); + bundle.add(); + bundle.add(); bundle.add(); bundle.add(); bundle.add(); diff --git a/vendor/blaze/src/alterschema/linter/pattern_non_ecma_regex.h b/vendor/blaze/src/alterschema/linter/pattern_non_ecma_regex.h new file mode 100644 index 000000000..cef672f04 --- /dev/null +++ b/vendor/blaze/src/alterschema/linter/pattern_non_ecma_regex.h @@ -0,0 +1,46 @@ +class PatternNonEcmaRegex final : public SchemaTransformRule { +public: + using mutates = std::false_type; + using reframe_after_transform = std::false_type; + PatternNonEcmaRegex() + : SchemaTransformRule{ + "pattern_non_ecma_regex", + "For interoperability reasons, only set this keyword to a regular " + "expression that strictly adheres to the ECMA-262 dialect"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::blaze::Vocabularies &vocabularies, + const sourcemeta::blaze::SchemaFrame &, + const sourcemeta::blaze::SchemaFrame::Location &, + const sourcemeta::blaze::SchemaWalker &, + const sourcemeta::blaze::SchemaResolver &) const + -> SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Validation, + Vocabularies::Known::JSON_Schema_2019_09_Validation, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_7_Hyper, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_6_Hyper, + Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_4_Hyper, + Vocabularies::Known::JSON_Schema_Draft_3, + Vocabularies::Known::JSON_Schema_Draft_3_Hyper, + Vocabularies::Known::JSON_Schema_Draft_2, + Vocabularies::Known::JSON_Schema_Draft_2_Hyper, + Vocabularies::Known::JSON_Schema_Draft_1, + Vocabularies::Known::JSON_Schema_Draft_1_Hyper, + Vocabularies::Known::JSON_Schema_Draft_0, + Vocabularies::Known::JSON_Schema_Draft_0_Hyper})); + ONLY_CONTINUE_IF(schema.is_object()); + + const auto *pattern_value{schema.try_at("pattern")}; + ONLY_CONTINUE_IF(pattern_value && pattern_value->is_string()); + + ONLY_CONTINUE_IF( + !sourcemeta::core::is_regex_ecma(pattern_value->to_string())); + return APPLIES_TO_KEYWORDS("pattern"); + } +}; diff --git a/vendor/blaze/src/alterschema/linter/pattern_properties_non_ecma_regex.h b/vendor/blaze/src/alterschema/linter/pattern_properties_non_ecma_regex.h new file mode 100644 index 000000000..6fd2d25b9 --- /dev/null +++ b/vendor/blaze/src/alterschema/linter/pattern_properties_non_ecma_regex.h @@ -0,0 +1,48 @@ +class PatternPropertiesNonEcmaRegex final : public SchemaTransformRule { +public: + using mutates = std::false_type; + using reframe_after_transform = std::false_type; + PatternPropertiesNonEcmaRegex() + : SchemaTransformRule{ + "pattern_properties_non_ecma_regex", + "For interoperability reasons, only set the keys of this keyword " + "to regular expressions that strictly adhere to the ECMA-262 " + "dialect"} {}; + + [[nodiscard]] auto + condition(const sourcemeta::core::JSON &schema, + const sourcemeta::core::JSON &, + const sourcemeta::blaze::Vocabularies &vocabularies, + const sourcemeta::blaze::SchemaFrame &, + const sourcemeta::blaze::SchemaFrame::Location &, + const sourcemeta::blaze::SchemaWalker &, + const sourcemeta::blaze::SchemaResolver &) const + -> SchemaTransformRule::Result override { + ONLY_CONTINUE_IF(vocabularies.contains_any( + {Vocabularies::Known::JSON_Schema_2020_12_Applicator, + Vocabularies::Known::JSON_Schema_2019_09_Applicator, + Vocabularies::Known::JSON_Schema_Draft_7, + Vocabularies::Known::JSON_Schema_Draft_7_Hyper, + Vocabularies::Known::JSON_Schema_Draft_6, + Vocabularies::Known::JSON_Schema_Draft_6_Hyper, + Vocabularies::Known::JSON_Schema_Draft_4, + Vocabularies::Known::JSON_Schema_Draft_4_Hyper, + Vocabularies::Known::JSON_Schema_Draft_3, + Vocabularies::Known::JSON_Schema_Draft_3_Hyper})); + ONLY_CONTINUE_IF(schema.is_object()); + + const auto *pattern_properties{schema.try_at("patternProperties")}; + ONLY_CONTINUE_IF(pattern_properties && pattern_properties->is_object() && + !pattern_properties->empty()); + + std::vector offenders; + for (const auto &entry : pattern_properties->as_object()) { + if (!sourcemeta::core::is_regex_ecma(entry.first)) { + offenders.push_back(Pointer{"patternProperties", entry.first}); + } + } + + ONLY_CONTINUE_IF(!offenders.empty()); + return APPLIES_TO_POINTERS(std::move(offenders)); + } +}; diff --git a/vendor/blaze/src/compiler/default_compiler.cc b/vendor/blaze/src/compiler/default_compiler.cc index 28b20bdbb..5fe917fec 100644 --- a/vendor/blaze/src/compiler/default_compiler.cc +++ b/vendor/blaze/src/compiler/default_compiler.cc @@ -28,6 +28,7 @@ auto sourcemeta::blaze::default_schema_compiler( Known::JSON_Schema_2020_12_Validation, Known::JSON_Schema_2020_12_Meta_Data, Known::JSON_Schema_2020_12_Format_Annotation, + Known::JSON_Schema_2020_12_Format_Assertion, Known::JSON_Schema_2020_12_Content, Known::JSON_Schema_2019_09_Core, Known::JSON_Schema_2019_09_Applicator, @@ -113,6 +114,8 @@ auto sourcemeta::blaze::default_schema_compiler( compiler_2019_09_content_contentschema); COMPILE(Known::JSON_Schema_2020_12_Format_Annotation, "format", compiler_draft3_validation_format); + COMPILE(Known::JSON_Schema_2020_12_Format_Assertion, "format", + compiler_draft3_validation_format); // Same as Draft 7 diff --git a/vendor/blaze/src/compiler/default_compiler_draft3.h b/vendor/blaze/src/compiler/default_compiler_draft3.h index d46ab35f8..42ec05ff9 100644 --- a/vendor/blaze/src/compiler/default_compiler_draft3.h +++ b/vendor/blaze/src/compiler/default_compiler_draft3.h @@ -2447,15 +2447,85 @@ auto compiler_draft3_validation_format(const Context &context, static constexpr auto unsupported_dialect_message{ "The format assertion tweak not supported in this dialect"}; - if (schema_context.vocabularies.contains(Known::JSON_Schema_2019_09_Format) || - schema_context.vocabularies.contains( - Known::JSON_Schema_2020_12_Format_Annotation)) { - if (context.tweaks.format_assertion) { - throw sourcemeta::blaze::CompilerError( - schema_context.base, to_pointer(schema_context.relative_pointer), - unsupported_dialect_message); + const auto is_2019_09_format{ + schema_context.vocabularies.contains(Known::JSON_Schema_2019_09_Format)}; + const auto is_2020_12_format_annotation{schema_context.vocabularies.contains( + Known::JSON_Schema_2020_12_Format_Annotation)}; + const auto is_2020_12_format_assertion{schema_context.vocabularies.contains( + Known::JSON_Schema_2020_12_Format_Assertion)}; + + if ((is_2019_09_format && context.tweaks.format_assertion) || + is_2020_12_format_assertion || + (is_2020_12_format_annotation && context.tweaks.format_assertion)) { + const auto &format{schema_context.schema.at(dynamic_context.keyword)}; + if (!format.is_string()) { + return {}; } + const auto &name{format.to_string()}; + ValueStringType type; + if (name == "date-time") { + type = ValueStringType::DateTime; + } else if (name == "date") { + type = ValueStringType::Date; + } else if (name == "time") { + type = ValueStringType::Time; + } else if (name == "duration") { + type = ValueStringType::Duration; + } else if (name == "email") { + type = ValueStringType::Email; + } else if (name == "idn-email") { + type = ValueStringType::IDNEmail; + } else if (name == "hostname") { + type = ValueStringType::Hostname; + } else if (name == "idn-hostname") { + type = ValueStringType::IDNHostname; + } else if (name == "ipv4") { + type = ValueStringType::IPv4; + } else if (name == "ipv6") { + type = ValueStringType::IPv6; + } else if (name == "uri") { + type = ValueStringType::URI; + } else if (name == "uri-reference") { + type = ValueStringType::URIReference; + } else if (name == "iri") { + type = ValueStringType::IRI; + } else if (name == "iri-reference") { + type = ValueStringType::IRIReference; + } else if (name == "uri-template") { + type = ValueStringType::URITemplate; + } else if (name == "json-pointer") { + type = ValueStringType::JSONPointer; + } else if (name == "relative-json-pointer") { + type = ValueStringType::RelativeJSONPointer; + } else if (name == "regex") { + type = ValueStringType::Regex; + } else if (name == "uuid") { + type = ValueStringType::UUID; + } else { + return {}; + } + + Instructions instructions{ + make(sourcemeta::blaze::InstructionIndex::AssertionStringType, context, + schema_context, dynamic_context, type)}; + + if (context.mode == Mode::Exhaustive) { + Instructions annotation_children{ + make(sourcemeta::blaze::InstructionIndex::AnnotationEmit, context, + schema_context, dynamic_context, + sourcemeta::core::JSON{ + schema_context.schema.at(dynamic_context.keyword)})}; + instructions.push_back( + make(sourcemeta::blaze::InstructionIndex::ControlGroupWhenType, + context, schema_context, relative_dynamic_context(), + ValueType::String, std::move(annotation_children))); + } + + return instructions; + } + + if (is_2019_09_format || is_2020_12_format_annotation) { if (context.mode == Mode::FastValidation) { return {}; } @@ -2492,8 +2562,49 @@ auto compiler_draft3_validation_format(const Context &context, const auto is_draft6{ schema_context.vocabularies.contains(Known::JSON_Schema_Draft_6) || schema_context.vocabularies.contains(Known::JSON_Schema_Draft_6_Hyper)}; + const auto is_draft7{ + schema_context.vocabularies.contains(Known::JSON_Schema_Draft_7) || + schema_context.vocabularies.contains(Known::JSON_Schema_Draft_7_Hyper)}; - if (is_draft4 || is_draft6) { + if (is_draft7) { + if (name == "date-time") { + type = ValueStringType::DateTime; + } else if (name == "date") { + type = ValueStringType::Date; + } else if (name == "time") { + type = ValueStringType::Time; + } else if (name == "email") { + type = ValueStringType::Email; + } else if (name == "idn-email") { + type = ValueStringType::IDNEmail; + } else if (name == "hostname") { + type = ValueStringType::Hostname; + } else if (name == "idn-hostname") { + type = ValueStringType::IDNHostname; + } else if (name == "ipv4") { + type = ValueStringType::IPv4; + } else if (name == "ipv6") { + type = ValueStringType::IPv6; + } else if (name == "uri") { + type = ValueStringType::URI; + } else if (name == "uri-reference") { + type = ValueStringType::URIReference; + } else if (name == "uri-template") { + type = ValueStringType::URITemplate; + } else if (name == "json-pointer") { + type = ValueStringType::JSONPointer; + } else if (name == "relative-json-pointer") { + type = ValueStringType::RelativeJSONPointer; + } else if (name == "regex") { + type = ValueStringType::Regex; + } else if (name == "iri") { + type = ValueStringType::IRI; + } else if (name == "iri-reference") { + type = ValueStringType::IRIReference; + } else { + return {}; + } + } else if (is_draft4 || is_draft6) { if (name == "date-time") { type = ValueStringType::DateTime; } else if (name == "email") { @@ -2531,24 +2642,26 @@ auto compiler_draft3_validation_format(const Context &context, } else if (name == "date") { type = ValueStringType::Date; } else if (name == "time") { - throw sourcemeta::blaze::CompilerError( - schema_context.base, to_pointer(schema_context.relative_pointer), - "The \"time\" format is not supported in assertion mode yet"); + type = ValueStringType::PartialTime; } else if (name == "utc-millisec") { + // TODO: Support this old format, even though not even the official test + // suite covers it throw sourcemeta::blaze::CompilerError( schema_context.base, to_pointer(schema_context.relative_pointer), "The \"utc-millisec\" format is not supported in assertion mode yet"); } else if (name == "regex") { type = ValueStringType::Regex; } else if (name == "color") { - throw sourcemeta::blaze::CompilerError( - schema_context.base, to_pointer(schema_context.relative_pointer), - "The \"color\" format is not supported in assertion mode yet"); + type = ValueStringType::Color; } else if (name == "style") { + // TODO: Support this old format, even though not even the official test + // suite covers it throw sourcemeta::blaze::CompilerError( schema_context.base, to_pointer(schema_context.relative_pointer), "The \"style\" format is not supported in assertion mode yet"); } else if (name == "phone") { + // TODO: Support this old format, even though not even the official test + // suite covers it throw sourcemeta::blaze::CompilerError( schema_context.base, to_pointer(schema_context.relative_pointer), "The \"phone\" format is not supported in assertion mode yet"); diff --git a/vendor/blaze/src/evaluator/CMakeLists.txt b/vendor/blaze/src/evaluator/CMakeLists.txt index 4f2b8b010..2b04a12f8 100644 --- a/vendor/blaze/src/evaluator/CMakeLists.txt +++ b/vendor/blaze/src/evaluator/CMakeLists.txt @@ -31,3 +31,5 @@ target_link_libraries(sourcemeta_blaze_evaluator PUBLIC sourcemeta::core::time) target_link_libraries(sourcemeta_blaze_evaluator PUBLIC sourcemeta::core::crypto) +target_link_libraries(sourcemeta_blaze_evaluator PUBLIC + sourcemeta::core::css) diff --git a/vendor/blaze/src/evaluator/evaluator_describe.cc b/vendor/blaze/src/evaluator/evaluator_describe.cc index d4f8dd43e..e970478c3 100644 --- a/vendor/blaze/src/evaluator/evaluator_describe.cc +++ b/vendor/blaze/src/evaluator/evaluator_describe.cc @@ -2123,6 +2123,12 @@ auto describe(const bool valid, const Instruction &step, case ValueStringType::URITemplate: message << " URI template"; break; + case ValueStringType::IRI: + message << " IRI"; + break; + case ValueStringType::IRIReference: + message << " IRI reference"; + break; case ValueStringType::Email: message << " email address"; break; @@ -2150,6 +2156,9 @@ auto describe(const bool valid, const Instruction &step, case ValueStringType::Time: message << " RFC 3339 full-time"; break; + case ValueStringType::PartialTime: + message << " RFC 3339 partial-time without fractional seconds"; + break; case ValueStringType::Duration: message << " RFC 3339 duration"; break; @@ -2165,6 +2174,9 @@ auto describe(const bool valid, const Instruction &step, case ValueStringType::Regex: message << " ECMA-262 regular expression"; break; + case ValueStringType::Color: + message << " CSS 2 color"; + break; default: return unknown(); } diff --git a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h index 11028e34a..bf9fa62dc 100644 --- a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h +++ b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_dispatch.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -881,6 +882,12 @@ INSTRUCTION_HANDLER(AssertionStringType) { case ValueStringType::URITemplate: result = URITemplate::is_uritemplate(target); break; + case ValueStringType::IRI: + result = URI::is_iri(target); + break; + case ValueStringType::IRIReference: + result = URI::is_iri_reference(target); + break; case ValueStringType::Email: result = is_email(target); break; @@ -908,6 +915,9 @@ INSTRUCTION_HANDLER(AssertionStringType) { case ValueStringType::Time: result = is_rfc3339_fulltime(target); break; + case ValueStringType::PartialTime: + result = is_rfc3339_partialtime_no_secfrac(target); + break; case ValueStringType::Duration: result = is_rfc3339_duration(target); break; @@ -923,6 +933,9 @@ INSTRUCTION_HANDLER(AssertionStringType) { case ValueStringType::Regex: result = is_regex_ecma(target); break; + case ValueStringType::Color: + result = is_css2_color(target); + break; default: std::unreachable(); } diff --git a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_value.h b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_value.h index df37cd09e..38e59f7a5 100644 --- a/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_value.h +++ b/vendor/blaze/src/evaluator/include/sourcemeta/blaze/evaluator_value.h @@ -124,6 +124,8 @@ enum class ValueStringType : std::uint8_t { URI, URIReference, URITemplate, + IRI, + IRIReference, Email, IDNEmail, IPv4, @@ -133,11 +135,13 @@ enum class ValueStringType : std::uint8_t { DateTime, Date, Time, + PartialTime, Duration, JSONPointer, RelativeJSONPointer, UUID, - Regex + Regex, + Color }; /// @ingroup evaluator diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index 7c87031ed..288d5f994 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -33,6 +33,7 @@ option(SOURCEMETA_CORE_MCP "Build the Sourcemeta Core MCP library" ON) option(SOURCEMETA_CORE_SEMVER "Build the Sourcemeta Core SemVer library" ON) option(SOURCEMETA_CORE_GZIP "Build the Sourcemeta Core GZIP library" ON) option(SOURCEMETA_CORE_HTML "Build the Sourcemeta Core HTML library" ON) +option(SOURCEMETA_CORE_CSS "Build the Sourcemeta Core CSS library" ON) option(SOURCEMETA_CORE_MARKDOWN "Build the Sourcemeta Core Markdown library" ON) option(SOURCEMETA_CORE_TESTS "Build the Sourcemeta Core tests" OFF) option(SOURCEMETA_CORE_BENCHMARK "Build the Sourcemeta Core benchmarks" OFF) @@ -191,6 +192,10 @@ if(SOURCEMETA_CORE_HTML) add_subdirectory(src/core/html) endif() +if(SOURCEMETA_CORE_CSS) + add_subdirectory(src/core/css) +endif() + if(SOURCEMETA_CORE_MARKDOWN) find_package(CMarkGFM REQUIRED) add_subdirectory(src/core/markdown) @@ -341,6 +346,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/html) endif() + if(SOURCEMETA_CORE_CSS) + add_subdirectory(test/css) + endif() + if(SOURCEMETA_CORE_MARKDOWN) add_subdirectory(test/markdown) endif() diff --git a/vendor/core/cmake/FindPCRE2.cmake b/vendor/core/cmake/FindPCRE2.cmake index a3cf0ae4d..29e44e328 100644 --- a/vendor/core/cmake/FindPCRE2.cmake +++ b/vendor/core/cmake/FindPCRE2.cmake @@ -101,6 +101,7 @@ if(NOT PCRE2_FOUND) if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) target_compile_options(sljit PRIVATE -Wno-double-promotion) target_compile_options(sljit PRIVATE -Wno-conditional-uninitialized) + target_compile_options(sljit PRIVATE -fstrict-flex-arrays=0) endif() if(SOURCEMETA_COMPILER_MSVC) @@ -138,6 +139,7 @@ if(NOT PCRE2_FOUND) target_compile_options(pcre2 PRIVATE -Wno-overlength-strings) target_compile_options(pcre2 PRIVATE -Wno-conversion) target_compile_options(pcre2 PRIVATE -Wno-type-limits) + target_compile_options(pcre2 PRIVATE -fstrict-flex-arrays=0) endif() if(SOURCEMETA_COMPILER_MSVC) diff --git a/vendor/core/cmake/common/compiler/options.cmake b/vendor/core/cmake/common/compiler/options.cmake index 4a8c33b17..150799252 100644 --- a/vendor/core/cmake/common/compiler/options.cmake +++ b/vendor/core/cmake/common/compiler/options.cmake @@ -73,7 +73,13 @@ function(sourcemeta_add_default_options visibility target) # run analyses that never reach codegen, costing build time for no # behavioral effect $<$>:-funroll-loops> - $<$>:-ftree-vectorize>) + $<$>:-ftree-vectorize> + + # See https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html + -Wformat + -Wformat=2 + -Werror=format-security + -fstrict-flex-arrays=3) # Hardware-assisted control-flow protection. The compiler emits these as # HINT-space instructions that are NOPs on CPUs without the feature, so @@ -114,6 +120,11 @@ function(sourcemeta_add_default_options visibility target) $<$>:-fvectorize> # Enable vectorization of straight-line code for performance $<$>:-fslp-vectorize>) + + # Prevent the compiler from deleting redundant null-pointer checks after + # a dereference would normally prove them unreachable + target_compile_options("${target}" ${visibility} + $<$>:-fno-delete-null-pointer-checks>) elseif(SOURCEMETA_COMPILER_GCC) target_compile_options("${target}" ${visibility} # Newer versions of GCC (i.e. 14) seem to print a lot of false-positives here @@ -123,8 +134,21 @@ function(sourcemeta_add_default_options visibility target) # Disables runtime type information $<$,$>:-fno-rtti> # See https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html + -Wtrampolines + -Wbidi-chars=any -fstack-clash-protection) + # Prevent the compiler from deleting redundant null-pointer checks after + # a dereference would normally prove them unreachable + target_compile_options("${target}" ${visibility} + $<$>:-fno-delete-null-pointer-checks>) + + # Prevent the compiler from assuming shared library symbols could be + # interposed at runtime, enabling more inlining and devirtualization + if(BUILD_SHARED_LIBS) + target_compile_options("${target}" ${visibility} -fno-semantic-interposition) + endif() + # _GLIBCXX_ASSERTIONS is libstdc++ (GNU) specific, not honored by libc++ # (which the LLVM toolchain on Apple ships). Restrict to non-Apple GCC # to avoid emitting a Debug-only definition that does nothing on macOS diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index cd1bb4491..8b932aaef 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -29,6 +29,7 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS semver) list(APPEND SOURCEMETA_CORE_COMPONENTS gzip) list(APPEND SOURCEMETA_CORE_COMPONENTS html) + list(APPEND SOURCEMETA_CORE_COMPONENTS css) list(APPEND SOURCEMETA_CORE_COMPONENTS markdown) list(APPEND SOURCEMETA_CORE_COMPONENTS error) list(APPEND SOURCEMETA_CORE_COMPONENTS options) @@ -159,6 +160,11 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) elseif(component STREQUAL "html") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_html.cmake") + elseif(component STREQUAL "css") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_text.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_css.cmake") elseif(component STREQUAL "markdown") find_dependency(cmark_gfm CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_markdown.cmake") diff --git a/vendor/core/src/core/css/CMakeLists.txt b/vendor/core/src/core/css/CMakeLists.txt new file mode 100644 index 000000000..054580639 --- /dev/null +++ b/vendor/core/src/core/css/CMakeLists.txt @@ -0,0 +1,9 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME css + SOURCES css2_color.cc) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME css) +endif() + +target_link_libraries(sourcemeta_core_css PRIVATE sourcemeta::core::numeric) +target_link_libraries(sourcemeta_core_css PRIVATE sourcemeta::core::text) diff --git a/vendor/core/src/core/css/css2_color.cc b/vendor/core/src/core/css/css2_color.cc new file mode 100644 index 000000000..f7672e855 --- /dev/null +++ b/vendor/core/src/core/css/css2_color.cc @@ -0,0 +1,235 @@ +#include +#include +#include + +#include // std::array +#include // assert +#include // std::uint8_t + +namespace { + +// CSS Core Syntax whitespace: U+0009, U+000A, U+000C, U+000D, U+0020 +constexpr auto is_css_whitespace(const char character) noexcept -> bool { + return character == ' ' || character == '\t' || character == '\n' || + character == '\r' || character == '\f'; +} + +constexpr auto equals_ascii_ci(const std::string_view left, + const std::string_view right) noexcept -> bool { + assert(left.size() == right.size()); + for (std::string_view::size_type position{0}; position < left.size(); + position += 1) { + if (sourcemeta::core::to_lowercase(left[position]) != right[position]) { + return false; + } + } + return true; +} + +constexpr std::array CSS2_KEYWORDS{ + {"aqua", "black", "blue", "fuchsia", "gray", "green", "lime", "maroon", + "navy", "olive", "orange", "purple", "red", "silver", "teal", "white", + "yellow"}}; + +auto skip_whitespace(const std::string_view value, + std::string_view::size_type &position) noexcept -> void { + while (position < value.size() && is_css_whitespace(value[position])) { + position += 1; + } +} + +auto match_literal_ci(const std::string_view value, + std::string_view::size_type &position, + const std::string_view literal) noexcept -> bool { + if (position + literal.size() > value.size()) { + return false; + } + for (std::string_view::size_type index{0}; index < literal.size(); + index += 1) { + if (sourcemeta::core::to_lowercase(value[position + index]) != + literal[index]) { + return false; + } + } + position += literal.size(); + return true; +} + +auto match_byte(const std::string_view value, + std::string_view::size_type &position, + const char expected) noexcept -> bool { + if (position >= value.size() || value[position] != expected) { + return false; + } + position += 1; + return true; +} + +auto parse_number(const std::string_view value, + std::string_view::size_type &position, + bool &has_decimal) noexcept -> bool { + const auto start{position}; + has_decimal = false; + + if (position < value.size() && + (value[position] == '+' || value[position] == '-')) { + position += 1; + } + + const auto integer_start{position}; + while (position < value.size() && + sourcemeta::core::is_digit(value[position])) { + position += 1; + } + const auto integer_digits{position - integer_start}; + + if (position < value.size() && value[position] == '.') { + has_decimal = true; + position += 1; + const auto fractional_start{position}; + while (position < value.size() && + sourcemeta::core::is_digit(value[position])) { + position += 1; + } + const auto fractional_digits{position - fractional_start}; + if (fractional_digits == 0) { + position = start; + return false; + } + } else if (integer_digits == 0) { + position = start; + return false; + } + + return true; +} + +enum class RgbValueKind : std::uint8_t { Integer, Percentage }; + +auto parse_value(const std::string_view value, + std::string_view::size_type &position, + RgbValueKind &kind) noexcept -> bool { + const auto start{position}; + bool has_decimal{false}; + if (!parse_number(value, position, has_decimal)) { + return false; + } + + if (position < value.size() && value[position] == '%') { + position += 1; + kind = RgbValueKind::Percentage; + return true; + } + + if (has_decimal) { + position = start; + return false; + } + + kind = RgbValueKind::Integer; + return true; +} + +} // namespace + +namespace sourcemeta::core { + +auto is_css2_hex_color(const std::string_view value) noexcept -> bool { + if (value.size() != 4 && value.size() != 7) { + return false; + } + + if (value[0] != '#') { + return false; + } + + for (std::string_view::size_type position{1}; position < value.size(); + position += 1) { + if (!sourcemeta::core::is_hex_digit(value[position])) { + return false; + } + } + + return true; +} + +auto is_css2_color_keyword(const std::string_view value) noexcept -> bool { + if (value.size() < 3 || value.size() > 7) { + return false; + } + + for (const auto &keyword : CSS2_KEYWORDS) { + if (keyword.size() != value.size()) { + continue; + } + if (equals_ascii_ci(value, keyword)) { + return true; + } + } + + return false; +} + +auto is_css2_rgb_function(const std::string_view value) noexcept -> bool { + std::string_view::size_type position{0}; + + // Per CSS 2.1, the function-token is `IDENT(` with no whitespace between + // the identifier and the opening paren, and the `` value itself + // does not include surrounding whitespace + if (!match_literal_ci(value, position, "rgb")) { + return false; + } + + if (!match_byte(value, position, '(')) { + return false; + } + + skip_whitespace(value, position); + + RgbValueKind first_kind{}; + if (!parse_value(value, position, first_kind)) { + return false; + } + + skip_whitespace(value, position); + if (!match_byte(value, position, ',')) { + return false; + } + skip_whitespace(value, position); + + RgbValueKind second_kind{}; + if (!parse_value(value, position, second_kind)) { + return false; + } + if (second_kind != first_kind) { + return false; + } + + skip_whitespace(value, position); + if (!match_byte(value, position, ',')) { + return false; + } + skip_whitespace(value, position); + + RgbValueKind third_kind{}; + if (!parse_value(value, position, third_kind)) { + return false; + } + if (third_kind != first_kind) { + return false; + } + + skip_whitespace(value, position); + if (!match_byte(value, position, ')')) { + return false; + } + + return position == value.size(); +} + +auto is_css2_color(const std::string_view value) noexcept -> bool { + return is_css2_hex_color(value) || is_css2_color_keyword(value) || + is_css2_rgb_function(value); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/css/include/sourcemeta/core/css.h b/vendor/core/src/core/css/include/sourcemeta/core/css.h new file mode 100644 index 000000000..a8c8a0135 --- /dev/null +++ b/vendor/core/src/core/css/include/sourcemeta/core/css.h @@ -0,0 +1,128 @@ +#ifndef SOURCEMETA_CORE_CSS_H_ +#define SOURCEMETA_CORE_CSS_H_ + +#ifndef SOURCEMETA_CORE_CSS_EXPORT +#include +#endif + +#include // std::string_view + +/// @defgroup css CSS +/// @brief A growing implementation of CSS-related utilities. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup css +/// Check whether the given string is a valid CSS 2.1 hex color per §4.3.6. +/// Accepts only the two hex notations defined by CSS 2.1: +/// +/// ``` +/// "#" 3HEXDIG ; e.g. "#C89" +/// "#" 6HEXDIG ; e.g. "#CC8899" +/// ``` +/// +/// Hex digits are case-insensitive. The 4-digit and 8-digit alpha forms +/// from CSS Color Module Level 4 are not accepted. For example: +/// +/// ```cpp +/// #include +/// +/// #include +/// +/// assert(sourcemeta::core::is_css2_hex_color("#CC8899")); +/// assert(sourcemeta::core::is_css2_hex_color("#C89")); +/// assert(!sourcemeta::core::is_css2_hex_color("#00332520")); +/// assert(!sourcemeta::core::is_css2_hex_color("CC8899")); +/// ``` +SOURCEMETA_CORE_CSS_EXPORT +auto is_css2_hex_color(const std::string_view value) noexcept -> bool; + +/// @ingroup css +/// Check whether the given string is one of the 17 CSS 2.1 color keywords +/// defined in §4.3.6: +/// +/// ``` +/// aqua, black, blue, fuchsia, gray, green, lime, maroon, navy, +/// olive, orange, purple, red, silver, teal, white, yellow +/// ``` +/// +/// Matching is case-insensitive. The `transparent` keyword and the +/// deprecated system colors (`ButtonFace`, `ActiveBorder`, etc.) are out +/// of scope, as are the extended X11 names introduced by CSS Color Module +/// Level 3. For example: +/// +/// ```cpp +/// #include +/// +/// #include +/// +/// assert(sourcemeta::core::is_css2_color_keyword("fuchsia")); +/// assert(sourcemeta::core::is_css2_color_keyword("RED")); +/// assert(!sourcemeta::core::is_css2_color_keyword("puce")); +/// assert(!sourcemeta::core::is_css2_color_keyword("papayawhip")); +/// ``` +SOURCEMETA_CORE_CSS_EXPORT +auto is_css2_color_keyword(const std::string_view value) noexcept -> bool; + +/// @ingroup css +/// Check whether the given string is a valid CSS 2.1 functional RGB color +/// per §4.3.6. Accepts both the integer and percentage forms: +/// +/// ``` +/// rgb( , , ) +/// rgb( , , ) +/// ``` +/// +/// All three values must be the same type. The function name `rgb` is +/// case-insensitive, and CSS whitespace (space, tab, CR, LF, FF) is +/// permitted between tokens. Out-of-range values are accepted (CSS 2.1 +/// clamps at use time). The 4-argument `rgba(...)` form from CSS Color +/// Module Level 3 is not accepted. For example: +/// +/// ```cpp +/// #include +/// +/// #include +/// +/// assert(sourcemeta::core::is_css2_rgb_function("rgb(255, 0, 128)")); +/// assert(sourcemeta::core::is_css2_rgb_function("rgb(100%, 0%, 50%)")); +/// assert(sourcemeta::core::is_css2_rgb_function("rgb(300, -5, 128)")); +/// assert(!sourcemeta::core::is_css2_rgb_function("rgb(100%, 0, 50%)")); +/// assert(!sourcemeta::core::is_css2_rgb_function("rgba(0, 0, 0, 1)")); +/// ``` +SOURCEMETA_CORE_CSS_EXPORT +auto is_css2_rgb_function(const std::string_view value) noexcept -> bool; + +/// @ingroup css +/// Check whether the given string is a valid CSS 2.1 `` value per +/// §4.3.6. The accept set is the union of: +/// +/// - `is_css2_hex_color` +/// - `is_css2_color_keyword` +/// - `is_css2_rgb_function` +/// +/// For example: +/// +/// ```cpp +/// #include +/// +/// #include +/// +/// assert(sourcemeta::core::is_css2_color("fuchsia")); +/// assert(sourcemeta::core::is_css2_color("#CC8899")); +/// assert(sourcemeta::core::is_css2_color("rgb(255, 0, 0)")); +/// assert(!sourcemeta::core::is_css2_color("puce")); +/// assert(!sourcemeta::core::is_css2_color("#00332520")); +/// ``` +SOURCEMETA_CORE_CSS_EXPORT +auto is_css2_color(const std::string_view value) noexcept -> bool; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/dns/hostname.cc b/vendor/core/src/core/dns/hostname.cc index 7d116450c..e048fba73 100644 --- a/vendor/core/src/core/dns/hostname.cc +++ b/vendor/core/src/core/dns/hostname.cc @@ -1,5 +1,7 @@ #include +#include +#include // std::string #include // std::string_view namespace sourcemeta::core { @@ -58,6 +60,20 @@ auto is_hostname(const std::string_view value) -> bool { return false; } + // RFC 5890 §2.3.2.1: the ACE prefix "xn--" is case-insensitive. A-labels + // must also satisfy RFC 5891 §4.2.3 and RFC 5892 (Punycode round-trip, + // IDNA 2008 derived properties, contextual rules) + if (label_length >= 4 && ((value[label_start] | 0x20) == 'x') && + ((value[label_start + 1] | 0x20) == 'n') && + value[label_start + 2] == '-' && value[label_start + 3] == '-') { + std::string canonical{value.substr(label_start, label_length)}; + canonical[0] = 'x'; + canonical[1] = 'n'; + if (!idna_is_valid_a_label(canonical)) { + return false; + } + } + if (position < value.size()) { // value[position] == '.' position += 1; diff --git a/vendor/core/src/core/jsonrpc/include/sourcemeta/core/jsonrpc.h b/vendor/core/src/core/jsonrpc/include/sourcemeta/core/jsonrpc.h index 314fafbcd..ffabca559 100644 --- a/vendor/core/src/core/jsonrpc/include/sourcemeta/core/jsonrpc.h +++ b/vendor/core/src/core/jsonrpc/include/sourcemeta/core/jsonrpc.h @@ -66,6 +66,37 @@ constexpr std::int64_t JSONRPC_CODE_SERVER_ERROR_MAX = -32000; SOURCEMETA_CORE_JSONRPC_EXPORT auto jsonrpc_is_server_error(const std::int64_t code) -> bool; +/// @ingroup jsonrpc +/// Check whether the given JSON value is a JSON-RPC 2.0 batch envelope. For +/// example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto payload{sourcemeta::core::parse_json(R"([])")}; +/// assert(sourcemeta::core::jsonrpc_is_batch(payload)); +/// ``` +SOURCEMETA_CORE_JSONRPC_EXPORT +auto jsonrpc_is_batch(const sourcemeta::core::JSON &payload) -> bool; + +/// @ingroup jsonrpc +/// Check whether the given JSON value is a non-empty JSON-RPC 2.0 batch +/// envelope. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// const auto payload{sourcemeta::core::parse_json( +/// R"([ { "jsonrpc": "2.0", "method": "ping" } ])")}; +/// assert(sourcemeta::core::jsonrpc_is_valid_batch(payload)); +/// ``` +SOURCEMETA_CORE_JSONRPC_EXPORT +auto jsonrpc_is_valid_batch(const sourcemeta::core::JSON &payload) -> bool; + /// @ingroup jsonrpc /// Extract the request identifier from a JSON-RPC 2.0 envelope. Returns a /// pointer to the identifier (string, number, or null per the specification) diff --git a/vendor/core/src/core/jsonrpc/jsonrpc.cc b/vendor/core/src/core/jsonrpc/jsonrpc.cc index ca15a878e..89724d34e 100644 --- a/vendor/core/src/core/jsonrpc/jsonrpc.cc +++ b/vendor/core/src/core/jsonrpc/jsonrpc.cc @@ -29,6 +29,14 @@ auto jsonrpc_is_server_error(const std::int64_t code) -> bool { code <= JSONRPC_CODE_SERVER_ERROR_MAX; } +auto jsonrpc_is_batch(const sourcemeta::core::JSON &payload) -> bool { + return payload.is_array(); +} + +auto jsonrpc_is_valid_batch(const sourcemeta::core::JSON &payload) -> bool { + return jsonrpc_is_batch(payload) && !payload.empty(); +} + auto jsonrpc_request_id(const sourcemeta::core::JSON &request) -> const sourcemeta::core::JSON * { if (!request.is_object()) { diff --git a/vendor/core/src/core/mcp/include/sourcemeta/core/mcp.h b/vendor/core/src/core/mcp/include/sourcemeta/core/mcp.h index eb4b75dd6..de244891e 100644 --- a/vendor/core/src/core/mcp/include/sourcemeta/core/mcp.h +++ b/vendor/core/src/core/mcp/include/sourcemeta/core/mcp.h @@ -206,6 +206,14 @@ constexpr auto mcp_supports_implementation_website_url( return version == MCPProtocolVersion::V_2025_11_25; } +/// @ingroup mcp +/// Whether the given protocol version supports JSON-RPC 2.0 batching. +constexpr auto +mcp_supports_jsonrpc_batching(const MCPProtocolVersion version) noexcept + -> bool { + return version == MCPProtocolVersion::V_2025_03_26; +} + /// @ingroup mcp /// Build an MCP `text` content block carrying the given text payload. For /// example: @@ -321,13 +329,12 @@ auto mcp_make_tool_error(const sourcemeta::core::JSON &identifier, /// #include /// /// const auto identifier{sourcemeta::core::JSON{3}}; -/// const auto envelope{sourcemeta::core::mcp_make_error_resource_not_found( -/// identifier, "file:///missing")}; +/// const auto envelope{ +/// sourcemeta::core::mcp_make_error_resource_not_found(identifier)}; /// assert(envelope.at("error").at("code").to_integer() == -32002); /// ``` SOURCEMETA_CORE_MCP_EXPORT -auto mcp_make_error_resource_not_found(const sourcemeta::core::JSON &identifier, - const JSON::StringView uri) +auto mcp_make_error_resource_not_found(const sourcemeta::core::JSON &identifier) -> sourcemeta::core::JSON; /// @ingroup mcp diff --git a/vendor/core/src/core/mcp/mcp.cc b/vendor/core/src/core/mcp/mcp.cc index 1585ae3c7..969a9587a 100644 --- a/vendor/core/src/core/mcp/mcp.cc +++ b/vendor/core/src/core/mcp/mcp.cc @@ -178,12 +178,10 @@ auto mcp_make_tool_error(const sourcemeta::core::JSON &identifier, std::move(envelope_result)); } -auto mcp_make_error_resource_not_found(const sourcemeta::core::JSON &identifier, - const JSON::StringView uri) +auto mcp_make_error_resource_not_found(const sourcemeta::core::JSON &identifier) -> sourcemeta::core::JSON { return sourcemeta::core::jsonrpc_make_error( - &identifier, MCP_CODE_RESOURCE_NOT_FOUND, "Resource not found", - sourcemeta::core::JSON{uri}); + &identifier, MCP_CODE_RESOURCE_NOT_FOUND, "Resource not found"); } auto mcp_make_resource(const JSON::StringView uri, const JSON::StringView name, diff --git a/vendor/core/src/core/time/CMakeLists.txt b/vendor/core/src/core/time/CMakeLists.txt index e2ef8d440..08f115d13 100644 --- a/vendor/core/src/core/time/CMakeLists.txt +++ b/vendor/core/src/core/time/CMakeLists.txt @@ -1,6 +1,6 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME time SOURCES gmt.cc rfc3339_datetime.cc rfc3339_fulldate.cc rfc3339_fulltime.cc - rfc3339_duration.cc) + rfc3339_partialtime_no_secfrac.cc rfc3339_duration.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME time) diff --git a/vendor/core/src/core/time/include/sourcemeta/core/time.h b/vendor/core/src/core/time/include/sourcemeta/core/time.h index 790e14eae..681d13f33 100644 --- a/vendor/core/src/core/time/include/sourcemeta/core/time.h +++ b/vendor/core/src/core/time/include/sourcemeta/core/time.h @@ -161,6 +161,33 @@ auto is_rfc3339_fulldate(const std::string_view value) -> bool; SOURCEMETA_CORE_TIME_EXPORT auto is_rfc3339_fulltime(const std::string_view value) -> bool; +/// @ingroup time +/// Check whether the given string is a valid partial-time value per RFC 3339 +/// Section 5.6 (Internet Date/Time Format), excluding the optional +/// fractional seconds component. This implements the `partial-time` +/// production rule without `[time-secfrac]`: +/// +/// ``` +/// partial-time = time-hour ":" time-minute ":" time-second +/// ``` +/// +/// This matches the JSON Schema Draft 3 `time` format (`hh:mm:ss`). For +/// example: +/// +/// ```cpp +/// #include +/// +/// #include +/// +/// assert(sourcemeta::core::is_rfc3339_partialtime_no_secfrac("08:30:06")); +/// assert(sourcemeta::core::is_rfc3339_partialtime_no_secfrac("23:59:60")); +/// assert(!sourcemeta::core::is_rfc3339_partialtime_no_secfrac("08:30:06.5")); +/// assert(!sourcemeta::core::is_rfc3339_partialtime_no_secfrac("08:30:06Z")); +/// assert(!sourcemeta::core::is_rfc3339_partialtime_no_secfrac("8:30 AM")); +/// ``` +SOURCEMETA_CORE_TIME_EXPORT +auto is_rfc3339_partialtime_no_secfrac(const std::string_view value) -> bool; + /// @ingroup time /// Check whether the given string is a valid duration value per RFC 3339 /// Appendix A (ISO 8601 Collected ABNF). This implements the `duration` diff --git a/vendor/core/src/core/time/rfc3339_partialtime_no_secfrac.cc b/vendor/core/src/core/time/rfc3339_partialtime_no_secfrac.cc new file mode 100644 index 000000000..2a972b313 --- /dev/null +++ b/vendor/core/src/core/time/rfc3339_partialtime_no_secfrac.cc @@ -0,0 +1,59 @@ +#include +#include + +namespace sourcemeta::core { + +auto is_rfc3339_partialtime_no_secfrac(const std::string_view value) -> bool { + // partial-time without [time-secfrac] is exactly "HH:MM:SS" = 8 characters + if (value.size() != 8) { + return false; + } + + // time-hour = 2DIGIT ; 00-23 + if (!is_digit(value[0]) || !is_digit(value[1])) { + return false; + } + const auto hour{static_cast(value[0] - '0') * 10 + + static_cast(value[1] - '0')}; + if (hour > 23) { + return false; + } + + if (value[2] != ':') { + return false; + } + + // time-minute = 2DIGIT ; 00-59 + if (!is_digit(value[3]) || !is_digit(value[4])) { + return false; + } + const auto minute{static_cast(value[3] - '0') * 10 + + static_cast(value[4] - '0')}; + if (minute > 59) { + return false; + } + + if (value[5] != ':') { + return false; + } + + // time-second = 2DIGIT ; 00-60 (60 = leap second per §5.7) + if (!is_digit(value[6]) || !is_digit(value[7])) { + return false; + } + const auto second{static_cast(value[6] - '0') * 10 + + static_cast(value[7] - '0')}; + if (second > 60) { + return false; + } + + // Leap second per §5.7: only legal at 23:59 UTC. partial-time carries no + // offset, so we treat the value as UTC and require 23:59:60 exactly + if (second == 60 && (hour != 23 || minute != 59)) { + return false; + } + + return true; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/uri/CMakeLists.txt b/vendor/core/src/core/uri/CMakeLists.txt index 5f9f83306..e16121483 100644 --- a/vendor/core/src/core/uri/CMakeLists.txt +++ b/vendor/core/src/core/uri/CMakeLists.txt @@ -12,3 +12,5 @@ target_link_libraries(sourcemeta_core_uri PRIVATE sourcemeta::core::io) target_link_libraries(sourcemeta_core_uri PRIVATE sourcemeta::core::ip) +target_link_libraries(sourcemeta_core_uri + PRIVATE sourcemeta::core::unicode) diff --git a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h index c081310fa..ac95720a7 100644 --- a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h +++ b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h @@ -25,7 +25,8 @@ #include // std::vector /// @defgroup uri URI -/// @brief A strict RFC 3986 URI implementation. +/// @brief A strict RFC 3986 URI implementation, with RFC 3987 IRI +/// syntax checking. /// /// This functionality is included as follows: /// @@ -331,6 +332,19 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// ``` class SOURCEMETA_CORE_URI_EXPORT Query { public: + /// Construct a query view over a raw RFC 3986 query string, + /// without a leading `?`. The view borrows the input and does not + /// own it. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// const sourcemeta::core::URI::Query query{"foo=bar&baz=qux"}; + /// assert(query.at("foo").value() == "bar"); + /// ``` + explicit Query(const std::string_view raw); + /// Get the raw RFC 3986 query string this view was constructed /// from. For example: /// @@ -403,9 +417,6 @@ class SOURCEMETA_CORE_URI_EXPORT URI { [[nodiscard]] auto end() const -> const_iterator; private: - friend class URI; - explicit Query(const std::string_view raw); - #if defined(_MSC_VER) #pragma warning(disable : 4251) #endif @@ -641,6 +652,33 @@ class SOURCEMETA_CORE_URI_EXPORT URI { [[nodiscard]] static auto is_uri_reference(std::string_view input) noexcept -> bool; + /// Check if the given string is a valid absolute IRI (has a scheme) per + /// RFC 3987 without constructing a full URI object. For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// assert(sourcemeta::core::URI::is_iri("https://example.com/path")); + /// assert(!sourcemeta::core::URI::is_iri("relative/path")); + /// ``` + [[nodiscard]] static auto is_iri(std::string_view input) noexcept -> bool; + + /// Check if the given string is a valid IRI reference per RFC 3987 + /// (absolute or relative) without constructing a full URI object. + /// For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// assert(sourcemeta::core::URI::is_iri_reference("https://example.com")); + /// assert(sourcemeta::core::URI::is_iri_reference("relative/path")); + /// assert(!sourcemeta::core::URI::is_iri_reference("://bad")); + /// ``` + [[nodiscard]] static auto is_iri_reference(std::string_view input) noexcept + -> bool; + /// Strip a URI path prefix and return the remaining suffix. For example: /// /// ```cpp diff --git a/vendor/core/src/core/uri/parse.cc b/vendor/core/src/core/uri/parse.cc index ffa05b4b9..385bbb1b2 100644 --- a/vendor/core/src/core/uri/parse.cc +++ b/vendor/core/src/core/uri/parse.cc @@ -1,4 +1,5 @@ #include +#include #include #include "escaping.h" @@ -15,6 +16,7 @@ #include // std::string_view #include // std::errc #include // std::conditional_t +#include // std::pair namespace { @@ -40,6 +42,73 @@ auto validate_percent_encoded_utf8(const std::string_view input, return 3; } +[[maybe_unused]] auto +decode_utf8_codepoint(const std::string_view input, + const std::string_view::size_type position) + -> std::pair { + const auto lead = static_cast(input[position]); + const auto length = sourcemeta::core::utf8_lead_byte_size(lead); + if (length == 0 || position + length > input.size()) [[unlikely]] { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + + char32_t codepoint{0}; + if (length == 1) { + codepoint = static_cast(lead); + } else if (length == 2) { + codepoint = static_cast(lead & 0x1FU); + } else if (length == 3) { + codepoint = static_cast(lead & 0x0FU); + } else { + codepoint = static_cast(lead & 0x07U); + } + + for (std::uint8_t offset{1}; offset < length; offset += 1) { + const auto continuation = + static_cast(input[position + offset]); + if (!sourcemeta::core::is_utf8_continuation(continuation)) [[unlikely]] { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + codepoint = (codepoint << 6) | static_cast(continuation & 0x3FU); + } + + if (!sourcemeta::core::is_valid_codepoint(codepoint) || + sourcemeta::core::utf8_codepoint_byte_count(codepoint) != length) + [[unlikely]] { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + + return {codepoint, length}; +} + +template +auto accept_iri_extension(const std::string_view input, + std::string_view::size_type &position) -> bool { + if constexpr (!IRI) { + return false; + } else { + if ((static_cast(input[position]) & 0x80U) == 0U) { + return false; + } + const auto [codepoint, length] = decode_utf8_codepoint(input, position); + if (sourcemeta::core::is_ucschar(codepoint)) { + position += length; + return true; + } + if constexpr (AllowIPrivate) { + if (sourcemeta::core::is_iprivate(codepoint)) { + position += length; + return true; + } + } + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } +} + template auto parse_scheme(const std::string_view input, std::string_view::size_type &position) @@ -197,7 +266,7 @@ auto parse_ipv6(const std::string_view input, } } -template +template auto parse_host(const std::string_view input, std::string_view::size_type &position, [[maybe_unused]] bool &ip_literal) @@ -233,7 +302,7 @@ auto parse_host(const std::string_view input, position += skip; } else if (uri_is_unreserved(current) || uri_is_sub_delim(current)) { position += 1; - } else [[unlikely]] { + } else if (!accept_iri_extension(input, position)) [[unlikely]] { throw sourcemeta::core::URIParseError{ static_cast(position + 1)}; } @@ -248,7 +317,7 @@ auto parse_host(const std::string_view input, } } -template +template auto parse_userinfo(const std::string_view input, std::string_view::size_type &position) -> std::conditional_t> { @@ -272,6 +341,15 @@ auto parse_userinfo(const std::string_view input, } else if (uri_is_unreserved(current) || uri_is_sub_delim(current) || current == URI_COLON) { position += 1; + } else if constexpr (IRI) { + if ((static_cast(current) & 0x80U) == 0U) { + break; + } + const auto [codepoint, length] = decode_utf8_codepoint(input, position); + if (!sourcemeta::core::is_ucschar(codepoint)) { + break; + } + position += length; } else { break; } @@ -285,7 +363,7 @@ auto parse_userinfo(const std::string_view input, } } -template +template auto parse_path(const std::string_view input, std::string_view::size_type &position) -> std::conditional_t> { @@ -316,12 +394,9 @@ auto parse_path(const std::string_view input, if (current == URI_PERCENT) { const auto skip = validate_percent_encoded_utf8(input, position); position += skip; - continue; - } - - if (uri_is_pchar(current) || current == URI_SLASH) { + } else if (uri_is_pchar(current) || current == URI_SLASH) { position += 1; - } else [[unlikely]] { + } else if (!accept_iri_extension(input, position)) [[unlikely]] { throw sourcemeta::core::URIParseError{ static_cast(position + 1)}; } @@ -334,7 +409,7 @@ auto parse_path(const std::string_view input, } } -template +template auto parse_query(const std::string_view input, std::string_view::size_type &position) -> std::conditional_t> { @@ -358,13 +433,10 @@ auto parse_query(const std::string_view input, if (current == URI_PERCENT) { const auto skip = validate_percent_encoded_utf8(input, position); position += skip; - continue; - } - - if (uri_is_pchar(current) || current == URI_SLASH || - current == URI_QUESTION) { + } else if (uri_is_pchar(current) || current == URI_SLASH || + current == URI_QUESTION) { position += 1; - } else [[unlikely]] { + } else if (!accept_iri_extension(input, position)) [[unlikely]] { throw sourcemeta::core::URIParseError{ static_cast(position + 1)}; } @@ -377,7 +449,7 @@ auto parse_query(const std::string_view input, } } -template +template auto parse_fragment(const std::string_view input, std::string_view::size_type &position) -> std::conditional_t> { @@ -398,13 +470,10 @@ auto parse_fragment(const std::string_view input, if (current == URI_PERCENT) { const auto skip = validate_percent_encoded_utf8(input, position); position += skip; - continue; - } - - if (uri_is_pchar(current) || current == URI_SLASH || - current == URI_QUESTION) { + } else if (uri_is_pchar(current) || current == URI_SLASH || + current == URI_QUESTION) { position += 1; - } else [[unlikely]] { + } else if (!accept_iri_extension(input, position)) [[unlikely]] { throw sourcemeta::core::URIParseError{ static_cast(position + 1)}; } @@ -417,7 +486,7 @@ auto parse_fragment(const std::string_view input, } } -template +template auto parse_authority(const std::string_view input, std::string_view::size_type &position, [[maybe_unused]] std::optional &userinfo, @@ -425,16 +494,16 @@ auto parse_authority(const std::string_view input, [[maybe_unused]] std::optional &port, [[maybe_unused]] bool &ip_literal) -> void { if constexpr (CheckOnly) { - parse_userinfo(input, position); - parse_host(input, position, ip_literal); + parse_userinfo(input, position); + parse_host(input, position, ip_literal); } else { - auto userinfo_raw = parse_userinfo(input, position); + auto userinfo_raw = parse_userinfo(input, position); if (userinfo_raw.has_value()) { uri_unescape_unreserved_inplace(userinfo_raw.value()); userinfo = std::move(userinfo_raw.value()); } - auto host_raw = parse_host(input, position, ip_literal); + auto host_raw = parse_host(input, position, ip_literal); uri_unescape_unreserved_inplace(host_raw); host = std::move(host_raw); } @@ -466,7 +535,7 @@ auto parse_authority(const std::string_view input, } } -template +template auto do_parse(const std::string_view input, [[maybe_unused]] std::optional &scheme, [[maybe_unused]] std::optional &userinfo, @@ -496,8 +565,8 @@ auto do_parse(const std::string_view input, if (has_authority) { position += 2; - parse_authority(input, position, userinfo, host, port, - ip_literal); + parse_authority(input, position, userinfo, host, port, + ip_literal); // RFC 3986: hier-part = "//" authority path-abempty // path-abempty = *( "/" segment ), so after authority the next character @@ -513,9 +582,9 @@ auto do_parse(const std::string_view input, const auto path_start = position; bool has_path; if constexpr (CheckOnly) { - has_path = parse_path(input, position); + has_path = parse_path(input, position); } else { - auto parsed_path = parse_path(input, position); + auto parsed_path = parse_path(input, position); has_path = parsed_path.has_value(); if (has_path) { @@ -561,16 +630,16 @@ auto do_parse(const std::string_view input, } if constexpr (CheckOnly) { - parse_query(input, position); - parse_fragment(input, position); + parse_query(input, position); + parse_fragment(input, position); } else { - auto parsed_query = parse_query(input, position); + auto parsed_query = parse_query(input, position); if (parsed_query.has_value()) { uri_unescape_unreserved_inplace(parsed_query.value()); query = std::move(parsed_query.value()); } - auto parsed_fragment = parse_fragment(input, position); + auto parsed_fragment = parse_fragment(input, position); if (parsed_fragment.has_value()) { uri_unescape_unreserved_inplace(parsed_fragment.value()); fragment = std::move(parsed_fragment.value()); @@ -597,9 +666,9 @@ auto URI::parse(const std::string_view input) -> void { assert(!this->path_.has_value()); assert(!this->query_.has_value()); assert(!this->fragment_.has_value()); - do_parse(input, this->scheme_, this->userinfo_, this->host_, - this->port_, this->path_, this->query_, this->fragment_, - this->ip_literal_); + do_parse(input, this->scheme_, this->userinfo_, this->host_, + this->port_, this->path_, this->query_, + this->fragment_, this->ip_literal_); } auto URI::is_uri(const std::string_view input) noexcept -> bool { @@ -607,8 +676,8 @@ auto URI::is_uri(const std::string_view input) noexcept -> bool { std::optional scheme, userinfo, host, path, query, fragment; std::optional port; bool ip_literal{false}; - return do_parse(input, scheme, userinfo, host, port, path, query, - fragment, ip_literal); + return do_parse(input, scheme, userinfo, host, port, path, + query, fragment, ip_literal); } catch (...) { return false; } @@ -619,8 +688,33 @@ auto URI::is_uri_reference(const std::string_view input) noexcept -> bool { std::optional scheme, userinfo, host, path, query, fragment; std::optional port; bool ip_literal{false}; - do_parse(input, scheme, userinfo, host, port, path, query, fragment, - ip_literal); + do_parse(input, scheme, userinfo, host, port, path, query, + fragment, ip_literal); + return true; + } catch (...) { + return false; + } +} + +auto URI::is_iri(const std::string_view input) noexcept -> bool { + try { + std::optional scheme, userinfo, host, path, query, fragment; + std::optional port; + bool ip_literal{false}; + return do_parse(input, scheme, userinfo, host, port, path, + query, fragment, ip_literal); + } catch (...) { + return false; + } +} + +auto URI::is_iri_reference(const std::string_view input) noexcept -> bool { + try { + std::optional scheme, userinfo, host, path, query, fragment; + std::optional port; + bool ip_literal{false}; + do_parse(input, scheme, userinfo, host, port, path, query, + fragment, ip_literal); return true; } catch (...) { return false; diff --git a/vendor/core/src/core/uritemplate/uritemplate_router.cc b/vendor/core/src/core/uritemplate/uritemplate_router.cc index a1e61c94f..a1aea7367 100644 --- a/vendor/core/src/core/uritemplate/uritemplate_router.cc +++ b/vendor/core/src/core/uritemplate/uritemplate_router.cc @@ -241,18 +241,19 @@ auto URITemplateRouter::add(const std::string_view uri_template, throw URITemplateRouterDuplicateOperationIdError{operation_id}; } - // Walk base path segments to establish the trie prefix + if (!uri_template.empty() && uri_template.front() != '/' && + !(uri_template.size() >= 2 && uri_template[0] == '{' && + uri_template[1] == '/')) { + throw URITemplateRouterInvalidSegmentError{"Template must start with '/'", + uri_template}; + } + Node *current = nullptr; if (!this->base_path_.empty()) { - const char *base_position = this->base_path_.data(); - const char *const base_end = base_position + this->base_path_.size(); - while (base_position < base_end) { - while (base_position < base_end && *base_position == '/') { - ++base_position; - } - if (base_position >= base_end) { - break; - } + const char *base_position = this->base_path_.data() + 1; + const char *const base_end = + this->base_path_.data() + this->base_path_.size(); + while (true) { const char *segment_start = base_position; while (base_position < base_end && *base_position != '/') { ++base_position; @@ -262,6 +263,10 @@ auto URITemplateRouter::add(const std::string_view uri_template, static_cast(base_position - segment_start)}; auto &literals = current ? current->literals : this->root_.literals; current = &find_or_create_literal_child(literals, segment); + if (base_position >= base_end) { + break; + } + ++base_position; } } @@ -300,18 +305,23 @@ auto URITemplateRouter::add(const std::string_view uri_template, return; } - Node *base_path_end = current; bool absorbed = false; const char *position = uri_template.data(); const char *const end = position + uri_template.size(); - while (position < end && !absorbed) { - while (position < end && *position == '/') { - ++position; - } + if (position < end && *position == '/') { + ++position; + } - if (position >= end) { - break; + while (true) { + if (position >= end || *position == '/') { + auto &literals = current ? current->literals : this->root_.literals; + current = &find_or_create_literal_child(literals, ""); + if (position >= end) { + break; + } + ++position; + continue; } const char *segment_start = position; @@ -425,14 +435,16 @@ auto URITemplateRouter::add(const std::string_view uri_template, const std::string_view varname{ varname_start, static_cast(varname_end - varname_start)}; - ++position; // skip '}' + ++position; - if (position < end && *position != '/') { - if (*position != '{' || position + 1 >= end || *(position + 1) != '/') { - throw URITemplateRouterInvalidSegmentError{ - "Path segment cannot mix literals and variables", - extract_segment(expression_start, end)}; - } + const bool followed_by_path_operator = + position < end && *position == '{' && position + 1 < end && + *(position + 1) == '/'; + + if (position < end && *position != '/' && !followed_by_path_operator) { + throw URITemplateRouterInvalidSegmentError{ + "Path segment cannot mix literals and variables", + extract_segment(expression_start, end)}; } if (is_expansion_type(type) && position < end) { @@ -448,47 +460,54 @@ auto URITemplateRouter::add(const std::string_view uri_template, } else { current = result; } - } else { - while (position < end && *position != '/' && *position != '{') { - if (*position == '}') { - throw URITemplateRouterInvalidSegmentError{ - "Unmatched closing brace", extract_segment(segment_start, end)}; - } - ++position; + + if (absorbed || position >= end) { + break; + } + if (followed_by_path_operator) { + continue; } + ++position; + continue; + } - if (position < end && *position == '{') { - if (position + 1 < end && *(position + 1) == '/') { - const std::string_view segment{ - segment_start, - static_cast(position - segment_start)}; - auto &literals = current ? current->literals : this->root_.literals; - current = &find_or_create_literal_child(literals, segment); - continue; - } - const char *expr_end = find_expression_end(position, end); - const char *seg_end = expr_end; - while (seg_end < end && *seg_end != '/') { - ++seg_end; - } + while (position < end && *position != '/' && *position != '{') { + if (*position == '}') { throw URITemplateRouterInvalidSegmentError{ - "Path segment cannot mix literals and variables", - std::string_view{segment_start, static_cast( - seg_end - segment_start)}}; + "Unmatched closing brace", extract_segment(segment_start, end)}; } + ++position; + } - const std::string_view segment{ - segment_start, static_cast(position - segment_start)}; - - auto &literals = current ? current->literals : this->root_.literals; - current = &find_or_create_literal_child(literals, segment); + if (position < end && *position == '{') { + if (position + 1 < end && *(position + 1) == '/') { + const std::string_view segment{ + segment_start, static_cast(position - segment_start)}; + auto &literals = current ? current->literals : this->root_.literals; + current = &find_or_create_literal_child(literals, segment); + continue; + } + const char *expr_end = find_expression_end(position, end); + const char *seg_end = expr_end; + while (seg_end < end && *seg_end != '/') { + ++seg_end; + } + throw URITemplateRouterInvalidSegmentError{ + "Path segment cannot mix literals and variables", + std::string_view{segment_start, + static_cast(seg_end - segment_start)}}; } - } - if (current == base_path_end && uri_template.size() == 1 && - uri_template[0] == '/') { + const std::string_view segment{ + segment_start, static_cast(position - segment_start)}; + auto &literals = current ? current->literals : this->root_.literals; - current = &find_or_create_literal_child(literals, ""); + current = &find_or_create_literal_child(literals, segment); + + if (position >= end) { + break; + } + ++position; } if (!absorbed && current != nullptr) { @@ -555,17 +574,13 @@ auto URITemplateRouter::match(const std::string_view path, this->root_.context); } - if (path.size() == 1 && path[0] == '/') { - if (auto *child = find_literal_child(this->root_.literals, "")) { - return finalize_match(this->otherwise_, child->identifier, - child->context); - } + if (path.front() != '/') { return finalize_match(this->otherwise_, 0, 0); } const Node *current = nullptr; - const char *position = path.data(); - const char *const path_end = position + path.size(); + const char *position = path.data() + 1; + const char *const path_end = path.data() + path.size(); const std::vector> *literal_children = &this->root_.literals; @@ -573,11 +588,6 @@ auto URITemplateRouter::match(const std::string_view path, std::size_t variable_index = 0; - // Skip leading slash - if (position < path_end && *position == '/') { - ++position; - } - while (true) { const char *segment_start = position; while (position < path_end && *position != '/') { @@ -586,14 +596,9 @@ auto URITemplateRouter::match(const std::string_view path, const std::string_view segment{ segment_start, static_cast(position - segment_start)}; - // Empty segment (from double slash or trailing slash) doesn't match - if (segment.empty()) { - return finalize_match(this->otherwise_, 0, 0); - } - if (auto *literal_match = find_literal_child(*literal_children, segment)) { current = literal_match; - } else if (*variable_child) { + } else if (!segment.empty() && *variable_child) { assert(variable_index <= std::numeric_limits::max()); if (is_expansion_type((*variable_child)->type)) { @@ -615,12 +620,9 @@ auto URITemplateRouter::match(const std::string_view path, literal_children = ¤t->literals; variable_child = ¤t->variable; - // Check if there's more path if (position >= path_end) { break; } - - // Skip the slash and continue to next segment ++position; } diff --git a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc index 9025f7331..ad17300bc 100644 --- a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc +++ b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc @@ -497,87 +497,46 @@ auto URITemplateRouterView::match( const auto string_table_size = header->arguments_offset - header->string_table_offset; - // Empty path matches empty template if (path.empty()) { return finalize_match(otherwise_context, nodes[0].identifier, nodes[0].context); } - // Root path "/" is stored as an empty literal segment - if (path.size() == 1 && path[0] == '/') { - const auto &root = nodes[0]; - if (root.first_literal_child == NO_CHILD) { - return finalize_match(otherwise_context, 0, 0); - } - - if (root.first_literal_child >= header->node_count || - root.literal_child_count > - header->node_count - root.first_literal_child) { - return finalize_match(otherwise_context, 0, 0); - } - - const auto match = binary_search_literal_children( - nodes, string_table, string_table_size, root.first_literal_child, - root.literal_child_count, "", 0); - if (match == NO_CHILD) { - return finalize_match(otherwise_context, 0, 0); - } - return finalize_match(otherwise_context, nodes[match].identifier, - nodes[match].context); + if (path.front() != '/') { + return finalize_match(otherwise_context, 0, 0); } - // Walk the trie, matching each path segment std::uint32_t current_node = 0; - const char *position = path.data(); - const char *const path_end = position + path.size(); + const char *position = path.data() + 1; + const char *const path_end = path.data() + path.size(); std::size_t variable_index = 0; - // Skip leading slash - if (position < path_end && *position == '/') { - ++position; - } - while (true) { - // Extract segment const char *segment_start = position; while (position < path_end && *position != '/') { ++position; } - const auto segment_length = static_cast(position - segment_start); - // Empty segment (from double slash or trailing slash) doesn't match - if (segment_length == 0) { - return finalize_match(otherwise_context, 0, 0); - } - const auto &node = nodes[current_node]; const auto node_count = header->node_count; - // Try literal children first + std::uint32_t literal_match = NO_CHILD; if (node.first_literal_child != NO_CHILD) { if (node.first_literal_child >= node_count || node.literal_child_count > node_count - node.first_literal_child) { return finalize_match(otherwise_context, 0, 0); } - - const auto literal_match = binary_search_literal_children( + literal_match = binary_search_literal_children( nodes, string_table, string_table_size, node.first_literal_child, node.literal_child_count, segment_start, segment_length); - if (literal_match != NO_CHILD) { - current_node = literal_match; - if (position >= path_end) { - break; - } - ++position; - continue; - } } - // Fall back to variable child - if (node.variable_child != NO_CHILD) { + if (literal_match != NO_CHILD) { + current_node = literal_match; + } else if (segment_length > 0 && node.variable_child != NO_CHILD) { if (node.variable_child >= node_count || variable_index > std::numeric_limits::max()) { @@ -592,8 +551,6 @@ auto URITemplateRouterView::match( return finalize_match(otherwise_context, 0, 0); } - // Both Expansion and OptionalExpansion consume the rest of the path - // verbatim if (is_expansion_type(variable_node.type)) { const auto remaining_length = static_cast(path_end - segment_start); @@ -605,22 +562,20 @@ auto URITemplateRouterView::match( variable_node.context); } - // Regular variable - match single segment callback(static_cast(variable_index), {string_table + variable_node.string_offset, variable_node.string_length}, {segment_start, segment_length}); ++variable_index; current_node = node.variable_child; - if (position >= path_end) { - break; - } - ++position; - continue; + } else { + return finalize_match(otherwise_context, 0, 0); } - // No match - return finalize_match(otherwise_context, 0, 0); + if (position >= path_end) { + break; + } + ++position; } const auto &final_node = nodes[current_node]; diff --git a/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h b/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h index 383d8d8a6..8630e9616 100644 --- a/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h +++ b/vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_util.h @@ -67,6 +67,27 @@ inline constexpr auto is_positive_digit(const char character) -> bool { return character >= '1' && character <= '9'; } +/// @ingroup numeric +/// Check whether the given character is an ASCII hexadecimal digit +/// (`'0'`-`'9'`, `'a'`-`'f'`, `'A'`-`'F'`). For example: +/// +/// ```cpp +/// #include +/// +/// #include +/// +/// assert(sourcemeta::core::is_hex_digit('0')); +/// assert(sourcemeta::core::is_hex_digit('a')); +/// assert(sourcemeta::core::is_hex_digit('F')); +/// assert(!sourcemeta::core::is_hex_digit('g')); +/// assert(!sourcemeta::core::is_hex_digit(' ')); +/// ``` +inline constexpr auto is_hex_digit(const char character) noexcept -> bool { + return (character >= '0' && character <= '9') || + (character >= 'a' && character <= 'f') || + (character >= 'A' && character <= 'F'); +} + /// @ingroup numeric /// Check whether a value fits in an unsigned 8-bit byte template constexpr auto is_byte(const T &value) -> bool { diff --git a/vendor/core/src/lang/text/include/sourcemeta/core/text.h b/vendor/core/src/lang/text/include/sourcemeta/core/text.h index 26ffc0616..cdc8ecf33 100644 --- a/vendor/core/src/lang/text/include/sourcemeta/core/text.h +++ b/vendor/core/src/lang/text/include/sourcemeta/core/text.h @@ -5,7 +5,9 @@ #include #endif +#include // std::same_as #include // std::size_t +#include // std::filesystem::path #include // std::optional #include // std::ostream #include // std::string @@ -41,7 +43,7 @@ auto to_title_case(std::string &value) -> void; /// @ingroup text /// -/// Return the ASCII lowercase form of a character. Non-ASCII bytes pass +/// Return the ASCII lowercase form of a character. Non-ASCII code units pass /// through unchanged. For example: /// /// ```cpp @@ -52,8 +54,57 @@ auto to_title_case(std::string &value) -> void; /// assert(sourcemeta::core::to_lowercase('a') == 'a'); /// assert(sourcemeta::core::to_lowercase('5') == '5'); /// ``` +template + requires std::same_as || + std::same_as || + std::same_as || + std::same_as +inline constexpr auto to_lowercase(const Character character) noexcept + -> Character { + return (character >= 'A' && character <= 'Z') + ? static_cast(character + ('a' - 'A')) + : character; +} + +/// @ingroup text +/// +/// Convert a string to ASCII lowercase in place. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// std::string value{"Hello WORLD"}; +/// sourcemeta::core::to_lowercase(value); +/// assert(value == "hello world"); +/// ``` +template + requires requires(Character character) { + { to_lowercase(character) } -> std::same_as; + } +inline auto to_lowercase(std::basic_string &value) + -> void { + for (auto &character : value) { + character = to_lowercase(character); + } +} + +/// @ingroup text +/// +/// Convert a filesystem path to ASCII lowercase in place. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// std::filesystem::path value{"/Foo/Bar.JSON"}; +/// sourcemeta::core::to_lowercase(value); +/// assert(value == std::filesystem::path{"/foo/bar.json"}); +/// ``` SOURCEMETA_CORE_TEXT_EXPORT -auto to_lowercase(const char character) noexcept -> char; +auto to_lowercase(std::filesystem::path &value) -> void; /// @ingroup text /// diff --git a/vendor/core/src/lang/text/text.cc b/vendor/core/src/lang/text/text.cc index 368e91528..0afe708ef 100644 --- a/vendor/core/src/lang/text/text.cc +++ b/vendor/core/src/lang/text/text.cc @@ -2,9 +2,11 @@ #include // std::isalpha, std::toupper #include // std::size_t +#include // std::filesystem::path #include // std::optional, std::nullopt +#include // std::string #include // std::string_view -#include // std::pair +#include // std::pair, std::move namespace { @@ -17,10 +19,10 @@ auto is_ascii_whitespace(const char character) noexcept -> bool { namespace sourcemeta::core { -auto to_lowercase(const char character) noexcept -> char { - return (character >= 'A' && character <= 'Z') - ? static_cast(character + 32) - : character; +auto to_lowercase(std::filesystem::path &value) -> void { + auto native{value.native()}; + to_lowercase(native); + value = std::filesystem::path{std::move(native)}; } auto to_title_case(std::string &value) -> void { From 3ad51f34df20e466cb9b30f6384245d788f1f3bf Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 1 Jun 2026 16:12:36 -0400 Subject: [PATCH 2/2] Add linter rules that check for ECMA-262 regex compliance and support 2020-12 format assertion Signed-off-by: Juan Cruz Viotti --- test/CMakeLists.txt | 8 +- test/compile/fail_format_assertion.sh | 57 ------------ test/compile/pass_format_assertion.sh | 86 +++++++++++++++++++ ...rtion.sh => pass_lint_format_assertion.sh} | 27 ++---- test/lint/pass_lint_list_exclude.sh | 8 +- test/lint/pass_lint_list_long.sh | 8 +- test/lint/pass_lint_list_short.sh | 8 +- ..._assertion.sh => pass_format_assertion.sh} | 30 ++----- ...on.sh => pass_2020_12_format_assertion.sh} | 29 ++----- .../pass_2020_12_format_assertion_optional.sh | 5 +- 10 files changed, 131 insertions(+), 135 deletions(-) delete mode 100755 test/compile/fail_format_assertion.sh create mode 100755 test/compile/pass_format_assertion.sh rename test/lint/{fail_lint_format_assertion.sh => pass_lint_format_assertion.sh} (54%) rename test/test/{fail_format_assertion.sh => pass_format_assertion.sh} (60%) rename test/validate/{fail_2020_12_format_assertion.sh => pass_2020_12_format_assertion.sh} (55%) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index db6ab6b68..0c2b04a8e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -139,7 +139,7 @@ add_jsonschema_test_unix(validate/fail_schema_enoent) add_jsonschema_test_unix(validate/fail_schema_invalid_json) add_jsonschema_test_unix(validate/fail_schema_non_schema) add_jsonschema_test_unix(validate/fail_schema_unknown_dialect) -add_jsonschema_test_unix(validate/fail_2020_12_format_assertion) +add_jsonschema_test_unix(validate/pass_2020_12_format_assertion) add_jsonschema_test_unix(validate/pass_2020_12_format_assertion_optional) add_jsonschema_test_unix(validate/fail_resolve_unknown_dialect) add_jsonschema_test_unix(validate/fail_resolve_invalid_id) @@ -380,7 +380,7 @@ add_jsonschema_test_unix(test/fail_false_single_resolve_verbose) add_jsonschema_test_unix(test/fail_multi_test) add_jsonschema_test_unix(test/fail_multi_resolve) add_jsonschema_test_unix(test/fail_multi_resolve_verbose) -add_jsonschema_test_unix(test/fail_format_assertion) +add_jsonschema_test_unix(test/pass_format_assertion) add_jsonschema_test_unix(test/fail_unresolvable) add_jsonschema_test_unix(test/fail_unresolvable_fragment) add_jsonschema_test_unix(test/fail_unresolvable_anchor) @@ -558,7 +558,7 @@ add_jsonschema_test_unix(compile/pass_default_dialect_config_relative) add_jsonschema_test_unix(compile/pass_yaml) add_jsonschema_test_unix(compile/fail_no_schema) add_jsonschema_test_unix(compile/fail_schema_invalid_json) -add_jsonschema_test_unix(compile/fail_format_assertion) +add_jsonschema_test_unix(compile/pass_format_assertion) add_jsonschema_test_unix(compile/fail_unknown_metaschema) add_jsonschema_test_unix(compile/fail_default_dialect_config_extension_mismatch) add_jsonschema_test_unix(compile/pass_resolve_remap) @@ -656,7 +656,7 @@ add_jsonschema_test_unix(lint/fail_lint_disable_one) add_jsonschema_test_unix(lint/fail_lint_disable_one_verbose) add_jsonschema_test_unix(lint/fail_lint_disable_unknown_verbose) add_jsonschema_test_unix(lint/fail_lint_disable_many) -add_jsonschema_test_unix(lint/fail_lint_format_assertion) +add_jsonschema_test_unix(lint/pass_lint_format_assertion) add_jsonschema_test_unix(lint/fail_lint_examples) add_jsonschema_test_unix(lint/fail_lint_default) add_jsonschema_test_unix(lint/pass_lint_json) diff --git a/test/compile/fail_format_assertion.sh b/test/compile/fail_format_assertion.sh deleted file mode 100755 index cda550890..000000000 --- a/test/compile/fail_format_assertion.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/sh - -set -o errexit -set -o nounset - -TMP="$(mktemp -d)" -clean() { rm -rf "$TMP"; } -trap clean EXIT - -cat << 'EOF' > "$TMP/metaschema.json" -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://example.com/custom-metaschema", - "$vocabulary": { - "https://json-schema.org/draft/2020-12/vocab/core": true, - "https://json-schema.org/draft/2020-12/vocab/applicator": true, - "https://json-schema.org/draft/2020-12/vocab/validation": true, - "https://json-schema.org/draft/2020-12/vocab/format-assertion": true - } -} -EOF - -cat << 'EOF' > "$TMP/schema.json" -{ - "$schema": "https://example.com/custom-metaschema", - "type": "string", - "format": "email" -} -EOF - -"$1" compile "$TMP/schema.json" \ - --resolve "$TMP/metaschema.json" 2> "$TMP/stderr.txt" \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" - -cat << EOF > "$TMP/expected.txt" -error: Cannot compile unsupported vocabulary - at file path $(realpath "$TMP")/schema.json - at uri https://json-schema.org/draft/2020-12/vocab/format-assertion -EOF - -diff "$TMP/stderr.txt" "$TMP/expected.txt" - -"$1" compile "$TMP/schema.json" \ - --resolve "$TMP/metaschema.json" --json > "$TMP/stdout.txt" \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" - -cat << EOF > "$TMP/expected.txt" -{ - "error": "Cannot compile unsupported vocabulary", - "filePath": "$(realpath "$TMP")/schema.json", - "uri": "https://json-schema.org/draft/2020-12/vocab/format-assertion" -} -EOF - -diff "$TMP/stdout.txt" "$TMP/expected.txt" diff --git a/test/compile/pass_format_assertion.sh b/test/compile/pass_format_assertion.sh new file mode 100755 index 000000000..f296144fa --- /dev/null +++ b/test/compile/pass_format_assertion.sh @@ -0,0 +1,86 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/metaschema.json" +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/custom-metaschema", + "$vocabulary": { + "https://json-schema.org/draft/2020-12/vocab/core": true, + "https://json-schema.org/draft/2020-12/vocab/applicator": true, + "https://json-schema.org/draft/2020-12/vocab/validation": true, + "https://json-schema.org/draft/2020-12/vocab/format-assertion": true + } +} +EOF + +cat << 'EOF' > "$TMP/schema.json" +{ + "$schema": "https://example.com/custom-metaschema", + "type": "string", + "format": "email" +} +EOF + +"$1" compile "$TMP/schema.json" \ + --resolve "$TMP/metaschema.json" > "$TMP/template.json" 2> "$TMP/stderr.txt" + +cat << EOF > "$TMP/expected_stderr.txt" +EOF + +diff "$TMP/stderr.txt" "$TMP/expected_stderr.txt" + +cat << EOF > "$TMP/expected.json" +[ + 5, + false, + true, + [ + [ + [ + 40, + [ "format" ], + [], + "file://$(realpath "$TMP")/schema.json#/format", + 1, + [ 14, 5 ] + ], + [ + 96, + [], + [], + "file://$(realpath "$TMP")/schema.json#/format", + 1, + [ 8, 4 ], + [ + [ + 50, + [ "format" ], + [], + "file://$(realpath "$TMP")/schema.json#/format", + 1, + [ 1, "email" ] + ] + ] + ], + [ + 11, + [ "type" ], + [], + "file://$(realpath "$TMP")/schema.json#/type", + 1, + [ 8, 4 ] + ] + ] + ], + [] +] +EOF + +diff "$TMP/template.json" "$TMP/expected.json" diff --git a/test/lint/fail_lint_format_assertion.sh b/test/lint/pass_lint_format_assertion.sh similarity index 54% rename from test/lint/fail_lint_format_assertion.sh rename to test/lint/pass_lint_format_assertion.sh index cfff21ba6..8fcd9cfa0 100755 --- a/test/lint/fail_lint_format_assertion.sh +++ b/test/lint/pass_lint_format_assertion.sh @@ -24,35 +24,18 @@ EOF cat << 'EOF' > "$TMP/schema.json" { "$schema": "https://example.com/custom-metaschema", + "title": "Email", + "description": "An email address", "type": "string", - "examples": [ "hello" ] + "format": "email", + "examples": [ "foo@bar.com" ] } EOF "$1" lint "$TMP/schema.json" \ - --resolve "$TMP/metaschema.json" 2> "$TMP/stderr.txt" \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" + --resolve "$TMP/metaschema.json" > "$TMP/stderr.txt" 2>&1 cat << EOF > "$TMP/expected.txt" -error: Cannot compile unsupported vocabulary - at file path $(realpath "$TMP")/schema.json - at uri https://json-schema.org/draft/2020-12/vocab/format-assertion EOF diff "$TMP/stderr.txt" "$TMP/expected.txt" - -"$1" lint "$TMP/schema.json" \ - --resolve "$TMP/metaschema.json" --json > "$TMP/stdout.txt" \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" - -cat << EOF > "$TMP/expected.txt" -{ - "error": "Cannot compile unsupported vocabulary", - "filePath": "$(realpath "$TMP")/schema.json", - "uri": "https://json-schema.org/draft/2020-12/vocab/format-assertion" -} -EOF - -diff "$TMP/stdout.txt" "$TMP/expected.txt" diff --git a/test/lint/pass_lint_list_exclude.sh b/test/lint/pass_lint_list_exclude.sh index 2dfd43e6c..6bd013cf4 100755 --- a/test/lint/pass_lint_list_exclude.sh +++ b/test/lint/pass_lint_list_exclude.sh @@ -217,9 +217,15 @@ oneof_to_anyof_disjoint_types orphan_definitions Schema definitions in `$defs` or `definitions` that are never internally referenced can be removed +pattern_non_ecma_regex + For interoperability reasons, only set this keyword to a regular expression that strictly adheres to the ECMA-262 dialect + pattern_properties_default Setting the `patternProperties` keyword to the empty object does not add any further constraint +pattern_properties_non_ecma_regex + For interoperability reasons, only set the keys of this keyword to regular expressions that strictly adhere to the ECMA-262 dialect + portable_anchor_names Keep anchors within the safe allowed character set across JSON Schema dialects (`^[A-Za-z][A-Za-z0-9_.-]*$`) @@ -307,7 +313,7 @@ unsatisfiable_max_contains unsatisfiable_min_properties Setting `minProperties` to a number less than `required` does not add any further constraint -Number of rules: 99 +Number of rules: 101 EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/lint/pass_lint_list_long.sh b/test/lint/pass_lint_list_long.sh index 045f6d510..a8ef2bd18 100755 --- a/test/lint/pass_lint_list_long.sh +++ b/test/lint/pass_lint_list_long.sh @@ -217,9 +217,15 @@ oneof_to_anyof_disjoint_types orphan_definitions Schema definitions in `$defs` or `definitions` that are never internally referenced can be removed +pattern_non_ecma_regex + For interoperability reasons, only set this keyword to a regular expression that strictly adheres to the ECMA-262 dialect + pattern_properties_default Setting the `patternProperties` keyword to the empty object does not add any further constraint +pattern_properties_non_ecma_regex + For interoperability reasons, only set the keys of this keyword to regular expressions that strictly adhere to the ECMA-262 dialect + portable_anchor_names Keep anchors within the safe allowed character set across JSON Schema dialects (`^[A-Za-z][A-Za-z0-9_.-]*$`) @@ -313,7 +319,7 @@ valid_default valid_examples Only include instances in the `examples` array that validate against the schema -Number of rules: 101 +Number of rules: 103 EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/lint/pass_lint_list_short.sh b/test/lint/pass_lint_list_short.sh index ccc0ba853..8eb13f4de 100755 --- a/test/lint/pass_lint_list_short.sh +++ b/test/lint/pass_lint_list_short.sh @@ -217,9 +217,15 @@ oneof_to_anyof_disjoint_types orphan_definitions Schema definitions in `$defs` or `definitions` that are never internally referenced can be removed +pattern_non_ecma_regex + For interoperability reasons, only set this keyword to a regular expression that strictly adheres to the ECMA-262 dialect + pattern_properties_default Setting the `patternProperties` keyword to the empty object does not add any further constraint +pattern_properties_non_ecma_regex + For interoperability reasons, only set the keys of this keyword to regular expressions that strictly adhere to the ECMA-262 dialect + portable_anchor_names Keep anchors within the safe allowed character set across JSON Schema dialects (`^[A-Za-z][A-Za-z0-9_.-]*$`) @@ -313,7 +319,7 @@ valid_default valid_examples Only include instances in the `examples` array that validate against the schema -Number of rules: 101 +Number of rules: 103 EOF diff "$TMP/output.txt" "$TMP/expected.txt" diff --git a/test/test/fail_format_assertion.sh b/test/test/pass_format_assertion.sh similarity index 60% rename from test/test/fail_format_assertion.sh rename to test/test/pass_format_assertion.sh index f9e5d5da3..7b4c57d85 100755 --- a/test/test/fail_format_assertion.sh +++ b/test/test/pass_format_assertion.sh @@ -37,6 +37,11 @@ cat << 'EOF' > "$TMP/test.json" "description": "A valid instance", "valid": true, "data": "foo@bar.com" + }, + { + "description": "An invalid instance", + "valid": false, + "data": "not-an-email" } ] } @@ -44,31 +49,10 @@ EOF "$1" test "$TMP/test.json" \ --resolve "$TMP/metaschema.json" --resolve "$TMP/schema.json" \ - 1> "$TMP/output.txt" 2>&1 \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" + > "$TMP/output.txt" 2>&1 cat << EOF > "$TMP/expected.txt" -$(realpath "$TMP")/test.json: -error: Cannot compile unsupported vocabulary - at file path $(realpath "$TMP")/test.json - at uri https://json-schema.org/draft/2020-12/vocab/format-assertion +$(realpath "$TMP")/test.json: PASS 2/2 EOF diff "$TMP/output.txt" "$TMP/expected.txt" - -"$1" test "$TMP/test.json" \ - --resolve "$TMP/metaschema.json" --resolve "$TMP/schema.json" \ - --json > "$TMP/stdout.txt" \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" - -cat << EOF > "$TMP/expected.txt" -{ - "error": "Cannot compile unsupported vocabulary", - "filePath": "$(realpath "$TMP")/test.json", - "uri": "https://json-schema.org/draft/2020-12/vocab/format-assertion" -} -EOF - -diff "$TMP/stdout.txt" "$TMP/expected.txt" diff --git a/test/validate/fail_2020_12_format_assertion.sh b/test/validate/pass_2020_12_format_assertion.sh similarity index 55% rename from test/validate/fail_2020_12_format_assertion.sh rename to test/validate/pass_2020_12_format_assertion.sh index 75d8f2e6a..19ad05068 100755 --- a/test/validate/fail_2020_12_format_assertion.sh +++ b/test/validate/pass_2020_12_format_assertion.sh @@ -34,33 +34,18 @@ cat << 'EOF' > "$TMP/schema.json" EOF cat << 'EOF' > "$TMP/instance.json" -{ "email": "not-an-email" } +{ "email": "foo@bar.com" } EOF "$1" validate "$TMP/schema.json" "$TMP/instance.json" \ - --resolve "$TMP/metaschema.json" 2> "$TMP/stderr.txt" \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" + --resolve "$TMP/metaschema.json" --verbose 2> "$TMP/stderr.txt" cat << EOF > "$TMP/expected.txt" -error: Cannot compile unsupported vocabulary - at file path $(realpath "$TMP")/schema.json - at uri https://json-schema.org/draft/2020-12/vocab/format-assertion +ok: $(realpath "$TMP")/instance.json + matches $(realpath "$TMP")/schema.json +annotation: "email" + at instance location "" (line 1, column 1) + at evaluate path "/properties" EOF diff "$TMP/stderr.txt" "$TMP/expected.txt" - -"$1" validate "$TMP/schema.json" "$TMP/instance.json" \ - --resolve "$TMP/metaschema.json" --json > "$TMP/stdout.txt" \ - && EXIT_CODE="$?" || EXIT_CODE="$?" -test "$EXIT_CODE" = "4" - -cat << EOF > "$TMP/expected.txt" -{ - "error": "Cannot compile unsupported vocabulary", - "filePath": "$(realpath "$TMP")/schema.json", - "uri": "https://json-schema.org/draft/2020-12/vocab/format-assertion" -} -EOF - -diff "$TMP/stdout.txt" "$TMP/expected.txt" diff --git a/test/validate/pass_2020_12_format_assertion_optional.sh b/test/validate/pass_2020_12_format_assertion_optional.sh index f71c28c2e..f1bea8eb0 100755 --- a/test/validate/pass_2020_12_format_assertion_optional.sh +++ b/test/validate/pass_2020_12_format_assertion_optional.sh @@ -34,7 +34,7 @@ cat << 'EOF' > "$TMP/schema.json" EOF cat << 'EOF' > "$TMP/instance.json" -{ "email": "not-an-email" } +{ "email": "foo@bar.com" } EOF "$1" validate "$TMP/schema.json" "$TMP/instance.json" \ @@ -46,9 +46,6 @@ ok: $(realpath "$TMP")/instance.json annotation: "email" at instance location "" (line 1, column 1) at evaluate path "/properties" -annotation: "email" - at instance location "/email" (line 1, column 3) - at evaluate path "/properties/email/format" EOF diff "$TMP/stderr.txt" "$TMP/expected.txt"