diff --git a/src/http/include/sourcemeta/one/http_request.h b/src/http/include/sourcemeta/one/http_request.h index 4946b21f..93dd3aac 100644 --- a/src/http/include/sourcemeta/one/http_request.h +++ b/src/http/include/sourcemeta/one/http_request.h @@ -118,6 +118,22 @@ class HTTPRequest { return this->request_->getHeader(name); } + // uWebSockets stores header field-names lowercased, so `name` must be + // lowercase too + [[nodiscard]] auto header_exists(const std::string_view name) const noexcept + -> bool { + if (this->request_ == nullptr) { + return false; + } + + for (const auto [key, value] : *this->request_) { + if (key == name) { + return true; + } + } + return false; + } + [[nodiscard]] auto query(const std::string_view name) const -> std::string_view { return this->request_->getQuery(name); diff --git a/src/router/artifact.cc b/src/router/artifact.cc index 25cc9802..7197f6f3 100644 --- a/src/router/artifact.cc +++ b/src/router/artifact.cc @@ -144,25 +144,6 @@ auto RouterAction::artifact_serve( return; } - // Note that `If-Modified-Since` can only be used with a `GET` or `HEAD`. - // See - // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since - const auto if_modified_since{request.header_gmt("if-modified-since")}; - // Time comparison can be flaky, but adding a bit of tolerance leads - // to more consistent behavior. - if (if_modified_since.has_value() && - (if_modified_since.value() + std::chrono::seconds(1)) >= - info->last_modified) { - response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED); - if (enable_cors) { - response.write_header("Access-Control-Allow-Origin", "*"); - } - - sourcemeta::one::send_response(sourcemeta::one::STATUS_NOT_MODIFIED, - request, response); - return; - } - // Our checksum is computed over the identity (uncompressed) payload at // index time. When the wire response is gzip-encoded the wire bytes are // not what the checksum covers, so per RFC 9110 §8.8.1 the validator must @@ -179,10 +160,45 @@ auto RouterAction::artifact_serve( const auto &checksum{info->checksum_hex}; const std::string etag_strong{std::string{"\""} + checksum + "\""}; const std::string etag_weak{std::string{"W/\""} + checksum + "\""}; - for (const auto &match : request.header_list("if-none-match")) { - // Cache hit - if (match.first == "*" || match.first == etag_weak || - match.first == etag_strong) { + + // RFC 9110 §13.2.2 (Precedence of Preconditions): If-None-Match is + // evaluated before If-Modified-Since. RFC 9110 §13.1.3: "A recipient + // MUST ignore If-Modified-Since if the request contains an If-None-Match + // header field; the condition in If-None-Match is considered to be a + // more accurate replacement for the condition in If-Modified-Since, and + // the two are only combined for the sake of interoperating with older + // intermediaries that might not implement If-None-Match." Header + // *presence* (not match outcome, and not non-empty value) is what + // suppresses If-Modified-Since: even a malformed `If-None-Match:` with + // an empty value still triggers the MUST per spec letter. + // https://datatracker.ietf.org/doc/html/rfc9110#section-13.2.2 + // https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.3 + if (request.header_exists("if-none-match")) { + for (const auto &match : request.header_list("if-none-match")) { + // Cache hit + if (match.first == "*" || match.first == etag_weak || + match.first == etag_strong) { + response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED); + if (enable_cors) { + response.write_header("Access-Control-Allow-Origin", "*"); + } + + sourcemeta::one::send_response(sourcemeta::one::STATUS_NOT_MODIFIED, + request, response); + return; + } + } + } else { + // Per RFC 9110 §13.1.3 If-Modified-Since is only evaluated when + // If-None-Match is absent. It can only be used with GET or HEAD; this + // function is reached only on those methods (checked at the entry). + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since + const auto if_modified_since{request.header_gmt("if-modified-since")}; + // Time comparison can be flaky, but adding a bit of tolerance leads + // to more consistent behavior. + if (if_modified_since.has_value() && + (if_modified_since.value() + std::chrono::seconds(1)) >= + info->last_modified) { response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED); if (enable_cors) { response.write_header("Access-Control-Allow-Origin", "*"); diff --git a/test/e2e/html/hurl/etag.all.hurl b/test/e2e/html/hurl/etag.all.hurl index 60e27312..aae39fdb 100644 --- a/test/e2e/html/hurl/etag.all.hurl +++ b/test/e2e/html/hurl/etag.all.hurl @@ -109,3 +109,59 @@ header "Content-Security-Policy" not exists header "X-Frame-Options" not exists header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/ bytes count > 0 + +# RFC 9110 §13.1.3 / §13.2.2: If-None-Match takes precedence over +# If-Modified-Since. When both headers are present the recipient MUST +# ignore If-Modified-Since and evaluate If-None-Match alone, regardless +# of the match outcome. +# https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.3 +# https://datatracker.ietf.org/doc/html/rfc9110#section-13.2.2 + +# (1) IMS in the past (would normally return 200) + matching INM: +# INM wins, response is 304 by ETag match. +GET {{base}}/test/schemas/string.json +If-Modified-Since: Thu, 01 Jan 1970 00:00:00 GMT +If-None-Match: {{test_schemas_string_json_etag}} +HTTP 304 +Access-Control-Allow-Origin: * +[Asserts] +header "Referrer-Policy" not exists +header "Content-Security-Policy" not exists +header "X-Frame-Options" not exists +header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/ +header "Content-Type" not exists +bytes count == 0 + +# (2) IMS in the future (would normally return 304) + non-matching INM: +# INM wins, response is 200 because INM did not match. The IMS that +# would have produced 304 is ignored per the MUST in §13.1.3. This is +# the key regression the precondition-order fix prevents: previously +# the server short-circuited on IMS before INM was even consulted. +GET {{base}}/test/schemas/string.json +If-Modified-Since: Thu, 01 Jan 2100 00:00:00 GMT +If-None-Match: "definitely-not-a-real-checksum" +HTTP 200 +Content-Type: application/schema+json +Access-Control-Allow-Origin: * +[Asserts] +header "Referrer-Policy" not exists +header "Content-Security-Policy" not exists +header "X-Frame-Options" not exists +header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/ +header "ETag" matches /^"[0-9a-f]+"$/ +header "Vary" == "Accept-Encoding" +bytes count > 0 + +# (3) Sanity baseline: IMS in the future + no INM evaluates IMS +# normally and returns 304. +GET {{base}}/test/schemas/string.json +If-Modified-Since: Thu, 01 Jan 2100 00:00:00 GMT +HTTP 304 +Access-Control-Allow-Origin: * +[Asserts] +header "Referrer-Policy" not exists +header "Content-Security-Policy" not exists +header "X-Frame-Options" not exists +header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/ +header "Content-Type" not exists +bytes count == 0