Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/http/include/sourcemeta/one/http_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ class HTTPRequest {
return this->request_->getHeader(name);
}

[[nodiscard]] auto header_exists(const std::string_view name) const noexcept
-> bool {
for (const auto [key, value] : *this->request_) {
Comment thread
cubic-dev-ai[bot] marked this conversation as resolved.
if (key == name) {
Comment thread
jviotti marked this conversation as resolved.
return true;
}
}
return false;
}

[[nodiscard]] auto query(const std::string_view name) const
-> std::string_view {
return this->request_->getQuery(name);
Expand Down
62 changes: 39 additions & 23 deletions src/router/artifact.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,25 +144,6 @@ auto RouterAction::artifact_serve(
return;
}

// Note that `If-Modified-Since` can only be used with a `GET` or `HEAD`.
// See
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
const auto if_modified_since{request.header_gmt("if-modified-since")};
// Time comparison can be flaky, but adding a bit of tolerance leads
// to more consistent behavior.
if (if_modified_since.has_value() &&
(if_modified_since.value() + std::chrono::seconds(1)) >=
info->last_modified) {
response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED);
if (enable_cors) {
response.write_header("Access-Control-Allow-Origin", "*");
}

sourcemeta::one::send_response(sourcemeta::one::STATUS_NOT_MODIFIED,
request, response);
return;
}

// Our checksum is computed over the identity (uncompressed) payload at
// index time. When the wire response is gzip-encoded the wire bytes are
// not what the checksum covers, so per RFC 9110 §8.8.1 the validator must
Expand All @@ -179,10 +160,45 @@ auto RouterAction::artifact_serve(
const auto &checksum{info->checksum_hex};
const std::string etag_strong{std::string{"\""} + checksum + "\""};
const std::string etag_weak{std::string{"W/\""} + checksum + "\""};
for (const auto &match : request.header_list("if-none-match")) {
// Cache hit
if (match.first == "*" || match.first == etag_weak ||
match.first == etag_strong) {

// RFC 9110 §13.2.2 (Precedence of Preconditions): If-None-Match is
// evaluated before If-Modified-Since. RFC 9110 §13.1.3: "A recipient
// MUST ignore If-Modified-Since if the request contains an If-None-Match
// header field; the condition in If-None-Match is considered to be a
// more accurate replacement for the condition in If-Modified-Since, and
// the two are only combined for the sake of interoperating with older
// intermediaries that might not implement If-None-Match." Header
// *presence* (not match outcome, and not non-empty value) is what
// suppresses If-Modified-Since: even a malformed `If-None-Match:` with
// an empty value still triggers the MUST per spec letter.
// https://datatracker.ietf.org/doc/html/rfc9110#section-13.2.2
// https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.3
if (request.header_exists("if-none-match")) {
for (const auto &match : request.header_list("if-none-match")) {
// Cache hit
if (match.first == "*" || match.first == etag_weak ||
match.first == etag_strong) {
response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED);
if (enable_cors) {
response.write_header("Access-Control-Allow-Origin", "*");
}

sourcemeta::one::send_response(sourcemeta::one::STATUS_NOT_MODIFIED,
request, response);
return;
}
}
} else {
// Per RFC 9110 §13.1.3 If-Modified-Since is only evaluated when
// If-None-Match is absent. It can only be used with GET or HEAD; this
// function is reached only on those methods (checked at the entry).
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
const auto if_modified_since{request.header_gmt("if-modified-since")};
// Time comparison can be flaky, but adding a bit of tolerance leads
// to more consistent behavior.
if (if_modified_since.has_value() &&
(if_modified_since.value() + std::chrono::seconds(1)) >=
info->last_modified) {
response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED);
if (enable_cors) {
response.write_header("Access-Control-Allow-Origin", "*");
Expand Down
56 changes: 56 additions & 0 deletions test/e2e/html/hurl/etag.all.hurl
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,59 @@ header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
bytes count > 0

# RFC 9110 §13.1.3 / §13.2.2: If-None-Match takes precedence over
# If-Modified-Since. When both headers are present the recipient MUST
# ignore If-Modified-Since and evaluate If-None-Match alone, regardless
# of the match outcome.
# https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.3
# https://datatracker.ietf.org/doc/html/rfc9110#section-13.2.2

# (1) IMS in the past (would normally return 200) + matching INM:
# INM wins, response is 304 by ETag match.
GET {{base}}/test/schemas/string.json
If-Modified-Since: Thu, 01 Jan 1970 00:00:00 GMT
If-None-Match: {{test_schemas_string_json_etag}}
HTTP 304
Access-Control-Allow-Origin: *
[Asserts]
header "Referrer-Policy" not exists
header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
header "Content-Type" not exists
bytes count == 0

# (2) IMS in the future (would normally return 304) + non-matching INM:
# INM wins, response is 200 because INM did not match. The IMS that
# would have produced 304 is ignored per the MUST in §13.1.3. This is
# the key regression the precondition-order fix prevents: previously
# the server short-circuited on IMS before INM was even consulted.
GET {{base}}/test/schemas/string.json
If-Modified-Since: Thu, 01 Jan 2100 00:00:00 GMT
If-None-Match: "definitely-not-a-real-checksum"
HTTP 200
Content-Type: application/schema+json
Access-Control-Allow-Origin: *
[Asserts]
header "Referrer-Policy" not exists
header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
header "ETag" matches /^"[0-9a-f]+"$/
header "Vary" == "Accept-Encoding"
bytes count > 0

# (3) Sanity baseline: IMS in the future + no INM evaluates IMS
# normally and returns 304.
GET {{base}}/test/schemas/string.json
If-Modified-Since: Thu, 01 Jan 2100 00:00:00 GMT
HTTP 304
Access-Control-Allow-Origin: *
[Asserts]
header "Referrer-Policy" not exists
header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
header "Content-Type" not exists
bytes count == 0
Loading