Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions src/router/artifact.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include <exception> // std::exception
#include <filesystem> // std::filesystem
#include <optional> // std::optional
#include <sstream> // std::ostringstream
#include <string> // std::string
#include <string_view> // std::string_view
#include <utility> // std::move
Expand Down Expand Up @@ -164,15 +163,26 @@ auto RouterAction::artifact_serve(
return;
}

// Our checksum is computed over the identity (uncompressed) payload at
// index time. When the wire response is gzip-encoded the wire bytes are
// not what the checksum covers, so per RFC 9110 §8.8.1 the validator must
// be marked weak:
//
// "if the origin server sends the same validator for a representation
// with a gzip content coding applied as it does for a representation
// with no content coding, then that validator is weak."
//
// https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.1
//
// When the wire response is identity, the wire bytes exactly match what
// the checksum covers, so the validator can be strong.
const auto &checksum{info->checksum_hex};
std::ostringstream etag_value_strong;
std::ostringstream etag_value_weak;
etag_value_strong << '"' << checksum << '"';
etag_value_weak << 'W' << '/' << '"' << checksum << '"';
const std::string etag_strong{std::string{"\""} + checksum + "\""};
const std::string etag_weak{std::string{"W/\""} + checksum + "\""};
for (const auto &match : request.header_list("if-none-match")) {
// Cache hit
if (match.first == "*" || match.first == etag_value_weak.str() ||
match.first == etag_value_strong.str()) {
if (match.first == "*" || match.first == etag_weak ||
match.first == etag_strong) {
response.write_status(sourcemeta::one::STATUS_NOT_MODIFIED);
if (enable_cors) {
response.write_header("Access-Control-Allow-Origin", "*");
Expand Down Expand Up @@ -208,9 +218,10 @@ auto RouterAction::artifact_serve(
response.write_header("Last-Modified",
sourcemeta::core::to_gmt(info->last_modified));

std::ostringstream etag;
etag << '"' << checksum << '"';
response.write_header("ETag", std::move(etag).str());
response.write_header("ETag", request.response_encoding() ==
Comment thread
jviotti marked this conversation as resolved.
Comment thread
jviotti marked this conversation as resolved.
sourcemeta::one::Encoding::GZIP
? etag_weak
: etag_strong);

// See
// https://json-schema.org/draft/2020-12/json-schema-core.html#section-9.5.1.1
Expand Down
33 changes: 33 additions & 0 deletions test/e2e/html/hurl/etag.all.hurl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,39 @@ header "Referrer-Policy" not exists
header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
# Identity response: strong ETag form `"<hex>"` per RFC 9110 §8.8.1
header "ETag" matches /^"[0-9a-f]+"$/

# Gzip-encoded response: weak ETag form `W/"<hex>"` per RFC 9110 §8.8.1
# https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.1
GET {{base}}/test/schemas/string.json
Accept-Encoding: gzip
HTTP 200
Content-Type: application/schema+json
Content-Encoding: gzip
Access-Control-Allow-Origin: *
[Captures]
test_schemas_string_json_etag_weak: header "ETag"
[Asserts]
header "Referrer-Policy" not exists
header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
header "ETag" matches /^W\/"[0-9a-f]+"$/

# A weak-form ETag captured from a gzip response still yields a 304 when sent
# as If-None-Match on an identity request (and vice-versa): the server accepts
# both the `W/"<hex>"` and `"<hex>"` forms of the current checksum.
Comment thread
jviotti marked this conversation as resolved.
Outdated
GET {{base}}/test/schemas/string.json
If-None-Match: {{test_schemas_string_json_etag_weak}}
HTTP 304
Access-Control-Allow-Origin: *
[Asserts]
header "Referrer-Policy" not exists
header "Content-Security-Policy" not exists
header "X-Frame-Options" not exists
header "Date" matches /^(Mon|Tue|Wed|Thu|Fri|Sat|Sun), (0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]{4} ([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9] GMT$/
header "Content-Type" not exists
bytes count == 0

GET {{base}}/test/schemas/string.json
HTTP 200
Expand Down
Loading