Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 35 additions & 9 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,13 @@ func Open(options *Options) (Archive, error) {
type fetchFlags uint

const (
fetchBulk fetchFlags = 1 << iota
fetchBulk fetchFlags = 1 << iota
fetchGzip
fetchDefault fetchFlags = 0
)

var errNotFound = fmt.Errorf("cannot find archive data")

var httpClient = &http.Client{
Timeout: 30 * time.Second,
}
Expand Down Expand Up @@ -326,17 +329,40 @@ func (index *ubuntuIndex) fetchRelease() error {
}

func (index *ubuntuIndex) fetchIndex() error {
digests := index.release.Get("SHA256")
releaseDigests := index.release.Get("SHA256")

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Note to reviewer]: I did some renaming to hopefully clarify what these variables are.

packagesPath := fmt.Sprintf("%s/binary-%s/Packages", index.component, index.arch)
digest, _, _ := control.ParsePathInfo(digests, packagesPath)
if digest == "" {
packagesDigest, _, _ := control.ParsePathInfo(releaseDigests, packagesPath)
if packagesDigest == "" {
return fmt.Errorf("%s is missing from %s %s component digests", packagesPath, index.suite, index.component)
}

logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component)
reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk)
if err != nil {
return err

// Prefer acquire-by-hash when the archive advertises it. By-hash URLs
// are content-addressed and so are immune to the inconsistent view of
// InRelease vs Packages.gz that mirrors and CDNs can serve while a
// publication is propagating. See https://wiki.ubuntu.com/AptByHash.
packagesGzPath := packagesPath + ".gz"
var reader io.ReadSeekCloser
if index.release.Get("Acquire-By-Hash") == "yes" {
packagesGzDigest, _, _ := control.ParsePathInfo(releaseDigests, packagesGzPath)
if packagesGzDigest != "" {
packagesByHashPath := fmt.Sprintf("%s/binary-%s/by-hash/SHA256/%s", index.component, index.arch, packagesGzDigest)
r, err := index.fetch(index.distPath(packagesByHashPath), packagesDigest, fetchBulk|fetchGzip)
if err != nil && err != errNotFound {
return err
}
// On 404 fall through to the named-path fetch below: the hash
// may have been garbage-collected on the mirror.
reader = r
}
}
if reader == nil {
r, err := index.fetch(index.distPath(packagesGzPath), packagesDigest, fetchBulk|fetchGzip)
if err != nil {
return err
}
reader = r
}
ctrl, err := control.ParseReader("Package", reader)
if err != nil {
Expand Down Expand Up @@ -410,13 +436,13 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS
case 401:
return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label)
case 404:
return nil, fmt.Errorf("cannot find archive data")
return nil, errNotFound
default:
return nil, fmt.Errorf("error from archive: %v", resp.Status)
}

body := resp.Body
if strings.HasSuffix(path, ".gz") {
if flags&fetchGzip != 0 {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a behavioral change. Has anyone double checked that we don't have any other cases that were handled before and are not handled now?

@upils upils Jun 16, 2026

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I checked. We only had one call site with a path ending with .gz, in fetchIndex, which is reworked in this PR to use the fetchGzip flag.

reader, err := gzip.NewReader(body)
if err != nil {
return nil, fmt.Errorf("cannot decompress data: %v", err)
Expand Down
153 changes: 140 additions & 13 deletions internal/archive/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,25 @@ import (
"github.com/canonical/chisel/internal/testutil"
)

type requestResult struct {
path string
status int
}

type httpSuite struct {
logf func(string, ...any)
base string
request *http.Request
requests []*http.Request
response string
responses map[string][]byte
err error
header http.Header
status int
restore func()
privKey *packet.PrivateKey
pubKey *packet.PublicKey
logf func(string, ...any)
base string
request *http.Request
requests []*http.Request
requestResults []requestResult

@upils upils Jun 17, 2026

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Note to reviewer]: This line is the only new one in this struct. It enables recording submitted requests and associated status. The additional tests were then reworked to properly check when the "by-hash" URL was used and what was the result.

I expect this additional capability will be useful in the future to test other fallback mechanisms.

response string
responses map[string][]byte
err error
header http.Header
status int
restore func()
privKey *packet.PrivateKey
pubKey *packet.PublicKey
}

var _ = Suite(&httpSuite{})
Expand All @@ -54,6 +60,7 @@ func (s *httpSuite) SetUpTest(c *C) {
s.base = "http://archive.ubuntu.com/ubuntu/"
s.request = nil
s.requests = nil
s.requestResults = nil
s.response = ""
s.responses = make(map[string][]byte)
s.header = nil
Expand Down Expand Up @@ -83,14 +90,19 @@ func (s *httpSuite) Do(req *http.Request) (*http.Response, error) {
s.request = req
s.requests = append(s.requests, req)
body := s.response
status := s.status
s.logf("Request: %s", req.URL.String())
if response, ok := s.responses[path.Clean(req.URL.Path)]; ok {
body = string(response)
} else if len(s.responses) > 0 && s.status == 200 {
// Unknown path with responses populated: behave like a real archive.
status = 404
}
s.requestResults = append(s.requestResults, requestResult{path: req.URL.Path, status: status})
rsp := &http.Response{
Body: io.NopCloser(strings.NewReader(body)),
Header: s.header,
StatusCode: s.status,
StatusCode: status,
}
return rsp, s.err
}
Expand Down Expand Up @@ -625,6 +637,121 @@ func read(r io.Reader) string {
return string(data)
}

// fetchRequestStatus checks whether a request was made whose URL path
// contains the given substring. If so, it returns true and the HTTP status
// code from the most recent matching request. If not, it returns false, 0.
func (s *httpSuite) fetchRequestStatus(pathSubstring string) (bool, int) {
for i := len(s.requestResults) - 1; i >= 0; i-- {
if strings.Contains(s.requestResults[i].path, pathSubstring) {
return true, s.requestResults[i].status
}
}
return false, 0
}

func (s *httpSuite) TestFetchByHashSucceedsWhenNamedPathIsStale(c *C) {
s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) {
r.ByHash = true
})

// Simulate a mirror serving stale content at the named Packages.gz path
// while the by-hash path still has the correct data.
for p := range s.responses {
if strings.Contains(p, "Packages.gz") && !strings.Contains(p, "/by-hash/") {
s.responses[p] = testarchive.MakeGzip([]byte("stale Packages from previous publication"))
}
}

options := archive.Options{
Label: "ubuntu",
Version: "22.04",
Arch: "amd64",
Suites: []string{"jammy"},
Components: []string{"main"},
CacheDir: c.MkDir(),
PubKeys: []*packet.PublicKey{s.pubKey},
}

testArchive, err := archive.Open(&options)
c.Assert(err, IsNil)

pkg, _, err := testArchive.Fetch("mypkg1")
c.Assert(err, IsNil)
c.Assert(read(pkg), Equals, "mypkg1 1.1 data")

// The by-hash request must have been attempted and succeeded with 200,
// since the named path has stale content and only by-hash has the
// correct data.
attempted, status := s.fetchRequestStatus("/by-hash/SHA256/")
c.Assert(attempted, Equals, true)
c.Assert(status, Equals, 200)
}

func (s *httpSuite) TestFetchByHashFallsBackOnNotFound(c *C) {
s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) {
r.ByHash = true
})

// Simulate a mirror that garbage-collected the by-hash entries.
for p := range s.responses {
if strings.Contains(p, "/by-hash/") {
delete(s.responses, p)
}
}

options := archive.Options{
Label: "ubuntu",
Version: "22.04",
Arch: "amd64",
Suites: []string{"jammy"},
Components: []string{"main"},
CacheDir: c.MkDir(),
PubKeys: []*packet.PublicKey{s.pubKey},
}

testArchive, err := archive.Open(&options)
c.Assert(err, IsNil)

pkg, _, err := testArchive.Fetch("mypkg1")
c.Assert(err, IsNil)
c.Assert(read(pkg), Equals, "mypkg1 1.1 data")

// The by-hash request must have been attempted but got 404 (the hash
// was garbage-collected), so we fell back to the named path which
// returned 200 with the correct data.
attempted, status := s.fetchRequestStatus("/by-hash/SHA256/")
c.Assert(attempted, Equals, true)
c.Assert(status, Equals, 404)
attempted, status = s.fetchRequestStatus("Packages.gz")
c.Assert(attempted, Equals, true)
c.Assert(status, Equals, 200)
}

func (s *httpSuite) TestFetchSkipsByHashWhenNotAdvertised(c *C) {
s.prepareArchive("jammy", "22.04", "amd64", []string{"main"})

options := archive.Options{
Label: "ubuntu",
Version: "22.04",
Arch: "amd64",
Suites: []string{"jammy"},
Components: []string{"main"},
CacheDir: c.MkDir(),
PubKeys: []*packet.PublicKey{s.pubKey},
}

_, err := archive.Open(&options)
c.Assert(err, IsNil)

// When by-hash is not advertised, no by-hash request should be
// attempted; only the named Packages.gz path is fetched.
attempted, _ := s.fetchRequestStatus("/by-hash/SHA256/")
c.Assert(attempted, Equals, false)
attempted, status := s.fetchRequestStatus("Packages.gz")
c.Assert(attempted, Equals, true)
c.Assert(status, Equals, 200)
}

// ----------------------------------------------------------------------------------------
// Real archive tests, only enabled via:
// 1. --real-archive for non-Pro archives (e.g. standard jammy archive),
Expand Down
28 changes: 20 additions & 8 deletions internal/archive/testarchive/testarchive.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (gz *Gzip) Section() []byte {
}

func (gz *Gzip) Content() []byte {
return makeGzip(gz.Item.Content())
return MakeGzip(gz.Item.Content())
}

type Package struct {
Expand Down Expand Up @@ -107,6 +107,9 @@ type Release struct {
Label string
Items []Item
PrivKey *packet.PrivateKey
// ByHash enables the Acquire-By-Hash flag in the Release file
// and renders by-hash URLs alongside named paths.
ByHash bool
}

func (r *Release) Walk(f func(Item) error) error {
Expand All @@ -127,6 +130,10 @@ func (r *Release) Content() []byte {
content := item.Content()
fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path())
}
acquireByHash := ""
if r.ByHash {
acquireByHash = "Acquire-By-Hash: yes\n"
}
content := fmt.Sprintf(string(testutil.Reindent(`
Origin: Ubuntu
Label: %s
Expand All @@ -137,9 +144,9 @@ func (r *Release) Content() []byte {
Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x
Components: main restricted universe multiverse
Description: Ubuntu %s
SHA256:
%sSHA256:
%s
`)), r.Label, r.Suite, r.Version, r.Version, digests.String())
`)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String())

var buf bytes.Buffer
writer, err := clearsign.Encode(&buf, r.PrivKey, nil)
Expand All @@ -160,12 +167,17 @@ func (r *Release) Content() []byte {
func (r *Release) Render(prefix string, content map[string][]byte) error {
return r.Walk(func(item Item) error {
itemPath := item.Path()
itemContent := item.Content()
if strings.HasPrefix(itemPath, "pool/") {
itemPath = path.Join(prefix, itemPath)
} else {
itemPath = path.Join(prefix, "dists", r.Suite, itemPath)
content[path.Join(prefix, itemPath)] = itemContent
return nil
}
distItemPath := path.Join(prefix, "dists", r.Suite, itemPath)
content[distItemPath] = itemContent
if r.ByHash && itemPath != r.Path() {
byHashPath := path.Join(prefix, "dists", r.Suite, path.Dir(itemPath), "by-hash", "SHA256", makeSha256(itemContent))
content[byHashPath] = itemContent

@niemeyer niemeyer Jun 15, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This used to be a very clean and simple function, and it'd be nice to keep it that way. From these options, having a ByHash bool (not "Acquire", that's on the fetch side) seems in line with the type semantics. The other options look like custom hacks that require reading both the test and the implementation to be made sense of, so we need to think a bit about how to represent these while keeping that clean and simple aspect.

/cc @upils

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is now simpler. The remaining additional complexity is due to adding the byHash path.

Tests are now responsible for tweaking the responses to simulate a bad mirror.

}
content[itemPath] = item.Content()
return nil
})
}
Expand Down Expand Up @@ -204,7 +216,7 @@ func makeSha256(b []byte) string {
return fmt.Sprintf("%x", sha256.Sum256(b))
}

func makeGzip(b []byte) []byte {
func MakeGzip(b []byte) []byte {
var buf bytes.Buffer
gz := gzip.NewWriter(&buf)
_, err := gz.Write(b)
Expand Down