diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 6ee1a01b..262025fd 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -64,10 +64,13 @@ func Open(options *Options) (Archive, error) { type fetchFlags uint const ( - fetchBulk fetchFlags = 1 << iota + fetchBulk fetchFlags = 1 << iota + fetchGzip fetchDefault fetchFlags = 0 ) +var errNotFound = fmt.Errorf("cannot find archive data") + var httpClient = &http.Client{ Timeout: 30 * time.Second, } @@ -326,17 +329,40 @@ func (index *ubuntuIndex) fetchRelease() error { } func (index *ubuntuIndex) fetchIndex() error { - digests := index.release.Get("SHA256") + releaseDigests := index.release.Get("SHA256") packagesPath := fmt.Sprintf("%s/binary-%s/Packages", index.component, index.arch) - digest, _, _ := control.ParsePathInfo(digests, packagesPath) - if digest == "" { + packagesDigest, _, _ := control.ParsePathInfo(releaseDigests, packagesPath) + if packagesDigest == "" { return fmt.Errorf("%s is missing from %s %s component digests", packagesPath, index.suite, index.component) } logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component) - reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk) - if err != nil { - return err + + // Prefer acquire-by-hash when the archive advertises it. By-hash URLs + // are content-addressed and so are immune to the inconsistent view of + // InRelease vs Packages.gz that mirrors and CDNs can serve while a + // publication is propagating. See https://wiki.ubuntu.com/AptByHash. + packagesGzPath := packagesPath + ".gz" + var reader io.ReadSeekCloser + if index.release.Get("Acquire-By-Hash") == "yes" { + packagesGzDigest, _, _ := control.ParsePathInfo(releaseDigests, packagesGzPath) + if packagesGzDigest != "" { + packagesByHashPath := fmt.Sprintf("%s/binary-%s/by-hash/SHA256/%s", index.component, index.arch, packagesGzDigest) + r, err := index.fetch(index.distPath(packagesByHashPath), packagesDigest, fetchBulk|fetchGzip) + if err != nil && err != errNotFound { + return err + } + // On 404 fall through to the named-path fetch below: the hash + // may have been garbage-collected on the mirror. + reader = r + } + } + if reader == nil { + r, err := index.fetch(index.distPath(packagesGzPath), packagesDigest, fetchBulk|fetchGzip) + if err != nil { + return err + } + reader = r } ctrl, err := control.ParseReader("Package", reader) if err != nil { @@ -410,13 +436,13 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS case 401: return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label) case 404: - return nil, fmt.Errorf("cannot find archive data") + return nil, errNotFound default: return nil, fmt.Errorf("error from archive: %v", resp.Status) } body := resp.Body - if strings.HasSuffix(path, ".gz") { + if flags&fetchGzip != 0 { reader, err := gzip.NewReader(body) if err != nil { return nil, fmt.Errorf("cannot decompress data: %v", err) diff --git a/internal/archive/archive_test.go b/internal/archive/archive_test.go index 8ca840e5..591defb6 100644 --- a/internal/archive/archive_test.go +++ b/internal/archive/archive_test.go @@ -22,19 +22,25 @@ import ( "github.com/canonical/chisel/internal/testutil" ) +type requestResult struct { + path string + status int +} + type httpSuite struct { - logf func(string, ...any) - base string - request *http.Request - requests []*http.Request - response string - responses map[string][]byte - err error - header http.Header - status int - restore func() - privKey *packet.PrivateKey - pubKey *packet.PublicKey + logf func(string, ...any) + base string + request *http.Request + requests []*http.Request + requestResults []requestResult + response string + responses map[string][]byte + err error + header http.Header + status int + restore func() + privKey *packet.PrivateKey + pubKey *packet.PublicKey } var _ = Suite(&httpSuite{}) @@ -54,6 +60,7 @@ func (s *httpSuite) SetUpTest(c *C) { s.base = "http://archive.ubuntu.com/ubuntu/" s.request = nil s.requests = nil + s.requestResults = nil s.response = "" s.responses = make(map[string][]byte) s.header = nil @@ -83,14 +90,19 @@ func (s *httpSuite) Do(req *http.Request) (*http.Response, error) { s.request = req s.requests = append(s.requests, req) body := s.response + status := s.status s.logf("Request: %s", req.URL.String()) if response, ok := s.responses[path.Clean(req.URL.Path)]; ok { body = string(response) + } else if len(s.responses) > 0 && s.status == 200 { + // Unknown path with responses populated: behave like a real archive. + status = 404 } + s.requestResults = append(s.requestResults, requestResult{path: req.URL.Path, status: status}) rsp := &http.Response{ Body: io.NopCloser(strings.NewReader(body)), Header: s.header, - StatusCode: s.status, + StatusCode: status, } return rsp, s.err } @@ -625,6 +637,121 @@ func read(r io.Reader) string { return string(data) } +// fetchRequestStatus checks whether a request was made whose URL path +// contains the given substring. If so, it returns true and the HTTP status +// code from the most recent matching request. If not, it returns false, 0. +func (s *httpSuite) fetchRequestStatus(pathSubstring string) (bool, int) { + for i := len(s.requestResults) - 1; i >= 0; i-- { + if strings.Contains(s.requestResults[i].path, pathSubstring) { + return true, s.requestResults[i].status + } + } + return false, 0 +} + +func (s *httpSuite) TestFetchByHashSucceedsWhenNamedPathIsStale(c *C) { + s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { + r.ByHash = true + }) + + // Simulate a mirror serving stale content at the named Packages.gz path + // while the by-hash path still has the correct data. + for p := range s.responses { + if strings.Contains(p, "Packages.gz") && !strings.Contains(p, "/by-hash/") { + s.responses[p] = testarchive.MakeGzip([]byte("stale Packages from previous publication")) + } + } + + options := archive.Options{ + Label: "ubuntu", + Version: "22.04", + Arch: "amd64", + Suites: []string{"jammy"}, + Components: []string{"main"}, + CacheDir: c.MkDir(), + PubKeys: []*packet.PublicKey{s.pubKey}, + } + + testArchive, err := archive.Open(&options) + c.Assert(err, IsNil) + + pkg, _, err := testArchive.Fetch("mypkg1") + c.Assert(err, IsNil) + c.Assert(read(pkg), Equals, "mypkg1 1.1 data") + + // The by-hash request must have been attempted and succeeded with 200, + // since the named path has stale content and only by-hash has the + // correct data. + attempted, status := s.fetchRequestStatus("/by-hash/SHA256/") + c.Assert(attempted, Equals, true) + c.Assert(status, Equals, 200) +} + +func (s *httpSuite) TestFetchByHashFallsBackOnNotFound(c *C) { + s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { + r.ByHash = true + }) + + // Simulate a mirror that garbage-collected the by-hash entries. + for p := range s.responses { + if strings.Contains(p, "/by-hash/") { + delete(s.responses, p) + } + } + + options := archive.Options{ + Label: "ubuntu", + Version: "22.04", + Arch: "amd64", + Suites: []string{"jammy"}, + Components: []string{"main"}, + CacheDir: c.MkDir(), + PubKeys: []*packet.PublicKey{s.pubKey}, + } + + testArchive, err := archive.Open(&options) + c.Assert(err, IsNil) + + pkg, _, err := testArchive.Fetch("mypkg1") + c.Assert(err, IsNil) + c.Assert(read(pkg), Equals, "mypkg1 1.1 data") + + // The by-hash request must have been attempted but got 404 (the hash + // was garbage-collected), so we fell back to the named path which + // returned 200 with the correct data. + attempted, status := s.fetchRequestStatus("/by-hash/SHA256/") + c.Assert(attempted, Equals, true) + c.Assert(status, Equals, 404) + attempted, status = s.fetchRequestStatus("Packages.gz") + c.Assert(attempted, Equals, true) + c.Assert(status, Equals, 200) +} + +func (s *httpSuite) TestFetchSkipsByHashWhenNotAdvertised(c *C) { + s.prepareArchive("jammy", "22.04", "amd64", []string{"main"}) + + options := archive.Options{ + Label: "ubuntu", + Version: "22.04", + Arch: "amd64", + Suites: []string{"jammy"}, + Components: []string{"main"}, + CacheDir: c.MkDir(), + PubKeys: []*packet.PublicKey{s.pubKey}, + } + + _, err := archive.Open(&options) + c.Assert(err, IsNil) + + // When by-hash is not advertised, no by-hash request should be + // attempted; only the named Packages.gz path is fetched. + attempted, _ := s.fetchRequestStatus("/by-hash/SHA256/") + c.Assert(attempted, Equals, false) + attempted, status := s.fetchRequestStatus("Packages.gz") + c.Assert(attempted, Equals, true) + c.Assert(status, Equals, 200) +} + // ---------------------------------------------------------------------------------------- // Real archive tests, only enabled via: // 1. --real-archive for non-Pro archives (e.g. standard jammy archive), diff --git a/internal/archive/testarchive/testarchive.go b/internal/archive/testarchive/testarchive.go index ea5b8e08..593b443a 100644 --- a/internal/archive/testarchive/testarchive.go +++ b/internal/archive/testarchive/testarchive.go @@ -54,7 +54,7 @@ func (gz *Gzip) Section() []byte { } func (gz *Gzip) Content() []byte { - return makeGzip(gz.Item.Content()) + return MakeGzip(gz.Item.Content()) } type Package struct { @@ -107,6 +107,9 @@ type Release struct { Label string Items []Item PrivKey *packet.PrivateKey + // ByHash enables the Acquire-By-Hash flag in the Release file + // and renders by-hash URLs alongside named paths. + ByHash bool } func (r *Release) Walk(f func(Item) error) error { @@ -127,6 +130,10 @@ func (r *Release) Content() []byte { content := item.Content() fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path()) } + acquireByHash := "" + if r.ByHash { + acquireByHash = "Acquire-By-Hash: yes\n" + } content := fmt.Sprintf(string(testutil.Reindent(` Origin: Ubuntu Label: %s @@ -137,9 +144,9 @@ func (r *Release) Content() []byte { Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x Components: main restricted universe multiverse Description: Ubuntu %s - SHA256: + %sSHA256: %s - `)), r.Label, r.Suite, r.Version, r.Version, digests.String()) + `)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String()) var buf bytes.Buffer writer, err := clearsign.Encode(&buf, r.PrivKey, nil) @@ -160,12 +167,17 @@ func (r *Release) Content() []byte { func (r *Release) Render(prefix string, content map[string][]byte) error { return r.Walk(func(item Item) error { itemPath := item.Path() + itemContent := item.Content() if strings.HasPrefix(itemPath, "pool/") { - itemPath = path.Join(prefix, itemPath) - } else { - itemPath = path.Join(prefix, "dists", r.Suite, itemPath) + content[path.Join(prefix, itemPath)] = itemContent + return nil + } + distItemPath := path.Join(prefix, "dists", r.Suite, itemPath) + content[distItemPath] = itemContent + if r.ByHash && itemPath != r.Path() { + byHashPath := path.Join(prefix, "dists", r.Suite, path.Dir(itemPath), "by-hash", "SHA256", makeSha256(itemContent)) + content[byHashPath] = itemContent } - content[itemPath] = item.Content() return nil }) } @@ -204,7 +216,7 @@ func makeSha256(b []byte) string { return fmt.Sprintf("%x", sha256.Sum256(b)) } -func makeGzip(b []byte) []byte { +func MakeGzip(b []byte) []byte { var buf bytes.Buffer gz := gzip.NewWriter(&buf) _, err := gz.Write(b)