-
Notifications
You must be signed in to change notification settings - Fork 61
fix: prevent intermittent digest-mismatch errors during archive updates #296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
31bcd80
4cb9d90
0c9b69b
4837940
5f1bc3b
aa82b97
f80cc1b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,10 +64,13 @@ func Open(options *Options) (Archive, error) { | |
| type fetchFlags uint | ||
|
|
||
| const ( | ||
| fetchBulk fetchFlags = 1 << iota | ||
| fetchBulk fetchFlags = 1 << iota | ||
| fetchGzip | ||
| fetchDefault fetchFlags = 0 | ||
| ) | ||
|
|
||
| var errNotFound = fmt.Errorf("cannot find archive data") | ||
|
|
||
| var httpClient = &http.Client{ | ||
| Timeout: 30 * time.Second, | ||
| } | ||
|
|
@@ -326,17 +329,40 @@ func (index *ubuntuIndex) fetchRelease() error { | |
| } | ||
|
|
||
| func (index *ubuntuIndex) fetchIndex() error { | ||
| digests := index.release.Get("SHA256") | ||
| releaseDigests := index.release.Get("SHA256") | ||
| packagesPath := fmt.Sprintf("%s/binary-%s/Packages", index.component, index.arch) | ||
| digest, _, _ := control.ParsePathInfo(digests, packagesPath) | ||
| if digest == "" { | ||
| packagesDigest, _, _ := control.ParsePathInfo(releaseDigests, packagesPath) | ||
| if packagesDigest == "" { | ||
| return fmt.Errorf("%s is missing from %s %s component digests", packagesPath, index.suite, index.component) | ||
| } | ||
|
|
||
| logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component) | ||
| reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk) | ||
| if err != nil { | ||
| return err | ||
|
|
||
| // Prefer acquire-by-hash when the archive advertises it. By-hash URLs | ||
| // are content-addressed and so are immune to the inconsistent view of | ||
| // InRelease vs Packages.gz that mirrors and CDNs can serve while a | ||
| // publication is propagating. See https://wiki.ubuntu.com/AptByHash. | ||
| packagesGzPath := packagesPath + ".gz" | ||
| var reader io.ReadSeekCloser | ||
| if index.release.Get("Acquire-By-Hash") == "yes" { | ||
| packagesGzDigest, _, _ := control.ParsePathInfo(releaseDigests, packagesGzPath) | ||
| if packagesGzDigest != "" { | ||
| packagesByHashPath := fmt.Sprintf("%s/binary-%s/by-hash/SHA256/%s", index.component, index.arch, packagesGzDigest) | ||
| r, err := index.fetch(index.distPath(packagesByHashPath), packagesDigest, fetchBulk|fetchGzip) | ||
| if err != nil && err != errNotFound { | ||
| return err | ||
| } | ||
| // On 404 fall through to the named-path fetch below: the hash | ||
| // may have been garbage-collected on the mirror. | ||
| reader = r | ||
| } | ||
| } | ||
| if reader == nil { | ||
| r, err := index.fetch(index.distPath(packagesGzPath), packagesDigest, fetchBulk|fetchGzip) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| reader = r | ||
| } | ||
| ctrl, err := control.ParseReader("Package", reader) | ||
| if err != nil { | ||
|
|
@@ -410,13 +436,13 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS | |
| case 401: | ||
| return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label) | ||
| case 404: | ||
| return nil, fmt.Errorf("cannot find archive data") | ||
| return nil, errNotFound | ||
| default: | ||
| return nil, fmt.Errorf("error from archive: %v", resp.Status) | ||
| } | ||
|
|
||
| body := resp.Body | ||
| if strings.HasSuffix(path, ".gz") { | ||
| if flags&fetchGzip != 0 { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a behavioral change. Has anyone double checked that we don't have any other cases that were handled before and are not handled now?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I checked. We only had one call site with a path ending with |
||
| reader, err := gzip.NewReader(body) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("cannot decompress data: %v", err) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,19 +22,25 @@ import ( | |
| "github.com/canonical/chisel/internal/testutil" | ||
| ) | ||
|
|
||
| type requestResult struct { | ||
| path string | ||
| status int | ||
| } | ||
|
|
||
| type httpSuite struct { | ||
| logf func(string, ...any) | ||
| base string | ||
| request *http.Request | ||
| requests []*http.Request | ||
| response string | ||
| responses map[string][]byte | ||
| err error | ||
| header http.Header | ||
| status int | ||
| restore func() | ||
| privKey *packet.PrivateKey | ||
| pubKey *packet.PublicKey | ||
| logf func(string, ...any) | ||
| base string | ||
| request *http.Request | ||
| requests []*http.Request | ||
| requestResults []requestResult | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Note to reviewer]: This line is the only new one in this struct. It enables recording submitted requests and associated status. The additional tests were then reworked to properly check when the "by-hash" URL was used and what was the result. I expect this additional capability will be useful in the future to test other fallback mechanisms. |
||
| response string | ||
| responses map[string][]byte | ||
| err error | ||
| header http.Header | ||
| status int | ||
| restore func() | ||
| privKey *packet.PrivateKey | ||
| pubKey *packet.PublicKey | ||
| } | ||
|
|
||
| var _ = Suite(&httpSuite{}) | ||
|
|
@@ -54,6 +60,7 @@ func (s *httpSuite) SetUpTest(c *C) { | |
| s.base = "http://archive.ubuntu.com/ubuntu/" | ||
| s.request = nil | ||
| s.requests = nil | ||
| s.requestResults = nil | ||
| s.response = "" | ||
| s.responses = make(map[string][]byte) | ||
| s.header = nil | ||
|
|
@@ -83,14 +90,19 @@ func (s *httpSuite) Do(req *http.Request) (*http.Response, error) { | |
| s.request = req | ||
| s.requests = append(s.requests, req) | ||
| body := s.response | ||
| status := s.status | ||
| s.logf("Request: %s", req.URL.String()) | ||
| if response, ok := s.responses[path.Clean(req.URL.Path)]; ok { | ||
| body = string(response) | ||
| } else if len(s.responses) > 0 && s.status == 200 { | ||
| // Unknown path with responses populated: behave like a real archive. | ||
| status = 404 | ||
| } | ||
| s.requestResults = append(s.requestResults, requestResult{path: req.URL.Path, status: status}) | ||
| rsp := &http.Response{ | ||
| Body: io.NopCloser(strings.NewReader(body)), | ||
| Header: s.header, | ||
| StatusCode: s.status, | ||
| StatusCode: status, | ||
| } | ||
| return rsp, s.err | ||
| } | ||
|
|
@@ -625,6 +637,121 @@ func read(r io.Reader) string { | |
| return string(data) | ||
| } | ||
|
|
||
| // fetchRequestStatus checks whether a request was made whose URL path | ||
| // contains the given substring. If so, it returns true and the HTTP status | ||
| // code from the most recent matching request. If not, it returns false, 0. | ||
| func (s *httpSuite) fetchRequestStatus(pathSubstring string) (bool, int) { | ||
| for i := len(s.requestResults) - 1; i >= 0; i-- { | ||
| if strings.Contains(s.requestResults[i].path, pathSubstring) { | ||
| return true, s.requestResults[i].status | ||
| } | ||
| } | ||
| return false, 0 | ||
| } | ||
|
|
||
| func (s *httpSuite) TestFetchByHashSucceedsWhenNamedPathIsStale(c *C) { | ||
| s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { | ||
| r.ByHash = true | ||
| }) | ||
|
|
||
| // Simulate a mirror serving stale content at the named Packages.gz path | ||
| // while the by-hash path still has the correct data. | ||
| for p := range s.responses { | ||
| if strings.Contains(p, "Packages.gz") && !strings.Contains(p, "/by-hash/") { | ||
| s.responses[p] = testarchive.MakeGzip([]byte("stale Packages from previous publication")) | ||
| } | ||
| } | ||
|
|
||
| options := archive.Options{ | ||
| Label: "ubuntu", | ||
| Version: "22.04", | ||
| Arch: "amd64", | ||
| Suites: []string{"jammy"}, | ||
| Components: []string{"main"}, | ||
| CacheDir: c.MkDir(), | ||
| PubKeys: []*packet.PublicKey{s.pubKey}, | ||
| } | ||
|
|
||
| testArchive, err := archive.Open(&options) | ||
| c.Assert(err, IsNil) | ||
|
|
||
| pkg, _, err := testArchive.Fetch("mypkg1") | ||
| c.Assert(err, IsNil) | ||
| c.Assert(read(pkg), Equals, "mypkg1 1.1 data") | ||
|
|
||
| // The by-hash request must have been attempted and succeeded with 200, | ||
| // since the named path has stale content and only by-hash has the | ||
| // correct data. | ||
| attempted, status := s.fetchRequestStatus("/by-hash/SHA256/") | ||
| c.Assert(attempted, Equals, true) | ||
| c.Assert(status, Equals, 200) | ||
| } | ||
|
|
||
| func (s *httpSuite) TestFetchByHashFallsBackOnNotFound(c *C) { | ||
| s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { | ||
| r.ByHash = true | ||
| }) | ||
|
|
||
| // Simulate a mirror that garbage-collected the by-hash entries. | ||
| for p := range s.responses { | ||
| if strings.Contains(p, "/by-hash/") { | ||
| delete(s.responses, p) | ||
| } | ||
| } | ||
|
|
||
| options := archive.Options{ | ||
| Label: "ubuntu", | ||
| Version: "22.04", | ||
| Arch: "amd64", | ||
| Suites: []string{"jammy"}, | ||
| Components: []string{"main"}, | ||
| CacheDir: c.MkDir(), | ||
| PubKeys: []*packet.PublicKey{s.pubKey}, | ||
| } | ||
|
|
||
| testArchive, err := archive.Open(&options) | ||
| c.Assert(err, IsNil) | ||
|
|
||
| pkg, _, err := testArchive.Fetch("mypkg1") | ||
| c.Assert(err, IsNil) | ||
| c.Assert(read(pkg), Equals, "mypkg1 1.1 data") | ||
|
|
||
| // The by-hash request must have been attempted but got 404 (the hash | ||
| // was garbage-collected), so we fell back to the named path which | ||
| // returned 200 with the correct data. | ||
| attempted, status := s.fetchRequestStatus("/by-hash/SHA256/") | ||
| c.Assert(attempted, Equals, true) | ||
| c.Assert(status, Equals, 404) | ||
| attempted, status = s.fetchRequestStatus("Packages.gz") | ||
| c.Assert(attempted, Equals, true) | ||
| c.Assert(status, Equals, 200) | ||
| } | ||
|
|
||
| func (s *httpSuite) TestFetchSkipsByHashWhenNotAdvertised(c *C) { | ||
| s.prepareArchive("jammy", "22.04", "amd64", []string{"main"}) | ||
|
|
||
| options := archive.Options{ | ||
| Label: "ubuntu", | ||
| Version: "22.04", | ||
| Arch: "amd64", | ||
| Suites: []string{"jammy"}, | ||
| Components: []string{"main"}, | ||
| CacheDir: c.MkDir(), | ||
| PubKeys: []*packet.PublicKey{s.pubKey}, | ||
| } | ||
|
|
||
| _, err := archive.Open(&options) | ||
| c.Assert(err, IsNil) | ||
|
|
||
| // When by-hash is not advertised, no by-hash request should be | ||
| // attempted; only the named Packages.gz path is fetched. | ||
| attempted, _ := s.fetchRequestStatus("/by-hash/SHA256/") | ||
| c.Assert(attempted, Equals, false) | ||
| attempted, status := s.fetchRequestStatus("Packages.gz") | ||
| c.Assert(attempted, Equals, true) | ||
| c.Assert(status, Equals, 200) | ||
| } | ||
|
|
||
| // ---------------------------------------------------------------------------------------- | ||
| // Real archive tests, only enabled via: | ||
| // 1. --real-archive for non-Pro archives (e.g. standard jammy archive), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,7 +54,7 @@ func (gz *Gzip) Section() []byte { | |
| } | ||
|
|
||
| func (gz *Gzip) Content() []byte { | ||
| return makeGzip(gz.Item.Content()) | ||
| return MakeGzip(gz.Item.Content()) | ||
| } | ||
|
|
||
| type Package struct { | ||
|
|
@@ -107,6 +107,9 @@ type Release struct { | |
| Label string | ||
| Items []Item | ||
| PrivKey *packet.PrivateKey | ||
| // ByHash enables the Acquire-By-Hash flag in the Release file | ||
| // and renders by-hash URLs alongside named paths. | ||
| ByHash bool | ||
| } | ||
|
|
||
| func (r *Release) Walk(f func(Item) error) error { | ||
|
|
@@ -127,6 +130,10 @@ func (r *Release) Content() []byte { | |
| content := item.Content() | ||
| fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path()) | ||
| } | ||
| acquireByHash := "" | ||
| if r.ByHash { | ||
| acquireByHash = "Acquire-By-Hash: yes\n" | ||
| } | ||
| content := fmt.Sprintf(string(testutil.Reindent(` | ||
| Origin: Ubuntu | ||
| Label: %s | ||
|
|
@@ -137,9 +144,9 @@ func (r *Release) Content() []byte { | |
| Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x | ||
| Components: main restricted universe multiverse | ||
| Description: Ubuntu %s | ||
| SHA256: | ||
| %sSHA256: | ||
| %s | ||
| `)), r.Label, r.Suite, r.Version, r.Version, digests.String()) | ||
| `)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String()) | ||
|
|
||
| var buf bytes.Buffer | ||
| writer, err := clearsign.Encode(&buf, r.PrivKey, nil) | ||
|
|
@@ -160,12 +167,17 @@ func (r *Release) Content() []byte { | |
| func (r *Release) Render(prefix string, content map[string][]byte) error { | ||
| return r.Walk(func(item Item) error { | ||
| itemPath := item.Path() | ||
| itemContent := item.Content() | ||
| if strings.HasPrefix(itemPath, "pool/") { | ||
| itemPath = path.Join(prefix, itemPath) | ||
| } else { | ||
| itemPath = path.Join(prefix, "dists", r.Suite, itemPath) | ||
| content[path.Join(prefix, itemPath)] = itemContent | ||
| return nil | ||
| } | ||
| distItemPath := path.Join(prefix, "dists", r.Suite, itemPath) | ||
| content[distItemPath] = itemContent | ||
| if r.ByHash && itemPath != r.Path() { | ||
| byHashPath := path.Join(prefix, "dists", r.Suite, path.Dir(itemPath), "by-hash", "SHA256", makeSha256(itemContent)) | ||
| content[byHashPath] = itemContent | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This used to be a very clean and simple function, and it'd be nice to keep it that way. From these options, having a /cc @upils
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function is now simpler. The remaining additional complexity is due to adding the byHash path. Tests are now responsible for tweaking the responses to simulate a bad mirror. |
||
| } | ||
| content[itemPath] = item.Content() | ||
| return nil | ||
| }) | ||
| } | ||
|
|
@@ -204,7 +216,7 @@ func makeSha256(b []byte) string { | |
| return fmt.Sprintf("%x", sha256.Sum256(b)) | ||
| } | ||
|
|
||
| func makeGzip(b []byte) []byte { | ||
| func MakeGzip(b []byte) []byte { | ||
| var buf bytes.Buffer | ||
| gz := gzip.NewWriter(&buf) | ||
| _, err := gz.Write(b) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[Note to reviewer]: I did some renaming to hopefully clarify what these variables are.