-
Notifications
You must be signed in to change notification settings - Fork 61
fix: prevent intermittent digest-mismatch errors during archive updates #296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
31bcd80
4cb9d90
0c9b69b
4837940
5f1bc3b
aa82b97
f80cc1b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,10 +64,13 @@ func Open(options *Options) (Archive, error) { | |
| type fetchFlags uint | ||
|
|
||
| const ( | ||
| fetchBulk fetchFlags = 1 << iota | ||
| fetchBulk fetchFlags = 1 << iota | ||
| fetchGzip | ||
| fetchDefault fetchFlags = 0 | ||
| ) | ||
|
|
||
| var errNotFound = fmt.Errorf("cannot find archive data") | ||
|
|
||
| var httpClient = &http.Client{ | ||
| Timeout: 30 * time.Second, | ||
| } | ||
|
|
@@ -334,9 +337,32 @@ func (index *ubuntuIndex) fetchIndex() error { | |
| } | ||
|
|
||
| logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component) | ||
| reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk) | ||
| if err != nil { | ||
| return err | ||
|
|
||
| // Prefer acquire-by-hash when the archive advertises it. By-hash URLs | ||
| // are content-addressed and so are immune to the inconsistent view of | ||
| // InRelease vs Packages.gz that mirrors and CDNs can serve while a | ||
| // publication is propagating. See https://wiki.ubuntu.com/AptByHash. | ||
| gzPath := packagesPath + ".gz" | ||
| var reader io.ReadSeekCloser | ||
| if index.release.Get("Acquire-By-Hash") == "yes" { | ||
| gzDigest, _, _ := control.ParsePathInfo(digests, gzPath) | ||
| if gzDigest != "" { | ||
| byHashPath := fmt.Sprintf("%s/binary-%s/by-hash/SHA256/%s", index.component, index.arch, gzDigest) | ||
| r, err := index.fetch(index.distPath(byHashPath), digest, fetchBulk|fetchGzip) | ||
| if err != nil && err != errNotFound { | ||
| return err | ||
| } | ||
| // On 404 fall through to the named-path fetch below: the hash | ||
| // may have been garbage-collected on the mirror. | ||
| reader = r | ||
| } | ||
| } | ||
| if reader == nil { | ||
| r, err := index.fetch(index.distPath(gzPath), digest, fetchBulk|fetchGzip) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| reader = r | ||
| } | ||
| ctrl, err := control.ParseReader("Package", reader) | ||
| if err != nil { | ||
|
|
@@ -410,13 +436,13 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS | |
| case 401: | ||
| return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label) | ||
| case 404: | ||
| return nil, fmt.Errorf("cannot find archive data") | ||
| return nil, errNotFound | ||
| default: | ||
| return nil, fmt.Errorf("error from archive: %v", resp.Status) | ||
| } | ||
|
|
||
| body := resp.Body | ||
| if strings.HasSuffix(path, ".gz") { | ||
| if flags&fetchGzip != 0 { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a behavioral change. Has anyone double checked that we don't have any other cases that were handled before and are not handled now?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I checked. We only had one call site with a path ending with |
||
| reader, err := gzip.NewReader(body) | ||
| if err != nil { | ||
| return nil, fmt.Errorf("cannot decompress data: %v", err) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,14 +83,18 @@ func (s *httpSuite) Do(req *http.Request) (*http.Response, error) { | |
| s.request = req | ||
| s.requests = append(s.requests, req) | ||
| body := s.response | ||
| status := s.status | ||
| s.logf("Request: %s", req.URL.String()) | ||
| if response, ok := s.responses[path.Clean(req.URL.Path)]; ok { | ||
| body = string(response) | ||
| } else if len(s.responses) > 0 && s.status == 200 { | ||
| // Unknown path with responses populated: behave like a real archive. | ||
| status = 404 | ||
| } | ||
| rsp := &http.Response{ | ||
| Body: io.NopCloser(strings.NewReader(body)), | ||
| Header: s.header, | ||
| StatusCode: s.status, | ||
| StatusCode: status, | ||
| } | ||
| return rsp, s.err | ||
| } | ||
|
|
@@ -625,6 +629,85 @@ func read(r io.Reader) string { | |
| return string(data) | ||
| } | ||
|
|
||
| func (s *httpSuite) sawByHashRequest() bool { | ||
| for _, req := range s.requests { | ||
| if strings.Contains(req.URL.Path, "/by-hash/SHA256/") { | ||
| return true | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the archive only has the right file in the right place, the only way for us to get the right content is to use that file. I might be missing something, but it looks like at the moment all we are looking at is a request being made inside a directory, possibly in the wrong place considering we fallback. |
||
| } | ||
| } | ||
| return false | ||
| } | ||
|
|
||
| func (s *httpSuite) TestFetchByHashSucceedsWhenNamedPathIsStale(c *C) { | ||
| s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { | ||
| r.AcquireByHash = true | ||
| r.PathOverrides = map[string][]byte{ | ||
| "main/binary-amd64/Packages.gz": testarchive.MakeGzip([]byte("stale Packages from previous publication")), | ||
| } | ||
| }) | ||
|
|
||
| options := archive.Options{ | ||
| Label: "ubuntu", | ||
| Version: "22.04", | ||
| Arch: "amd64", | ||
| Suites: []string{"jammy"}, | ||
| Components: []string{"main"}, | ||
| CacheDir: c.MkDir(), | ||
| PubKeys: []*packet.PublicKey{s.pubKey}, | ||
| } | ||
|
|
||
| testArchive, err := archive.Open(&options) | ||
| c.Assert(err, IsNil) | ||
|
|
||
| pkg, _, err := testArchive.Fetch("mypkg1") | ||
| c.Assert(err, IsNil) | ||
| c.Assert(read(pkg), Equals, "mypkg1 1.1 data") | ||
| c.Assert(s.sawByHashRequest(), Equals, true) | ||
| } | ||
|
|
||
| func (s *httpSuite) TestFetchByHashFallsBackOnNotFound(c *C) { | ||
| s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) { | ||
| r.AcquireByHash = true | ||
| r.ByHashSkips = []string{"main/binary-amd64/Packages.gz"} | ||
| }) | ||
|
|
||
| options := archive.Options{ | ||
| Label: "ubuntu", | ||
| Version: "22.04", | ||
| Arch: "amd64", | ||
| Suites: []string{"jammy"}, | ||
| Components: []string{"main"}, | ||
| CacheDir: c.MkDir(), | ||
| PubKeys: []*packet.PublicKey{s.pubKey}, | ||
| } | ||
|
|
||
| testArchive, err := archive.Open(&options) | ||
| c.Assert(err, IsNil) | ||
|
|
||
| pkg, _, err := testArchive.Fetch("mypkg1") | ||
| c.Assert(err, IsNil) | ||
| c.Assert(read(pkg), Equals, "mypkg1 1.1 data") | ||
| c.Assert(s.sawByHashRequest(), Equals, true) | ||
| } | ||
|
|
||
| func (s *httpSuite) TestFetchSkipsByHashWhenNotAdvertised(c *C) { | ||
| s.prepareArchive("jammy", "22.04", "amd64", []string{"main"}) | ||
|
|
||
| options := archive.Options{ | ||
| Label: "ubuntu", | ||
| Version: "22.04", | ||
| Arch: "amd64", | ||
| Suites: []string{"jammy"}, | ||
| Components: []string{"main"}, | ||
| CacheDir: c.MkDir(), | ||
| PubKeys: []*packet.PublicKey{s.pubKey}, | ||
| } | ||
|
|
||
| _, err := archive.Open(&options) | ||
| c.Assert(err, IsNil) | ||
| c.Assert(s.sawByHashRequest(), Equals, false) | ||
| } | ||
|
|
||
| // ---------------------------------------------------------------------------------------- | ||
| // Real archive tests, only enabled via: | ||
| // 1. --real-archive for non-Pro archives (e.g. standard jammy archive), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,7 +54,7 @@ func (gz *Gzip) Section() []byte { | |
| } | ||
|
|
||
| func (gz *Gzip) Content() []byte { | ||
| return makeGzip(gz.Item.Content()) | ||
| return MakeGzip(gz.Item.Content()) | ||
| } | ||
|
|
||
| type Package struct { | ||
|
|
@@ -107,6 +107,10 @@ type Release struct { | |
| Label string | ||
| Items []Item | ||
| PrivKey *packet.PrivateKey | ||
| // Fields below model acquire-by-hash and mirror inconsistencies for tests. | ||
| AcquireByHash bool | ||
| ByHashSkips []string | ||
| PathOverrides map[string][]byte | ||
| } | ||
|
|
||
| func (r *Release) Walk(f func(Item) error) error { | ||
|
|
@@ -127,6 +131,10 @@ func (r *Release) Content() []byte { | |
| content := item.Content() | ||
| fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path()) | ||
| } | ||
| acquireByHash := "" | ||
| if r.AcquireByHash { | ||
| acquireByHash = "Acquire-By-Hash: yes\n" | ||
| } | ||
| content := fmt.Sprintf(string(testutil.Reindent(` | ||
| Origin: Ubuntu | ||
| Label: %s | ||
|
|
@@ -137,9 +145,9 @@ func (r *Release) Content() []byte { | |
| Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x | ||
| Components: main restricted universe multiverse | ||
| Description: Ubuntu %s | ||
| SHA256: | ||
| %sSHA256: | ||
| %s | ||
| `)), r.Label, r.Suite, r.Version, r.Version, digests.String()) | ||
| `)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String()) | ||
|
|
||
| var buf bytes.Buffer | ||
| writer, err := clearsign.Encode(&buf, r.PrivKey, nil) | ||
|
|
@@ -158,14 +166,27 @@ func (r *Release) Content() []byte { | |
| } | ||
|
|
||
| func (r *Release) Render(prefix string, content map[string][]byte) error { | ||
| skipByHash := make(map[string]bool, len(r.ByHashSkips)) | ||
| for _, p := range r.ByHashSkips { | ||
| skipByHash[p] = true | ||
| } | ||
| return r.Walk(func(item Item) error { | ||
| itemPath := item.Path() | ||
| itemContent := item.Content() | ||
| if strings.HasPrefix(itemPath, "pool/") { | ||
| itemPath = path.Join(prefix, itemPath) | ||
| content[path.Join(prefix, itemPath)] = itemContent | ||
| return nil | ||
| } | ||
| distItemPath := path.Join(prefix, "dists", r.Suite, itemPath) | ||
| if override, ok := r.PathOverrides[itemPath]; ok { | ||
| content[distItemPath] = override | ||
| } else { | ||
| itemPath = path.Join(prefix, "dists", r.Suite, itemPath) | ||
| content[distItemPath] = itemContent | ||
| } | ||
| if r.AcquireByHash && itemPath != r.Path() && !skipByHash[itemPath] { | ||
| byHashPath := path.Join(prefix, "dists", r.Suite, path.Dir(itemPath), "by-hash", "SHA256", makeSha256(itemContent)) | ||
| content[byHashPath] = itemContent | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This used to be a very clean and simple function, and it'd be nice to keep it that way. From these options, having a /cc @upils
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function is now simpler. The remaining additional complexity is due to adding the byHash path. Tests are now responsible for tweaking the responses to simulate a bad mirror. |
||
| } | ||
| content[itemPath] = item.Content() | ||
| return nil | ||
| }) | ||
| } | ||
|
|
@@ -204,7 +225,7 @@ func makeSha256(b []byte) string { | |
| return fmt.Sprintf("%x", sha256.Sum256(b)) | ||
| } | ||
|
|
||
| func makeGzip(b []byte) []byte { | ||
| func MakeGzip(b []byte) []byte { | ||
| var buf bytes.Buffer | ||
| gz := gzip.NewWriter(&buf) | ||
| _, err := gz.Write(b) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This whole function needs to have its variable names reviewed and polished, as it's becoming unreadable. We have digests, digest, gzPath, gzDigest, byHashPath, etc. These things all talk about their types and suffixes, and not about what they are. And they are not the same thing.
/cc @upils