Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 32 additions & 6 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,13 @@ func Open(options *Options) (Archive, error) {
type fetchFlags uint

const (
fetchBulk fetchFlags = 1 << iota
fetchBulk fetchFlags = 1 << iota
fetchGzip
fetchDefault fetchFlags = 0
)

var errNotFound = fmt.Errorf("cannot find archive data")

var httpClient = &http.Client{
Timeout: 30 * time.Second,
}
Expand Down Expand Up @@ -334,9 +337,32 @@ func (index *ubuntuIndex) fetchIndex() error {
}

logf("Fetching index for %s %s %s %s component...", index.displayName(), index.version, index.suite, index.component)
reader, err := index.fetch(index.distPath(packagesPath+".gz"), digest, fetchBulk)
if err != nil {
return err

// Prefer acquire-by-hash when the archive advertises it. By-hash URLs
// are content-addressed and so are immune to the inconsistent view of
// InRelease vs Packages.gz that mirrors and CDNs can serve while a
// publication is propagating. See https://wiki.ubuntu.com/AptByHash.
gzPath := packagesPath + ".gz"

@niemeyer niemeyer Jun 15, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole function needs to have its variable names reviewed and polished, as it's becoming unreadable. We have digests, digest, gzPath, gzDigest, byHashPath, etc. These things all talk about their types and suffixes, and not about what they are. And they are not the same thing.

/cc @upils

var reader io.ReadSeekCloser
if index.release.Get("Acquire-By-Hash") == "yes" {
gzDigest, _, _ := control.ParsePathInfo(digests, gzPath)
if gzDigest != "" {
byHashPath := fmt.Sprintf("%s/binary-%s/by-hash/SHA256/%s", index.component, index.arch, gzDigest)
r, err := index.fetch(index.distPath(byHashPath), digest, fetchBulk|fetchGzip)
if err != nil && err != errNotFound {
return err
}
// On 404 fall through to the named-path fetch below: the hash
// may have been garbage-collected on the mirror.
reader = r
}
}
if reader == nil {
r, err := index.fetch(index.distPath(gzPath), digest, fetchBulk|fetchGzip)
if err != nil {
return err
}
reader = r
}
ctrl, err := control.ParseReader("Package", reader)
if err != nil {
Expand Down Expand Up @@ -410,13 +436,13 @@ func (index *ubuntuIndex) fetch(path, digest string, flags fetchFlags) (io.ReadS
case 401:
return nil, fmt.Errorf("cannot fetch from %q: unauthorized", index.label)
case 404:
return nil, fmt.Errorf("cannot find archive data")
return nil, errNotFound
default:
return nil, fmt.Errorf("error from archive: %v", resp.Status)
}

body := resp.Body
if strings.HasSuffix(path, ".gz") {
if flags&fetchGzip != 0 {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a behavioral change. Has anyone double checked that we don't have any other cases that were handled before and are not handled now?

@upils upils Jun 16, 2026

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I checked. We only had one call site with a path ending with .gz, in fetchIndex, which is reworked in this PR to use the fetchGzip flag.

reader, err := gzip.NewReader(body)
if err != nil {
return nil, fmt.Errorf("cannot decompress data: %v", err)
Expand Down
85 changes: 84 additions & 1 deletion internal/archive/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,18 @@ func (s *httpSuite) Do(req *http.Request) (*http.Response, error) {
s.request = req
s.requests = append(s.requests, req)
body := s.response
status := s.status
s.logf("Request: %s", req.URL.String())
if response, ok := s.responses[path.Clean(req.URL.Path)]; ok {
body = string(response)
} else if len(s.responses) > 0 && s.status == 200 {
// Unknown path with responses populated: behave like a real archive.
status = 404
}
rsp := &http.Response{
Body: io.NopCloser(strings.NewReader(body)),
Header: s.header,
StatusCode: s.status,
StatusCode: status,
}
return rsp, s.err
}
Expand Down Expand Up @@ -625,6 +629,85 @@ func read(r io.Reader) string {
return string(data)
}

func (s *httpSuite) sawByHashRequest() bool {
for _, req := range s.requests {
if strings.Contains(req.URL.Path, "/by-hash/SHA256/") {
return true

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the archive only has the right file in the right place, the only way for us to get the right content is to use that file. I might be missing something, but it looks like at the moment all we are looking at is a request being made inside a directory, possibly in the wrong place considering we fallback.

}
}
return false
}

func (s *httpSuite) TestFetchByHashSucceedsWhenNamedPathIsStale(c *C) {
s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) {
r.AcquireByHash = true
r.PathOverrides = map[string][]byte{
"main/binary-amd64/Packages.gz": testarchive.MakeGzip([]byte("stale Packages from previous publication")),
}
})

options := archive.Options{
Label: "ubuntu",
Version: "22.04",
Arch: "amd64",
Suites: []string{"jammy"},
Components: []string{"main"},
CacheDir: c.MkDir(),
PubKeys: []*packet.PublicKey{s.pubKey},
}

testArchive, err := archive.Open(&options)
c.Assert(err, IsNil)

pkg, _, err := testArchive.Fetch("mypkg1")
c.Assert(err, IsNil)
c.Assert(read(pkg), Equals, "mypkg1 1.1 data")
c.Assert(s.sawByHashRequest(), Equals, true)
}

func (s *httpSuite) TestFetchByHashFallsBackOnNotFound(c *C) {
s.prepareArchiveAdjustRelease("jammy", "22.04", "amd64", []string{"main"}, func(r *testarchive.Release) {
r.AcquireByHash = true
r.ByHashSkips = []string{"main/binary-amd64/Packages.gz"}
})

options := archive.Options{
Label: "ubuntu",
Version: "22.04",
Arch: "amd64",
Suites: []string{"jammy"},
Components: []string{"main"},
CacheDir: c.MkDir(),
PubKeys: []*packet.PublicKey{s.pubKey},
}

testArchive, err := archive.Open(&options)
c.Assert(err, IsNil)

pkg, _, err := testArchive.Fetch("mypkg1")
c.Assert(err, IsNil)
c.Assert(read(pkg), Equals, "mypkg1 1.1 data")
c.Assert(s.sawByHashRequest(), Equals, true)
}

func (s *httpSuite) TestFetchSkipsByHashWhenNotAdvertised(c *C) {
s.prepareArchive("jammy", "22.04", "amd64", []string{"main"})

options := archive.Options{
Label: "ubuntu",
Version: "22.04",
Arch: "amd64",
Suites: []string{"jammy"},
Components: []string{"main"},
CacheDir: c.MkDir(),
PubKeys: []*packet.PublicKey{s.pubKey},
}

_, err := archive.Open(&options)
c.Assert(err, IsNil)
c.Assert(s.sawByHashRequest(), Equals, false)
}

// ----------------------------------------------------------------------------------------
// Real archive tests, only enabled via:
// 1. --real-archive for non-Pro archives (e.g. standard jammy archive),
Expand Down
35 changes: 28 additions & 7 deletions internal/archive/testarchive/testarchive.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (gz *Gzip) Section() []byte {
}

func (gz *Gzip) Content() []byte {
return makeGzip(gz.Item.Content())
return MakeGzip(gz.Item.Content())
}

type Package struct {
Expand Down Expand Up @@ -107,6 +107,10 @@ type Release struct {
Label string
Items []Item
PrivKey *packet.PrivateKey
// Fields below model acquire-by-hash and mirror inconsistencies for tests.
AcquireByHash bool
ByHashSkips []string
PathOverrides map[string][]byte
}

func (r *Release) Walk(f func(Item) error) error {
Expand All @@ -127,6 +131,10 @@ func (r *Release) Content() []byte {
content := item.Content()
fmt.Fprintf(&digests, " %s %d %s\n", makeSha256(content), len(content), item.Path())
}
acquireByHash := ""
if r.AcquireByHash {
acquireByHash = "Acquire-By-Hash: yes\n"
}
content := fmt.Sprintf(string(testutil.Reindent(`
Origin: Ubuntu
Label: %s
Expand All @@ -137,9 +145,9 @@ func (r *Release) Content() []byte {
Architectures: amd64 arm64 armhf i386 ppc64el riscv64 s390x
Components: main restricted universe multiverse
Description: Ubuntu %s
SHA256:
%sSHA256:
%s
`)), r.Label, r.Suite, r.Version, r.Version, digests.String())
`)), r.Label, r.Suite, r.Version, r.Version, acquireByHash, digests.String())

var buf bytes.Buffer
writer, err := clearsign.Encode(&buf, r.PrivKey, nil)
Expand All @@ -158,14 +166,27 @@ func (r *Release) Content() []byte {
}

func (r *Release) Render(prefix string, content map[string][]byte) error {
skipByHash := make(map[string]bool, len(r.ByHashSkips))
for _, p := range r.ByHashSkips {
skipByHash[p] = true
}
return r.Walk(func(item Item) error {
itemPath := item.Path()
itemContent := item.Content()
if strings.HasPrefix(itemPath, "pool/") {
itemPath = path.Join(prefix, itemPath)
content[path.Join(prefix, itemPath)] = itemContent
return nil
}
distItemPath := path.Join(prefix, "dists", r.Suite, itemPath)
if override, ok := r.PathOverrides[itemPath]; ok {
content[distItemPath] = override
} else {
itemPath = path.Join(prefix, "dists", r.Suite, itemPath)
content[distItemPath] = itemContent
}
if r.AcquireByHash && itemPath != r.Path() && !skipByHash[itemPath] {
byHashPath := path.Join(prefix, "dists", r.Suite, path.Dir(itemPath), "by-hash", "SHA256", makeSha256(itemContent))
content[byHashPath] = itemContent

@niemeyer niemeyer Jun 15, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This used to be a very clean and simple function, and it'd be nice to keep it that way. From these options, having a ByHash bool (not "Acquire", that's on the fetch side) seems in line with the type semantics. The other options look like custom hacks that require reading both the test and the implementation to be made sense of, so we need to think a bit about how to represent these while keeping that clean and simple aspect.

/cc @upils

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is now simpler. The remaining additional complexity is due to adding the byHash path.

Tests are now responsible for tweaking the responses to simulate a bad mirror.

}
content[itemPath] = item.Content()
return nil
})
}
Expand Down Expand Up @@ -204,7 +225,7 @@ func makeSha256(b []byte) string {
return fmt.Sprintf("%x", sha256.Sum256(b))
}

func makeGzip(b []byte) []byte {
func MakeGzip(b []byte) []byte {
var buf bytes.Buffer
gz := gzip.NewWriter(&buf)
_, err := gz.Write(b)
Expand Down
Loading