diff --git a/scripts/repo/_repo_layout.py b/scripts/repo/_repo_layout.py
new file mode 100644
index 00000000000..d8334d8db79
--- /dev/null
+++ b/scripts/repo/_repo_layout.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: MIT
+"""Standard Azure Linux Repo Layout.
+
+Defines the fixed `channel x kind x arch` matrix that every published
+Azure Linux RPM tree follows. Both ``dnf-with-azl-repos`` (which
+discovers the layout under one or more URL prefixes) and
+``synthesize-repodata.py`` (which writes the layout from upstream
+inputs) consume :data:`SUBREPOS` directly.
+
+The matrix has six rows that have not changed for years; encoding it
+as a Python constant keeps the consumers trivial and avoids a JSON
+loader / validator layer that has to be kept in sync with the data.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+CHANNELS: tuple[str, ...] = ("base", "sdk")
+
+KIND_MAIN = "main"
+KIND_DEBUGINFO = "debuginfo"
+KIND_SRPMS = "srpms"
+ALL_KINDS: tuple[str, ...] = (KIND_MAIN, KIND_DEBUGINFO, KIND_SRPMS)
+
+
+@dataclass(frozen=True)
+class SubrepoSpec:
+    """One sub-repo in the standard layout."""
+
+    name: str       # stable short identifier (e.g. "base", "sdk-srpms")
+    channel: str    # one of CHANNELS
+    kind: str       # one of ALL_KINDS
+    per_arch: bool  # True iff `subpath` contains $basearch
+    subpath: str    # path under a layout prefix
+
+
+SUBREPOS: tuple[SubrepoSpec, ...] = (
+    SubrepoSpec("base", "base", KIND_MAIN, True, "base/$basearch"),
+    SubrepoSpec("base-debuginfo", "base", KIND_DEBUGINFO, True, "base/debuginfo/$basearch"),
+    SubrepoSpec("base-srpms", "base", KIND_SRPMS, False, "base/srpms"),
+    SubrepoSpec("sdk", "sdk", KIND_MAIN, True, "sdk/$basearch"),
+    SubrepoSpec("sdk-debuginfo", "sdk", KIND_DEBUGINFO, True, "sdk/debuginfo/$basearch"),
+    SubrepoSpec("sdk-srpms", "sdk", KIND_SRPMS, False, "sdk/srpms"),
+)
+
+
+# A handful of light invariants asserted at import time. These can
+# never fire with the constant above unmodified, but they guard
+# against typos in any future edit.
+assert all(s.channel in CHANNELS for s in SUBREPOS)
+assert all(s.kind in ALL_KINDS for s in SUBREPOS)
+assert all(s.per_arch == ("$basearch" in s.subpath) for s in SUBREPOS)
+assert len({s.name for s in SUBREPOS}) == len(SUBREPOS)
diff --git a/scripts/repo/dnf-with-azl-repos b/scripts/repo/dnf-with-azl-repos
new file mode 100755
index 00000000000..d12ab551648
--- /dev/null
+++ b/scripts/repo/dnf-with-azl-repos
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: MIT
+"""dnf-with-azl-repos -- invoke ``dnf`` against Azure Linux repos
+discovered under one or more URL prefixes that follow the Standard
+Azure Linux Repo Layout.
+
+For each ``--repo-prefix`` URL the conventional sub-repos described in
+``_repo_layout.SUBREPOS`` are HEAD-probed; any sub-repo whose
+``repodata/repomd.xml`` returns 404 is silently skipped (it just
+isn't published under that prefix). Other failure modes -- HTTP
+errors, TLS failures, DNS lookup failures, timeouts -- are surfaced
+as fatal so that a transient outage cannot silently shrink the repo
+set passed to ``dnf``. Reachable sub-repos are added with one
+``--repofrompath ID,URL --enablerepo=ID`` pair each, then any
+trailing dnf arguments are appended verbatim and the wrapper
+``execvp``\\s into ``dnf``.
+
+Usage:
+    dnf-with-azl-repos [--repo-prefix URL]... [--no-debuginfo]
+        [--no-srpms] [--] DNF_ARGS...
+ +Examples: + dnf-with-azl-repos --repo-prefix https://example.com/azl4 list available + dnf-with-azl-repos --repo-prefix https://example.com/azl4 install foo + +Notes: + * ``--repo-prefix`` is repeatable; each prefix yields its own repo + IDs (suffixed ``-1``, ``-2``, ... when more than one prefix is + given so the IDs stay unique). + * Probing uses the host arch (``uname -m``); URLs handed to dnf keep + ``$basearch`` so dnf performs its own substitution at use time. + * To pass ``--help`` (or any flag the wrapper would otherwise + intercept) through to dnf, separate it with ``--``, e.g. + ``dnf-with-azl-repos --repo-prefix URL -- --help``. +""" + +from __future__ import annotations + +import argparse +import os +import platform +import shutil +import sys +import urllib.error +import urllib.request +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from _repo_layout import ( # noqa: E402 + KIND_DEBUGINFO, + KIND_SRPMS, + SUBREPOS, +) + +PROG = Path(sys.argv[0]).name +USER_AGENT = "dnf-with-azl-repos/1" +PROBE_TIMEOUT = 30.0 + +# probe_repo() outcomes. +_PROBE_OK = "ok" +_PROBE_MISSING = "missing" # 404 -- expected for absent sub-repos. +_PROBE_FAIL = "fail" # everything else -- surfaced to the user. + + +def die(msg: str, *, code: int = 2) -> None: + print(f"{PROG}: {msg}", file=sys.stderr, flush=True) + sys.exit(code) + + +def log(msg: str) -> None: + print(msg, file=sys.stderr, flush=True) + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog=PROG, + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--repo-prefix", + action="append", + default=[], + metavar="URL", + help=( + "URL prefix assumed to host the Standard Azure Linux Repo " + "Layout. Repeatable." + ), + ) + parser.add_argument( + "--no-debuginfo", + action="store_true", + help="Do not enable debuginfo sub-repos from the discovered layout.", + ) + parser.add_argument( + "--no-srpms", + action="store_true", + help="Do not enable srpm sub-repos from the discovered layout.", + ) + args, passthrough = parser.parse_known_args(argv) + + # parse_known_args preserves a leading `--` in the unknowns; drop + # exactly one so we don't hand `dnf -- ...` to dnf. + if passthrough and passthrough[0] == "--": + passthrough = passthrough[1:] + args.dnf_args = passthrough + + if not args.repo_prefix: + die("at least one --repo-prefix URL is required (try --help)") + + excluded: set[str] = set() + if args.no_debuginfo: + excluded.add(KIND_DEBUGINFO) + if args.no_srpms: + excluded.add(KIND_SRPMS) + args.excluded_kinds = excluded + return args + + +def probe_repo(probe_url: str, *, timeout: float = PROBE_TIMEOUT) -> tuple[str, str | None]: + """HEAD ``/repodata/repomd.xml``. + + Returns ``(_PROBE_OK, None)`` on 2xx (or successful non-HTTP + responses such as ``file://``), ``(_PROBE_MISSING, None)`` on 404, + and ``(_PROBE_FAIL, "...")`` on any other transport error or + non-2xx HTTP status. The error string is suitable for inclusion in + a fatal-error message so the user can see the underlying cause. 
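+
+    A minimal usage sketch (the prefix URL is hypothetical)::
+
+        status, err = probe_repo("https://example.com/azl4/base/x86_64")
+        if status == _PROBE_FAIL:
+            die(f"probe failed: {err}")
+        enable = status == _PROBE_OK  # _PROBE_MISSING -> skip quietly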
+ """ + url = f"{probe_url.rstrip('/')}/repodata/repomd.xml" + req = urllib.request.Request( + url, method="HEAD", headers={"User-Agent": USER_AGENT} + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + # ``status`` is the HTTP status code for http(s); for + # ``file://`` and other non-HTTP schemes urllib's response + # has no status attribute -- a successful urlopen there + # already proved the file exists. + status = getattr(resp, "status", None) + if status is None or 200 <= status < 300: + return _PROBE_OK, None + return _PROBE_FAIL, f"HTTP {status}" + except urllib.error.HTTPError as e: + if e.code == 404: + return _PROBE_MISSING, None + return _PROBE_FAIL, f"HTTP {e.code}" + except urllib.error.URLError as e: + # urllib wraps a `file://` ENOENT as URLError(FileNotFoundError); + # treat that as MISSING so local fixtures behave like the HTTP 404 + # case. + if isinstance(e.reason, FileNotFoundError): + return _PROBE_MISSING, None + return _PROBE_FAIL, f"URL error: {e.reason}" + except TimeoutError: + return _PROBE_FAIL, f"timed out after {timeout:.0f}s" + except OSError as e: + return _PROBE_FAIL, f"OS error: {e}" + + +def main(argv: list[str] | None = None) -> int: + if argv is None: + argv = sys.argv[1:] + args = parse_args(argv) + + if shutil.which("dnf") is None: + die("dnf is required") + + host_arch = platform.machine() + prefixes: list[str] = args.repo_prefix + excluded_kinds: set[str] = args.excluded_kinds + + dnf_args: list[str] = ["--disablerepo=*", "--refresh"] + total_found = 0 + failures: list[str] = [] + + for idx, prefix in enumerate(prefixes, start=1): + prefix_trim = prefix.rstrip("/") + multi_suffix = f"-{idx}" if len(prefixes) > 1 else "" + + log(f"{PROG}: discovering repos under {prefix_trim}") + found_here = 0 + for sub in SUBREPOS: + if sub.kind in excluded_kinds: + continue + probe_rel = sub.subpath.replace("$basearch", host_arch) + probe_full = f"{prefix_trim}/{probe_rel}" + dnf_full = f"{prefix_trim}/{sub.subpath}" + repo_id = f"azl-{sub.name}{multi_suffix}" + + status, err = probe_repo(probe_full) + if status == _PROBE_OK: + log(f" + {repo_id} <- {dnf_full}") + dnf_args.extend([ + "--repofrompath", f"{repo_id},{dnf_full}", + f"--enablerepo={repo_id}", + ]) + found_here += 1 + elif status == _PROBE_MISSING: + log( + f" - {repo_id} (no repodata at " + f"{probe_full}/repodata/repomd.xml)" + ) + else: + log( + f" ! {repo_id} ({err}) at " + f"{probe_full}/repodata/repomd.xml" + ) + failures.append(f"{repo_id} <- {probe_full}: {err}") + if found_here == 0 and not failures: + log(f"{PROG}: warning: no repos discovered under {prefix_trim}") + total_found += found_here + + if failures: + die( + "transport failures while probing the following sub-repos -- " + "refusing to proceed with a partial repo set:\n " + + "\n ".join(failures) + ) + + if total_found == 0: + die("no repos discovered under any --repo-prefix") + + cmd = ["dnf", *dnf_args, *args.dnf_args] + os.execvp(cmd[0], cmd) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/repo/synthesize-repodata.py b/scripts/repo/synthesize-repodata.py new file mode 100755 index 00000000000..791f588949c --- /dev/null +++ b/scripts/repo/synthesize-repodata.py @@ -0,0 +1,1237 @@ +#!/usr/bin/env python3 +"""Route packages from one or more upstream RPM repos into the standard +Azure Linux per-channel/per-arch layout. 
+
+Reads multiple input RPM repositories (with `$basearch` expansion), unions
+their packages, asks `azldev package list --rpm-file ...` to assign each
+package to a publish channel, then writes per-channel/per-arch repodata
+under the Standard Azure Linux Repo Layout:
+
+    <output-dir>/base/<arch>/            # main binary RPMs, base channel
+    <output-dir>/base/debuginfo/<arch>/  # debuginfo/debugsource, base channel
+    <output-dir>/base/srpms/             # source RPMs, base channel
+    <output-dir>/sdk/<arch>/             # main binary RPMs, sdk channel
+    <output-dir>/sdk/debuginfo/<arch>/   # debuginfo/debugsource, sdk channel
+    <output-dir>/sdk/srpms/              # source RPMs, sdk channel
+
+Each emitted repo's package ``location_href`` references the original
+upstream RPM URL (so consumers download from the source repos).
+
+Two input-flag flavours, both repeatable and mixable:
+
+  --repo-prefix URL
+      Shorthand: assume URL is the prefix of a Standard Azure Linux Repo
+      Layout (i.e. the directory above `base/` and `sdk/`). The script
+      enumerates all six sub-repos under it and tolerates 404s on any of
+      them (silently skipped).
+
+  --repo TYPE:URL (TYPE in {main, debuginfo, srpms})
+      Explicit single repo. URL may contain `$basearch`, expanded to each
+      configured arch. 404s on explicit repos are fatal.
+
+Dependencies: python3-createrepo_c, azldev, dnf
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import json
+import shutil
+import ssl
+import subprocess
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from collections import Counter, defaultdict
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Iterable
+
+import createrepo_c as cr
+
+# `_repo_layout` is a sibling module in this directory.
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+from _repo_layout import (  # noqa: E402
+    ALL_KINDS,
+    CHANNELS,
+    KIND_DEBUGINFO,
+    KIND_MAIN,
+    KIND_SRPMS,
+    SUBREPOS,
+)
+
+# Repo root: this file lives at <repo-root>/scripts/repo/<script>.py, so the
+# project root is three parents up.
+DEFAULT_REPO_ROOT = Path(__file__).resolve().parent.parent.parent
+DEFAULT_ARCHES = ("x86_64", "aarch64")
+SRPM_ARCH = "src"
+CHANNEL_PREFIX = "rpm-"
+# The fixed Standard Azure Linux Repo Layout has exactly two output channels.
+# Anything else returned by azldev is treated as unpublished (and reported).
+ALLOWED_OUTPUT_CHANNELS = frozenset(CHANNELS)
+
+# HTTP knobs for repodata fetches.
+USER_AGENT = "synthesize-repodata/1"
+HTTP_TIMEOUT = 60.0
+HTTP_RETRIES = 3
+HTTP_BACKOFF_BASE = 1.0  # seconds; doubled per attempt.
+
+# repomd record types we generate ourselves in the output. The synth
+# tool only emits these -- auxiliary records (updateinfo, group,
+# group_gz, modules, ...) are intentionally NOT propagated because the
+# routing decisions can split a contributing input's packages across
+# destinations, so blindly copying e.g. a groupfile that references
+# packages by NEVRA would mis-reference packages routed elsewhere.
+# Consumers who need updateinfo/groups should fetch them from the
+# upstream repos directly (e.g. via a layered repo config) rather
+# than relying on the synth output.
+PACKAGE_RECORD_TYPES = frozenset({
+    "primary", "filelists", "other",
+    "primary_db", "filelists_db", "other_db",
+})
+
+# When the Phase-4 channel-inheritance fallback finds two or more channels
+# tied for the top spot among a component's published sibling rpms, prefer
+# this one explicitly rather than letting the decision fall out of a lex
+# sort over ALLOWED_OUTPUT_CHANNELS. The pick is still surfaced in the
+# decision's reason string AND a per-decision ``tie_break_used`` flag, and
+# decide_routing emits a WARN log once per tied component, so the case is
+# never silently masked.
+INHERITANCE_TIE_BREAK_DEFAULT = "base"
+
+
+# ---------------------------------------------------------------------------
+# Logging helpers (everything goes to stderr; stdout left clean)
+# ---------------------------------------------------------------------------
+
+def log(msg: str) -> None:
+    print(msg, file=sys.stderr, flush=True)
+
+
+def warn(msg: str) -> None:
+    print(f"WARN: {msg}", file=sys.stderr, flush=True)
+
+
+def fatal(msg: str) -> int:
+    print(f"ERROR: {msg}", file=sys.stderr, flush=True)
+    return 1
+
+
+# ---------------------------------------------------------------------------
+# Input-repo modelling
+# ---------------------------------------------------------------------------
+
+@dataclass(frozen=True)
+class InputRepo:
+    """One concrete (post-`$basearch`-expansion) upstream repo to ingest."""
+
+    kind: str    # main | debuginfo | srpms
+    arch: str    # x86_64 | aarch64 | src
+    url: str     # e.g. https://.../base/x86_64
+    origin: str  # 'prefix' (404 silent) | 'explicit' (404 fatal)
+
+    def cache_key(self) -> str:
+        # Stable, filesystem-safe; uniqueness comes from the full URL.
+        safe = self.url.replace("://", "_").replace("/", "_").replace(":", "_")
+        return f"{self.kind}-{self.arch}-{safe}"
+
+
+def expand_repo_prefix(prefix: str, arches: Iterable[str]) -> list[InputRepo]:
+    base = prefix.rstrip("/")
+    out: list[InputRepo] = []
+    for sub in SUBREPOS:
+        if sub.per_arch:
+            for arch in arches:
+                out.append(InputRepo(
+                    sub.kind, arch,
+                    f"{base}/{sub.subpath.replace('$basearch', arch)}",
+                    "prefix",
+                ))
+        else:
+            out.append(InputRepo(
+                sub.kind, SRPM_ARCH, f"{base}/{sub.subpath}", "prefix",
+            ))
+    return out
+
+
+def parse_explicit_repo(spec: str, arches: Iterable[str]) -> list[InputRepo]:
+    """Parse `--repo TYPE:URL` into one or more InputRepos.
+
+    URL handling for `main` and `debuginfo`:
+      * If URL contains `$basearch`, expand it once per arch in *arches*.
+      * Otherwise, the URL is taken as a single-arch repo and the arch is
+        inferred from the URL's final path segment (which must match one
+        of *arches*). Pass `--arch` with a single value to control
+        which arch list this is matched against.
+
+    `srpms` URLs are arch-agnostic and rejected if they contain `$basearch`.
+    """
+    if ":" not in spec:
+        raise ValueError(
+            f"--repo {spec!r}: expected TYPE:URL where TYPE in "
+            f"{{{', '.join(ALL_KINDS)}}}"
+        )
+    kind, url = spec.split(":", 1)
+    kind = kind.strip().lower()
+    url = url.strip()
+    if kind not in ALL_KINDS:
+        raise ValueError(
+            f"--repo {spec!r}: unknown TYPE {kind!r}; expected one of "
+            f"{{{', '.join(ALL_KINDS)}}}"
+        )
+    if kind == KIND_SRPMS:
+        if "$basearch" in url:
+            raise ValueError(
+                f"--repo {spec!r}: srpms repos are arch-agnostic; "
+                f"`$basearch` is not allowed in the URL"
+            )
+        return [InputRepo(KIND_SRPMS, SRPM_ARCH, url.rstrip("/"), "explicit")]
+    out: list[InputRepo] = []
+    if "$basearch" in url:
+        for arch in arches:
+            out.append(InputRepo(
+                kind, arch, url.replace("$basearch", arch).rstrip("/"),
+                "explicit",
+            ))
+    else:
+        # No $basearch: caller is asserting "this URL is for one specific
+        # arch". We can't tell which from the URL alone, so we infer from the
+        # last path component if it matches a known arch; otherwise refuse.
+        # Strip query/fragment first so signed URLs (`...?sig=...`) don't
+        # poison the inference.
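+        # Illustration (hypothetical URL):
+        #   "https://example.com/azl4/base/x86_64?sig=abc"
+        # -> path "/azl4/base/x86_64" -> last == "x86_64"; the query string
+        # never reaches the arch check.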
+ parts = urllib.parse.urlsplit(url) + path = parts.path.rstrip("/") + last = path.rsplit("/", 1)[-1] if path else "" + if last in arches: + out.append(InputRepo(kind, last, url.rstrip("/"), "explicit")) + else: + raise ValueError( + f"--repo {spec!r}: URL has no `$basearch` and its final path " + f"component {last!r} is not a known arch ({', '.join(arches)}); " + f"cannot determine arch" + ) + return out + + +def dedup_input_repos(repos: Iterable[InputRepo]) -> list[InputRepo]: + """Drop duplicate (kind, arch, url) entries, preserving order. Explicit + origin wins over prefix origin so 404s remain fatal where the user asked + for them explicitly.""" + seen: dict[tuple[str, str, str], InputRepo] = {} + for r in repos: + key = (r.kind, r.arch, r.url) + existing = seen.get(key) + if existing is None: + seen[key] = r + elif r.origin == "explicit" and existing.origin == "prefix": + seen[key] = r + return list(seen.values()) + + +# --------------------------------------------------------------------------- +# Phase 1: download repodata +# --------------------------------------------------------------------------- + +def _http_get( + url: str, dest: Path, ssl_context: ssl.SSLContext | None, + *, timeout: float = HTTP_TIMEOUT, retries: int = HTTP_RETRIES, +) -> None: + """Download *url* to *dest* with timeout, User-Agent, and bounded retry. + + Retries on TimeoutError / OSError / URLError and on HTTP 5xx; bails + immediately on HTTP 4xx and on permanent local-fs errors + (``FileNotFoundError`` wrapped in URLError, e.g. for ``file://`` + URLs that point at a missing file) so the caller can react + (e.g. silently skip 404 / ENOENT from a prefix-derived sub-repo). + """ + dest.parent.mkdir(parents=True, exist_ok=True) + req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT}) + last_exc: BaseException | None = None + for attempt in range(retries): + try: + with urllib.request.urlopen( + req, timeout=timeout, context=ssl_context, + ) as resp, open(dest, "wb") as fh: + shutil.copyfileobj(resp, fh) + return + except urllib.error.HTTPError as e: + if 500 <= e.code < 600 and attempt < retries - 1: + last_exc = e + log(f" HTTP {e.code} fetching {url}; retrying") + time.sleep(HTTP_BACKOFF_BASE * (2 ** attempt)) + continue + raise + except urllib.error.URLError as e: + if isinstance(e.reason, FileNotFoundError): + raise + if attempt < retries - 1: + last_exc = e + log(f" URL error fetching {url} ({e.reason}); retrying") + time.sleep(HTTP_BACKOFF_BASE * (2 ** attempt)) + continue + raise + except (TimeoutError, OSError) as e: + if attempt < retries - 1: + last_exc = e + log(f" transport error fetching {url} ({e}); retrying") + time.sleep(HTTP_BACKOFF_BASE * (2 ** attempt)) + continue + raise + # Defensive: loop only exits via return/raise above. + if last_exc is not None: + raise last_exc + + +# --------------------------------------------------------------------------- +# SSL configuration +# --------------------------------------------------------------------------- + +def build_ssl_context(ca_bundle: Path | None, insecure: bool) -> ssl.SSLContext | None: + """Return an SSLContext honouring --ca-bundle / --insecure, or None for + Python's default behaviour. 
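+
+    A usage sketch (the CA-bundle path is hypothetical)::
+
+        ctx = build_ssl_context(Path("/etc/pki/tls/azl-ca.pem"), False)
+        urllib.request.urlopen(
+            "https://example.com/azl4/base/x86_64/repodata/repomd.xml",
+            timeout=30, context=ctx,
+        )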
+ """ + if insecure: + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + warn("TLS certificate verification disabled (--insecure); " + "connections are NOT authenticated") + return ctx + if ca_bundle is not None: + ctx = ssl.create_default_context(cafile=str(ca_bundle)) + log(f" using custom CA bundle: {ca_bundle}") + return ctx + return None + + +def download_repo_metadata( + repo: InputRepo, cache_root: Path, + ssl_context: ssl.SSLContext | None, +) -> Path | None: + """Download every record listed in *repo*'s repomd into a cache dir. + + We pull primary/filelists/other for the package universe AND every + auxiliary record (updateinfo, group, modules, ...) so phase 6 can + copy non-package metadata through to routed destinations. + + Returns the path to the dir containing ``repodata/``, or None if + the repo's ``repomd.xml`` returned 404 and *repo* was prefix-derived + (silent skip). Other HTTP errors and explicit-origin 404s raise. + """ + cache_dir = cache_root / repo.cache_key() + repodata_dir = cache_dir / "repodata" + repodata_dir.mkdir(parents=True, exist_ok=True) + + repomd_url = urllib.parse.urljoin(repo.url.rstrip("/") + "/", + "repodata/repomd.xml") + repomd_path = repodata_dir / "repomd.xml" + log(f" fetching {repomd_url}") + try: + _http_get(repomd_url, repomd_path, ssl_context) + except urllib.error.HTTPError as e: + if e.code == 404 and repo.origin == "prefix": + log(f" -> 404, skipping (prefix-derived, non-fatal)") + shutil.rmtree(cache_dir, ignore_errors=True) + return None + raise + except urllib.error.URLError as e: + # ``file://`` URLs surface a missing file as + # URLError(FileNotFoundError) rather than HTTPError(404); treat + # that as the local-fs equivalent so prefix-derived sub-repos + # under ``file://`` fixtures are silently skipped just like 404s. + if ( + isinstance(e.reason, FileNotFoundError) + and repo.origin == "prefix" + ): + log(f" -> not found, skipping (prefix-derived, non-fatal)") + shutil.rmtree(cache_dir, ignore_errors=True) + return None + raise + + repomd = cr.Repomd() + cr.xml_parse_repomd(str(repomd_path), repomd, lambda *_: True) + + base = repo.url.rstrip("/") + "/" + for record in repomd.records: + # Only fetch the records we'll actually consume (primary, + # filelists, other, plus their _db variants). See + # PACKAGE_RECORD_TYPES above for why we skip aux records. + if record.type not in PACKAGE_RECORD_TYPES: + continue + href = record.location_href or "" + if not href: + continue + url = urllib.parse.urljoin(base, href) + # Constrain the cache destination path so a hostile/malformed + # repomd can't write outside cache_dir. + safe_rel = href.lstrip("/") + if ".." in Path(safe_rel).parts: + raise RuntimeError( + f"refusing to write metadata record outside cache: {href!r}" + ) + dest = cache_dir / safe_rel + log(f" fetching {url}") + _http_get(url, dest, ssl_context) + return cache_dir + + +# --------------------------------------------------------------------------- +# Phase 2: build the package universe + RPM source map +# --------------------------------------------------------------------------- + +# Universe key: (repo_kind, repo_arch, pkg_name, pkg_epoch, pkg_version, +# pkg_release, pkg_arch). The first two fields identify the destination +# (channel/arch) slot; the last five form the package's NEVRA so that two +# different versions of the same package occupy different slots and are both +# preserved in the output. 
+UniverseKey = tuple[str, str, str, str, str, str, str]
+
+
+@dataclass
+class UniverseEntry:
+    """One NEVRA slot in the unioned package universe (one entry per
+    distinct package version)."""
+
+    repo: InputRepo
+    source_pkg_name: str  # extracted from rpm_sourcerpm (or pkg name for srpms)
+
+
+def _pkg_identity(pkg) -> tuple[str, str, str, str, str]:
+    """Return the package's NEVRA tuple (name, epoch, version, release, arch).
+
+    Epoch is normalised to '0' when missing/empty so two records that differ
+    only by `epoch=None` vs `epoch="0"` compare equal.
+    """
+    return (pkg.name, pkg.epoch or "0", pkg.version, pkg.release, pkg.arch)
+
+
+def _format_nevra(pkg) -> str:
+    """Return a human-readable NEVRA string, suitable for log/warn messages."""
+    epoch = pkg.epoch or "0"
+    epoch_prefix = f"{epoch}:" if epoch != "0" else ""
+    return f"{pkg.name}-{epoch_prefix}{pkg.version}-{pkg.release}.{pkg.arch}"
+
+
+def _strip_srpm_suffix(rpm_sourcerpm: str | None) -> str:
+    """Extract the source-package name from an RPM's ``rpm_sourcerpm`` field.
+
+    Example: `bash-5.2.21-1.azl4.src.rpm` -> `bash`.
+    """
+    if not rpm_sourcerpm:
+        return ""
+    s = rpm_sourcerpm
+    if s.endswith(".src.rpm"):
+        s = s[: -len(".src.rpm")]
+    # Strip -release then -version (best-effort; matches the inspiration
+    # script's approach).
+    parts = s.rsplit("-", 2)
+    if len(parts) >= 3:
+        return parts[0]
+    return s
+
+
+def _find_metadata_path(repo_dir: Path, kind: str) -> str:
+    """Return the absolute path of *kind* (primary|filelists|other) for the
+    cached repo at *repo_dir*."""
+    repomd = cr.Repomd()
+    cr.xml_parse_repomd(
+        str(repo_dir / "repodata" / "repomd.xml"), repomd, lambda *_: True
+    )
+    for rec in repomd.records:
+        if rec.type == kind:
+            return str(repo_dir / rec.location_href)
+    raise RuntimeError(f"{repo_dir}/repodata: no `{kind}` record in repomd.xml")
+
+
+def build_package_universe(
+    repo_to_dir: dict[InputRepo, Path],
+) -> tuple[
+    dict[UniverseKey, UniverseEntry],
+    list[dict],
+]:
+    """First pass: scan only primary.xml of each repo to build the
+    package universe (one entry per distinct NEVRA) and the rpm_source_map
+    for azldev.
+
+    Returns (universe, rpm_source_map) where:
+      universe[(kind, arch, name, epoch, version, release, pkg_arch)]
+          -> UniverseEntry
+      rpm_source_map: list of {packageName, sourcePackageName} (deduped).
+
+    Duplicate-NEVRA collisions are deduped:
+      * cross-repo (same NEVRA in two input repos) -> WARN, keep first.
+      * same-repo (broken upstream metadata) -> quiet log, keep first.
+    Multiple distinct versions of the same package name are NOT collisions:
+    each NEVRA gets its own universe entry and lands in the output.
+    """
+    universe: dict[UniverseKey, UniverseEntry] = {}
+    src_map_set: set[tuple[str, str]] = set()
+
+    for repo, repo_dir in repo_to_dir.items():
+        primary = _find_metadata_path(repo_dir, "primary")
+        log(f"  scanning {repo.kind}/{repo.arch}: {repo.url}")
+
+        def pkgcb(pkg, *, _repo=repo):
+            key: UniverseKey = (_repo.kind, _repo.arch) + _pkg_identity(pkg)
+            if _repo.kind == KIND_SRPMS:
+                source_name = pkg.name
+            else:
+                source_name = _strip_srpm_suffix(pkg.rpm_sourcerpm)
+                if not source_name:
+                    # An RPM with no sourcerpm is unusual but harmless: route
+                    # it as if it were its own SRPM so azldev still receives
+                    # an entry.
+                    source_name = pkg.name
+            # Always feed the source map; set semantics dedupe.
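+            # e.g. ("bash", "bash") seen from both the x86_64 and aarch64
+            # inputs collapses to a single map row (names illustrative).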
+ src_map_set.add((pkg.name, source_name)) + existing = universe.get(key) + if existing is None: + universe[key] = UniverseEntry(_repo, source_name) + return + nevra = _format_nevra(pkg) + if existing.repo.url != _repo.url: + warn( + f"duplicate NEVRA in {_repo.kind}/{_repo.arch}: " + f"{nevra} found in both {existing.repo.url} and " + f"{_repo.url}; keeping the copy from the first repo" + ) + else: + # Same NEVRA listed twice within one repo -> broken upstream + # metadata. Worth a note but not alarming. + log( + f" note: NEVRA {nevra} appears multiple times in " + f"{_repo.url}; deduping" + ) + + cr.xml_parse_primary( + primary, pkgcb=pkgcb, do_files=False, warningcb=lambda *_: True + ) + + rpm_source_map = sorted( + ({"packageName": pn, "sourcePackageName": sn} + for pn, sn in src_map_set), + key=lambda r: (r["packageName"], r["sourcePackageName"]), + ) + return universe, rpm_source_map + + +# --------------------------------------------------------------------------- +# Phase 3: ask azldev for routing +# --------------------------------------------------------------------------- + +def query_known_components(repo_root: Path) -> set[str]: + """Return the set of legitimate Azure Linux component names. + + Used to gate package routing: a row whose `component` is not in this + set was synthesised by azldev's project-default fallback for an + unknown source package, so the row's `publishChannel` is meaningless + and we treat the package as foreign / unpublished. + """ + log(" querying azldev comp list for legitimate component names") + proc = subprocess.run( + ["azldev", "comp", "list", "-a", "-q", "-O", "json"], + capture_output=True, text=True, cwd=repo_root, check=False, + ) + if proc.returncode != 0: + sys.stderr.write(proc.stderr) + raise RuntimeError("azldev comp list -a failed") + rows = json.loads(proc.stdout) + names = {row["name"] for row in rows if row.get("name")} + log(f" {len(names)} legitimate component(s)") + return names + + +@dataclass +class AzldevRouting: + """Resolved azldev routing tables, keyed for lookup.""" + + # By type, then by package name. publishChannel may be empty. + rpm: dict[str, dict] = field(default_factory=dict) + srpm: dict[str, dict] = field(default_factory=dict) + # Component -> Counter[channel-suffix] for inheritance fallback. Only + # populated from rpm rows whose component is a legitimate Azure Linux + # component AND that have a non-empty, allowed publishChannel. + component_channels: dict[str, Counter] = field(default_factory=dict) + # Names rejected because their component is not a legitimate AZL + # component (i.e. azldev fell back to project-default routing for + # something that isn't actually built by AZL). 
+ foreign_names: set[str] = field(default_factory=set) + + +def query_azldev( + repo_root: Path, + rpm_source_map: list[dict], + scratch_dir: Path, + known_components: set[str], +) -> AzldevRouting: + map_path = scratch_dir / "rpm_source_map.json" + map_path.write_text(json.dumps(rpm_source_map, indent=2)) + + log(f" invoking azldev (map: {len(rpm_source_map)} entries)") + proc = subprocess.run( + ["azldev", "package", "list", "--rpm-file", str(map_path), + "-q", "-O", "json"], + capture_output=True, text=True, cwd=repo_root, check=False, + ) + if proc.returncode != 0: + sys.stderr.write(proc.stderr) + raise RuntimeError("azldev package list --rpm-file failed") + rows = json.loads(proc.stdout) + + routing = AzldevRouting() + component_channels: dict[str, Counter] = defaultdict(Counter) + for row in rows: + name = row.get("packageName", "") + rtype = row.get("type", "") + component = row.get("component", "") or "" + raw_channel = row.get("publishChannel", "") or "" + channel = ( + raw_channel[len(CHANNEL_PREFIX):] + if raw_channel.startswith(CHANNEL_PREFIX) else raw_channel + ) + if component and component not in known_components: + # Foreign package: azldev synthesised a default channel for + # something not actually built by AZL. Track and skip. + routing.foreign_names.add(name) + continue + record = { + "component": component, + "channel": channel, # may be "" + "raw_channel": raw_channel, + "group": row.get("group", "") or "", + } + if rtype == "srpm": + routing.srpm[name] = record + else: # default to rpm + routing.rpm[name] = record + if (channel and component + and channel in ALLOWED_OUTPUT_CHANNELS): + component_channels[component][channel] += 1 + routing.component_channels = dict(component_channels) + return routing + + +# --------------------------------------------------------------------------- +# Phase 4: route each universe entry -> (channel, kind, arch) destination +# --------------------------------------------------------------------------- + +@dataclass +class RoutingDecision: + """Per-universe-entry decision: where the package should land, or why + it was excluded.""" + + dest_channel: str | None = None # 'base' | 'sdk' | None (=excluded) + reason: str = "" # human-readable provenance + inherited: bool = False # was Phase-4 inheritance used? + tie_break_used: bool = False # did inheritance pick via tie-break? + + +def _inherit_channel( + component: str, + component_channels: dict[str, Counter], + tie_break_default: str = INHERITANCE_TIE_BREAK_DEFAULT, +) -> tuple[str | None, str, bool]: + """Infer a publish channel for *component* from its sibling rpms. + + Returns ``(channel, reason, tie_break_used)``. ``channel`` is None + when there are no published siblings to inherit from. When two or + more channels tie for the top spot ``tie_break_default`` is + preferred, with lex sort as a defensive fallback if the configured + default isn't among the tied channels (which shouldn't be reachable + today since only ALLOWED_OUTPUT_CHANNELS are added to + ``component_channels`` in :func:`query_azldev`, but the code + shouldn't silently do the wrong thing if that invariant ever + breaks). ``tie_break_used`` lets callers surface the case + explicitly rather than letting the pick masquerade as "the data + said so". 
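+
+    A sketch of the tie-break path (component name and counts made up)::
+
+        chans = {"acme-lib": Counter({"base": 2, "sdk": 2})}
+        _inherit_channel("acme-lib", chans)
+        # -> ("base", "inherited from sibling rpms (component=acme-lib,
+        #     channels={'base': 2, 'sdk': 2}, tied at 2, picked base via
+        #     tie-break default)", True)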
+ """ + counts = component_channels.get(component) + if not counts: + return None, "no sibling rpm has a published channel", False + + ranked = counts.most_common() + top_count = ranked[0][1] + tied = [ch for ch, n in ranked if n == top_count] + + if len(tied) > 1: + picked = ( + tie_break_default if tie_break_default in tied else sorted(tied)[0] + ) + reason = ( + f"inherited from sibling rpms (component={component}, " + f"channels={dict(ranked)}, tied at {top_count}, picked " + f"{picked} via tie-break default)" + ) + return picked, reason, True + + picked = tied[0] + if len(counts) == 1: + return picked, ( + f"inherited from sibling rpms (component={component})" + ), False + return picked, ( + f"inherited from sibling rpms (component={component}, " + f"channels={dict(ranked)}, picked {picked})" + ), False + + +def decide_routing( + universe: dict[UniverseKey, UniverseEntry], + routing: AzldevRouting, + tie_break_default: str = INHERITANCE_TIE_BREAK_DEFAULT, +) -> dict[UniverseKey, RoutingDecision]: + """Produce one RoutingDecision per universe entry (i.e. per NEVRA). + + Routing lookup is name-based: every NEVRA of a given package name lands + in the same destination channel. Decisions are still keyed per NEVRA so + that downstream emit + count logic can iterate them 1:1 with the + universe. + + NOTE: TODO(channel-inheritance) -- the new `azldev package list + --rpm-file` output reports an empty publishChannel for type=srpm rows + and for binary RPMs not explicitly configured (e.g. *-debuginfo, + *-debugsource). We work around this by inheriting the channel from the + parent component's published binary rpms. Once the underlying TOML + config (and azldev) is updated to publish srpm/debuginfo channels + explicitly, remove this inference and treat empty publishChannel + strictly (i.e. mark the package as unpublished). + """ + decisions: dict[UniverseKey, RoutingDecision] = {} + tied_components_warned: set[str] = set() + for key, entry in universe.items(): + kind = key[0] + name = key[2] + # Foreign packages (azldev fell back to project defaults for an + # unknown source component) are unpublished by definition. + if name in routing.foreign_names: + decisions[key] = RoutingDecision( + None, + "azldev row had a project-default channel but the resolved " + "component is not a legitimate Azure Linux component", + ) + continue + if kind == KIND_SRPMS: + row = routing.srpm.get(name) + else: + row = routing.rpm.get(name) + if row is None: + decisions[key] = RoutingDecision( + None, "no azldev entry for package" + ) + continue + channel = row["channel"] + if channel: + if channel not in ALLOWED_OUTPUT_CHANNELS: + decisions[key] = RoutingDecision( + None, + f"azldev publishChannel={row['raw_channel']!r} is not " + f"one of the allowed standard-layout channels " + f"({sorted(ALLOWED_OUTPUT_CHANNELS)})", + ) + continue + decisions[key] = RoutingDecision( + channel, f"azldev publishChannel={row['raw_channel']!r}" + ) + continue + # Empty channel -> inheritance fallback (TODO above). + inherited, why, tie_break_used = _inherit_channel( + row["component"], + routing.component_channels, + tie_break_default, + ) + if inherited is None: + decisions[key] = RoutingDecision( + None, f"azldev publishChannel empty and {why}" + ) + else: + # Warn at most once per tied component: the tie depends + # only on (component, component_channels) so emitting on + # every affected NEVRA would just spam. 
+ if ( + tie_break_used + and row["component"] not in tied_components_warned + ): + tied_components_warned.add(row["component"]) + warn( + f"channel-inheritance tie for component " + f"{row['component']!r}: {why}; the configured " + f"tie-break default ({tie_break_default!r}) was " + f"used, not the data -- fix the source TOML if " + f"this is wrong" + ) + decisions[key] = RoutingDecision( + inherited, + f"azldev publishChannel empty; {why}", + inherited=True, + tie_break_used=tie_break_used, + ) + return decisions + + +# --------------------------------------------------------------------------- +# Phase 5: per-destination writers +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class Destination: + channel: str # 'base' | 'sdk' + kind: str # main | debuginfo | srpms + arch: str # x86_64 | aarch64 | src + + def relpath(self) -> str: + if self.kind == KIND_MAIN: + return f"{self.channel}/{self.arch}" + if self.kind == KIND_DEBUGINFO: + return f"{self.channel}/debuginfo/{self.arch}" + if self.kind == KIND_SRPMS: + return f"{self.channel}/srpms" + raise ValueError(f"unknown kind: {self.kind}") + + +class _RepoWriter: + """Manages the createrepo_c XML+sqlite triple for one destination.""" + + # (xml record name, db record name, xml class, db class) + _STREAMS: tuple[tuple[str, str, type, type], ...] = ( + ("primary", "primary_db", cr.PrimaryXmlFile, cr.PrimarySqlite), + ("filelists", "filelists_db", cr.FilelistsXmlFile, cr.FilelistsSqlite), + ("other", "other_db", cr.OtherXmlFile, cr.OtherSqlite), + ) + + def __init__(self, dest: Destination, output_dir: Path, pkg_count: int): + self.dest = dest + self.repodata_dir = output_dir / dest.relpath() / "repodata" + if self.repodata_dir.exists(): + shutil.rmtree(self.repodata_dir) + self.repodata_dir.mkdir(parents=True, exist_ok=True) + + self._streams: list[tuple[str, str, str, str, object, object]] = [] + for xml_name, db_name, xml_cls, db_cls in self._STREAMS: + xml_path = str(self.repodata_dir / f"{xml_name}.xml.gz") + db_path = str(self.repodata_dir / f"{xml_name}.sqlite") + xml = xml_cls(xml_path) + db = db_cls(db_path) + xml.set_num_of_pkgs(pkg_count) + self._streams.append( + (xml_name, db_name, xml_path, db_path, xml, db) + ) + self.added = 0 + + def add_pkg(self, pkg: cr.Package) -> None: + for _, _, _, _, xml, db in self._streams: + xml.add_pkg(pkg) + db.add_pkg(pkg) + self.added += 1 + + def finish(self) -> None: + """Close all streams and write repomd.xml.""" + repomd = cr.Repomd() + xml_records: list[cr.RepomdRecord] = [] + db_records: list[cr.RepomdRecord] = [] + for xml_name, db_name, xml_path, db_path, xml, db in self._streams: + xml.close() + xml_rec = cr.RepomdRecord(xml_name, xml_path) + xml_rec.fill(cr.SHA256) + db.dbinfo_update(xml_rec.checksum) + db.close() + db_rec = cr.RepomdRecord(db_name, db_path) + db_rec.fill(cr.SHA256) + xml_records.append(xml_rec) + db_records.append(db_rec) + for rec in xml_records: + repomd.set_record(rec) + for rec in db_records: + repomd.set_record(rec) + (self.repodata_dir / "repomd.xml").write_text(repomd.xml_dump()) + + +# --------------------------------------------------------------------------- +# Phase 6: emit packages into writers +# --------------------------------------------------------------------------- + + +def emit_repos( + repo_to_dir: dict[InputRepo, Path], + universe: dict[UniverseKey, UniverseEntry], + decisions: dict[UniverseKey, RoutingDecision], + output_dir: Path, +) -> tuple[dict[Destination, int], list[dict], 
list[dict]]: + """Second pass over each input repo: stream every package, decide its + destination, set its absolute location_href, hand it to the writer. + + Returns (per_destination_counts, unpublished_records, fallback_records). + + Counts are per NEVRA. The unpublished and fallback reports both dedupe + by (kind, arch, name) since the routing reason is name-based and + listing every NEVRA of an affected name would just be noise. + """ + # Precompute counts per destination (for XML headers), unpublished + # records (excluded from output), and fallback records (routed via + # Phase-4 inheritance rather than an explicit publishChannel). + dest_counts: Counter[Destination] = Counter() + unpublished: list[dict] = [] + unpub_seen: set[tuple[str, str, str]] = set() + fallbacks: list[dict] = [] + fb_seen: set[tuple[str, str, str]] = set() + for key, decision in decisions.items(): + kind = key[0] + arch = key[1] + name = key[2] + entry = universe[key] + if decision.dest_channel is None: + nameslot = (kind, arch, name) + if nameslot not in unpub_seen: + unpub_seen.add(nameslot) + unpublished.append({ + "name": name, + "kind": kind, + "arch": arch, + "source_repo": entry.repo.url, + "source_package": entry.source_pkg_name, + "reason": decision.reason, + }) + continue + dest = Destination(decision.dest_channel, kind, arch) + dest_counts[dest] += 1 + if decision.inherited: + nameslot = (kind, arch, name) + if nameslot not in fb_seen: + fb_seen.add(nameslot) + fallbacks.append({ + "name": name, + "kind": kind, + "arch": arch, + "source_repo": entry.repo.url, + "source_package": entry.source_pkg_name, + "dest_channel": decision.dest_channel, + "reason": decision.reason, + "tie_break_used": decision.tie_break_used, + }) + + # Open writers up-front with correct counts. + writers: dict[Destination, _RepoWriter] = { + d: _RepoWriter(d, output_dir, n) for d, n in dest_counts.items() + } + + # Iterate each input repo's full metadata and route packages. + emitted: set[UniverseKey] = set() + for repo, repo_dir in repo_to_dir.items(): + primary = _find_metadata_path(repo_dir, "primary") + filelists = _find_metadata_path(repo_dir, "filelists") + other = _find_metadata_path(repo_dir, "other") + + repo_base = repo.url.rstrip("/") + "/" + pkg_iter = cr.PackageIterator( + primary_path=primary, + filelists_path=filelists, + other_path=other, + warningcb=lambda *_: True, + ) + for pkg in pkg_iter: + key: UniverseKey = (repo.kind, repo.arch) + _pkg_identity(pkg) + entry = universe.get(key) + if entry is None or entry.repo.url != repo.url: + # Either filtered out earlier (shouldn't happen) or this is + # the cross-repo duplicate copy already warned about during + # the first pass; skip silently. + continue + if key in emitted: + # Same NEVRA appearing twice within this repo: already + # logged in build_package_universe; skip silently so writer + # counts stay consistent with the XML headers. + continue + decision = decisions[key] + if decision.dest_channel is None: + continue + dest = Destination(decision.dest_channel, repo.kind, repo.arch) + # Rewrite location_href to an absolute upstream URL so consumers + # download from the source repo. urljoin honors absolute hrefs + # in the input (in case the input repo already published one) + # and respects any xml:base on the input package. 
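+            # e.g. (hypothetical values) a relative href such as
+            #   Packages/b/bash-5.2.21-1.azl4.x86_64.rpm
+            # under repo_base https://example.com/azl4/base/x86_64/ joins to
+            # the absolute upstream package URL.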
+ input_base = pkg.location_base or repo_base + absolute_href = urllib.parse.urljoin( + input_base, pkg.location_href or "" + ) + pkg.location_href = absolute_href + pkg.location_base = "" + writers[dest].add_pkg(pkg) + emitted.add(key) + + for dest, writer in writers.items(): + writer.finish() + if writer.added != dest_counts[dest]: + warn( + f"writer for {dest.relpath()} expected " + f"{dest_counts[dest]} pkgs but emitted {writer.added}" + ) + + return dict(dest_counts), unpublished, fallbacks + + +# --------------------------------------------------------------------------- +# Phase 7: unpublished-packages and fallback-channel reports +# --------------------------------------------------------------------------- + +def write_unpublished_report( + unpublished: list[dict], output_dir: Path +) -> tuple[Path, Path]: + json_path = output_dir / "unpublished-packages.json" + txt_path = output_dir / "unpublished-packages.txt" + json_path.write_text(json.dumps(unpublished, indent=2)) + + by_reason: dict[str, list[dict]] = defaultdict(list) + for r in unpublished: + by_reason[r["reason"]].append(r) + + with txt_path.open("w") as fh: + fh.write( + f"# {len(unpublished)} package(s) excluded from the routed repos " + f"because no publish channel could be assigned.\n" + f"# Grouped by reason; within each group, sorted by " + f"(kind, arch, name).\n" + ) + for reason in sorted(by_reason): + entries = by_reason[reason] + fh.write(f"\n## {reason} ({len(entries)} package(s))\n") + for r in sorted(entries, key=lambda x: (x["kind"], x["arch"], x["name"])): + fh.write( + f" {r['kind']:9s} {r['arch']:7s} {r['name']} " + f"(srpm={r['source_package']!r}, src={r['source_repo']})\n" + ) + return json_path, txt_path + + +def write_fallback_report( + fallbacks: list[dict], output_dir: Path +) -> tuple[Path, Path]: + """Mirror :func:`write_unpublished_report` for inheritance-fallback + routings. These packages WERE routed (so they appear in the published + repos) but only because Phase-4 inferred a channel from sibling rpms + rather than reading an explicit ``publishChannel`` from azldev. Once + the underlying TOML config publishes srpm/debuginfo channels + explicitly the fallback path goes away and these reports should + shrink to zero. + """ + json_path = output_dir / "fallback-channel-packages.json" + txt_path = output_dir / "fallback-channel-packages.txt" + json_path.write_text(json.dumps(fallbacks, indent=2)) + + by_reason: dict[str, list[dict]] = defaultdict(list) + for r in fallbacks: + by_reason[r["reason"]].append(r) + + with txt_path.open("w") as fh: + fh.write( + f"# {len(fallbacks)} package(s) routed via the Phase-4 channel " + f"inheritance fallback (no explicit publishChannel from azldev).\n" + f"# These packages ARE published, but only because a sibling rpm's " + f"channel was inferred. Once azldev publishes channels explicitly " + f"for srpm/debuginfo/etc. 
this list should be empty.\n" + f"# Grouped by reason; within each group, sorted by " + f"(kind, arch, name).\n" + ) + for reason in sorted(by_reason): + entries = by_reason[reason] + fh.write(f"\n## {reason} ({len(entries)} package(s))\n") + for r in sorted(entries, key=lambda x: (x["kind"], x["arch"], x["name"])): + marker = " [tie-break]" if r.get("tie_break_used") else "" + fh.write( + f" {r['kind']:9s} {r['arch']:7s} {r['name']} " + f"-> {r['dest_channel']}{marker} " + f"(srpm={r['source_package']!r}, src={r['source_repo']})\n" + ) + return json_path, txt_path + + +# --------------------------------------------------------------------------- +# CLI / orchestration +# --------------------------------------------------------------------------- + +class _OrderedRepoSourceAction(argparse.Action): + """Append (option_string, value) into a single shared list across + --repo-prefix and --repo, preserving CLI order. + + This matters because cross-repo NEVRA dedup keeps the first repo + seen, so command-line order is the user's only knob to control + which input wins for an overlapping NEVRA. + """ + + def __call__(self, parser, namespace, values, option_string=None): + items = getattr(namespace, self.dest, None) + if items is None: + items = [] + setattr(namespace, self.dest, items) + items.append((option_string, values)) + + +def parse_args(argv: list[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--output-dir", required=True, type=Path, + help="Directory to write the routed per-channel/per-arch repos into.", + ) + parser.add_argument( + "--repo-prefix", action=_OrderedRepoSourceAction, + dest="repo_sources", default=None, metavar="URL", + help=( + "URL prefix assumed to host the Standard Azure Linux Repo " + "Layout. Expanded into all six sub-repos (404s on any are " + "silently skipped). Repeatable; CLI order is preserved and " + "interleaved with --repo for cross-repo NEVRA-dedup precedence." + ), + ) + parser.add_argument( + "--repo", action=_OrderedRepoSourceAction, + dest="repo_sources", default=None, metavar="TYPE:URL", + help=( + "Explicit single repo: TYPE:URL where TYPE is main, debuginfo, " + "or srpms. URL may contain `$basearch` for main/debuginfo. " + "404s are fatal. Repeatable; CLI order is preserved (see " + "--repo-prefix)." + ), + ) + parser.add_argument( + "--repo-root", type=Path, default=DEFAULT_REPO_ROOT, + help="Path to the azurelinux project root (default: %(default)s).", + ) + parser.add_argument( + "--arch", action="append", default=[], + help=( + f"Arch to expand `$basearch` into (default: " + f"{', '.join(DEFAULT_ARCHES)}). Repeatable." + ), + ) + parser.add_argument( + "--keep-cache", action="store_true", + help="Don't delete the metadata cache dir under /.cache/.", + ) + tls = parser.add_mutually_exclusive_group() + tls.add_argument( + "--ca-bundle", type=Path, default=None, + help=( + "Path to a PEM-encoded CA bundle to trust for HTTPS repo " + "fetches (e.g. for repos served by a self-signed CA). " + "Mutually exclusive with --insecure." + ), + ) + tls.add_argument( + "--insecure", action="store_true", + help=( + "Disable TLS certificate verification entirely for HTTPS repo " + "fetches. Use only for trusted networks; prefer --ca-bundle " + "when possible. Mutually exclusive with --ca-bundle." 
+ ), + ) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv) + arches = tuple(args.arch) if args.arch else DEFAULT_ARCHES + + if args.ca_bundle is not None and not args.ca_bundle.is_file(): + return fatal(f"--ca-bundle path does not exist: {args.ca_bundle}") + ssl_context = build_ssl_context(args.ca_bundle, args.insecure) + + repo_sources: list[tuple[str, str]] = args.repo_sources or [] + if not repo_sources: + return fatal("at least one --repo-prefix or --repo must be provided") + + output_dir: Path = args.output_dir.resolve() + output_dir.mkdir(parents=True, exist_ok=True) + cache_root = output_dir / ".cache" + cache_root.mkdir(parents=True, exist_ok=True) + + # ---- Resolve the InputRepo list ------------------------------------ + log("==> Resolving input repos ...") + repos: list[InputRepo] = [] + for option, value in repo_sources: + if option == "--repo-prefix": + repos.extend(expand_repo_prefix(value, arches)) + else: # --repo + try: + repos.extend(parse_explicit_repo(value, arches)) + except ValueError as e: + return fatal(str(e)) + repos = dedup_input_repos(repos) + log(f" {len(repos)} candidate input repo(s) after dedup") + + # ---- Phase 1: download repodata ------------------------------------ + log("==> Downloading repodata ...") + repo_to_dir: dict[InputRepo, Path] = {} + for repo in repos: + try: + cache_dir = download_repo_metadata(repo, cache_root, ssl_context) + except urllib.error.HTTPError as e: + return fatal( + f"HTTP {e.code} fetching {repo.url}/repodata/repomd.xml " + f"(origin={repo.origin})" + ) + if cache_dir is not None: + repo_to_dir[repo] = cache_dir + log(f" {len(repo_to_dir)} repo(s) successfully downloaded " + f"({len(repos) - len(repo_to_dir)} skipped)") + + if not repo_to_dir: + return fatal("no input repos with usable repodata; nothing to route") + + # ---- Phase 2: build package universe + source map ------------------ + log("==> Building package universe ...") + universe, src_map = build_package_universe(repo_to_dir) + log(f" {len(universe)} unique (kind, arch, NEVRA) entries; " + f"{len(src_map)} unique (pkg, srpm) pairs for azldev") + + # ---- Phase 3: query azldev ----------------------------------------- + log("==> Querying azldev for routing ...") + known_components = query_known_components(args.repo_root) + routing = query_azldev( + args.repo_root, src_map, output_dir, known_components + ) + log(f" azldev returned {len(routing.rpm)} rpm row(s), " + f"{len(routing.srpm)} srpm row(s), " + f"{len(routing.foreign_names)} foreign name(s) (excluded)") + + # ---- Phase 4: per-entry routing decisions -------------------------- + log("==> Computing routing decisions ...") + decisions = decide_routing(universe, routing) + n_pub = sum(1 for d in decisions.values() if d.dest_channel is not None) + n_unpub = sum(1 for d in decisions.values() if d.dest_channel is None) + n_inh = sum(1 for d in decisions.values() if d.inherited) + log(f" routed: {n_pub} | unpublished: {n_unpub} | " + f"inheritance-fallback used: {n_inh}") + + # ---- Phase 5+6: open writers and emit ------------------------------ + log("==> Writing per-destination repos ...") + dest_counts, unpublished, fallbacks = emit_repos( + repo_to_dir, universe, decisions, output_dir + ) + + # ---- Phase 7: unpublished + fallback reports ----------------------- + log("==> Writing unpublished-packages report ...") + json_path, txt_path = write_unpublished_report(unpublished, output_dir) + log(f" -> {json_path.name}, {txt_path.name}") + + log("==> 
Writing fallback-channel-packages report ...") + fb_json, fb_txt = write_fallback_report(fallbacks, output_dir) + log(f" -> {fb_json.name}, {fb_txt.name}") + + # ---- Summary ------------------------------------------------------- + log("\n==> Summary") + for dest in sorted(dest_counts, key=lambda d: (d.channel, d.kind, d.arch)): + log(f" {dest.relpath():35s} {dest_counts[dest]:6d} pkg(s)") + log(f" {'(unpublished)':35s} {len(unpublished):6d} pkg(s)") + log(f" {'(fallback-channel)':35s} {len(fallbacks):6d} pkg(s)") + + if not args.keep_cache: + with contextlib.suppress(FileNotFoundError): + shutil.rmtree(cache_root) + return 0 + + +if __name__ == "__main__": + sys.exit(main())
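+
+# Example invocation (hypothetical URLs; flags as defined in parse_args):
+#   ./synthesize-repodata.py --output-dir /tmp/routed \
+#       --repo-prefix https://example.com/azl4 \
+#       --repo srpms:https://example.com/extra-srpms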