def get_repo_packages(url: str) -> Optional[set[str]]:
    """Return the cached set of package names offered by one repository URL.

    Used for user-supplied repositories. Apk repos point at the
    `packages.adb` file but their index.json sits next to it in the same
    directory, so the trailing filename is stripped before fetching.

    Args:
        url: Repository URL as supplied in the build request.

    Returns:
        The set of package names found for the repository, or None when the
        index yielded no package names, so that callers can skip validation
        instead of rejecting every requested package.
    """
    # Strip trailing slashes first so ".../repo" and ".../repo/" share one
    # cache entry and the base handed to parse_packages_file never ends in "/".
    # apk: /packages.adb -> sibling /index.json
    base = url.rstrip("/").removesuffix("/packages.adb")
    cache_key = f"pkgs:repo:{get_str_hash(base)}"
    rc = get_redis_client()

    cached = rc.smembers(cache_key)
    if cached:
        return cached

    packages = packages_from_index(parse_packages_file(base))
    if not packages:
        # Empty results are not cached, so the next request tries again.
        return None

    # Queue SADD and EXPIRE on one pipeline so both are sent together.
    pipe = rc.pipeline()
    pipe.sadd(cache_key, *packages)
    pipe.expire(cache_key, PACKAGE_CACHE_TTL_REPO)
    pipe.execute()
    return packages
PACKAGE_CACHE_TTL_RELEASE = 86400  # releases are immutable, 24h is fine
PACKAGE_CACHE_TTL_SNAPSHOT = 900  # snapshots refresh ~daily, 15min keeps it fresh


def packages_from_index(idx: dict) -> set[str]:
    """Extract package names from a parse_packages_file return value.

    parse_packages_file returns either {architecture, packages} (opkg fallback
    or v2 index.json) or a flat {pkg: version} dict (v1 index.json without
    Packages fallback). Handle both shapes.

    Args:
        idx: Parsed index in one of the two shapes described above.

    Returns:
        The set of package names. Empty when `idx` is not a dict (for example
        None after a failed fetch) or contains no packages.
    """
    if not isinstance(idx, dict):
        # Nothing usable was parsed; an empty set lets callers fail open
        # instead of crashing on attribute access.
        return set()

    pkgs = idx.get("packages")
    if isinstance(pkgs, dict):
        return set(pkgs)
    # Flat shape: every key whose value is a version string is a package.
    return {k for k, v in idx.items() if isinstance(v, str)}


def fetch_available_packages(version: str, target: str, arch: str) -> set[str]:
    """Fetch the union of available package names from upstream.

    Combines target-specific packages, kmods (when split), and per-arch feeds.

    Args:
        version: Version string used to look up the branch and its path.
        target: Target path, e.g. "testtarget/testsubtarget".
        arch: Package architecture; per-arch feeds are skipped when empty.

    Returns:
        The set of package names. Empty when nothing could be fetched
        (upstream down, unknown version/target, etc.).
    """
    branch_data = get_branch(version)
    if "path" not in branch_data:
        return set()
    version_path = branch_data["path"].format(version=version)
    base = f"{settings.upstream_url}/{version_path}"
    target_url = f"{base}/targets/{target}"

    # Target-specific packages are always part of the result.
    packages: set[str] = set()
    packages.update(packages_from_index(parse_packages_file(f"{target_url}/packages")))

    # Builds after the kmod split keep kernel modules in their own directory,
    # named by the value parse_kernel_version reads from profiles.json.
    if is_post_kmod_split_build(f"{version_path}/targets/{target}"):
        kmod_dir = parse_kernel_version(f"{target_url}/profiles.json")
        if kmod_dir:
            packages.update(
                packages_from_index(
                    parse_packages_file(f"{target_url}/kmods/{kmod_dir}")
                )
            )

    if arch:
        feed_url = f"{base}/packages/{arch}"
        for feed in parse_feeds_conf(feed_url):
            packages.update(
                packages_from_index(parse_packages_file(f"{feed_url}/{feed}"))
            )

    return packages


def get_available_packages(version: str, target: str, arch: str) -> Optional[set[str]]:
    """Return cached set of available package names for (version, target).

    The cache key is built from `version` and `target` only; `arch` is used
    solely to fetch the per-arch feeds on a cache miss.

    Args:
        version: Version string of the requested build.
        target: Target path of the requested build.
        arch: Package architecture passed through to the upstream fetch.

    Returns:
        The set of package names, or None when nothing is known about the
        upstream so callers treat it as 'cannot validate, allow through'
        rather than rejecting blindly.
    """
    cache_key = f"pkgs:{version}:{target}"
    rc = get_redis_client()

    cached = rc.smembers(cache_key)
    if cached:
        return cached

    packages = fetch_available_packages(version, target, arch)
    if not packages:
        # Empty results are not cached, so the next request tries again.
        return None

    # Snapshots change often and get the short TTL; everything else is
    # treated as a release.
    ttl = (
        PACKAGE_CACHE_TTL_SNAPSHOT
        if "snapshot" in version.lower()
        else PACKAGE_CACHE_TTL_RELEASE
    )
    pipe = rc.pipeline()
    pipe.sadd(cache_key, *packages)
    pipe.expire(cache_key, ttl)
    pipe.execute()
    return packages
def test_validate_packages_custom_repo(client, httpserver):
    """Packages from a user-supplied repo (opkg or apk) are merged into the
    available universe, so a name found there is accepted."""
    upstream_path = Path("./tests/upstream/")
    for f in [
        "snapshots/targets/testtarget/testsubtarget/packages/Packages",
        "snapshots/packages/testarch/base/Packages",
    ]:
        httpserver.expect_request(f"/{f}").respond_with_data(
            (upstream_path / f).read_bytes()
        )
    # opkg-style repo: no index.json, so serve a Packages file with an extra
    # package.
    httpserver.expect_request("/custom-repo/index.json").respond_with_data(
        "", status=404
    )
    httpserver.expect_request("/custom-repo/Packages").respond_with_data(
        "Package: from-custom-repo\n"
        "Version: 1.0\n"
        "Architecture: testarch\n"
        "Filename: from-custom-repo_1.0_testarch.ipk\n"
        "Size: 1\n"
        "SHA256sum: 0000\n"
        "Description: test\n"
    )
    # apk-style repo: client URL points at packages.adb but the v2 index.json
    # sits in the same directory.
    httpserver.expect_request("/apk-repo/index.json").respond_with_json(
        {
            "version": 2,
            "architecture": "testarch",
            "packages": {"from-apk-repo": "1.0"},
        }
    )

    def post_build(package: str, repo_url: str):
        # All three requests differ only in the package and the repo URL.
        return client.post(
            "/api/v1/build",
            json=dict(
                version="SNAPSHOT",
                target="testtarget/testsubtarget",
                profile="generic",
                packages=[package],
                repositories={"custom": repo_url},
                repositories_mode="append",
            ),
        )

    # Remember both settings so the finally block restores whatever was
    # configured before this test ran.
    saved_allow_list = settings.repository_allow_list
    saved_validate = settings.validate_packages
    settings.repository_allow_list = ["http://localhost:8123/"]
    settings.validate_packages = True
    try:
        # Package only present in the opkg repo: must pass validation.
        response = post_build("from-custom-repo", "http://localhost:8123/custom-repo")
        assert response.status_code != 400, response.json()

        # Package only present in the apk repo (URL ends with packages.adb).
        response = post_build(
            "from-apk-repo", "http://localhost:8123/apk-repo/packages.adb"
        )
        assert response.status_code != 400, response.json()

        # Truly unknown package: still rejected even with the custom repo.
        response = post_build(
            "this-package-does-not-exist", "http://localhost:8123/custom-repo"
        )
        assert response.status_code == 400
        assert "this-package-does-not-exist" in response.json()["detail"]
    finally:
        settings.validate_packages = saved_validate
        settings.repository_allow_list = saved_allow_list


def test_validate_packages_skipped_when_disabled(client):
    """With validate_packages disabled (the default), unknown packages are
    not rejected at validation — they would reach the build worker."""
    response = client.post(
        "/api/v1/build",
        json=dict(
            version="1.2.3",
            target="testtarget/testsubtarget",
            profile="testprofile",
            packages=["this-package-does-not-exist"],
        ),
    )
    # No 400 from validation — request proceeds (will eventually fail in build).
    assert response.status_code != 400