PACKAGE_CACHE_TTL_REPO = 900  # user-supplied repos can update anytime


def get_repo_packages(url: str) -> Optional[set[str]]:
    """Return the set of package names offered by one user-supplied repo.

    Results are cached in Redis for ``PACKAGE_CACHE_TTL_REPO`` seconds under a
    key derived from the normalised repository base URL.

    Apk repositories are addressed by their ``packages.adb`` file, while the
    index that gets parsed sits next to it in the same directory, so that
    trailing file name is stripped before fetching.

    Args:
        url: Repository URL as supplied in the build request.

    Returns:
        The package names, or ``None`` when no package name could be
        determined for the repository (nothing is cached in that case).
    """
    # Normalise before hashing/fetching: "…/repo" and "…/repo/" must share a
    # single cache entry, and the index path appended by the parser must not
    # end up with a doubled slash. Stripping first also lets a trailing
    # "/packages.adb/" be recognised.
    base = url.rstrip("/").removesuffix("/packages.adb")
    cache_key = f"pkgs:repo:{get_str_hash(base)}"
    rc = get_redis_client()

    # An absent key reads back as an empty set, so "falsy" means cache miss.
    cached = rc.smembers(cache_key)
    if cached:
        return cached

    packages = packages_from_index(parse_packages_file(base))
    if not packages:
        # Nothing usable came back; report "unknown" rather than caching an
        # empty universe that would reject every package.
        return None

    # Populate the set and its expiry in one round trip.
    pipe = rc.pipeline()
    pipe.sadd(cache_key, *packages)
    pipe.expire(cache_key, PACKAGE_CACHE_TTL_REPO)
    pipe.execute()
    return packages
diff --git a/asu/routers/api.py b/asu/routers/api.py index 585e5b5c..6d9b5f52 100644 --- a/asu/routers/api.py +++ b/asu/routers/api.py @@ -8,11 +8,13 @@ from asu.build import build from asu.build_request import BuildRequest from asu.config import settings -from asu.repositories import is_repo_allowed +from asu.package_changes import apply_package_changes +from asu.repositories import get_repo_packages, is_repo_allowed from asu.util import ( add_timestamp, add_build_event, client_get, + get_available_packages, get_branch, get_queue, get_request_hash, @@ -149,6 +151,48 @@ def valid_profile(profile: str, build_request: BuildRequest) -> bool: build_request.profile = app.profiles[build_request.version][build_request.target][ build_request.profile ] + + # Off by default — small setups don't benefit and pay the upstream + # round-trip cost on cache miss. Enable on busy servers via asu.toml. + if settings.validate_packages: + # In replace-mode with user repos, only those repos count; otherwise + # the standard upstream is part of the available universe. + needs_upstream = not ( + build_request.repositories and build_request.repositories_mode == "replace" + ) + + available: set[str] = set() + skip_check = False + + if needs_upstream: + arch = app.targets[build_request.version].get(build_request.target, "") + upstream = get_available_packages( + build_request.version, build_request.target, arch + ) + if upstream is None: + skip_check = True + else: + available |= upstream + + for url in build_request.repositories.values(): + repo = get_repo_packages(url) + if repo is None: + # Repo unreachable or has no index.json — fail open rather + # than falsely reject. + skip_check = True + break + available |= repo + + if not skip_check: + # apply_package_changes mutates .packages; copy first so the build + # phase still sees the original list and reapplies its own changes. 
PACKAGE_CACHE_TTL_RELEASE = 86400  # releases are immutable, 24h is fine
PACKAGE_CACHE_TTL_SNAPSHOT = 900  # snapshots refresh ~daily, 15min keeps it fresh


def packages_from_index(idx: dict) -> set[str]:
    """Extract package names from a ``parse_packages_file`` return value.

    Two shapes are handled:

    * ``{"architecture": ..., "packages": {name: version}}`` - the names are
      the keys of the nested ``packages`` mapping;
    * a flat ``{name: version}`` mapping - the names are the keys whose value
      is a string.

    Anything that is not a dict (for example ``None`` from a failed fetch)
    yields an empty set instead of raising, so callers can uniformly treat
    "no names" as "nothing known".
    """
    if not isinstance(idx, dict):
        return set()

    nested = idx.get("packages")
    if isinstance(nested, dict):
        return set(nested)

    # Flat shape: keep only string-valued entries so non-string metadata
    # (e.g. a numeric "version" tag) is never mistaken for a package.
    return {name for name, value in idx.items() if isinstance(value, str)}


def fetch_available_packages(version: str, target: str, arch: str) -> set[str]:
    """Fetch the union of available package names from upstream.

    Combines the target-specific packages, the kmods directory (only when the
    build is a post-kmod-split one and a kernel version could be parsed), and
    every feed listed for ``arch`` (skipped when ``arch`` is empty).

    Returns:
        The union of all names found. The set is empty when the branch data
        has no ``path`` entry or when none of the indexes yielded a name.
    """
    branch_data = get_branch(version)
    if "path" not in branch_data:
        return set()

    version_path = branch_data["path"].format(version=version)
    base = f"{settings.upstream_url}/{version_path}"
    target_url = f"{base}/targets/{target}"

    packages: set[str] = set()
    packages |= packages_from_index(parse_packages_file(f"{target_url}/packages"))

    if is_post_kmod_split_build(f"{version_path}/targets/{target}"):
        # The kmods directory is named after the kernel version recorded in
        # the target's profiles.json.
        kmod_dir = parse_kernel_version(f"{target_url}/profiles.json")
        if kmod_dir:
            packages |= packages_from_index(
                parse_packages_file(f"{target_url}/kmods/{kmod_dir}")
            )

    if arch:
        feed_url = f"{base}/packages/{arch}"
        for feed in parse_feeds_conf(feed_url):
            packages |= packages_from_index(
                parse_packages_file(f"{feed_url}/{feed}")
            )

    return packages


def get_available_packages(version: str, target: str, arch: str) -> Optional[set[str]]:
    """Return the cached set of available package names for (version, target).

    On a cache miss the names are fetched from upstream and stored in Redis,
    with a short TTL for versions containing "snapshot" (case-insensitive)
    and a long TTL otherwise.

    Returns:
        The package names, or ``None`` when nothing is known about the
        upstream, so callers treat it as "cannot validate, allow through"
        rather than rejecting blindly.
    """
    cache_key = f"pkgs:{version}:{target}"
    rc = get_redis_client()

    # An absent key reads back as an empty set, so "falsy" means cache miss.
    cached = rc.smembers(cache_key)
    if cached:
        return cached

    packages = fetch_available_packages(version, target, arch)
    if not packages:
        return None

    # NOTE(review): a fetch that only partially succeeded (e.g. target index
    # reachable, arch feeds not) is indistinguishable from a complete one here
    # and is cached for the full TTL - confirm whether that is acceptable.
    if "snapshot" in version.lower():
        ttl = PACKAGE_CACHE_TTL_SNAPSHOT
    else:
        ttl = PACKAGE_CACHE_TTL_RELEASE

    # Populate the set and its expiry in one round trip.
    pipe = rc.pipeline()
    pipe.sadd(cache_key, *packages)
    pipe.expire(cache_key, ttl)
    pipe.execute()
    return packages
def _serve_upstream_package_indexes(httpserver):
    """Serve the snapshot target and arch ``Packages`` fixtures over HTTP.

    The files are read from ``tests/upstream`` and registered on
    ``httpserver`` under the same relative paths.
    """
    upstream_path = Path("./tests/upstream/")
    for relative in (
        "snapshots/targets/testtarget/testsubtarget/packages/Packages",
        "snapshots/packages/testarch/base/Packages",
    ):
        httpserver.expect_request(f"/{relative}").respond_with_data(
            (upstream_path / relative).read_bytes()
        )


def test_validate_packages_rejects_unknown(client, httpserver, monkeypatch):
    """With validate_packages enabled, unknown packages are rejected at the
    validation step rather than reaching the build worker."""
    _serve_upstream_package_indexes(httpserver)
    # monkeypatch restores whatever value was configured before the test,
    # instead of forcing the setting back to a hard-coded False.
    monkeypatch.setattr(settings, "validate_packages", True)

    response = client.post(
        "/api/v1/build",
        json=dict(
            version="SNAPSHOT",
            target="testtarget/testsubtarget",
            profile="generic",
            packages=["base-files", "this-package-does-not-exist"],
        ),
    )

    assert response.status_code == 400
    detail = response.json()["detail"]
    assert "this-package-does-not-exist" in detail
    assert "base-files" not in detail


def test_validate_packages_custom_repo(client, httpserver, monkeypatch):
    """Packages from a user-supplied repo (opkg or apk) are merged into the
    available universe, so a name found there is accepted."""
    _serve_upstream_package_indexes(httpserver)

    # opkg-style repo: no index.json, only a Packages file with an extra
    # package.
    httpserver.expect_request("/custom-repo/index.json").respond_with_data(
        "", status=404
    )
    httpserver.expect_request("/custom-repo/Packages").respond_with_data(
        "Package: from-custom-repo\n"
        "Version: 1.0\n"
        "Architecture: testarch\n"
        "Filename: from-custom-repo_1.0_testarch.ipk\n"
        "Size: 1\n"
        "SHA256sum: 0000\n"
        "Description: test\n"
    )
    # apk-style repo: client URL points at packages.adb but the v2 index.json
    # sits in the same directory.
    httpserver.expect_request("/apk-repo/index.json").respond_with_json(
        {
            "version": 2,
            "architecture": "testarch",
            "packages": {"from-apk-repo": "1.0"},
        }
    )

    # Both settings are restored to their previous values on teardown.
    monkeypatch.setattr(settings, "repository_allow_list", ["http://localhost:8123/"])
    monkeypatch.setattr(settings, "validate_packages", True)

    def request_build(package, repo_url):
        """POST a build request for one package with one appended repo."""
        return client.post(
            "/api/v1/build",
            json=dict(
                version="SNAPSHOT",
                target="testtarget/testsubtarget",
                profile="generic",
                packages=[package],
                repositories={"custom": repo_url},
                repositories_mode="append",
            ),
        )

    # Package only present in the opkg repo: must pass validation.
    response = request_build("from-custom-repo", "http://localhost:8123/custom-repo")
    assert response.status_code != 400, response.json()

    # Package only present in the apk repo (URL ends with packages.adb).
    response = request_build(
        "from-apk-repo", "http://localhost:8123/apk-repo/packages.adb"
    )
    assert response.status_code != 400, response.json()

    # Truly unknown package: still rejected even with the custom repo.
    response = request_build(
        "this-package-does-not-exist", "http://localhost:8123/custom-repo"
    )
    assert response.status_code == 400
    assert "this-package-does-not-exist" in response.json()["detail"]


def test_validate_packages_skipped_when_disabled(client):
    """With validate_packages disabled (the default), unknown packages are
    not rejected at validation — they would reach the build worker."""
    payload = dict(
        version="1.2.3",
        target="testtarget/testsubtarget",
        profile="testprofile",
        packages=["this-package-does-not-exist"],
    )
    response = client.post("/api/v1/build", json=payload)
    # No 400 from validation — request proceeds (will eventually fail in build).
    assert response.status_code != 400