diff --git a/.github/workflows/build-check-test.yaml b/.github/workflows/build-check-test.yaml
index 5d2723ff6..66928bca4 100644
--- a/.github/workflows/build-check-test.yaml
+++ b/.github/workflows/build-check-test.yaml
@@ -32,21 +32,59 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  # Detect which paths have changed to conditionally run E2E tests
-  changes:
+  # E2E matrix selector. Computes which (suite × backend) rows actually run
+  # and which fall to the e2e-skip mirror, based on changed paths, PR labels,
+  # and draft state. See hack/e2e-select/main.py + testdata/ for the rules
+  # and golden samples.
+  e2e-select:
     runs-on: ubuntu-latest
     if: github.event_name == 'pull_request'
     outputs:
-      e2e-relevant: ${{ steps.filter.outputs.e2e-relevant }}
+      run: ${{ steps.select.outputs.run }}
+      skip: ${{ steps.select.outputs.skip }}
+      has_run: ${{ steps.select.outputs.has_run }}
+      has_skip: ${{ steps.select.outputs.has_skip }}
+      reason: ${{ steps.select.outputs.reason }}
     steps:
       - uses: actions/checkout@v4
-      - uses: dorny/paths-filter@v3
-        id: filter
         with:
-          filters: |
-            e2e-relevant:
-              - 'operator/**'
-              - '.github/**'
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Compute changed files
+        run: |
+          # Three-dot range: diff from the merge base, so base-branch-only commits are excluded.
+          git diff --name-only \
+            "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}" \
+            > /tmp/changed-files.txt
+          echo "Changed files in this PR:"; cat /tmp/changed-files.txt
+      - name: Run selector
+        id: select
+        env:
+          # Label text is user-controlled: pass it via env, never inline it in the script.
+          LABELS: ${{ join(github.event.pull_request.labels.*.name, ',') }}
+        run: |
+          set -o pipefail  # let a selector crash fail the step instead of being masked by tee
+          DRAFT_FLAG=""
+          if [ "${{ github.event.pull_request.draft }}" = "true" ]; then DRAFT_FLAG="--draft"; fi
+          # Print full result to logs for transparency.
+          echo "=== Selector result ==="
+          python3 hack/e2e-select/main.py \
+            --mode pr \
+            --changed-files /tmp/changed-files.txt \
+            --labels "$LABELS" \
+            $DRAFT_FLAG | tee /tmp/select.json
+          echo "======================="
+
+          # Slice into individual job outputs (one Python invocation, jq slices).
+          {
+            echo "run=$(jq -c .run /tmp/select.json)"
+            echo "skip=$(jq -c .skip /tmp/select.json)"
+            echo "has_run=$(jq -r .has_run /tmp/select.json)"
+            echo "has_skip=$(jq -r .has_skip /tmp/select.json)"
+            echo "reason=$(jq -r .reason /tmp/select.json)"
+          } >> "$GITHUB_OUTPUT"
 
   test:
     runs-on: ubuntu-latest
@@ -87,48 +125,30 @@ jobs:
       - name: check
         run: make check
 
-  # E2E tests - only run after build, check, and test jobs succeed
-  # Only triggered by changes to operator or .github folders
+  # E2E tests — matrix comes from the selector job.
+  #
+  # Matrix entries (defined in hack/e2e-select/main.py ALL_ROWS):
+  #   test_name     - name shown in the GitHub Actions UI
+  #   test_pattern  - Go test -run regex
+  #   backend       - kai-scheduler | default-scheduler | ...
+  #   create_flags  - extra flags appended to E2E_CREATE_FLAGS
+  #                   (empty string means "use the base e2e.yaml preset, KAI")
+  #   make_target   - Makefile target (run-e2e-full | run-e2e-real-full | run-e2e-mnnvl-full)
   #
-  # Matrix entries can set:
-  #   test_name     (required) - name shown in the GitHub Actions UI
-  #   test_pattern  (optional) - Go test -run pattern (standard e2e tests)
-  #   make_target   (optional) - Makefile target, defaults to run-e2e-full
+  # The selector handles draft policy: a draft PR without the 'run-e2e' label
+  # gets has_run=false (all rows fall to e2e-skip). The 'run-e2e' label forces
+  # the full matrix (safety escape for reviewers).
   e2e:
-    needs: [test, build, check, changes]
-    # Run on non-draft PRs (or draft PRs with 'run-e2e' label)
-    # AND only when operator or .github files are changed
+    needs: [test, build, check, e2e-select]
     if: |
       github.event_name == 'pull_request' &&
-      needs.changes.outputs.e2e-relevant == 'true' &&
-      (github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'run-e2e'))
+      needs.e2e-select.outputs.has_run == 'true'
     # use NVIDIA self-hosted runner setting is on Velonix repository
     runs-on: prod-grove-e2e-v1
     timeout-minutes: 60
     strategy:
       fail-fast: false
-      matrix:
-        include:
-          - test_name: gang_scheduling
-            test_pattern: "^Test_GS"
-          - test_name: rolling_updates
-            test_pattern: "^Test_RU"
-          - test_name: ondelete_updates
-            test_pattern: "^Test_OD"
-          - test_name: startup_ordering
-            test_pattern: "^Test_SO"
-            make_target: "run-e2e-real-full"
-          - test_name: Topology_Aware_Scheduling
-            test_pattern: "^Test_TAS"
-          - test_name: cert_management
-            test_pattern: "^Test_CM"
-          - test_name: auto_mnnvl
-            test_pattern: "^Test_AutoMNNVL"
-            make_target: "run-e2e-mnnvl-full"
-          - test_name: crd_installer
-            test_pattern: "^Test_CRD_Installer"
-          - test_name: resource_sharing
-            test_pattern: "^Test_RS"
+      matrix: ${{ fromJSON(needs.e2e-select.outputs.run) }}
     name: E2E - ${{ matrix.test_name }}
     steps:
       # print runner specs so we have a record in case of failures
@@ -136,6 +156,7 @@ jobs:
         run: |
           echo "CPUs: $(nproc)"
           echo "RAM: $(free -h | awk '/^Mem:/ {print $2}')"
+          echo "Selection reason: ${{ needs.e2e-select.outputs.reason }}"
 
       - name: Checkout code
         uses: actions/checkout@v4
@@ -150,7 +171,7 @@ jobs:
 
       - name: Run e2e tests - ${{ matrix.test_name }}
         run: |
-          make ${{ matrix.make_target || 'run-e2e-full' }} TEST_PATTERN='${{ matrix.test_pattern }}' E2E_CREATE_FLAGS='--dind-memory-mode'
+          make ${{ matrix.make_target }} TEST_PATTERN='${{ matrix.test_pattern }}' E2E_CREATE_FLAGS='${{ matrix.create_flags }} --dind-memory-mode'
         working-directory: operator
 
       # The test code handles cleanup via Teardown(), but this step provides
@@ -173,29 +194,22 @@ jobs:
           if-no-files-found: warn
           retention-days: 7
 
-  # This job runs with the same matrix as 'e2e' when E2E tests are skipped (no relevant
-  # file changes and no 'run-e2e' label). It reports a passing status so that required
-  # branch protection checks are satisfied even for documentation-only PRs.
+  # Mirror that emits synthetic passes for matrix rows the selector excluded
+  # (path-filtered out, or all-rows when the PR is in draft state without the
+  # 'run-e2e' label). This keeps the required branch-protection check names
+  # (E2E - <test_name>) resolvable even when nothing real runs.
   e2e-skip:
-    needs: [changes]
+    needs: [e2e-select]
     if: |
       github.event_name == 'pull_request' &&
-      needs.changes.outputs.e2e-relevant != 'true' &&
-      !contains(github.event.pull_request.labels.*.name, 'run-e2e')
+      needs.e2e-select.outputs.has_skip == 'true'
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      matrix:
-        include:
-          - test_name: gang_scheduling
-          - test_name: rolling_updates
-          - test_name: startup_ordering
-          - test_name: Topology_Aware_Scheduling
-          - test_name: cert_management
-          - test_name: auto_mnnvl
-          - test_name: crd_installer
-          - test_name: resource_sharing
+      matrix: ${{ fromJSON(needs.e2e-select.outputs.skip) }}
     name: E2E - ${{ matrix.test_name }}
     steps:
-      - name: Skip E2E (no relevant changes)
-        run: echo "E2E skipped — no changes to operator/ or .github/ and 'run-e2e' label not set"
+      - name: Skip E2E
+        run: |
+          echo "Skipped: ${{ matrix.test_name }} on ${{ matrix.backend }}"
+          echo "Selection reason: ${{ needs.e2e-select.outputs.reason }}"
diff --git a/.github/workflows/e2e-nightly.yaml b/.github/workflows/e2e-nightly.yaml
new file mode 100644
index 000000000..636022d8e
--- /dev/null
+++ b/.github/workflows/e2e-nightly.yaml
@@ -0,0 +1,130 @@
+# /*
+# Copyright 2026 The Grove Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# */
+
+# Nightly E2E — runs the full (suite × capable backend) matrix on schedule.
+#
+# Rationale: the PR matrix (build-check-test.yaml) is path-filtered to keep
+# per-PR cost bounded as more backends land. That means a regression on a
+# backend whose rows were filtered out of a PR's matrix can slip in. This
+# workflow catches such regressions by running the full matrix once a day.
+#
+# Failure routing: this initial cut uploads per-row diagnostic artifacts and
+# writes a job-summary report. Issue auto-open / Slack notification is left
+# as a follow-up so the first weeks of nightly runs do not spam the repo
+# while the matrix is stabilising.
+
+name: Nightly E2E
+
+on:
+  schedule:
+    # 07:00 UTC = 15:00 Beijing / 23:00 PST (00:00 PDT). Adjust to maintainer preference.
+    - cron: "0 7 * * *"
+  # Allow manual trigger for ad-hoc verification (e.g. after a flaky run).
+  workflow_dispatch:
+
+# Don't run multiple nightlies in parallel (matrix already consumes the
+# self-hosted runner pool; concurrent runs would queue and bleed into the
+# next day's window).
+concurrency:
+  group: e2e-nightly
+  cancel-in-progress: false
+
+jobs:
+  # Compute the full matrix via the same selector the PR workflow uses, in
+  # nightly mode (ignores path filter / labels / draft state).
+  matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      run: ${{ steps.select.outputs.run }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Run selector (nightly mode)
+        id: select
+        run: |
+          set -o pipefail  # let a selector crash fail the step instead of being masked by tee
+          echo "=== Selector result ==="
+          python3 hack/e2e-select/main.py --mode nightly | tee /tmp/select.json
+          echo "======================="; echo "run=$(jq -c .run /tmp/select.json)" >> "$GITHUB_OUTPUT"
+
+  e2e:
+    needs: matrix
+    runs-on: prod-grove-e2e-v1
+    timeout-minutes: 90
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.matrix.outputs.run) }}
+    name: Nightly E2E - ${{ matrix.test_name }}
+    steps:
+      - name: Print runner specs
+        run: |
+          echo "CPUs: $(nproc)"
+          echo "RAM: $(free -h | awk '/^Mem:/ {print $2}')"
+
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Pull registry image from GHCR to avoid Docker Hub rate limits
+        run: |
+          docker pull ghcr.io/distribution/distribution:3.1.0
+          docker tag ghcr.io/distribution/distribution:3.1.0 registry:2
+
+      - name: E2E Setup
+        uses: ./.github/actions/e2e-setup
+
+      - name: Run e2e tests - ${{ matrix.test_name }}
+        run: |
+          make ${{ matrix.make_target }} TEST_PATTERN='${{ matrix.test_pattern }}' E2E_CREATE_FLAGS='${{ matrix.create_flags }} --dind-memory-mode'
+        working-directory: operator
+
+      - name: Cleanup k3d cluster
+        if: always()
+        working-directory: operator
+        run: make e2e-cluster-down || true
+
+      - name: Upload test logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: nightly-e2e-logs-${{ matrix.test_name }}
+          path: operator/e2e-diagnostics/
+          if-no-files-found: warn
+          retention-days: 14
+
+  # Aggregate report. Runs unconditionally after the matrix so we always have
+  # a single place to look at the night's results.
+  summary:
+    needs: e2e
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Write summary
+        run: |
+          {
+            echo "## Nightly E2E summary"
+            echo ""
+            echo "Run: ${{ github.run_id }}"
+            echo "Trigger: ${{ github.event_name }}"
+            echo "Status: ${{ needs.e2e.result }}"
+            echo ""
+            if [ "${{ needs.e2e.result }}" != "success" ]; then
+              echo "⚠️  One or more matrix rows failed. See the [run page](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) and the per-row \`nightly-e2e-logs-*\` artifacts."
+            else
+              echo "✅ All matrix rows passed."
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/hack/e2e-select/main.py b/hack/e2e-select/main.py
new file mode 100644
index 000000000..6cf16cb3f
--- /dev/null
+++ b/hack/e2e-select/main.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+# /*
+# Copyright 2026 The Grove Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# */
+"""
+E2E test matrix selector for GitHub Actions.
+
+Computes which (suite × backend) rows the e2e job should run on a given
+PR (mode=pr) or scheduled run (mode=nightly), based on changed file paths
+and PR labels.
+
+The selector emits the matrix as JSON in the GHA ``{"include": [...]}``
+shape. Two outputs are emitted in a single call:
+
+  - ``run``: rows the matrix should actually execute
+  - ``skip``: rows the e2e-skip mirror should emit as synthetic passes,
+    so that branch-protection-required check names stay stable across PRs
+
+The union of ``run`` + ``skip`` always equals the full matrix
+(``ALL_ROWS``); the two are disjoint.
+
+Selection logic
+---------------
+- ``mode=nightly``: ``run = ALL_ROWS``, ``skip = []``. Path filter and
+  labels are ignored.
+- ``mode=pr`` with ``run-e2e`` among ``--labels``: same as nightly. This is
+  the "safety escape" — a reviewer can force the full matrix without having
+  to figure out which path triggers which rows.
+- ``mode=pr`` with ``--draft`` and no ``run-e2e`` label: ``run = []``,
+  ``skip = ALL_ROWS``. Draft PRs do not gate merges, so we emit the full
+  set as synthetic passes; the contributor can add the label to force
+  real runs.
+- ``mode=pr`` otherwise: changed files are matched against ``PATH_RULES``
+  in order; the union of matched "affected" sets selects which rows run.
+  Unselected rows go to ``skip``.
+
+Adding a new backend
+--------------------
+1. Add the per-backend rows to ``ALL_ROWS`` (test_name must be unique).
+2. If the backend has its own scheduler package subdir
+   (``operator/internal/scheduler/<name>/``), add a path rule above the
+   generic ``scheduler/**`` shared-framework rule.
+3. Add testdata samples covering the new backend's path-filter case and
+   re-run the unit tests.
+"""
+
+import argparse
+import fnmatch
+import json
+import sys
+from typing import Any
+
+# ---------------------------------------------------------------------------
+# Matrix definition. Keep test_name unique across rows.
+# ---------------------------------------------------------------------------
+ALL_ROWS: list[dict[str, Any]] = [
+    # ---- kai-scheduler (primary backend) ----
+    {"test_name": "gang_scheduling", "test_pattern": "^Test_GS",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-full", "tier": "capability"},
+    {"test_name": "rolling_updates", "test_pattern": "^Test_RU",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-full", "tier": "sensitive"},
+    {"test_name": "ondelete_updates", "test_pattern": "^Test_OD",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-full", "tier": "sensitive"},
+    {"test_name": "startup_ordering", "test_pattern": "^Test_SO",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-real-full", "tier": "sensitive"},
+    {"test_name": "Topology_Aware_Scheduling", "test_pattern": "^Test_TAS",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-full", "tier": "capability"},
+    {"test_name": "cert_management", "test_pattern": "^Test_CM",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-full", "tier": "agnostic"},
+    {"test_name": "auto_mnnvl", "test_pattern": "^Test_AutoMNNVL",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-mnnvl-full", "tier": "capability"},
+    {"test_name": "crd_installer", "test_pattern": "^Test_CRD_Installer",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-full", "tier": "agnostic"},
+    {"test_name": "resource_sharing", "test_pattern": "^Test_RS",
+     "backend": "kai-scheduler", "create_flags": "",
+     "make_target": "run-e2e-full", "tier": "capability"},
+    # ---- default-scheduler ----
+    {"test_name": "rolling_updates_default-scheduler", "test_pattern": "^Test_RU",
+     "backend": "default-scheduler",
+     "create_flags": "-f hack/e2e-default-scheduler.yaml",
+     "make_target": "run-e2e-full", "tier": "sensitive"},
+    {"test_name": "ondelete_updates_default-scheduler", "test_pattern": "^Test_OD",
+     "backend": "default-scheduler",
+     "create_flags": "-f hack/e2e-default-scheduler.yaml",
+     "make_target": "run-e2e-full", "tier": "sensitive"},
+    {"test_name": "startup_ordering_default-scheduler", "test_pattern": "^Test_SO",
+     "backend": "default-scheduler",
+     "create_flags": "-f hack/e2e-default-scheduler.yaml",
+     "make_target": "run-e2e-real-full", "tier": "sensitive"},
+]
+
+# ---------------------------------------------------------------------------
+# Path filter rules. Order matters: first matching rule per file wins.
+# Each rule maps a glob set to an "affected" set; "all" means all backends,
+# "agnostic" means include agnostic-tier rows even when no specific backend
+# matched. The selector unions affected sets across all changed files.
+# ---------------------------------------------------------------------------
+PATH_RULES: list[dict[str, Any]] = [
+    # Docs / pure markdown / top-level metadata: never trigger e2e.
+    {"globs": ["docs/**", "*.md", "**/*.md",
+               "ATTRIBUTION.md", "LICENSE", "OWNERS",
+               "MAINTAINERS.md", "code-of-conduct.md",
+               "SECURITY.md", "CONTRIBUTING.md"],
+     "affected": set()},
+    # Backend-specific subpaths under the scheduler package.
+    {"globs": ["operator/internal/scheduler/kai/**"],
+     "affected": {"kai-scheduler"}},
+    {"globs": ["operator/internal/scheduler/kube/**"],
+     "affected": {"default-scheduler"}},
+    # Shared scheduler framework (anything else under scheduler/).
+    {"globs": ["operator/internal/scheduler/**"],
+     "affected": {"all"}},
+    # API surface and Helm charts: broad — affects every backend's deploy.
+    {"globs": ["operator/api/**", "operator/charts/**"],
+     "affected": {"all", "agnostic"}},
+    # E2E infra, CI workflows, hack scripts: broad.
+    {"globs": ["operator/e2e/**", "operator/hack/**",
+               ".github/**", "hack/**"],
+     "affected": {"all", "agnostic"}},
+    # Fallback for anything else under operator/: treat as broad change.
+    {"globs": ["operator/**"],
+     "affected": {"all"}},
+]
+
+
+def _match(path: str, globs: list[str]) -> bool:  # True when any glob matches path
+    return any(map(lambda pattern: fnmatch.fnmatch(path, pattern), globs))
+
+
+def compute_affected(changed_files: list[str]) -> set[str]:
+    """Union the 'affected' set of the first PATH_RULES entry matching each file."""
+    result: set[str] = set()
+    for changed in changed_files:
+        first_hit = next(
+            (rule for rule in PATH_RULES if _match(changed, rule["globs"])), None)
+        # Files matching no rule (e.g. unknown top-level paths) contribute nothing.
+        if first_hit is not None:
+            result.update(first_hit["affected"])
+    return result
+
+
+def select_rows(affected: set[str]) -> list[dict[str, Any]]:
+    """Pick the rows of ALL_ROWS that an affected set selects.
+
+    An empty set selects nothing and 'all' selects every row. Otherwise a
+    row is kept when its backend is in the set, or when the set contains
+    'agnostic' and the row's tier is 'agnostic'. Row order is preserved.
+    """
+    if "all" in affected:
+        return list(ALL_ROWS)
+    want_agnostic = "agnostic" in affected
+
+    def _wanted(row: dict[str, Any]) -> bool:
+        if row["backend"] in affected:
+            return True
+        return want_agnostic and row["tier"] == "agnostic"
+
+    # An empty affected set matches no backend and no tier, yielding [].
+    return [row for row in ALL_ROWS if _wanted(row)]
+
+
+def split(rows_run: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    """Pair ``rows_run`` with the ALL_ROWS entries it lacks (by test_name, in order)."""
+    selected = frozenset(row["test_name"] for row in rows_run)
+    remainder = [row for row in ALL_ROWS if row["test_name"] not in selected]
+    return rows_run, remainder
+
+
+def _strip_internal(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Return copies of ``rows`` without the selector-only ``tier`` key."""
+    return [
+        {key: value for key, value in row.items() if key != "tier"}
+        for row in rows
+    ]
+
+
+def compute(mode: str, changed_files: list[str], labels: list[str],
+            draft: bool) -> dict[str, Any]:
+    """Decide which rows run and which are mirrored as skips.
+
+    Precedence: nightly mode, then the 'run-e2e' label, then draft state,
+    then the path filter. Returns a dict with 'run' and 'skip' (both in
+    the ``{"include": [...]}`` shape), the booleans 'has_run' and
+    'has_skip', and 'reason' (a short string for logging).
+    """
+    if mode == "nightly":
+        chosen, why = list(ALL_ROWS), "nightly: full matrix"
+    elif "run-e2e" in labels:
+        chosen = list(ALL_ROWS)
+        why = "pr+run-e2e label: full matrix (safety escape)"
+    elif draft:
+        # Unlabelled draft PR: nothing runs, every row goes to the skip mirror.
+        chosen, why = [], "pr+draft+no run-e2e label: skip all"
+    else:
+        affected = compute_affected(changed_files)
+        chosen = select_rows(affected)
+        names = sorted(affected)
+        if chosen:
+            why = f"pr: affected={names}"
+        else:
+            why = f"pr: no rows affected (affected={names or '∅'})"
+    executed, mirrored = split(chosen)
+    return {
+        "run": {"include": _strip_internal(executed)},
+        "skip": {"include": _strip_internal(mirrored)},
+        "has_run": bool(executed), "has_skip": bool(mirrored),
+        "reason": why,
+    }
+
+
+def _read_changed_files(arg: str) -> list[str]:
+    """Return stripped, non-blank lines of file ``arg`` ('-' means stdin)."""
+    if arg != "-":
+        with open(arg) as handle:
+            return [s for s in map(str.strip, handle.read().splitlines()) if s]
+    stripped = map(str.strip, sys.stdin.read().splitlines())
+    return [entry for entry in stripped if entry]
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Parse CLI arguments, run the selector, print the requested part; return 0."""
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--mode", required=True, choices=["pr", "nightly"])
+    parser.add_argument("--changed-files", default="-",
+                        help="path to file containing one changed path per line, "
+                             "or '-' for stdin. ignored for mode=nightly.")
+    parser.add_argument("--labels", default="",
+                        help="comma-separated list of PR labels. "
+                             "'run-e2e' triggers full matrix in pr mode.")
+    parser.add_argument("--draft", action="store_true",
+                        help="set if the PR is in draft state.")
+    parser.add_argument("--show", default="all",
+                        choices=["all", "run", "skip", "has_run", "has_skip", "reason"],
+                        help="which part of the result to print "
+                             "(default: full JSON object).")
+    opts = parser.parse_args(argv)
+
+    # Nightly mode never consults the path filter, so the input is not read.
+    paths = [] if opts.mode == "nightly" else _read_changed_files(opts.changed_files)
+    label_names = [part for part in map(str.strip, opts.labels.split(",")) if part]
+    outcome = compute(opts.mode, paths, label_names, opts.draft)
+
+    if opts.show == "all":
+        rendered = json.dumps(outcome, indent=2)
+    elif opts.show in ("run", "skip"):
+        rendered = json.dumps(outcome[opts.show], separators=(",", ":"))
+    else:
+        # has_run / has_skip / reason are printed as-is (Python str() form).
+        rendered = outcome[opts.show]
+    print(rendered)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/hack/e2e-select/testdata/nightly-mode-full.json b/hack/e2e-select/testdata/nightly-mode-full.json
new file mode 100644
index 000000000..dcb333c60
--- /dev/null
+++ b/hack/e2e-select/testdata/nightly-mode-full.json
@@ -0,0 +1,95 @@
+{
+  "run": {
+    "include": [
+      {
+        "test_name": "gang_scheduling",
+        "test_pattern": "^Test_GS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates",
+        "test_pattern": "^Test_RU",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates",
+        "test_pattern": "^Test_OD",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering",
+        "test_pattern": "^Test_SO",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-real-full"
+      },
+      {
+        "test_name": "Topology_Aware_Scheduling",
+        "test_pattern": "^Test_TAS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "cert_management",
+        "test_pattern": "^Test_CM",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "auto_mnnvl",
+        "test_pattern": "^Test_AutoMNNVL",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-mnnvl-full"
+      },
+      {
+        "test_name": "crd_installer",
+        "test_pattern": "^Test_CRD_Installer",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "resource_sharing",
+        "test_pattern": "^Test_RS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates_default-scheduler",
+        "test_pattern": "^Test_RU",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates_default-scheduler",
+        "test_pattern": "^Test_OD",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering_default-scheduler",
+        "test_pattern": "^Test_SO",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-real-full"
+      }
+    ]
+  },
+  "skip": {
+    "include": []
+  },
+  "has_run": true,
+  "has_skip": false
+}
diff --git a/hack/e2e-select/testdata/pr-mode-docs-only.json b/hack/e2e-select/testdata/pr-mode-docs-only.json
new file mode 100644
index 000000000..535173844
--- /dev/null
+++ b/hack/e2e-select/testdata/pr-mode-docs-only.json
@@ -0,0 +1,95 @@
+{
+  "run": {
+    "include": []
+  },
+  "skip": {
+    "include": [
+      {
+        "test_name": "gang_scheduling",
+        "test_pattern": "^Test_GS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates",
+        "test_pattern": "^Test_RU",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates",
+        "test_pattern": "^Test_OD",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering",
+        "test_pattern": "^Test_SO",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-real-full"
+      },
+      {
+        "test_name": "Topology_Aware_Scheduling",
+        "test_pattern": "^Test_TAS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "cert_management",
+        "test_pattern": "^Test_CM",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "auto_mnnvl",
+        "test_pattern": "^Test_AutoMNNVL",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-mnnvl-full"
+      },
+      {
+        "test_name": "crd_installer",
+        "test_pattern": "^Test_CRD_Installer",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "resource_sharing",
+        "test_pattern": "^Test_RS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates_default-scheduler",
+        "test_pattern": "^Test_RU",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates_default-scheduler",
+        "test_pattern": "^Test_OD",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering_default-scheduler",
+        "test_pattern": "^Test_SO",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-real-full"
+      }
+    ]
+  },
+  "has_run": false,
+  "has_skip": true
+}
diff --git a/hack/e2e-select/testdata/pr-mode-draft-no-label.json b/hack/e2e-select/testdata/pr-mode-draft-no-label.json
new file mode 100644
index 000000000..535173844
--- /dev/null
+++ b/hack/e2e-select/testdata/pr-mode-draft-no-label.json
@@ -0,0 +1,95 @@
+{
+  "run": {
+    "include": []
+  },
+  "skip": {
+    "include": [
+      {
+        "test_name": "gang_scheduling",
+        "test_pattern": "^Test_GS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates",
+        "test_pattern": "^Test_RU",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates",
+        "test_pattern": "^Test_OD",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering",
+        "test_pattern": "^Test_SO",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-real-full"
+      },
+      {
+        "test_name": "Topology_Aware_Scheduling",
+        "test_pattern": "^Test_TAS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "cert_management",
+        "test_pattern": "^Test_CM",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "auto_mnnvl",
+        "test_pattern": "^Test_AutoMNNVL",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-mnnvl-full"
+      },
+      {
+        "test_name": "crd_installer",
+        "test_pattern": "^Test_CRD_Installer",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "resource_sharing",
+        "test_pattern": "^Test_RS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates_default-scheduler",
+        "test_pattern": "^Test_RU",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates_default-scheduler",
+        "test_pattern": "^Test_OD",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering_default-scheduler",
+        "test_pattern": "^Test_SO",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-real-full"
+      }
+    ]
+  },
+  "has_run": false,
+  "has_skip": true
+}
diff --git a/hack/e2e-select/testdata/pr-mode-kai-only.json b/hack/e2e-select/testdata/pr-mode-kai-only.json
new file mode 100644
index 000000000..269a03806
--- /dev/null
+++ b/hack/e2e-select/testdata/pr-mode-kai-only.json
@@ -0,0 +1,96 @@
+{
+  "run": {
+    "include": [
+      {
+        "test_name": "gang_scheduling",
+        "test_pattern": "^Test_GS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates",
+        "test_pattern": "^Test_RU",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates",
+        "test_pattern": "^Test_OD",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering",
+        "test_pattern": "^Test_SO",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-real-full"
+      },
+      {
+        "test_name": "Topology_Aware_Scheduling",
+        "test_pattern": "^Test_TAS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "cert_management",
+        "test_pattern": "^Test_CM",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "auto_mnnvl",
+        "test_pattern": "^Test_AutoMNNVL",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-mnnvl-full"
+      },
+      {
+        "test_name": "crd_installer",
+        "test_pattern": "^Test_CRD_Installer",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "resource_sharing",
+        "test_pattern": "^Test_RS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      }
+    ]
+  },
+  "skip": {
+    "include": [
+      {
+        "test_name": "rolling_updates_default-scheduler",
+        "test_pattern": "^Test_RU",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates_default-scheduler",
+        "test_pattern": "^Test_OD",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering_default-scheduler",
+        "test_pattern": "^Test_SO",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-real-full"
+      }
+    ]
+  },
+  "has_run": true,
+  "has_skip": true
+}
diff --git a/hack/e2e-select/testdata/pr-mode-kube-only.json b/hack/e2e-select/testdata/pr-mode-kube-only.json
new file mode 100644
index 000000000..542927c69
--- /dev/null
+++ b/hack/e2e-select/testdata/pr-mode-kube-only.json
@@ -0,0 +1,96 @@
+{
+  "run": {
+    "include": [
+      {
+        "test_name": "rolling_updates_default-scheduler",
+        "test_pattern": "^Test_RU",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates_default-scheduler",
+        "test_pattern": "^Test_OD",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering_default-scheduler",
+        "test_pattern": "^Test_SO",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-real-full"
+      }
+    ]
+  },
+  "skip": {
+    "include": [
+      {
+        "test_name": "gang_scheduling",
+        "test_pattern": "^Test_GS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates",
+        "test_pattern": "^Test_RU",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates",
+        "test_pattern": "^Test_OD",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering",
+        "test_pattern": "^Test_SO",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-real-full"
+      },
+      {
+        "test_name": "Topology_Aware_Scheduling",
+        "test_pattern": "^Test_TAS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "cert_management",
+        "test_pattern": "^Test_CM",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "auto_mnnvl",
+        "test_pattern": "^Test_AutoMNNVL",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-mnnvl-full"
+      },
+      {
+        "test_name": "crd_installer",
+        "test_pattern": "^Test_CRD_Installer",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "resource_sharing",
+        "test_pattern": "^Test_RS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      }
+    ]
+  },
+  "has_run": true,
+  "has_skip": true
+}
diff --git a/hack/e2e-select/testdata/pr-mode-run-e2e-label.json b/hack/e2e-select/testdata/pr-mode-run-e2e-label.json
new file mode 100644
index 000000000..dcb333c60
--- /dev/null
+++ b/hack/e2e-select/testdata/pr-mode-run-e2e-label.json
@@ -0,0 +1,95 @@
+{
+  "run": {
+    "include": [
+      {
+        "test_name": "gang_scheduling",
+        "test_pattern": "^Test_GS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates",
+        "test_pattern": "^Test_RU",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates",
+        "test_pattern": "^Test_OD",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering",
+        "test_pattern": "^Test_SO",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-real-full"
+      },
+      {
+        "test_name": "Topology_Aware_Scheduling",
+        "test_pattern": "^Test_TAS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "cert_management",
+        "test_pattern": "^Test_CM",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "auto_mnnvl",
+        "test_pattern": "^Test_AutoMNNVL",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-mnnvl-full"
+      },
+      {
+        "test_name": "crd_installer",
+        "test_pattern": "^Test_CRD_Installer",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "resource_sharing",
+        "test_pattern": "^Test_RS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates_default-scheduler",
+        "test_pattern": "^Test_RU",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates_default-scheduler",
+        "test_pattern": "^Test_OD",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering_default-scheduler",
+        "test_pattern": "^Test_SO",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-real-full"
+      }
+    ]
+  },
+  "skip": {
+    "include": []
+  },
+  "has_run": true,
+  "has_skip": false
+}
diff --git a/hack/e2e-select/testdata/pr-mode-shared-scheduler.json b/hack/e2e-select/testdata/pr-mode-shared-scheduler.json
new file mode 100644
index 000000000..dcb333c60
--- /dev/null
+++ b/hack/e2e-select/testdata/pr-mode-shared-scheduler.json
@@ -0,0 +1,95 @@
+{
+  "run": {
+    "include": [
+      {
+        "test_name": "gang_scheduling",
+        "test_pattern": "^Test_GS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates",
+        "test_pattern": "^Test_RU",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates",
+        "test_pattern": "^Test_OD",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering",
+        "test_pattern": "^Test_SO",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-real-full"
+      },
+      {
+        "test_name": "Topology_Aware_Scheduling",
+        "test_pattern": "^Test_TAS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "cert_management",
+        "test_pattern": "^Test_CM",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "auto_mnnvl",
+        "test_pattern": "^Test_AutoMNNVL",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-mnnvl-full"
+      },
+      {
+        "test_name": "crd_installer",
+        "test_pattern": "^Test_CRD_Installer",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "resource_sharing",
+        "test_pattern": "^Test_RS",
+        "backend": "kai-scheduler",
+        "create_flags": "",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "rolling_updates_default-scheduler",
+        "test_pattern": "^Test_RU",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "ondelete_updates_default-scheduler",
+        "test_pattern": "^Test_OD",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-full"
+      },
+      {
+        "test_name": "startup_ordering_default-scheduler",
+        "test_pattern": "^Test_SO",
+        "backend": "default-scheduler",
+        "create_flags": "-f hack/e2e-default-scheduler.yaml",
+        "make_target": "run-e2e-real-full"
+      }
+    ]
+  },
+  "skip": {
+    "include": []
+  },
+  "has_run": true,
+  "has_skip": false
+}
diff --git a/hack/e2e-select/tests/test_selector.py b/hack/e2e-select/tests/test_selector.py
new file mode 100644
index 000000000..a4b6a6fe1
--- /dev/null
+++ b/hack/e2e-select/tests/test_selector.py
@@ -0,0 +1,252 @@
+# /*
+# Copyright 2026 The Grove Authors.
+# SPDX-License-Identifier: Apache-2.0
+# */
+"""Unit tests for the e2e matrix selector.
+
+Run from repo root:
+
+    python3 -m pytest hack/e2e-select/tests/ -v
+
+or without pytest:
+
+    python3 hack/e2e-select/tests/test_selector.py
+"""
+
+import json
+import os
+import sys
+import unittest
+from pathlib import Path
+
+# Put the selector directory (parent of tests/) on sys.path so `import main` resolves from any cwd.
+HERE = Path(__file__).resolve().parent
+sys.path.insert(0, str(HERE.parent))
+
+import main as selector  # noqa: E402
+
+
+REPO_ROOT = HERE.parent.parent.parent
+TESTDATA = HERE.parent / "testdata"  # golden JSON samples read by TestGoldenSamples
+
+
+def _names(rows: list[dict]) -> list[str]:
+    return [r["test_name"] for r in rows]
+
+
+def _backends(rows: list[dict]) -> set[str]:
+    return {r["backend"] for r in rows}
+
+
+class TestModes(unittest.TestCase):
+    """Top-level mode selection."""
+
+    def test_nightly_runs_full_matrix(self):
+        r = selector.compute("nightly", changed_files=[], labels=[], draft=False)
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+        self.assertEqual(r["skip"]["include"], [])
+        self.assertTrue(r["has_run"])
+        self.assertFalse(r["has_skip"])
+
+    def test_nightly_ignores_changed_files_and_labels(self):
+        r = selector.compute("nightly",
+                             changed_files=["docs/foo.md"],
+                             labels=["run-e2e"],
+                             draft=True)
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+
+    def test_pr_run_e2e_label_forces_full_matrix(self):
+        # Even with a docs-only change, run-e2e label overrides.
+        r = selector.compute("pr",
+                             changed_files=["docs/foo.md"],
+                             labels=["run-e2e"],
+                             draft=False)
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+        self.assertEqual(r["skip"]["include"], [])
+
+    def test_pr_draft_no_label_emits_no_run(self):
+        r = selector.compute("pr",
+                             changed_files=["operator/internal/scheduler/kai/foo.go"],
+                             labels=[],
+                             draft=True)
+        self.assertEqual(r["run"]["include"], [])
+        self.assertEqual(len(r["skip"]["include"]), len(selector.ALL_ROWS))
+
+
+class TestPathFilters(unittest.TestCase):
+    """Path-rule logic for mode=pr (non-draft, no label)."""
+
+    def _run(self, files):
+        return selector.compute("pr", changed_files=files,
+                                labels=[], draft=False)
+
+    def test_docs_only_runs_nothing(self):
+        r = self._run(["docs/foo.md", "README.md"])
+        self.assertEqual(r["run"]["include"], [])
+        self.assertEqual(len(r["skip"]["include"]), len(selector.ALL_ROWS))
+
+    def test_kai_subpath_runs_only_kai_rows(self):
+        r = self._run(["operator/internal/scheduler/kai/backend.go"])
+        backends = _backends(r["run"]["include"])
+        self.assertEqual(backends, {"kai-scheduler"})
+        # Ensure default-scheduler rows landed in skip.
+        skip_backends = _backends(r["skip"]["include"])
+        self.assertIn("default-scheduler", skip_backends)
+
+    def test_kube_subpath_runs_only_default_scheduler(self):
+        r = self._run(["operator/internal/scheduler/kube/backend.go"])
+        backends = _backends(r["run"]["include"])
+        self.assertEqual(backends, {"default-scheduler"})
+
+    def test_shared_scheduler_runs_all(self):
+        # File under scheduler/ but NOT under kai/ or kube/ → all backends.
+        r = self._run(["operator/internal/scheduler/types.go"])
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+
+    def test_charts_runs_all_plus_agnostic(self):
+        r = self._run(["operator/charts/values.yaml"])
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+
+    def test_e2e_infra_runs_all(self):
+        r = self._run(["operator/e2e/tests/foo_test.go"])
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+
+    def test_workflow_change_runs_all(self):
+        r = self._run([".github/workflows/build-check-test.yaml"])
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+
+    def test_mixed_kai_plus_docs_keeps_only_kai(self):
+        r = self._run(["operator/internal/scheduler/kai/x.go",
+                       "docs/proposals/foo.md"])
+        backends = _backends(r["run"]["include"])
+        self.assertEqual(backends, {"kai-scheduler"})
+
+    def test_mixed_kai_plus_kube_runs_both(self):
+        r = self._run(["operator/internal/scheduler/kai/x.go",
+                       "operator/internal/scheduler/kube/y.go"])
+        backends = _backends(r["run"]["include"])
+        self.assertEqual(backends, {"kai-scheduler", "default-scheduler"})
+
+    def test_empty_changed_files_emits_no_run(self):
+        r = self._run([])
+        self.assertEqual(r["run"]["include"], [])
+        self.assertEqual(len(r["skip"]["include"]), len(selector.ALL_ROWS))
+
+    def test_unknown_top_level_path_is_ignored(self):
+        # Some random top-level file no rule matches → contributes nothing.
+        r = self._run(["unrelated-top-level-file.txt"])
+        self.assertEqual(r["run"]["include"], [])
+
+    def test_fallback_rule_for_other_operator_paths(self):
+        # operator/scheduler.go (hypothetical) — not under api/charts/e2e/
+        # but under operator/ → fallback rule fires.
+        r = self._run(["operator/some-toplevel-go-file.go"])
+        self.assertEqual(len(r["run"]["include"]), len(selector.ALL_ROWS))
+
+
+class TestSplitInvariants(unittest.TestCase):
+    """Invariants of the split() helper."""
+
+    def test_run_plus_skip_equals_all(self):
+        for files in (
+            [],
+            ["operator/internal/scheduler/kai/x.go"],
+            ["operator/internal/scheduler/kube/x.go"],
+            ["operator/internal/scheduler/types.go"],
+            ["docs/foo.md"],
+            ["operator/charts/values.yaml"],
+        ):
+            r = selector.compute("pr", files, labels=[], draft=False)
+            run = _names(r["run"]["include"])
+            skip = _names(r["skip"]["include"])
+            self.assertEqual(set(run) | set(skip),
+                             {row["test_name"] for row in selector.ALL_ROWS},
+                             msg=f"files={files}")
+            self.assertEqual(set(run) & set(skip), set(),
+                             msg=f"run/skip not disjoint for files={files}")
+
+    def test_emitted_rows_have_no_tier_field(self):
+        r = selector.compute("nightly", [], [], draft=False)
+        for row in r["run"]["include"]:
+            self.assertNotIn("tier", row,
+                             msg="tier is selector-internal, must not leak to GHA")
+
+
+class TestRowsConsistency(unittest.TestCase):
+    """Sanity checks on ALL_ROWS itself."""
+
+    def test_test_names_are_unique(self):
+        names = [r["test_name"] for r in selector.ALL_ROWS]
+        self.assertEqual(len(names), len(set(names)))
+
+    def test_required_fields_present(self):
+        required = {"test_name", "test_pattern", "backend",
+                    "create_flags", "make_target", "tier"}
+        for row in selector.ALL_ROWS:
+            self.assertEqual(set(row.keys()) & required, required,
+                             msg=f"missing fields in row: {row}")
+
+    def test_only_known_tiers(self):
+        for row in selector.ALL_ROWS:
+            self.assertIn(row["tier"],
+                          {"agnostic", "sensitive", "capability"},
+                          msg=f"unknown tier in row: {row}")
+
+
+class TestGoldenSamples(unittest.TestCase):
+    """Regenerate samples in testdata/ and assert they match committed files.
+
+    Run with E2E_SELECT_REGENERATE=1 to update the golden files instead.
+    """
+
+    SAMPLES = {
+        "pr-mode-kai-only.json": dict(
+            mode="pr",
+            changed_files=["operator/internal/scheduler/kai/backend.go"],
+            labels=[], draft=False),
+        "pr-mode-kube-only.json": dict(
+            mode="pr",
+            changed_files=["operator/internal/scheduler/kube/backend.go"],
+            labels=[], draft=False),
+        "pr-mode-shared-scheduler.json": dict(
+            mode="pr",
+            changed_files=["operator/internal/scheduler/types.go"],
+            labels=[], draft=False),
+        "pr-mode-docs-only.json": dict(
+            mode="pr",
+            changed_files=["docs/proposals/foo.md", "README.md"],
+            labels=[], draft=False),
+        "pr-mode-run-e2e-label.json": dict(
+            mode="pr",
+            changed_files=["docs/proposals/foo.md"],
+            labels=["run-e2e"], draft=False),
+        "pr-mode-draft-no-label.json": dict(
+            mode="pr",
+            changed_files=["operator/internal/scheduler/kai/backend.go"],
+            labels=[], draft=True),
+        "nightly-mode-full.json": dict(
+            mode="nightly",
+            changed_files=[], labels=[], draft=False),
+    }
+
+    def test_golden_samples_match(self):
+        regenerate = os.environ.get("E2E_SELECT_REGENERATE") == "1"
+        for fname, kwargs in self.SAMPLES.items():
+            with self.subTest(sample=fname):
+                got = selector.compute(**kwargs)
+                # Drop 'reason' from golden files — it's diagnostic, not contract.
+                got_stripped = {k: v for k, v in got.items() if k != "reason"}
+                path = TESTDATA / fname
+                if regenerate:
+                    path.write_text(json.dumps(got_stripped, indent=2) + "\n")
+                    continue
+                self.assertTrue(path.exists(),
+                                f"missing golden file {path}; "
+                                f"run with E2E_SELECT_REGENERATE=1 to create")
+                expected = json.loads(path.read_text())
+                self.assertEqual(got_stripped, expected,
+                                 f"selector output diverged from {fname}")
+
+
+if __name__ == "__main__":  # lets the file run directly, without pytest
+    unittest.main(verbosity=2)
diff --git a/operator/e2e/tests/capabilities.go b/operator/e2e/tests/capabilities.go
new file mode 100644
index 000000000..c32566d24
--- /dev/null
+++ b/operator/e2e/tests/capabilities.go
@@ -0,0 +1,109 @@
+// /*
+// Copyright 2026 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package tests
+
+import (
+	"sync"
+	"testing"
+)
+
+// Capability names a scheduler feature an E2E test may depend on (e.g. gang
+// scheduling, topology-aware scheduling). Tests gate themselves with
+// RequireCapability and are skipped when the active backend does not provide it.
+type Capability string
+
+const (
+	// GangScheduling indicates the active backend treats a PodGang as an
+	// all-or-nothing scheduling unit.
+	GangScheduling Capability = "GangScheduling"
+
+	// TopologyAwareScheduling indicates the active backend implements the
+	// scheduler.TopologyAwareSchedBackend interface AND the operator has
+	// topologyAwareScheduling.enabled=true.
+	TopologyAwareScheduling Capability = "TopologyAwareScheduling"
+
+	// AutoMNNVL indicates the operator has network.autoMNNVLEnabled=true.
+	// Config-only — no backend coupling.
+	AutoMNNVL Capability = "AutoMNNVL"
+)
+
+// CapabilitySet is the resolved set of capabilities for a single E2E run.
+type CapabilitySet struct {
+	// ActiveBackend is the value of OperatorConfiguration.scheduler.defaultProfileName.
+	ActiveBackend string
+	// caps marks the capabilities present on the active backend; missing keys read as false.
+	caps map[Capability]bool
+}
+
+// Has reports whether the set contains c; a zero-value set (nil map) reports false.
+func (s CapabilitySet) Has(c Capability) bool {
+	return s.caps[c]
+}
+
+// backendInterfaceCapabilities is the hardcoded map of backend → capabilities
+// that depend on Go interface implementation in the operator. Entries here are
+// what E2E cannot deduce from a live OperatorConfiguration alone (the operator
+// uses Go type assertions; the test binary runs out-of-process and cannot).
+//
+// Capabilities derived purely from configuration flags (e.g. AutoMNNVL from
+// network.autoMNNVLEnabled) are NOT listed here — they are resolved directly
+// from OperatorConfiguration in DiscoverCapabilities.
+//
+// When adding a new backend, add a row here AND update the developer checklist
+// in the design proposal. capabilities_test.go fails when a supported backend
+// has no row here or its TopologyAwareScheduling entry contradicts the Go interface.
+var backendInterfaceCapabilities = map[string]map[Capability]bool{
+	"kai-scheduler": {
+		GangScheduling:          true,
+		TopologyAwareScheduling: true,
+	},
+	"default-scheduler": {
+		// KubeSchedulerConfig.GangScheduling is forward-looking — the kube
+		// backend does not yet read or act on it. When it does, set
+		// GangScheduling: true here.
+	},
+}
+
+// currentCapabilities holds the resolved CapabilitySet for the running e2e
+// suite. DiscoverCapabilities (in capability_discovery.go, e2e build tag)
+// populates it once at TestMain time; RequireCapability reads it on every
+// gated test entry.
+var (
+	currentCapabilities    CapabilitySet
+	currentCapabilitiesSet bool         // while false, RequireCapability returns without skipping
+	currentCapabilitiesMu  sync.RWMutex // read-locked by RequireCapability around both values above
+)
+
+// RequireCapability skips t when the active backend does not provide required.
+// Tests gated with RequireCapability are listed in the design proposal's
+// Test Classification table as "Capability-gated".
+//
+// The function is a no-op if capabilities have not been discovered yet (e.g.
+// when running unit tests with go test ./... without an e2e cluster); the e2e
+// build flow guarantees discovery runs before any test that calls this.
+func RequireCapability(t *testing.T, required Capability) {
+	t.Helper()
+	// Holding the read lock across Skipf is safe: Skipf ends the test via
+	// runtime.Goexit, which still runs the deferred RUnlock.
+	currentCapabilitiesMu.RLock()
+	defer currentCapabilitiesMu.RUnlock()
+	if !currentCapabilitiesSet || currentCapabilities.Has(required) {
+		return
+	}
+	t.Skipf("skipping: active backend %q does not provide capability %q",
+		currentCapabilities.ActiveBackend, required)
+}
diff --git a/operator/e2e/tests/capabilities_test.go b/operator/e2e/tests/capabilities_test.go
new file mode 100644
index 000000000..be1a21da7
--- /dev/null
+++ b/operator/e2e/tests/capabilities_test.go
@@ -0,0 +1,100 @@
+// /*
+// Copyright 2026 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package tests
+
+import (
+	"testing"
+
+	configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
+	"github.com/ai-dynamo/grove/operator/internal/scheduler"
+	"github.com/ai-dynamo/grove/operator/internal/scheduler/kai"
+	"github.com/ai-dynamo/grove/operator/internal/scheduler/kube"
+
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/tools/record"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+// backendConstructors mirrors the switch in
+// operator/internal/scheduler/manager/manager.go newBackendForProfile.
+// Adding a new backend means adding a row here AND in
+// backendInterfaceCapabilities (in capabilities.go);
+// TestCapabilityTableCoversAllSupportedBackends fails if a supported backend lacks either row.
+var backendConstructors = map[configv1alpha1.SchedulerName]func() scheduler.Backend{
+	configv1alpha1.SchedulerNameKai: func() scheduler.Backend {
+		// Fake client, empty scheme and fake recorder: the tests in this file
+		// only inspect which interfaces the returned backend implements.
+		profile := configv1alpha1.SchedulerProfile{Name: configv1alpha1.SchedulerNameKai}
+		fakeClient := fake.NewClientBuilder().Build()
+		events := record.NewFakeRecorder(1)
+		return kai.New(fakeClient, runtime.NewScheme(), events, profile)
+	},
+	configv1alpha1.SchedulerNameKube: func() scheduler.Backend {
+		// Same fake wiring as the kai entry, with the kube profile and
+		// constructor.
+		profile := configv1alpha1.SchedulerProfile{Name: configv1alpha1.SchedulerNameKube}
+		fakeClient := fake.NewClientBuilder().Build()
+		events := record.NewFakeRecorder(1)
+		return kube.New(fakeClient, runtime.NewScheme(), events, profile)
+	},
+}
+
+// TestCapabilityTableCoversAllSupportedBackends verifies that every name in
+// configv1alpha1.SupportedSchedulerNames has a row in both
+// backendInterfaceCapabilities and backendConstructors, so a newly supported
+// backend cannot be added without its capability entries.
+func TestCapabilityTableCoversAllSupportedBackends(t *testing.T) {
+	for _, supported := range configv1alpha1.SupportedSchedulerNames {
+		_, inTable := backendInterfaceCapabilities[string(supported)]
+		_, hasCtor := backendConstructors[supported]
+		if !inTable {
+			t.Errorf("backend %q is in SupportedSchedulerNames but missing from "+
+				"backendInterfaceCapabilities; add a row to "+
+				"operator/e2e/tests/capabilities.go", supported)
+		}
+		if !hasCtor {
+			t.Errorf("backend %q is in SupportedSchedulerNames but missing from "+
+				"backendConstructors; add a row to "+
+				"operator/e2e/tests/capabilities_test.go", supported)
+		}
+	}
+}
+
+// TestCapabilityTableMatchesBackends cross-checks the hardcoded capability
+// table against the interfaces each constructed backend actually implements.
+// Only TopologyAwareScheduling is compared. This catches a backend whose
+// interface set changed (e.g. KAI drops TopologyAwareSchedBackend) without a
+// matching table update, which would otherwise make the E2E suite skip valid
+// TAS tests or run them against a backend that no longer supports TAS.
+func TestCapabilityTableMatchesBackends(t *testing.T) {
+	for name, ctor := range backendConstructors {
+		t.Run(string(name), func(t *testing.T) {
+			backend := ctor()
+
+			// TopologyAwareScheduling mirrors the Go interface assertion the
+			// operator itself uses (clustertopology.go).
+			_, implementsTAS := backend.(scheduler.TopologyAwareSchedBackend)
+			declaredTAS := backendInterfaceCapabilities[string(name)][TopologyAwareScheduling]
+			if declaredTAS == implementsTAS {
+				return
+			}
+			t.Errorf("backend %q: TopologyAwareScheduling table=%v but "+
+				"interface assertion=%v; update either the backend "+
+				"or backendInterfaceCapabilities", name, declaredTAS, implementsTAS)
+		})
+	}
+}
diff --git a/operator/e2e/tests/capability_discovery.go b/operator/e2e/tests/capability_discovery.go
new file mode 100644
index 000000000..ee04f9a50
--- /dev/null
+++ b/operator/e2e/tests/capability_discovery.go
@@ -0,0 +1,75 @@
+//go:build e2e
+
+// /*
+// Copyright 2026 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package tests
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/ai-dynamo/grove/operator/e2e/grove/config"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// DiscoverCapabilities works out the active backend and its capabilities by
+// combining the live OperatorConfiguration with the hardcoded interface
+// table. The outcome is published to the package-level currentCapabilities
+// for RequireCapability to consult and is also returned for logging or
+// inspection. It fails if the configuration cannot be read or the backend
+// has no table entry. Called once from TestMain before any test runs.
+func DiscoverCapabilities(ctx context.Context, crClient client.Client) (CapabilitySet, error) {
+	var none CapabilitySet
+	md, err := config.NewOperatorConfig(crClient).ReadGroveMetadata(ctx)
+	if err != nil {
+		return none, fmt.Errorf("read OperatorConfiguration: %w", err)
+	}
+
+	active := md.Config.Scheduler.DefaultProfileName
+	ifaceCaps, known := backendInterfaceCapabilities[active]
+	if !known {
+		return none, fmt.Errorf(
+			"active backend %q has no entry in backendInterfaceCapabilities; "+
+				"please update operator/e2e/tests/capabilities.go", active)
+	}
+
+	// Each row pairs a capability with the condition under which it is on:
+	//   - GangScheduling: backend-coupled, taken straight from the table.
+	//   - TopologyAwareScheduling: backend-coupled and gated by a config flag.
+	//   - AutoMNNVL: config-only, no interface-table lookup.
+	resolved := []struct {
+		capability Capability
+		enabled    bool
+	}{
+		{GangScheduling, ifaceCaps[GangScheduling]},
+		{TopologyAwareScheduling, md.Config.TopologyAwareScheduling.Enabled && ifaceCaps[TopologyAwareScheduling]},
+		{AutoMNNVL, md.Config.Network.AutoMNNVLEnabled},
+	}
+
+	result := CapabilitySet{ActiveBackend: active, caps: map[Capability]bool{}}
+	for _, row := range resolved {
+		if row.enabled {
+			result.caps[row.capability] = true
+		}
+	}
+
+	currentCapabilitiesMu.Lock()
+	currentCapabilities, currentCapabilitiesSet = result, true
+	currentCapabilitiesMu.Unlock()
+
+	return result, nil
+}
diff --git a/operator/e2e/tests/gang_scheduling_test.go b/operator/e2e/tests/gang_scheduling_test.go
index 3a971d6eb..b5095b143 100644
--- a/operator/e2e/tests/gang_scheduling_test.go
+++ b/operator/e2e/tests/gang_scheduling_test.go
@@ -32,6 +32,7 @@ import (
 // 3. Verify all workload pods are pending due to insufficient resources
 // 4. Uncordon the node and verify all pods get scheduled
 func Test_GS1_GangSchedulingWithFullReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 10-node Grove cluster, then cordon 1 node")
@@ -83,6 +84,7 @@ func Test_GS1_GangSchedulingWithFullReplicas(t *testing.T) {
 // 6. Scale PCSG replicas to 3 and verify 4 new pending pods
 // 7. Uncordon remaining nodes and verify all pods get scheduled
 func Test_GS2_GangSchedulingWithScalingFullReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	// Setup cluster (shared or individual based on test run mode)
@@ -152,6 +154,7 @@ func Test_GS2_GangSchedulingWithScalingFullReplicas(t *testing.T) {
 // 6. Scale PCS replicas to 2 and verify 10 new pending pods
 // 7. Uncordon remaining nodes and verify all pods get scheduled
 func Test_GS3_GangSchedulingWithPCSScalingFullReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 20-node Grove cluster, then cordon 11 nodes")
@@ -217,6 +220,7 @@ func Test_GS3_GangSchedulingWithPCSScalingFullReplicas(t *testing.T) {
 // 9. Scale PCSG replicas to 3 and verify 4 new pending pods
 // 10. Uncordon remaining nodes and verify all pods get scheduled
 func Test_GS4_GangSchedulingWithPCSAndPCSGScalingFullReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster, then cordon 19 nodes")
@@ -283,6 +287,7 @@ func Test_GS4_GangSchedulingWithPCSAndPCSGScalingFullReplicas(t *testing.T) {
 // 5. Wait for scheduled pods to become ready
 // 6. Uncordon 7 nodes and verify all remaining workload pods get scheduled
 func Test_GS5_GangSchedulingWithMinReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 10-node Grove cluster, then cordon 8 nodes")
@@ -351,6 +356,7 @@ func Test_GS5_GangSchedulingWithMinReplicas(t *testing.T) {
 // 11. Wait for scheduled pods to become ready
 // 12. Uncordon 2 nodes and verify remaining workload pods get scheduled
 func Test_GS6_GangSchedulingWithPCSGScalingMinReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 14-node Grove cluster, then cordon 12 nodes")
@@ -463,6 +469,7 @@ func Test_GS6_GangSchedulingWithPCSGScalingMinReplicas(t *testing.T) {
 // 13. Wait for scheduled pods to become ready
 // 14. Uncordon 2 nodes and verify remaining workload pods get scheduled
 func Test_GS7_GangSchedulingWithPCSGScalingMinReplicasAdvanced1(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 14-node Grove cluster, then cordon 12 nodes")
@@ -584,6 +591,7 @@ func Test_GS7_GangSchedulingWithPCSGScalingMinReplicasAdvanced1(t *testing.T) {
 // 9. Wait for scheduled pods to become ready
 // 10. Uncordon 7 nodes and verify the remaining workload pods get scheduled
 func Test_GS8_GangSchedulingWithPCSGScalingMinReplicasAdvanced2(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 14-node Grove cluster, then cordon 12 nodes")
@@ -679,6 +687,7 @@ func Test_GS8_GangSchedulingWithPCSGScalingMinReplicasAdvanced2(t *testing.T) {
 // 10. Wait for scheduled pods to become ready
 // 11. Uncordon 7 nodes and verify the remaining workload pods get scheduled
 func Test_GS9_GangSchedulingWithPCSScalingMinReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 20-node Grove cluster, then cordon 18 nodes")
@@ -782,6 +791,7 @@ func Test_GS9_GangSchedulingWithPCSScalingMinReplicas(t *testing.T) {
 // 9. Wait for scheduled pods to become ready
 // 10. Uncordon 10 nodes and verify the remaining workload pods get scheduled
 func Test_GS10_GangSchedulingWithPCSScalingMinReplicasAdvanced(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 20-node Grove cluster, then cordon 18 nodes")
@@ -886,6 +896,7 @@ func Test_GS10_GangSchedulingWithPCSScalingMinReplicasAdvanced(t *testing.T) {
 // 19. Wait for 2 more pods to be scheduled (min-available for pcs-1-sg-x-2)
 // 20. Uncordon 2 nodes and verify remaining workload pods get scheduled
 func Test_GS11_GangSchedulingWithPCSAndPCSGScalingMinReplicas(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster, then cordon 26 nodes")
@@ -1028,6 +1039,7 @@ func Test_GS11_GangSchedulingWithPCSAndPCSGScalingMinReplicas(t *testing.T) {
 // 11. Wait for scheduled pods to become ready
 // 12. Uncordon 14 nodes and verify the remaining workload pods get scheduled
 func Test_GS12_GangSchedulingWithComplexPCSGScaling(t *testing.T) {
+	RequireCapability(t, GangScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster, then cordon 26 nodes")
diff --git a/operator/e2e/tests/suite_test.go b/operator/e2e/tests/suite_test.go
index a9a2e4847..7a0381324 100644
--- a/operator/e2e/tests/suite_test.go
+++ b/operator/e2e/tests/suite_test.go
@@ -45,6 +45,17 @@ func TestMain(m *testing.M) {
 		os.Exit(1)
 	}
 
+	// Discover scheduler capabilities from the live OperatorConfiguration
+	// before any test runs. RequireCapability uses the result to auto-skip
+	// tests whose required capability is not provided by the active backend.
+	caps, err := DiscoverCapabilities(ctx, sharedCluster.GetClient())
+	if err != nil {
+		Logger.Errorf("failed to discover scheduler capabilities: %s", err)
+		sharedCluster.Teardown()
+		os.Exit(1)
+	}
+	Logger.Infof("Active backend: %s", caps.ActiveBackend)
+
 	// Run tests
 	code := m.Run()
 
diff --git a/operator/e2e/tests/topology_test.go b/operator/e2e/tests/topology_test.go
index 204cef333..57678a17c 100644
--- a/operator/e2e/tests/topology_test.go
+++ b/operator/e2e/tests/topology_test.go
@@ -101,6 +101,7 @@ func GetPodGroupOrFail(t *testing.T, tc *testctx.TestContext, podGroupVerifier *
 // Note: grove-topology is NOT cleaned up after this test — it is shared cluster infrastructure
 // used by TAS2-TAS16. ensureGroveTopology() in each subsequent test is idempotent.
 func Test_TAS1_TopologyInfrastructure(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	tc, cleanup := testctx.PrepareTest(ctx, t, 0)
@@ -166,6 +167,7 @@ func Test_TAS1_TopologyInfrastructure(t *testing.T) {
 // 4. Verify worker-block pods (4) are in the same block
 // 5. Verify different cliques can have independent topology constraints
 func Test_TAS2_MultipleCliquesWithDifferentConstraints(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -225,6 +227,7 @@ func Test_TAS2_MultipleCliquesWithDifferentConstraints(t *testing.T) {
 // 4. Verify router pods (2 standalone)
 // 5. Verify KAI PodGroup SubGroups: NO PCSG parent groups (because PCSG constraint is nil, per PR #357)
 func Test_TAS3_PCSOnlyConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -279,6 +282,7 @@ func Test_TAS3_PCSOnlyConstraint(t *testing.T) {
 // 3. Verify PCSG worker pods (2 total) respect rack constraint
 // 4. Router pods (2 standalone) are unconstrained
 func Test_TAS4_PCSGOnlyConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -344,6 +348,7 @@ func Test_TAS4_PCSGOnlyConstraint(t *testing.T) {
 // 2. PCS has NO explicit constraint
 // 3. Verify all 2 pods on same host (strictest constraint)
 func Test_TAS5_HostLevelConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -406,6 +411,7 @@ func Test_TAS5_HostLevelConstraint(t *testing.T) {
 // 3. Verify KAI PodGroup has zone constraint at top level
 // 4. Verify 1 SubGroup (standalone PCLQ) with NO additional constraint
 func Test_TAS6_StandalonePCLQOnlyPCSZoneConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -454,6 +460,7 @@ func Test_TAS6_StandalonePCLQOnlyPCSZoneConstraint(t *testing.T) {
 // 2. Verify all 4 pods scheduled (gang scheduling works)
 // 3. Verify KAI PodGroup has 4 SubGroups with NO topology constraints
 func Test_TAS7_NoTopologyConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -506,6 +513,7 @@ func Test_TAS7_NoTopologyConstraint(t *testing.T) {
 // 5. Verify all pods in same block (PCS constraint)
 // 6. Verify KAI PodGroup hierarchy with correct topology constraints
 func Test_TAS8_FullHierarchyWithCascadingConstraints(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize an 8-node Grove cluster for topology testing")
@@ -583,6 +591,7 @@ func Test_TAS8_FullHierarchyWithCascadingConstraints(t *testing.T) {
 // 3. Verify pods on same host (PCLQ constraint - strictest)
 // 4. Verify KAI PodGroup has block constraint at top level, host constraint at PCLQ level
 func Test_TAS9_PCSPlusPCLQConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -633,6 +642,7 @@ func Test_TAS9_PCSPlusPCLQConstraint(t *testing.T) {
 // 5. Verify base PodGang KAI PodGroup topology constraints
 // 6. Verify scaled PodGangs' KAI PodGroups (replicas 1-2)
 func Test_TAS10_PCSGScalingWithTopologyConstraints(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -713,6 +723,7 @@ func Test_TAS10_PCSGScalingWithTopologyConstraints(t *testing.T) {
 // 3. Verify each PCSG replica's pods on same host
 // 4. Verify KAI PodGroup has PCSG rack + PCLQ host constraints, NO top-level PCS constraint
 func Test_TAS11_PCSGPlusPCLQNoParentConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -772,6 +783,7 @@ func Test_TAS11_PCSGPlusPCLQNoParentConstraint(t *testing.T) {
 // 5. Verify base PodGang KAI PodGroup contains minAvailable=3 replicas
 // 6. Verify 7 scaled PodGangs' KAI PodGroups (replicas 3-9)
 func Test_TAS12_LargeScalingRatio(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -866,6 +878,7 @@ func Test_TAS12_LargeScalingRatio(t *testing.T) {
 // 4. Verify pod events show Unschedulable reason
 // 5. Verify KAI PodGroup exists with correct constraints even though pods are pending
 func Test_TAS13_InsufficientNodesForConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -925,6 +938,7 @@ func Test_TAS13_InsufficientNodesForConstraint(t *testing.T) {
 // 3. Verify each PCS replica's pods in same rack
 // 4. Verify KAI PodGroups for both PCS replicas have correct topology constraints
 func Test_TAS14_MultiReplicaWithRackConstraint(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -983,6 +997,7 @@ func Test_TAS14_MultiReplicaWithRackConstraint(t *testing.T) {
 // 6. Verify base PodGang KAI PodGroup topology for complex multi-PCSG workload
 // 7. Verify scaled PodGangs' KAI PodGroups (decoder replica 1, prefill replica 1)
 func Test_TAS15_DisaggregatedInferenceMultiplePCSGs(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for topology testing")
@@ -1097,6 +1112,7 @@ func Test_TAS15_DisaggregatedInferenceMultiplePCSGs(t *testing.T) {
 // 4. Verify block constraint at PCS level, rack at PCSG, for both PCS replicas
 // 5. Similar to TAS15 but scaled across 2 PCS replicas
 func Test_TAS16_MultiReplicaPCSWithThreeLevelHierarchy(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for multi-replica PCS testing")
@@ -1198,6 +1214,7 @@ func Test_TAS16_MultiReplicaPCSWithThreeLevelHierarchy(t *testing.T) {
 // 5. Verify KAI Topology CRs auto-created with correct keys
 // 6. Deploy H100 and GB200 workloads, verify pods packed at block level on correct node segments
 func Test_TAS17_HeterogeneousGPUCluster(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 28-node Grove cluster for heterogeneous GPU testing")
@@ -1374,6 +1391,7 @@ func Test_TAS17_HeterogeneousGPUCluster(t *testing.T) {
 // 2. Verify SchedulerTopologyDrift condition becomes True/Drift
 // 3. Verify SchedulerTopologyStatuses shows InSync=false
 func Test_TAS18_ClusterTopologyDriftDetection(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	const ctName = "drift-detect-topo"
 	const kaiTopoRef = "non-existent-kai-topo"
 	ctx := context.Background()
@@ -1434,6 +1452,7 @@ func Test_TAS18_ClusterTopologyDriftDetection(t *testing.T) {
 // 5. Verify KAI Topology recreated with 3 keys
 // 6. Verify SchedulerTopologyDrift remains False/InSync
 func Test_TAS19_AutoManagedCTLifecycle(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	const ctName = "lifecycle-topo"
 	ctx := context.Background()
 
@@ -1514,6 +1533,7 @@ func Test_TAS19_AutoManagedCTLifecycle(t *testing.T) {
 // 9. Re-create the ClusterTopology
 // 10. Verify TopologyLevelsUnavailable = False/AllClusterTopologyLevelsAvailable
 func Test_TAS20_PCSTopologyLevelsUnavailableCondition(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a 2-node Grove cluster for PCS condition testing")
@@ -1639,6 +1659,7 @@ func Test_TAS20_PCSTopologyLevelsUnavailableCondition(t *testing.T) {
 // Test_TAS21_ClusterTopologyValidationWebhook verifies that the ClusterTopology validating webhook
 // rejects invalid topology definitions and invalid schedulerTopologyReferences.
 func Test_TAS21_ClusterTopologyValidationWebhook(t *testing.T) {
+	RequireCapability(t, TopologyAwareScheduling)
 	ctx := context.Background()
 
 	Logger.Info("1. Initialize a Grove cluster for ClusterTopology webhook validation testing")
diff --git a/operator/e2e/yaml/tas-hierarchy.yaml b/operator/e2e/yaml/tas-hierarchy.yaml
index 9c6c0572a..2794efd1e 100644
--- a/operator/e2e/yaml/tas-hierarchy.yaml
+++ b/operator/e2e/yaml/tas-hierarchy.yaml
@@ -36,7 +36,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -70,7 +69,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-host-level.yaml b/operator/e2e/yaml/tas-host-level.yaml
index b13bda54d..44bb968ae 100644
--- a/operator/e2e/yaml/tas-host-level.yaml
+++ b/operator/e2e/yaml/tas-host-level.yaml
@@ -23,7 +23,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-indep-clq.yaml b/operator/e2e/yaml/tas-indep-clq.yaml
index 614c867bb..b11b0e9f6 100644
--- a/operator/e2e/yaml/tas-indep-clq.yaml
+++ b/operator/e2e/yaml/tas-indep-clq.yaml
@@ -24,7 +24,6 @@ spec:
           minAvailable: 3
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -67,7 +66,6 @@ spec:
           minAvailable: 4
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-insuffic.yaml b/operator/e2e/yaml/tas-insuffic.yaml
index f77278abc..cbc2cc0a9 100644
--- a/operator/e2e/yaml/tas-insuffic.yaml
+++ b/operator/e2e/yaml/tas-insuffic.yaml
@@ -23,7 +23,6 @@ spec:
           minAvailable: 10  # All-or-nothing gang scheduling
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-large-scale.yaml b/operator/e2e/yaml/tas-large-scale.yaml
index 034d17769..5253f297a 100644
--- a/operator/e2e/yaml/tas-large-scale.yaml
+++ b/operator/e2e/yaml/tas-large-scale.yaml
@@ -33,7 +33,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-multirep.yaml b/operator/e2e/yaml/tas-multirep.yaml
index 71218e47e..aa2ed865f 100644
--- a/operator/e2e/yaml/tas-multirep.yaml
+++ b/operator/e2e/yaml/tas-multirep.yaml
@@ -23,7 +23,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-no-constraint.yaml b/operator/e2e/yaml/tas-no-constraint.yaml
index 111937d41..22bfa6623 100644
--- a/operator/e2e/yaml/tas-no-constraint.yaml
+++ b/operator/e2e/yaml/tas-no-constraint.yaml
@@ -26,7 +26,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-pcs-multi-pcsg-multi-replica.yaml b/operator/e2e/yaml/tas-pcs-multi-pcsg-multi-replica.yaml
index 24f7b69e7..e29e8aa91 100644
--- a/operator/e2e/yaml/tas-pcs-multi-pcsg-multi-replica.yaml
+++ b/operator/e2e/yaml/tas-pcs-multi-pcsg-multi-replica.yaml
@@ -42,7 +42,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -73,7 +72,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -107,7 +105,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -138,7 +135,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -169,7 +165,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-pcs-multi-pcsg.yaml b/operator/e2e/yaml/tas-pcs-multi-pcsg.yaml
index 115caf612..69bec7fe5 100644
--- a/operator/e2e/yaml/tas-pcs-multi-pcsg.yaml
+++ b/operator/e2e/yaml/tas-pcs-multi-pcsg.yaml
@@ -42,7 +42,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -82,7 +81,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -122,7 +120,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -162,7 +159,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -202,7 +198,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-pcs-pclq.yaml b/operator/e2e/yaml/tas-pcs-pclq.yaml
index ca90817fe..1034d0539 100644
--- a/operator/e2e/yaml/tas-pcs-pclq.yaml
+++ b/operator/e2e/yaml/tas-pcs-pclq.yaml
@@ -26,7 +26,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-pcsg-pclq.yaml b/operator/e2e/yaml/tas-pcsg-pclq.yaml
index c69cac669..97be1aa97 100644
--- a/operator/e2e/yaml/tas-pcsg-pclq.yaml
+++ b/operator/e2e/yaml/tas-pcsg-pclq.yaml
@@ -32,7 +32,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-pcsg-scale.yaml b/operator/e2e/yaml/tas-pcsg-scale.yaml
index ffcc06e65..6cc999249 100644
--- a/operator/e2e/yaml/tas-pcsg-scale.yaml
+++ b/operator/e2e/yaml/tas-pcsg-scale.yaml
@@ -32,7 +32,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-sl-pcs-only.yaml b/operator/e2e/yaml/tas-sl-pcs-only.yaml
index 3b71e2849..fac96f735 100644
--- a/operator/e2e/yaml/tas-sl-pcs-only.yaml
+++ b/operator/e2e/yaml/tas-sl-pcs-only.yaml
@@ -29,7 +29,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -69,7 +68,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-sl-pcsg-only.yaml b/operator/e2e/yaml/tas-sl-pcsg-only.yaml
index 598bdff55..336a587ee 100644
--- a/operator/e2e/yaml/tas-sl-pcsg-only.yaml
+++ b/operator/e2e/yaml/tas-sl-pcsg-only.yaml
@@ -29,7 +29,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -69,7 +68,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/tas-standalone-pclq-only-pcs-zone.yaml b/operator/e2e/yaml/tas-standalone-pclq-only-pcs-zone.yaml
index 77a1887ca..d5231b034 100644
--- a/operator/e2e/yaml/tas-standalone-pclq-only-pcs-zone.yaml
+++ b/operator/e2e/yaml/tas-standalone-pclq-only-pcs-zone.yaml
@@ -23,7 +23,6 @@ spec:
           minAvailable: 4
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/workload-ondelete.yaml b/operator/e2e/yaml/workload-ondelete.yaml
index b8a94162d..de77b56f3 100644
--- a/operator/e2e/yaml/workload-ondelete.yaml
+++ b/operator/e2e/yaml/workload-ondelete.yaml
@@ -21,7 +21,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -52,7 +51,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -83,7 +81,6 @@ spec:
           minAvailable: 3
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/workload1.yaml b/operator/e2e/yaml/workload1.yaml
index cfd254429..b81de1414 100644
--- a/operator/e2e/yaml/workload1.yaml
+++ b/operator/e2e/yaml/workload1.yaml
@@ -19,7 +19,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -50,7 +49,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -81,7 +79,6 @@ spec:
           minAvailable: 3
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/workload2.yaml b/operator/e2e/yaml/workload2.yaml
index 0d3a6b22a..01b15c5dd 100644
--- a/operator/e2e/yaml/workload2.yaml
+++ b/operator/e2e/yaml/workload2.yaml
@@ -20,7 +20,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -51,7 +50,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -82,7 +80,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/workload3.yaml b/operator/e2e/yaml/workload3.yaml
index ccc7b8fae..d7c7bef8d 100644
--- a/operator/e2e/yaml/workload3.yaml
+++ b/operator/e2e/yaml/workload3.yaml
@@ -20,7 +20,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -51,7 +50,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -82,7 +80,6 @@ spec:
           minAvailable: 3
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/workload4.yaml b/operator/e2e/yaml/workload4.yaml
index 9e63f3e33..80bdcaac5 100644
--- a/operator/e2e/yaml/workload4.yaml
+++ b/operator/e2e/yaml/workload4.yaml
@@ -20,7 +20,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -51,7 +50,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -82,7 +80,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/workload5.yaml b/operator/e2e/yaml/workload5.yaml
index 5c760a088..1c693229f 100644
--- a/operator/e2e/yaml/workload5.yaml
+++ b/operator/e2e/yaml/workload5.yaml
@@ -20,7 +20,6 @@ spec:
           minAvailable: 2
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -53,7 +52,6 @@ spec:
             - pc-c
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -86,7 +84,6 @@ spec:
             - pc-a
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/e2e/yaml/workload6.yaml b/operator/e2e/yaml/workload6.yaml
index 7c11982cb..4d7570e95 100644
--- a/operator/e2e/yaml/workload6.yaml
+++ b/operator/e2e/yaml/workload6.yaml
@@ -20,7 +20,6 @@ spec:
           minAvailable: 1
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -53,7 +52,6 @@ spec:
             - pc-a
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
@@ -86,7 +84,6 @@ spec:
             - pc-b
           podSpec:
             terminationGracePeriodSeconds: 5
-            schedulerName: kai-scheduler
             affinity:
               nodeAffinity:
                 requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/operator/hack/README.md b/operator/hack/README.md
index 8417510d1..fa58ed7a5 100644
--- a/operator/hack/README.md
+++ b/operator/hack/README.md
@@ -89,7 +89,7 @@ All configuration can be overridden via `E2E_*` environment variables (used by `
 **Components (ComponentConfig):**
 
 - `E2E_KAI_VERSION` - Kai Scheduler version (default: from `dependencies.yaml`)
-- `E2E_SKAFFOLD_PROFILE` - Skaffold profile for Grove (default: `topology-test`)
+- `E2E_SKAFFOLD_PROFILE` - Skaffold profile for Grove (default: `e2e-kai`)
 - `E2E_GROVE_NAMESPACE` - Grove operator namespace (default: `grove-system`)
 - `E2E_REGISTRY` - Container registry override (default: none)
 
diff --git a/operator/hack/e2e-autoMNNVL/README.md b/operator/hack/e2e-autoMNNVL/README.md
index 3670eea74..4a1d4de6b 100644
--- a/operator/hack/e2e-autoMNNVL/README.md
+++ b/operator/hack/e2e-autoMNNVL/README.md
@@ -93,5 +93,5 @@ make e2e-cluster-down
 - **Cluster name:** `shared-e2e-test-cluster` (same as standard e2e)
 - **Nodes:** 1 server + 2 agents (lightweight — standard e2e uses 30)
 - **Registry:** local registry on port 5001
-- **Skaffold profile:** `topology-test` (same as standard e2e; Kai and topology are installed, only worker count and prepull are reduced)
+- **Skaffold profile:** `e2e-kai` (same as standard e2e; Kai and topology are installed, only worker count and prepull are reduced)
 - **Fake GPU:** [fake-gpu-operator](https://github.com/run-ai/fake-gpu-operator) v0.0.72 (provides ComputeDomain CRD)
diff --git a/operator/hack/e2e-cluster/create-e2e-cluster.py b/operator/hack/e2e-cluster/create-e2e-cluster.py
index 54af806dd..7f710f98b 100755
--- a/operator/hack/e2e-cluster/create-e2e-cluster.py
+++ b/operator/hack/e2e-cluster/create-e2e-cluster.py
@@ -129,7 +129,7 @@ class ClusterConfig(BaseSettings):
     worker_memory: Optional[str] = Field(default=DEFAULT_WORKER_MEMORY, pattern=r"^\d+[mMgG]?$")
     k3s_image: str = "rancher/k3s:v1.34.2-k3s1"
     kai_version: str = Field(default=DEPENDENCIES['kai_scheduler']['version'], pattern=r"^v[\d.]+(-[\w.]+)?$")
-    skaffold_profile: str = "topology-test"
+    skaffold_profile: str = "e2e-kai"
     max_retries: int = Field(default=3, ge=1, le=10)
 
     # Constants (not configurable via environment variables)
diff --git a/operator/hack/e2e-default-scheduler.yaml b/operator/hack/e2e-default-scheduler.yaml
new file mode 100644
index 000000000..0214b5566
--- /dev/null
+++ b/operator/hack/e2e-default-scheduler.yaml
@@ -0,0 +1,14 @@
+# E2E preset overlay for the in-tree default-scheduler backend.
+# Layers on top of e2e.yaml; cluster shape and KWOK config are inherited so
+# kai vs default-scheduler comparisons stay fair.
+#
+# Activated via: infra-manager.py setup -f hack/e2e-default-scheduler.yaml
+# (threaded through E2E_CREATE_FLAGS in the CI matrix; see build-check-test.yaml).
+
+scheduler:
+  kai:
+    enabled: false  # setup skips the KAI install and the KAI image prepull
+
+grove:
+  local:
+    skaffold_profile: e2e-default-scheduler  # defined in operator/skaffold.yaml
diff --git a/operator/hack/e2e.yaml b/operator/hack/e2e.yaml
index e09cedd40..a50290b9b 100644
--- a/operator/hack/e2e.yaml
+++ b/operator/hack/e2e.yaml
@@ -17,3 +17,5 @@ scheduler:
 grove:
   enabled: true
   profiling: false
+  local:
+    skaffold_profile: e2e-kai
diff --git a/operator/hack/infra_manager/constants.py b/operator/hack/infra_manager/constants.py
index 10a29d2b0..14b3be285 100644
--- a/operator/hack/infra_manager/constants.py
+++ b/operator/hack/infra_manager/constants.py
@@ -186,7 +186,7 @@ def parse_memory_mb(mem_str: str) -> int:
 DEFAULT_CLUSTER_CREATE_MAX_RETRIES = 3
 
 # -- Component defaults --
-DEFAULT_SKAFFOLD_PROFILE = "topology-test"
+DEFAULT_SKAFFOLD_PROFILE = "e2e-kai"
 DEFAULT_GROVE_NAMESPACE = "grove-system"
 
 # -- KWOK defaults --
diff --git a/operator/hack/infra_manager/orchestrator.py b/operator/hack/infra_manager/orchestrator.py
index 198fde710..ae7454109 100644
--- a/operator/hack/infra_manager/orchestrator.py
+++ b/operator/hack/infra_manager/orchestrator.py
@@ -126,16 +126,21 @@ def _run_task(name: str, fn: Callable) -> None:
             console.print(outputs[name], end="")
 
 
-def _run_prepull(registry_port: int) -> None:
+def _run_prepull(registry_port: int, kai_enabled: bool) -> None:
     """Pre-pull images to local registry in a single batch.
 
     Args:
         registry_port: Port for the local container registry.
+        kai_enabled: Whether the KAI scheduler is enabled. When false, the KAI
+            image group is skipped because no workload will reference it.
     """
     groups: list[tuple[list[str], str]] = [
-        (DEPENDENCIES["kai_scheduler"]["images"], DEPENDENCIES["kai_scheduler"]["version"]),
         (DEPENDENCIES["cert_manager"]["images"], DEPENDENCIES["cert_manager"]["version"]),
     ]
+    if kai_enabled:
+        groups.insert(
+            0, (DEPENDENCIES["kai_scheduler"]["images"], DEPENDENCIES["kai_scheduler"]["version"])
+        )
     busybox_images = dep_value("test_images", "busybox")
     if busybox_images:
         groups.append((busybox_images, "latest"))
@@ -211,7 +216,9 @@ def run_setup(cfg: SetupConfig) -> None:
     if cfg.cluster.create:
         parallel_tasks["topology"] = apply_topology_labels
     if do_prepull:
-        parallel_tasks["prepull"] = lambda: _run_prepull(cfg.cluster.registry_port)
+        parallel_tasks["prepull"] = lambda: _run_prepull(
+            cfg.cluster.registry_port, cfg.scheduler.kai.enabled
+        )
     if cfg.scheduler.kai.enabled:
         parallel_tasks["kai"] = lambda: install_kai_scheduler(cfg.scheduler.kai)
     if cfg.grove.enabled:
diff --git a/operator/skaffold.yaml b/operator/skaffold.yaml
index 72e403cc9..8a099ac46 100644
--- a/operator/skaffold.yaml
+++ b/operator/skaffold.yaml
@@ -73,7 +73,7 @@ profiles:
           config:
             leaderElection:
               enabled: false
-  - name: topology-test
+  - name: e2e-kai
     patches:
       - op: add
         path: /deploy/helm/releases/0/setValues
@@ -88,6 +88,17 @@ profiles:
               enabled: false
             topologyAwareScheduling:
               enabled: true
+  - name: e2e-default-scheduler  # selected by hack/e2e-default-scheduler.yaml
+    patches:
+      - op: add
+        path: /deploy/helm/releases/0/setValues
+        value:
+          replicaCount: 1
+          config:
+            scheduler:
+              defaultProfileName: default-scheduler  # NOTE(review): presumably the non-KAI backend; confirm
+            leaderElection:
+              enabled: false
   - name: mnnvl-test
     patches:
       - op: add