From fe1d5751c24409ac2d8b8022ea1f92af4d0b3f76 Mon Sep 17 00:00:00 2001
From: Bill Berry <wberry@microsoft.com>
Date: Mon, 1 Jun 2026 17:27:37 -0700
Subject: [PATCH] ci(workflows): add Vally eval workflows and update PR
 automation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- add five eval-* workflows and the on-demand evals-agent-matrix workflow
- update pr-review and pr-validation automation
- update .vally.yaml configuration

🔧 - Generated by Copilot
---
 .github/workflows/eval-corpus-moderation.yml |  89 ++++++++
 .github/workflows/eval-spec-lint.yml         | 100 +++++++++
 .github/workflows/eval-stimulus-presence.yml |  77 +++++++
 .github/workflows/eval-text-moderation.yml   |  61 ++++++
 .github/workflows/eval-vally.yml             | 206 +++++++++++++++++++
 .github/workflows/evals-agent-matrix.yml     | 106 ++++++++++
 .github/workflows/pr-review.md               |   5 +
 .github/workflows/pr-validation.yml          |  47 +++++
 .vally.yaml                                  |  19 +-
 9 files changed, 699 insertions(+), 11 deletions(-)
 create mode 100644 .github/workflows/eval-corpus-moderation.yml
 create mode 100644 .github/workflows/eval-spec-lint.yml
 create mode 100644 .github/workflows/eval-stimulus-presence.yml
 create mode 100644 .github/workflows/eval-text-moderation.yml
 create mode 100644 .github/workflows/eval-vally.yml
 create mode 100644 .github/workflows/evals-agent-matrix.yml

diff --git a/.github/workflows/eval-corpus-moderation.yml b/.github/workflows/eval-corpus-moderation.yml
new file mode 100644
index 000000000..2e7fc1661
--- /dev/null
+++ b/.github/workflows/eval-corpus-moderation.yml
@@ -0,0 +1,89 @@
+name: Evals - Corpus Content Moderation
+
+on:
+  workflow_call:  # reusable workflow; invoked via `uses:` from a caller job
+    inputs:
+      base-sha:
+        description: 'Base SHA for changed-artifact detection'
+        required: true
+        type: string
+      head-sha:
+        description: 'Head SHA for changed-artifact detection'
+        required: true
+        type: string
+      soft-fail:
+        description: 'Whether to continue on content moderation failures'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+
+jobs:
+  content-moderation:
+    name: Evals - Corpus Content Moderation
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false  # do not leave the checkout token in the local git config
+          fetch-depth: 0  # full history; presumably needed to diff base-sha against head-sha
+
+      - name: Setup Node.js
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+        with:
+          node-version: "24"
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+        with:
+          version: "0.10.9"
+
+      - name: Install moderation dependencies
+        run: uv pip install --system -r scripts/evals/moderation/requirements.txt
+
+      - name: Cache Detoxify model
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 (releases before v4.2.0 target the retired legacy cache service)
+        with:
+          path: ~/.cache/torch/hub/checkpoints
+          key: detoxify-unbiased-${{ hashFiles('scripts/evals/moderation/requirements.txt') }}  # new cache entry whenever requirements.txt changes
+
+      - name: Create logs directory
+        shell: pwsh
+        run: New-Item -ItemType Directory -Force -Path logs | Out-Null
+
+      - name: Detect changed AI artifacts
+        shell: pwsh
+        run: |
+          pwsh -NoProfile -File scripts/evals/Get-ChangedAIArtifact.ps1 `
+            -BaseRef "${{ inputs.base-sha }}" `
+            -HeadRef "${{ inputs.head-sha }}" `
+            -OutFile logs/changed-ai-artifacts.json
+
+      - name: Moderate changed corpus
+        shell: pwsh
+        continue-on-error: ${{ inputs.soft-fail }}  # soft-fail=true lets the job pass even when this step fails
+        run: |
+          pwsh -NoProfile -File scripts/evals/Invoke-CorpusModeration.ps1 `
+            -ManifestPath logs/changed-ai-artifacts.json `
+            -OutFile logs/moderation-corpus.json
+
+      - name: Upload moderation artifacts on failure
+        if: ${{ !cancelled() && (failure() || inputs.soft-fail) }}  # failure() stays false when soft-fail masks the failing step, so upload in soft-fail mode too
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: content-moderation-logs
+          path: |
+            logs/changed-ai-artifacts.json
+            logs/moderation-corpus.json
+          if-no-files-found: ignore  # missing log files are not treated as an error
+          retention-days: 7
diff --git a/.github/workflows/eval-spec-lint.yml b/.github/workflows/eval-spec-lint.yml
new file mode 100644
index 000000000..78bc458db
--- /dev/null
+++ b/.github/workflows/eval-spec-lint.yml
@@ -0,0 +1,100 @@
+name: Evals - Spec Lint and Skill Hygiene
+
+on:
+  workflow_call:  # reusable workflow; invoked via `uses:` from a caller job
+    inputs:
+      base-sha:
+        description: "Base commit SHA for changed-artifact detection."
+        required: true
+        type: string
+      head-sha:
+        description: "Head commit SHA for changed-artifact detection."
+        required: true
+        type: string
+      soft-fail:
+        description: "When true, lint failures do not fail the job."
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+
+jobs:
+  eval-lint:
+    name: Evals - Spec Lint and Skill Hygiene
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false  # do not leave the checkout token in the local git config
+          fetch-depth: 0  # full history; presumably needed to diff base-sha against head-sha
+
+      - name: Setup Node.js
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+        with:
+          node-version: "24"
+          cache: "npm"
+
+      - name: Install npm dependencies
+        run: npm ci
+
+      - name: Install PowerShell-Yaml
+        shell: pwsh
+        run: |
+          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser
+
+      - name: Create logs directory
+        shell: pwsh
+        run: New-Item -ItemType Directory -Force -Path logs | Out-Null
+
+      - name: Detect changed AI artifacts
+        shell: pwsh
+        run: |
+          pwsh -NoProfile -File scripts/evals/Get-ChangedAIArtifact.ps1 `
+            -BaseRef "${{ inputs.base-sha }}" `
+            -HeadRef "${{ inputs.head-sha }}" `
+            -OutFile logs/changed-ai-artifacts.json
+
+      - name: Validate eval spec schema
+        shell: pwsh
+        continue-on-error: ${{ inputs.soft-fail }}  # soft-fail=true lets the job pass even when this step fails
+        run: |
+          pwsh -NoProfile -File scripts/evals/Test-EvalSpec.ps1 `
+            -Root evals/ `
+            -OutputPath logs/eval-spec-lint.json
+
+      - name: Run skill hygiene lint
+        shell: pwsh
+        continue-on-error: ${{ inputs.soft-fail }}  # soft-fail=true lets the job pass even when this step fails
+        run: |
+          $manifestPath = 'logs/changed-ai-artifacts.json'
+          if (-not (Test-Path -LiteralPath $manifestPath)) {
+              Write-Host "No changed-artifact manifest found; skipping skill hygiene lint."
+              return
+          }
+          $manifest = Get-Content -LiteralPath $manifestPath -Raw | ConvertFrom-Json
+          $skillChanges = @(($manifest.artifacts ?? $manifest) | Where-Object { $_.kind -eq 'skill' })  # entries may sit under .artifacts (as eval-vally.yml reads them) or be the top-level array
+          if ($skillChanges.Count -eq 0) {
+              Write-Host "No skill artifacts changed; skipping skill hygiene lint."
+              return
+          }
+          Write-Host "Detected $($skillChanges.Count) changed skill artifact(s); running vally lint."
+          npm run eval:lint:skills
+          if ($LASTEXITCODE -ne 0) {
+              throw "Skill hygiene lint failed with exit code $LASTEXITCODE."
+          }
+
+      - name: Upload eval-lint artifacts on failure
+        if: ${{ !cancelled() && (failure() || inputs.soft-fail) }}  # failure() stays false when soft-fail masks a failing step, so upload in soft-fail mode too
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: eval-lint-logs
+          path: |
+            logs/eval-spec-lint.json
+            logs/changed-ai-artifacts.json
+          if-no-files-found: ignore  # missing log files are not treated as an error
+          retention-days: 7
diff --git a/.github/workflows/eval-stimulus-presence.yml b/.github/workflows/eval-stimulus-presence.yml
new file mode 100644
index 000000000..e771c67a1
--- /dev/null
+++ b/.github/workflows/eval-stimulus-presence.yml
@@ -0,0 +1,77 @@
+name: Evals - Stimulus Presence
+
+on:
+  workflow_call:  # reusable workflow; invoked via `uses:` from a caller job
+    inputs:
+      base-sha:
+        description: "Base commit SHA for change detection"
+        required: true
+        type: string
+      head-sha:
+        description: "Head commit SHA for change detection"
+        required: true
+        type: string
+      soft-fail:
+        description: "Whether to continue on validation failures"
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+
+jobs:
+  eval-presence:
+    name: Evals - Stimulus Presence
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false  # do not leave the checkout token in the local git config
+          fetch-depth: 0  # full history; presumably needed to diff base-sha against head-sha
+
+      - name: Setup Node.js
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+        with:
+          node-version: "24"
+          cache: "npm"  # NOTE(review): no npm install step follows in this job; confirm the npm cache is needed here
+
+      - name: Install PowerShell-Yaml
+        shell: pwsh
+        run: |
+          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser
+
+      - name: Create logs directory
+        shell: pwsh
+        run: New-Item -ItemType Directory -Force -Path logs | Out-Null
+
+      - name: Detect changed AI artifacts
+        shell: pwsh
+        run: |
+          pwsh -NoProfile -File scripts/evals/Get-ChangedAIArtifact.ps1 `
+            -BaseRef "${{ inputs.base-sha }}" `
+            -HeadRef "${{ inputs.head-sha }}" `
+            -OutFile logs/changed-ai-artifacts.json
+
+      - name: Enforce stimulus presence
+        shell: pwsh
+        continue-on-error: ${{ inputs.soft-fail }}  # soft-fail=true lets the job pass even when this step fails
+        run: |
+          pwsh -NoProfile -File scripts/evals/Test-StimulusPresence.ps1 `
+            -ManifestPath logs/changed-ai-artifacts.json `
+            -EvalRoot evals/ `
+            -OutFile logs/stimulus-presence.json
+
+      - name: Upload presence artifacts on failure
+        if: ${{ !cancelled() && (failure() || inputs.soft-fail) }}  # failure() stays false when soft-fail masks the failing step, so upload in soft-fail mode too
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: eval-presence-logs
+          path: |
+            logs/changed-ai-artifacts.json
+            logs/stimulus-presence.json
+          if-no-files-found: ignore  # missing log files are not treated as an error
+          retention-days: 7
diff --git a/.github/workflows/eval-text-moderation.yml b/.github/workflows/eval-text-moderation.yml
new file mode 100644
index 000000000..572e679b9
--- /dev/null
+++ b/.github/workflows/eval-text-moderation.yml
@@ -0,0 +1,61 @@
+name: Evals - Text Moderation
+
+on:
+  workflow_call:  # reusable workflow; invoked via `uses:` from a caller job
+    inputs:
+      soft-fail:
+        description: 'Whether to continue on text moderation failures'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+
+jobs:
+  text-moderation:
+    name: Evals - Stimulus Text Moderation
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false  # do not leave the checkout token in the local git config
+          fetch-depth: 0  # NOTE(review): this workflow takes no base/head SHA; confirm full history is actually needed
+
+      - name: Setup Node.js
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+        with:
+          node-version: "24"
+          cache: "npm"
+
+      - name: Install npm dependencies
+        run: npm ci
+
+      - name: Install PowerShell-Yaml
+        shell: pwsh
+        run: |
+          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser
+
+      - name: Create logs directory
+        shell: pwsh
+        run: New-Item -ItemType Directory -Force -Path logs | Out-Null
+
+      - name: Moderate AI artifact corpus (alex.js + retext-profanities)
+        shell: pwsh
+        continue-on-error: ${{ inputs.soft-fail }}  # soft-fail=true lets the job pass even when this step fails
+        run: |
+          pwsh -NoProfile -File scripts/evals/Test-EvalSpecText.ps1 `
+            -OutputPath logs/eval-spec-text-moderation.json
+
+      - name: Upload text moderation artifacts on failure
+        if: ${{ !cancelled() && (failure() || inputs.soft-fail) }}  # failure() stays false when soft-fail masks the failing step, so upload in soft-fail mode too
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: eval-text-moderation-logs
+          path: |
+            logs/eval-spec-text-moderation.json
+          if-no-files-found: ignore  # a missing log file is not treated as an error
+          retention-days: 7
diff --git a/.github/workflows/eval-vally.yml b/.github/workflows/eval-vally.yml
new file mode 100644
index 000000000..15e835065
--- /dev/null
+++ b/.github/workflows/eval-vally.yml
@@ -0,0 +1,206 @@
+name: Evals - Execute Vally Suites
+
+on:
+  workflow_call:  # reusable workflow; invoked via `uses:` from a caller job
+    inputs:
+      base-sha:
+        description: 'Base SHA for changed-artifact detection'
+        required: true
+        type: string
+      head-sha:
+        description: 'Head SHA for changed-artifact detection'
+        required: true
+        type: string
+    secrets:
+      copilot-github-token:
+        description: 'Token used to authenticate Copilot for eval execution'
+        required: true
+
+permissions:
+  contents: read
+
+jobs:
+  eval-execute:
+    name: Evals - Execute Vally Suites
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write  # the final step of this job creates or updates a PR comment
+    env:
+      COPILOT_GITHUB_TOKEN: ${{ secrets.copilot-github-token }}  # NOTE(review): job-level env exposes the token to every step (including npm ci); consider step-level env
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false  # do not leave the checkout token in the local git config
+          fetch-depth: 0  # full history; presumably needed to diff base-sha against head-sha
+
+      - name: Setup Node.js
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+        with:
+          node-version: "24"
+          cache: "npm"
+
+      - name: Install npm dependencies
+        run: npm ci
+
+      - name: Install PowerShell-Yaml
+        shell: pwsh
+        run: |
+          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+        with:
+          version: "0.10.9"
+
+      - name: Install moderation dependencies
+        run: uv pip install --system -r scripts/evals/moderation/requirements.txt
+
+      - name: Cache Detoxify model
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 (releases before v4.2.0 target the retired legacy cache service)
+        with:
+          path: ~/.cache/torch/hub/checkpoints
+          key: detoxify-unbiased-${{ hashFiles('scripts/evals/moderation/requirements.txt') }}  # new cache entry whenever requirements.txt changes
+
+      - name: Create logs directory
+        shell: pwsh
+        run: New-Item -ItemType Directory -Force -Path logs | Out-Null
+
+      - name: Configure Copilot home
+        shell: pwsh
+        run: |
+          $copilotHome = Join-Path $env:RUNNER_TEMP 'copilot-home'
+          New-Item -ItemType Directory -Force -Path $copilotHome | Out-Null
+          "COPILOT_HOME=$copilotHome" | Out-File -FilePath $env:GITHUB_ENV -Append
+
+      - name: Verify Copilot token
+        shell: pwsh
+        run: pwsh -NoProfile -File scripts/evals/Test-CopilotToken.ps1
+
+      - name: Detect changed AI artifacts
+        shell: pwsh
+        run: |
+          pwsh -NoProfile -File scripts/evals/Get-ChangedAIArtifact.ps1 `
+            -BaseRef "${{ inputs.base-sha }}" `
+            -HeadRef "${{ inputs.head-sha }}" `
+            -OutFile logs/changed-ai-artifacts.json
+
+      - name: Run vally evals for changed artifacts
+        shell: pwsh
+        run: |
+          pwsh -NoProfile -File scripts/evals/Invoke-VallyEvals.ps1 `
+            -ManifestPath logs/changed-ai-artifacts.json `
+            -LogsDir logs/
+
+      - name: Run per-agent agent-behavior matrix (changed)
+        shell: pwsh
+        continue-on-error: true  # advisory: a failing matrix run never fails the job
+        run: |
+          $manifestPath = 'logs/changed-ai-artifacts.json'
+          $changedPaths = @()
+          if (Test-Path -LiteralPath $manifestPath) {
+            $manifest = Get-Content -LiteralPath $manifestPath -Raw | ConvertFrom-Json
+            # Accept either { artifacts: [...] } or a bare top-level array (the shape eval-spec-lint.yml filters on).
+            $entries = $manifest.artifacts ?? $manifest
+            $changedPaths = @($entries | ForEach-Object { $_.path } | Where-Object { $_ })
+          }
+          if ($changedPaths.Count -eq 0) {
+            Write-Host 'No changed AI artifacts; skipping per-agent matrix.' -ForegroundColor Yellow
+            exit 0
+          }
+          pwsh -NoProfile -File scripts/evals/Invoke-AgentMatrix.ps1 `
+            -Changed $changedPaths `
+            -Tier pr  # NOTE(review): an array passed to pwsh -File expands into separate arguments; confirm -Changed binds every path
+
+      - name: Upload eval execution artifacts
+        if: always()  # keep the logs regardless of how earlier steps ended
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: eval-execute-logs
+          path: |
+            logs/eval-results-*.json
+            logs/eval-summary.json
+            logs/changed-ai-artifacts.json
+            logs/agent-matrix/**/*.log
+            evals/results/agent-matrix/**/*.json
+          if-no-files-found: ignore  # missing log files are not treated as an error
+          retention-days: 14
+
+      - name: Post or update PR-comment summary
+        if: always() && github.event_name == 'pull_request'
+        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        with:
+          script: |
+            const fs = require('fs');
+            const summaryPath = 'logs/eval-summary.json';
+            const marker = '<!-- evals-ci-summary -->';  // hidden marker used to find this workflow's own comment
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+
+            if (!fs.existsSync(summaryPath)) {
+              core.info(`${summaryPath} not found; skipping eval summary comment.`);
+              return;
+            }
+
+            let summary;
+            try {
+              summary = JSON.parse(fs.readFileSync(summaryPath, 'utf8'));
+            } catch (err) {
+              core.warning(`Could not parse ${summaryPath}: ${err.message}`);
+              return;
+            }
+
+            const totals = summary.totals || {};  // every counter below falls back to 0 when absent
+            const artifacts = Number(totals.artifacts || 0);
+            const specs = Number(totals.specs || 0);
+            const assertionsPassed = Number(totals.assertionsPassed || 0);
+            const assertionsFailed = Number(totals.assertionsFailed || 0);
+            const failedSpecs = Number(totals.failedSpecs || 0);
+            const perArtifact = Array.isArray(summary.perArtifact) ? summary.perArtifact : [];
+            const lines = perArtifact.map(a => {
+              const passed = Number(a.assertionsPassed || 0);
+              const failed = Number(a.assertionsFailed || 0);
+              const indicator = a.status === 'fail' ? ':x:' : ':white_check_mark:';
+              const id = a.artifactId || a.path || '(unknown)';
+              return `- ${indicator} \`${id}\` — ${passed} passed, ${failed} failed`;
+            });
+
+            const headline = `**Artifacts:** ${artifacts} | **Specs:** ${specs} (${failedSpecs} failed) | **Assertions:** ${assertionsPassed} passed, ${assertionsFailed} failed`;
+
+            const body = [
+              marker,
+              '## Eval Coverage Summary',
+              '',
+              headline,
+              '',
+              lines.length ? lines.join('\n') : '_No artifact-scoped results in this run._',
+              '',
+              `[View workflow run](${runUrl})`
+            ].join('\n');
+
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number, per_page: 100
+            });
+            const existing = comments.find(c => c.body && c.body.includes(marker));  // all pages are searched, so the marker comment is found on busy PRs too
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body
+              });
+            }
diff --git a/.github/workflows/evals-agent-matrix.yml b/.github/workflows/evals-agent-matrix.yml
new file mode 100644
index 000000000..e16aedbae
--- /dev/null
+++ b/.github/workflows/evals-agent-matrix.yml
@@ -0,0 +1,106 @@
+name: Evals - Per-Agent Matrix (On-Demand)
+
+# Manual full-matrix dispatch for the agent-behavior suite. Honors the
+# 2026-05-24 cross-plan rule that prohibits scheduled eval jobs: this workflow
+# is invoked on demand via the GitHub UI / API and defaults to the Tier nightly
+# exit policy (exit 1 on any per-agent overall: fail); `tier` may select pr.
+
+on:
+  workflow_dispatch:
+    inputs:
+      tier:
+        description: "Exit policy tier (pr=advisory, nightly=strict)."
+        required: false
+        default: "nightly"
+        type: choice
+        options:
+          - nightly
+          - pr
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true  # a new dispatch on the same ref cancels the run already in progress
+
+permissions:
+  contents: read
+
+jobs:
+  agent-matrix:
+    name: Per-Agent Matrix
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    env:
+      COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}  # NOTE(review): job-level env exposes the token to every step (including npm ci); consider step-level env
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false  # do not leave the checkout token in the local git config
+          fetch-depth: 0  # full history instead of a shallow clone
+
+      - name: Setup Node.js
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+        with:
+          node-version: "24"
+          cache: "npm"
+
+      - name: Install npm dependencies
+        run: npm ci
+
+      - name: Install PowerShell-Yaml
+        shell: pwsh
+        run: |
+          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+        with:
+          version: "0.10.9"
+
+      - name: Create logs directory
+        shell: pwsh
+        run: New-Item -ItemType Directory -Force -Path logs | Out-Null
+
+      - name: Configure Copilot home
+        shell: pwsh
+        run: |
+          $copilotHome = Join-Path $env:RUNNER_TEMP 'copilot-home'
+          New-Item -ItemType Directory -Force -Path $copilotHome | Out-Null
+          "COPILOT_HOME=$copilotHome" | Out-File -FilePath $env:GITHUB_ENV -Append
+
+      - name: Verify Copilot token
+        shell: pwsh
+        run: pwsh -NoProfile -File scripts/evals/Test-CopilotToken.ps1
+
+      - name: Run per-agent agent-behavior matrix (all)
+        shell: pwsh
+        env:
+          MATRIX_TIER: ${{ inputs.tier }}  # handed over via env rather than interpolated into the script text
+        run: |
+          pwsh -NoProfile -File scripts/evals/Invoke-AgentMatrix.ps1 `
+            -All `
+            -Tier $env:MATRIX_TIER
+
+      - name: Render per-agent matrix dashboard
+        if: always()  # still render when the matrix step above failed
+        shell: pwsh
+        run: |
+          pwsh -NoProfile -File scripts/evals/New-AgentMatrixDashboard.ps1
+
+      - name: Upload matrix artifacts
+        if: always()  # keep logs and results regardless of how earlier steps ended
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: agent-matrix-results
+          path: |
+            logs/agent-matrix/**/*.log
+            logs/agent-matrix-dashboard.html
+            evals/results/agent-matrix/**/*.json
+          if-no-files-found: ignore  # missing files are not treated as an error
+          retention-days: 30
diff --git a/.github/workflows/pr-review.md b/.github/workflows/pr-review.md
index 06534a66c..826b31983 100644
--- a/.github/workflows/pr-review.md
+++ b/.github/workflows/pr-review.md
@@ -12,6 +12,7 @@ timeout-minutes: 15
 
 imports:
   - ../agents/hve-core/pr-review.agent.md
+  - ../agents/content-policy-citation.agent.md
 
 checkout:
   sparse-checkout: |
@@ -200,6 +201,10 @@ to submitting REQUEST_CHANGES and adding `needs-revision`. Add a comment
 explaining that the PR was converted to draft due to insufficient quality
 for review.
 
+## Output Style
+
+When any output emitted by this workflow (PR review comments, PR descriptions, or other public output) references or flags a suspected content-policy concern, follow the citation discretion rules from the imported Content Policy Citation agent as authoritative.
+
 ## Constraints
 
 * Do not approve PRs. Only use `COMMENT` or `REQUEST_CHANGES`.
diff --git a/.github/workflows/pr-validation.yml b/.github/workflows/pr-validation.yml
index 9bb949c12..24ce15f66 100644
--- a/.github/workflows/pr-validation.yml
+++ b/.github/workflows/pr-validation.yml
@@ -338,3 +338,50 @@ jobs:
       security-events: write  # Required for SARIF upload to Security tab
       actions: read
 
+  eval-presence:
+    name: Evals - Stimulus Presence
+    permissions:
+      contents: read
+    uses: ./.github/workflows/eval-stimulus-presence.yml  # soft-fail is not passed, so it keeps its default (false)
+    with:
+      base-sha: ${{ github.event.pull_request.base.sha }}
+      head-sha: ${{ github.event.pull_request.head.sha }}
+
+  eval-lint:
+    name: Evals - Spec Lint and Skill Hygiene
+    permissions:
+      contents: read
+    uses: ./.github/workflows/eval-spec-lint.yml  # soft-fail is not passed, so it keeps its default (false)
+    with:
+      base-sha: ${{ github.event.pull_request.base.sha }}
+      head-sha: ${{ github.event.pull_request.head.sha }}
+
+  eval-text-moderation:
+    name: Evals - Stimulus Text Moderation
+    permissions:
+      contents: read
+    uses: ./.github/workflows/eval-text-moderation.yml  # takes no SHA inputs; soft-fail keeps its default (false)
+
+  content-moderation:
+    name: Evals - Corpus Content Moderation
+    permissions:
+      contents: read
+    uses: ./.github/workflows/eval-corpus-moderation.yml  # soft-fail is not passed, so it keeps its default (false)
+    with:
+      base-sha: ${{ github.event.pull_request.base.sha }}
+      head-sha: ${{ github.event.pull_request.head.sha }}
+
+  eval-execute:
+    name: Evals - Execute Vally Suites
+    needs: [eval-presence, eval-lint, eval-text-moderation, content-moderation]  # waits for the four gate jobs above
+    if: github.event.pull_request.head.repo.fork == false  # skip fork PRs; presumably because repository secrets are not available to them
+    permissions:
+      contents: read
+      pull-requests: write  # the called workflow creates or updates a PR summary comment
+    uses: ./.github/workflows/eval-vally.yml
+    with:
+      base-sha: ${{ github.event.pull_request.base.sha }}
+      head-sha: ${{ github.event.pull_request.head.sha }}
+    secrets:
+      copilot-github-token: ${{ secrets.COPILOT_GITHUB_TOKEN }}  # mapped onto the secret name declared by the called workflow
+
diff --git a/.vally.yaml b/.vally.yaml
index e0158de2b..19fd523af 100644
--- a/.vally.yaml
+++ b/.vally.yaml
@@ -5,32 +5,29 @@ paths:
 environments:
   security:
     skills:
-      - .github/skills/security/owasp-top-10
-      - .github/skills/security/owasp-cicd
+      - ../../.github/skills/security/owasp-top-10
+      - ../../.github/skills/security/owasp-cicd
   coding-standards:
     skills:
-      - .github/skills/coding-standards/python-foundational
+      - ../../.github/skills/coding-standards/python-foundational
   security-and-coding:
     skills:
-      - .github/skills/security/owasp-top-10
-      - .github/skills/coding-standards/python-foundational
+      - ../../.github/skills/security/owasp-top-10
+      - ../../.github/skills/coding-standards/python-foundational
 
 suites:
   skill-quality:
     description: Evaluate skill behavior via copilot-sdk agent conversations
     filter:
-      tags:
-        category: skill-quality
+      category: skill-quality
 
   agent-behavior:
     description: Evaluate agent routing and response quality
     filter:
-      tags:
-        category: agent-behavior
+      category: agent-behavior
 
   script-validation:
     description: Validate script correctness via copilot-sdk conversations
     filter:
-      tags:
-        category: script-validation
+      category: script-validation