Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions .github/workflows/eval-corpus-moderation.yml

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `Detect changed AI artifacts` step calls `Get-ChangedAIArtifact.ps1`, which requires the `PowerShell-Yaml` module, but this workflow never installs it. Every other workflow in this PR that calls the same script includes an `Install PowerShell-Yaml` step first (see `eval-spec-lint.yml` and `eval-stimulus-presence.yml`). At runtime the job will throw a module-not-found error and fail.

Please add the following step before Create logs directory:

- name: Install PowerShell-Yaml
  shell: pwsh
  run: |
    Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser

Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Reusable workflow: moderates the corpus of AI artifacts that changed
# between two commits.
#
# Inputs:
#   base-sha  - passed to Get-ChangedAIArtifact.ps1 as -BaseRef
#   head-sha  - passed to Get-ChangedAIArtifact.ps1 as -HeadRef
#   soft-fail - when true, a moderation failure does not fail the job
name: Evals - Corpus Content Moderation

on:
  workflow_call:
    inputs:
      base-sha:
        description: 'Base SHA for changed-artifact detection'
        required: true
        type: string
      head-sha:
        description: 'Head SHA for changed-artifact detection'
        required: true
        type: string
      soft-fail:
        description: 'Whether to continue on content moderation failures'
        required: false
        type: boolean
        default: false

permissions:
  contents: read

jobs:
  content-moderation:
    name: Evals - Corpus Content Moderation
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: "24"

      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"

      - name: Install uv
        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
        with:
          version: "0.10.9"

      - name: Install moderation dependencies
        run: uv pip install --system -r scripts/evals/moderation/requirements.txt

      # NOTE(review): actions/cache releases older than v4.2.0 were announced
      # as retired together with the legacy cache service - confirm this
      # v4.0.2 pin still works and bump the pinned SHA if it does not.
      - name: Cache Detoxify model
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          path: ~/.cache/torch/hub/checkpoints
          key: detoxify-unbiased-${{ hashFiles('scripts/evals/moderation/requirements.txt') }}

      # Get-ChangedAIArtifact.ps1 needs the PowerShell-Yaml module; the other
      # eval workflows that call the same script install it at this pinned
      # version, so do the same here instead of failing on a missing module.
      - name: Install PowerShell-Yaml
        shell: pwsh
        run: |
          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser

      - name: Create logs directory
        shell: pwsh
        run: New-Item -ItemType Directory -Force -Path logs | Out-Null

      # Writes the changed-artifact manifest consumed by the moderation step.
      - name: Detect changed AI artifacts
        shell: pwsh
        run: |
          pwsh -NoProfile -File scripts/evals/Get-ChangedAIArtifact.ps1 `
            -BaseRef "${{ inputs.base-sha }}" `
            -HeadRef "${{ inputs.head-sha }}" `
            -OutFile logs/changed-ai-artifacts.json

      # The id lets the upload step below see this step's real outcome even
      # when continue-on-error hides the failure from the job status.
      - name: Moderate changed corpus
        id: moderate
        shell: pwsh
        continue-on-error: ${{ inputs.soft-fail }}
        run: |
          pwsh -NoProfile -File scripts/evals/Invoke-CorpusModeration.ps1 `
            -ManifestPath logs/changed-ai-artifacts.json `
            -OutFile logs/moderation-corpus.json

      # failure() alone is false when soft-fail swallowed the moderation
      # failure, so also check the step outcome to keep the diagnostics.
      - name: Upload moderation artifacts on failure
        if: failure() || steps.moderate.outcome == 'failure'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: content-moderation-logs
          path: |
            logs/changed-ai-artifacts.json
            logs/moderation-corpus.json
          if-no-files-found: ignore
          retention-days: 7
100 changes: 100 additions & 0 deletions .github/workflows/eval-spec-lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Reusable workflow: validates the eval spec schema and, when skill artifacts
# changed between the two commits, runs the skill hygiene lint.
#
# Inputs:
#   base-sha  - passed to Get-ChangedAIArtifact.ps1 as -BaseRef
#   head-sha  - passed to Get-ChangedAIArtifact.ps1 as -HeadRef
#   soft-fail - when true, lint failures do not fail the job
name: Evals - Spec Lint and Skill Hygiene

on:
  workflow_call:
    inputs:
      base-sha:
        description: "Base commit SHA for changed-artifact detection."
        required: true
        type: string
      head-sha:
        description: "Head commit SHA for changed-artifact detection."
        required: true
        type: string
      soft-fail:
        description: "When true, lint failures do not fail the job."
        required: false
        type: boolean
        default: false

permissions:
  contents: read

jobs:
  eval-lint:
    name: Evals - Spec Lint and Skill Hygiene
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: "24"
          cache: "npm"

      - name: Install npm dependencies
        run: npm ci

      - name: Install PowerShell-Yaml
        shell: pwsh
        run: |
          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser

      - name: Create logs directory
        shell: pwsh
        run: New-Item -ItemType Directory -Force -Path logs | Out-Null

      # Writes the manifest that the skill hygiene step filters on.
      - name: Detect changed AI artifacts
        shell: pwsh
        run: |
          pwsh -NoProfile -File scripts/evals/Get-ChangedAIArtifact.ps1 `
            -BaseRef "${{ inputs.base-sha }}" `
            -HeadRef "${{ inputs.head-sha }}" `
            -OutFile logs/changed-ai-artifacts.json

      # Step ids on the two lint steps let the upload step below see their
      # real outcome even when continue-on-error hides the failure.
      - name: Validate eval spec schema
        id: spec_schema
        shell: pwsh
        continue-on-error: ${{ inputs.soft-fail }}
        run: |
          pwsh -NoProfile -File scripts/evals/Test-EvalSpec.ps1 `
            -Root evals/ `
            -OutputPath logs/eval-spec-lint.json

      # Runs the npm lint only when the manifest lists at least one entry
      # whose kind is 'skill'; otherwise the step exits early and succeeds.
      - name: Run skill hygiene lint
        id: skill_hygiene
        shell: pwsh
        continue-on-error: ${{ inputs.soft-fail }}
        run: |
          $manifestPath = 'logs/changed-ai-artifacts.json'
          if (-not (Test-Path -LiteralPath $manifestPath)) {
            Write-Host "No changed-artifact manifest found; skipping skill hygiene lint."
            return
          }
          $manifest = Get-Content -LiteralPath $manifestPath -Raw | ConvertFrom-Json
          $skillChanges = @($manifest | Where-Object { $_.kind -eq 'skill' })
          if ($skillChanges.Count -eq 0) {
            Write-Host "No skill artifacts changed; skipping skill hygiene lint."
            return
          }
          Write-Host "Detected $($skillChanges.Count) changed skill artifact(s); running vally lint."
          npm run eval:lint:skills
          if ($LASTEXITCODE -ne 0) {
            throw "Skill hygiene lint failed with exit code $LASTEXITCODE."
          }

      # failure() alone is false when soft-fail swallowed a lint failure, so
      # also check both step outcomes to keep the diagnostics.
      - name: Upload eval-lint artifacts on failure
        if: >-
          failure() ||
          steps.spec_schema.outcome == 'failure' ||
          steps.skill_hygiene.outcome == 'failure'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: eval-lint-logs
          path: |
            logs/eval-spec-lint.json
            logs/changed-ai-artifacts.json
          if-no-files-found: ignore
          retention-days: 7
77 changes: 77 additions & 0 deletions .github/workflows/eval-stimulus-presence.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Reusable workflow: checks stimulus presence for the AI artifacts that
# changed between two commits.
#
# Inputs:
#   base-sha  - passed to Get-ChangedAIArtifact.ps1 as -BaseRef
#   head-sha  - passed to Get-ChangedAIArtifact.ps1 as -HeadRef
#   soft-fail - when true, a validation failure does not fail the job
name: Evals - Stimulus Presence

on:
  workflow_call:
    inputs:
      base-sha:
        description: "Base commit SHA for change detection"
        required: true
        type: string
      head-sha:
        description: "Head commit SHA for change detection"
        required: true
        type: string
      soft-fail:
        description: "Whether to continue on validation failures"
        required: false
        type: boolean
        default: false

permissions:
  contents: read

jobs:
  eval-presence:
    name: Evals - Stimulus Presence
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: "24"
          cache: "npm"

      - name: Install PowerShell-Yaml
        shell: pwsh
        run: |
          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser

      - name: Create logs directory
        shell: pwsh
        run: New-Item -ItemType Directory -Force -Path logs | Out-Null

      # Writes the changed-artifact manifest consumed by the presence check.
      - name: Detect changed AI artifacts
        shell: pwsh
        run: |
          pwsh -NoProfile -File scripts/evals/Get-ChangedAIArtifact.ps1 `
            -BaseRef "${{ inputs.base-sha }}" `
            -HeadRef "${{ inputs.head-sha }}" `
            -OutFile logs/changed-ai-artifacts.json

      # The id lets the upload step below see this step's real outcome even
      # when continue-on-error hides the failure from the job status.
      - name: Enforce stimulus presence
        id: presence
        shell: pwsh
        continue-on-error: ${{ inputs.soft-fail }}
        run: |
          pwsh -NoProfile -File scripts/evals/Test-StimulusPresence.ps1 `
            -ManifestPath logs/changed-ai-artifacts.json `
            -EvalRoot evals/ `
            -OutFile logs/stimulus-presence.json

      # failure() alone is false when soft-fail swallowed the presence
      # failure, so also check the step outcome to keep the diagnostics.
      - name: Upload presence artifacts on failure
        if: failure() || steps.presence.outcome == 'failure'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: eval-presence-logs
          path: |
            logs/changed-ai-artifacts.json
            logs/stimulus-presence.json
          if-no-files-found: ignore
          retention-days: 7
61 changes: 61 additions & 0 deletions .github/workflows/eval-text-moderation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Reusable workflow: runs Test-EvalSpecText.ps1 (alex.js + retext-profanities,
# per the step name) and uploads its JSON report when the check fails.
#
# Inputs:
#   soft-fail - when true, a text moderation failure does not fail the job
name: Evals - Text Moderation

on:
  workflow_call:
    inputs:
      soft-fail:
        description: 'Whether to continue on text moderation failures'
        required: false
        type: boolean
        default: false

permissions:
  contents: read

jobs:
  text-moderation:
    name: Evals - Stimulus Text Moderation
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
        with:
          node-version: "24"
          cache: "npm"

      - name: Install npm dependencies
        run: npm ci

      - name: Install PowerShell-Yaml
        shell: pwsh
        run: |
          Install-Module -Name PowerShell-Yaml -RequiredVersion 0.4.7 -Force -Scope CurrentUser

      - name: Create logs directory
        shell: pwsh
        run: New-Item -ItemType Directory -Force -Path logs | Out-Null

      # The id lets the upload step below see this step's real outcome even
      # when continue-on-error hides the failure from the job status.
      - name: Moderate AI artifact corpus (alex.js + retext-profanities)
        id: text_moderation
        shell: pwsh
        continue-on-error: ${{ inputs.soft-fail }}
        run: |
          pwsh -NoProfile -File scripts/evals/Test-EvalSpecText.ps1 `
            -OutputPath logs/eval-spec-text-moderation.json

      # failure() alone is false when soft-fail swallowed the moderation
      # failure, so also check the step outcome to keep the diagnostics.
      - name: Upload text moderation artifacts on failure
        if: failure() || steps.text_moderation.outcome == 'failure'
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: eval-text-moderation-logs
          path: |
            logs/eval-spec-text-moderation.json
          if-no-files-found: ignore
          retention-days: 7
Loading