diff --git a/Makefile b/Makefile
index 5b45a9715..f781331d1 100644
--- a/Makefile
+++ b/Makefile
@@ -3,9 +3,9 @@ COMPOSE_FILE ?= compose.yaml
DRY_RUN ?= false
MOCK_JIRA ?= false
JIRA_DRY_RUN ?= false
+JIRA_ALLOW_STATUS_CHANGES ?= false
AUTO_CHAIN ?= true
FORCE_CVE_TRIAGE ?= false
-SILENT_RUN ?= false
RUN_LLM_JUDGE ?= true
COMPOSE ?= $(shell if podman compose ls >/dev/null 2>&1; then echo "podman compose"; elif command -v podman-compose >/dev/null 2>&1; then echo "podman-compose"; else echo "docker-compose"; fi)
@@ -44,7 +44,6 @@ run-triage-agent-standalone:
-e MOCK_JIRA=$(MOCK_JIRA) \
-e JIRA_DRY_RUN=$(JIRA_DRY_RUN) \
-e FORCE_CVE_TRIAGE=$(FORCE_CVE_TRIAGE) \
- -e SILENT_RUN=$(SILENT_RUN) \
triage-agent
.PHONY: run-triage-agent-e2e-tests
@@ -202,11 +201,11 @@ build-jira-issue-fetcher:
.PHONY: start
start:
- DRY_RUN=$(DRY_RUN) MOCK_JIRA=$(MOCK_JIRA) JIRA_DRY_RUN=$(JIRA_DRY_RUN) AUTO_CHAIN=$(AUTO_CHAIN) SILENT_RUN=$(SILENT_RUN) $(COMPOSE_AGENTS) up
+ DRY_RUN=$(DRY_RUN) MOCK_JIRA=$(MOCK_JIRA) JIRA_DRY_RUN=$(JIRA_DRY_RUN) JIRA_ALLOW_STATUS_CHANGES=$(JIRA_ALLOW_STATUS_CHANGES) AUTO_CHAIN=$(AUTO_CHAIN) $(COMPOSE_AGENTS) up
.PHONY: start-detached
start-detached:
- DRY_RUN=$(DRY_RUN) MOCK_JIRA=$(MOCK_JIRA) JIRA_DRY_RUN=$(JIRA_DRY_RUN) AUTO_CHAIN=$(AUTO_CHAIN) SILENT_RUN=$(SILENT_RUN) $(COMPOSE_AGENTS) up -d
+ DRY_RUN=$(DRY_RUN) MOCK_JIRA=$(MOCK_JIRA) JIRA_DRY_RUN=$(JIRA_DRY_RUN) JIRA_ALLOW_STATUS_CHANGES=$(JIRA_ALLOW_STATUS_CHANGES) AUTO_CHAIN=$(AUTO_CHAIN) $(COMPOSE_AGENTS) up -d
.PHONY: stop
stop:
diff --git a/README-agents.md b/README-agents.md
index 34d8ca6e0..51dba1772 100644
--- a/README-agents.md
+++ b/README-agents.md
@@ -22,6 +22,27 @@ Three agents process tasks through Redis queues:
**Always** use `DRY_RUN=true` if you are developing locally or just wanna give the agents a try.
+## Jira status changes (opt-in)
+
+By default, agents do NOT change the Jira workflow status of issues
+(e.g. "New" → "In Progress" when the backport agent picks up a task,
+or "Release Pending" / "Closed" when the issue-verification agent
+finishes), and the preliminary-testing agent does NOT set the
+`Preliminary Testing` field to `Pass`. The Pass field is gated by the
+same flag because setting it admits the build into the next compose,
+triggers erratum creation, and moves the issue to Integration — its
+downstream effect is equivalent to a status transition. To enable all
+of the above, set:
+
+```bash
+JIRA_ALLOW_STATUS_CHANGES=true
+```
+
+When unset or `false`, status-change calls and the prelim-testing Pass
+write are short-circuited and a log line records what *would* have
+happened.
+`DRY_RUN=true` also suppresses these writes independently of this flag.
+
## Jira mocking
If you clone testing Jira files from
diff --git a/agents_as_skills/triage/SKILL.md b/agents_as_skills/triage/SKILL.md
index 30310cd89..13151d9d1 100644
--- a/agents_as_skills/triage/SKILL.md
+++ b/agents_as_skills/triage/SKILL.md
@@ -13,9 +13,6 @@ arguments:
- name: force_cve_triage
description: "If true, force triage of CVE issues that would normally be deferred or rejected (eligibility=PENDING_DEPENDENCIES or NEVER). Default: false"
required: false
- - name: silent_run
- description: "If true, only update JIRA for not-affected and postponed resolutions. Default: false"
- required: false
---
# Triage Skill
@@ -30,7 +27,6 @@ You are a Red Hat Enterprise Linux developer tasked to analyze Jira issues for R
- `dry_run`: {{dry_run}}
- `auto_chain`: {{auto_chain}}
- `force_cve_triage`: {{force_cve_triage}}
-- `silent_run`: {{silent_run}}
## Tools
@@ -701,10 +697,7 @@ Proceed to **Step 7**.
If `dry_run` is true, end the workflow and output the `triage_result`.
-Check whether JIRA should be updated based on `silent_run`:
-- If `silent_run` is false: always update JIRA.
-- If `silent_run` is true: only update JIRA if `triage_result.resolution` is `"not-affected"` or `"postponed"`.
-- If JIRA should not be updated, end the workflow and output the `triage_result`.
+Default is silent: only post a JIRA comment when `triage_result.resolution` is `"not-affected"` or `"postponed"` (resolutions that have no MR artifact, so the comment is the only visible explanation). For all other resolutions, end the workflow and output the `triage_result` without commenting.
Format the JIRA comment based on the resolution type:
diff --git a/compose.yaml b/compose.yaml
index 255439151..61654d786 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -9,8 +9,8 @@ x-beeai-env: &beeai-env
MAX_RETRIES: 3
DRY_RUN: ${DRY_RUN:-false}
JIRA_DRY_RUN: ${JIRA_DRY_RUN:-false}
+ JIRA_ALLOW_STATUS_CHANGES: ${JIRA_ALLOW_STATUS_CHANGES:-false}
AUTO_CHAIN: ${AUTO_CHAIN:-true}
- SILENT_RUN: ${SILENT_RUN:-false}
REQUESTS_CA_BUNDLE: /etc/pki/tls/certs/ca-bundle.crt
# Base template for BeeAI agents
diff --git a/jira_label_workflow_routing.md b/jira_label_workflow_routing.md
index 3aa3db22c..178988a0e 100644
--- a/jira_label_workflow_routing.md
+++ b/jira_label_workflow_routing.md
@@ -30,7 +30,7 @@ stateDiagram-v2
flowchart TD
FETCH[Jira Issue Fetcher]
- FETCH -->|No Ymir labels
OR ymir_retry_needed| TRIAGE[triage_queue]
+ FETCH -->|No Ymir labels
OR ymir_retry_needed
OR ymir_todo added by RH-Employee| TRIAGE[triage_queue]
TRIAGE --> TRIAGE_AGENT[Triage Agent]
@@ -93,12 +93,13 @@ flowchart TD
| `ymir_retry_needed` | Trigger retry | Forces reprocessing |
| `ymir_triaged` | Triage completed, no automated follow-up | Terminal state |
| `ymir_fusa` | Functional Safety | Requires maintainer review |
+| `ymir_todo` | Maintainer-facing trigger for an e2e run | Fetcher swaps it for `ymir_triage_in_progress` on enqueue; only honored when the changelog shows the label was added by a member of the `Red Hat Employee` Jira group (verified per-issue, not via JQL). The triage run posts an ack comment and a result comment so the requester gets feedback. Default is silent — without `ymir_todo`, no comments are posted. |
## Queue Types Summary
| Queue | Type | Triggers | Labels Added | Status |
|-------|------|----------|--------------|--------|
-| `triage_queue` | Input | No labels OR retry_needed | - | Active |
+| `triage_queue` | Input | No labels OR `ymir_retry_needed` OR `ymir_todo` | `ymir_triage_in_progress` (set by fetcher atomic flip for retry/todo, or by agent at triage start for fresh issues) | Active |
| `rebase_queue_c9s` | Input | Resolution=REBASE, RHEL 8/9 | `ymir_triaged_rebase` | Active (AUTO_CHAIN only) |
| `rebase_queue_c10s` | Input | Resolution=REBASE, RHEL 10+ | `ymir_triaged_rebase` | Active (AUTO_CHAIN only) |
| `backport_queue_c9s` | Input | Resolution=BACKPORT, RHEL 8/9 | `ymir_triaged_backport` | Active (AUTO_CHAIN only) |
@@ -113,28 +114,60 @@ flowchart TD
## Deduplication Logic
-**Note:** The Jira Issue Fetcher only decides whether to queue an issue for processing based on labels. The actual label cleanup (including removal of `ymir_retry_needed`) happens in the Triage Agent after it consumes the task from the queue.
+**Trigger labels are consumed by the fetcher, all other labels by the agent.** The fetcher atomically removes `ymir_todo` and `ymir_retry_needed` before pushing to Redis, replacing them with `ymir_triage_in_progress` so the very next sweep sees the in-progress marker and skips. Every other `ymir_*` label is cleaned up by the triage agent when it pops the task. If the fetcher's atomic flip fails after retries, the Redis push is **skipped** — the issue stays eligible for the next sweep with its trigger label intact, rather than being enqueued without a dedup anchor.
```mermaid
flowchart TD
START[Jira Issue Fetcher
Found issue]
- CHECK{Has any
ymir_* label?}
+ INPROG{Has any
ymir_*_in_progress?}
+ TODO{Has ymir_todo?
added by a member of
Red Hat Employee group}
RETRY{Has
ymir_retry_needed?}
+ OTHER{Has any other
ymir_* label?}
+
+ START --> INPROG
+ INPROG -->|Yes| SKIP_RUN[Skip — already running]
+ INPROG -->|No| TODO
+ TODO -->|Yes| FLIP_TODO[Atomic flip:
+ymir_triage_in_progress
-ymir_todo]
+ TODO -->|No| RETRY
+ RETRY -->|Yes| FLIP_RETRY[Atomic flip:
+ymir_triage_in_progress
-ymir_retry_needed]
+ RETRY -->|No| OTHER
+ OTHER -->|Yes| SKIP_DONE[Skip — already processed]
+ OTHER -->|No| PUSH_FRESH[Push to triage_queue
user_triggered=False]
+
+ FLIP_TODO --> PUSH_USER[Push to triage_queue
user_triggered=True]
+ FLIP_RETRY --> PUSH_RETRY[Push to triage_queue
user_triggered=False]
+
+ style PUSH_FRESH fill:#c8e6c9
+ style PUSH_USER fill:#c8e6c9
+ style PUSH_RETRY fill:#c8e6c9
+ style SKIP_RUN fill:#ffcdd2
+ style SKIP_DONE fill:#ffcdd2
+```
- START --> CHECK
- CHECK -->|No| ADD[Add to triage_queue]
- CHECK -->|Yes| RETRY
- RETRY -->|Yes| ADD
- RETRY -->|No| SKIP[Skip - already processed]
+## Run Behaviour by Trigger and Flag
- ADD --> TRIAGE_PROCESS[Triage Agent processes issue]
- TRIAGE_PROCESS --> CLEANUP[Triage Agent removes
all ymir_* labels]
+Two env-var flags affect pipeline behaviour: `DRY_RUN` and `JIRA_ALLOW_STATUS_CHANGES`. Verbosity is no longer controlled by an env var — the system is silent by default. The only way to opt into comments is per-issue, by adding `ymir_todo` (which flows through the task as `user_triggered=True`).
- style ADD fill:#c8e6c9
- style SKIP fill:#ffcdd2
- style CLEANUP fill:#e1f5fe
-```
+Ground rules:
+
+- **Default is silent.** No result or error comments are posted on the Jira issue, and intermediate `_failed` labels are not written. Only `not-affected`, `postponed`, `open-ended-analysis`, and `clarification-needed` triage resolutions still post a comment unbidden (those have no MR to look at, so the comment is the only visible explanation).
+- **`user_triggered=True`** (set on the task when the issue carried `ymir_todo`) **bypasses every silence filter.** The triage agent posts an immediate private ack comment, posts the result comment, and writes `_failed` labels normally.
+- **Labels that are state, not notification, are always written.** `ymir_triage_in_progress` at the start of triage, terminal `ymir_*_errored` / `ymir_triaged_*` at the end. Suppressing them would break dedup against the next fetcher sweep.
+- **Jira workflow status changes are opt-in via `JIRA_ALLOW_STATUS_CHANGES`.** When the env var is unset or `false` (the default), the rebase/backport agents do NOT move the issue to "In Progress" on task pop, and the issue-verification agent does NOT transition issues to "Release Pending" / "Closed". When set to `true`, all of those transitions happen. The same flag also gates the preliminary-testing agent setting **`Preliminary Testing = Pass`** — that field admits the build into the next compose, triggers erratum creation, and moves the issue to Integration. Triage and the fetcher never touch the workflow status, regardless of the flag.
+- **`DRY_RUN` is read by both fetcher and agent.** On the fetcher, `DRY_RUN=true` skips the atomic Jira label flip (`ymir_todo` / `ymir_retry_needed` are NOT consumed; `ymir_triage_in_progress` is NOT stamped) but the task is still pushed to Redis with the correct `user_triggered` value, so the agent — also presumably in `DRY_RUN` — can exercise its full dry-mode flow. Implication: the trigger label stays on the issue, so every subsequent fetcher sweep re-picks the same issue. That is fine in a test environment; never run a production cron with `DRY_RUN=true`. `DRY_RUN=true` also implies status changes are skipped, independent of `JIRA_ALLOW_STATUS_CHANGES`.
+
+What happens for each trigger state:
+
+| Trigger state at sweep time | Default behaviour | `DRY_RUN=true` |
+|---|---|---|
+| **No `ymir_*` labels** (fresh issue) | Fetcher pushes to `triage_queue`. Agent stamps `ymir_triage_in_progress`, runs triage, writes a terminal `ymir_*` label. Result comment is suppressed unless the resolution is `not-affected`, `postponed`, `open-ended-analysis`, or `clarification-needed`. If the run auto-chains to rebase or backport, the downstream agent moves the Jira workflow status to "In Progress" when it pops the task — **only if `JIRA_ALLOW_STATUS_CHANGES=true`**; otherwise the status is left untouched. | Agent runs triage but `set_jira_labels` / `add_jira_comment` short-circuit on `DRY_RUN`. No labels, no comment, no MR, no workflow status change. Issue untouched in Jira. |
+| **`ymir_todo`** (added by a member of `Red Hat Employee`, no `_in_progress`) | Fetcher verifies the latest `ymir_todo` add in the issue's changelog was performed by a Red Hat Employee; if so, atomically flips `ymir_todo` → `ymir_triage_in_progress` and pushes with `user_triggered=True`. Agent posts a private ack comment and a result comment on completion. `_failed` labels are written normally. Workflow status change is the same as the fresh-issue path (set by rebase/backport on auto-chain, gated on `JIRA_ALLOW_STATUS_CHANGES`). | Fetcher still verifies the author and skips the atomic flip (`ymir_todo` stays on the issue), but still pushes to Redis with `user_triggered=True`. Agent runs in dry mode and writes nothing; workflow status not changed. **Subsequent fetcher sweeps will re-push the same issue** because the trigger label was never consumed. |
+| **`ymir_retry_needed`** (no `_in_progress`) | Fetcher atomically flips `ymir_retry_needed` → `ymir_triage_in_progress`, pushes with `user_triggered=False`. Agent runs full triage; behaves exactly like a fresh-issue run (no ack comment, result comment only for the four "no-MR" resolutions). Workflow status change is the same as the fresh-issue path (gated on `JIRA_ALLOW_STATUS_CHANGES`). | Fetcher skips the atomic flip (`ymir_retry_needed` stays on the issue) but still pushes to Redis with `user_triggered=False`. Agent runs in dry mode and writes nothing; workflow status not changed. Subsequent fetcher sweeps will re-push the same issue. |
+| **`ymir_todo`** or **`ymir_retry_needed`** **+** any `ymir_*_in_progress` label | Fetcher skips. Not enqueued. Workflow status not affected. | Fetcher skips. Not enqueued. Workflow status not affected. |
+| **Any other terminal `ymir_*` label** (e.g. `ymir_triaged_rebase`, `ymir_rebased`, `ymir_triage_errored`) | Fetcher skips. Re-run by adding `ymir_todo` (recommended — produces an ack + result comment) or `ymir_retry_needed`. Workflow status not affected. | Fetcher skips. Workflow status not affected. |
+
+JQL no longer restricts `ymir_todo` by assignee — the fetcher instead verifies per-issue that the user who added the label belongs to the `Red Hat Employee` Jira group by walking the issue's changelog. If the latest `ymir_todo` add was performed by an external collaborator (or the author cannot be verified), the fetcher skips the issue with a warning and does not flip the label.
---
-**Last Updated:** 2026-03-03
+**Last Updated:** 2026-06-04
diff --git a/openshift/Makefile b/openshift/Makefile
index 659a56cee..fe05ba0b2 100644
--- a/openshift/Makefile
+++ b/openshift/Makefile
@@ -5,4 +5,8 @@ run-jira-issue-fetcher:
oc delete job jira-issue-fetcher-manual || true
oc create job jira-issue-fetcher-manual --from=cronjob/jira-issue-fetcher
-.PHONY: deploy run-jira-issue-fetcher
+run-jira-issue-fetcher-todo:
+ oc delete job jira-issue-fetcher-todo-manual || true
+ oc create job jira-issue-fetcher-todo-manual --from=cronjob/jira-issue-fetcher-todo
+
+.PHONY: deploy run-jira-issue-fetcher run-jira-issue-fetcher-todo
diff --git a/openshift/README.md b/openshift/README.md
index 1ca68b2cf..aa99ba834 100644
--- a/openshift/README.md
+++ b/openshift/README.md
@@ -102,10 +102,33 @@ Agents are deployed in the `jotnar-ymir--jotnar-ymir` project.
## Jira Issue Fetcher Deployment
-Manually run jira issue fetcher:
+Two CronJobs run the fetcher with different JQL queries:
+
+| CronJob | Schedule | QUERY | ConfigMap |
+|---|---|---|---|
+| `jira-issue-fetcher` | `*/30 * * * *` | A generic filter for processing a batch of issues (e.g. early adopters) — currently `filter = "Ymir early adopters CVEs"` | `jira-issue-fetcher-filter-env` |
+| `jira-issue-fetcher-todo` | `*/5 * * * *` | `labels = "ymir_todo"` | `jira-issue-fetcher-todo-env` |
+
+Both share the common knobs (`IGNORED_COMPONENTS`, `MAX_ISSUES`, `LOGLEVEL`) from `jira-issue-fetcher-env`. Each pod mounts the shared configmap plus its per-cron QUERY configmap. To target a different batch, edit `configmap-jira-issue-fetcher-filter-env.yml` and re-apply.
+
+Manually run either fetcher:
+
+```bash
+make run-jira-issue-fetcher # generic batch filter
+make run-jira-issue-fetcher-todo # ymir_todo sweep
+```
+
+## Agent runtime knobs (`agents-env` ConfigMap)
+
+| Key | Default | Effect |
+|---|---|---|
+| `JIRA_ALLOW_STATUS_CHANGES` | `"false"` | When `"false"`, agents do NOT change Jira issue statuses (no "New" → "In Progress" on rebase/backport start, no "Release Pending" / "Closed" on verification finish) and the preliminary-testing agent does NOT set `Preliminary Testing = Pass` (that field admits the build into a compose, triggers erratum creation, and moves the issue to Integration). Flip to `"true"` to allow all of the above. `DRY_RUN=true` further short-circuits these writes independently. |
+
+To enable production status transitions:
```bash
-make run-jira-issue-fetcher
+oc patch configmap agents-env --type merge -p '{"data":{"JIRA_ALLOW_STATUS_CHANGES":"true"}}'
+oc rollout restart deployment -l app=triage-agent # plus any other agent deployments
```
## Triggering the Pipeline Manually
diff --git a/openshift/configmap-agents-env.yml b/openshift/configmap-agents-env.yml
index 8e522418e..82f494a9a 100644
--- a/openshift/configmap-agents-env.yml
+++ b/openshift/configmap-agents-env.yml
@@ -1,6 +1,5 @@
apiVersion: v1
data:
- SILENT_RUN: "true"
MAX_RETRIES: "3"
GIT_REPO_BASEPATH: /git-repos
# The maximum number of retries for a single step in the agent execution.
@@ -11,6 +10,12 @@ data:
# This was increased from 140 to handle complex rebases and backports.
BEEAI_MAX_ITERATIONS: "255"
REASONING_EFFORT: high
+ # When "false" (default), agents do NOT change Jira issue statuses
+ # (e.g. New -> In Progress on rebase/backport start, Release Pending on
+ # verification close). Set to "true" to allow status transitions. The
+ # status-change comment produced by issue-verification is suppressed
+ # alongside the transition.
+ JIRA_ALLOW_STATUS_CHANGES: "false"
immutable: false
kind: ConfigMap
metadata:
diff --git a/openshift/configmap-jira-issue-fetcher-env.yml b/openshift/configmap-jira-issue-fetcher-env.yml
index 0702c793d..ecfdc16a8 100644
--- a/openshift/configmap-jira-issue-fetcher-env.yml
+++ b/openshift/configmap-jira-issue-fetcher-env.yml
@@ -1,6 +1,5 @@
apiVersion: v1
data:
- QUERY: 'filter = "Ymir early adopters + CVE work to do"'
IGNORED_COMPONENTS: "firefox,thunderbird,firefox-flatpak-container,thunderbird-flatpak-container,webkit2gtk3,openjdk,postfix"
MAX_ISSUES: "20"
LOGLEVEL: "INFO"
diff --git a/openshift/configmap-jira-issue-fetcher-filter-env.yml b/openshift/configmap-jira-issue-fetcher-filter-env.yml
new file mode 100644
index 000000000..5071ad507
--- /dev/null
+++ b/openshift/configmap-jira-issue-fetcher-filter-env.yml
@@ -0,0 +1,7 @@
+apiVersion: v1
+data:
+ QUERY: 'filter = "Ymir early adopters + CVE work to do"'
+immutable: false
+kind: ConfigMap
+metadata:
+ name: jira-issue-fetcher-filter-env
diff --git a/openshift/configmap-jira-issue-fetcher-todo-env.yml b/openshift/configmap-jira-issue-fetcher-todo-env.yml
new file mode 100644
index 000000000..c4456b292
--- /dev/null
+++ b/openshift/configmap-jira-issue-fetcher-todo-env.yml
@@ -0,0 +1,7 @@
+apiVersion: v1
+data:
+ QUERY: 'labels = "ymir_todo"'
+immutable: false
+kind: ConfigMap
+metadata:
+ name: jira-issue-fetcher-todo-env
diff --git a/openshift/cronjob-jira-issue-fetcher-todo.yml b/openshift/cronjob-jira-issue-fetcher-todo.yml
new file mode 100644
index 000000000..9ce8c8284
--- /dev/null
+++ b/openshift/cronjob-jira-issue-fetcher-todo.yml
@@ -0,0 +1,62 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+ name: jira-issue-fetcher-todo
+ labels:
+ app: jira-issue-fetcher-todo
+ component: scheduler
+spec:
+ schedule: "*/5 * * * *" # Every 5 minutes — picks up ymir_todo maintainer requests promptly
+ concurrencyPolicy: Forbid # Prevent overlapping runs
+ successfulJobsHistoryLimit: 3
+ failedJobsHistoryLimit: 5
+ suspend: true
+ jobTemplate:
+ metadata:
+ labels:
+ app: jira-issue-fetcher-todo
+ component: job
+ spec:
+ backoffLimit: 2
+ activeDeadlineSeconds: 240 # 4 minutes max runtime (cron is */5, must not overlap)
+ template:
+ metadata:
+ labels:
+ app: jira-issue-fetcher-todo
+ component: pod
+ spec:
+ restartPolicy: Never
+ containers:
+ - name: jira-issue-fetcher-env
+ image: 'jira-issue-fetcher:prod'
+ imagePullPolicy: Always
+ envFrom:
+ - configMapRef:
+ name: endpoints-env
+ - configMapRef:
+ name: jira-env
+ - secretRef:
+ name: jira-env
+ - configMapRef:
+ name: jira-issue-fetcher-env
+ - configMapRef:
+ name: jira-issue-fetcher-todo-env
+ resources:
+ limits:
+ cpu: "200m"
+ memory: "128Mi"
+ securityContext:
+ allowPrivilegeEscalation: false
+ runAsNonRoot: true
+ seccompProfile:
+ type: RuntimeDefault
+ capabilities:
+ drop:
+ - ALL
+ securityContext:
+ runAsNonRoot: true
+ seccompProfile:
+ type: RuntimeDefault
+ dnsPolicy: ClusterFirst
+ schedulerName: default-scheduler
+ terminationGracePeriodSeconds: 30
diff --git a/openshift/cronjob-jira-issue-fetcher.yml b/openshift/cronjob-jira-issue-fetcher.yml
index 179d8fe70..d859273b6 100644
--- a/openshift/cronjob-jira-issue-fetcher.yml
+++ b/openshift/cronjob-jira-issue-fetcher.yml
@@ -6,7 +6,7 @@ metadata:
app: jira-issue-fetcher
component: scheduler
spec:
- schedule: "*/30 * * * *" # Every 30 minutes
+ schedule: "*/30 * * * *" # Every 30 minutes — runs the early-adopters filter
concurrencyPolicy: Forbid # Prevent overlapping runs
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 5
@@ -18,7 +18,7 @@ spec:
component: job
spec:
backoffLimit: 2
- activeDeadlineSeconds: 1800 # 30 minutes max runtime
+ activeDeadlineSeconds: 600 # 10 minutes max runtime (cron is */30, must not overlap)
template:
metadata:
labels:
@@ -39,6 +39,8 @@ spec:
name: jira-env
- configMapRef:
name: jira-issue-fetcher-env
+ - configMapRef:
+ name: jira-issue-fetcher-filter-env
resources:
limits:
cpu: "200m"
diff --git a/openshift/deploy.sh b/openshift/deploy.sh
index ab9fc32c6..4c0c17393 100755
--- a/openshift/deploy.sh
+++ b/openshift/deploy.sh
@@ -97,7 +97,10 @@ apply deployment-rebuild-agent-c10s.yml
apply imagestream-jira-issue-fetcher.yml
import_image jira-issue-fetcher
apply configmap-jira-issue-fetcher-env.yml
+apply configmap-jira-issue-fetcher-filter-env.yml
+apply configmap-jira-issue-fetcher-todo-env.yml
apply cronjob-jira-issue-fetcher.yml
+apply cronjob-jira-issue-fetcher-todo.yml
# # Supervisor
# apply imagestream-supervisor.yml
diff --git a/openshift/deployment-mcp-gateway.yml b/openshift/deployment-mcp-gateway.yml
index 7322a5ea0..add9c2484 100644
--- a/openshift/deployment-mcp-gateway.yml
+++ b/openshift/deployment-mcp-gateway.yml
@@ -29,8 +29,6 @@ spec:
configMapKeyRef:
key: GIT_REPO_BASEPATH
name: agents-env
- - name: SILENT_RUN
- value: "true"
- name: SSE_PORT
value: "8000"
- name: FORK_NAMESPACE
diff --git a/ymir/agents/backport_agent.py b/ymir/agents/backport_agent.py
index 579ccc5c0..c422da226 100644
--- a/ymir/agents/backport_agent.py
+++ b/ymir/agents/backport_agent.py
@@ -903,6 +903,7 @@ async def run_workflow(
backport_agent_factory=None,
max_build_attempts=10,
max_incremental_fix_attempts=None,
+ user_triggered=False,
):
if max_incremental_fix_attempts is None:
max_incremental_fix_attempts = max_build_attempts
@@ -910,10 +911,6 @@ async def run_workflow(
local_tool_options: dict[str, Any] = {"working_directory": None}
if mock_env := get_mock_local_tool_env(jira_issue):
local_tool_options["env"] = mock_env
- # In tests SILENT_RUN is typically unset, so Jira status updates are
- # attempted (and skipped via dry_run). Set SILENT_RUN=true to suppress
- # Jira transitions even when dry_run is False.
- silent_run = os.getenv("SILENT_RUN", "false").lower() == "true"
async with mcp_tools(
os.environ["MCP_GATEWAY_URL"], call_meta={"jira_issue": jira_issue}
@@ -930,17 +927,19 @@ async def run_workflow(
workflow = Workflow(BackportState, name="BackportWorkflow")
async def change_jira_status(state):
- if not dry_run and not silent_run:
- try:
- await tasks.change_jira_status(
- jira_issue=state.jira_issue,
- status="In Progress",
- available_tools=gateway_tools,
- )
- except Exception as status_error:
- logger.warning(f"Failed to change status for {state.jira_issue}: {status_error}")
- else:
- logger.info(f"Dry run: would change status of {state.jira_issue} to In Progress")
+ if dry_run:
+ logger.info(f"Dry run: skipping Jira status change of {state.jira_issue} to In Progress")
+ return "fork_and_prepare_dist_git"
+ # tasks.change_jira_status further gates the write on
+ # JIRA_ALLOW_STATUS_CHANGES; nothing else to check here.
+ try:
+ await tasks.change_jira_status(
+ jira_issue=state.jira_issue,
+ status="In Progress",
+ available_tools=gateway_tools,
+ )
+ except Exception as status_error:
+ logger.warning(f"Failed to change status for {state.jira_issue}: {status_error}")
return "fork_and_prepare_dist_git"
async def fork_and_prepare_dist_git(state):
@@ -1337,6 +1336,7 @@ async def comment_in_jira(state):
comment_text=comment_text,
is_error=is_error,
available_tools=gateway_tools,
+ user_triggered=user_triggered,
)
return Workflow.END
@@ -1432,13 +1432,15 @@ async def main() -> None:
triage_state = task.metadata
backport_data = BackportData.model_validate(triage_state["triage_result"]["data"])
dist_git_branch = triage_state["target_branch"]
+ user_triggered = task.user_triggered
logger.info(
f"Processing backport for package: {backport_data.package}, "
f"JIRA: {backport_data.jira_issue}, branch: {dist_git_branch}, "
f"attempt: {task.attempts + 1}"
+ + (" (user-triggered via ymir_todo)" if user_triggered else "")
)
- async def retry(task, error, backport_data=backport_data):
+ async def retry(task, error, backport_data=backport_data, user_triggered=user_triggered):
task.attempts += 1
if task.attempts < max_retries:
logger.warning(
@@ -1456,6 +1458,7 @@ async def retry(task, error, backport_data=backport_data):
labels_to_add=[JiraLabels.BACKPORT_ERRORED.value],
labels_to_remove=[JiraLabels.TRIAGED_BACKPORT.value],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
await fix_await(redis.lpush(RedisQueues.ERROR_LIST.value, error))
@@ -1474,6 +1477,7 @@ async def retry(task, error, backport_data=backport_data):
dry_run=dry_run,
max_build_attempts=max_build_attempts,
max_incremental_fix_attempts=max_incremental_fix_attempts,
+ user_triggered=user_triggered,
)
logger.info(
f"Backport processing completed for {backport_data.jira_issue}, "
@@ -1501,6 +1505,7 @@ async def retry(task, error, backport_data=backport_data):
JiraLabels.BACKPORT_FAILED.value,
],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
await fix_await(
redis.lpush(
@@ -1517,6 +1522,7 @@ async def retry(task, error, backport_data=backport_data):
labels_to_add=[JiraLabels.BACKPORT_FAILED.value],
labels_to_remove=[JiraLabels.TRIAGED_BACKPORT.value],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
await retry(task, state.backport_result.error)
diff --git a/ymir/agents/issue_verification_agent.py b/ymir/agents/issue_verification_agent.py
index 250434b69..6d8657d4d 100644
--- a/ymir/agents/issue_verification_agent.py
+++ b/ymir/agents/issue_verification_agent.py
@@ -442,9 +442,16 @@ async def _change_status(
dry_run: bool,
) -> WorkflowResult:
comment = f"*Changing status from {current_status} => {new_status}*\n\n{why}"
-
- if dry_run:
- logger.info("Dry run: would change issue %s status to %s", issue_key, new_status)
+ status_changes_allowed = os.getenv("JIRA_ALLOW_STATUS_CHANGES", "false").lower() == "true"
+
+ if dry_run or not status_changes_allowed:
+ reason = "dry run" if dry_run else "JIRA_ALLOW_STATUS_CHANGES is not set"
+ logger.info(
+ "Skipping Jira status change of %s to %s and the accompanying status-change comment (%s)",
+ issue_key,
+ new_status,
+ reason,
+ )
else:
await run_tool(
"change_jira_status",
diff --git a/ymir/agents/preliminary_testing_agent.py b/ymir/agents/preliminary_testing_agent.py
index ff4b6c1e8..38c423915 100644
--- a/ymir/agents/preliminary_testing_agent.py
+++ b/ymir/agents/preliminary_testing_agent.py
@@ -414,6 +414,16 @@ async def act_on_result(state: PreliminaryTestingWorkflowState):
details_comment=state.result.comment,
gateway_tools=gateway_tools,
)
+ elif os.getenv("JIRA_ALLOW_STATUS_CHANGES", "false").lower() != "true":
+ # Setting Preliminary Testing = Pass admits the build into
+ # the next compose, triggers erratum creation, and moves
+ # the issue to Integration — same blast radius as a status
+ # transition, so it shares the same opt-in flag.
+ logger.info(
+ "JIRA_ALLOW_STATUS_CHANGES is not set; "
+ "skipping Preliminary Testing = Pass for %s",
+ state.jira_issue,
+ )
else:
comment = state.result.comment or "Preliminary testing has passed."
await run_tool(
diff --git a/ymir/agents/rebase_agent.py b/ymir/agents/rebase_agent.py
index 02eedf067..8396f8bb6 100644
--- a/ymir/agents/rebase_agent.py
+++ b/ymir/agents/rebase_agent.py
@@ -242,7 +242,13 @@ class State(PackageUpdateState):
abandon_autorelease: bool = Field(default=False)
async def run_workflow(
- package, dist_git_branch, version, jira_issue, justification=None, redis_conn=None
+ package,
+ dist_git_branch,
+ version,
+ jira_issue,
+ justification=None,
+ redis_conn=None,
+ user_triggered=False,
):
local_tool_options["working_directory"] = None
if mock_env := get_mock_local_tool_env(jira_issue):
@@ -256,20 +262,21 @@ async def run_workflow(
log_agent = create_log_agent(gateway_tools, local_tool_options)
workflow = Workflow(State, name="RebaseWorkflow")
- silent_run = os.getenv("SILENT_RUN", "false").lower() == "true"
async def change_jira_status(state):
- if not dry_run and not silent_run:
- try:
- await tasks.change_jira_status(
- jira_issue=state.jira_issue,
- status="In Progress",
- available_tools=gateway_tools,
- )
- except Exception as status_error:
- logger.warning(f"Failed to change status for {state.jira_issue}: {status_error}")
- else:
- logger.info(f"Dry run: would change status of {state.jira_issue} to In Progress")
+ if dry_run:
+ logger.info(f"Dry run: skipping Jira status change of {state.jira_issue} to In Progress")
+ return "fork_and_prepare_dist_git"
+ # tasks.change_jira_status further gates the write on
+ # JIRA_ALLOW_STATUS_CHANGES; nothing else to check here.
+ try:
+ await tasks.change_jira_status(
+ jira_issue=state.jira_issue,
+ status="In Progress",
+ available_tools=gateway_tools,
+ )
+ except Exception as status_error:
+ logger.warning(f"Failed to change status for {state.jira_issue}: {status_error}")
return "fork_and_prepare_dist_git"
async def fork_and_prepare_dist_git(state):
@@ -473,6 +480,7 @@ async def comment_in_jira(state):
comment_text=comment_text,
is_error=is_error,
available_tools=gateway_tools,
+ user_triggered=user_triggered,
)
return Workflow.END
@@ -545,13 +553,15 @@ async def comment_in_jira(state):
triage_state = task.metadata
rebase_data = RebaseData.model_validate(triage_state["triage_result"]["data"])
dist_git_branch = triage_state["target_branch"]
+ user_triggered = task.user_triggered
logger.info(
f"Processing rebase for package: {rebase_data.package}, "
f"version: {rebase_data.version}, JIRA: {rebase_data.jira_issue}, "
f"branch: {dist_git_branch}, attempt: {task.attempts + 1}"
+ + (" (user-triggered via ymir_todo)" if user_triggered else "")
)
- async def retry(task, error, rebase_data=rebase_data):
+ async def retry(task, error, rebase_data=rebase_data, user_triggered=user_triggered):
task.attempts += 1
if task.attempts < max_retries:
logger.warning(
@@ -569,6 +579,7 @@ async def retry(task, error, rebase_data=rebase_data):
labels_to_add=[JiraLabels.REBASE_ERRORED.value],
labels_to_remove=[JiraLabels.TRIAGED_REBASE.value],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
await fix_await(redis.lpush(RedisQueues.ERROR_LIST.value, error))
@@ -582,6 +593,7 @@ async def retry(task, error, rebase_data=rebase_data):
jira_issue=rebase_data.jira_issue,
justification=rebase_data.justification,
redis_conn=redis,
+ user_triggered=user_triggered,
)
logger.info(
f"Rebase processing completed for {rebase_data.jira_issue}, "
@@ -607,6 +619,7 @@ async def retry(task, error, rebase_data=rebase_data):
JiraLabels.REBASE_FAILED.value,
],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
await fix_await(
redis.lpush(
@@ -621,6 +634,7 @@ async def retry(task, error, rebase_data=rebase_data):
labels_to_add=[JiraLabels.REBASE_FAILED.value],
labels_to_remove=[JiraLabels.TRIAGED_REBASE.value],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
await retry(task, state.rebase_result.error)
diff --git a/ymir/agents/rebuild_agent.py b/ymir/agents/rebuild_agent.py
index f244b2f7b..5a229398f 100644
--- a/ymir/agents/rebuild_agent.py
+++ b/ymir/agents/rebuild_agent.py
@@ -72,6 +72,7 @@ async def run_workflow(
dependency_component=None,
consolidated_issues=None,
consolidation_summary=None,
+ user_triggered=False,
):
local_tool_options["working_directory"] = None
if mock_env := get_mock_local_tool_env(jira_issue):
@@ -274,6 +275,7 @@ async def comment_in_jira(state):
comment_text=comment_text,
is_error=is_error,
available_tools=gateway_tools,
+ user_triggered=user_triggered,
)
except Exception as e:
logger.warning(f"Failed to comment on issue {issue_key}: {e}")
@@ -353,6 +355,7 @@ async def comment_in_jira(state):
triage_state = task.metadata
rebuild_data = RebuildData.model_validate(triage_state["triage_result"]["data"])
dist_git_branch = triage_state["target_branch"]
+ user_triggered = task.user_triggered
except Exception as e:
logger.error(f"Failed to parse task payload, skipping: {e}")
await fix_await(
@@ -369,9 +372,10 @@ async def comment_in_jira(state):
f"Processing rebuild for package: {rebuild_data.package}, "
f"JIRA: {rebuild_data.jira_issue}, branch: {dist_git_branch}, "
f"attempt: {task.attempts + 1}"
+ + (" (user-triggered via ymir_todo)" if user_triggered else "")
)
- async def retry(task, error, rebuild_data=rebuild_data):
+ async def retry(task, error, rebuild_data=rebuild_data, user_triggered=user_triggered):
task.attempts += 1
if task.attempts < max_retries:
logger.warning(
@@ -391,6 +395,7 @@ async def retry(task, error, rebuild_data=rebuild_data):
labels_to_add=[JiraLabels.REBUILD_ERRORED.value],
labels_to_remove=[JiraLabels.TRIAGED_REBUILD.value],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
except Exception as e:
logger.warning(f"Failed to set labels on {issue_key}: {e}")
@@ -407,6 +412,7 @@ async def retry(task, error, rebuild_data=rebuild_data):
dependency_component=rebuild_data.dependency_component,
consolidated_issues=rebuild_data.consolidated_issues,
consolidation_summary=rebuild_data.consolidation_summary,
+ user_triggered=user_triggered,
)
logger.info(
f"Rebuild processing completed for {rebuild_data.jira_issue}, "
@@ -434,6 +440,7 @@ async def retry(task, error, rebuild_data=rebuild_data):
JiraLabels.REBUILD_FAILED.value,
],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
except Exception as e:
logger.warning(f"Failed to set labels on {issue_key}: {e}")
@@ -455,6 +462,7 @@ async def retry(task, error, rebuild_data=rebuild_data):
labels_to_add=[JiraLabels.REBUILD_FAILED.value],
labels_to_remove=[JiraLabels.TRIAGED_REBUILD.value],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
except Exception as e:
logger.warning(f"Failed to set labels on {issue_key}: {e}")
diff --git a/ymir/agents/tasks.py b/ymir/agents/tasks.py
index 8750acead..77cfe8ced 100644
--- a/ymir/agents/tasks.py
+++ b/ymir/agents/tasks.py
@@ -1,3 +1,4 @@
+import asyncio
import hashlib
import logging
import os
@@ -16,6 +17,7 @@
LogOutputSchema,
MergeRequestDetails,
OpenMergeRequestResult,
+ Task,
)
from ymir.tools.unprivileged.specfile import UpdateReleaseTool
@@ -250,9 +252,13 @@ async def comment_in_jira(
comment_text: str,
available_tools: list[Tool],
is_error: bool = False,
+ user_triggered: bool = False,
) -> None:
- if is_error and os.getenv("SILENT_RUN", "false").lower() == "true":
- logger.info(f"Silent run: skipping Jira error comment for {jira_issue}")
+ # Default is silent: error comments are only posted on user-triggered runs.
+ # A maintainer who didn't ask for processing should not be spammed with
+ # error notifications; if they want to see them, they add ymir_todo.
+ if is_error and not user_triggered:
+ logger.info(f"Skipping Jira error comment for {jira_issue} (not user-triggered)")
return
await run_tool(
@@ -264,6 +270,39 @@ async def comment_in_jira(
)
+async def post_user_ack_once(
+ task: Task,
+ jira_issue: str,
+ agent_type: str,
+ comment_text: str,
+ user_triggered: bool,
+ dry_run: bool,
+) -> None:
+ """Post a user-triggered acknowledgement comment to Jira exactly once per task.
+
+ Tracks delivery via ``task.metadata['ack_posted']`` so a re-queued retry
+ of the same task sees it as already delivered and skips the post. The
+ flag is only set after ``comment_in_jira`` returns successfully, so a
+ failed post still leaves the next retry free to try again.
+ """
+ if not user_triggered or dry_run:
+ return
+ if task.metadata.get("ack_posted"):
+ return
+ try:
+ async with mcp_tools(os.environ["MCP_GATEWAY_URL"]) as gateway_tools:
+ await comment_in_jira(
+ jira_issue=jira_issue,
+ agent_type=agent_type,
+ comment_text=comment_text,
+ available_tools=gateway_tools,
+ user_triggered=True,
+ )
+ task.metadata["ack_posted"] = True
+ except Exception as e:
+ logger.warning(f"Failed to post user-triggered ack comment for {jira_issue}: {e}")
+
+
async def comment_in_mr(
merge_request_url: str,
comment_text: str,
@@ -282,6 +321,11 @@ async def change_jira_status(
status: str,
available_tools: list[Tool],
) -> None:
+ if os.getenv("JIRA_ALLOW_STATUS_CHANGES", "false").lower() != "true":
+ logger.info(
+ f"JIRA_ALLOW_STATUS_CHANGES is not set; skipping status change of {jira_issue} to {status!r}"
+ )
+ return
await run_tool(
"change_jira_status",
issue_key=jira_issue,
@@ -304,7 +348,15 @@ async def get_jira_labels(jira_issue: str) -> list[str]:
return []
-_FAILURE_LABEL_SUFFIXES = ("_failed", "_errored")
+# Intermediate "_failed" labels (transient retry-state) are suppressed for
+# non-user-triggered runs — they're noise for maintainers and a retry will
+# follow. Terminal "_errored" labels are kept regardless: they are the only
+# dedup anchor left after retries are exhausted, so suppressing them would let
+# the next fetcher sweep re-enqueue the same issue forever.
+_INTERMEDIATE_LABEL_SUFFIXES = ("_failed",)
+
+
+_CRITICAL_WRITE_MAX_ATTEMPTS = 3
async def set_jira_labels(
@@ -312,33 +364,63 @@ async def set_jira_labels(
labels_to_add: list[str] | None = None,
labels_to_remove: list[str] | None = None,
dry_run: bool = False,
+ user_triggered: bool = False,
+ critical: bool = False,
) -> None:
+ """Edit labels on a Jira issue.
+
+ When ``critical=True``, the write is treated as load-bearing for dedup:
+ failures are retried with exponential backoff and re-raised on permanent
+ failure so the caller can take recovery action (typically: re-queue the
+ task and abort processing). When ``critical=False`` (default), failures
+ are logged and swallowed.
+ """
if dry_run or os.getenv("JIRA_DRY_RUN", "false").lower() == "true":
logger.info(f"Dry run, not updating labels for {jira_issue}")
return
- if os.getenv("SILENT_RUN", "false").lower() == "true":
+ if not labels_to_add and not labels_to_remove:
+ return
+
+ if not user_triggered:
original_count = len(labels_to_add or [])
labels_to_add = [
- label for label in (labels_to_add or []) if not label.endswith(_FAILURE_LABEL_SUFFIXES)
+ label for label in (labels_to_add or []) if not label.endswith(_INTERMEDIATE_LABEL_SUFFIXES)
]
if len(labels_to_add) != original_count:
- logger.info(f"Silent run: skipping failure labels for {jira_issue}")
+ logger.info(f"Skipping intermediate failure labels for {jira_issue} (not user-triggered)")
if not labels_to_add and not (labels_to_remove or []):
return
- try:
- async with mcp_tools(os.environ["MCP_GATEWAY_URL"]) as gateway_tools:
- await run_tool(
- "edit_jira_labels",
- issue_key=jira_issue,
- labels_to_add=labels_to_add or [],
- labels_to_remove=labels_to_remove or [],
- available_tools=gateway_tools,
- )
-
- except Exception as e:
- logger.warning(f"Failed to update labels for {jira_issue}: {e}")
+ max_attempts = _CRITICAL_WRITE_MAX_ATTEMPTS if critical else 1
+ last_exc: Exception | None = None
+ for attempt in range(1, max_attempts + 1):
+ try:
+ async with mcp_tools(os.environ["MCP_GATEWAY_URL"]) as gateway_tools:
+ await run_tool(
+ "edit_jira_labels",
+ issue_key=jira_issue,
+ labels_to_add=labels_to_add or [],
+ labels_to_remove=labels_to_remove or [],
+ available_tools=gateway_tools,
+ )
+ return
+ except Exception as e:
+ last_exc = e
+ if not critical:
+ logger.warning(f"Failed to update labels for {jira_issue}: {e}")
+ return
+ if attempt < max_attempts:
+ backoff_seconds = 2 ** (attempt - 1)
+ logger.warning(
+ f"Critical label write failed for {jira_issue} "
+ f"(attempt {attempt}/{max_attempts}): {e}; "
+ f"retrying in {backoff_seconds}s"
+ )
+ await asyncio.sleep(backoff_seconds)
+
+ logger.error(f"Critical label write for {jira_issue} failed after {max_attempts} attempts: {last_exc}")
+ raise last_exc # type: ignore[misc]
async def cache_mr_metadata(
diff --git a/ymir/agents/tests/unit/test_tasks.py b/ymir/agents/tests/unit/test_tasks.py
index 1d0947a25..3a0276d35 100644
--- a/ymir/agents/tests/unit/test_tasks.py
+++ b/ymir/agents/tests/unit/test_tasks.py
@@ -1,8 +1,24 @@
+from contextlib import asynccontextmanager
from unittest.mock import AsyncMock, patch
import pytest
-from ymir.agents.tasks import fork_and_prepare_dist_git
+from ymir.agents.tasks import change_jira_status, fork_and_prepare_dist_git, post_user_ack_once
+from ymir.common.models import Task
+
+
+@asynccontextmanager
+async def _fake_mcp_tools(_url):
+ yield []
+
+
+def _make_task(metadata: dict | None = None, attempts: int = 0) -> Task:
+ return Task(metadata=metadata or {"issue": "RHEL-1"}, attempts=attempts, user_triggered=True)
+
+
+@pytest.fixture(autouse=True)
+def _mcp_url_env(monkeypatch):
+ monkeypatch.setenv("MCP_GATEWAY_URL", "http://mcp-gateway:8000/sse")
@pytest.fixture
@@ -40,3 +56,140 @@ async def test_fork_and_prepare_dist_git_wipes_stale_working_dir(git_repo_basepa
assert working_dir.is_dir(), "working_dir should be recreated"
assert not stale_file.exists(), "stale artifacts from previous run should be gone"
+
+
+@pytest.mark.asyncio
+async def test_post_user_ack_once_posts_on_first_call():
+ """User-triggered, not dry-run, never posted → posts and persists the flag."""
+ task = _make_task()
+ with (
+ patch("ymir.agents.tasks.mcp_tools", _fake_mcp_tools),
+ patch("ymir.agents.tasks.comment_in_jira", new_callable=AsyncMock) as mock_comment,
+ ):
+ await post_user_ack_once(
+ task=task,
+ jira_issue="RHEL-1",
+ agent_type="Triage",
+ comment_text="hello",
+ user_triggered=True,
+ dry_run=False,
+ )
+
+ mock_comment.assert_awaited_once()
+ assert task.metadata["ack_posted"] is True
+
+
+@pytest.mark.asyncio
+async def test_post_user_ack_once_skips_when_already_posted():
+ """Second call with the same task must not re-post — even after re-queue."""
+ task = _make_task(metadata={"issue": "RHEL-1", "ack_posted": True})
+ with (
+ patch("ymir.agents.tasks.mcp_tools", _fake_mcp_tools),
+ patch("ymir.agents.tasks.comment_in_jira", new_callable=AsyncMock) as mock_comment,
+ ):
+ await post_user_ack_once(
+ task=task,
+ jira_issue="RHEL-1",
+ agent_type="Triage",
+ comment_text="hello",
+ user_triggered=True,
+ dry_run=False,
+ )
+
+ mock_comment.assert_not_awaited()
+ assert task.metadata["ack_posted"] is True
+
+
+@pytest.mark.asyncio
+async def test_post_user_ack_once_skips_when_not_user_triggered():
+ task = _make_task()
+ with (
+ patch("ymir.agents.tasks.mcp_tools", _fake_mcp_tools),
+ patch("ymir.agents.tasks.comment_in_jira", new_callable=AsyncMock) as mock_comment,
+ ):
+ await post_user_ack_once(
+ task=task,
+ jira_issue="RHEL-1",
+ agent_type="Triage",
+ comment_text="hello",
+ user_triggered=False,
+ dry_run=False,
+ )
+
+ mock_comment.assert_not_awaited()
+ assert "ack_posted" not in task.metadata
+
+
+@pytest.mark.asyncio
+async def test_post_user_ack_once_skips_on_dry_run():
+ task = _make_task()
+ with (
+ patch("ymir.agents.tasks.mcp_tools", _fake_mcp_tools),
+ patch("ymir.agents.tasks.comment_in_jira", new_callable=AsyncMock) as mock_comment,
+ ):
+ await post_user_ack_once(
+ task=task,
+ jira_issue="RHEL-1",
+ agent_type="Triage",
+ comment_text="hello",
+ user_triggered=True,
+ dry_run=True,
+ )
+
+ mock_comment.assert_not_awaited()
+ assert "ack_posted" not in task.metadata
+
+
+@pytest.mark.asyncio
+async def test_change_jira_status_skips_when_flag_unset(monkeypatch):
+ """Default behavior: JIRA_ALLOW_STATUS_CHANGES unset → no MCP call."""
+ monkeypatch.delenv("JIRA_ALLOW_STATUS_CHANGES", raising=False)
+ with patch("ymir.agents.tasks.run_tool", new_callable=AsyncMock) as mock_run_tool:
+ await change_jira_status("RHEL-1", "In Progress", available_tools=[])
+ mock_run_tool.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_change_jira_status_skips_when_flag_false(monkeypatch):
+ monkeypatch.setenv("JIRA_ALLOW_STATUS_CHANGES", "false")
+ with patch("ymir.agents.tasks.run_tool", new_callable=AsyncMock) as mock_run_tool:
+ await change_jira_status("RHEL-1", "In Progress", available_tools=[])
+ mock_run_tool.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_change_jira_status_runs_when_flag_true(monkeypatch):
+ monkeypatch.setenv("JIRA_ALLOW_STATUS_CHANGES", "true")
+ with patch("ymir.agents.tasks.run_tool", new_callable=AsyncMock) as mock_run_tool:
+ await change_jira_status("RHEL-1", "In Progress", available_tools=[])
+ mock_run_tool.assert_awaited_once()
+ # The MCP tool is called with the expected arguments
+ _, kwargs = mock_run_tool.call_args
+ assert kwargs["issue_key"] == "RHEL-1"
+ assert kwargs["status"] == "In Progress"
+
+
+@pytest.mark.asyncio
+async def test_post_user_ack_once_does_not_persist_on_failure():
+ """On post failure, ack_posted stays unset so the next retry can try again."""
+ task = _make_task()
+ with (
+ patch("ymir.agents.tasks.mcp_tools", _fake_mcp_tools),
+ patch(
+ "ymir.agents.tasks.comment_in_jira",
+ new_callable=AsyncMock,
+ side_effect=RuntimeError("jira down"),
+ ) as mock_comment,
+ ):
+ # Must swallow the exception (caller relies on this)
+ await post_user_ack_once(
+ task=task,
+ jira_issue="RHEL-1",
+ agent_type="Triage",
+ comment_text="hello",
+ user_triggered=True,
+ dry_run=False,
+ )
+
+ mock_comment.assert_awaited_once()
+ assert "ack_posted" not in task.metadata
diff --git a/ymir/agents/tests/unit/test_triage_agent.py b/ymir/agents/tests/unit/test_triage_agent.py
new file mode 100644
index 000000000..d88ec6a5e
--- /dev/null
+++ b/ymir/agents/tests/unit/test_triage_agent.py
@@ -0,0 +1,56 @@
+import pytest
+
+from ymir.agents.triage_agent import _should_update_jira
+from ymir.common.models import Resolution
+
+
+@pytest.mark.parametrize(
+ "resolution",
+ [
+ Resolution.REBASE,
+ Resolution.BACKPORT,
+ Resolution.REBUILD,
+ Resolution.NOT_AFFECTED,
+ Resolution.POSTPONED,
+ Resolution.OPEN_ENDED_ANALYSIS,
+ Resolution.CLARIFICATION_NEEDED,
+ Resolution.ERROR,
+ ],
+)
+def test_user_triggered_always_posts(resolution):
+ """A maintainer-triggered run always gets a comment, regardless of resolution."""
+ assert _should_update_jira(resolution=resolution, user_triggered=True) is True
+
+
+@pytest.mark.parametrize(
+ "resolution",
+ [
+ Resolution.REBASE,
+ Resolution.BACKPORT,
+ Resolution.REBUILD,
+ ],
+)
+def test_non_user_triggered_skips_comment_when_mr_will_be_opened(resolution):
+ """Without ymir_todo, runs do not comment when an MR will be opened —
+ the MR itself is the user-visible artifact."""
+ assert _should_update_jira(resolution=resolution, user_triggered=False) is False
+
+
+@pytest.mark.parametrize(
+ "resolution",
+ [
+ Resolution.NOT_AFFECTED,
+ Resolution.POSTPONED,
+ Resolution.OPEN_ENDED_ANALYSIS,
+ Resolution.CLARIFICATION_NEEDED,
+ ],
+)
+def test_non_user_triggered_still_posts_when_no_mr_will_open(resolution):
+ """Resolutions that do not produce an MR must still post a comment —
+ otherwise the result is invisible to the requester."""
+ assert _should_update_jira(resolution=resolution, user_triggered=False) is True
+
+
+def test_non_user_triggered_error_does_not_post():
+ """ERROR is handled by separate error-path machinery, not this helper."""
+ assert _should_update_jira(resolution=Resolution.ERROR, user_triggered=False) is False
diff --git a/ymir/agents/triage_agent.py b/ymir/agents/triage_agent.py
index c22ded929..45f0c6c1a 100644
--- a/ymir/agents/triage_agent.py
+++ b/ymir/agents/triage_agent.py
@@ -79,11 +79,25 @@
redis_logger = logging.getLogger("agent.redis")
-def _should_update_jira(silent_run: bool, resolution: Resolution = None) -> bool:
- """In silent mode, only update Jira for not-affected and postponed resolutions."""
- if not silent_run:
+def _should_update_jira(resolution: Resolution = None, user_triggered: bool = False) -> bool:
+ """Whether to post a user-facing Jira comment for this run.
+
+ Used only for comments — labels are dedup anchors and are written
+ unconditionally. Default is silent: comments are suppressed unless the
+ run was explicitly requested by a maintainer (via ymir_todo) or the
+ resolution carries information the requester needs even unbidden.
+ The unbidden cases are the resolutions that do NOT produce an MR —
+ without a comment the result would be invisible to the requester:
+ not-affected, postponed, open-ended-analysis, clarification-needed.
+ """
+ if user_triggered:
return True
- return resolution in (Resolution.NOT_AFFECTED, Resolution.POSTPONED)
+ return resolution in (
+ Resolution.NOT_AFFECTED,
+ Resolution.POSTPONED,
+ Resolution.OPEN_ENDED_ANALYSIS,
+ Resolution.CLARIFICATION_NEEDED,
+ )
_RESOLUTION_TO_LABEL: dict[Resolution, JiraLabels] = {
@@ -643,7 +657,12 @@ def create_triage_agent(gateway_tools, local_tool_options=None) -> ReasoningAgen
async def run_workflow(
- jira_issue, dry_run, triage_agent_factory, auto_chain=False, force_cve_triage=False, silent_run=False
+ jira_issue,
+ dry_run,
+ triage_agent_factory,
+ auto_chain=False,
+ force_cve_triage=False,
+ user_triggered=False,
):
local_tool_options = None
if mock_env := get_mock_local_tool_env(jira_issue):
@@ -1193,10 +1212,10 @@ async def comment_in_jira(state):
logger.info(f"Result to be put in Jira comment: {comment_text}")
if dry_run:
return Workflow.END
- if not _should_update_jira(silent_run, state.triage_result.resolution):
+ if not _should_update_jira(state.triage_result.resolution, user_triggered):
logger.info(
- f"Silent run: skipping Jira comment for {state.jira_issue} "
- f"(resolution={state.triage_result.resolution.value})"
+ f"Skipping Jira comment for {state.jira_issue} "
+ f"(resolution={state.triage_result.resolution.value}, not user-triggered)"
)
return Workflow.END
await tasks.comment_in_jira(
@@ -1204,6 +1223,7 @@ async def comment_in_jira(state):
agent_type="Triage",
comment_text=comment_text,
available_tools=gateway_tools,
+ user_triggered=user_triggered,
)
return Workflow.END
@@ -1231,7 +1251,6 @@ async def main() -> None:
dry_run = os.getenv("DRY_RUN", "False").lower() == "true"
auto_chain = os.getenv("AUTO_CHAIN", "true").lower() == "true"
force_cve_triage = os.getenv("FORCE_CVE_TRIAGE", "false").lower() == "true"
- silent_run = os.getenv("SILENT_RUN", "false").lower() == "true"
if jira_issue := os.getenv("JIRA_ISSUE", None):
logger.info("Running in direct mode with environment variable")
@@ -1243,7 +1262,6 @@ async def main() -> None:
agent_factory,
auto_chain=auto_chain,
force_cve_triage=force_cve_triage,
- silent_run=silent_run,
)
logger.info(f"Direct run completed: {state.triage_result.model_dump_json(indent=4)}")
if state.cve_eligibility_result:
@@ -1269,7 +1287,18 @@ async def main() -> None:
task = Task.model_validate_json(payload)
input = InputSchema.model_validate(task.metadata)
- logger.info(f"Processing triage for JIRA issue: {input.issue}, attempt: {task.attempts + 1}")
+ user_triggered = task.user_triggered
+ logger.info(
+ f"Processing triage for JIRA issue: {input.issue}, attempt: {task.attempts + 1}"
+ + (" (user-triggered via ymir_todo)" if user_triggered else "")
+ )
+
+ # User-triggered runs will receive an acknowledgement comment,
+ # but only after we successfully write the in-progress label to
+ # avoid duplicate comments if the label write later fails.
+ # post_user_ack_once persists ack_posted in task.metadata so that
+ # retries do not re-post the ack after it has already been
+ # delivered.
current_labels = await tasks.get_jira_labels(input.issue)
all_labels = JiraLabels.all_labels()
@@ -1278,14 +1307,19 @@ async def main() -> None:
for label in current_labels
if label in all_labels and label != JiraLabels.TRIAGE_IN_PROGRESS.value
]
- if terminal_ymir_labels and JiraLabels.RETRY_NEEDED.value not in current_labels:
+ if (
+ terminal_ymir_labels
+ and JiraLabels.RETRY_NEEDED.value not in current_labels
+ and JiraLabels.TRIAGE_IN_PROGRESS.value not in current_labels
+ and not user_triggered
+ ):
logger.info(
f"Skipping duplicate triage for {input.issue} — "
f"already has labels: {terminal_ymir_labels}"
)
continue
- async def retry(task, error, input=input):
+ async def retry(task, error, input=input, user_triggered=user_triggered):
task.attempts += 1
if task.attempts < max_retries:
logger.warning(
@@ -1297,28 +1331,72 @@ async def retry(task, error, input=input):
logger.error(
f"Task failed after {max_retries} attempts, moving to error list: {input.issue}"
)
- await tasks.set_jira_labels(
- jira_issue=input.issue,
- labels_to_add=[JiraLabels.TRIAGE_ERRORED.value],
- labels_to_remove=[JiraLabels.TRIAGE_IN_PROGRESS.value],
- dry_run=dry_run,
- )
+ try:
+ await tasks.set_jira_labels(
+ jira_issue=input.issue,
+ labels_to_add=[JiraLabels.TRIAGE_ERRORED.value],
+ labels_to_remove=[JiraLabels.TRIAGE_IN_PROGRESS.value],
+ dry_run=dry_run,
+ user_triggered=user_triggered,
+ )
+ except Exception as label_error:
+ logger.warning(f"Failed to set error labels on {input.issue}: {label_error}")
await fix_await(redis.lpush(RedisQueues.ERROR_LIST.value, error))
+ # ymir_triage_in_progress is the dedup anchor for the next fetcher
+ # sweep. If we cannot write it, we must not proceed — otherwise the
+ # fetcher will re-enqueue this issue and a second triage will run in
+ # parallel. Re-queue the task as-is (task.attempts tracks triage
+ # retries, not Jira-write retries — set_jira_labels already retries
+ # the write internally) and skip processing this iteration.
try:
- if _should_update_jira(silent_run):
- await tasks.set_jira_labels(
- jira_issue=input.issue,
- labels_to_add=[JiraLabels.TRIAGE_IN_PROGRESS.value],
- labels_to_remove=[
- label
- for label in JiraLabels.all_labels()
- if label != JiraLabels.TRIAGE_IN_PROGRESS.value
- ],
- dry_run=dry_run,
- )
- logger.info(f"Cleaned up existing labels for {input.issue}")
+ await tasks.set_jira_labels(
+ jira_issue=input.issue,
+ labels_to_add=[JiraLabels.TRIAGE_IN_PROGRESS.value],
+ labels_to_remove=[
+ label
+ for label in JiraLabels.all_labels()
+ if label != JiraLabels.TRIAGE_IN_PROGRESS.value
+ ],
+ dry_run=dry_run,
+ user_triggered=user_triggered,
+ critical=True,
+ )
+ logger.info(f"Cleaned up existing labels for {input.issue}")
+ # Post acknowledgement comment for user-triggered runs now that
+ # the in-progress label write succeeded. This prevents duplicate
+ # comments if the critical label write were to fail.
+ await tasks.post_user_ack_once(
+ task=task,
+ jira_issue=input.issue,
+ agent_type="Triage",
+ comment_text=(
+ "Ymir picked up your request and started processing. "
+ "Results will be posted here when triage completes."
+ ),
+ user_triggered=user_triggered,
+ dry_run=dry_run,
+ )
+ except Exception as e:
+ # Route through retry() so a permanently failing issue (deleted
+ # ticket, per-issue permission error) is bounded by max_retries
+ # instead of looping forever and blocking the queue. On
+ # exhaustion the task lands in ERROR_LIST; the best-effort
+ # ymir_triage_errored label write may also fail if Jira itself
+ # is down — that's accepted (ERROR_LIST is the durable record).
+ logger.error(
+ f"Could not set {JiraLabels.TRIAGE_IN_PROGRESS.value} on "
+ f"{input.issue} after retries: {e}; re-queuing to avoid duplicate triage."
+ )
+ error_msg = f"Failed to set in-progress label: {e}"
+ await retry(task, ErrorData(details=error_msg, jira_issue=input.issue).model_dump_json())
+ # Long sleep on purpose: critical-write retries already burned
+ # ~7s, so we're past transient blips. Typical Jira outages last
+ # minutes; cycling faster just spams the API.
+ await asyncio.sleep(60)
+ continue
+ try:
logger.info(f"Starting triage processing for {input.issue}")
with span_processor.jira_issue_context(input.issue):
state = await run_workflow(
@@ -1327,7 +1405,7 @@ async def retry(task, error, input=input):
create_triage_agent,
auto_chain=auto_chain,
force_cve_triage=input.force_cve_triage,
- silent_run=silent_run,
+ user_triggered=user_triggered,
)
output = state.triage_result
logger.info(
@@ -1347,16 +1425,19 @@ async def retry(task, error, input=input):
ErrorData(details=error, jira_issue=input.issue).model_dump_json(),
)
else:
- update_jira = _should_update_jira(silent_run, output.resolution)
logger.info(f"Triage resolved as {output.resolution.value} for {input.issue}")
resolution_label = _RESOLUTION_TO_LABEL.get(output.resolution)
- if update_jira and resolution_label:
+ if resolution_label and output.resolution != Resolution.ERROR:
+ # Terminal resolution label is the dedup anchor that replaces
+ # ymir_triage_in_progress — must be written unconditionally so
+ # the next fetcher sweep skips this issue.
await tasks.set_jira_labels(
jira_issue=input.issue,
labels_to_add=[resolution_label.value],
labels_to_remove=[JiraLabels.TRIAGE_IN_PROGRESS.value],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
if output.resolution == Resolution.REBUILD:
for consolidated in output.data.consolidated_issues:
@@ -1369,6 +1450,7 @@ async def retry(task, error, input=input):
JiraLabels.REBUILT.value,
],
dry_run=dry_run,
+ user_triggered=user_triggered,
)
except Exception as e:
logger.warning(
@@ -1399,7 +1481,7 @@ async def retry(task, error, input=input):
queue = RedisQueues.OPEN_ENDED_ANALYSIS_LIST.value
payload = output.data.model_dump_json()
else:
- task = Task(metadata=state.model_dump())
+ task = Task(metadata=state.model_dump(), user_triggered=user_triggered)
payload = task.model_dump_json()
if output.resolution == Resolution.REBASE:
queue = RedisQueues.get_rebase_queue_for_branch(state.target_branch)
diff --git a/ymir/common/constants.py b/ymir/common/constants.py
index 1ebbf2097..eb0fa2dd2 100644
--- a/ymir/common/constants.py
+++ b/ymir/common/constants.py
@@ -128,6 +128,11 @@ class JiraLabels(Enum):
RETRY_NEEDED = "ymir_retry_needed"
FUSA = "ymir_fusa"
+ # Maintainer-facing trigger: when a Red Hat Employee adds this label to a CVE
+ # issue, the fetcher enqueues it for an e2e run and swaps the label for
+ # TRIAGE_IN_PROGRESS on enqueue.
+ TODO = "ymir_todo"
+
@classmethod
def all_labels(cls) -> set[str]:
"""Return all Ymir labels for cleanup operations"""
diff --git a/ymir/common/models.py b/ymir/common/models.py
index 5ea9dda29..e5ecfb366 100644
--- a/ymir/common/models.py
+++ b/ymir/common/models.py
@@ -69,16 +69,22 @@ class Task(BaseModel):
metadata: dict[str, Any] = Field(description="Task metadata containing issue information")
attempts: int = Field(default=0, description="Number of processing attempts")
+ user_triggered: bool = Field(
+ default=False,
+ description="True when a maintainer triggered this run via the ymir_todo label — "
+ "causes agents to post comments and intermediate failure labels that are "
+ "otherwise suppressed (default is silent).",
+ )
def to_json(self) -> str:
"""Convert to JSON string for Redis queue storage."""
return self.model_dump_json()
@classmethod
- def from_issue(cls, issue: str, attempts: int = 0) -> "Task":
+ def from_issue(cls, issue: str, attempts: int = 0, user_triggered: bool = False) -> "Task":
"""Create a task from a JIRA issue key."""
metadata = TriageInputSchema(issue=issue)
- return cls(metadata=metadata.model_dump(), attempts=attempts)
+ return cls(metadata=metadata.model_dump(), attempts=attempts, user_triggered=user_triggered)
# ============================================================================
diff --git a/ymir/jira_issue_fetcher/jira_issue_fetcher.py b/ymir/jira_issue_fetcher/jira_issue_fetcher.py
index 1828f68fd..5f76f7268 100644
--- a/ymir/jira_issue_fetcher/jira_issue_fetcher.py
+++ b/ymir/jira_issue_fetcher/jira_issue_fetcher.py
@@ -45,6 +45,10 @@
configure_logging(level=logging.INFO)
logger = logging.getLogger(__name__)
+# Name of the Jira group that membership in identifies a Red Hat Employee.
+# Matches the value used by ymir/tools/privileged/jira.py.
+_RH_EMPLOYEE_GROUP = "Red Hat Employee"
+
class JiraIssueFetcher:
DEFAULT_QUERY = "project=RHEL and assignee = jotnar-project"
@@ -77,6 +81,16 @@ def __init__(self):
# Rate limiting
self.last_request_time = 0.0
+ # DRY_RUN: skip Jira writes (atomic label flips for ymir_todo /
+ # ymir_retry_needed) but still push tasks to Redis. The agent (also
+ # presumably in DRY_RUN) handles the rest of the dry-mode flow.
+ self.dry_run = os.getenv("DRY_RUN", "false").lower() == "true"
+ if self.dry_run:
+ logger.info(
+ "DRY_RUN=true — Jira label writes (atomic flips) will be "
+ "skipped; Redis pushes will proceed normally"
+ )
+
async def _rate_limit(self):
"""Enforce rate limiting of RATE_LIMIT_CALLS_PER_SECOND calls per second"""
current_time = time.time()
@@ -110,6 +124,136 @@ def _make_request_with_retries(self, url: str, json_data: dict[str, Any]) -> dic
response.raise_for_status()
return response.json()
+ @backoff.on_exception(
+ backoff.expo,
+ (requests.RequestException, requests.HTTPError),
+ max_tries=4,
+ base=2,
+ logger=logger,
+ )
+ def _edit_jira_labels(self, issue_key: str, add: list[str], remove: list[str]) -> None:
+ """
+ Atomically add/remove labels on a Jira issue via PUT /rest/api/3/issue/{key}.
+
+ Raises on permanent failure after retries. Callers must skip side effects
+ (e.g. Redis enqueue) when this raises — otherwise the next sweep would
+ re-pick-up the same issue without the in-progress marker.
+ """
+ url = urljoin(self.jira_url, f"rest/api/3/issue/{issue_key}")
+ update_ops: list[dict[str, str]] = [{"add": label} for label in add]
+ update_ops.extend({"remove": label} for label in remove)
+ payload = {"update": {"labels": update_ops}}
+
+ response = requests.put(url, json=payload, headers=self.headers, timeout=self.API_TIMEOUT)
+ if response.status_code == 429:
+ logger.warning(f"Rate limited (429) editing labels on {issue_key}, will retry")
+ raise requests.HTTPError("Rate limited", response=response)
+ response.raise_for_status()
+
+ @backoff.on_exception(
+ backoff.expo,
+ (requests.RequestException, requests.HTTPError),
+ max_tries=4,
+ base=2,
+ logger=logger,
+ )
+ def _make_get_request_with_retries(
+ self, url: str, params: dict[str, Any] | None = None
+ ) -> dict[str, Any]:
+ """Make a GET request with exponential backoff retries."""
+ response = requests.get(url, params=params, headers=self.headers, timeout=self.API_TIMEOUT)
+ if response.status_code == 429:
+ logger.warning("Rate limited (429), will retry with backoff")
+ raise requests.HTTPError("Rate limited", response=response)
+ response.raise_for_status()
+ return response.json()
+
+ def _label_added_by_rh_employee(self, issue_key: str) -> bool:
+ """Verify that the latest add of ymir_todo was performed by a Red Hat Employee.
+
+ The JQL no longer gates ymir_todo on the assignee, so the fetcher must
+ check per-issue that the label was added by a Red Hat Employee rather
+ than (e.g.) an external collaborator. Fetches the full changelog via
+ the dedicated /rest/api/3/issue/{issueKey}/changelog endpoint with
+ pagination to handle issues with long histories (which may exceed the
+ 100-entry limit when expanded inline). Picks the most-recent
+ ``ymir_todo`` add event and looks up that author's Jira group
+ memberships.
+
+ Returns False on any lookup or parsing failure — that path skips the
+ issue with a warning rather than treating an unverifiable label as a
+ legitimate trigger.
+ """
+ try:
+ # Fetch full changelog with pagination to handle long histories
+ changelog_url = urljoin(self.jira_url, f"rest/api/3/issue/{issue_key}/changelog")
+ latest_add_author: str | None = None
+ latest_add_time = ""
+ start_at = 0
+ max_results = 100 # Jira default and max per request
+
+ while True:
+ data = self._make_get_request_with_retries(
+ changelog_url,
+ params={"startAt": start_at, "maxResults": max_results},
+ )
+ histories = data.get("values", [])
+
+ if not histories:
+ break
+
+ # Find the most-recent entry that adds ymir_todo to labels. Track by
+ # `created` timestamp so the result is order-independent (ISO 8601
+ # strings are lexically comparable).
+ for history in histories:
+ created = history.get("created") or ""
+ for item in history.get("items", []):
+ if item.get("field") != "labels":
+ continue
+ from_labels = set((item.get("fromString") or "").split())
+ to_labels = set((item.get("toString") or "").split())
+ if JiraLabels.TODO.value in (to_labels - from_labels) and created > latest_add_time:
+ latest_add_time = created
+ latest_add_author = (history.get("author") or {}).get("accountId")
+ break # one labels item per history
+
+ # Check if there are more pages
+ is_last_page = data.get("isLastPage", True)
+ if is_last_page:
+ break
+
+ start_at += max_results
+
+ if not latest_add_author:
+ logger.warning(
+ f"No changelog entry adds {JiraLabels.TODO.value} to {issue_key}; "
+ f"cannot verify author, treating as non-RH-employee"
+ )
+ return False
+
+ user_data = self._make_get_request_with_retries(
+ urljoin(self.jira_url, "rest/api/3/user"),
+ params={"accountId": latest_add_author, "expand": "groups"},
+ )
+ groups = user_data.get("groups") or {}
+ items = groups.get("items") or []
+ group_names = [g.get("name") for g in items if g]
+ return _RH_EMPLOYEE_GROUP in group_names
+ except requests.HTTPError as e:
+ if e.response is not None and e.response.status_code in (400, 401, 403, 404):
+ logger.warning(
+ f"Permanent API error verifying {JiraLabels.TODO.value} author on {issue_key}: {e}; "
+ f"treating as non-RH-employee to avoid infinite retries"
+ )
+ return False
+ raise
+ except (ValueError, KeyError, AttributeError) as e:
+ logger.warning(
+ f"Failed to parse {JiraLabels.TODO.value} author on {issue_key}: {e}; "
+ f"treating as non-RH-employee"
+ )
+ return False
+
async def search_issues(self) -> list[dict[str, Any]]:
"""
Search for issues using the configured query with cursor-based pagination.
@@ -273,24 +417,83 @@ async def push_issues_to_queue(self, issues: list[dict[str, Any]]) -> int:
existing_keys = await self._get_existing_issue_keys(redis_conn)
remove_issues_for_retry = set()
- # Extend existing_keys with issues that have Ymir labels (except ymir_retry_needed)
+ user_triggered_keys = set()
+ retry_needed_keys = set()
+ # Extend existing_keys with issues that have Ymir labels (except ymir_retry_needed
+ # and ymir_todo, which both signal a re-run is wanted).
for issue in issues:
issue_key = issue.get("key")
- if issue_key:
- fields = issue.get("fields", {})
- labels = fields.get("labels", [])
- ymir_labels = [label for label in labels if label.startswith("ymir_")]
+ if not issue_key:
+ continue
- # If issue has Ymir labels and there is no ymir_retry_needed label, mark as existing
- if ymir_labels and JiraLabels.RETRY_NEEDED.value not in ymir_labels:
+ fields = issue.get("fields", {})
+ labels = fields.get("labels", [])
+ ymir_labels = [label for label in labels if label.startswith("ymir_")]
+ has_in_progress = any(label.endswith("_in_progress") for label in ymir_labels)
+
+ # ymir_todo is the maintainer-facing trigger. A run already in progress
+ # must not be re-enqueued — let it finish and the maintainer can re-add
+ # the label later if needed.
+ if JiraLabels.TODO.value in ymir_labels and not has_in_progress:
+ # The JQL no longer restricts ymir_todo by assignee, so we verify
+ # per-issue that the label was added by a Red Hat Employee. If
+ # not (or if the author can't be verified), skip the issue — the
+ # label may have been added by an external collaborator.
+ try:
+ is_rh_employee = self._label_added_by_rh_employee(issue_key)
+ except requests.RequestException as e:
+ logger.warning(
+ f"Transient error verifying {JiraLabels.TODO.value} author on {issue_key}: {e}; "
+ f"skipping this issue for this sweep"
+ )
existing_keys.add(issue_key)
- logger.info(f"Issue {issue_key} has Ymir labels {ymir_labels} - marking as existing")
- elif JiraLabels.RETRY_NEEDED.value in ymir_labels:
- logger.info(f"Issue {issue_key} has ymir_retry_needed label - marking for retry")
- remove_issues_for_retry.add(issue_key)
- elif not ymir_labels:
- logger.info(f"Issue {issue_key} has no Ymir labels - marking for retry")
+ continue
+
+ if not is_rh_employee:
+ logger.warning(
+ f"Issue {issue_key} has {JiraLabels.TODO.value} but the "
+ f"label was not added by a Red Hat Employee - skipping "
+ f"and removing the label"
+ )
+ existing_keys.add(issue_key)
+ # Remove the bogus label so we don't repeat the verification
+ # (two HTTP calls) on every subsequent sweep.
+ if self.dry_run:
+ logger.info(f"DRY_RUN: would remove {JiraLabels.TODO.value} from {issue_key}")
+ else:
+ try:
+ self._edit_jira_labels(issue_key, add=[], remove=[JiraLabels.TODO.value])
+ except Exception as e:
+ logger.warning(
+ f"Failed to remove {JiraLabels.TODO.value} from {issue_key}: {e}"
+ )
+ continue
+ logger.info(
+ f"Issue {issue_key} has {JiraLabels.TODO.value} added by an "
+ f"RH employee - marking for user-triggered run"
+ )
+ remove_issues_for_retry.add(issue_key)
+ user_triggered_keys.add(issue_key)
+ continue
+
+ # If issue has Ymir labels and there is no ymir_retry_needed label, mark as existing
+ if ymir_labels and JiraLabels.RETRY_NEEDED.value not in ymir_labels:
+ existing_keys.add(issue_key)
+ logger.info(f"Issue {issue_key} has Ymir labels {ymir_labels} - marking as existing")
+ elif JiraLabels.RETRY_NEEDED.value in ymir_labels:
+ if has_in_progress:
+ # Don't re-enqueue a retry-needed issue that's already running.
+ existing_keys.add(issue_key)
+ logger.info(
+ f"Issue {issue_key} has {JiraLabels.RETRY_NEEDED.value} "
+ "but is already in progress - skipping"
+ )
+ else:
+ logger.info(
+ f"Issue {issue_key} has {JiraLabels.RETRY_NEEDED.value} - marking for retry"
+ )
remove_issues_for_retry.add(issue_key)
+ retry_needed_keys.add(issue_key)
pushed_count = 0
skipped_count = 0
@@ -329,8 +532,46 @@ async def push_issues_to_queue(self, issues: list[dict[str, Any]]) -> int:
skipped_count += 1
continue
+ user_triggered = issue_key in user_triggered_keys
+ retry_needed = issue_key in retry_needed_keys
+
+ # For ymir_todo and ymir_retry_needed issues, atomically swap
+ # the trigger label for ymir_triage_in_progress before enqueueing.
+ # This dedupes against the very next sweep (which will see the
+ # in-progress marker and skip), and consumes the trigger so a
+ # stuck run doesn't loop. If this write fails after retries, do
+ # NOT push to the queue — otherwise the issue would be picked up
+ # again on the next sweep without the in-progress marker.
+ label_to_consume = None
+ if user_triggered:
+ label_to_consume = JiraLabels.TODO.value
+ elif retry_needed:
+ label_to_consume = JiraLabels.RETRY_NEEDED.value
+
+ if label_to_consume:
+ if self.dry_run:
+ logger.info(
+ f"DRY_RUN: would flip {label_to_consume} → "
+ f"{JiraLabels.TRIAGE_IN_PROGRESS.value} on {issue_key}; "
+ f"skipping Jira write but proceeding with Redis push"
+ )
+ else:
+ try:
+ self._edit_jira_labels(
+ issue_key,
+ add=[JiraLabels.TRIAGE_IN_PROGRESS.value],
+ remove=[label_to_consume],
+ )
+ except Exception as e:
+ logger.error(
+ f"Failed to flip {label_to_consume} → "
+ f"{JiraLabels.TRIAGE_IN_PROGRESS.value} on {issue_key} "
+ f"after retries; skipping enqueue to avoid duplicate processing: {e}"
+ )
+ continue
+
# Create task using shared Pydantic model
- task = Task.from_issue(issue_key)
+ task = Task.from_issue(issue_key, user_triggered=user_triggered)
await fix_await(redis_conn.lpush(RedisQueues.TRIAGE_QUEUE.value, task.to_json()))
pushed_count += 1
diff --git a/ymir/jira_issue_fetcher/tests/unit/test_jira_issue_fetcher.py b/ymir/jira_issue_fetcher/tests/unit/test_jira_issue_fetcher.py
index 4510196b1..e01d7beb4 100644
--- a/ymir/jira_issue_fetcher/tests/unit/test_jira_issue_fetcher.py
+++ b/ymir/jira_issue_fetcher/tests/unit/test_jira_issue_fetcher.py
@@ -304,6 +304,13 @@ async def test_push_issues_to_queue_skip_labeled_issues(fetcher, mock_redis_cont
create_async_mock_return_value(existing_keys)
)
+ # RETRY-1 must have its trigger label flipped atomically before enqueue.
+ flexmock(fetcher).should_receive("_edit_jira_labels").with_args(
+ "RETRY-1",
+ add=[JiraLabels.TRIAGE_IN_PROGRESS.value],
+ remove=[JiraLabels.RETRY_NEEDED.value],
+ ).once()
+
# Create real tasks and get their JSON representations
task1 = Task.from_issue("RETRY-1")
task2 = Task.from_issue("CLEAN-1")
@@ -447,6 +454,123 @@ async def test_push_issues_to_queue_max_issues_excludes_filtered(fetcher, mock_r
assert result == 2
+@pytest.mark.asyncio
+async def test_push_retry_needed_issue(fetcher, mock_redis_context):
+ """ymir_retry_needed: flip the label atomically, enqueue without user_triggered."""
+ mock_redis, _ = mock_redis_context
+
+ issues = [{"key": "RETRY-1", "fields": {"labels": [JiraLabels.RETRY_NEEDED.value]}}]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+
+ flexmock(fetcher).should_receive("_edit_jira_labels").with_args(
+ "RETRY-1",
+ add=[JiraLabels.TRIAGE_IN_PROGRESS.value],
+ remove=[JiraLabels.RETRY_NEEDED.value],
+ ).once()
+
+ # ymir_retry_needed can be set by either a maintainer or an agent retrying a
+ # failed run, so user_triggered stays False here — maintainers who want the
+ # user-triggered treatment (ack comment, comments on results) use ymir_todo.
+ expected_task = Task.from_issue("RETRY-1", user_triggered=False)
+ mock_redis.should_receive("lpush").with_args(
+ RedisQueues.TRIAGE_QUEUE.value, expected_task.to_json()
+ ).and_return(create_async_mock_return_value(1)).once()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 1
+
+
+@pytest.mark.asyncio
+async def test_skip_retry_needed_when_in_progress(fetcher, mock_redis_context):
+ """ymir_retry_needed + an in-progress label: do not enqueue, do not flip labels."""
+ mock_redis, _ = mock_redis_context
+
+ issues = [
+ {
+ "key": "RETRY-INPROG-1",
+ "fields": {"labels": [JiraLabels.RETRY_NEEDED.value, JiraLabels.TRIAGE_IN_PROGRESS.value]},
+ }
+ ]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+
+ flexmock(fetcher).should_receive("_edit_jira_labels").never()
+ mock_redis.should_receive("lpush").never()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 0
+
+
+@pytest.mark.asyncio
+async def test_retry_needed_skip_when_label_flip_fails(fetcher, mock_redis_context):
+ """If the retry-needed flip raises, skip the Redis push entirely."""
+ mock_redis, _ = mock_redis_context
+
+ issues = [{"key": "RETRY-FAIL", "fields": {"labels": [JiraLabels.RETRY_NEEDED.value]}}]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+
+ flexmock(fetcher).should_receive("_edit_jira_labels").and_raise(
+ requests.HTTPError("Jira write failed")
+ ).once()
+
+ mock_redis.should_receive("lpush").never()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 0
+
+
+@pytest.mark.parametrize(
+ ("trigger_label", "user_triggered", "issue_key"),
+ [
+ (JiraLabels.TODO.value, True, "TODO-DRY"),
+ (JiraLabels.RETRY_NEEDED.value, False, "RETRY-DRY"),
+ ],
+)
+@pytest.mark.asyncio
+async def test_dry_run_skips_flip_but_still_pushes(
+ monkeypatch, mock_env_vars, mock_redis_context, trigger_label, user_triggered, issue_key
+):
+ """DRY_RUN=true: skip the Jira atomic flip for trigger labels, but still push to Redis.
+
+ The pushed Task preserves user_triggered so the agent (also presumably in
+ DRY_RUN) sees the same dry-mode flow as it would for a real trigger.
+ """
+ monkeypatch.setenv("DRY_RUN", "true")
+ fetcher = JiraIssueFetcher()
+ mock_redis, _ = mock_redis_context
+
+ issues = [{"key": issue_key, "fields": {"labels": [trigger_label]}}]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+ # For ymir_todo issues the fetcher verifies the label-add author; we don't
+ # exercise that path here so unconditionally pass.
+ flexmock(fetcher).should_receive("_label_added_by_rh_employee").and_return(True)
+ # Must NOT touch Jira in dry-run mode.
+ flexmock(fetcher).should_receive("_edit_jira_labels").never()
+
+ expected_task = Task.from_issue(issue_key, user_triggered=user_triggered)
+ mock_redis.should_receive("lpush").with_args(
+ RedisQueues.TRIAGE_QUEUE.value, expected_task.to_json()
+ ).and_return(create_async_mock_return_value(1)).once()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 1
+
+
@pytest.mark.asyncio
async def test_run_full_workflow(fetcher):
"""Test the complete run workflow."""
@@ -468,9 +592,15 @@ async def test_run_full_workflow_with_labeled_issues(fetcher, mock_redis_context
"""Test the complete run workflow with issues that have different label states."""
mock_redis, _ = mock_redis_context
- # Create test issues with different label states
+ # Create test issues with different label states. All of these are
+ # *already known to Redis* (added to lists/queues below), so the
+ # fetcher's dedup must skip them — except for the explicit retry trigger,
+ # which overrides the skip.
mock_issues = [
- {"key": "ISSUE-1", "fields": {"labels": []}}, # No labels - should be pushed
+ {
+ "key": "ISSUE-1",
+ "fields": {"labels": []},
+ }, # No labels but already in OPEN_ENDED_ANALYSIS_LIST - should be skipped
{
"key": "ISSUE-2",
"fields": {"labels": ["ymir_rebase_in_progress"]},
@@ -482,8 +612,11 @@ async def test_run_full_workflow_with_labeled_issues(fetcher, mock_redis_context
{
"key": "ISSUE-4",
"fields": {"labels": ["ymir_retry_needed"]},
- }, # Has retry label - should be pushed
- {"key": "ISSUE-5", "fields": {"labels": []}}, # No labels - should be pushed
+ }, # Has retry label - should be pushed (retry overrides 'already known')
+ {
+ "key": "ISSUE-5",
+ "fields": {"labels": []},
+ }, # No labels but already in CLARIFICATION_NEEDED_QUEUE - should be skipped
{
"key": "ISSUE-6",
"fields": {"labels": ["ymir_completed"]},
@@ -597,22 +730,23 @@ async def test_run_full_workflow_with_labeled_issues(fetcher, mock_redis_context
RedisQueues.COMPLETED_BACKPORT_LIST.value, 0, -1
).and_return(create_async_mock_return_value([]))
- # Mock lpush calls for issues that should be pushed despite already existing
- # ISSUE-1, ISSUE-4, and ISSUE-5 should be pushed (no labels or retry_needed)
- # ISSUE-2, ISSUE-3, and ISSUE-6 should be skipped (have ymir labels)
- # The actual code pushes JSON strings, not just issue keys
- task1 = Task.from_issue("ISSUE-1")
- task4 = Task.from_issue("ISSUE-4")
- task5 = Task.from_issue("ISSUE-5")
- mock_redis.should_receive("lpush").with_args(RedisQueues.TRIAGE_QUEUE.value, task1.to_json()).and_return(
- create_async_mock_return_value(1)
+ # ISSUE-4 has ymir_retry_needed → atomic label flip before enqueue.
+ flexmock(fetcher).should_receive("_edit_jira_labels").with_args(
+ "ISSUE-4",
+ add=[JiraLabels.TRIAGE_IN_PROGRESS.value],
+ remove=[JiraLabels.RETRY_NEEDED.value],
).once()
+
+ # Only ISSUE-4 should be pushed: ymir_retry_needed explicitly overrides
+ # the "already known to Redis" skip. The two no-label issues
+ # (ISSUE-1, ISSUE-5) are already tracked in Redis lists, so the fetcher
+ # leaves them alone instead of double-pushing. The ymir-labelled issues
+ # (ISSUE-2, ISSUE-3, ISSUE-6) are skipped because they have terminal
+ # markers indicating processing is already happening or done.
+ task4 = Task.from_issue("ISSUE-4")
mock_redis.should_receive("lpush").with_args(RedisQueues.TRIAGE_QUEUE.value, task4.to_json()).and_return(
create_async_mock_return_value(1)
).once()
- mock_redis.should_receive("lpush").with_args(RedisQueues.TRIAGE_QUEUE.value, task5.to_json()).and_return(
- create_async_mock_return_value(1)
- ).once()
# Mock the methods that are called internally
flexmock(fetcher).should_receive("search_issues").and_return(
@@ -621,3 +755,234 @@ async def test_run_full_workflow_with_labeled_issues(fetcher, mock_redis_context
# Run the workflow
await fetcher.run()
+
+
+@pytest.mark.asyncio
+async def test_push_user_triggered_issue(fetcher, mock_redis_context):
+ """ymir_todo on an otherwise clean issue: enqueue as user_triggered, flip the label."""
+ mock_redis, _ = mock_redis_context
+
+ issues = [{"key": "TODO-1", "fields": {"labels": [JiraLabels.TODO.value]}}]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+ # Author verification passes — label was added by a Red Hat Employee.
+ flexmock(fetcher).should_receive("_label_added_by_rh_employee").with_args("TODO-1").and_return(
+ True
+ ).once()
+
+ # The critical label flip must be invoked before the Redis push.
+ flexmock(fetcher).should_receive("_edit_jira_labels").with_args(
+ "TODO-1",
+ add=[JiraLabels.TRIAGE_IN_PROGRESS.value],
+ remove=[JiraLabels.TODO.value],
+ ).once()
+
+ expected_task = Task.from_issue("TODO-1", user_triggered=True)
+ mock_redis.should_receive("lpush").with_args(
+ RedisQueues.TRIAGE_QUEUE.value, expected_task.to_json()
+ ).and_return(create_async_mock_return_value(1)).once()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 1
+
+
+@pytest.mark.asyncio
+async def test_skip_user_triggered_when_in_progress(fetcher, mock_redis_context):
+ """ymir_todo on an issue already in-progress: do not enqueue, do not flip labels."""
+ mock_redis, _ = mock_redis_context
+
+ issues = [
+ {
+ "key": "TODO-INPROG-1",
+ "fields": {"labels": [JiraLabels.TODO.value, JiraLabels.TRIAGE_IN_PROGRESS.value]},
+ }
+ ]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+
+ # in_progress short-circuits before author verification runs.
+ flexmock(fetcher).should_receive("_label_added_by_rh_employee").never()
+ # _edit_jira_labels must NOT be called for an in-progress issue.
+ flexmock(fetcher).should_receive("_edit_jira_labels").never()
+ # lpush must NOT be called either.
+ mock_redis.should_receive("lpush").never()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 0
+
+
+@pytest.mark.asyncio
+async def test_skip_user_triggered_when_not_rh_employee(fetcher, mock_redis_context):
+ """ymir_todo added by a non-RH user: skip the issue, remove the bogus label,
+ do not push."""
+ mock_redis, _ = mock_redis_context
+
+ issues = [{"key": "TODO-EXT-1", "fields": {"labels": [JiraLabels.TODO.value]}}]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+ flexmock(fetcher).should_receive("_label_added_by_rh_employee").with_args("TODO-EXT-1").and_return(
+ False
+ ).once()
+ # Bogus label is removed so the per-sweep verification cost doesn't repeat
+ # forever. No Redis push.
+ flexmock(fetcher).should_receive("_edit_jira_labels").with_args(
+ "TODO-EXT-1", add=[], remove=[JiraLabels.TODO.value]
+ ).once()
+ mock_redis.should_receive("lpush").never()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 0
+
+
+@pytest.mark.asyncio
+async def test_user_triggered_skip_when_label_flip_fails(fetcher, mock_redis_context):
+ """If the atomic label flip raises after retries, skip the Redis push entirely."""
+ mock_redis, _ = mock_redis_context
+
+ issues = [{"key": "TODO-FAIL", "fields": {"labels": [JiraLabels.TODO.value]}}]
+
+ flexmock(fetcher).should_receive("_get_existing_issue_keys").and_return(
+ create_async_mock_return_value(set())
+ )
+ flexmock(fetcher).should_receive("_label_added_by_rh_employee").with_args("TODO-FAIL").and_return(
+ True
+ ).once()
+
+ flexmock(fetcher).should_receive("_edit_jira_labels").and_raise(
+ requests.HTTPError("Jira write failed")
+ ).once()
+
+ # No push must occur — pushing without the in-progress marker would cause
+ # the next sweep to re-enqueue the same issue.
+ mock_redis.should_receive("lpush").never()
+
+ result = await fetcher.push_issues_to_queue(issues)
+
+ assert result == 0
+
+
+def _changelog_response(histories, is_last_page=True):
+ """Build a fake requests.get response for the paginated /changelog endpoint."""
+ mock = flexmock(status_code=200)
+ mock.should_receive("raise_for_status")
+ mock.should_receive("json").and_return({"values": histories, "isLastPage": is_last_page})
+ return mock
+
+
+def _user_response(groups):
+ """Build a fake requests.get response for the user-with-groups endpoint."""
+ mock = flexmock(status_code=200)
+ mock.should_receive("raise_for_status")
+ mock.should_receive("json").and_return({"groups": {"items": [{"name": g} for g in groups]}})
+ return mock
+
+
+def test_label_added_by_rh_employee_true(fetcher):
+ """Latest ymir_todo add was performed by a member of the Red Hat Employee group."""
+ histories = [
+ {
+ "created": "2026-05-25T13:54:47.861+0000",
+ "author": {"accountId": "rh-user-1"},
+ "items": [{"field": "labels", "fromString": "", "toString": "ymir_todo"}],
+ }
+ ]
+ flexmock(requests).should_receive("get").and_return(
+ _changelog_response(histories),
+ _user_response(["Red Hat Employee", "confluence-users"]),
+ ).one_by_one()
+
+ assert fetcher._label_added_by_rh_employee("RHEL-1") is True
+
+
+def test_label_added_by_rh_employee_false_when_author_not_in_group(fetcher):
+ """Latest ymir_todo add was performed by a user outside the Red Hat Employee group."""
+ histories = [
+ {
+ "created": "2026-05-25T13:54:47.861+0000",
+ "author": {"accountId": "external-1"},
+ "items": [{"field": "labels", "fromString": "", "toString": "ymir_todo"}],
+ }
+ ]
+ flexmock(requests).should_receive("get").and_return(
+ _changelog_response(histories),
+ _user_response(["confluence-users"]),
+ ).one_by_one()
+
+ assert fetcher._label_added_by_rh_employee("RHEL-2") is False
+
+
+def test_label_added_by_rh_employee_picks_latest_add(fetcher):
+ """If ymir_todo was added by an RH user then removed and re-added by an external
+ user, the external user (latest add) wins and the helper returns False."""
+ histories = [
+ {
+ "created": "2026-05-20T10:00:00.000+0000",
+ "author": {"accountId": "rh-user-1"},
+ "items": [{"field": "labels", "fromString": "", "toString": "ymir_todo"}],
+ },
+ {
+ "created": "2026-05-21T10:00:00.000+0000",
+ "author": {"accountId": "rh-user-1"},
+ "items": [{"field": "labels", "fromString": "ymir_todo", "toString": ""}],
+ },
+ {
+ "created": "2026-05-22T10:00:00.000+0000",
+ "author": {"accountId": "external-1"},
+ "items": [{"field": "labels", "fromString": "", "toString": "ymir_todo"}],
+ },
+ ]
+ flexmock(requests).should_receive("get").and_return(
+ _changelog_response(histories),
+ _user_response(["confluence-users"]),
+ ).one_by_one()
+
+ assert fetcher._label_added_by_rh_employee("RHEL-3") is False
+
+
+def test_label_added_by_rh_employee_false_when_no_add_event(fetcher):
+ """No changelog entry adds ymir_todo (label predates the changelog or was
+ written via a path Jira does not record): treat as non-RH-employee."""
+ histories = [
+ {
+ "created": "2026-05-25T13:54:47.861+0000",
+ "author": {"accountId": "rh-user-1"},
+ "items": [{"field": "status", "fromString": "To Do", "toString": "In Progress"}],
+ }
+ ]
+ flexmock(requests).should_receive("get").and_return(_changelog_response(histories)).once()
+
+ assert fetcher._label_added_by_rh_employee("RHEL-4") is False
+
+
+def test_label_added_by_rh_employee_walks_paginated_changelog(fetcher):
+ """The ymir_todo add appears on the second page; helper must paginate to find it."""
+ page1 = [
+ {
+ "created": "2026-04-01T10:00:00.000+0000",
+ "author": {"accountId": "rh-user-1"},
+ "items": [{"field": "status", "fromString": "To Do", "toString": "In Progress"}],
+ }
+ ]
+ page2 = [
+ {
+ "created": "2026-05-25T13:54:47.861+0000",
+ "author": {"accountId": "rh-user-1"},
+ "items": [{"field": "labels", "fromString": "", "toString": "ymir_todo"}],
+ }
+ ]
+ flexmock(requests).should_receive("get").and_return(
+ _changelog_response(page1, is_last_page=False),
+ _changelog_response(page2, is_last_page=True),
+ _user_response(["Red Hat Employee"]),
+ ).one_by_one()
+
+ assert fetcher._label_added_by_rh_employee("RHEL-LONG") is True
diff --git a/ymir/tools/privileged/gitlab.py b/ymir/tools/privileged/gitlab.py
index 9d6830ef5..03a61cfbc 100644
--- a/ymir/tools/privileged/gitlab.py
+++ b/ymir/tools/privileged/gitlab.py
@@ -91,7 +91,10 @@ def _is_private_gitlab(url: str) -> bool:
return True
if hostname != "gitlab.com":
return False
- return parsed.path.startswith(_REDHAT_WEB_PREFIX) or parsed.path.startswith(_REDHAT_API_PREFIX)
+ if parsed.path.startswith(_REDHAT_WEB_PREFIX) or parsed.path.startswith(_REDHAT_API_PREFIX):
+ return True
+ fork_namespace = os.getenv("FORK_NAMESPACE", "").strip("/")
+ return bool(fork_namespace and parsed.path.startswith(f"/{fork_namespace}/"))
def _get_api_diff_url(url: str) -> str:
diff --git a/ymir/tools/privileged/tests/unit/test_gitlab.py b/ymir/tools/privileged/tests/unit/test_gitlab.py
index e5049e73c..3b7486de3 100644
--- a/ymir/tools/privileged/tests/unit/test_gitlab.py
+++ b/ymir/tools/privileged/tests/unit/test_gitlab.py
@@ -22,6 +22,7 @@
OpenMergeRequestTool,
PushToRemoteRepositoryTool,
RetryPipelineJobTool,
+ _get_git_auth_args,
)
@@ -82,6 +83,49 @@ async def test_fork_repository(repository, fork_exists, fork_namespace):
assert (await ForkRepositoryTool().run(input={"repository": repository})).result == clone_url
+@pytest.mark.parametrize(
+ "url, fork_namespace, token, expect_auth",
+ [
+ # Red Hat group on gitlab.com — always needs auth when token present
+ ("https://gitlab.com/redhat/centos-stream/rpms/vim", None, "tok", True),
+ # gitlab.cee.redhat.com — always needs auth when token present
+ ("https://gitlab.cee.redhat.com/foo/bar", None, "tok", True),
+ # Fork URL under bot namespace, FORK_NAMESPACE configured — needs auth
+ (
+ "https://gitlab.com/redhat-ymir-agent/centos_rpms_vim.git",
+ "redhat-ymir-agent",
+ "tok",
+ True,
+ ),
+ # Same fork URL but FORK_NAMESPACE not set — must NOT inject auth
+ # (would leak token to unrelated gitlab.com namespace)
+ ("https://gitlab.com/redhat-ymir-agent/centos_rpms_vim.git", None, "tok", False),
+ # Unrelated gitlab.com namespace with FORK_NAMESPACE set — still no auth
+ ("https://gitlab.com/some-other-user/repo.git", "redhat-ymir-agent", "tok", False),
+ # Public github.com — never auth
+ ("https://github.com/vim/vim", None, "tok", False),
+ # No token configured — no auth even for Red Hat URLs
+ ("https://gitlab.com/redhat/centos-stream/rpms/vim", None, None, False),
+ ],
+)
+def test_get_git_auth_args_handles_fork_namespace(monkeypatch, url, fork_namespace, token, expect_auth):
+ """Forks under FORK_NAMESPACE on gitlab.com must get the GITLAB_TOKEN injected for git push."""
+ monkeypatch.delenv("FORK_NAMESPACE", raising=False)
+ monkeypatch.delenv("GITLAB_TOKEN", raising=False)
+ if fork_namespace:
+ monkeypatch.setenv("FORK_NAMESPACE", fork_namespace)
+ if token:
+ monkeypatch.setenv("GITLAB_TOKEN", token)
+
+ args = _get_git_auth_args(url)
+ if expect_auth:
+ assert len(args) == 2
+ assert args[0] == "-c"
+ assert args[1].startswith("http.extraheader=Authorization: Basic ")
+ else:
+ assert args == []
+
+
@pytest.mark.asyncio
async def test_open_merge_request():
fork_url = "https://gitlab.com/ai-bot/bash.git"