48 commits
b60eab0 merge (carlos-irreverentlabs, Jan 16, 2026)
644927f Merge remote-tracking branch 'upstream/main' (carlosgjs, Jan 22, 2026)
218f7aa Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 3, 2026)
90da389 Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 10, 2026)
8618d3c Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 13, 2026)
bd1be5f Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 17, 2026)
b102ae1 Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 19, 2026)
bc908aa fix: PSv2 follow-up fixes from integration tests (#1135) (mihow, Feb 21, 2026)
4c3802a PSv2: Improve task fetching & web worker concurrency configuration (#… (carlosgjs, Feb 21, 2026)
b717e80 fix: include pipeline_slug in MinimalJobSerializer (#1148) (mihow, Feb 21, 2026)
883c4f8 Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 24, 2026)
e26f3c6 Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 24, 2026)
4ef7a24 Merge branch 'RolnickLab:main' into main (mihow, Feb 27, 2026)
c389e90 Merge remote-tracking branch 'upstream/main' (carlosgjs, Feb 27, 2026)
33a6425 Merge branch 'main' of github.com:uw-ssec/antenna (carlosgjs, Feb 27, 2026)
bf80824 Merge remote-tracking branch 'upstream/main' (carlosgjs, Mar 4, 2026)
a2e68a0 WIP: Add support for NATS dead-letter-queue (carlosgjs, Mar 4, 2026)
db05526 Update tests (carlosgjs, Mar 6, 2026)
602f825 Add tests, cleanup naming and error handling (carlosgjs, Mar 10, 2026)
0102ee7 More CR feedback (carlosgjs, Mar 10, 2026)
b44f5b0 Use constant (carlosgjs, Mar 10, 2026)
5c3a47b CR (carlosgjs, Mar 10, 2026)
e09fd9a let exception propagate (carlosgjs, Mar 10, 2026)
e4564fb Use async_to_sync (carlosgjs, Mar 10, 2026)
9775627 refactor: rename ProcessingService last_checked → last_seen fields (#… (mihow, Feb 21, 2026)
8372e0a style: fix prettier formatting in pipeline.ts (mihow, Feb 21, 2026)
3066b29 feat: async PS liveness tracking and ProcessingServiceQuerySet API (mihow, Feb 27, 2026)
3fbcb0a feat: pull-mode PS status tracking and UI null endpoint fix (mihow, Feb 27, 2026)
4032790 fix: import error and null last_seen handling (mihow, Feb 27, 2026)
70a6898 fix: run async stale-check first, reduce beat task timeout and limits (mihow, Feb 27, 2026)
df4b5f2 fix: update pull-mode status tests to match heartbeat-based contract (mihow, Feb 27, 2026)
cd349f6 fix: scope heartbeat update to job's project (mihow, Feb 27, 2026)
9096625 feat: expose is_async property to frontend (mihow, Feb 27, 2026)
7a1a81a fix: periodic service check — async first, short timeout, discard sta… (mihow, Feb 27, 2026)
9cd2ddb docs: explain get_status feature for sync vs. async services (mihow, Mar 24, 2026)
e07e5ed fix(ui): coerce nullable lastSeenLive and use i18n for "Last seen" label (mihow, Mar 24, 2026)
e3b00ac fix: handle missing latency in service selection, improve beat task l… (mihow, Mar 24, 2026)
e2ff110 feat(ui): show "Unknown" status for async processing services (mihow, Mar 24, 2026)
770de40 fix(ui): treat async pipelines as selectable in pipeline picker (mihow, Mar 24, 2026)
f0598ac refactor(ui): use isAsync instead of !endpointUrl for async service c… (mihow, Mar 24, 2026)
ddc4a7c fix: consistent status response payload and defensive isAsync coerce (mihow, Mar 24, 2026)
0b9d283 fix: add missing 'project' field to ProcessingServiceSerializer field… (mihow, Mar 24, 2026)
203ad5d style: fix prettier formatting in processing-services-columns (mihow, Mar 24, 2026)
b72187a Merge branch 'carlos/natsdlq' into demo/integration (mihow, Mar 24, 2026)
d1e8836 Merge remote-tracking branch 'origin/feat/update-staging-compose' int… (mihow, Mar 24, 2026)
92ef439 Merge remote-tracking branch 'origin/feat/update-staging-compose' int… (mihow, Mar 24, 2026)
ccbabdd fix(ui): handle null occurrence determination gracefully (mihow, Mar 25, 2026)
0b59d9c fix: add occurrence determination reconciliation (mihow, Mar 25, 2026)
3 changes: 2 additions & 1 deletion .agents/AGENTS.md
@@ -276,7 +276,8 @@ Processing services are FastAPI applications that implement the AMI ML API contract
**Health Checks:**
- Cached status with 3 retries and exponential backoff (0s, 2s, 4s)
- Celery Beat task runs periodic checks (`ami.ml.tasks.check_processing_services_online`)
- Status stored in `ProcessingService.last_checked_live` boolean field
- Status stored in `ProcessingService.last_seen_live` boolean field
- Async/pull-mode services update status via `mark_seen()` when they register pipelines
- UI shows red/green indicator based on cached status

Location: `processing_services/` directory contains example implementations
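
A reviewer-oriented sketch of how the pieces described above fit together is shown below. It assumes `mark_seen()` and the stale check are exposed as queryset helpers, that async services are the ones without an `endpoint_url`, and that the staleness window comes from a `PROCESSING_SERVICE_LAST_SEEN_MAX` setting; the actual implementation in `ami/ml` may differ in names and placement.

```python
# Minimal sketch of the pull-mode liveness contract; names and placement are assumptions,
# the real implementation lives in ami/ml and may differ.
from datetime import timedelta

from django.db import models
from django.utils import timezone

LAST_SEEN_MAX = timedelta(minutes=5)  # stand-in for settings.PROCESSING_SERVICE_LAST_SEEN_MAX


class ProcessingServiceQuerySet(models.QuerySet):
    def async_services(self) -> "ProcessingServiceQuerySet":
        # Assumption: pull-mode services are the ones without an endpoint to poll directly.
        return self.filter(endpoint_url__isnull=True)

    def mark_seen(self) -> int:
        # Heartbeat: recorded when an async worker registers pipelines or polls for tasks.
        return self.update(last_seen=timezone.now(), last_seen_live=True)

    def mark_stale(self) -> int:
        # Periodic check: flip services offline once the heartbeat stops arriving.
        cutoff = timezone.now() - LAST_SEEN_MAX
        return self.filter(last_seen__lt=cutoff, last_seen_live=True).update(last_seen_live=False)
```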
5 changes: 3 additions & 2 deletions .agents/DATABASE_SCHEMA.md
@@ -255,8 +255,9 @@ erDiagram
bigint id PK
string name
string endpoint_url
boolean last_checked_live
float last_checked_latency
datetime last_seen
boolean last_seen_live
float last_seen_latency
}

ProjectPipelineConfig {
12 changes: 11 additions & 1 deletion ami/jobs/tests/test_tasks.py
@@ -17,7 +17,8 @@
from ami.jobs.models import Job, JobDispatchMode, JobState, MLJob
from ami.jobs.tasks import process_nats_pipeline_result
from ami.main.models import Detection, Project, SourceImage, SourceImageCollection
from ami.ml.models import Pipeline
from ami.ml.models import Algorithm, Pipeline
from ami.ml.models.algorithm import AlgorithmTaskType
from ami.ml.orchestration.async_job_state import AsyncJobStateManager
from ami.ml.schemas import PipelineResultsError, PipelineResultsResponse, SourceImageResponse
from ami.users.models import User
@@ -180,6 +181,15 @@ def test_process_nats_pipeline_result_mixed_results(self, mock_manager_class):
"""
mock_manager = self._setup_mock_nats(mock_manager_class)

# Create detection algorithm for the pipeline
detection_algorithm = Algorithm.objects.create(
name="test-detector",
key="test-detector",
task_type=AlgorithmTaskType.LOCALIZATION,
)
# Update pipeline to include detection algorithm
self.pipeline.algorithms.add(detection_algorithm)

# For this test, we just want to verify progress tracking works with mixed results
# We'll skip checking final job completion status since that depends on all stages

30 changes: 30 additions & 0 deletions ami/jobs/views.py
@@ -30,6 +30,30 @@
logger = logging.getLogger(__name__)


def _mark_pipeline_pull_services_seen(job: "Job") -> None:
"""
Record a heartbeat for async (pull-mode) processing services linked to the job's pipeline.

Called on every task-fetch and result-submit request so that the worker's polling activity
keeps last_seen/last_seen_live current. The periodic check_processing_services_online task
will mark services offline if this heartbeat stops arriving within PROCESSING_SERVICE_LAST_SEEN_MAX.

IMPORTANT: This marks ALL async services on the pipeline within this project as live, not just
the specific service that made the request. If multiple async services share the same pipeline
within a project, a single worker polling will keep all of them appearing online.
Once application-token auth is available (PR #1117), this should be scoped to the individual
calling service instead.
"""
import datetime

if not job.pipeline_id:
return
job.pipeline.processing_services.async_services().filter(projects=job.project_id).update(
last_seen=datetime.datetime.now(),
last_seen_live=True,
)


class JobFilterSet(filters.FilterSet):
"""Custom filterset to enable pipeline name filtering."""

@@ -245,6 +269,9 @@ def tasks(self, request, pk=None):
if not job.pipeline:
raise ValidationError("This job does not have a pipeline configured")

# Record heartbeat for async processing services on this pipeline
_mark_pipeline_pull_services_seen(job)

# Get tasks from NATS JetStream
from ami.ml.orchestration.nats_queue import TaskQueueManager

@@ -272,6 +299,9 @@ def result(self, request, pk=None):

job = self.get_object()

# Record heartbeat for async processing services on this pipeline
_mark_pipeline_pull_services_seen(job)

# Validate request data is a list
if isinstance(request.data, list):
results = request.data
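
To make the heartbeat contract above concrete, here is a minimal sketch of a pull-mode worker loop; the base URL, job id, payload shapes, and the `process()` stub are placeholders rather than the documented AMI API. The point is that every `tasks` fetch and every `result` submission refreshes `last_seen`/`last_seen_live` for the pipeline's async services.

```python
# Illustrative pull-mode worker loop; endpoint paths, auth, and payload shapes are
# assumptions for this sketch, not the documented AMI API contract.
import time

import requests

BASE_URL = "https://antenna.example.org/api/v2"  # placeholder host and prefix
JOB_ID = 123  # placeholder job id


def process(task: dict) -> dict:
    # Stand-in for the worker's actual inference code; returns one result payload per task.
    return {"task_id": task.get("id"), "detections": []}


def run_worker(session: requests.Session) -> None:
    while True:
        # Fetching tasks doubles as a heartbeat: the server-side view calls
        # _mark_pipeline_pull_services_seen() before handing out work.
        resp = session.get(f"{BASE_URL}/jobs/{JOB_ID}/tasks/", timeout=30)
        resp.raise_for_status()
        tasks = resp.json() or []

        if not tasks:
            time.sleep(10)  # idle polls still keep last_seen_live fresh
            continue

        results = [process(task) for task in tasks]

        # Submitting results refreshes the heartbeat as well.
        session.post(f"{BASE_URL}/jobs/{JOB_ID}/result/", json=results, timeout=60).raise_for_status()
```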
8 changes: 7 additions & 1 deletion ami/main/admin.py
@@ -265,6 +265,7 @@ class SourceImageAdmin(AdminBase):
"checksum",
"checksum_algorithm",
"created_at",
"get_was_processed",
)

list_filter = (
@@ -281,7 +282,12 @@
)

def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
return super().get_queryset(request).select_related("event", "deployment", "deployment__data_source")
return (
super()
.get_queryset(request)
.select_related("event", "deployment", "deployment__data_source")
.with_was_processed() # avoids N+1 from get_was_processed in list_display
)


class ClassificationInline(admin.TabularInline):
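
For context on the N+1 note above: `get_was_processed` is presumably a per-image check for whether any detection rows exist, and `with_was_processed()` folds that check into the list query as an annotation. A plausible sketch, not the actual queryset method, assuming `was_processed` simply means the image has at least one `Detection` row:

```python
# Plausible shape of the with_was_processed() annotation referenced above; the real method
# may define "processed" differently (e.g. including images with zero detections recorded).
from django.db.models import Exists, OuterRef, QuerySet


class SourceImageQuerySet(QuerySet):
    def with_was_processed(self) -> "SourceImageQuerySet":
        from ami.main.models import Detection  # local import to avoid a circular dependency

        # One annotated subquery instead of one get_was_processed() query per admin row.
        return self.annotate(
            was_processed=Exists(Detection.objects.filter(source_image=OuterRef("pk")))
        )
```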
2 changes: 2 additions & 0 deletions ami/main/api/serializers.py
@@ -1246,6 +1246,7 @@ class Meta:
"source_images",
"source_images_count",
"source_images_with_detections_count",
"source_images_processed_count",
"occurrences_count",
"taxa_count",
"description",
@@ -1547,6 +1548,7 @@ class EventTimelineIntervalSerializer(serializers.Serializer):
captures_count = serializers.IntegerField()
detections_count = serializers.IntegerField()
detections_avg = serializers.IntegerField()
was_processed = serializers.BooleanField()


class EventTimelineMetaSerializer(serializers.Serializer):
15 changes: 11 additions & 4 deletions ami/main/api/views.py
@@ -36,6 +36,7 @@
from ami.utils.storages import ConnectionTestResult

from ..models import (
NULL_DETECTIONS_FILTER,
Classification,
Deployment,
Detection,
@@ -378,7 +379,7 @@ def timeline(self, request, pk=None):
)
resolution = datetime.timedelta(minutes=resolution_minutes)

qs = SourceImage.objects.filter(event=event)
qs = SourceImage.objects.filter(event=event).with_was_processed() # type: ignore

# Bulk update all source images where detections_count is null
update_detection_counts(qs=qs, null_only=True)
@@ -404,7 +405,7 @@
source_images = list(
qs.filter(timestamp__range=(start_time, end_time))
.order_by("timestamp")
.values("id", "timestamp", "detections_count")
.values("id", "timestamp", "detections_count", "was_processed")
)

timeline = []
@@ -421,6 +422,7 @@
"captures_count": 0,
"detections_count": 0,
"detection_counts": [],
"was_processed": False,
}

while image_index < len(source_images) and source_images[image_index]["timestamp"] <= interval_end:
@@ -432,6 +434,9 @@
interval_data["detection_counts"] += [image["detections_count"]]
if image["detections_count"] >= max(interval_data["detection_counts"]):
interval_data["top_capture"] = SourceImage(pk=image["id"])
# Track if any image in this interval was processed
if image["was_processed"]:
interval_data["was_processed"] = True
image_index += 1

# Set a meaningful average detection count to display for the interval
@@ -602,7 +607,7 @@ def prefetch_detections(self, queryset: QuerySet, project: Project | None = None
score = get_default_classification_threshold(project, self.request)

prefetch_queryset = (
Detection.objects.all()
Detection.objects.exclude(NULL_DETECTIONS_FILTER)
.annotate(
determination_score=models.Max("occurrence__detections__classifications__score"),
# Store whether this occurrence should be included based on default filters
@@ -709,6 +714,7 @@ class SourceImageCollectionViewSet(DefaultViewSet, ProjectMixin):
SourceImageCollection.objects.all()
.with_source_images_count() # type: ignore
.with_source_images_with_detections_count()
.with_source_images_processed_count()
.prefetch_related("jobs")
)
serializer_class = SourceImageCollectionSerializer
@@ -724,6 +730,7 @@ class SourceImageCollectionViewSet(DefaultViewSet, ProjectMixin):
"method",
"source_images_count",
"source_images_with_detections_count",
"source_images_processed_count",
"occurrences_count",
]

@@ -898,7 +905,7 @@ class DetectionViewSet(DefaultViewSet, ProjectMixin):
API endpoint that allows detections to be viewed or edited.
"""

queryset = Detection.objects.all().select_related("source_image", "detection_algorithm")
queryset = Detection.objects.exclude(NULL_DETECTIONS_FILTER).select_related("source_image", "detection_algorithm")
serializer_class = DetectionSerializer
filterset_fields = ["source_image", "detection_algorithm", "source_image__project"]
ordering_fields = ["created_at", "updated_at", "detection_score", "timestamp"]
106 changes: 106 additions & 0 deletions ami/main/integrity.py
@@ -0,0 +1,106 @@
"""
Data integrity checks for the main app.

Functions here can be called from management commands, post-job hooks,
or periodic Celery tasks.
"""

import dataclasses
import logging

logger = logging.getLogger(__name__)


@dataclasses.dataclass
class ReconcileResult:
checked: int = 0
fixed: int = 0
unfixable: int = 0


def get_occurrences_missing_determination(
project_id: int | None = None,
job_id: int | None = None,
):
"""
Return occurrences that have detections with classifications but no determination set.

Occurrences with no classifications at all are excluded (they legitimately have no
determination).
"""
from ami.main.models import Occurrence

qs = Occurrence.objects.filter(
determination__isnull=True,
detections__classifications__isnull=False,
).distinct()

if project_id is not None:
qs = qs.filter(project_id=project_id)

if job_id is not None:
from ami.jobs.models import Job

job = Job.objects.get(pk=job_id)
if job.pipeline:
qs = qs.filter(
detections__classifications__algorithm__in=job.pipeline.algorithms.all(),
project_id=job.project_id,
)

return qs


def reconcile_missing_determinations(
project_id: int | None = None,
job_id: int | None = None,
occurrence_ids: list[int] | None = None,
dry_run: bool = False,
) -> ReconcileResult:
"""
Find occurrences missing determinations and attempt to fix them by re-running
update_occurrence_determination.
"""
from ami.main.models import update_occurrence_determination

if occurrence_ids is not None:
from ami.main.models import Occurrence

occurrences = (
Occurrence.objects.filter(
pk__in=occurrence_ids,
determination__isnull=True,
detections__classifications__isnull=False,
)
.distinct()
.select_related("determination")
)
else:
occurrences = get_occurrences_missing_determination(
project_id=project_id,
job_id=job_id,
).select_related("determination")

result = ReconcileResult(checked=occurrences.count())

if result.checked == 0 or dry_run:
return result

logger.info(f"Found {result.checked} occurrences missing determination")

for occurrence in occurrences.iterator():
try:
updated = update_occurrence_determination(occurrence, current_determination=None, save=True)
if updated:
result.fixed += 1
else:
result.unfixable += 1
except Exception:
result.unfixable += 1
logger.exception(f"Error reconciling occurrence {occurrence.pk}")

logger.info(
f"Reconciliation complete: {result.fixed} fixed, {result.unfixable} unfixable "
f"out of {result.checked} checked"
)
return result
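
The module docstring mentions periodic Celery tasks as one intended caller; a minimal sketch of such a wrapper is below. The task name, time limit, and return shape are placeholders and not part of this PR.

```python
# Hypothetical periodic wrapper around reconcile_missing_determinations; not part of this PR.
from celery import shared_task

from ami.main.integrity import reconcile_missing_determinations


@shared_task(soft_time_limit=300)
def reconcile_missing_determinations_task(project_id: int | None = None) -> dict:
    result = reconcile_missing_determinations(project_id=project_id)
    # Return a plain dict so the outcome is serializable by the Celery result backend.
    return {"checked": result.checked, "fixed": result.fixed, "unfixable": result.unfixable}
```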
35 changes: 35 additions & 0 deletions ami/main/management/commands/check_data_integrity.py
@@ -0,0 +1,35 @@
import logging

from django.core.management.base import BaseCommand

from ami.main.integrity import reconcile_missing_determinations

logger = logging.getLogger(__name__)


class Command(BaseCommand):
help = "Find and fix occurrences missing determinations."

def add_arguments(self, parser):
parser.add_argument("--dry-run", action="store_true", help="Report issues without fixing them")
parser.add_argument("--project", type=int, help="Limit to a specific project ID")
parser.add_argument("--job", type=int, help="Limit to occurrences related to a specific job ID")

def handle(self, *args, **options):
dry_run = options["dry_run"]
if dry_run:
self.stdout.write("DRY RUN — no changes will be made\n")

result = reconcile_missing_determinations(
project_id=options.get("project"),
job_id=options.get("job"),
dry_run=dry_run,
)

self.stdout.write(f"Checked: {result.checked}")
if result.fixed:
self.stdout.write(self.style.SUCCESS(f"Fixed: {result.fixed}"))
if result.unfixable:
self.stdout.write(self.style.WARNING(f"Unfixable: {result.unfixable}"))
if result.checked == 0:
self.stdout.write(self.style.SUCCESS("No issues found."))
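
For reference, the command defined above can also be invoked programmatically with Django's `call_command`; the project id below is a placeholder.

```python
from django.core.management import call_command

# Equivalent to: python manage.py check_data_integrity --dry-run --project 42
call_command("check_data_integrity", dry_run=True, project=42)
```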