From ce0c53db7b86b9e9d7c1873417093bdc84df900c Mon Sep 17 00:00:00 2001
From: Michael Bunsen
Date: Thu, 23 Apr 2026 11:41:51 -0700
Subject: [PATCH 1/5] fix(grouping): cap event duration at 24h for continuous-monitoring deployments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`group_images_into_events` splits images into events when there is a gap
larger than `max_time_gap` (default 2h). For continuous-monitoring
deployments the stream never has a 2h quiet period, so every image in the
deployment coalesces into a single multi-month event. Observed on
production deployment 489: one event spanning Apr 24 → Jul 23 2023 with
355k source_images.

Add a `max_event_duration` kwarg to `group_datetimes_by_gap` and thread it
through `group_images_into_events` with a default of 24 hours. When set, a
group is also split once its span would exceed the cap, even if no gap
triggers the split. `None` disables the cap (backward compatible).

Also removes the stale `# @TODO this event grouping needs testing. Still
getting events over 24 hours` comment now that events over 24h are
prevented by construction.

Tests: doctest for the continuous-monitoring case in `dates.py`, plus a
Django test that creates 3 days of 10-minute-interval captures and asserts
grouping produces >=3 events each <=24h.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 ami/main/models.py | 16 ++++++++++++----
 ami/main/tests.py  | 32 ++++++++++++++++++++++++++++++++
 ami/utils/dates.py | 28 +++++++++++++++++++++++++++-
 3 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/ami/main/models.py b/ami/main/models.py
index c604c6319..555e5d09d 100644
--- a/ami/main/models.py
+++ b/ami/main/models.py
@@ -1389,8 +1389,14 @@ def audit_event_lengths(deployment: Deployment):
         logger.error(f"Found {events_ending_before_start} event(s) with start > end in deployment {deployment}")
 
 
+DEFAULT_MAX_EVENT_DURATION = datetime.timedelta(hours=24)
+
+
 def group_images_into_events(
-    deployment: Deployment, max_time_gap=datetime.timedelta(minutes=120), delete_empty=True
+    deployment: Deployment,
+    max_time_gap=datetime.timedelta(minutes=120),
+    delete_empty=True,
+    max_event_duration: datetime.timedelta | None = DEFAULT_MAX_EVENT_DURATION,
 ) -> list[Event]:
     # Log a warning if multiple SourceImages have the same timestamp
     dupes = (
@@ -1417,9 +1423,11 @@ def group_images_into_events(
         .distinct()
     )
 
-    timestamp_groups = ami.utils.dates.group_datetimes_by_gap(image_timestamps, max_time_gap)
-    # @TODO this event grouping needs testing. Still getting events over 24 hours
-    # timestamp_groups = ami.utils.dates.group_datetimes_by_shifted_day(image_timestamps)
+    timestamp_groups = ami.utils.dates.group_datetimes_by_gap(
+        image_timestamps,
+        max_time_gap,
+        max_event_duration=max_event_duration,
+    )
 
     events = []
     for group in timestamp_groups:
diff --git a/ami/main/tests.py b/ami/main/tests.py
index 95b5d980c..eb234bf9f 100644
--- a/ami/main/tests.py
+++ b/ami/main/tests.py
@@ -236,6 +236,38 @@ def test_grouping(self):
         for event in events:
             assert event.captures.count() == images_per_night
 
+    def test_continuous_monitoring_capped_at_24_hours(self):
+        """
+        A deployment that captures images continuously (no gap > max_time_gap)
+        should still be broken into daily events by the max_event_duration cap,
+        not coalesced into one multi-day event.
+        """
+        import pathlib
+        import uuid
+
+        start = datetime.datetime(2023, 4, 24, 3, 22, 38)
+        # 3 days of images every 10 minutes — no gap ever exceeds max_time_gap
+        interval = datetime.timedelta(minutes=10)
+        total_span = datetime.timedelta(days=3)
+        count = int(total_span / interval)
+        for i in range(count):
+            SourceImage.objects.create(
+                deployment=self.deployment,
+                timestamp=start + i * interval,
+                path=pathlib.Path("test") / f"{uuid.uuid4().hex[:8]}_continuous_{i}.jpg",
+            )
+
+        events = group_images_into_events(
+            deployment=self.deployment,
+            max_time_gap=datetime.timedelta(hours=2),
+            max_event_duration=datetime.timedelta(hours=24),
+        )
+
+        assert len(events) >= 3, f"expected at least 3 daily events, got {len(events)}"
+        for event in events:
+            duration = event.end - event.start
+            assert duration <= datetime.timedelta(hours=24), f"event {event.pk} spans {duration}, exceeds 24h cap"
+
     def test_pruning_empty_events(self):
         from ami.main.models import delete_empty_events
 
diff --git a/ami/utils/dates.py b/ami/utils/dates.py
index 02dfc4350..bdc2dcf21 100644
--- a/ami/utils/dates.py
+++ b/ami/utils/dates.py
@@ -112,10 +112,15 @@ def format_timedelta(duration: datetime.timedelta | None) -> str:
 def group_datetimes_by_gap(
     timestamps: list[datetime.datetime],
     max_time_gap=datetime.timedelta(minutes=120),
+    max_event_duration: datetime.timedelta | None = None,
 ) -> list[list[datetime.datetime]]:
     """
     Divide a list of timestamps into groups based on a maximum time gap.
 
+    When ``max_event_duration`` is set, a group is also split once it would
+    exceed that duration. This prevents continuous-monitoring deployments
+    (no quiet gap between nights) from producing a single multi-month group.
+
     >>> timestamps = [
     ...     datetime.datetime(2021, 1, 1, 0, 10, 0),  # @TODO confirm the first gap is having an effect
     ...     datetime.datetime(2021, 1, 1, 0, 19, 0),
@@ -145,6 +150,22 @@ def group_datetimes_by_gap(
     >>> result = group_datetimes_by_gap(timestamps, max_time_gap=datetime.timedelta(minutes=1))
     >>> len(result)
     10
+
+    Continuous-monitoring case: a long gap-free stream gets capped by
+    ``max_event_duration`` even when no gap exceeds ``max_time_gap``.
+
+    >>> continuous = [datetime.datetime(2021, 1, 1) + datetime.timedelta(minutes=5 * i) for i in range(24 * 12 * 3)]
+    >>> len(group_datetimes_by_gap(continuous, max_time_gap=datetime.timedelta(minutes=120)))
+    1
+    >>> groups = group_datetimes_by_gap(
+    ...     continuous,
+    ...     max_time_gap=datetime.timedelta(minutes=120),
+    ...     max_event_duration=datetime.timedelta(hours=24),
+    ... )
+    >>> len(groups)
+    3
+    >>> all((g[-1] - g[0]) <= datetime.timedelta(hours=24) for g in groups)
+    True
     """
     timestamps.sort()
     prev_timestamp: datetime.datetime | None = None
@@ -157,7 +178,12 @@ def group_datetimes_by_gap(
         else:
             delta = datetime.timedelta(0)
 
-        if delta >= max_time_gap:
+        split_by_gap = delta >= max_time_gap
+        split_by_duration = (
+            max_event_duration is not None and current_group and (timestamp - current_group[0]) > max_event_duration
+        )
+
+        if split_by_gap or split_by_duration:
             groups.append(current_group)
             current_group = []
 

From 847b1c40af8951977b46d263b22a140cc6010fb4 Mon Sep 17 00:00:00 2001
From: Michael Bunsen
Date: Fri, 24 Apr 2026 08:34:58 -0700
Subject: [PATCH 2/5] fix(grouping): refresh cached fields on events whose captures moved
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses Copilot review feedback on #1268.

When regrouping a deployment that already has events,
`group_images_into_events` reuses any existing event whose `group_by` date
matches the first day of a new group. Before this change, a reused event
was saved once early in the loop (while still holding its pre-regroup
captures) and never refreshed after later iterations moved captures away
to new events. Result: the reused event's cached `start` / `end` /
`captures_count` / `detections_count` / `occurrences_count` stayed stale —
e.g. a 90-day mega-event being re-grouped under a 24h cap would physically
hold only ~24h of captures but still report the 90-day span.

Fix: track every event touched by grouping (both newly-created / reused in
this pass, and any event that lost captures to the UPDATE), then call
`update_calculated_fields_for_events(pks=...)` in a single pass after the
loop to refresh cached fields against each event's final capture set.

This narrowly scoped fix coexists with #904, which is expected to rework
the `group_by`-based reuse path more thoroughly.

Tests:

- New `test_regrouping_existing_long_event_refreshes_cached_fields`:
  creates 3 days of continuous captures, groups once with the cap disabled
  (one mega-event), then re-groups with the 24h cap and asserts (a) no
  event exceeds 24h and (b) `sum(captures_count)` across events equals the
  total image count. Avoids asserting on `group_by` specifics so it
  remains valid under #904's refactor.
- Existing `test_continuous_monitoring_capped_at_24_hours` refactored to
  share a `_populate_continuous_captures` helper.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 ami/main/models.py | 21 ++++++++++++++
 ami/main/tests.py  | 70 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/ami/main/models.py b/ami/main/models.py
index 555e5d09d..96f1567fb 100644
--- a/ami/main/models.py
+++ b/ami/main/models.py
@@ -1430,6 +1430,7 @@ def group_images_into_events(
     )
 
     events = []
+    touched_event_pks: set[int] = set()
     for group in timestamp_groups:
         if not len(group):
             continue
@@ -1453,6 +1454,18 @@ def group_images_into_events(
             defaults={"start": start_date, "end": end_date},
         )
         events.append(event)
+        touched_event_pks.add(event.pk)
+
+        # Track events currently holding these captures — they'll lose captures
+        # to the UPDATE below and need their cached fields refreshed at the end.
+        touched_event_pks.update(
+            SourceImage.objects.filter(deployment=deployment, timestamp__in=group)
+            .exclude(event__isnull=True)
+            .exclude(event=event)
+            .values_list("event_id", flat=True)
+            .distinct()
+        )
+
         SourceImage.objects.filter(deployment=deployment, timestamp__in=group).update(event=event)
         event.save()  # Update start and end times and other cached fields
         logger.info(
@@ -1464,6 +1477,14 @@ def group_images_into_events(
         f"Done grouping {len(image_timestamps)} captures into {len(events)} events "
         f"for deployment {deployment}"
     )
+    # Refresh cached fields on every event touched by grouping. An event reused
+    # via matching group_by can lose captures to new events created by later
+    # iterations above, leaving its start/end/captures_count stale — e.g. a
+    # pre-existing multi-month event being re-grouped under a 24h cap.
+    # (#904 is expected to rework this reuse path more thoroughly.)
+    if touched_event_pks:
+        update_calculated_fields_for_events(pks=list(touched_event_pks))
+
     if delete_empty:
         logger.info("Deleting empty events for deployment")
         delete_empty_events(deployment=deployment)
diff --git a/ami/main/tests.py b/ami/main/tests.py
index eb234bf9f..d1e14109f 100644
--- a/ami/main/tests.py
+++ b/ami/main/tests.py
@@ -236,26 +236,29 @@ def test_grouping(self):
         for event in events:
             assert event.captures.count() == images_per_night
 
-    def test_continuous_monitoring_capped_at_24_hours(self):
-        """
-        A deployment that captures images continuously (no gap > max_time_gap)
-        should still be broken into daily events by the max_event_duration cap,
-        not coalesced into one multi-day event.
-        """
+    def _populate_continuous_captures(self, days: int = 3, interval_minutes: int = 10):
+        """Create ``days`` of gap-free captures (no gap > ``interval_minutes``)."""
         import pathlib
         import uuid
 
         start = datetime.datetime(2023, 4, 24, 3, 22, 38)
-        # 3 days of images every 10 minutes — no gap ever exceeds max_time_gap
-        interval = datetime.timedelta(minutes=10)
-        total_span = datetime.timedelta(days=3)
-        count = int(total_span / interval)
+        interval = datetime.timedelta(minutes=interval_minutes)
+        count = int(datetime.timedelta(days=days) / interval)
         for i in range(count):
             SourceImage.objects.create(
                 deployment=self.deployment,
                 timestamp=start + i * interval,
                 path=pathlib.Path("test") / f"{uuid.uuid4().hex[:8]}_continuous_{i}.jpg",
             )
+        return count
+
+    def test_continuous_monitoring_capped_at_24_hours(self):
+        """
+        A deployment that captures images continuously (no gap > max_time_gap)
+        should still be broken into daily events by the max_event_duration cap,
+        not coalesced into one multi-day event.
+        """
+        self._populate_continuous_captures(days=3, interval_minutes=10)
 
         events = group_images_into_events(
             deployment=self.deployment,
@@ -268,6 +271,53 @@ def test_continuous_monitoring_capped_at_24_hours(self):
             duration = event.end - event.start
             assert duration <= datetime.timedelta(hours=24), f"event {event.pk} spans {duration}, exceeds 24h cap"
 
+    def test_regrouping_existing_long_event_refreshes_cached_fields(self):
+        """
+        Regression test for the regroup-existing-events path: a deployment
+        already grouped into a single multi-day event should, after re-running
+        grouping with the 24h cap, end up with no events exceeding 24h AND
+        every reused event's cached start/end/captures_count must reflect its
+        current captures (not its pre-regroup state).
+
+        This is narrower than #904's refactor on purpose: it asserts the
+        observable cap+refresh behavior without depending on the specific
+        group_by reuse mechanics that #904 is expected to remove.
+        """
+        total_captures = self._populate_continuous_captures(days=3, interval_minutes=10)
+
+        # First pass with the cap disabled → a single multi-day "mega-event".
+        events_uncapped = group_images_into_events(
+            deployment=self.deployment,
+            max_time_gap=datetime.timedelta(hours=2),
+            max_event_duration=None,
+        )
+        assert len(events_uncapped) == 1
+        mega_event = events_uncapped[0]
+        assert (mega_event.end - mega_event.start) > datetime.timedelta(hours=24)
+
+        # Second pass with the cap → must split the mega-event and refresh
+        # cached fields on the reused event.
+        group_images_into_events(
+            deployment=self.deployment,
+            max_time_gap=datetime.timedelta(hours=2),
+            max_event_duration=datetime.timedelta(hours=24),
+        )
+
+        all_events = Event.objects.filter(deployment=self.deployment)
+        assert all_events.count() >= 3
+
+        for event in all_events:
+            duration = event.end - event.start
+            assert duration <= datetime.timedelta(
+                hours=24
+            ), f"event {event.pk} spans {duration} after regroup; cached fields are stale"
+
+        total_assigned = sum(e.captures_count for e in all_events)
+        assert total_assigned == total_captures, (
+            f"captures_count across events ({total_assigned}) does not match total captures ({total_captures}); "
+            f"cached counters did not refresh after reassignment"
+        )
+
     def test_pruning_empty_events(self):
         from ami.main.models import delete_empty_events
 

From ad2ea6fff8c70604522fd8d26694660dd79e9483 Mon Sep 17 00:00:00 2001
From: Michael Bunsen
Date: Fri, 24 Apr 2026 08:56:01 -0700
Subject: [PATCH 3/5] test(grouping): tighten 24h cap regression tests per review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Change `>= 3` to `== 3` in both new tests — 432 captures over 3 days at
  10 min intervals must yield exactly 3 daily events under the 24h cap.
  Catches over-splitting regressions that `>= 3` would miss.
- Add per-event cached-count check after regroup: compare each event's
  cached `captures_count` against its actual related SourceImage count.
  Offsetting errors can hide stale counters in a sum-only check.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 ami/main/tests.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/ami/main/tests.py b/ami/main/tests.py
index d1e14109f..aaee82b4d 100644
--- a/ami/main/tests.py
+++ b/ami/main/tests.py
@@ -266,7 +266,9 @@ def test_continuous_monitoring_capped_at_24_hours(self):
             max_event_duration=datetime.timedelta(hours=24),
         )
 
-        assert len(events) >= 3, f"expected at least 3 daily events, got {len(events)}"
+        # 3 days × 24h / 10min = 432 captures; capped at 24h → exactly 3 events.
+        # `== 3` (not `>= 3`) guards against over-splitting regressions too.
+        assert len(events) == 3, f"expected exactly 3 daily events, got {len(events)}"
         for event in events:
             duration = event.end - event.start
             assert duration <= datetime.timedelta(hours=24), f"event {event.pk} spans {duration}, exceeds 24h cap"
@@ -304,7 +306,7 @@ def test_regrouping_existing_long_event_refreshes_cached_fields(self):
         )
 
         all_events = Event.objects.filter(deployment=self.deployment)
-        assert all_events.count() >= 3
+        assert all_events.count() == 3, f"expected exactly 3 events after regroup, got {all_events.count()}"
 
         for event in all_events:
             duration = event.end - event.start
@@ -312,10 +314,20 @@ def test_regrouping_existing_long_event_refreshes_cached_fields(self):
                 hours=24
             ), f"event {event.pk} spans {duration} after regroup; cached fields are stale"
 
+            # Per-event cached-count check: catches reused events whose captures_count
+            # was never refreshed after captures were reassigned away. A sum-only check
+            # can miss this when two events' errors offset each other.
+            actual_captures = SourceImage.objects.filter(event=event).count()
+            assert event.captures_count == actual_captures, (
+                f"event {event.pk} cached captures_count={event.captures_count} "
+                f"does not match actual related count={actual_captures}; cached counters are stale"
+            )
+
+        # Orphan check: every capture must belong to some event.
total_assigned = sum(e.captures_count for e in all_events) assert total_assigned == total_captures, ( f"captures_count across events ({total_assigned}) does not match total captures ({total_captures}); " - f"cached counters did not refresh after reassignment" + f"captures were orphaned during regroup" ) def test_pruning_empty_events(self): From 637afce279e65f5bc3740bd684a2548f4a0ee027 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Fri, 24 Apr 2026 19:52:52 -0700 Subject: [PATCH 4/5] fix(grouping): realign Occurrence.event_id and refresh deployment caches after regroup Occurrences are bound to an event once at creation time (from detection.source_image.event in Detection.associate_new_occurrence and Pipeline.save_results) and were never re-derived afterward. Without this refresh, a deployment regrouped under the 24h cap kept every occurrence pointing at its original (pre-cap) event regardless of when its detections actually fired, breaking every Occurrence.event-keyed query (occur_det_proj_evt index, Event.occurrences related-name, event_ids= filter chain). Realign Occurrence.event_id from each occurrence's first detection's source_image.event_id, and union the pre- and post-refresh event holders into touched_event_pks so the existing update_calculated_fields_for_events call covers both occurrence-losers and -gainers. Also swap the manual events_count-only update at the end of group_images_into_events for a full Deployment.update_calculated_fields() call. The async regroup_events task never went through Deployment.save's calculated-fields refresh, so the deployment list (occurrences_count, taxa_count, captures_count, detections_count) showed pre-regroup numbers until the next save touched the deployment. Adds test_regrouping_realigns_occurrence_event_id covering the detection->source_image->event chain post-regroup plus per-event occurrences_count consistency against the live get_occurrences_count helper that the cache itself uses. Co-Authored-By: Claude --- ami/main/models.py | 38 ++++++++++++++++-- ami/main/tests.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 3 deletions(-) diff --git a/ami/main/models.py b/ami/main/models.py index 96f1567fb..bd3a44f6a 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -1477,6 +1477,33 @@ def group_images_into_events( f"Done grouping {len(image_timestamps)} captures into {len(events)} events " f"for deployment {deployment}" ) + # Realign Occurrence.event_id with each occurrence's detections' current + # source_image.event_id. Occurrences are bound to an event once at creation + # time (Detection.associate_new_occurrence and Pipeline.save_results both + # read source_image.event), and are never re-derived afterward. Without + # this refresh, a deployment regrouped under the 24h cap keeps every + # occurrence pointing at its original (pre-cap) event regardless of when + # its detections actually fired — breaking every Occurrence.event-keyed + # query (the occur_det_proj_evt index, Event.occurrences related-name, + # event_ids= filters at models.py:4232–4477). Track the events currently + # held by occurrences in this deployment before and after the refresh so + # update_calculated_fields_for_events below picks up both losers and + # gainers of occurrences when it recomputes occurrences_count. 
+    deployment_occurrences = Occurrence.objects.filter(deployment=deployment)
+    touched_event_pks.update(
+        deployment_occurrences.exclude(event__isnull=True).values_list("event_id", flat=True).distinct()
+    )
+    deployment_occurrences.update(
+        event_id=models.Subquery(
+            Detection.objects.filter(occurrence_id=models.OuterRef("pk"))
+            .order_by("source_image__timestamp")
+            .values("source_image__event_id")[:1]
+        )
+    )
+    touched_event_pks.update(
+        deployment_occurrences.exclude(event__isnull=True).values_list("event_id", flat=True).distinct()
+    )
+
     # Refresh cached fields on every event touched by grouping. An event reused
     # via matching group_by can lose captures to new events created by later
     # iterations above, leaving its start/end/captures_count stale — e.g. a
@@ -1494,9 +1521,14 @@ def group_images_into_events(
         logger.info(f"Setting image dimensions for event {event}")
         set_dimensions_for_collection(event)
 
-    logger.info("Updating relevant cached fields on deployment")
-    deployment.events_count = len(events)
-    deployment.save(update_calculated_fields=False, update_fields=["events_count"])
+    # Refresh deployment-level cached counts. The async regroup_events task
+    # never goes through Deployment.save's calculated-fields refresh, so
+    # without this call the deployment list (occurrences_count, taxa_count,
+    # etc.) keeps showing pre-regroup numbers until the next save touches it.
+    # The save inside update_calculated_fields uses update_calculated_fields=False
+    # so it doesn't re-enter the regroup path.
+    logger.info("Updating cached fields on deployment")
+    deployment.update_calculated_fields(save=True)
 
     audit_event_lengths(deployment)
 
diff --git a/ami/main/tests.py b/ami/main/tests.py
index aaee82b4d..3b1c0ec8c 100644
--- a/ami/main/tests.py
+++ b/ami/main/tests.py
@@ -330,6 +330,103 @@ def test_regrouping_existing_long_event_refreshes_cached_fields(self):
             f"captures were orphaned during regroup"
         )
 
+    def test_regrouping_realigns_occurrence_event_id(self):
+        """
+        Regression test for stale ``Occurrence.event_id`` after regroup.
+
+        Occurrences are bound to an event once at creation time (from
+        ``detection.source_image.event``). When the 24h cap runs against a
+        deployment that already has detections + occurrences attached to a
+        single mega-event, the source_images are reassigned but the
+        occurrences' event_ids stay stuck at the mega-event unless we
+        explicitly realign them. This test asserts the realignment plus the
+        downstream ``occurrences_count`` consistency on the daily events.
+        """
+        self._populate_continuous_captures(days=3, interval_minutes=10)
+        captures = list(SourceImage.objects.filter(deployment=self.deployment).order_by("timestamp"))
+        captures_per_day = len(captures) // 3
+
+        # First pass with the cap disabled → one mega-event holding everything.
+        group_images_into_events(
+            deployment=self.deployment,
+            max_time_gap=datetime.timedelta(hours=2),
+            max_event_duration=None,
+        )
+        mega_event = Event.objects.get(deployment=self.deployment)
+
+        # One occurrence per day, attached to a capture from that day.
+        # Indices: 0 → day 0, captures_per_day → day 1, 2 * captures_per_day → day 2.
+        targets = [captures[0], captures[captures_per_day], captures[2 * captures_per_day]]
+        occurrences = []
+        for capture in targets:
+            detection = Detection.objects.create(
+                source_image=capture,
+                timestamp=capture.timestamp,
+                bbox=[0.1, 0.1, 0.2, 0.2],
+            )
+            occurrence = Occurrence.objects.create(
+                event=mega_event,
+                deployment=self.deployment,
+                project=self.project,
+            )
+            detection.occurrence = occurrence
+            detection.save()
+            occurrences.append(occurrence)
+
+        # Sanity: all three occurrences point at the mega-event before regroup.
+        for occurrence in occurrences:
+            occurrence.refresh_from_db()
+            assert occurrence.event_id == mega_event.pk
+
+        # Second pass: 24h cap → 3 daily events, each occurrence must follow
+        # its detection's source_image into the corresponding daily event.
+        group_images_into_events(
+            deployment=self.deployment,
+            max_time_gap=datetime.timedelta(hours=2),
+            max_event_duration=datetime.timedelta(hours=24),
+        )
+
+        for occurrence in occurrences:
+            occurrence.refresh_from_db()
+            first_detection = (
+                Detection.objects.filter(occurrence=occurrence)
+                .select_related("source_image")
+                .order_by("source_image__timestamp")
+                .first()
+            )
+            assert first_detection is not None
+            expected_event_id = first_detection.source_image.event_id
+            assert occurrence.event_id == expected_event_id, (
+                f"occurrence {occurrence.pk}: stale event_id={occurrence.event_id} "
+                f"(expected {expected_event_id} from first detection's source_image)"
+            )
+
+        # Realignment must also have moved occurrences across events, not just
+        # left them all on the (reused) day-0 event. With detections on captures
+        # 0, captures_per_day, and 2*captures_per_day, the 24h cap should put
+        # at least one occurrence on a non-day-0 event.
+        non_day0_events = Event.objects.filter(deployment=self.deployment).exclude(
+            pk=Occurrence.objects.filter(pk=occurrences[0].pk).values("event_id")[:1]
+        )
+        assert Occurrence.objects.filter(event__in=non_day0_events).count() >= 1
+
+        # Each daily event's cached ``occurrences_count`` must match the live
+        # computation that ``update_calculated_fields`` itself uses (which
+        # applies the project's default filters). Catches the case where
+        # occurrences moved off an event but its cached counter wasn't
+        # refreshed because the event wasn't tracked as touched.
+        daily_events = Event.objects.filter(deployment=self.deployment)
+        assert daily_events.count() == 3
+        for event in daily_events:
+            expected = event.get_occurrences_count()
+            assert event.occurrences_count == expected, (
+                f"event {event.pk} cached occurrences_count={event.occurrences_count} "
+                f"!= live get_occurrences_count()={expected}; cached counter is stale"
+            )
+
+        # No occurrence should be left pointing at a deleted/missing event.
+        assert Occurrence.objects.filter(deployment=self.deployment, event__isnull=True).count() == 0
+
     def test_pruning_empty_events(self):
         from ami.main.models import delete_empty_events
 

From a2e4bb3bb65ac324e8d2c82d2ee5c7711fd03556 Mon Sep 17 00:00:00 2001
From: Michael Bunsen
Date: Fri, 24 Apr 2026 20:25:34 -0700
Subject: [PATCH 5/5] fix(grouping): address CodeRabbit nits on the realignment commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ``models.py:1488``: replace EN DASH (–) with ASCII hyphen so Ruff
  RUF003 doesn't flag the comment in strict environments.
- ``tests.py:test_regrouping_realigns_occurrence_event_id``: pick targets
  at mid-day offsets (12h / 36h / 60h) instead of by index.
  The cap's strict ``>`` keeps the capture at exactly 24h offset in the
  previous event, so index-based selection (``captures[len // 3]``) lands
  two of three targets in the same event. Mid-day offsets sit well inside
  each daily event's window, away from any boundary, so the test now
  actually exercises 3-way distribution as it claimed.
- Tighten the realignment assertion to require exactly 3 distinct
  ``event_id`` values across the three occurrences (the corrected
  invariant), replacing the looser ``>= 1`` non-day-0 check.

Co-Authored-By: Claude
---
 ami/main/models.py |  2 +-
 ami/main/tests.py  | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/ami/main/models.py b/ami/main/models.py
index bd3a44f6a..8d863df1a 100644
--- a/ami/main/models.py
+++ b/ami/main/models.py
@@ -1485,7 +1485,7 @@ def group_images_into_events(
     # occurrence pointing at its original (pre-cap) event regardless of when
     # its detections actually fired — breaking every Occurrence.event-keyed
     # query (the occur_det_proj_evt index, Event.occurrences related-name,
-    # event_ids= filters at models.py:4232–4477). Track the events currently
+    # event_ids= filters at models.py:4232-4477). Track the events currently
     # held by occurrences in this deployment before and after the refresh so
     # update_calculated_fields_for_events below picks up both losers and
     # gainers of occurrences when it recomputes occurrences_count.
diff --git a/ami/main/tests.py b/ami/main/tests.py
index 3b1c0ec8c..e47485587 100644
--- a/ami/main/tests.py
+++ b/ami/main/tests.py
@@ -344,7 +344,6 @@ def test_regrouping_realigns_occurrence_event_id(self):
         """
         self._populate_continuous_captures(days=3, interval_minutes=10)
         captures = list(SourceImage.objects.filter(deployment=self.deployment).order_by("timestamp"))
-        captures_per_day = len(captures) // 3
 
         # First pass with the cap disabled → one mega-event holding everything.
         group_images_into_events(
@@ -354,9 +353,18 @@ def test_regrouping_realigns_occurrence_event_id(self):
         )
         mega_event = Event.objects.get(deployment=self.deployment)
 
-        # One occurrence per day, attached to a capture from that day.
-        # Indices: 0 → day 0, captures_per_day → day 1, 2 * captures_per_day → day 2.
-        targets = [captures[0], captures[captures_per_day], captures[2 * captures_per_day]]
+        # One occurrence per day, picked at mid-day offsets (12h / 36h / 60h)
+        # so each target sits well inside its event's window, far from the
+        # exact 24h boundary where the cap's strict ``>`` semantics matter.
+        # Index-based selection (e.g. ``captures[len // 3]``) lands at exactly
+        # 24h offset, where Event 2 starts at 24h+10min (one capture past),
+        # so two of the three targets would otherwise share an event.
+        start_ts = captures[0].timestamp
+        targets = [
+            next(c for c in captures if c.timestamp >= start_ts + datetime.timedelta(hours=12)),
+            next(c for c in captures if c.timestamp >= start_ts + datetime.timedelta(hours=36)),
+            next(c for c in captures if c.timestamp >= start_ts + datetime.timedelta(hours=60)),
+        ]
         occurrences = []
         for capture in targets:
             detection = Detection.objects.create(
@@ -401,14 +409,13 @@ def test_regrouping_realigns_occurrence_event_id(self):
                 f"(expected {expected_event_id} from first detection's source_image)"
             )
 
-        # Realignment must also have moved occurrences across events, not just
-        # left them all on the (reused) day-0 event. With detections on captures
-        # 0, captures_per_day, and 2*captures_per_day, the 24h cap should put
-        # at least one occurrence on a non-day-0 event.
- non_day0_events = Event.objects.filter(deployment=self.deployment).exclude( - pk=Occurrence.objects.filter(pk=occurrences[0].pk).values("event_id")[:1] - ) - assert Occurrence.objects.filter(event__in=non_day0_events).count() >= 1 + # Realignment must move all three occurrences onto distinct daily + # events. With targets at mid-day offsets, the three occurrences land + # on three different events — one on each day. + distinct_event_ids = {occ.event_id for occ in occurrences} + assert ( + len(distinct_event_ids) == 3 + ), f"expected 3 distinct event_ids across occurrences, got {distinct_event_ids}" # Each daily event's cached ``occurrences_count`` must match the live # computation that ``update_calculated_fields`` itself uses (which