-
Notifications
You must be signed in to change notification settings - Fork 13
Implement null detections (calculate what has been processed or not) #1093
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
ee3bc1b
573dee5
3f06833
342c3d2
fd6d22c
93aa8b0
1b12e14
f917853
7c9a848
20cbccf
15487cb
31dea6e
0f7abf8
80df653
c0132bd
5585785
8fba25a
8f15591
10d8c45
2a61fbb
3a7d9f9
fd36611
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -85,6 +85,8 @@ class TaxonRank(OrderedEnum): | |
| ] | ||
| ) | ||
|
|
||
| NULL_DETECTIONS_FILTER = Q(bbox__isnull=True) | Q(bbox=[]) | ||
|
|
||
|
|
||
| def get_media_url(path: str) -> str: | ||
| """ | ||
|
|
@@ -1775,6 +1777,17 @@ def with_taxa_count(self, project: Project | None = None, request=None): | |
| taxa_count=Coalesce(models.Subquery(taxa_subquery, output_field=models.IntegerField()), 0) | ||
| ) | ||
|
|
||
| def with_was_processed(self): | ||
| """ | ||
| Annotate each SourceImage with a boolean `was_processed` indicating | ||
| whether any detections exist for that image. | ||
|
|
||
| This mirrors `SourceImage.get_was_processed()` but as a queryset | ||
| annotation for efficient bulk queries. | ||
| """ | ||
| processed_exists = models.Exists(Detection.objects.filter(source_image_id=models.OuterRef("pk"))) | ||
| return self.annotate(was_processed=processed_exists) | ||
|
vanessavmac marked this conversation as resolved.
|
||
|
|
||
|
|
||
| class SourceImageManager(models.Manager.from_queryset(SourceImageQuerySet)): | ||
| pass | ||
|
|
@@ -1874,7 +1887,15 @@ def size_display(self) -> str: | |
| return filesizeformat(self.size) | ||
|
|
||
| def get_detections_count(self) -> int: | ||
| return self.detections.distinct().count() | ||
| # Detections count excludes detections without bounding boxes | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be a queryset method as well? I see it already exists before this PR.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not changed in this round — lower priority since it's only used per-instance (admin detail, tests). Could be a follow-up. |
||
| # Detections with null bounding boxes are valid and indicates the image was successfully processed | ||
| return self.detections.exclude(NULL_DETECTIONS_FILTER).count() | ||
|
|
||
| def get_was_processed(self, algorithm_key: str | None = None) -> bool: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could introduce n+1 queries, we have a queryset method for
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It appears this is only used in the admin & tests. The
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed: |
||
| if algorithm_key: | ||
| return self.detections.filter(detection_algorithm__key=algorithm_key).exists() | ||
| else: | ||
| return self.detections.exists() | ||
|
vanessavmac marked this conversation as resolved.
Outdated
|
||
|
|
||
| def get_base_url(self) -> str | None: | ||
| """ | ||
|
|
@@ -2044,6 +2065,7 @@ def update_detection_counts(qs: models.QuerySet[SourceImage] | None = None, null | |
|
|
||
| subquery = models.Subquery( | ||
| Detection.objects.filter(source_image_id=models.OuterRef("pk")) | ||
| .exclude(NULL_DETECTIONS_FILTER) | ||
| .values("source_image_id") | ||
| .annotate(count=models.Count("id")) | ||
| .values("count"), | ||
|
|
@@ -2514,6 +2536,15 @@ def save(self, *args, **kwargs): | |
| super().save(*args, **kwargs) | ||
|
|
||
|
|
||
| class DetectionQuerySet(BaseQuerySet): | ||
| def null_detections(self): | ||
| return self.filter(NULL_DETECTIONS_FILTER) | ||
|
|
||
|
|
||
| class DetectionManager(models.Manager.from_queryset(DetectionQuerySet)): | ||
| pass | ||
|
|
||
|
|
||
| @final | ||
| class Detection(BaseModel): | ||
| """An object detected in an image""" | ||
|
|
@@ -2582,6 +2613,8 @@ class Detection(BaseModel): | |
| source_image_id: int | ||
| detection_algorithm_id: int | ||
|
|
||
| objects = DetectionManager() | ||
|
|
||
| def get_bbox(self): | ||
| if self.bbox: | ||
| return BoundingBox( | ||
|
|
@@ -3752,7 +3785,18 @@ def with_source_images_count(self): | |
| def with_source_images_with_detections_count(self): | ||
| return self.annotate( | ||
| source_images_with_detections_count=models.Count( | ||
| "images", filter=models.Q(images__detections__isnull=False), distinct=True | ||
| "images", | ||
| filter=(~models.Q(images__detections__bbox__isnull=True) & ~models.Q(images__detections__bbox=[])), | ||
| distinct=True, | ||
| ) | ||
| ) | ||
|
|
||
| def with_source_images_processed_count(self): | ||
| return self.annotate( | ||
| source_images_processed_count=models.Count( | ||
| "images", | ||
| filter=models.Q(images__detections__isnull=False), | ||
| distinct=True, | ||
| ) | ||
| ) | ||
|
|
||
|
|
@@ -3863,7 +3907,10 @@ def source_images_count(self) -> int | None: | |
|
|
||
| def source_images_with_detections_count(self) -> int | None: | ||
| # This should always be pre-populated using queryset annotations | ||
| # return self.images.filter(detections__isnull=False).count() | ||
| return None | ||
|
|
||
| def source_images_processed_count(self) -> int | None: | ||
| # This should always be pre-populated using queryset annotations | ||
| return None | ||
|
|
||
| def occurrences_count(self) -> int | None: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,6 +84,9 @@ def filter_processed_images( | |
| task_logger.debug(f"Image {image} needs processing: has no existing detections from pipeline's detector") | ||
| # If there are no existing detections from this pipeline, send the image | ||
| yield image | ||
| elif existing_detections.null_detections().exists(): # type: ignore | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is unnecessary! it's covered by the first on check |
||
| task_logger.debug(f"Image {image} has a null detection from pipeline {pipeline}, skipping! ") | ||
| continue | ||
| elif existing_detections.filter(classifications__isnull=True).exists(): | ||
| # Check if there are detections with no classifications | ||
| task_logger.debug( | ||
|
|
@@ -406,13 +409,17 @@ def get_or_create_detection( | |
|
|
||
| :return: A tuple of the Detection object and a boolean indicating whether it was created | ||
| """ | ||
| serialized_bbox = list(detection_resp.bbox.dict().values()) | ||
| if detection_resp.bbox is not None: | ||
| serialized_bbox = list(detection_resp.bbox.dict().values()) | ||
| else: | ||
| serialized_bbox = None | ||
| detection_repr = f"Detection {detection_resp.source_image_id} {serialized_bbox}" | ||
|
vanessavmac marked this conversation as resolved.
|
||
|
|
||
| assert str(detection_resp.source_image_id) == str( | ||
| source_image.pk | ||
| ), f"Detection belongs to a different source image: {detection_repr}" | ||
|
|
||
| # When reprocessing, we don't care which detection algorithm created the existing detection | ||
| existing_detection = Detection.objects.filter( | ||
| source_image=source_image, | ||
| bbox=serialized_bbox, | ||
|
|
@@ -485,6 +492,7 @@ def create_detections( | |
|
|
||
| existing_detections: list[Detection] = [] | ||
| new_detections: list[Detection] = [] | ||
|
|
||
| for detection_resp in detections: | ||
| source_image = source_image_map.get(detection_resp.source_image_id) | ||
| if not source_image: | ||
|
|
@@ -810,6 +818,37 @@ class PipelineSaveResults: | |
| total_time: float | ||
|
|
||
|
|
||
| def create_null_detections_for_undetected_images( | ||
| results: PipelineResultsResponse, | ||
| detection_algorithm: Algorithm, | ||
| logger: logging.Logger = logger, | ||
| ) -> list[DetectionResponse]: | ||
| """ | ||
| Create null DetectionResponse objects (empty bbox) for images that have no detections. | ||
|
|
||
| :param results: The PipelineResultsResponse from the processing service | ||
| :param algorithms_known: Dictionary of algorithms keyed by algorithm key | ||
|
|
||
| :return: List of DetectionResponse objects with null bbox | ||
| """ | ||
| source_images_with_detections = {detection.source_image_id for detection in results.detections} | ||
| null_detections_to_add = [] | ||
| detection_algorithm_reference = AlgorithmReference(name=detection_algorithm.name, key=detection_algorithm.key) | ||
|
|
||
| for source_img in results.source_images: | ||
| if source_img.id not in source_images_with_detections: | ||
| null_detections_to_add.append( | ||
| DetectionResponse( | ||
| source_image_id=source_img.id, | ||
| bbox=None, | ||
| algorithm=detection_algorithm_reference, | ||
| timestamp=now(), | ||
| ) | ||
| ) | ||
|
|
||
| return null_detections_to_add | ||
|
vanessavmac marked this conversation as resolved.
|
||
|
|
||
|
|
||
| @celery_app.task(soft_time_limit=60 * 4, time_limit=60 * 5) | ||
| def save_results( | ||
| results: PipelineResultsResponse | None = None, | ||
|
|
@@ -857,6 +896,13 @@ def save_results( | |
| ) | ||
|
|
||
| algorithms_known: dict[str, Algorithm] = {algo.key: algo for algo in pipeline.algorithms.all()} | ||
| try: | ||
| detection_algorithm = pipeline.algorithms.get(task_type__in=Algorithm.detection_task_types) | ||
| except Algorithm.DoesNotExist: | ||
| raise ValueError("Pipeline does not have a detection algorithm") | ||
| except Algorithm.MultipleObjectsReturned: | ||
| raise NotImplementedError("Multiple detection algorithms per pipeline are not supported") | ||
|
vanessavmac marked this conversation as resolved.
vanessavmac marked this conversation as resolved.
|
||
|
|
||
| job_logger.info(f"Algorithms registered for pipeline: \n{', '.join(algorithms_known.keys())}") | ||
|
|
||
| if results.algorithms: | ||
|
|
@@ -866,6 +912,15 @@ def save_results( | |
| "Algorithms and category maps must be registered before processing, using /info endpoint." | ||
| ) | ||
|
|
||
| # Ensure all images have detections | ||
|
vanessavmac marked this conversation as resolved.
|
||
| # if not, add a NULL detection (empty bbox) to the results | ||
| null_detections = create_null_detections_for_undetected_images( | ||
| results=results, | ||
| detection_algorithm=detection_algorithm, | ||
| logger=job_logger, | ||
| ) | ||
| results.detections = results.detections + null_detections | ||
|
vanessavmac marked this conversation as resolved.
|
||
|
|
||
| detections = create_detections( | ||
| detections=results.detections, | ||
| algorithms_known=algorithms_known, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.