From 73f95b4fa1b01d757cdf2dac7bf42f430d73c8ac Mon Sep 17 00:00:00 2001 From: RihaanBH-1810 Date: Sun, 5 Apr 2026 21:54:36 +0530 Subject: [PATCH 1/4] add support for max_detection_per_image hyperparam --- perceptionmetrics/models/torch_detection.py | 3 +++ perceptionmetrics/models/utils/torchvision.py | 13 ++++++++++++- perceptionmetrics/models/utils/yolo.py | 10 ++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/perceptionmetrics/models/torch_detection.py b/perceptionmetrics/models/torch_detection.py index 93b1ab0c..556e04d4 100644 --- a/perceptionmetrics/models/torch_detection.py +++ b/perceptionmetrics/models/torch_detection.py @@ -299,10 +299,13 @@ def __init__( # Load confidence and NMS thresholds from config self.confidence_threshold = self.model_cfg.get("confidence_threshold", 0.5) self.nms_threshold = self.model_cfg.get("nms_threshold", 0.3) + self.max_detections_per_image = self.model_cfg.get("max_detections_per_image", 100) + self.postprocess_args = [self.confidence_threshold] if self.model_format == "yolo": self.postprocess_args.append(self.nms_threshold) + self.postprocess_args.append(self.max_detections_per_image) # Add reverse mapping for idx to class_name self.idx_to_class_name = {v["idx"]: k for k, v in self.ontology.items()} diff --git a/perceptionmetrics/models/utils/torchvision.py b/perceptionmetrics/models/utils/torchvision.py index 94575a7e..d33222e0 100644 --- a/perceptionmetrics/models/utils/torchvision.py +++ b/perceptionmetrics/models/utils/torchvision.py @@ -1,4 +1,4 @@ -def postprocess_detection(output: dict, confidence_threshold: float = 0.5): +def postprocess_detection(output: dict, confidence_threshold: float = 0.5, max_detections: int = 100): """Post-process torchvision model output. :param output: Dictionary with keys 'boxes', 'labels', and 'scores'. @@ -15,4 +15,15 @@ def postprocess_detection(output: dict, confidence_threshold: float = 0.5): "labels": output["labels"][keep_mask], "scores": output["scores"][keep_mask], } + + if max_detections > 0: + limit = min(max_detections, output["scores"].shape[0]) + if limit > 0: + limited_idx = output["scores"].argsort(descending = True)[:limit] + output = { + "boxes": output["boxes"][limited_idx], + "labels": output["labels"][limited_idx], + "scores": output["scores"][limited_idx], + } + return output diff --git a/perceptionmetrics/models/utils/yolo.py b/perceptionmetrics/models/utils/yolo.py index 8b74703c..4e4f082b 100644 --- a/perceptionmetrics/models/utils/yolo.py +++ b/perceptionmetrics/models/utils/yolo.py @@ -9,6 +9,7 @@ def postprocess_detection( output: torch.Tensor, confidence_threshold: float = 0.25, nms_threshold: float = 0.45, + max_detections: int = 100 ): """Post-process YOLO model output. @@ -57,4 +58,13 @@ def postprocess_detection( scores = scores[keep_idx] labels = labels[keep_idx] + if max_detections > 0: + limit = min(max_detections, scores.shape[0]) + if limit > 0: + limited_idx = scores.argsort(descending = True)[:limit] + boxes_xyxy = boxes_xyxy[limited_idx] + scores = scores[limited_idx] + labels = labels[limited_idx] + + return {"boxes": boxes_xyxy, "labels": labels, "scores": scores} From f8b6b4e02dbb77b7718004e78cb10df88b677be4 Mon Sep 17 00:00:00 2001 From: RihaanBH-1810 Date: Sun, 5 Apr 2026 22:22:54 +0530 Subject: [PATCH 2/4] format with black --- perceptionmetrics/models/torch_detection.py | 5 +++-- perceptionmetrics/models/utils/torchvision.py | 6 ++++-- perceptionmetrics/models/utils/yolo.py | 6 ++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/perceptionmetrics/models/torch_detection.py b/perceptionmetrics/models/torch_detection.py index 556e04d4..dd988bfd 100644 --- a/perceptionmetrics/models/torch_detection.py +++ b/perceptionmetrics/models/torch_detection.py @@ -299,8 +299,9 @@ def __init__( # Load confidence and NMS thresholds from config self.confidence_threshold = self.model_cfg.get("confidence_threshold", 0.5) self.nms_threshold = self.model_cfg.get("nms_threshold", 0.3) - self.max_detections_per_image = self.model_cfg.get("max_detections_per_image", 100) - + self.max_detections_per_image = self.model_cfg.get( + "max_detections_per_image", 100 + ) self.postprocess_args = [self.confidence_threshold] if self.model_format == "yolo": diff --git a/perceptionmetrics/models/utils/torchvision.py b/perceptionmetrics/models/utils/torchvision.py index d33222e0..4f763249 100644 --- a/perceptionmetrics/models/utils/torchvision.py +++ b/perceptionmetrics/models/utils/torchvision.py @@ -1,4 +1,6 @@ -def postprocess_detection(output: dict, confidence_threshold: float = 0.5, max_detections: int = 100): +def postprocess_detection( + output: dict, confidence_threshold: float = 0.5, max_detections: int = 100 +): """Post-process torchvision model output. :param output: Dictionary with keys 'boxes', 'labels', and 'scores'. @@ -19,7 +21,7 @@ def postprocess_detection(output: dict, confidence_threshold: float = 0.5, max_d if max_detections > 0: limit = min(max_detections, output["scores"].shape[0]) if limit > 0: - limited_idx = output["scores"].argsort(descending = True)[:limit] + limited_idx = output["scores"].argsort(descending=True)[:limit] output = { "boxes": output["boxes"][limited_idx], "labels": output["labels"][limited_idx], diff --git a/perceptionmetrics/models/utils/yolo.py b/perceptionmetrics/models/utils/yolo.py index 4e4f082b..7b507b8f 100644 --- a/perceptionmetrics/models/utils/yolo.py +++ b/perceptionmetrics/models/utils/yolo.py @@ -1,7 +1,6 @@ import torch from torchvision.ops import nms - CLASS_NMS_OFFSET = 7680 # offset to apply to boxes for class-wise NMS @@ -9,7 +8,7 @@ def postprocess_detection( output: torch.Tensor, confidence_threshold: float = 0.25, nms_threshold: float = 0.45, - max_detections: int = 100 + max_detections: int = 100, ): """Post-process YOLO model output. @@ -61,10 +60,9 @@ def postprocess_detection( if max_detections > 0: limit = min(max_detections, scores.shape[0]) if limit > 0: - limited_idx = scores.argsort(descending = True)[:limit] + limited_idx = scores.argsort(descending=True)[:limit] boxes_xyxy = boxes_xyxy[limited_idx] scores = scores[limited_idx] labels = labels[limited_idx] - return {"boxes": boxes_xyxy, "labels": labels, "scores": scores} From 7c3aba139bd11cd7a7233815317ed0147ef6c983 Mon Sep 17 00:00:00 2001 From: RihaanBH-1810 Date: Sun, 5 Apr 2026 22:34:34 +0530 Subject: [PATCH 3/4] update doc strings --- perceptionmetrics/models/utils/torchvision.py | 2 ++ perceptionmetrics/models/utils/yolo.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/perceptionmetrics/models/utils/torchvision.py b/perceptionmetrics/models/utils/torchvision.py index 4f763249..65e70947 100644 --- a/perceptionmetrics/models/utils/torchvision.py +++ b/perceptionmetrics/models/utils/torchvision.py @@ -7,6 +7,8 @@ def postprocess_detection( :type output: dict :param confidence_threshold: Confidence threshold to filter boxes. :type confidence_threshold: float + :param max_detections: Maximum number of best detections to keep per image after filtering. + :type max_detections: int :return: Dictionary with keys 'boxes', 'labels', and 'scores'. :rtype: dict """ diff --git a/perceptionmetrics/models/utils/yolo.py b/perceptionmetrics/models/utils/yolo.py index 7b507b8f..de2a33cc 100644 --- a/perceptionmetrics/models/utils/yolo.py +++ b/perceptionmetrics/models/utils/yolo.py @@ -18,6 +18,8 @@ def postprocess_detection( :type confidence_threshold: float :param nms_threshold: IoU threshold for Non-Maximum Suppression (NMS). Some models may not perform NMS (e.g. YOLOv26). :type nms_threshold: float + :param max_detections: Maximum number of best detections to keep per image after filtering. + :type max_detections: int :return: Dictionary with keys 'boxes', 'labels', and 'scores'. :rtype: dict """ From a308d54d708d94f9200439ca539845925ea5441d Mon Sep 17 00:00:00 2001 From: RihaanBH-1810 Date: Fri, 17 Apr 2026 17:49:58 +0530 Subject: [PATCH 4/4] changing default to -1 so that it bypasses by default and filter only if max_detections is less than no of outputs --- app.py | 6 +++--- perceptionmetrics/models/torch_detection.py | 2 +- perceptionmetrics/models/utils/torchvision.py | 18 ++++++++---------- perceptionmetrics/models/utils/yolo.py | 14 ++++++-------- 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/app.py b/app.py index 0b32024c..664fb9ab 100644 --- a/app.py +++ b/app.py @@ -24,7 +24,7 @@ def browse_dataset_path(): st.session_state.setdefault("config_option", "Manual Configuration") st.session_state.setdefault("confidence_threshold", 0.5) st.session_state.setdefault("nms_threshold", 0.5) -st.session_state.setdefault("max_detections", 100) +st.session_state.setdefault("max_detections", -1) st.session_state.setdefault("device", "cuda") st.session_state.setdefault("batch_size", 1) st.session_state.setdefault("evaluation_step", 5) @@ -119,7 +119,7 @@ def browse_dataset_path(): ) st.number_input( "Max Detections/Image", - min_value=1, + min_value=-1, max_value=1000, step=1, key="max_detections", @@ -276,7 +276,7 @@ def browse_dataset_path(): st.session_state.get("confidence_threshold", 0.5) ) nms_threshold = float(st.session_state.get("nms_threshold", 0.5)) - max_detections = int(st.session_state.get("max_detections", 100)) + max_detections = int(st.session_state.get("max_detections", -1)) device = st.session_state.get("device", "cpu") batch_size = int(st.session_state.get("batch_size", 1)) evaluation_step = int(st.session_state.get("evaluation_step", 5)) diff --git a/perceptionmetrics/models/torch_detection.py b/perceptionmetrics/models/torch_detection.py index dd988bfd..e97cf7cb 100644 --- a/perceptionmetrics/models/torch_detection.py +++ b/perceptionmetrics/models/torch_detection.py @@ -300,7 +300,7 @@ def __init__( self.confidence_threshold = self.model_cfg.get("confidence_threshold", 0.5) self.nms_threshold = self.model_cfg.get("nms_threshold", 0.3) self.max_detections_per_image = self.model_cfg.get( - "max_detections_per_image", 100 + "max_detections_per_image", -1 ) self.postprocess_args = [self.confidence_threshold] diff --git a/perceptionmetrics/models/utils/torchvision.py b/perceptionmetrics/models/utils/torchvision.py index 65e70947..8f433f67 100644 --- a/perceptionmetrics/models/utils/torchvision.py +++ b/perceptionmetrics/models/utils/torchvision.py @@ -1,5 +1,5 @@ def postprocess_detection( - output: dict, confidence_threshold: float = 0.5, max_detections: int = 100 + output: dict, confidence_threshold: float = 0.5, max_detections: int = -1 ): """Post-process torchvision model output. @@ -20,14 +20,12 @@ def postprocess_detection( "scores": output["scores"][keep_mask], } - if max_detections > 0: - limit = min(max_detections, output["scores"].shape[0]) - if limit > 0: - limited_idx = output["scores"].argsort(descending=True)[:limit] - output = { - "boxes": output["boxes"][limited_idx], - "labels": output["labels"][limited_idx], - "scores": output["scores"][limited_idx], - } + if max_detections < output["scores"].shape[0] and max_detections > 0: + limited_idx = output["scores"].argsort(descending=True)[:max_detections] + output = { + "boxes": output["boxes"][limited_idx], + "labels": output["labels"][limited_idx], + "scores": output["scores"][limited_idx], + } return output diff --git a/perceptionmetrics/models/utils/yolo.py b/perceptionmetrics/models/utils/yolo.py index de2a33cc..7fb21d3d 100644 --- a/perceptionmetrics/models/utils/yolo.py +++ b/perceptionmetrics/models/utils/yolo.py @@ -8,7 +8,7 @@ def postprocess_detection( output: torch.Tensor, confidence_threshold: float = 0.25, nms_threshold: float = 0.45, - max_detections: int = 100, + max_detections: int = -1, ): """Post-process YOLO model output. @@ -59,12 +59,10 @@ def postprocess_detection( scores = scores[keep_idx] labels = labels[keep_idx] - if max_detections > 0: - limit = min(max_detections, scores.shape[0]) - if limit > 0: - limited_idx = scores.argsort(descending=True)[:limit] - boxes_xyxy = boxes_xyxy[limited_idx] - scores = scores[limited_idx] - labels = labels[limited_idx] + if max_detections > 0 and max_detections < scores.shape[0]: + limited_idx = scores.argsort(descending=True)[:max_detections] + boxes_xyxy = boxes_xyxy[limited_idx] + scores = scores[limited_idx] + labels = labels[limited_idx] return {"boxes": boxes_xyxy, "labels": labels, "scores": scores}