JdeRobot · RihaanBH-1810 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 17, 2026
diff --git a/perceptionmetrics/models/torch_detection.py b/perceptionmetrics/models/torch_detection.py
@@ -299,10 +299,14 @@ def __init__(
         # Load confidence and NMS thresholds from config
         self.confidence_threshold = self.model_cfg.get("confidence_threshold", 0.5)
         self.nms_threshold = self.model_cfg.get("nms_threshold", 0.3)
+        self.max_detections_per_image = self.model_cfg.get(
+            "max_detections_per_image", 100
+        )
 
         self.postprocess_args = [self.confidence_threshold]
         if self.model_format == "yolo":
             self.postprocess_args.append(self.nms_threshold)
+        self.postprocess_args.append(self.max_detections_per_image)
 
         # Add reverse mapping for idx to class_name
         self.idx_to_class_name = {v["idx"]: k for k, v in self.ontology.items()}

diff --git a/perceptionmetrics/models/utils/torchvision.py b/perceptionmetrics/models/utils/torchvision.py
@@ -1,10 +1,14 @@
-def postprocess_detection(output: dict, confidence_threshold: float = 0.5):
+def postprocess_detection(
+    output: dict, confidence_threshold: float = 0.5, max_detections: int = 100
+):
     """Post-process torchvision model output.
 
     :param output: Dictionary with keys 'boxes', 'labels', and 'scores'.
     :type output: dict
     :param confidence_threshold: Confidence threshold to filter boxes.
     :type confidence_threshold: float
+    :param max_detections: Maximum number of highest-scoring detections to keep per image after filtering; a value <= 0 disables the cap.
+    :type max_detections: int
     :return: Dictionary with keys 'boxes', 'labels', and 'scores'.
     :rtype: dict
     """
@@ -15,4 +19,15 @@ def postprocess_detection(output: dict, confidence_threshold: float = 0.5):
             "labels": output["labels"][keep_mask],
             "scores": output["scores"][keep_mask],
         }
+
+    if max_detections > 0:
+        limit = min(max_detections, output["scores"].shape[0])
+        if limit > 0:
+            limited_idx = output["scores"].argsort(descending=True)[:limit]
+            output = {
+                "boxes": output["boxes"][limited_idx],
+                "labels": output["labels"][limited_idx],
+                "scores": output["scores"][limited_idx],
+            }
+
     return output
diff --git a/perceptionmetrics/models/utils/yolo.py b/perceptionmetrics/models/utils/yolo.py
@@ -1,14 +1,14 @@
 import torch
 from torchvision.ops import nms
 
-
 CLASS_NMS_OFFSET = 7680  # offset to apply to boxes for class-wise NMS
 
 
 def postprocess_detection(
     output: torch.Tensor,
     confidence_threshold: float = 0.25,
     nms_threshold: float = 0.45,
+    max_detections: int = 100,
 ):
     """Post-process YOLO model output.
 
@@ -18,6 +18,8 @@ def postprocess_detection(
     :type confidence_threshold: float
     :param nms_threshold: IoU threshold for Non-Maximum Suppression (NMS). Some models may not perform NMS (e.g. YOLOv26).
     :type nms_threshold: float
+    :param max_detections: Maximum number of highest-scoring detections to keep per image after filtering; a value <= 0 disables the cap.
+    :type max_detections: int
     :return: Dictionary with keys 'boxes', 'labels', and 'scores'.
     :rtype: dict
     """
@@ -57,4 +59,12 @@ def postprocess_detection(
         scores = scores[keep_idx]
         labels = labels[keep_idx]
 
+        if max_detections > 0:
+            limit = min(max_detections, scores.shape[0])
+            if limit > 0:
+                limited_idx = scores.argsort(descending=True)[:limit]
+                boxes_xyxy = boxes_xyxy[limited_idx]
+                scores = scores[limited_idx]
+                labels = labels[limited_idx]
+
     return {"boxes": boxes_xyxy, "labels": labels, "scores": scores}