Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0810ed9
docs: design for Mothbot YOLO11m detection pipeline
mihow Apr 14, 2026
fca35e9
docs: add licensing section to Mothbot pipeline spec
mihow Apr 14, 2026
12a53fa
docs: implementation plan for Mothbot YOLO detection pipeline
mihow Apr 14, 2026
a582457
refactor: rename CLASSIFIER_CHOICES to PIPELINE_CHOICES
mihow Apr 15, 2026
d1c6394
refactor: let each pipeline specify its detector via detector_cls
mihow Apr 15, 2026
9ea2310
feat: add optional rotation field to DetectionResponse
mihow Apr 15, 2026
3d91f4d
build: add ultralytics>=8.3 dependency
mihow Apr 15, 2026
37c1ef8
feat: add Mothbot YOLO11m-OBB detector (ML layer)
mihow Apr 15, 2026
406e58a
feat: add API wrapper for Mothbot YOLO11m detector
mihow Apr 15, 2026
e26b973
feat: register mothbot_insect_orders_2025 pipeline
mihow Apr 15, 2026
aba15b3
test: end-to-end integration test for mothbot pipeline
mihow Apr 15, 2026
3665064
fix: address CodeRabbit review comments on PR #141
mihow Apr 15, 2026
36fc00d
fix: distinct name for MothbotInsectOrderClassifier
mihow Apr 15, 2026
9f2b5f9
fix(antenna worker): use classifier.detector_cls instead of hardcoded…
mihow Apr 15, 2026
0df47bf
fix(yolo detector): accept tensor batches from antenna REST dataloader
mihow Apr 15, 2026
7edc9d9
fix(yolo detector): filter degenerate zero-area OBB detections
mihow Apr 16, 2026
68e0c16
fix(yolo detector): clamp OBB envelope to image bounds
mihow Apr 16, 2026
f5f7534
fix(antenna worker): clamp bbox coords to image bounds before crop
mihow Apr 16, 2026
0726b23
fix(yolo detector): convert RGB tensor to BGR before passing to ultra…
mihow Apr 16, 2026
1057b8a
feat: add mothbot_panama_moths_2023 pipeline + clean up pipeline meta…
mihow Apr 16, 2026
2a33a05
docs: add runbook for testing Mothbot pipelines locally against Antenna
mihow Apr 16, 2026
59ea781
feat(worker): add --pipeline filter to register, --project filter to …
mihow Apr 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,454 changes: 1,454 additions & 0 deletions docs/superpowers/plans/2026-04-14-mothbot-detection-pipeline.md

Large diffs are not rendered by default.

276 changes: 276 additions & 0 deletions docs/superpowers/specs/2026-04-14-mothbot-detection-pipeline-design.md

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "trapdata"
version = "0.6.0"
description = "Companion software for automated insect monitoring stations"
authors = [{ name = "Michael Bunsen", email = "notbot@gmail.com" }]
license = { text = "MIT" }
license = { text = "AGPL-3.0" }
readme = "README.md"
requires-python = ">=3.10,<3.13"
urls = { Homepage = "https://github.com/RolnickLab/ami-data-companion", Repository = "https://github.com/RolnickLab/ami-data-companion" }
Expand Down Expand Up @@ -31,6 +31,7 @@ dependencies = [
"torch>=2.5",
"torchvision>=0.20",
"typer>=0.12.3,<1",
"ultralytics>=8.3",
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"uvicorn>=0.20",
]

Expand Down
4 changes: 2 additions & 2 deletions trapdata/antenna/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
AsyncPipelineRegistrationRequest,
AsyncPipelineRegistrationResponse,
)
from trapdata.api.api import CLASSIFIER_CHOICES, initialize_service_info
from trapdata.api.api import PIPELINE_CHOICES, initialize_service_info
from trapdata.api.utils import get_http_session
from trapdata.common.logs import logger
from trapdata.settings import Settings, read_settings
Expand Down Expand Up @@ -134,7 +134,7 @@ def register_pipelines(
successful_registrations = []
failed_registrations = []

logger.info(f"Available pipelines to register: {list(CLASSIFIER_CHOICES.keys())}")
logger.info(f"Available pipelines to register: {list(PIPELINE_CHOICES.keys())}")

for project in projects_to_process:
project_id = project["id"]
Expand Down
6 changes: 3 additions & 3 deletions trapdata/antenna/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from trapdata.antenna.datasets import CUDAPrefetcher, get_rest_dataloader
from trapdata.antenna.result_posting import ResultPoster
from trapdata.antenna.schemas import AntennaTaskResult, AntennaTaskResultError
from trapdata.api.api import CLASSIFIER_CHOICES, should_filter_detections
from trapdata.api.api import PIPELINE_CHOICES, should_filter_detections
from trapdata.api.models.classification import MothClassifierBinary
from trapdata.api.models.localization import APIMothDetector
from trapdata.api.schemas import (
Expand Down Expand Up @@ -425,7 +425,7 @@ def _process_job(
detector = None

# Check if binary filtering is needed once for the entire job
classifier_class = CLASSIFIER_CHOICES[pipeline]
classifier_class = PIPELINE_CHOICES[pipeline]
use_binary_filter = should_filter_detections(classifier_class)
Comment on lines +450 to 451
Copy link
Copy Markdown

@coderabbitai coderabbitai Bot Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Guard unknown pipeline slugs before classifier resolution.

Line 428 can throw KeyError for stale/invalid queue payloads, which turns into repeated failed processing cycles instead of a controlled skip path.

Suggested resilience fix
-    classifier_class = PIPELINE_CHOICES[pipeline]
+    classifier_class = PIPELINE_CHOICES.get(pipeline)
+    if classifier_class is None:
+        logger.error(f"Unknown pipeline slug received from queue: {pipeline}")
+        return False
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
classifier_class = PIPELINE_CHOICES[pipeline]
use_binary_filter = should_filter_detections(classifier_class)
classifier_class = PIPELINE_CHOICES.get(pipeline)
if classifier_class is None:
logger.error(f"Unknown pipeline slug received from queue: {pipeline}")
return False
use_binary_filter = should_filter_detections(classifier_class)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@trapdata/antenna/worker.py` around lines 428 - 429, The code currently
indexes PIPELINE_CHOICES[pipeline] which can raise KeyError for stale/invalid
pipeline slugs; update the processing path to guard resolution by checking
membership first (e.g., if pipeline not in PIPELINE_CHOICES) and handle unknown
slugs by logging a warning and returning/acknowledging/skipping the message
instead of proceeding, otherwise set classifier_class =
PIPELINE_CHOICES[pipeline] and continue to call
should_filter_detections(classifier_class); ensure the skip path uses the same
failure/ack behavior as other non-retriable errors to avoid repeated failed
processing cycles.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Claude says: Valid concern but deferring — this behavior is pre-existing (same code as pre-rename CLASSIFIER_CHOICES[pipeline]), not introduced by this PR. Unknown slugs in the queue indicate an out-of-sync registration/worker deployment, which today fails loudly with a KeyError + retry loop. Converting to a quiet skip is a behavior change worth its own PR with explicit thought about how to surface the misconfiguration (metrics? dead-letter?). Leaving for a follow-up.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mihow — Understood, that's a reasonable call. The pre-existing loud failure on an unknown slug does at least surface the misconfiguration immediately (stack trace in logs), whereas a quiet skip could silently drop queue messages without any signal. A dedicated follow-up that considers the full observability story (metrics, dead-letter, alerting) is the right place for this.

Would you like me to open a follow-up GitHub issue to track this, so it doesn't get lost?

binary_filter = None

Expand Down Expand Up @@ -464,7 +464,7 @@ def _process_job(
# Defer instantiation of poster, detector and classifiers until we have data
if not classifier:
classifier = classifier_class(source_images=[], detections=[])
detector = APIMothDetector([])
detector = classifier_class.detector_cls([])
result_poster = ResultPoster(max_pending=MAX_PENDING_POSTS)

if use_binary_filter:
Expand Down
25 changes: 14 additions & 11 deletions trapdata/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .models.classification import (
APIMothClassifier,
InsectOrderClassifier,
MothbotInsectOrderClassifier,
MothClassifierBinary,
MothClassifierGlobal,
MothClassifierPanama,
Expand Down Expand Up @@ -52,7 +53,7 @@ async def lifespan(app: fastapi.FastAPI):
app.add_middleware(GZipMiddleware)


CLASSIFIER_CHOICES = {
PIPELINE_CHOICES = {
"panama_moths_2023": MothClassifierPanama,
"panama_moths_2024": MothClassifierPanama2024,
"quebec_vermont_moths_2023": MothClassifierQuebecVermont,
Expand All @@ -63,20 +64,22 @@ async def lifespan(app: fastapi.FastAPI):
"global_moths_2024": MothClassifierGlobal,
"moth_binary": MothClassifierBinary,
"insect_orders_2025": InsectOrderClassifier,
"mothbot_insect_orders_2025": MothbotInsectOrderClassifier,
}
_classifier_choices = dict(
zip(CLASSIFIER_CHOICES.keys(), list(CLASSIFIER_CHOICES.keys()))
)
_classifier_choices = dict(zip(PIPELINE_CHOICES.keys(), list(PIPELINE_CHOICES.keys())))


PipelineChoice = enum.Enum("PipelineChoice", _classifier_choices)


def should_filter_detections(Classifier: type[APIMothClassifier]) -> bool:
if Classifier in [MothClassifierBinary, InsectOrderClassifier]:
# Classifiers that skip the binary moth/non-moth prefilter: the binary
# classifier itself (there's nothing downstream to filter for), and any
# order-level classifier (it already distinguishes non-moth insects,
# so a binary prefilter would discard signal).
if issubclass(Classifier, (MothClassifierBinary, InsectOrderClassifier)):
return False
else:
return True
return True


def make_category_map_response(
Expand Down Expand Up @@ -137,7 +140,7 @@ def make_pipeline_config_response(
"""
algorithms = []

detector = APIMothDetector(
detector = Classifier.detector_cls(
source_images=[],
)
algorithms.append(make_algorithm_config_response(detector))
Expand Down Expand Up @@ -216,9 +219,9 @@ async def process(data: PipelineRequest) -> PipelineResponse:

start_time = time.time()

Classifier = CLASSIFIER_CHOICES[str(data.pipeline)]
Classifier = PIPELINE_CHOICES[str(data.pipeline)]

detector = APIMothDetector(
detector = Classifier.detector_cls(
source_images=source_images,
batch_size=settings.localization_batch_size,
num_workers=settings.num_workers,
Expand Down Expand Up @@ -359,7 +362,7 @@ def initialize_service_info() -> ProcessingServiceInfoResponse:
# @TODO This requires loading all models into memory! Can we avoid this?
pipeline_configs = [
make_pipeline_config_response(classifier_class, slug=key)
for key, classifier_class in CLASSIFIER_CHOICES.items()
for key, classifier_class in PIPELINE_CHOICES.items()
]

_info = ProcessingServiceInfoResponse(
Expand Down
20 changes: 20 additions & 0 deletions trapdata/api/models/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
SourceImage,
)
from .base import APIInferenceBaseClass
from .localization import APIMothDetector, APIMothDetector_YOLO11m_Mothbot


class APIMothClassifier(
Expand All @@ -36,6 +37,11 @@ class APIMothClassifier(
):
task_type = "classification"

# The detector class this pipeline pairs with. Subclasses override
# to pair a specific classifier with a specific detector. Default is
# the FasterRCNN 2023 detector that all existing pipelines use.
detector_cls: type[APIInferenceBaseClass] = APIMothDetector

def __init__(
self,
source_images: typing.Iterable[SourceImage],
Expand Down Expand Up @@ -231,3 +237,17 @@ class MothClassifierGlobal(APIMothClassifier, GlobalMothSpeciesClassifier):

# API-level pipeline class for the 2025 insect-order classifier. All
# behavior comes from the two bases (the API mixin + the ML-layer model),
# so no overrides are needed here.
class InsectOrderClassifier(APIMothClassifier, InsectOrderClassifier2025):
    pass


class MothbotInsectOrderClassifier(InsectOrderClassifier):
    """Order-level classifier variant that localizes with the Mothbot
    YOLO11m detector instead of the default FasterRCNN detector.

    Exactly two things differ from the parent class:

    * ``detector_cls`` — swapped for the YOLO11m OBB detector wrapper so
      this pipeline runs Mothbot's localization model.
    * ``name`` — deliberately distinct from the parent's, because
      Antenna's pipeline registry keys on ``name``; reusing the parent's
      name would dedupe this pipeline against ``insect_orders_2025``.
    """

    # NOTE: keep this string unique across pipelines (the registry keys on it).
    name = "Insect Order Classifier (Mothbot YOLO detector)"
    detector_cls = APIMothDetector_YOLO11m_Mothbot
55 changes: 54 additions & 1 deletion trapdata/api/models/localization.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import datetime
import typing

from trapdata.ml.models.localization import MothObjectDetector_FasterRCNN_2023
from trapdata.ml.models.localization import (
MothObjectDetector_FasterRCNN_2023,
MothObjectDetector_YOLO11m_Mothbot,
)

from ..datasets import LocalizationImageDataset
from ..schemas import AlgorithmReference, BoundingBox, DetectionResponse, SourceImage
Expand Down Expand Up @@ -56,3 +59,53 @@ def save_detection(image_id, coords):
def run(self) -> list[DetectionResponse]:
super().run()
return self.results


class APIMothDetector_YOLO11m_Mothbot(
    APIInferenceBaseClass, MothObjectDetector_YOLO11m_Mothbot
):
    """API wrapper around the Mothbot YOLO11m OBB detector.

    Accumulates one ``DetectionResponse`` per YOLO detection into
    ``self.results`` as batches are processed, mirroring the pattern of
    the FasterRCNN-based ``APIMothDetector`` above.
    """

    task_type = "localization"

    def __init__(self, source_images: typing.Iterable[SourceImage], *args, **kwargs):
        self.source_images = source_images
        self.results: list[DetectionResponse] = []
        super().__init__(*args, **kwargs)

    def reset(self, source_images: typing.Iterable[SourceImage]):
        """Point the detector at a new image set and discard prior results."""
        self.source_images = source_images
        self.results = []

    def get_dataset(self):
        # Same dataset wrapper the FasterRCNN detector uses.
        return LocalizationImageDataset(
            self.source_images, self.get_transforms(), batch_size=self.batch_size
        )

    def get_source_image(self, source_image_id: int) -> SourceImage:
        # Linear scan is fine here: per-request image counts are small.
        found = next(
            (img for img in self.source_images if img.id == source_image_id),
            None,
        )
        if found is None:
            raise ValueError(f"Source image with id {source_image_id} not found")
        return found

    def save_results(self, item_ids, batch_output, seconds_per_item, *args, **kwargs):
        """``batch_output`` is a list (one entry per image) of ``list[YoloDetection]``."""
        # The algorithm reference is identical for every detection in the batch.
        algorithm_ref = AlgorithmReference(name=self.name, key=self.get_key())
        batch_responses: list[DetectionResponse] = []
        for image_id, image_detections in zip(item_ids, batch_output):
            for det in image_detections:
                batch_responses.append(
                    DetectionResponse(
                        source_image_id=image_id,
                        bbox=BoundingBox(x1=det.x1, y1=det.y1, x2=det.x2, y2=det.y2),
                        rotation=det.rotation,
                        inference_time=seconds_per_item,
                        algorithm=algorithm_ref,
                        timestamp=datetime.datetime.now(),
                        crop_image_url=None,
                    )
                )
        self.results.extend(batch_responses)

    def run(self) -> list[DetectionResponse]:
        super().run()
        return self.results
11 changes: 11 additions & 0 deletions trapdata/api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,17 @@ class DetectionResponse(pydantic.BaseModel):
timestamp: datetime.datetime
crop_image_url: str | None = None
classifications: list[ClassificationResponse] = []
rotation: float | None = pydantic.Field(
default=None,
description=(
"Rotation angle in degrees (cv2.minAreaRect convention), when "
"the detector produces oriented bounding boxes. FUTURE: "
"downstream classifiers may use this to crop a straightened "
"patch instead of the axis-aligned envelope. See "
"`docs/superpowers/specs/2026-04-14-mothbot-detection-pipeline-design.md` "
"for the proposed RotatedBoundingBox schema upgrade."
),
)


class SourceImageRequest(pydantic.BaseModel):
Expand Down
85 changes: 80 additions & 5 deletions trapdata/api/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from fastapi.testclient import TestClient

from trapdata.api.api import (
CLASSIFIER_CHOICES,
PIPELINE_CHOICES,
PipelineChoice,
PipelineRequest,
PipelineResponse,
Expand All @@ -15,7 +15,7 @@
)
from trapdata.api.schemas import PipelineConfigRequest
from trapdata.api.tests.image_server import StaticFileTestServer
from trapdata.api.tests.utils import get_test_images, get_pipeline_class
from trapdata.api.tests.utils import get_pipeline_class, get_test_images
from trapdata.tests import TEST_IMAGES_BASE_PATH

logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -62,12 +62,12 @@ def test_pipeline_request(self):

def test_pipeline_config_with_binary_classifier(self):
binary_classifier_pipeline_choice = "moth_binary"
BinaryClassifier = CLASSIFIER_CHOICES[binary_classifier_pipeline_choice]
BinaryClassifier = PIPELINE_CHOICES[binary_classifier_pipeline_choice]
binary_classifier_instance = BinaryClassifier(source_images=[], detections=[])
BinaryClassifierResponse = make_algorithm_response(binary_classifier_instance)

species_classifier_pipeline_choice = "quebec_vermont_moths_2023"
SpeciesClassifier = CLASSIFIER_CHOICES[species_classifier_pipeline_choice]
SpeciesClassifier = PIPELINE_CHOICES[species_classifier_pipeline_choice]
species_classifier_instance = SpeciesClassifier(source_images=[], detections=[])
SpeciesClassifierResponse = make_algorithm_response(species_classifier_instance)

Expand Down Expand Up @@ -99,7 +99,7 @@ def test_pipeline_config_with_binary_classifier(self):
def test_processing_with_only_binary_classifier(self):
binary_classifier_pipeline_choice = "moth_binary"
binary_algorithm_key = "moth_nonmoth_classifier"
BinaryAlgorithmClass = CLASSIFIER_CHOICES[binary_classifier_pipeline_choice]
BinaryAlgorithmClass = PIPELINE_CHOICES[binary_classifier_pipeline_choice]
# Create an instance to get the num_classes
binary_algorithm = BinaryAlgorithmClass(source_images=[], detections=[])

Expand Down Expand Up @@ -231,3 +231,78 @@ def test_config_num_classification_predictions(self):
f"Number of logits ({len(classification.logits)}) should equal "
f"number of classes ({num_classes})"
)

def test_existing_pipelines_default_to_apimothdetector(self):
    """Every pre-existing pipeline must keep APIMothDetector as its detector.

    Pipelines introduced together with their own detector are explicitly
    exempted.
    """
    from trapdata.api.api import PIPELINE_CHOICES
    from trapdata.api.models.localization import APIMothDetector

    exempt = {"mothbot_insect_orders_2025"}
    legacy_pipelines = {
        slug: pipeline_cls
        for slug, pipeline_cls in PIPELINE_CHOICES.items()
        if slug not in exempt
    }
    for slug, pipeline_cls in legacy_pipelines.items():
        self.assertIs(
            pipeline_cls.detector_cls,
            APIMothDetector,
            f"{slug} should default to APIMothDetector",
        )

def test_mothbot_pipeline_uses_yolo_detector(self):
    """The mothbot pipeline must be registered and paired with the YOLO detector."""
    from trapdata.api.api import PIPELINE_CHOICES
    from trapdata.api.models.localization import APIMothDetector_YOLO11m_Mothbot

    slug = "mothbot_insect_orders_2025"
    assert slug in PIPELINE_CHOICES
    pipeline_cls = PIPELINE_CHOICES[slug]
    self.assertIs(pipeline_cls.detector_cls, APIMothDetector_YOLO11m_Mothbot)

def test_mothbot_pipeline_skips_binary_filter(self):
    """An order-level pipeline must bypass the binary moth/non-moth prefilter."""
    from trapdata.api.api import PIPELINE_CHOICES, should_filter_detections

    pipeline_cls = PIPELINE_CHOICES["mothbot_insect_orders_2025"]
    self.assertFalse(should_filter_detections(pipeline_cls))

def test_detection_response_has_optional_rotation_field(self):
    """``rotation`` is opt-in: absent (None) by default, a float when given."""
    import datetime

    from trapdata.api.schemas import (
        AlgorithmReference,
        BoundingBox,
        DetectionResponse,
    )

    def build_response(**overrides):
        # Minimal valid response; keyword overrides exercise the new field.
        return DetectionResponse(
            source_image_id="img1",
            bbox=BoundingBox(x1=0, y1=0, x2=10, y2=10),
            algorithm=AlgorithmReference(name="x", key="x"),
            timestamp=datetime.datetime.now(),
            **overrides,
        )

    # Default: rotation is None
    self.assertIsNone(build_response().rotation)

    # Accepts a float
    self.assertAlmostEqual(build_response(rotation=-42.5).rotation, -42.5)

def test_yolo_api_detector_instantiates(self):
    """Constructing the YOLO wrapper with no source images must succeed.

    This mirrors how the /info handler reads algorithm metadata. It
    exercises weight download + model load — slow on first run, cached
    thereafter.
    """
    from trapdata.api.models.localization import APIMothDetector_YOLO11m_Mothbot

    yolo_detector = APIMothDetector_YOLO11m_Mothbot(source_images=[])
    self.assertEqual(yolo_detector.name, "Mothbot YOLO11m Creature Detector")
    self.assertEqual(yolo_detector.category_map, {0: "creature"})
    self.assertEqual(yolo_detector.imgsz, 1600)
Loading
Loading