RolnickLab · mihow · Jan 30, 2026 · Oct 15, 2025 · Oct 17, 2025 · Oct 17, 2025
diff --git a/.env.example b/.env.example
@@ -8,3 +8,9 @@ AMI_CLASSIFICATION_THRESHOLD=0.6
 AMI_LOCALIZATION_BATCH_SIZE=2
 AMI_CLASSIFICATION_BATCH_SIZE=20
 AMI_NUM_WORKERS=1
+
+# Antenna API worker settings (for processing jobs from the Antenna platform)
+# See: https://github.com/RolnickLab/antenna
+AMI_ANTENNA_API_BASE_URL=http://localhost:8000/api/v2
+AMI_ANTENNA_API_AUTH_TOKEN=your_antenna_auth_token_here
+AMI_ANTENNA_API_BATCH_SIZE=4
diff --git a/.gitignore b/.gitignore
@@ -142,3 +142,6 @@ db_data/
 # Test files
 sample_images
 bak
+
+# Local scratch for moving untracked files
+scratch/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
         types: [pyi]
 
   - repo: https://github.com/pycqa/flake8
-    rev: 3.8.3
+    rev: 4.0.0
     hooks:
       - id: flake8
         files: .

diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,29 @@
+{
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Python Debugger: Current File",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${file}",
+      "console": "integratedTerminal"
+    },
+    {
+      "name": "Run worker",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "trapdata.cli.base",
+      "args": ["worker"]
+    },
+    {
+      "name": "Run api",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "trapdata.cli.base",
+      "args": ["api"]
+    }
+  ]
+}
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -13,8 +13,9 @@ This file helps AI agents (like Claude) work efficiently with the AMI Data Compa
 3. **Always prefer command line tools** to avoid expensive API requests (e.g., use git and jq instead of reading whole files)
 4. **Use bulk operations and prefetch patterns** to minimize database queries
 5. **Commit often** - Small, focused commits make debugging easier
-6. **Use TDD whenever possible** - Tests prevent regressions and document expected behavior
-7. **Keep it simple** - Always think hard and evaluate more complex approaches and alternative approaches before moving forward
+6. **Use `git add -p` for staging** - Interactive staging to add only relevant changes, creating logical commits
+7. **Use TDD whenever possible** - Tests prevent regressions and document expected behavior
+8. **Keep it simple** - Always think hard, scrutinizing more complex approaches and weighing alternatives before moving forward
 
 ### Think Holistically
 

diff --git a/README.md b/README.md
@@ -234,6 +234,47 @@ ami api
 
 View the interactive API docs at http://localhost:2000/
 
+## Running the Antenna Worker
+
+The worker polls the Antenna platform API for queued image processing jobs, downloads images, runs detection and classification, and posts results back to Antenna.
+
+**Setup:**
+
+1. Get your Antenna auth token from your Antenna project settings
+2. Configure the worker in `.env`:
+
+```sh
+AMI_ANTENNA_API_BASE_URL=https://antenna.insectai.org/api/v2  # Or your Antenna instance
+AMI_ANTENNA_API_AUTH_TOKEN=your_token_here
+AMI_ANTENNA_API_BATCH_SIZE=4
+AMI_NUM_WORKERS=2  # Multiple workers are safe: the REST API dequeues tasks atomically
+```
+
+**Run the worker:**
+
+```sh
+ami worker --pipelines moth_binary
+# Or multiple pipelines:
+ami worker --pipelines moth_binary --pipelines panama_moths_2024
+```
+
+The worker will:
+
+1. Poll Antenna for jobs matching the specified pipeline(s)
+2. Download images from the job queue
+3. Run detection and classification
+4. Post results back to Antenna
+5. Repeat until the queue is empty, then sleep and poll again
+
+**Notes:**
+
+- Multiple workers can run in parallel (they won't duplicate work)
+- The auth token ties results to your Antenna project
+- The worker keeps running until interrupted (Ctrl+C)
+- It is safe to run multiple workers on different machines
+
+For more information, see the [Antenna platform documentation](https://github.com/RolnickLab/antenna).
+
 ## Web UI demo (Gradio)
 
 A simple web UI is also available to test the inference pipeline. This is a quick way to test models on a remote server via a web browser.

diff --git a/pyproject.toml b/pyproject.toml
@@ -53,6 +53,7 @@ gradio = "^4.41.0"
 
 [tool.pytest.ini_options]
 asyncio_mode = 'auto'
+testpaths = ["trapdata/tests", "trapdata/api/tests"]
 
 [tool.isort]
 profile = "black"

diff --git a/trapdata/api/api.py b/trapdata/api/api.py
@@ -5,6 +5,7 @@
 
 import enum
 import time
+from contextlib import asynccontextmanager
 
 import fastapi
 import pydantic
@@ -36,7 +37,18 @@
 from .schemas import PipelineResultsResponse as PipelineResponse_
 from .schemas import ProcessingServiceInfoResponse, SourceImage, SourceImageResponse
 
-app = fastapi.FastAPI()
+
+@asynccontextmanager
+async def lifespan(app: fastapi.FastAPI):
+    # Build the service info once at startup and cache it on app.state
+    app.state.service_info = initialize_service_info()
+    logger.info("Initialized service info")
+    yield
+    # Shutdown event: Clean up resources (if necessary)
+    logger.info("Shutting down API")
+
+
+app = fastapi.FastAPI(lifespan=lifespan)
 app.add_middleware(GZipMiddleware)
 
 
@@ -157,13 +169,6 @@ def make_pipeline_config_response(
     )
 
 
-# @TODO This requires loading all models into memory! Can we avoid this?
-PIPELINE_CONFIGS = [
-    make_pipeline_config_response(classifier_class, slug=key)
-    for key, classifier_class in CLASSIFIER_CHOICES.items()
-]
-
-
 class PipelineRequest(PipelineRequest_):
     pipeline: PipelineChoice = pydantic.Field(
         description=PipelineRequest_.model_fields["pipeline"].description,
@@ -313,17 +318,7 @@ async def process(data: PipelineRequest) -> PipelineResponse:
 
 @app.get("/info", tags=["services"])
 async def info() -> ProcessingServiceInfoResponse:
-    info = ProcessingServiceInfoResponse(
-        name="Antenna Inference API",
-        description=(
-            "The primary endpoint for processing images for the Antenna platform. "
-            "This API provides access to multiple detection and classification "
-            "algorithms by multiple labs for processing images of moths."
-        ),
-        pipelines=PIPELINE_CONFIGS,
-        # algorithms=list(algorithm_choices.values()),
-    )
-    return info
+    return app.state.service_info
 
 
 # Check if the server is online
@@ -361,6 +356,26 @@ async def readyz():
 #     pass
 
 
+def initialize_service_info() -> ProcessingServiceInfoResponse:
+    # @TODO This requires loading all models into memory! Can we avoid this?
+    pipeline_configs = [
+        make_pipeline_config_response(classifier_class, slug=key)
+        for key, classifier_class in CLASSIFIER_CHOICES.items()
+    ]
+
+    _info = ProcessingServiceInfoResponse(
+        name="Antenna Inference API",
+        description=(
+            "The primary endpoint for processing images for the Antenna platform. "
+            "This API provides access to multiple detection and classification "
+            "algorithms by multiple labs for processing images of moths."
+        ),
+        pipelines=pipeline_configs,
+        # algorithms=list(algorithm_choices.values()),
+    )
+    return _info
+
+
 if __name__ == "__main__":
     import uvicorn