Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions metaspace/engine/sm/engine/annotation/formula_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,23 @@ def make_compute_image_metrics(

sample_area_mask = imzml_reader.mask
n_spectra = np.count_nonzero(sample_area_mask)
empty_matrix = np.zeros(sample_area_mask.shape, dtype=np.float32)
sample_area_mask_flat = sample_area_mask.flatten()
pixel_to_flat_idx = imzml_reader.pixel_to_flat_idx
img_w = imzml_reader.w
empty_flat = np.zeros(n_spectra, dtype=np.float32)

def _coo_to_flat(img):
"""Scatter a coo_matrix directly into a 1D masked-flat array of size n_spectra.

Equivalent to img.toarray().flatten()[sample_area_mask_flat] but without ever
allocating the full h×w bounding box. np.add.at correctly sums duplicate coordinates
that arise from concat_coo_matrices for split formulas.
"""
if img is None or img.nnz == 0:
return empty_flat
flat = np.zeros(n_spectra, dtype=np.float32)
flat_positions = pixel_to_flat_idx[img.row * img_w + img.col]
np.add.at(flat, flat_positions, img.data)
return flat

def compute_metrics(image_set: FormulaImageSet):
# pylint: disable=unused-variable # benchmark is used in commented-out dev code
Expand All @@ -136,8 +151,7 @@ def benchmark(attr):

# with benchmark('overall'):

iso_imgs = [img.toarray() if img is not None else empty_matrix for img in image_set.images]
iso_imgs_flat = np.array([img.flatten()[sample_area_mask_flat] for img in iso_imgs])
iso_imgs_flat = np.array([_coo_to_flat(img) for img in image_set.images])

doc = Metrics(formula_i=image_set.formula_i)

Expand Down Expand Up @@ -178,7 +192,7 @@ def benchmark(attr):
)
if (doc.spatial or 0.0) > 0.0 or calc_all:
# with benchmark('chaos'):
doc.chaos = chaos_metric(iso_imgs[0], n_levels)
doc.chaos = chaos_metric(image_set.images[0], n_levels)

doc.msm = (doc.chaos or 0.0) * (doc.spatial or 0.0) * (doc.spectral or 0.0)

Expand Down
7 changes: 7 additions & 0 deletions metaspace/engine/sm/engine/annotation/imzml_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ def __init__(self, imzml_parser: ImzMLParser):
sample_area_mask[self.pixel_indexes] = True
self.mask = sample_area_mask.reshape(self.h, self.w)

# pixel_to_flat_idx: maps flat pixel coord (Y*W+X) → position in the masked-flat array.
# Positions are assigned in ascending pixel_index order to match the semantics of
# mask.flatten()[sample_area_mask_flat] used elsewhere in the pipeline.
# -1 for pixels that have no spectrum.
self.pixel_to_flat_idx = np.full(self.h * self.w, -1, dtype=np.intp)
self.pixel_to_flat_idx[sample_area_mask] = np.arange(self.n_spectra)

# raw_coord_bounds is the RAW min/max coordinates, purely for diagnostics
self.raw_coord_bounds = (
np.min(imzml_parser.coordinates, axis=0),
Expand Down
62 changes: 56 additions & 6 deletions metaspace/engine/sm/engine/annotation/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,64 @@ def _chaos_dilate(arr):
return arr


def chaos_metric(iso_img, n_levels):
# Shrink image if possible, as chaos performance is highly resolution-dependent
iso_img = iso_img[_chaos_dilate(np.any(iso_img, axis=1)), :]
iso_img = iso_img[:, _chaos_dilate(np.any(iso_img, axis=0))]
def chaos_metric(coo_img, n_levels):
"""Compute the chaos metric directly from a sparse coo_matrix.

Builds the same compact 2D dense array that the previous implementation produced after
toarray() + _chaos_dilate cropping, but without ever allocating the full h×w bounding box.
Results are bit-for-bit identical to the old path for any given dataset.

Args:
coo_img: First-isotope image as a scipy coo_matrix (or None / empty).
n_levels: Number of intensity thresholds for measure_of_chaos.
"""
if coo_img is None or coo_img.nnz == 0:
return 0

nrows, ncols = coo_img.shape

# Build row/col occupancy masks directly from sparse indices — O(nnz), no h×w allocation
row_mask = np.zeros(nrows, dtype=bool)
col_mask = np.zeros(ncols, dtype=bool)
row_mask[coo_img.row] = True
col_mask[coo_img.col] = True

# Apply the same dilation as before so measure_of_chaos sees the same spatial context
row_sel = _chaos_dilate(row_mask)
col_sel = _chaos_dilate(col_mask)

if isinstance(row_sel, slice) and isinstance(col_sel, slice):
# Image is already dense (>90% non-empty in both dimensions); toarray() is cheap here
iso_img = coo_img.toarray()
else:
# Build compact index remaps: original row/col index → position in compact array
if isinstance(row_sel, slice):
n_rows_compact = nrows
row_remap = np.arange(nrows, dtype=np.intp)
else:
selected_rows = np.where(row_sel)[0]
n_rows_compact = len(selected_rows)
row_remap = np.full(nrows, -1, dtype=np.intp)
row_remap[selected_rows] = np.arange(n_rows_compact)

if isinstance(col_sel, slice):
n_cols_compact = ncols
col_remap = np.arange(ncols, dtype=np.intp)
else:
selected_cols = np.where(col_sel)[0]
n_cols_compact = len(selected_cols)
col_remap = np.full(ncols, -1, dtype=np.intp)
col_remap[selected_cols] = np.arange(n_cols_compact)

# Scatter sparse values into compact array.
# np.add.at handles duplicate coordinates (e.g. from concat_coo_matrices for split
# formulas) the same way toarray() does — by summing them.
iso_img = np.zeros((n_rows_compact, n_cols_compact), dtype=coo_img.dtype)
compact_rows = row_remap[coo_img.row]
compact_cols = col_remap[coo_img.col]
np.add.at(iso_img, (compact_rows, compact_cols), coo_img.data)

if iso_img.size == 0:
# measure_of_chaos segfaults if the image has no elements - in Lithops this appears as a
# MemoryError. Skip empty images.
return 0

# measure_of_chaos behaves weirdly on Fortran-ordered arrays, which happen sometimes due to the
Expand Down
9 changes: 9 additions & 0 deletions metaspace/engine/sm/engine/annotation_lithops/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ def run_pipeline(
f'The bounding box area exceeds the maximum allowed pixel count of ({pixel_limit}). Contact contact@metaspace2020.org.'
)

sparsity_ratio = nz_pixels / n_pixels
if sparsity_ratio < 0.05:
logger.warning(
f'Low pixel density detected: {nz_pixels} spectra in a '
f'{self.imzml_reader.h}×{self.imzml_reader.w} bounding box '
f'(density={sparsity_ratio:.3f}). This dataset may have isolated blobs '
f'with large empty areas.'
)

self.segment_centroids(use_cache=use_cache)
if debug_validate:
self.validate_segment_centroids()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def _test_compute_metrics():
isotope_generation=None,
fdr=None,
)
imzml_reader = make_imzml_reader_mock(list(product(range(2), range(3))))
imzml_reader = make_imzml_reader_mock(list(product(range(3), range(2))))
compute_metrics = make_compute_image_metrics(imzml_reader, ds_config)
return compute_metrics

Expand All @@ -37,7 +37,7 @@ def _test_compute_metrics():
def test_formula_image_metrics(chaos_mock, spatial_mock, spectral_mock):
spectral_mock.side_effect = lambda imgs_flat, *args: imgs_flat[0][0]
spatial_mock.side_effect = lambda imgs_flat, *args, **kwargs: imgs_flat[0][1]
chaos_mock.side_effect = lambda img, *args: img[0, 2]
chaos_mock.side_effect = lambda img, *args: img.toarray()[0, 2]

# Images 2 & 3 combine to equal image 1
# First 3 intensities get used as spectral, spatial & chaos metrics
Expand Down