Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
d07e964
Optimize inverse index mapping in groups.py (Fixes Issue #3387)
aygarwal Feb 26, 2026
463a1ec
Add changelog entry and authors entry for PR #5252
aygarwal Feb 26, 2026
7724ba3
Merge branch 'develop' into issue3387-inverse-map-optimization
aygarwal Feb 26, 2026
d62eecd
Added thresholding to using cython function
aygarwal Mar 2, 2026
fa8ad6c
Number of unique values threshold for choosing cython function
aygarwal Mar 2, 2026
9ae78cb
Optimised pythonic calculation of inverse index mapping
aygarwal Mar 2, 2026
fd97d19
Removed unique value number threshold for benchmarking
aygarwal Mar 2, 2026
f56d246
Trying 10 for cython threshold
aygarwal Mar 2, 2026
fcb4931
Removed thresholding altogether for benchmarking
aygarwal Mar 2, 2026
e22709e
Corrected AUTHORS file
aygarwal Mar 11, 2026
97f112c
Corrected CHANGELOG
aygarwal Mar 11, 2026
50ac687
Corrected CHANGELOG
aygarwal Mar 13, 2026
eede57e
Merge branch 'develop' into issue3387-inverse-map-optimization
orbeckst Mar 13, 2026
e1df729
Added test for cython function correctness
aygarwal Mar 24, 2026
111da56
Merge branch 'develop' into issue3387-inverse-map-optimization
aygarwal Mar 24, 2026
fc3f507
Added standard numpy docstring to inverse_int_index
aygarwal Mar 25, 2026
c1c39a3
Added two time benchmarks for asunique with sorted=True/False
aygarwal Mar 27, 2026
e697b56
Merge branch 'develop' into issue3387-inverse-map-optimization
aygarwal Mar 27, 2026
fd67b0f
Merge branch 'develop' into issue3387-inverse-map-optimization
orbeckst Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions benchmarks/benchmarks/ag_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,17 @@ def time_wrap_compound(self, num_atoms):
"""
self.ag.wrap(compound="residues")

def time_asunique_no_sorted(self, num_atoms):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great, thanks for adding the benchmarks

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Glad to hear this!
I'm interested in investigating why the atomgroup function did not show a very significant speedup. I'll use a profiler to understand what's going on, and present my findings to you.
Thank you again!

"""Benchmark asunique() operation on
atomgroup without sorting"""
self.ag.asunique(sorted=False)

def time_asunique_sorted(self, num_atoms):
    """Benchmark ``asunique(sorted=True)`` on the benchmark atomgroup.

    Times a single call of ``self.ag.asunique`` with ``sorted=True``.

    Parameters
    ----------
    num_atoms
        Not used in the body.
        NOTE(review): presumably the benchmark size parameter that the
        suite's setup uses to build ``self.ag`` -- confirm against the
        enclosing class.
    """
    self.ag.asunique(sorted=True)


class AtomGroupAttrsBench(object):
"""Benchmarks for the various MDAnalysis
atomgroup attributes.
Expand Down
2 changes: 1 addition & 1 deletion package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -324,4 +324,4 @@ Logo

The MDAnalysis 'Atom' logo was designed by Christian Beckstein; it is
Copyright (c) 2011 Christian Beckstein and made available under a
Creative Commons Attribution-NoDerivs 3.0 Unported License.
Creative Commons Attribution-NoDerivs 3.0 Unported License.
Comment thread
orbeckst marked this conversation as resolved.
5 changes: 4 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ Fixes
DSSP by porting upstream PyDSSP 0.9.1 fix (Issue #4913)

Enhancements
* Improved performance of inverse index mapping in AtomGroup using an optimized
Cython implementation in lib._cutil.inverse_int_index()
(Issue #3387, PR #5252)
* Added documentation for all keyword in select_atoms() and
selections.rst (Issue #5317, PR #5325)
* Added HydrogenBondAnalysis benchmark for performance tracking (PR #5309)
Expand Down Expand Up @@ -3631,4 +3634,4 @@ Testsuite
licenses

11/12/07 naveen
* prepared for release outside lab
* prepared for release outside lab
Comment thread
orbeckst marked this conversation as resolved.
6 changes: 2 additions & 4 deletions package/MDAnalysis/core/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
from ..exceptions import NoDataError
from . import topologyobjects
from ._get_readers import get_writer_for, get_converter_for
from ..lib._cutil import inverse_int_index


def _unpickle(u, ix):
Expand Down Expand Up @@ -912,10 +913,7 @@ def _asunique(self, group, sorted=False, set_mask=False):

indices = unique_int_1d_unsorted(self.ix)
if set_mask:
mask = np.zeros_like(self.ix)
for i, x in enumerate(indices):
values = np.where(self.ix == x)[0]
mask[values] = i
mask = inverse_int_index(self.ix, indices)
self._unique_restore_mask = mask

issorted = int_array_is_sorted(indices)
Expand Down
63 changes: 61 additions & 2 deletions package/MDAnalysis/lib/_cutil.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ from cython.operator cimport dereference as deref

cnp.import_array()

__all__ = ['unique_int_1d', 'make_whole', 'find_fragments',
__all__ = ['unique_int_1d', 'inverse_int_index', 'make_whole', 'find_fragments',
'_sarrus_det_single', '_sarrus_det_multiple']

cdef extern from "calc_distances.h":
Expand Down Expand Up @@ -91,6 +91,65 @@ def unique_int_1d(cnp.intp_t[:] values):

return np.array(result)

@cython.boundscheck(False)
@cython.wraparound(False)
def inverse_int_index(cnp.intp_t[:] values,
                      cnp.intp_t[:] unique_vals):
    r"""Construct an inverse index array (mask) mapping values to unique_vals.

    The returned mask contains the indices such that:

    .. math::
        \text{unique\_vals}[\text{mask}] == \text{values}

    Parameters
    ----------
    values : numpy.ndarray
        1D array of integers (can contain duplicates).
    unique_vals : numpy.ndarray
        1D array of unique integers corresponding to the elements in `values`.

    Returns
    -------
    numpy.ndarray
        An integer array `mask` of the same length as `values`, where
        ``mask[i]`` is the index of ``values[i]`` in `unique_vals`.

    Raises
    ------
    KeyError
        If `values` contains an element that is not present in
        `unique_vals`.

    Notes
    -----
    A dictionary mapping every entry of `unique_vals` to its position is
    built first; each element of `values` is then resolved with a single
    dictionary lookup instead of scanning `values` once per unique value.

    If `unique_vals` contains duplicates, the position of the *last*
    occurrence is the one stored in the mask.

    Examples
    --------
    >>> import numpy as np
    >>> from MDAnalysis.lib._cutil import inverse_int_index
    >>> vals = np.array([1, 5, 3, 3, 6], dtype=np.intp)
    >>> uniq = np.array([1, 5, 3, 6], dtype=np.intp)
    >>> mask = inverse_int_index(vals, uniq)
    >>> mask
    array([0, 1, 2, 2, 3])
    >>> np.all(uniq[mask] == vals)
    True


    .. versionadded:: 2.11.0
    """

    cdef Py_ssize_t n = values.shape[0]
    cdef Py_ssize_t m = unique_vals.shape[0]
    cdef Py_ssize_t i

    cdef dict lookup = {}
    cdef cnp.intp_t[:] mask

    # Keep a handle on the ndarray itself: the typed memoryview is only a
    # fast writable window onto it, so the filled array can be returned
    # directly without copying it a second time.
    result = np.empty(n, dtype=np.intp)
    mask = result

    # value -> position in unique_vals
    for i in range(m):
        lookup[unique_vals[i]] = i

    # A value missing from unique_vals raises KeyError here.
    for i in range(n):
        mask[i] = lookup[values[i]]

    return result

@cython.boundscheck(False)
def _in2d(cnp.intp_t[:, :] arr1, cnp.intp_t[:, :] arr2):
Expand Down Expand Up @@ -515,4 +574,4 @@ def find_fragments(atoms, bondlist):
# Add fragment to output
frags.append(np.asarray(this_frag))

return frags
return frags
Comment thread
orbeckst marked this conversation as resolved.
50 changes: 50 additions & 0 deletions testsuite/MDAnalysisTests/lib/test_cutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
unique_int_1d,
find_fragments,
_in2d,
inverse_int_index,
)


Expand Down Expand Up @@ -103,3 +104,52 @@ def test_in2d_VE(arr1, arr2):
ValueError, match=r"Both arrays must be \(n, 2\) arrays"
):
_in2d(arr1, arr2)


def _python_reference_mask(ix, indices):
    """Pure NumPy reference for the inverse index mask.

    For every position ``i`` and value ``x`` in `indices`, all locations
    along the first axis of `ix` where ``ix == x`` receive ``i``. Locations
    matching no entry of `indices` keep the initial value 0.
    """
    reference = np.zeros_like(ix)
    for position, unique_value in enumerate(indices):
        hits = np.nonzero(ix == unique_value)[0]
        reference[hits] = position
    return reference


@pytest.mark.parametrize(
    ("ix", "indices"),
    [
        # unsorted and not unique
        (
            np.array([1, 5, 3, 3, 6], dtype=np.intp),
            np.array([1, 5, 3, 6], dtype=np.intp),
        ),
        # sorted and not unique
        (
            np.array([1, 3, 3, 5, 6], dtype=np.intp),
            np.array([1, 3, 5, 6], dtype=np.intp),
        ),
        # unsorted and unique
        (
            np.array([1, 5, 3, 6], dtype=np.intp),
            np.array([1, 5, 3, 6], dtype=np.intp),
        ),
        # sorted and unique
        (
            np.array([1, 3, 5, 6], dtype=np.intp),
            np.array([1, 3, 5, 6], dtype=np.intp),
        ),
        # all elements identical
        (
            np.array([5, 5, 5], dtype=np.intp),
            np.array([5], dtype=np.intp),
        ),
        # single element
        (
            np.array([7], dtype=np.intp),
            np.array([7], dtype=np.intp),
        ),
    ],
)
def test_inverse_int_index(ix, indices):
    """The Cython mask must equal the pure NumPy reference mask."""
    expected = _python_reference_mask(ix, indices)
    computed = inverse_int_index(ix, indices)
    assert_equal(expected, computed)
Loading