From c3691491bb63d0f09ed1991c950d933b924d304c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 15 May 2026 06:27:06 -0400 Subject: [PATCH 1/3] PERF: Improve performance of groupby.size --- pandas/core/groupby/ops.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 388179eae7316..30b8956753664 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -734,7 +734,10 @@ def size(self) -> Series: ngroups = self.ngroups out: np.ndarray | list if ngroups: - out = np.bincount(ids[ids != -1], minlength=ngroups) + if self.has_dropped_na: + out = np.bincount(ids + 1, minlength=ngroups + 1)[1:] + else: + out = np.bincount(ids, minlength=ngroups) else: out = [] return Series(out, index=self.result_index, dtype="int64", copy=False) From f03678e7866c9cb9eb071d55eb973980c4092a65 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 15 May 2026 06:34:01 -0400 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v3.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 49c251eb5e06a..062a8f904d7cb 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -180,6 +180,7 @@ Performance improvements - Performance improvement in :meth:`GroupBy.any` and :meth:`GroupBy.all` for boolean-dtype columns (:issue:`37850`) - Performance improvement in :meth:`GroupBy.first` and :meth:`GroupBy.last` for Extension Array dtypes, which no longer fall back to a slow ``apply``-based implementation (:issue:`57591`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`64330`) +- Performance improvement in :meth:`GroupBy.size` by avoiding an intermediate boolean mask and copy of the group codes (:issue:`51750`) - Performance improvement in :meth:`Index.get_indexer` for large monotonic indexes, which now uses binary search instead of building a hash table when the number of targets is small (:issue:`14273`) - Performance improvement in :meth:`Index.join` and :meth:`Index.union` for :class:`RangeIndex` by avoiding unnecessary memory allocation in the libjoin fastpath (:issue:`54646`) - Performance improvement in :meth:`IntervalIndex.get_indexer` for monotonic non-overlapping indexes, which now uses binary search instead of the interval tree (:issue:`47614`) From 00bed0b8c91bd5bbbbbbf6002161e887b1396b18 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 15 May 2026 06:37:13 -0400 Subject: [PATCH 3/3] fixup --- doc/source/whatsnew/v3.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 062a8f904d7cb..e9219b7fc4ca0 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -180,7 +180,7 @@ Performance improvements - Performance improvement in :meth:`GroupBy.any` and :meth:`GroupBy.all` for boolean-dtype columns (:issue:`37850`) - Performance improvement in :meth:`GroupBy.first` and :meth:`GroupBy.last` for Extension Array dtypes, which no longer fall back to a slow ``apply``-based implementation (:issue:`57591`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`64330`) -- Performance improvement in :meth:`GroupBy.size` by avoiding an intermediate boolean mask and copy of the group codes (:issue:`51750`) +- Performance improvement in :meth:`GroupBy.size` (:issue:`51750`) - Performance improvement in :meth:`Index.get_indexer` for large monotonic indexes, which now uses binary search instead of building a hash table when the number of targets is small (:issue:`14273`) - Performance improvement in :meth:`Index.join` and :meth:`Index.union` for :class:`RangeIndex` by avoiding unnecessary memory allocation in the libjoin fastpath (:issue:`54646`) - Performance improvement in :meth:`IntervalIndex.get_indexer` for monotonic non-overlapping indexes, which now uses binary search instead of the interval tree (:issue:`47614`)