shreejaykurhade · shreejaykurhade · Apr 11, 2026 · Apr 10, 2026
diff --git a/src/typeagent/aitools/vectorbase.py b/src/typeagent/aitools/vectorbase.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-from collections.abc import Callable
+from collections.abc import Callable, Iterable
 from dataclasses import dataclass
 
 import numpy as np
@@ -156,63 +156,28 @@ def fuzzy_lookup_embedding(
             min_score = 0.0
         if len(self._vectors) == 0:
             return []
-        scores = np.dot(self._vectors, embedding)
-
-        if predicate is None:
-            # Fast numpy path: filter and top-k without Python-level iteration.
-            indices = np.flatnonzero(scores >= min_score)
-            if len(indices) == 0:
-                return []
-            filtered_scores = scores[indices]
-            if len(indices) <= max_hits:
-                order = np.argsort(filtered_scores)[::-1]
-            else:
-                # argpartition is O(n) vs O(n log n) for full sort.
-                top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:]
-                order = top_k[np.argsort(filtered_scores[top_k])[::-1]]
-            return [
-                ScoredInt(int(indices[i]), float(filtered_scores[i])) for i in order
-            ]
-        else:
-            # Predicate path: pre-filter by score in numpy, then apply predicate
-            # only to candidates that pass the score threshold.
-            candidates = np.flatnonzero(scores >= min_score)
-            scored_ordinals = [
-                ScoredInt(int(i), float(scores[i]))
-                for i in candidates
-                if predicate(int(i))
-            ]
-            scored_ordinals.sort(key=lambda x: x.score, reverse=True)
-            return scored_ordinals[:max_hits]
+        # One np.dot call computes the score of every stored vector:
+        scores: Iterable[float] = np.dot(self._vectors, embedding)
+        scored_ordinals = [
+            ScoredInt(i, score)
+            for i, score in enumerate(scores)
+            if score >= min_score and (predicate is None or predicate(i))
+        ]
+        scored_ordinals.sort(key=lambda x: x.score, reverse=True)
+        return scored_ordinals[:max_hits]
 
+    # TODO: Unify with fuzzy_lookup_embedding(); this just delegates via a predicate.
     def fuzzy_lookup_embedding_in_subset(
         self,
         embedding: NormalizedEmbedding,
         ordinals_of_subset: list[int],
         max_hits: int | None = None,
         min_score: float | None = None,
     ) -> list[ScoredInt]:
-        if max_hits is None:
-            max_hits = 10
-        if min_score is None:
-            min_score = 0.0
-        if not ordinals_of_subset or len(self._vectors) == 0:
-            return []
-        # Compute dot products only for the subset instead of all vectors.
-        subset = np.asarray(ordinals_of_subset)
-        scores = np.dot(self._vectors[subset], embedding)
-        indices = np.flatnonzero(scores >= min_score)
-        if len(indices) == 0:
-            return []
-        filtered_scores = scores[indices]
-        if len(indices) <= max_hits:
-            order = np.argsort(filtered_scores)[::-1]
-        else:
-            top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:]
-            order = top_k[np.argsort(filtered_scores[top_k])[::-1]]
-        return [
-            ScoredInt(int(subset[indices[i]]), float(filtered_scores[i])) for i in order
-        ]
+        ordinals_set = set(ordinals_of_subset)
+        return self.fuzzy_lookup_embedding(
+            embedding, max_hits, min_score, lambda i: i in ordinals_set
+        )
 
     async def fuzzy_lookup(
         self,

diff --git a/tests/benchmarks/test_benchmark_vectorbase.py b/tests/benchmarks/test_benchmark_vectorbase.py