Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 40 additions & 24 deletions src/typeagent/aitools/vectorbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,46 @@
)
from .model_adapters import create_embedding_model

DEFAULT_MIN_SCORE = 0.85

# Empirical defaults for built-in OpenAI embedding models.
# Derived from repeated runs of `tools/repeat_embedding_benchmarks.py` against
# the Adrian Tchaikovsky Episode 53 search benchmark, sweeping min_score
# exhaustively over 0.01..1.00. For each model we keep the highest min_score
# that still preserves the best benchmark metrics, which yields plateau
# boundaries of 0.16 for `text-embedding-3-small`, 0.07 for
# `text-embedding-3-large`, and 0.72 for `text-embedding-ada-002`. These are
# repository defaults for known models, not universal truths: unknown models
# fall back to the long-standing 0.85, and callers can always override
# `min_score` explicitly for their own use cases or models. `max_matches` is
# intentionally absent from this table — the benchmark still reports a best
# `max_hits` row, but the library default remains `None` unless a caller opts
# into a specific limit.
MODEL_DEFAULT_MIN_SCORES: dict[str, float] = {
    "text-embedding-3-large": 0.07,
    "text-embedding-3-small": 0.16,
    "text-embedding-ada-002": 0.72,
}


def get_default_min_score(model_name: str) -> float:
    """Return the repository default score cutoff for a known model name."""

    try:
        return MODEL_DEFAULT_MIN_SCORES[model_name]
    except KeyError:
        # Unknown or unnamed model: keep the historical fallback cutoff.
        return DEFAULT_MIN_SCORE


@dataclass
class ScoredInt:
    """Associate an integer ordinal with its similarity score."""

    # Ordinal identifying the item (e.g. a row index into the vector store).
    item: int
    # Similarity score for the item; higher means a closer match.
    # NOTE(review): elsewhere in this file scores come from an embedding dot
    # product, so values are presumably in roughly [-1.0, 1.0] — confirm.
    score: float


@dataclass
class TextEmbeddingIndexSettings:
"""Runtime settings for embedding-backed fuzzy lookup."""

embedding_model: IEmbeddingModel
min_score: float # Between 0.0 and 1.0
max_matches: int | None # >= 1; None means no limit
Expand All @@ -34,10 +65,12 @@ def __init__(
max_matches: int | None = None,
batch_size: int | None = None,
):
self.min_score = min_score if min_score is not None else 0.85
self.embedding_model = embedding_model or create_embedding_model()
model_name = getattr(self.embedding_model, "model_name", "")
default_min_score = get_default_min_score(model_name)
self.min_score = min_score if min_score is not None else default_min_score
self.max_matches = max_matches if max_matches and max_matches >= 1 else None
self.batch_size = batch_size if batch_size and batch_size >= 1 else 8
self.embedding_model = embedding_model or create_embedding_model()


class VectorBase:
Expand Down Expand Up @@ -166,27 +199,10 @@ def fuzzy_lookup_embedding_in_subset(
max_hits: int | None = None,
min_score: float | None = None,
) -> list[ScoredInt]:
if max_hits is None:
max_hits = 10
if min_score is None:
min_score = 0.0
if not ordinals_of_subset or len(self._vectors) == 0:
return []
# Compute dot products only for the subset instead of all vectors.
subset = np.asarray(ordinals_of_subset)
scores = np.dot(self._vectors[subset], embedding)
indices = np.flatnonzero(scores >= min_score)
if len(indices) == 0:
return []
filtered_scores = scores[indices]
if len(indices) <= max_hits:
order = np.argsort(filtered_scores)[::-1]
else:
top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:]
order = top_k[np.argsort(filtered_scores[top_k])[::-1]]
return [
ScoredInt(int(subset[indices[i]]), float(filtered_scores[i])) for i in order
]
ordinals_set = set(ordinals_of_subset)
return self.fuzzy_lookup_embedding(
embedding, max_hits, min_score, lambda i: i in ordinals_set
)

async def fuzzy_lookup(
self,
Expand Down Expand Up @@ -235,7 +251,7 @@ def deserialize(self, data: NormalizedEmbeddings | None) -> None:
return
if self._embedding_size == 0:
if data.ndim < 2 or data.shape[0] == 0:
# Empty data can't determine size; just clear.
# Empty data can't determine size; just clear.
self.clear()
return
self._set_embedding_size(data.shape[1])
Expand Down
103 changes: 103 additions & 0 deletions tests/test_benchmark_embeddings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path

import pytest

MODULE_PATH = (
    Path(__file__).resolve().parent.parent / "tools" / "benchmark_embeddings.py"
)

# Load the benchmark script as an ad-hoc module so its helpers are testable
# without turning `tools/` into an importable package.
SPEC = spec_from_file_location("benchmark_embeddings_for_test", MODULE_PATH)
assert SPEC is not None and SPEC.loader is not None
BENCHMARK_EMBEDDINGS = module_from_spec(SPEC)
SPEC.loader.exec_module(BENCHMARK_EMBEDDINGS)

# Re-export the helpers under test so the tests below read like plain imports.
BenchmarkRow = BENCHMARK_EMBEDDINGS.BenchmarkRow
SearchMetrics = BENCHMARK_EMBEDDINGS.SearchMetrics
build_float_range = BENCHMARK_EMBEDDINGS.build_float_range
filter_min_scores_by_ceiling = BENCHMARK_EMBEDDINGS.filter_min_scores_by_ceiling
load_message_texts = BENCHMARK_EMBEDDINGS.load_message_texts
parse_float_list = BENCHMARK_EMBEDDINGS.parse_float_list
resolve_min_scores = BENCHMARK_EMBEDDINGS.resolve_min_scores
select_best_row = BENCHMARK_EMBEDDINGS.select_best_row


def make_row(
    min_score: float,
    max_hits: int,
    hit_rate: float,
    mean_reciprocal_rank: float,
) -> BenchmarkRow:
    """Construct a BenchmarkRow, hiding the nested SearchMetrics boilerplate."""

    metrics = SearchMetrics(
        hit_rate=hit_rate,
        mean_reciprocal_rank=mean_reciprocal_rank,
    )
    return BenchmarkRow(min_score=min_score, max_hits=max_hits, metrics=metrics)


def test_select_best_row_prefers_higher_min_score_on_metric_tie() -> None:
    """On identical metrics, the row with the larger min_score should win."""

    lower_cutoff = make_row(0.25, 15, 98.5, 0.7514)
    higher_cutoff = make_row(0.70, 15, 98.5, 0.7514)

    best_row = select_best_row([lower_cutoff, higher_cutoff])

    assert best_row.min_score == 0.70
    assert best_row.max_hits == 15


def test_select_best_row_prefers_lower_max_hits_on_full_tie() -> None:
    """When min_score and metrics all tie, the smaller max_hits should win."""

    best_row = select_best_row(
        [
            make_row(0.70, 20, 98.5, 0.7514),
            make_row(0.70, 15, 98.5, 0.7514),
        ]
    )

    assert best_row.min_score == 0.70
    assert best_row.max_hits == 15


def test_parse_float_list_defaults_to_tenth_point_grid() -> None:
    """A missing spec should fall back to the 0.1..0.9 tenth-point grid."""

    expected_grid = [i / 10 for i in range(1, 10)]

    assert parse_float_list(None) == expected_grid


def test_build_float_range_supports_hundredth_point_sweeps() -> None:
    """A 0.01 step should produce an inclusive hundredth-point grid."""

    values = build_float_range(0.01, 0.05, 0.01)

    assert values == [0.01, 0.02, 0.03, 0.04, 0.05]


def test_resolve_min_scores_uses_generated_range() -> None:
    """With no explicit list, the start/stop/step range is generated."""

    resolved = resolve_min_scores(None, 0.01, 0.03, 0.01)

    assert resolved == [0.01, 0.02, 0.03]


def test_resolve_min_scores_rejects_mixed_inputs() -> None:
    """Passing both an explicit list and a range spec should raise."""

    with pytest.raises(ValueError, match="Use either --min-scores"):
        resolve_min_scores("0.1,0.2", 0.01, 0.03, 0.01)


def test_filter_min_scores_by_ceiling_skips_guaranteed_zero_rows() -> None:
    """Candidate scores above the ceiling should be split out as skipped."""

    candidates = [0.01, 0.16, 0.17, 0.5]

    effective_scores, skipped_scores = filter_min_scores_by_ceiling(candidates, 0.16)

    assert effective_scores == [0.01, 0.16]
    assert skipped_scores == [0.17, 0.5]


def test_load_message_texts_returns_one_text_blob_per_message() -> None:
    """Loading the bundled dataset should yield a non-empty list of strings."""

    repo_root = Path(__file__).resolve().parent.parent
    message_texts = load_message_texts(repo_root)

    assert len(message_texts) > 0
    assert all(isinstance(text, str) for text in message_texts)
Loading