KRRT7 · KRRT7 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026
diff --git a/Makefile b/Makefile
@@ -8,62 +8,68 @@ all: venv format check test build
 
 .PHONY: format
 format: venv
-	.venv/bin/isort src tests tools examples $(FLAGS)
-	.venv/bin/black -tpy312 -tpy313 -tpy314 src tests tools examples $(FLAGS)
+	uv run isort src tests tools examples $(FLAGS)
+	uv run black -tpy312 src tests tools examples $(FLAGS)
 
 .PHONY: check
 check: venv
-	.venv/bin/pyright --pythonpath .venv/bin/python src tests tools examples
+	uv run pyright src tests tools examples
 
 .PHONY: test
 test: venv
-	.venv/bin/pytest $(FLAGS)
+	uv run pytest $(FLAGS)
 
 .PHONY: coverage
 coverage: venv
-	coverage erase
-	COVERAGE_PROCESS_START=.coveragerc .venv/bin/coverage run -m pytest $(FLAGS)
-	coverage combine
-	coverage report
+	uv run coverage erase
+	COVERAGE_PROCESS_START=.coveragerc uv run coverage run -m pytest $(FLAGS)
+	uv run coverage combine
+	uv run coverage report
 
 .PHONY: demo
 demo: venv
-	.venv/bin/python -m tools.query $(FLAGS)
+	uv run python -m tools.query $(FLAGS)
 
 .PHONY: compare
 compare: venv
-	.venv/bin/python -m tools.query --batch $(FLAGS)
+	uv run python -m tools.query --batch $(FLAGS)
+
+.PHONY: eval
+eval: venv
+	rm -f eval.db
+	uv run python tools/load_json.py --database eval.db tests/testdata/Episode_53_AdrianTchaikovsky_index
+	uv run python tools/query.py --batch --database eval.db --answer-results tests/testdata/Episode_53_Answer_results.json --search-results tests/testdata/Episode_53_Search_results.json $(FLAGS)
 
 .PHONY: mcp
 mcp: venv
-	.venv/bin/mcp dev src/typeagent/mcp/server.py
+	uv run mcp dev src/typeagent/mcp/server.py
 
 .PHONY: profile
 profile: venv
-	</dev/null .venv/bin/python -m cProfile -s ncalls -m test.cmpsearch --interactive --podcast ~/AISystems-Archive/data/knowpro/test/indexes/All_Episodes_index | head -60
+	</dev/null uv run python -m cProfile -s ncalls -m test.cmpsearch --interactive --podcast ~/AISystems-Archive/data/knowpro/test/indexes/All_Episodes_index | head -60
 
 .PHONY: scaling
 scaling: venv
-	</dev/null .venv/bin/python -m test.cmpsearch --interactive --podcast ~/AISystems-Archive/data/knowpro/test/indexes/All_Episodes_index
+	</dev/null uv run python -m test.cmpsearch --interactive --podcast ~/AISystems-Archive/data/knowpro/test/indexes/All_Episodes_index
 
 .PHONY: build
 build: venv
 	uv build
 
 .PHONY: release
 release: venv
-	.venv/bin/python tools/release.py $(VERSION)
+	uv run python tools/release.py $(VERSION)
 
 .PHONY: venv
 venv: .venv
 
 .venv:
 	@echo "(If 'uv' fails with 'No such file or directory', try 'make install-uv')"
 	uv sync -q $(FLAGS)
-	.venv/bin/black --version
+	uv run black --version
 	@echo "(If 'pyright' fails with 'error while loading shared libraries: libatomic.so.1:', try 'make install-libatomic')"
-	.venv/bin/pyright --version
-	.venv/bin/pytest --version
+	uv run pyright --version
+	uv run pytest --version
 
 .PHONY: sync
 sync:

diff --git a/make.bat b/make.bat
@@ -27,26 +27,26 @@ goto help
 :format
 if not exist ".venv\" call make.bat venv
 echo Formatting code...
-.venv\Scripts\isort src tests tools examples
-.venv\Scripts\black src tests tools examples
+uv run isort src tests tools examples
+uv run black -tpy312 src tests tools examples
 goto end
 
 :check
 if not exist ".venv\" call make.bat venv
 echo Running type checks...
-.venv\Scripts\pyright --pythonpath .venv\Scripts\python src tests tools examples
+uv run pyright src tests tools examples
 goto end
 
 :test
 if not exist ".venv\" call make.bat venv
 echo Running unit tests...
-.venv\Scripts\python -m pytest
+uv run pytest
 goto end
 
 :demo
 if not exist ".venv\" call make.bat venv
 echo Running query tool...
-.venv\Scripts\python -m tools.query
+uv run python -m tools.query
 goto end
 
 :build
@@ -58,10 +58,10 @@ goto end
 :venv
 echo Creating virtual environment...
 uv sync -q
-.venv\Scripts\python --version
-.venv\Scripts\black --version
-.venv\Scripts\pyright --version
-.venv\Scripts\python -m pytest --version
+uv run python --version
+uv run black --version
+uv run pyright --version
+uv run pytest --version
 goto end
 
 :sync

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["uv_build>=0.9.10,<0.10.0"]
+requires = ["uv_build>=0.9.10,<0.11.0"]
 build-backend = "uv_build"
 
 [project]
@@ -30,7 +30,6 @@ classifiers = [
 ]
 dependencies = [
   "azure-identity>=1.22.0",
-  "black>=25.12.0",
   "colorama>=0.4.6",
   "mcp[cli]>=1.12.1",
   "numpy>=2.2.6",
@@ -81,6 +80,7 @@ known_local_folder = ["conftest"]
 dev = [
   "azure-mgmt-authorization>=4.0.0",
   "azure-mgmt-keyvault>=12.1.1",
+  "black>=25.12.0",
   "coverage[toml]>=7.9.1",
   "google-api-python-client>=2.184.0",
   "google-auth-httplib2>=0.2.0",
@@ -92,5 +92,6 @@ dev = [
   "pyright>=1.1.408",  # 407 has a regression
   "pytest>=8.3.5",
   "pytest-asyncio>=0.26.0",
+  "pytest-benchmark>=5.1.0",
   "pytest-mock>=3.14.0",
 ]
diff --git a/src/typeagent/aitools/model_adapters.py b/src/typeagent/aitools/model_adapters.py
@@ -184,10 +184,14 @@ def _make_azure_provider(
             azure_ad_token_provider=token_provider.get_token,
         )
     else:
+        apim_key = os.getenv("AZURE_APIM_SUBSCRIPTION_KEY")
         client = AsyncAzureOpenAI(
             azure_endpoint=azure_endpoint,
             api_version=api_version,
             api_key=raw_key,
+            default_headers=(
+                {"Ocp-Apim-Subscription-Key": apim_key} if apim_key else None
+            ),
         )
     return AzureProvider(openai_client=client)
 

diff --git a/src/typeagent/aitools/utils.py b/src/typeagent/aitools/utils.py
@@ -11,7 +11,6 @@
 import sys
 import time
 
-import black
 import colorama
 
 import typechat
@@ -45,25 +44,29 @@ def timelog(label: str, verbose: bool = True):
 
 
 def pretty_print(obj: object, prefix: str = "", suffix: str = "") -> None:
-    """Pretty-print an object using black.
+    """Print prefix + pprint-formatted obj + suffix, keeping dict insertion order."""
+    import pprint
 
-    NOTE: Only works if its repr() is a valid Python expression.
-    """
-    print(prefix + format_code(repr(obj)) + suffix)
+    line_width = min(200, shutil.get_terminal_size().columns)
+    print(prefix + pprint.pformat(obj, width=line_width, sort_dicts=False) + suffix)
 
 
 def format_code(text: str, line_width=None) -> str:
-    """Format a block of code using black, then reindent to 2 spaces.
+    """Format a Python literal expression using pprint, keeping dict key order.
 
-    NOTE: The text must be a valid Python expression or code block.
+    NOTE: The text should be a Python literal expression (as produced by repr()).
+    Text that is not a valid literal is pprint-formatted as a plain string instead.
     """
+    import ast
+    import pprint
+
     if line_width is None:
-        # Use the terminal width, but cap it to 200 characters.
         line_width = min(200, shutil.get_terminal_size().columns)
-    formatted_text = black.format_str(
-        text, mode=black.Mode(line_length=line_width)
-    ).rstrip()
-    return reindent(formatted_text)
+    try:
+        value = ast.literal_eval(text)
+    except (ValueError, TypeError, SyntaxError):
+        value = text  # Not a literal: pprint the raw string itself instead.
+    return pprint.pformat(value, width=line_width, sort_dicts=False)
 
 
 def reindent(text: str) -> str:
@@ -197,7 +200,12 @@ def parse_azure_endpoint(
             f"{endpoint_envvar}={azure_endpoint} doesn't contain valid api-version field"
         )
 
-    return azure_endpoint, m.group(1)
+    # Strip query string and /openai... path — AsyncAzureOpenAI expects a
+    # clean base URL and builds the deployment path internally.
+    clean_endpoint = azure_endpoint.split("?", 1)[0]
+    clean_endpoint = re.sub(r"/openai(/deployments/.*)?$", "", clean_endpoint)
+
+    return clean_endpoint, m.group(1)
 
 
 def get_azure_api_key(azure_api_key: str) -> str:
@@ -247,10 +255,15 @@ def create_async_openai_client(
         azure_api_key = get_azure_api_key(azure_api_key)
         azure_endpoint, api_version = parse_azure_endpoint(endpoint_envvar)
 
+        apim_key = os.getenv("AZURE_APIM_SUBSCRIPTION_KEY")
+
         return AsyncAzureOpenAI(
             api_version=api_version,
             azure_endpoint=azure_endpoint,
             api_key=azure_api_key,
+            default_headers=(
+                {"Ocp-Apim-Subscription-Key": apim_key} if apim_key else None
+            ),
         )
 
     else:
@@ -264,30 +277,21 @@ def make_agent[T](cls: type[T]):
     """Create Pydantic AI agent using hardcoded preferences."""
     from pydantic_ai import Agent, NativeOutput, ToolOutput
     from pydantic_ai.models.openai import OpenAIChatModel
-    from pydantic_ai.providers.azure import AzureProvider
 
     # Prefer straight OpenAI over Azure OpenAI.
     if os.getenv("OPENAI_API_KEY"):
         Wrapper = NativeOutput
         print(f"## Using OpenAI with {Wrapper.__name__} ##")
         model = OpenAIChatModel("gpt-4o")  # Retrieves OPENAI_API_KEY again.
 
-    elif azure_api_key := os.getenv("AZURE_OPENAI_API_KEY"):
-        azure_api_key = get_azure_api_key(azure_api_key)
-        azure_endpoint, api_version = parse_azure_endpoint("AZURE_OPENAI_ENDPOINT")
+    elif os.getenv("AZURE_OPENAI_API_KEY"):
+        from typeagent.aitools.model_adapters import _make_azure_provider
 
-        print(f"## {azure_endpoint} ##")
+        azure_provider = _make_azure_provider()
         Wrapper = ToolOutput
 
-        print(f"## Using Azure {api_version} with {Wrapper.__name__} ##")
-        model = OpenAIChatModel(
-            "gpt-4o",
-            provider=AzureProvider(
-                azure_endpoint=azure_endpoint,
-                api_version=api_version,
-                api_key=azure_api_key,
-            ),
-        )
+        print(f"## Using Azure with {Wrapper.__name__} ##")
+        model = OpenAIChatModel("gpt-4o", provider=azure_provider)
 
     else:
         raise RuntimeError(

diff --git a/src/typeagent/aitools/vectorbase.py b/src/typeagent/aitools/vectorbase.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-from collections.abc import Callable, Iterable
+from collections.abc import Callable
 from dataclasses import dataclass
 
 import numpy as np
@@ -132,28 +132,61 @@ def fuzzy_lookup_embedding(
             min_score = 0.0
         if len(self._vectors) == 0:
             return []
-        # This line does most of the work:
-        scores: Iterable[float] = np.dot(self._vectors, embedding)
-        scored_ordinals = [
-            ScoredInt(i, score)
-            for i, score in enumerate(scores)
-            if score >= min_score and (predicate is None or predicate(i))
-        ]
-        scored_ordinals.sort(key=lambda x: x.score, reverse=True)
-        return scored_ordinals[:max_hits]
+        scores = np.dot(self._vectors, embedding)
+        if predicate is None:
+            # Stay in numpy: filter by score, then top-k via argpartition.
+            indices = np.flatnonzero(scores >= min_score)
+            if len(indices) == 0:
+                return []
+            filtered_scores = scores[indices]
+            if len(indices) <= max_hits:
+                order = np.argsort(filtered_scores)[::-1]
+            else:
+                top_k = np.argpartition(filtered_scores, -max_hits)[-max_hits:]
+                order = top_k[np.argsort(filtered_scores[top_k])[::-1]]
+            return [
+                ScoredInt(int(indices[i]), float(filtered_scores[i])) for i in order
+            ]
+        else:
+            # Predicate path: pre-filter by score in numpy, apply predicate
+            # only to candidates above the threshold.
+            candidates = np.flatnonzero(scores >= min_score)
+            scored_ordinals = [
+                ScoredInt(int(i), float(scores[i]))
+                for i in candidates
+                if predicate(int(i))
+            ]
+            scored_ordinals.sort(key=lambda x: x.score, reverse=True)
+            return scored_ordinals[:max_hits]
 
-    # TODO: Make this and fuzzy_lookup_embedding() more similar.
     def fuzzy_lookup_embedding_in_subset(
         self,
         embedding: NormalizedEmbedding,
         ordinals_of_subset: list[int],
         max_hits: int | None = None,
         min_score: float | None = None,
     ) -> list[ScoredInt]:
-        ordinals_set = set(ordinals_of_subset)
-        return self.fuzzy_lookup_embedding(
-            embedding, max_hits, min_score, lambda i: i in ordinals_set
-        )
+        """Return the top `max_hits` ordinals from the subset, best score first.
+
+        Duplicate ordinals count once and ordinals outside the stored vectors are
+        ignored, as with the set-membership predicate this replaces.
+        """
+        if max_hits is None:
+            max_hits = 10
+        if min_score is None:
+            min_score = 0.0
+        if max_hits <= 0 or not ordinals_of_subset or len(self._vectors) == 0:
+            return []
+        # Compute dot products only for the subset instead of all vectors.
+        subset = np.unique(np.asarray(ordinals_of_subset, dtype=np.intp))
+        subset = subset[(subset >= 0) & (subset < len(self._vectors))]
+        scores = np.dot(self._vectors[subset], embedding)
+        indices = np.flatnonzero(scores >= min_score)
+        if len(indices) > max_hits:
+            # Partial selection: keep only the max_hits best before sorting.
+            indices = indices[np.argpartition(scores[indices], -max_hits)[-max_hits:]]
+        order = indices[np.argsort(scores[indices])[::-1]]
+        return [ScoredInt(int(subset[i]), float(scores[i])) for i in order]
 
     async def fuzzy_lookup(
         self,

diff --git a/src/typeagent/knowpro/answers.py b/src/typeagent/knowpro/answers.py
@@ -5,8 +5,6 @@
 from dataclasses import dataclass
 from typing import Any
 
-import black
-
 import typechat
 
 from .answer_context_schema import AnswerContext, RelevantKnowledge, RelevantMessage
@@ -127,10 +125,12 @@ def create_question_prompt(question: str) -> str:
 
 def create_context_prompt(context: AnswerContext) -> str:
     # TODO: Use a more compact representation of the context than JSON.
+    import pprint
+
     prompt = [
         "[ANSWER CONTEXT]",
         "===",
-        black.format_str(str(dictify(context)), mode=black.Mode(line_length=200)),
+        pprint.pformat(dictify(context), width=200, sort_dicts=False),
         "===",
     ]
     return "\n".join(prompt)