From e17728e2ec98d8eb2a0d5b6cb56dd00445bb2ddd Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 00:20:51 -0500 Subject: [PATCH 1/4] Batch SQLite INSERTs for semantic ref and property indexing Add add_terms_batch / add_properties_batch to the index interfaces with executemany-based SQLite implementations. Restructure add_metadata_to_index_from_list and add_to_property_index to collect all items first, then batch-insert via extend() and the new batch methods. Eliminates ~1000 individual INSERT round-trips during indexing. --- src/typeagent/knowpro/interfaces_core.py | 5 ++ src/typeagent/knowpro/interfaces_indexes.py | 5 ++ src/typeagent/storage/memory/propindex.py | 75 ++++++++++++++++-- src/typeagent/storage/memory/semrefindex.py | 87 ++++++++++++++++++++- src/typeagent/storage/sqlite/propindex.py | 30 +++++++ src/typeagent/storage/sqlite/semrefindex.py | 23 ++++++ 6 files changed, 215 insertions(+), 10 deletions(-) diff --git a/src/typeagent/knowpro/interfaces_core.py b/src/typeagent/knowpro/interfaces_core.py index 105e45b6..72f11f8c 100644 --- a/src/typeagent/knowpro/interfaces_core.py +++ b/src/typeagent/knowpro/interfaces_core.py @@ -168,6 +168,11 @@ async def add_term( semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal, ) -> str: ... + async def add_terms_batch( + self, + terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: ... + async def remove_term( self, term: str, semantic_ref_ordinal: SemanticRefOrdinal ) -> None: ... diff --git a/src/typeagent/knowpro/interfaces_indexes.py b/src/typeagent/knowpro/interfaces_indexes.py index a894ab88..3ae62024 100644 --- a/src/typeagent/knowpro/interfaces_indexes.py +++ b/src/typeagent/knowpro/interfaces_indexes.py @@ -59,6 +59,11 @@ async def add_property( semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal, ) -> None: ... + async def add_properties_batch( + self, + properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: ... + async def lookup_property( self, property_name: str, value: str ) -> list[ScoredSemanticRefOrdinal] | None: ... diff --git a/src/typeagent/storage/memory/propindex.py b/src/typeagent/storage/memory/propindex.py index acc7b89a..1a1c7968 100644 --- a/src/typeagent/storage/memory/propindex.py +++ b/src/typeagent/storage/memory/propindex.py @@ -109,6 +109,57 @@ async def build_property_index(conversation: IConversation) -> None: await add_to_property_index(conversation, 0) +def _collect_facet_properties( + facet: kplib.Facet | None, + ordinal: SemanticRefOrdinal, +) -> list[tuple[str, str, SemanticRefOrdinal]]: + """Collect property tuples from a facet without touching any index.""" + if facet is None: + return [] + props: list[tuple[str, str, SemanticRefOrdinal]] = [ + (PropertyNames.FacetName.value, facet.name, ordinal) + ] + value = facet.value + if value is not None: + if isinstance(value, float) and value: + value = f"{value:g}" + props.append((PropertyNames.FacetValue.value, str(value), ordinal)) + return props + + +def _collect_entity_properties( + entity: kplib.ConcreteEntity, + ordinal: SemanticRefOrdinal, +) -> list[tuple[str, str, SemanticRefOrdinal]]: + """Collect all property tuples for an entity.""" + props: list[tuple[str, str, SemanticRefOrdinal]] = [ + (PropertyNames.EntityName.value, entity.name, ordinal) + ] + for t in entity.type: + props.append((PropertyNames.EntityType.value, t, ordinal)) + if entity.facets: + for facet in entity.facets: + props.extend(_collect_facet_properties(facet, ordinal)) + return props + + +def _collect_action_properties( + action: kplib.Action, + ordinal: SemanticRefOrdinal, +) -> list[tuple[str, str, SemanticRefOrdinal]]: + """Collect all property tuples for an action.""" + props: list[tuple[str, str, SemanticRefOrdinal]] = [ + (PropertyNames.Verb.value, " ".join(action.verbs), ordinal) + ] + if action.subject_entity_name != "none": + props.append((PropertyNames.Subject.value, action.subject_entity_name, ordinal)) + if action.object_entity_name != "none": + props.append((PropertyNames.Object.value, action.object_entity_name, ordinal)) + if action.indirect_object_entity_name != "none": + props.append((PropertyNames.IndirectObject.value, action.indirect_object_entity_name, ordinal)) + return props + + async def add_to_property_index( conversation: IConversation, start_at_ordinal: SemanticRefOrdinal, @@ -127,29 +178,32 @@ async def add_to_property_index( semantic_refs = conversation.semantic_refs size = await semantic_refs.size() + collected: list[tuple[str, str, SemanticRefOrdinal]] = [] for semantic_ref_ordinal, semantic_ref in enumerate( await semantic_refs.get_slice(start_at_ordinal, size), start_at_ordinal, ): assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal if isinstance(semantic_ref.knowledge, kplib.Action): - await add_action_properties_to_index( - semantic_ref.knowledge, property_index, semantic_ref_ordinal + collected.extend( + _collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity): - await add_entity_properties_to_index( - semantic_ref.knowledge, property_index, semantic_ref_ordinal + collected.extend( + _collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, Tag): - tag = semantic_ref.knowledge - await property_index.add_property( - PropertyNames.Tag.value, tag.text, semantic_ref_ordinal + collected.append( + (PropertyNames.Tag.value, semantic_ref.knowledge.text, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, Topic): pass else: assert_never(semantic_ref.knowledge) + if collected: + await property_index.add_properties_batch(collected) + class PropertyIndex(IPropertyToSemanticRefIndex): def __init__(self): @@ -183,6 +237,13 @@ async def add_property( else: self._map[term_text] = [semantic_ref_ordinal] + async def add_properties_batch( + self, + properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: + for name, value, ordinal in properties: + await self.add_property(name, value, ordinal) + async def clear(self) -> None: self._map = {} diff --git a/src/typeagent/storage/memory/semrefindex.py b/src/typeagent/storage/memory/semrefindex.py index 6c42022d..7f131d9a 100644 --- a/src/typeagent/storage/memory/semrefindex.py +++ b/src/typeagent/storage/memory/semrefindex.py @@ -577,6 +577,48 @@ async def add_metadata_to_index[TMessage: IMessage]( i += 1 +def _collect_facet_terms(facet: kplib.Facet | None) -> list[str]: + """Collect terms from a facet without touching any index.""" + if facet is None: + return [] + terms = [facet.name] + if facet.value is not None: + terms.append(str(facet.value)) + return terms + + +def _collect_entity_terms(entity: kplib.ConcreteEntity) -> list[str]: + """Collect all terms an entity would add to the semantic ref index.""" + terms = [entity.name] + for t in entity.type: + terms.append(t) + if entity.facets: + for facet in entity.facets: + terms.extend(_collect_facet_terms(facet)) + return terms + + +def _collect_action_terms(action: kplib.Action) -> list[str]: + """Collect all terms an action would add to the semantic ref index.""" + terms = [" ".join(action.verbs)] + if action.subject_entity_name != "none": + terms.append(action.subject_entity_name) + if action.object_entity_name != "none": + terms.append(action.object_entity_name) + if action.indirect_object_entity_name != "none": + terms.append(action.indirect_object_entity_name) + if action.params: + for param in action.params: + if isinstance(param, str): + terms.append(param) + else: + terms.append(param.name) + if isinstance(param.value, str): + terms.append(param.value) + terms.extend(_collect_facet_terms(action.subject_entity_facet)) + return terms + + async def add_metadata_to_index_from_list[TMessage: IMessage]( messages: list[TMessage], semantic_refs: ISemanticRefCollection, @@ -585,18 +627,50 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( knowledge_validator: KnowledgeValidator | None = None, ) -> None: """Extract metadata knowledge from a list of messages starting at ordinal.""" + next_ordinal = await semantic_refs.size() + collected_refs: list[SemanticRef] = [] + collected_terms: list[tuple[str, SemanticRefOrdinal]] = [] + for i, msg in enumerate(messages, start_from_ordinal): knowledge_response = msg.get_knowledge() for entity in knowledge_response.entities: if knowledge_validator is None or knowledge_validator("entity", entity): - await add_entity_to_index(entity, semantic_refs, semantic_ref_index, i) + ref = SemanticRef( + semantic_ref_ordinal=next_ordinal, + range=text_range_from_location(i), + knowledge=entity, + ) + collected_refs.append(ref) + for term in _collect_entity_terms(entity): + collected_terms.append((term, next_ordinal)) + next_ordinal += 1 for action in knowledge_response.actions: if knowledge_validator is None or knowledge_validator("action", action): - await add_action_to_index(action, semantic_refs, semantic_ref_index, i) + ref = SemanticRef( + semantic_ref_ordinal=next_ordinal, + range=text_range_from_location(i), + knowledge=action, + ) + collected_refs.append(ref) + for term in _collect_action_terms(action): + collected_terms.append((term, next_ordinal)) + next_ordinal += 1 for topic_response in knowledge_response.topics: topic = Topic(text=topic_response) if knowledge_validator is None or knowledge_validator("topic", topic): - await add_topic_to_index(topic, semantic_refs, semantic_ref_index, i) + ref = SemanticRef( + semantic_ref_ordinal=next_ordinal, + range=text_range_from_location(i), + knowledge=topic, + ) + collected_refs.append(ref) + collected_terms.append((topic.text, next_ordinal)) + next_ordinal += 1 + + if collected_refs: + await semantic_refs.extend(collected_refs) + if collected_terms: + await semantic_ref_index.add_terms_batch(collected_terms) class TermToSemanticRefIndex(ITermToSemanticRefIndex): @@ -635,6 +709,13 @@ async def add_term( self._map[term] = [semantic_ref_ordinal] return term + async def add_terms_batch( + self, + terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: + for term, ordinal in terms: + await self.add_term(term, ordinal) + async def lookup_term(self, term: str) -> list[ScoredSemanticRefOrdinal] | None: return self._map.get(self._prepare_term(term)) or [] diff --git a/src/typeagent/storage/sqlite/propindex.py b/src/typeagent/storage/sqlite/propindex.py index 5a0fa63a..6a619869 100644 --- a/src/typeagent/storage/sqlite/propindex.py +++ b/src/typeagent/storage/sqlite/propindex.py @@ -67,6 +67,36 @@ async def add_property( (property_name, value, score, semref_id), ) + async def add_properties_batch( + self, + properties: list[tuple[str, str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + ) -> None: + if not properties: + return + from ...storage.memory.propindex import ( + make_property_term_text, + split_property_term_text, + ) + rows = [] + for property_name, value, ordinal in properties: + if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal): + semref_id = ordinal.semantic_ref_ordinal + score = ordinal.score + else: + semref_id = ordinal + score = 1.0 + term_text = make_property_term_text(property_name, value) + term_text = term_text.lower() + property_name, value = split_property_term_text(term_text) + if property_name.startswith("prop."): + property_name = property_name[5:] + rows.append((property_name, value, score, semref_id)) + cursor = self.db.cursor() + cursor.executemany( + "INSERT INTO PropertyIndex (prop_name, value_str, score, semref_id) VALUES (?, ?, ?, ?)", + rows, + ) + async def clear(self) -> None: cursor = self.db.cursor() cursor.execute("DELETE FROM PropertyIndex") diff --git a/src/typeagent/storage/sqlite/semrefindex.py b/src/typeagent/storage/sqlite/semrefindex.py index 682b8e7d..0925eb45 100644 --- a/src/typeagent/storage/sqlite/semrefindex.py +++ b/src/typeagent/storage/sqlite/semrefindex.py @@ -56,6 +56,29 @@ async def add_term( return term + async def add_terms_batch( + self, + terms: list[tuple[str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + ) -> None: + if not terms: + return + rows = [] + for term, ordinal in terms: + if not term: + continue + term = self._prepare_term(term) + if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal): + semref_id = ordinal.semantic_ref_ordinal + else: + semref_id = ordinal + rows.append((term, semref_id)) + if rows: + cursor = self.db.cursor() + cursor.executemany( + "INSERT OR IGNORE INTO SemanticRefIndex (term, semref_id) VALUES (?, ?)", + rows, + ) + async def remove_term( self, term: str, semantic_ref_ordinal: interfaces.SemanticRefOrdinal ) -> None: From fcc7c23ee06e35c27899051eae9894028d23f4b5 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 01:41:33 -0500 Subject: [PATCH 2/4] Remove underscore prefix from collect helper functions Rename _collect_{facet,entity,action}_{terms,properties} to drop the leading underscore in propindex.py and semrefindex.py. --- src/typeagent/storage/memory/propindex.py | 12 ++++++------ src/typeagent/storage/memory/semrefindex.py | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/typeagent/storage/memory/propindex.py b/src/typeagent/storage/memory/propindex.py index 1a1c7968..7ff49e29 100644 --- a/src/typeagent/storage/memory/propindex.py +++ b/src/typeagent/storage/memory/propindex.py @@ -109,7 +109,7 @@ async def build_property_index(conversation: IConversation) -> None: await add_to_property_index(conversation, 0) -def _collect_facet_properties( +def collect_facet_properties( facet: kplib.Facet | None, ordinal: SemanticRefOrdinal, ) -> list[tuple[str, str, SemanticRefOrdinal]]: @@ -127,7 +127,7 @@ def _collect_facet_properties( return props -def _collect_entity_properties( +def collect_entity_properties( entity: kplib.ConcreteEntity, ordinal: SemanticRefOrdinal, ) -> list[tuple[str, str, SemanticRefOrdinal]]: @@ -139,11 +139,11 @@ def _collect_entity_properties( props.append((PropertyNames.EntityType.value, t, ordinal)) if entity.facets: for facet in entity.facets: - props.extend(_collect_facet_properties(facet, ordinal)) + props.extend(collect_facet_properties(facet, ordinal)) return props -def _collect_action_properties( +def collect_action_properties( action: kplib.Action, ordinal: SemanticRefOrdinal, ) -> list[tuple[str, str, SemanticRefOrdinal]]: @@ -186,11 +186,11 @@ async def add_to_property_index( assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal if isinstance(semantic_ref.knowledge, kplib.Action): collected.extend( - _collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity): collected.extend( - _collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, Tag): collected.append( diff --git a/src/typeagent/storage/memory/semrefindex.py b/src/typeagent/storage/memory/semrefindex.py index 7f131d9a..a98a3b21 100644 --- a/src/typeagent/storage/memory/semrefindex.py +++ b/src/typeagent/storage/memory/semrefindex.py @@ -577,7 +577,7 @@ async def add_metadata_to_index[TMessage: IMessage]( i += 1 -def _collect_facet_terms(facet: kplib.Facet | None) -> list[str]: +def collect_facet_terms(facet: kplib.Facet | None) -> list[str]: """Collect terms from a facet without touching any index.""" if facet is None: return [] @@ -587,18 +587,18 @@ def _collect_facet_terms(facet: kplib.Facet | None) -> list[str]: return terms -def _collect_entity_terms(entity: kplib.ConcreteEntity) -> list[str]: +def collect_entity_terms(entity: kplib.ConcreteEntity) -> list[str]: """Collect all terms an entity would add to the semantic ref index.""" terms = [entity.name] for t in entity.type: terms.append(t) if entity.facets: for facet in entity.facets: - terms.extend(_collect_facet_terms(facet)) + terms.extend(collect_facet_terms(facet)) return terms -def _collect_action_terms(action: kplib.Action) -> list[str]: +def collect_action_terms(action: kplib.Action) -> list[str]: """Collect all terms an action would add to the semantic ref index.""" terms = [" ".join(action.verbs)] if action.subject_entity_name != "none": @@ -615,7 +615,7 @@ def _collect_action_terms(action: kplib.Action) -> list[str]: terms.append(param.name) if isinstance(param.value, str): terms.append(param.value) - terms.extend(_collect_facet_terms(action.subject_entity_facet)) + terms.extend(collect_facet_terms(action.subject_entity_facet)) return terms @@ -641,7 +641,7 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( knowledge=entity, ) collected_refs.append(ref) - for term in _collect_entity_terms(entity): + for term in collect_entity_terms(entity): collected_terms.append((term, next_ordinal)) next_ordinal += 1 for action in knowledge_response.actions: @@ -652,7 +652,7 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( knowledge=action, ) collected_refs.append(ref) - for term in _collect_action_terms(action): + for term in collect_action_terms(action): collected_terms.append((term, next_ordinal)) next_ordinal += 1 for topic_response in knowledge_response.topics: From 82ba650ca6b3da2cde6a68efc05c791f5dfe367e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 01:52:09 -0500 Subject: [PATCH 3/4] Fix pyright errors: use Sequence for batch method signatures Change list to Sequence in add_terms_batch and add_properties_batch interfaces and implementations to satisfy covariance. Add missing add_terms_batch to FakeTermIndex in conftest.py. --- src/typeagent/knowpro/interfaces_core.py | 3 ++- src/typeagent/knowpro/interfaces_indexes.py | 2 +- src/typeagent/storage/memory/propindex.py | 27 +++++++++++++++++---- src/typeagent/storage/memory/semrefindex.py | 8 +++--- src/typeagent/storage/sqlite/propindex.py | 10 +++++++- src/typeagent/storage/sqlite/semrefindex.py | 7 +++++- tests/conftest.py | 9 ++++++- 7 files changed, 53 insertions(+), 13 deletions(-) diff --git a/src/typeagent/knowpro/interfaces_core.py b/src/typeagent/knowpro/interfaces_core.py index 72f11f8c..cd9e885c 100644 --- a/src/typeagent/knowpro/interfaces_core.py +++ b/src/typeagent/knowpro/interfaces_core.py @@ -4,6 +4,7 @@ from __future__ import annotations +from collections.abc import Sequence from datetime import datetime as Datetime from typing import ( Any, @@ -170,7 +171,7 @@ async def add_term( async def add_terms_batch( self, - terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + terms: Sequence[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], ) -> None: ... async def remove_term( diff --git a/src/typeagent/knowpro/interfaces_indexes.py b/src/typeagent/knowpro/interfaces_indexes.py index 3ae62024..6c348a01 100644 --- a/src/typeagent/knowpro/interfaces_indexes.py +++ b/src/typeagent/knowpro/interfaces_indexes.py @@ -61,7 +61,7 @@ async def add_property( async def add_properties_batch( self, - properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + properties: Sequence[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], ) -> None: ... async def lookup_property( diff --git a/src/typeagent/storage/memory/propindex.py b/src/typeagent/storage/memory/propindex.py index 7ff49e29..f9717b24 100644 --- a/src/typeagent/storage/memory/propindex.py +++ b/src/typeagent/storage/memory/propindex.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +from collections.abc import Sequence import enum from typing import assert_never @@ -156,7 +157,13 @@ def collect_action_properties( if action.object_entity_name != "none": props.append((PropertyNames.Object.value, action.object_entity_name, ordinal)) if action.indirect_object_entity_name != "none": - props.append((PropertyNames.IndirectObject.value, action.indirect_object_entity_name, ordinal)) + props.append( + ( + PropertyNames.IndirectObject.value, + action.indirect_object_entity_name, + ordinal, + ) + ) return props @@ -186,15 +193,23 @@ async def add_to_property_index( assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal if isinstance(semantic_ref.knowledge, kplib.Action): collected.extend( - collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_action_properties( + semantic_ref.knowledge, semantic_ref_ordinal + ) ) elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity): collected.extend( - collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_entity_properties( + semantic_ref.knowledge, semantic_ref_ordinal + ) ) elif isinstance(semantic_ref.knowledge, Tag): collected.append( - (PropertyNames.Tag.value, semantic_ref.knowledge.text, semantic_ref_ordinal) + ( + PropertyNames.Tag.value, + semantic_ref.knowledge.text, + semantic_ref_ordinal, + ) ) elif isinstance(semantic_ref.knowledge, Topic): pass @@ -239,7 +254,9 @@ async def add_property( async def add_properties_batch( self, - properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + properties: Sequence[ + tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal] + ], ) -> None: for name, value, ordinal in properties: await self.add_property(name, value, ordinal) diff --git a/src/typeagent/storage/memory/semrefindex.py b/src/typeagent/storage/memory/semrefindex.py index a98a3b21..8437bacd 100644 --- a/src/typeagent/storage/memory/semrefindex.py +++ b/src/typeagent/storage/memory/semrefindex.py @@ -3,11 +3,13 @@ from __future__ import annotations # TODO: Avoid -from collections.abc import AsyncIterable, Callable +from collections.abc import AsyncIterable, Callable, Sequence from typechat import Failure -from ...knowpro import convknowledge, knowledge_schema as kplib, secindex +from ...knowpro import convknowledge +from ...knowpro import knowledge_schema as kplib +from ...knowpro import secindex from ...knowpro.convsettings import ConversationSettings, SemanticRefIndexSettings from ...knowpro.interfaces import ( # Interfaces.; Other imports. IConversation, @@ -711,7 +713,7 @@ async def add_term( async def add_terms_batch( self, - terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + terms: Sequence[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], ) -> None: for term, ordinal in terms: await self.add_term(term, ordinal) diff --git a/src/typeagent/storage/sqlite/propindex.py b/src/typeagent/storage/sqlite/propindex.py index 6a619869..f9704b45 100644 --- a/src/typeagent/storage/sqlite/propindex.py +++ b/src/typeagent/storage/sqlite/propindex.py @@ -3,6 +3,7 @@ """SQLite-based property index implementation.""" +from collections.abc import Sequence import sqlite3 from ...knowpro import interfaces @@ -69,7 +70,13 @@ async def add_property( async def add_properties_batch( self, - properties: list[tuple[str, str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + properties: Sequence[ + tuple[ + str, + str, + interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal, + ] + ], ) -> None: if not properties: return @@ -77,6 +84,7 @@ async def add_properties_batch( make_property_term_text, split_property_term_text, ) + rows = [] for property_name, value, ordinal in properties: if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal): diff --git a/src/typeagent/storage/sqlite/semrefindex.py b/src/typeagent/storage/sqlite/semrefindex.py index 0925eb45..ac68a1e0 100644 --- a/src/typeagent/storage/sqlite/semrefindex.py +++ b/src/typeagent/storage/sqlite/semrefindex.py @@ -3,6 +3,7 @@ """SQLite-based semantic reference index implementation.""" +from collections.abc import Sequence import re import sqlite3 import unicodedata @@ -58,7 +59,11 @@ async def add_term( async def add_terms_batch( self, - terms: list[tuple[str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + terms: Sequence[ + tuple[ + str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal + ] + ], ) -> None: if not terms: return diff --git a/tests/conftest.py b/tests/conftest.py index dae619c1..7f0f11f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from collections.abc import AsyncGenerator, Callable, Iterator +from collections.abc import AsyncGenerator, Callable, Iterator, Sequence import os from pathlib import Path import tempfile @@ -236,6 +236,13 @@ async def add_term( self.term_to_refs[term].append(scored_ref) return term + async def add_terms_batch( + self, + terms: Sequence[tuple[str, int | ScoredSemanticRefOrdinal]], + ) -> None: + for term, ordinal in terms: + await self.add_term(term, ordinal) + async def remove_term(self, term: str, semantic_ref_ordinal: int) -> None: if term in self.term_to_refs: self.term_to_refs[term] = [ From 9ae667f79ba278ffbc48e603bf0ee00f2daa1679 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 14 Apr 2026 18:52:26 -0500 Subject: [PATCH 4/4] Address review: fix inverse_actions bug, unify duplicated functions, move imports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix inverse_actions omission in add_metadata_to_index_from_list (regression) - Fix inverse_actions omission in add_metadata_to_index (pre-existing) - Delete duplicate add_entity_to_index, add_action_to_index, add_topic_to_index, text_range_from_location — unified into add_entity, add_action, add_topic - Update all callers and tests to use unified functions - Move function-level imports to top-level in sqlite/propindex.py per AGENTS.md --- src/typeagent/knowpro/interfaces_indexes.py | 4 +- src/typeagent/storage/memory/semrefindex.py | 142 +++++--------------- src/typeagent/storage/sqlite/propindex.py | 19 +-- tests/test_semrefindex.py | 18 +-- 4 files changed, 48 insertions(+), 135 deletions(-) diff --git a/src/typeagent/knowpro/interfaces_indexes.py b/src/typeagent/knowpro/interfaces_indexes.py index 6c348a01..c872fa7c 100644 --- a/src/typeagent/knowpro/interfaces_indexes.py +++ b/src/typeagent/knowpro/interfaces_indexes.py @@ -61,7 +61,9 @@ async def add_property( async def add_properties_batch( self, - properties: Sequence[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + properties: Sequence[ + tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal] + ], ) -> None: ... async def lookup_property( diff --git a/src/typeagent/storage/memory/semrefindex.py b/src/typeagent/storage/memory/semrefindex.py index 8437bacd..773f9212 100644 --- a/src/typeagent/storage/memory/semrefindex.py +++ b/src/typeagent/storage/memory/semrefindex.py @@ -26,7 +26,6 @@ TermToSemanticRefIndexData, TermToSemanticRefIndexItemData, TextLocation, - TextRange, Topic, ) from ...knowpro.knowledge import extract_knowledge_from_text_batch @@ -35,17 +34,6 @@ text_range_from_message_chunk, ) - -def text_range_from_location( - message_ordinal: MessageOrdinal, - chunk_ordinal: int = 0, -) -> TextRange: - return TextRange( - start=TextLocation(message_ordinal, chunk_ordinal), - end=None, - ) - - type KnowledgeValidator = Callable[ [ KnowledgeType, # knowledge_type @@ -151,31 +139,6 @@ async def add_batch_to_semantic_ref_index_from_list[ ) -async def add_entity_to_index( - entity: kplib.ConcreteEntity, - semantic_refs: ISemanticRefCollection, - semantic_ref_index: ITermToSemanticRefIndex, - message_ordinal: MessageOrdinal, - chunk_ordinal: int = 0, -) -> None: - ref_ordinal = await semantic_refs.size() - await semantic_refs.append( - SemanticRef( - semantic_ref_ordinal=ref_ordinal, - range=text_range_from_location(message_ordinal, chunk_ordinal), - knowledge=entity, - ) - ) - await semantic_ref_index.add_term(entity.name, ref_ordinal) - # Add each type as a separate term. - for type in entity.type: - await semantic_ref_index.add_term(type, ref_ordinal) - # Add every facet name as a separate term. - if entity.facets: - for facet in entity.facets: - await add_facet(facet, ref_ordinal, semantic_ref_index) - - async def add_term_to_index( index: ITermToSemanticRefIndex, term: str, @@ -200,7 +163,7 @@ async def add_entity( semantic_refs: ISemanticRefCollection, semantic_ref_index: ITermToSemanticRefIndex, message_ordinal: MessageOrdinal, - chunk_ordinal: int, + chunk_ordinal: int = 0, terms_added: set[str] | None = None, ) -> None: """Add an entity to the semantic reference index. @@ -269,7 +232,7 @@ async def add_topic( semantic_refs: ISemanticRefCollection, semantic_ref_index: ITermToSemanticRefIndex, message_ordinal: MessageOrdinal, - chunk_ordinal: int, + chunk_ordinal: int = 0, terms_added: set[str] | None = None, ) -> None: """Add a topic to the semantic reference index. @@ -304,7 +267,7 @@ async def add_action( semantic_refs: ISemanticRefCollection, semantic_ref_index: ITermToSemanticRefIndex, message_ordinal: MessageOrdinal, - chunk_ordinal: int, + chunk_ordinal: int = 0, terms_added: set[str] | None = None, ) -> None: """Add an action to the semantic reference index. @@ -463,61 +426,6 @@ def validate_entity(entity: kplib.ConcreteEntity) -> bool: return bool(entity.name) -async def add_topic_to_index( - topic: Topic | str, - semantic_refs: ISemanticRefCollection, - semantic_ref_index: ITermToSemanticRefIndex, - message_ordinal: MessageOrdinal, - chunk_ordinal: int = 0, -) -> None: - if isinstance(topic, str): - topic = Topic(text=topic) - ref_ordinal = await semantic_refs.size() - await semantic_refs.append( - SemanticRef( - semantic_ref_ordinal=ref_ordinal, - range=text_range_from_location(message_ordinal, chunk_ordinal), - knowledge=topic, - ) - ) - await semantic_ref_index.add_term(topic.text, ref_ordinal) - - -async def add_action_to_index( - action: kplib.Action, - semantic_refs: ISemanticRefCollection, - semantic_ref_index: ITermToSemanticRefIndex, - message_ordinal: int, - chunk_ordinal: int = 0, -) -> None: - ref_ordinal = await semantic_refs.size() - await semantic_refs.append( - SemanticRef( - semantic_ref_ordinal=ref_ordinal, - range=text_range_from_location(message_ordinal, chunk_ordinal), - knowledge=action, - ) - ) - await semantic_ref_index.add_term(" ".join(action.verbs), ref_ordinal) - if action.subject_entity_name != "none": - await semantic_ref_index.add_term(action.subject_entity_name, ref_ordinal) - if action.object_entity_name != "none": - await semantic_ref_index.add_term(action.object_entity_name, ref_ordinal) - if action.indirect_object_entity_name != "none": - await semantic_ref_index.add_term( - action.indirect_object_entity_name, ref_ordinal - ) - if action.params: - for param in action.params: - if isinstance(param, str): - await semantic_ref_index.add_term(param, ref_ordinal) - else: - await semantic_ref_index.add_term(param.name, ref_ordinal) - if isinstance(param.value, str): - await semantic_ref_index.add_term(param.value, ref_ordinal) - await add_facet(action.subject_entity_facet, ref_ordinal, semantic_ref_index) - - async def add_knowledge_to_index( semantic_refs: ISemanticRefCollection, semantic_ref_index: ITermToSemanticRefIndex, @@ -525,20 +433,16 @@ async def add_knowledge_to_index( knowledge: kplib.KnowledgeResponse, ) -> None: for entity in knowledge.entities: - await add_entity_to_index( - entity, semantic_refs, semantic_ref_index, message_ordinal - ) + await add_entity(entity, semantic_refs, semantic_ref_index, message_ordinal) for action in knowledge.actions: - await add_action_to_index( - action, semantic_refs, semantic_ref_index, message_ordinal - ) + await add_action(action, semantic_refs, semantic_ref_index, message_ordinal) for inverse_action in knowledge.inverse_actions: - await add_action_to_index( + await add_action( inverse_action, semantic_refs, semantic_ref_index, message_ordinal ) for topic in knowledge.topics: - await add_topic_to_index( - topic, semantic_refs, semantic_ref_index, message_ordinal + await add_topic( + Topic(text=topic), semantic_refs, semantic_ref_index, message_ordinal ) @@ -568,14 +472,19 @@ async def add_metadata_to_index[TMessage: IMessage]( knowledge_response = msg.get_knowledge() for entity in knowledge_response.entities: if knowledge_validator is None or knowledge_validator("entity", entity): - await add_entity_to_index(entity, semantic_refs, semantic_ref_index, i) + await add_entity(entity, semantic_refs, semantic_ref_index, i) for action in knowledge_response.actions: if knowledge_validator is None or knowledge_validator("action", action): - await add_action_to_index(action, semantic_refs, semantic_ref_index, i) + await add_action(action, semantic_refs, semantic_ref_index, i) + for inverse_action in knowledge_response.inverse_actions: + if knowledge_validator is None or knowledge_validator( + "action", inverse_action + ): + await add_action(inverse_action, semantic_refs, semantic_ref_index, i) for topic_response in knowledge_response.topics: topic = Topic(text=topic_response) if knowledge_validator is None or knowledge_validator("topic", topic): - await add_topic_to_index(topic, semantic_refs, semantic_ref_index, i) + await add_topic(topic, semantic_refs, semantic_ref_index, i) i += 1 @@ -639,7 +548,7 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( if knowledge_validator is None or knowledge_validator("entity", entity): ref = SemanticRef( semantic_ref_ordinal=next_ordinal, - range=text_range_from_location(i), + range=text_range_from_message_chunk(i), knowledge=entity, ) collected_refs.append(ref) @@ -650,19 +559,32 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( if knowledge_validator is None or knowledge_validator("action", action): ref = SemanticRef( semantic_ref_ordinal=next_ordinal, - range=text_range_from_location(i), + range=text_range_from_message_chunk(i), knowledge=action, ) collected_refs.append(ref) for term in collect_action_terms(action): collected_terms.append((term, next_ordinal)) next_ordinal += 1 + for inverse_action in knowledge_response.inverse_actions: + if knowledge_validator is None or knowledge_validator( + "action", inverse_action + ): + ref = SemanticRef( + semantic_ref_ordinal=next_ordinal, + range=text_range_from_message_chunk(i), + knowledge=inverse_action, + ) + collected_refs.append(ref) + for term in collect_action_terms(inverse_action): + collected_terms.append((term, next_ordinal)) + next_ordinal += 1 for topic_response in knowledge_response.topics: topic = Topic(text=topic_response) if knowledge_validator is None or knowledge_validator("topic", topic): ref = SemanticRef( semantic_ref_ordinal=next_ordinal, - range=text_range_from_location(i), + range=text_range_from_message_chunk(i), knowledge=topic, ) collected_refs.append(ref) diff --git a/src/typeagent/storage/sqlite/propindex.py b/src/typeagent/storage/sqlite/propindex.py index f9704b45..59a5a111 100644 --- a/src/typeagent/storage/sqlite/propindex.py +++ b/src/typeagent/storage/sqlite/propindex.py @@ -8,6 +8,10 @@ from ...knowpro import interfaces from ...knowpro.interfaces import ScoredSemanticRefOrdinal +from ...storage.memory.propindex import ( + make_property_term_text, + split_property_term_text, +) class SqlitePropertyIndex(interfaces.IPropertyToSemanticRefIndex): @@ -47,11 +51,6 @@ async def add_property( score = 1.0 # Normalize property name and value (to match in-memory implementation) - from ...storage.memory.propindex import ( - make_property_term_text, - split_property_term_text, - ) - term_text = make_property_term_text(property_name, value) term_text = term_text.lower() # Matches PropertyIndex._prepare_term_text property_name, value = split_property_term_text(term_text) @@ -80,11 +79,6 @@ async def add_properties_batch( ) -> None: if not properties: return - from ...storage.memory.propindex import ( - make_property_term_text, - split_property_term_text, - ) - rows = [] for property_name, value, ordinal in properties: if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal): @@ -115,11 +109,6 @@ async def lookup_property( value: str, ) -> list[interfaces.ScoredSemanticRefOrdinal] | None: # Normalize property name and value (to match in-memory implementation) - from ...storage.memory.propindex import ( - make_property_term_text, - split_property_term_text, - ) - term_text = make_property_term_text(property_name, value) term_text = term_text.lower() # Matches PropertyIndex._prepare_term_text property_name, value = split_property_term_text(term_text) diff --git a/tests/test_semrefindex.py b/tests/test_semrefindex.py index d13878ec..5f580992 100644 --- a/tests/test_semrefindex.py +++ b/tests/test_semrefindex.py @@ -29,10 +29,10 @@ from typeagent.storage import SqliteStorageProvider from typeagent.storage.memory import MemoryStorageProvider from typeagent.storage.memory.semrefindex import ( - add_action_to_index, - add_entity_to_index, + add_action, + add_entity, add_knowledge_to_index, - add_topic_to_index, + add_topic, TermToSemanticRefIndex, ) @@ -216,7 +216,7 @@ async def test_semantic_ref_index_serialize_and_deserialize( @pytest.mark.asyncio -async def test_add_entity_to_index( +async def test_add_entity( semantic_ref_setup: Dict[str, ITermToSemanticRefIndex | ISemanticRefCollection], needs_auth: None, ) -> None: @@ -229,7 +229,7 @@ async def test_add_entity_to_index( type=["object", "example"], facets=[Facet(name="color", value="blue")], ) - await add_entity_to_index(entity, semantic_refs, semantic_ref_index, 0) + await add_entity(entity, semantic_refs, semantic_ref_index, 0) assert await semantic_refs.size() == 1 assert (await semantic_refs.get_item(0)).knowledge.knowledge_type == "entity" @@ -252,7 +252,7 @@ async def test_add_entity_to_index( @pytest.mark.asyncio -async def test_add_topic_to_index( +async def test_add_topic( semantic_ref_setup: Dict[str, ITermToSemanticRefIndex | ISemanticRefCollection], needs_auth: None, ) -> None: @@ -261,7 +261,7 @@ async def test_add_topic_to_index( semantic_refs: ISemanticRefCollection = semantic_ref_setup["collection"] # type: ignore topic = "ExampleTopic" - await add_topic_to_index(topic, semantic_refs, semantic_ref_index, 0) + await add_topic(Topic(text=topic), semantic_refs, semantic_ref_index, 0) assert await semantic_refs.size() == 1 assert (await semantic_refs.get_item(0)).knowledge.knowledge_type == "topic" @@ -275,7 +275,7 @@ async def test_add_topic_to_index( @pytest.mark.asyncio -async def test_add_action_to_index( +async def test_add_action( semantic_ref_setup: Dict[str, ITermToSemanticRefIndex | ISemanticRefCollection], needs_auth: None, ) -> None: @@ -292,7 +292,7 @@ async def test_add_action_to_index( params=None, subject_entity_facet=None, ) - await add_action_to_index(action, semantic_refs, semantic_ref_index, 0) + await add_action(action, semantic_refs, semantic_ref_index, 0) assert await semantic_refs.size() == 1 assert (await semantic_refs.get_item(0)).knowledge.knowledge_type == "action"