Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/typeagent/knowpro/interfaces_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from __future__ import annotations

from collections.abc import Sequence
from datetime import datetime as Datetime
from typing import (
Any,
Expand Down Expand Up @@ -168,6 +169,11 @@ async def add_term(
semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal,
) -> str: ...

# Interface stub (body is `...`) for adding many terms in one call.
# `terms` is a sequence of (term, ordinal) pairs; each ordinal may be a plain
# SemanticRefOrdinal or a ScoredSemanticRefOrdinal. Nothing is returned.
# NOTE(review): presumably the batch counterpart of add_term - confirm that
# implementations are expected to match per-item add_term semantics.
async def add_terms_batch(
self,
terms: Sequence[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]],
) -> None: ...

# Interface stub (body is `...`): takes a term and a plain SemanticRefOrdinal
# (the scored variant is not accepted by this signature) and returns nothing.
async def remove_term(
self, term: str, semantic_ref_ordinal: SemanticRefOrdinal
) -> None: ...
Expand Down
5 changes: 5 additions & 0 deletions src/typeagent/knowpro/interfaces_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ async def add_property(
semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal,
) -> None: ...

# Interface stub (body is `...`) for adding many properties in one call.
# `properties` is a sequence of (property name, value, ordinal) triples; each
# ordinal may be a plain SemanticRefOrdinal or a ScoredSemanticRefOrdinal.
# NOTE(review): presumably the batch counterpart of add_property - confirm
# that implementations are expected to match per-item add_property semantics.
async def add_properties_batch(
self,
properties: Sequence[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]],
) -> None: ...

# Interface stub (body is `...`): looks up by property name and value and is
# annotated to return either a list of ScoredSemanticRefOrdinal or None.
async def lookup_property(
self, property_name: str, value: str
) -> list[ScoredSemanticRefOrdinal] | None: ...
Expand Down
92 changes: 85 additions & 7 deletions src/typeagent/storage/memory/propindex.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from collections.abc import Sequence
import enum
from typing import assert_never

Expand Down Expand Up @@ -109,6 +110,63 @@ async def build_property_index(conversation: IConversation) -> None:
await add_to_property_index(conversation, 0)


def collect_facet_properties(
    facet: kplib.Facet | None,
    ordinal: SemanticRefOrdinal,
) -> list[tuple[str, str, SemanticRefOrdinal]]:
    """Build the property tuples contributed by one facet.

    This is a pure helper: it only assembles ``(property name, value,
    ordinal)`` tuples and never writes to an index.

    Args:
        facet: The facet to describe, or ``None``.
        ordinal: The ordinal attached to every tuple produced.

    Returns:
        An empty list when ``facet`` is ``None``; otherwise a facet-name
        tuple, followed by a facet-value tuple when the facet has a value.
    """
    if facet is None:
        return []

    collected: list[tuple[str, str, SemanticRefOrdinal]] = [
        (PropertyNames.FacetName.value, facet.name, ordinal)
    ]
    raw = facet.value
    if raw is None:
        return collected

    # Non-zero floats use the compact "g" format; everything else
    # (including 0.0, which is falsy) goes through plain str().
    if isinstance(raw, float) and raw:
        text = f"{raw:g}"
    else:
        text = str(raw)
    collected.append((PropertyNames.FacetValue.value, text, ordinal))
    return collected


def collect_entity_properties(
    entity: kplib.ConcreteEntity,
    ordinal: SemanticRefOrdinal,
) -> list[tuple[str, str, SemanticRefOrdinal]]:
    """Build the property tuples contributed by one entity.

    Args:
        entity: The entity whose name, types and facets are described.
        ordinal: The ordinal attached to every tuple produced.

    Returns:
        The entity-name tuple first, then one entity-type tuple per entry of
        ``entity.type``, then the tuples of each facet in order (nothing is
        added for facets when ``entity.facets`` is empty or ``None``).
    """
    name_prop = (PropertyNames.EntityName.value, entity.name, ordinal)
    type_props = [
        (PropertyNames.EntityType.value, type_name, ordinal)
        for type_name in entity.type
    ]
    facet_props = [
        prop
        for facet in entity.facets or ()
        for prop in collect_facet_properties(facet, ordinal)
    ]
    return [name_prop, *type_props, *facet_props]


def collect_action_properties(
    action: kplib.Action,
    ordinal: SemanticRefOrdinal,
) -> list[tuple[str, str, SemanticRefOrdinal]]:
    """Build the property tuples contributed by one action.

    Args:
        action: The action whose verbs and participant names are described.
        ordinal: The ordinal attached to every tuple produced.

    Returns:
        The verb tuple first (all verbs joined with single spaces), followed
        by subject, object and indirect-object tuples, in that order, for each
        participant whose name is not the literal string ``"none"``.
    """
    props: list[tuple[str, str, SemanticRefOrdinal]] = [
        (PropertyNames.Verb.value, " ".join(action.verbs), ordinal)
    ]
    # The three participant slots share the same rule, so drive them from data
    # instead of repeating the "none" check three times.
    optional_roles = (
        (PropertyNames.Subject, action.subject_entity_name),
        (PropertyNames.Object, action.object_entity_name),
        (PropertyNames.IndirectObject, action.indirect_object_entity_name),
    )
    props.extend(
        (role.value, entity_name, ordinal)
        for role, entity_name in optional_roles
        if entity_name != "none"  # "none" marks an absent participant slot
    )
    return props


async def add_to_property_index(
conversation: IConversation,
start_at_ordinal: SemanticRefOrdinal,
Expand All @@ -127,29 +185,40 @@ async def add_to_property_index(
semantic_refs = conversation.semantic_refs
size = await semantic_refs.size()

collected: list[tuple[str, str, SemanticRefOrdinal]] = []
for semantic_ref_ordinal, semantic_ref in enumerate(
await semantic_refs.get_slice(start_at_ordinal, size),
start_at_ordinal,
):
assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal
if isinstance(semantic_ref.knowledge, kplib.Action):
await add_action_properties_to_index(
semantic_ref.knowledge, property_index, semantic_ref_ordinal
collected.extend(
collect_action_properties(
semantic_ref.knowledge, semantic_ref_ordinal
)
)
elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity):
await add_entity_properties_to_index(
semantic_ref.knowledge, property_index, semantic_ref_ordinal
collected.extend(
collect_entity_properties(
semantic_ref.knowledge, semantic_ref_ordinal
)
)
elif isinstance(semantic_ref.knowledge, Tag):
tag = semantic_ref.knowledge
await property_index.add_property(
PropertyNames.Tag.value, tag.text, semantic_ref_ordinal
collected.append(
(
PropertyNames.Tag.value,
semantic_ref.knowledge.text,
semantic_ref_ordinal,
)
)
elif isinstance(semantic_ref.knowledge, Topic):
pass
else:
assert_never(semantic_ref.knowledge)

if collected:
await property_index.add_properties_batch(collected)


class PropertyIndex(IPropertyToSemanticRefIndex):
def __init__(self):
Expand Down Expand Up @@ -183,6 +252,15 @@ async def add_property(
else:
self._map[term_text] = [semantic_ref_ordinal]

async def add_properties_batch(
    self,
    properties: Sequence[
        tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]
    ],
) -> None:
    """Add each ``(property name, value, ordinal)`` triple to the index.

    The triples are forwarded one at a time, in the given order, to
    ``add_property``; an empty sequence does nothing.
    """
    for entry in properties:
        property_name, value, semantic_ref_ordinal = entry
        await self.add_property(property_name, value, semantic_ref_ordinal)

async def clear(self) -> None:
    """Forget every entry by rebinding the map to a fresh, empty dict.

    The previous dict object is left untouched, only replaced.
    """
    self._map = dict()

Expand Down
93 changes: 88 additions & 5 deletions src/typeagent/storage/memory/semrefindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@

from __future__ import annotations # TODO: Avoid

from collections.abc import AsyncIterable, Callable
from collections.abc import AsyncIterable, Callable, Sequence

from typechat import Failure

from ...knowpro import convknowledge, knowledge_schema as kplib, secindex
from ...knowpro import convknowledge
from ...knowpro import knowledge_schema as kplib
from ...knowpro import secindex
from ...knowpro.convsettings import ConversationSettings, SemanticRefIndexSettings
from ...knowpro.interfaces import ( # Interfaces.; Other imports.
IConversation,
Expand Down Expand Up @@ -577,6 +579,48 @@ async def add_metadata_to_index[TMessage: IMessage](
i += 1


def collect_facet_terms(facet: kplib.Facet | None) -> list[str]:
    """List the index terms a facet contributes, without touching any index.

    Returns:
        ``[]`` for ``None``; ``[name]`` when the facet has no value; otherwise
        ``[name, str(value)]``.
    """
    if facet is None:
        return []
    if facet.value is None:
        return [facet.name]
    return [facet.name, str(facet.value)]


def collect_entity_terms(entity: kplib.ConcreteEntity) -> list[str]:
    """List the index terms an entity contributes.

    Returns:
        The entity name, then every entry of ``entity.type``, then the terms
        of each facet in order (facets are skipped when ``entity.facets`` is
        empty or ``None``).
    """
    gathered = [entity.name, *entity.type]
    for facet in entity.facets or ():
        gathered += collect_facet_terms(facet)
    return gathered


def collect_action_terms(action: kplib.Action) -> list[str]:
    """List the index terms an action contributes.

    Returns:
        In order: the verbs joined with single spaces; the subject, object and
        indirect-object names that are not the literal ``"none"``; the terms
        from ``action.params``; and finally the terms of the subject entity
        facet.
    """
    found = [" ".join(action.verbs)]

    participant_names = (
        action.subject_entity_name,
        action.object_entity_name,
        action.indirect_object_entity_name,
    )
    found.extend(name for name in participant_names if name != "none")

    for param in action.params or ():
        if isinstance(param, str):
            # A bare string parameter is itself the term.
            found.append(param)
            continue
        # Structured parameter: always its name, plus its value if textual.
        found.append(param.name)
        if isinstance(param.value, str):
            found.append(param.value)

    found += collect_facet_terms(action.subject_entity_facet)
    return found


async def add_metadata_to_index_from_list[TMessage: IMessage](
Comment thread
KRRT7 marked this conversation as resolved.
messages: list[TMessage],
semantic_refs: ISemanticRefCollection,
Expand All @@ -585,18 +629,50 @@ async def add_metadata_to_index_from_list[TMessage: IMessage](
knowledge_validator: KnowledgeValidator | None = None,
) -> None:
"""Extract metadata knowledge from a list of messages starting at ordinal."""
next_ordinal = await semantic_refs.size()
Comment thread
KRRT7 marked this conversation as resolved.
collected_refs: list[SemanticRef] = []
collected_terms: list[tuple[str, SemanticRefOrdinal]] = []

for i, msg in enumerate(messages, start_from_ordinal):
knowledge_response = msg.get_knowledge()
for entity in knowledge_response.entities:
if knowledge_validator is None or knowledge_validator("entity", entity):
await add_entity_to_index(entity, semantic_refs, semantic_ref_index, i)
ref = SemanticRef(
semantic_ref_ordinal=next_ordinal,
range=text_range_from_location(i),
knowledge=entity,
)
collected_refs.append(ref)
for term in collect_entity_terms(entity):
collected_terms.append((term, next_ordinal))
next_ordinal += 1
for action in knowledge_response.actions:
if knowledge_validator is None or knowledge_validator("action", action):
await add_action_to_index(action, semantic_refs, semantic_ref_index, i)
ref = SemanticRef(
semantic_ref_ordinal=next_ordinal,
range=text_range_from_location(i),
knowledge=action,
)
collected_refs.append(ref)
for term in collect_action_terms(action):
collected_terms.append((term, next_ordinal))
next_ordinal += 1
for topic_response in knowledge_response.topics:
topic = Topic(text=topic_response)
if knowledge_validator is None or knowledge_validator("topic", topic):
await add_topic_to_index(topic, semantic_refs, semantic_ref_index, i)
ref = SemanticRef(
semantic_ref_ordinal=next_ordinal,
range=text_range_from_location(i),
knowledge=topic,
)
collected_refs.append(ref)
collected_terms.append((topic.text, next_ordinal))
next_ordinal += 1

if collected_refs:
await semantic_refs.extend(collected_refs)
if collected_terms:
await semantic_ref_index.add_terms_batch(collected_terms)


class TermToSemanticRefIndex(ITermToSemanticRefIndex):
Expand Down Expand Up @@ -635,6 +711,13 @@ async def add_term(
self._map[term] = [semantic_ref_ordinal]
return term

async def add_terms_batch(
    self,
    terms: Sequence[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]],
) -> None:
    """Add each ``(term, ordinal)`` pair to the index.

    The pairs are forwarded one at a time, in the given order, to
    ``add_term``; whatever ``add_term`` returns is discarded.
    """
    for pair in terms:
        text, semantic_ref_ordinal = pair
        await self.add_term(text, semantic_ref_ordinal)

async def lookup_term(self, term: str) -> list[ScoredSemanticRefOrdinal] | None:
    """Return the entries stored for ``term`` after ``_prepare_term`` is applied.

    A missing key and a stored empty list both produce a fresh empty list, so
    this implementation never actually returns ``None``.
    """
    key = self._prepare_term(term)
    matches = self._map.get(key)
    return matches if matches else []

Expand Down
38 changes: 38 additions & 0 deletions src/typeagent/storage/sqlite/propindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""SQLite-based property index implementation."""

from collections.abc import Sequence
import sqlite3

from ...knowpro import interfaces
Expand Down Expand Up @@ -67,6 +68,43 @@ async def add_property(
(property_name, value, score, semref_id),
)

async def add_properties_batch(
    self,
    properties: Sequence[
        tuple[
            str,
            str,
            interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal,
        ]
    ],
) -> None:
    """Insert many properties into the ``PropertyIndex`` table in one call.

    Args:
        properties: ``(property name, value, ordinal)`` triples. A scored
            ordinal supplies its own score; a plain ordinal is stored with a
            score of ``1.0``.

    Each name/value pair is normalized by building the combined term text,
    lowercasing it and splitting it back apart; a leading ``"prop."`` on the
    resulting name is dropped. All rows then go to the database through a
    single ``executemany``. No commit is issued here. An empty ``properties``
    returns immediately without touching the database.
    """
    if not properties:
        return
    # Imported locally, as in the original block.
    from ...storage.memory.propindex import (
        make_property_term_text,
        split_property_term_text,
    )

    rows = []
    for property_name, value, ordinal in properties:
        if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal):
            semref_id = ordinal.semantic_ref_ordinal
            score = ordinal.score
        else:
            semref_id = ordinal
            score = 1.0
        # Round-trip through the combined term text so name and value are
        # lowercased together before being stored.
        term_text = make_property_term_text(property_name, value).lower()
        property_name, value = split_property_term_text(term_text)
        property_name = property_name.removeprefix("prop.")
        rows.append((property_name, value, score, semref_id))

    cursor = self.db.cursor()
    cursor.executemany(
        "INSERT INTO PropertyIndex (prop_name, value_str, score, semref_id) VALUES (?, ?, ?, ?)",
        rows,
    )

async def clear(self) -> None:
cursor = self.db.cursor()
cursor.execute("DELETE FROM PropertyIndex")
Expand Down
28 changes: 28 additions & 0 deletions src/typeagent/storage/sqlite/semrefindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

"""SQLite-based semantic reference index implementation."""

from collections.abc import Sequence
import re
import sqlite3
import unicodedata
Expand Down Expand Up @@ -56,6 +57,33 @@ async def add_term(

return term

async def add_terms_batch(
    self,
    terms: Sequence[
        tuple[
            str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal
        ]
    ],
) -> None:
    """Insert many ``(term, ordinal)`` pairs into ``SemanticRefIndex`` at once.

    Pairs with an empty term are skipped. Every other term is passed through
    ``_prepare_term``; a scored ordinal contributes only its
    ``semantic_ref_ordinal`` (the score is not stored). The rows are written
    with one ``executemany`` using ``INSERT OR IGNORE``; if nothing is left to
    write, the database is not touched.
    """
    if not terms:
        return

    pending = []
    for raw_term, ref in terms:
        if not raw_term:
            continue
        prepared = self._prepare_term(raw_term)
        is_scored = isinstance(ref, interfaces.ScoredSemanticRefOrdinal)
        pending.append((prepared, ref.semantic_ref_ordinal if is_scored else ref))

    if not pending:
        return
    self.db.cursor().executemany(
        "INSERT OR IGNORE INTO SemanticRefIndex (term, semref_id) VALUES (?, ?)",
        pending,
    )

async def remove_term(
self, term: str, semantic_ref_ordinal: interfaces.SemanticRefOrdinal
) -> None:
Expand Down
Loading
Loading