diff --git a/Makefile b/Makefile index 6f516bc1..22333963 100644 --- a/Makefile +++ b/Makefile @@ -19,4 +19,4 @@ check: .PHONY: test test: @echo "🚀 Testing code: Running pytest" - @uv run python -m pytest --cov --cov-config=pyproject.toml --cov-report=xml + @PYTHONPATH=src uv run python -m pytest --cov --cov-config=pyproject.toml --cov-report=xml diff --git a/src/memu/app/retrieve.py b/src/memu/app/retrieve.py index a7cbff5c..3ecade37 100644 --- a/src/memu/app/retrieve.py +++ b/src/memu/app/retrieve.py @@ -2,9 +2,10 @@ import json import logging +import math import re from collections.abc import Awaitable, Callable, Mapping, Sequence -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Literal, cast from pydantic import BaseModel @@ -18,12 +19,19 @@ logger = logging.getLogger(__name__) +VALID_RETRIEVERS = {"vector", "keyword", "bm25", "hybrid"} + if TYPE_CHECKING: from memu.app.service import Context from memu.app.settings import RetrieveConfig from memu.database.interfaces import Database +class InvalidRetrieverError(ValueError): + def __init__(self) -> None: + super().__init__(f"retriever must be one of: {', '.join(sorted(VALID_RETRIEVERS))}") + + class RetrieveMixin: if TYPE_CHECKING: retrieve_config: RetrieveConfig @@ -43,6 +51,8 @@ async def retrieve( self, queries: list[dict[str, Any]], where: dict[str, Any] | None = None, + method: Literal["rag", "llm"] | None = None, + retriever: str | None = None, ) -> dict[str, Any]: if not queries: raise ValueError("empty_queries") @@ -60,10 +70,20 @@ async def retrieve( retrieve_resource = self.retrieve_config.resource.enabled sufficiency_check = self.retrieve_config.sufficiency_check - workflow_name = "retrieve_llm" if self.retrieve_config.method == "llm" else "retrieve_rag" + effective_method = method if method is not None else self.retrieve_config.method + workflow_name = "retrieve_llm" if effective_method == "llm" else "retrieve_rag" + + if effective_method == "rag": + 
effective_retriever = ( + retriever.lower() if retriever is not None else getattr(self.retrieve_config, "retriever", "vector") + ) + if retriever is not None and effective_retriever not in VALID_RETRIEVERS: + raise InvalidRetrieverError() + else: + effective_retriever = None state: WorkflowState = { - "method": self.retrieve_config.method, + "method": effective_method, "original_query": original_query, "context_queries": context_queries_objs, "route_intention": route_intention, @@ -76,6 +96,8 @@ async def retrieve( "store": store, "where": where_filters, } + if effective_method == "rag": + state["retriever"] = effective_retriever result = await self._run_workflow(workflow_name, state) response = cast(dict[str, Any] | None, result.get("response")) @@ -343,6 +365,389 @@ def _extract_referenced_item_ids(self, state: WorkflowState) -> set[str]: return referenced_item_ids + @staticmethod + def _tokenize(text: str) -> set[str]: + """Split text into lowercase non-empty tokens (standard library only).""" + if not text: + return set() + parts = re.split(r"[^\w]+", text.lower()) + return {p for p in parts if p} + + @staticmethod + def _tokenize_list(text: str) -> list[str]: + if not text: + return [] + return [p for p in re.split(r"[^\w]+", text.lower()) if p] + + @staticmethod + def _extract_item_text(item: Any) -> str: + """Extract searchable text from an item (summary + extra values).""" + summary = item.get("summary", "") if isinstance(item, dict) else getattr(item, "summary", "") + extra = item.get("extra", {}) if isinstance(item, dict) else (getattr(item, "extra", None) or {}) + extra_str = " ".join(str(v) for v in extra.values() if v is not None) + return f"{summary} {extra_str}".strip() + + @staticmethod + def _extract_item_field_text(item: Any, field: str | None) -> str: + """Field-aware text extraction. Supports summary/content and extra. 
lookups.""" + if not field: + return RetrieveMixin._extract_item_text(item) + f = field.lower() + summary = item.get("summary", "") if isinstance(item, dict) else getattr(item, "summary", "") + extra = item.get("extra", {}) if isinstance(item, dict) else (getattr(item, "extra", None) or {}) + if f in {"summary", "content", "text"}: + return str(summary or "") + if f.startswith("extra."): + key = f.split(".", 1)[1] + return str((extra or {}).get(key, "") or "") + if f == "extra": + return " ".join(str(v) for v in (extra or {}).values() if v is not None) + # Fallback: if the key exists in extra, use it. + if isinstance(extra, dict) and field in extra: + return str(extra.get(field, "") or "") + return RetrieveMixin._extract_item_text(item) + + @staticmethod + def _add_parsed_token(spec: dict[str, Any], field: str | None, body: str, sign: str) -> None: + """Route a single parsed query token into the appropriate spec bucket.""" + is_phrase = len(body) >= 2 and body[0] == '"' and body[-1] == '"' + value = body[1:-1].strip().lower() if is_phrase else body.lower() + if not value: + return + if is_phrase: + if field: + spec["field_phrases"].append((field, value, sign)) + else: + spec["phrases"].append((value, sign)) + else: + tokens = [t for t in re.split(r"[^\w]+", value) if t] + for tok in tokens: + if field: + spec["field_terms"].append((field, tok, sign)) + elif sign == "must": + spec["must_terms"].add(tok) + elif sign == "exclude": + spec["exclude_terms"].add(tok) + else: + spec["should_terms"].add(tok) + + @staticmethod + def _parse_lexical_query(query: str) -> dict[str, Any]: + """Parse lexical query syntax. 
+
+        Supported forms:
+        - exact phrase in quotes: "error code"
+        - mandatory token: +token
+        - exclusion token: -token
+        - field-aware token/phrase: summary:token, extra.source:"slack"
+        """
+        spec: dict[str, Any] = {
+            "should_terms": set(),
+            "must_terms": set(),
+            "exclude_terms": set(),
+            "phrases": [],
+            "field_terms": [],  # (field, term, sign)
+            "field_phrases": [],  # (field, phrase, sign)
+        }
+        if not query:
+            return spec
+
+        pattern = re.compile(r'(?P<prefix>[+-]?)(?:(?P<field>[A-Za-z_][\w\.]*)\:)?(?P<body>"[^"]+"|\S+)')
+        for m in pattern.finditer(query):
+            prefix = m.group("prefix") or ""
+            field = m.group("field")
+            body = (m.group("body") or "").strip()
+            if not body:
+                continue
+            sign = "must" if prefix == "+" else ("exclude" if prefix == "-" else "should")
+            RetrieveMixin._add_parsed_token(spec, field, body, sign)
+
+        # If the query had no explicit should/must terms, backfill from plain tokenization.
+        if (
+            not spec["should_terms"]
+            and not spec["must_terms"]
+            and not spec["phrases"]
+            and not spec["field_terms"]
+            and not spec["field_phrases"]
+        ):
+            spec["should_terms"] = RetrieveMixin._tokenize(query)
+        return spec
+
+    @staticmethod
+    def _passes_exclusions(item: Any, all_text: str, all_tokens: set[str], spec: Mapping[str, Any]) -> bool:
+        """Return False if the item matches any exclusion constraint in spec."""
+        if spec.get("exclude_terms", set()) & all_tokens:
+            return False
+        if any(phrase in all_text for phrase, sign in spec.get("phrases", []) if sign == "exclude"):
+            return False
+        for field, term, sign in spec.get("field_terms", []):
+            if sign == "exclude":
+                ftxt = RetrieveMixin._extract_item_field_text(item, field).lower()
+                if term in RetrieveMixin._tokenize(ftxt):
+                    return False
+        for field, phrase, sign in spec.get("field_phrases", []):
+            if sign == "exclude":
+                ftxt = RetrieveMixin._extract_item_field_text(item, field).lower()
+                if phrase in ftxt:
+                    return False
+        return True
+
+    @staticmethod
+    def _satisfies_mandatory(item: Any, all_text: str, 
all_tokens: set[str], spec: Mapping[str, Any]) -> bool: + """Return False if the item fails any mandatory constraint in spec.""" + if not (spec.get("must_terms", set()) <= all_tokens): + return False + if any(phrase not in all_text for phrase, sign in spec.get("phrases", []) if sign == "must"): + return False + for field, term, sign in spec.get("field_terms", []): + if sign == "must": + ftxt = RetrieveMixin._extract_item_field_text(item, field).lower() + if term not in RetrieveMixin._tokenize(ftxt): + return False + for field, phrase, sign in spec.get("field_phrases", []): + if sign == "must": + ftxt = RetrieveMixin._extract_item_field_text(item, field).lower() + if phrase not in ftxt: + return False + return True + + @staticmethod + def _item_matches_lexical_spec(item: Any, spec: Mapping[str, Any]) -> bool: + all_text = RetrieveMixin._extract_item_text(item).lower() + all_tokens = RetrieveMixin._tokenize(all_text) + return RetrieveMixin._passes_exclusions( + item, all_text, all_tokens, spec + ) and RetrieveMixin._satisfies_mandatory(item, all_text, all_tokens, spec) + + @staticmethod + def _score_keyword_item(item: Any, spec: Mapping[str, Any]) -> float: + """Compute keyword relevance score for a single item against spec.""" + all_text = RetrieveMixin._extract_item_text(item).lower() + all_tokens = RetrieveMixin._tokenize(all_text) + score = 0.0 + score += float(len(spec.get("should_terms", set()) & all_tokens)) + score += 1.5 * float(len(spec.get("must_terms", set()) & all_tokens)) + for phrase, sign in spec.get("phrases", []): + if sign in {"should", "must"} and phrase in all_text: + score += 2.0 if sign == "should" else 3.0 + for field, term, sign in spec.get("field_terms", []): + if sign == "exclude": + continue + ftxt = RetrieveMixin._extract_item_field_text(item, field).lower() + if term in RetrieveMixin._tokenize(ftxt): + score += 1.5 if sign == "should" else 2.5 + for field, phrase, sign in spec.get("field_phrases", []): + if sign == "exclude": + continue 
+ ftxt = RetrieveMixin._extract_item_field_text(item, field).lower() + if phrase in ftxt: + score += 2.5 if sign == "should" else 3.5 + return score + + @staticmethod + def _keyword_match_items( + query: str, + pool: Mapping[str, Any], + top_k: int, + ) -> list[tuple[str, float]]: + """Keyword retrieval with inclusion/exclusion, phrase matching, and field-aware matching.""" + spec = RetrieveMixin._parse_lexical_query(query) + if not any([ + spec.get("should_terms"), + spec.get("must_terms"), + spec.get("phrases"), + spec.get("field_terms"), + spec.get("field_phrases"), + ]): + return [] + + scores: list[tuple[str, float]] = [] + for item_id, item in pool.items(): + if not RetrieveMixin._item_matches_lexical_spec(item, spec): + continue + score = RetrieveMixin._score_keyword_item(item, spec) + # If query only has negatives and item passes, avoid returning everything. + if score > 0: + scores.append((item_id, score)) + scores.sort(key=lambda x: (-x[1], x[0])) + return scores[:top_k] + + @staticmethod + def _bm25_doc_score( + query_tokens: Sequence[str], + doc_tokens: list[str], + df: Mapping[str, int], + n_docs: int, + avgdl: float, + k1: float, + b: float, + ) -> float: + """Compute BM25 score for a single document.""" + doc_len = max(len(doc_tokens), 1) + tf_map: dict[str, int] = {} + for t in doc_tokens: + tf_map[t] = tf_map.get(t, 0) + 1 + + score = 0.0 + for term in query_tokens: + tf = tf_map.get(term, 0) + if tf <= 0: + continue + n_t = df.get(term, 0) + idf = math.log((n_docs - n_t + 0.5) / (n_t + 0.5) + 1.0) + numerator = tf * (k1 + 1) + denominator = tf + k1 * (1 - b + b * doc_len / max(avgdl, 1e-9)) + score += idf * numerator / denominator + return score + + @staticmethod + def _build_bm25_query_terms(spec: Mapping[str, Any]) -> list[str]: + """Build ordered, de-duplicated list of positive query terms for BM25 scoring.""" + positive_terms: list[str] = [] + positive_terms.extend(sorted(spec.get("should_terms", set()))) + 
positive_terms.extend(sorted(spec.get("must_terms", set()))) + for _field, term, sign in spec.get("field_terms", []): + if sign != "exclude": + positive_terms.append(term) + return list(dict.fromkeys(positive_terms)) + + @staticmethod + def _compute_term_df(query_terms: list[str], docs: Mapping[str, list[str]]) -> dict[str, int]: + """Compute document frequency for each query term across the doc corpus.""" + df: dict[str, int] = dict.fromkeys(query_terms, 0) + for toks in docs.values(): + uniq = set(toks) + for term in query_terms: + if term in uniq: + df[term] += 1 + return df + + @staticmethod + def _apply_bm25_boost(item: Any, spec: Mapping[str, Any]) -> float: + """Compute phrase and field-aware score boost for BM25 (conservative weights).""" + all_text = RetrieveMixin._extract_item_text(item).lower() + boost = 0.0 + for phrase, sign in spec.get("phrases", []): + if sign != "exclude" and phrase in all_text: + boost += 0.8 if sign == "should" else 1.2 + for field, phrase, sign in spec.get("field_phrases", []): + if sign == "exclude": + continue + ftxt = RetrieveMixin._extract_item_field_text(item, field).lower() + if phrase in ftxt: + boost += 0.8 if sign == "should" else 1.2 + for field, term, sign in spec.get("field_terms", []): + if sign == "exclude": + continue + ftxt = RetrieveMixin._extract_item_field_text(item, field).lower() + if term in RetrieveMixin._tokenize(ftxt): + boost += 0.4 if sign == "should" else 0.7 + return boost + + @staticmethod + def _bm25_score_items( + query: str, + pool: Mapping[str, Any], + top_k: int, + k1: float = 1.2, + b: float = 0.75, + ) -> list[tuple[str, float]]: + """BM25 with lexical constraints and phrase/field boosts.""" + spec = RetrieveMixin._parse_lexical_query(query) + query_terms = RetrieveMixin._build_bm25_query_terms(spec) + if not query_terms and not spec.get("phrases") and not spec.get("field_phrases"): + return [] + + docs: dict[str, list[str]] = {} + items_filtered: dict[str, Any] = {} + for item_id, item in 
pool.items(): + if not RetrieveMixin._item_matches_lexical_spec(item, spec): + continue + doc_text = RetrieveMixin._extract_item_text(item) + docs[item_id] = RetrieveMixin._tokenize_list(doc_text) + items_filtered[item_id] = item + + if not docs: + return [] + + n_docs = len(docs) + avgdl = sum(len(toks) for toks in docs.values()) / max(n_docs, 1) + df = RetrieveMixin._compute_term_df(query_terms, docs) + + scores: list[tuple[str, float]] = [] + for item_id, doc_tokens in docs.items(): + score = RetrieveMixin._bm25_doc_score(query_terms, doc_tokens, df, n_docs, avgdl, k1, b) + score += RetrieveMixin._apply_bm25_boost(items_filtered[item_id], spec) + if score > 0: + scores.append((item_id, score)) + + scores.sort(key=lambda x: (-x[1], x[0])) + return scores[:top_k] + + @staticmethod + def _rrf_fuse( + *ranked_lists: list[tuple[str, float]], + k: int = 60, + top_k: int = 5, + ) -> list[tuple[str, float]]: + """Reciprocal Rank Fusion across multiple ranked result lists.""" + rrf_scores: dict[str, float] = {} + for ranked_list in ranked_lists: + for rank, (item_id, _score) in enumerate(ranked_list): + rrf_scores[item_id] = rrf_scores.get(item_id, 0.0) + 1.0 / (k + rank + 1) + results = list(rrf_scores.items()) + results.sort(key=lambda x: (-x[1], x[0])) + return results[:top_k] + + @staticmethod + def _normalize_ranked_scores( + ranked_list: Sequence[tuple[str, float]], + *, + normalization: str = "minmax", + ) -> dict[str, float]: + """Normalize a ranked list into comparable scores for weighted fusion.""" + if normalization != "minmax": + msg = f"unsupported score normalization: {normalization}" + raise ValueError(msg) + if not ranked_list: + return {} + + scores = [score for _item_id, score in ranked_list] + min_score = min(scores) + max_score = max(scores) + if math.isclose(min_score, max_score): + return {item_id: 1.0 for item_id, _score in ranked_list} + + score_range = max_score - min_score + return {item_id: (score - min_score) / score_range for item_id, score 
in ranked_list} + + @staticmethod + def _weighted_score_fuse( + vector_hits: Sequence[tuple[str, float]], + bm25_hits: Sequence[tuple[str, float]], + *, + alpha: float = 0.5, + top_k: int = 5, + normalization: str = "minmax", + ) -> list[tuple[str, float]]: + """Weighted score fusion across vector and BM25 ranked results.""" + vector_scores = RetrieveMixin._normalize_ranked_scores(vector_hits, normalization=normalization) + bm25_scores = RetrieveMixin._normalize_ranked_scores(bm25_hits, normalization=normalization) + + item_ids = set(vector_scores) | set(bm25_scores) + if not item_ids: + return [] + + results = [ + ( + item_id, + alpha * vector_scores.get(item_id, 0.0) + (1.0 - alpha) * bm25_scores.get(item_id, 0.0), + ) + for item_id in item_ids + ] + results.sort(key=lambda x: (-x[1], x[0])) + return results[:top_k] + async def _rag_recall_items(self, state: WorkflowState, step_context: Any) -> WorkflowState: if not state.get("retrieve_item") or not state.get("needs_retrieval") or not state.get("proceed_to_items"): state["item_hits"] = [] @@ -351,18 +756,63 @@ async def _rag_recall_items(self, state: WorkflowState, step_context: Any) -> Wo store = state["store"] where_filters = state.get("where") or {} items_pool = store.memory_item_repo.list_items(where_filters) + retriever = state.get("retriever") or getattr(self.retrieve_config, "retriever", "vector") + + top_k = self.retrieve_config.item.top_k + + if retriever == "keyword": + state["item_hits"] = self._keyword_match_items( + state["active_query"], + items_pool, + top_k, + ) + state["item_pool"] = items_pool + return state + + if retriever == "bm25": + state["item_hits"] = self._bm25_score_items( + state["active_query"], + items_pool, + top_k, + ) + state["item_pool"] = items_pool + return state + + # Vector search (shared by "vector" and "hybrid") qvec = state.get("query_vector") if qvec is None: embed_client = self._get_step_embedding_client(step_context) qvec = (await 
embed_client.embed([state["active_query"]]))[0] state["query_vector"] = qvec - state["item_hits"] = store.memory_item_repo.vector_search_items( + vector_hits = store.memory_item_repo.vector_search_items( qvec, - self.retrieve_config.item.top_k, + top_k, where=where_filters, ranking=self.retrieve_config.item.ranking, recency_decay_days=self.retrieve_config.item.recency_decay_days, ) + + if retriever == "hybrid": + bm25_hits = self._bm25_score_items(state["active_query"], items_pool, top_k) + fusion_strategy = getattr(self.retrieve_config, "fusion_strategy", "rrf") + if fusion_strategy == "weighted": + state["item_hits"] = self._weighted_score_fuse( + vector_hits, + bm25_hits, + alpha=getattr(self.retrieve_config, "weighted_alpha", 0.5), + top_k=top_k, + normalization=getattr(self.retrieve_config, "score_normalization", "minmax"), + ) + else: + state["item_hits"] = self._rrf_fuse( + bm25_hits, + vector_hits, + k=getattr(self.retrieve_config, "rrf_k", 60), + top_k=top_k, + ) + else: + state["item_hits"] = vector_hits + state["item_pool"] = items_pool return state diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py index adcb4f16..b415daae 100644 --- a/src/memu/app/settings.py +++ b/src/memu/app/settings.py @@ -183,6 +183,29 @@ class RetrieveConfig(BaseModel): """ method: Annotated[Literal["rag", "llm"], Normalize] = "rag" + retriever: Annotated[Literal["vector", "keyword", "bm25", "hybrid"], Normalize] = Field( + default="vector", + description="Item retriever: 'vector' (embedding), 'keyword' (token intersection), 'bm25' (BM25 ranking), or 'hybrid' (BM25 + vector via RRF). Only applies when method is 'rag'.", + ) + fusion_strategy: Annotated[Literal["rrf", "weighted"], Normalize] = Field( + default="rrf", + description="Hybrid fusion strategy: 'rrf' (default, rank-based) or 'weighted' (normalized score fusion). 
Only applies when retriever is 'hybrid' and method is 'rag'.", + ) + weighted_alpha: float = Field( + default=0.5, + ge=0.0, + le=1.0, + description="Vector weight used by weighted hybrid fusion: final = alpha * vector + (1 - alpha) * bm25.", + ) + score_normalization: Annotated[Literal["minmax"], Normalize] = Field( + default="minmax", + description="Normalization applied before weighted hybrid fusion. Currently only 'minmax' is supported.", + ) + rrf_k: int = Field( + default=60, + ge=1, + description="Rank constant used by reciprocal rank fusion when retriever='hybrid' and fusion_strategy='rrf'.", + ) # top_k: int = Field( # default=5, # description="Maximum number of results to return per category.", diff --git a/src/memu/client/openai_wrapper.py b/src/memu/client/openai_wrapper.py index 5c295f88..2c7da4c9 100644 --- a/src/memu/client/openai_wrapper.py +++ b/src/memu/client/openai_wrapper.py @@ -24,12 +24,14 @@ def __init__( user_data: dict[str, Any], ranking: str = "salience", top_k: int = 5, + retriever: str | None = None, ): self._original = original_completions self._service = service self._user_data = user_data self._ranking = ranking self._top_k = top_k + self._retriever = retriever def _extract_user_query(self, messages: list[dict]) -> str: """Extract the most recent user message.""" @@ -70,13 +72,17 @@ def _inject_memories(self, messages: list[dict], memories: list[dict]) -> list[d return messages - async def _retrieve_memories(self, query: str) -> list[dict]: + async def _retrieve_memories(self, query: str, retriever: str | None = None) -> list[dict]: """Retrieve relevant memories for the query.""" try: - result = await self._service.retrieve( - queries=[{"role": "user", "content": query}], - where=self._user_data, - ) + effective = retriever if retriever is not None else self._retriever + kwargs: dict[str, Any] = { + "queries": [{"role": "user", "content": query}], + "where": self._user_data, + } + if effective is not None: + kwargs["retriever"] = 
effective + result = await self._service.retrieve(**kwargs) return result.get("items", []) except Exception: # Fail silently - don't break the LLM call @@ -137,6 +143,7 @@ def __init__( user_data: dict[str, Any], ranking: str = "salience", top_k: int = 5, + retriever: str | None = None, ): self._original = original_chat self.completions = MemuChatCompletions( @@ -145,6 +152,7 @@ def __init__( user_data, ranking, top_k, + retriever, ) def __getattr__(self, name: str) -> Any: @@ -183,6 +191,7 @@ def __init__( user_data: dict[str, Any], ranking: str = "salience", top_k: int = 5, + retriever: str | None = None, ): """ Initialize the wrapper. @@ -207,6 +216,7 @@ def __init__( user_data, ranking, top_k, + retriever, ) def __getattr__(self, name: str) -> Any: @@ -223,6 +233,7 @@ def wrap_openai( session_id: str | None = None, ranking: str = "salience", top_k: int = 5, + retriever: str | None = None, ) -> MemuOpenAIWrapper: """ Wrap an OpenAI client for auto-recall memory injection. @@ -265,4 +276,4 @@ def wrap_openai( if session_id: user_data["session_id"] = session_id - return MemuOpenAIWrapper(client, service, user_data, ranking, top_k) + return MemuOpenAIWrapper(client, service, user_data, ranking, top_k, retriever) diff --git a/tests/test_retrieve_bm25.py b/tests/test_retrieve_bm25.py new file mode 100644 index 00000000..4125a54e --- /dev/null +++ b/tests/test_retrieve_bm25.py @@ -0,0 +1,426 @@ +""" +Tests for BM25 retriever, RRF hybrid fusion, and shared helpers. 
+""" + +from __future__ import annotations + +from typing import ClassVar, Literal, cast + +import pytest + +from memu.app.retrieve import VALID_RETRIEVERS, RetrieveMixin +from memu.app.service import MemoryService +from memu.app.settings import RetrieveConfig + + +# --- Config normalization --- +class TestRetrieveConfigBm25Normalize: + """retriever and hybrid fusion fields normalize consistently.""" + + def test_bm25_uppercase(self): + c = RetrieveConfig(retriever=cast(Literal["vector", "keyword", "bm25", "hybrid"], "BM25")) + assert c.retriever == "bm25" + + def test_bm25_mixed_case(self): + c = RetrieveConfig(retriever=cast(Literal["vector", "keyword", "bm25", "hybrid"], "Bm25")) + assert c.retriever == "bm25" + + def test_hybrid_uppercase(self): + c = RetrieveConfig(retriever=cast(Literal["vector", "keyword", "bm25", "hybrid"], "HYBRID")) + assert c.retriever == "hybrid" + + def test_hybrid_mixed_case(self): + c = RetrieveConfig(retriever=cast(Literal["vector", "keyword", "bm25", "hybrid"], "Hybrid")) + assert c.retriever == "hybrid" + + def test_fusion_strategy_uppercase(self): + c = RetrieveConfig(fusion_strategy=cast(Literal["rrf", "weighted"], "WEIGHTED")) + assert c.fusion_strategy == "weighted" + + def test_fusion_strategy_mixed_case(self): + c = RetrieveConfig(fusion_strategy=cast(Literal["rrf", "weighted"], "RrF")) + assert c.fusion_strategy == "rrf" + + def test_weighted_alpha_bounds(self): + with pytest.raises(ValueError): + RetrieveConfig(weighted_alpha=1.1) + + def test_default_fusion_strategy_is_rrf(self): + c = RetrieveConfig() + assert c.fusion_strategy == "rrf" + + +# --- VALID_RETRIEVERS constant --- +class TestValidRetrievers: + """Module-level VALID_RETRIEVERS constant is used for validation.""" + + def test_contains_all_options(self): + assert {"vector", "keyword", "bm25", "hybrid"} == VALID_RETRIEVERS + + def test_invalid_retriever_error_message_includes_all(self): + from memu.app.retrieve import InvalidRetrieverError + + err = 
InvalidRetrieverError() + msg = str(err).lower() + for r in VALID_RETRIEVERS: + assert r in msg, f"Expected '{r}' in error message: {msg}" + + +# --- _extract_item_text helper --- +class TestExtractItemText: + """Shared helper to extract searchable text from items.""" + + def test_dict_item_summary_only(self): + item = {"summary": "coffee and tea", "extra": {}} + assert RetrieveMixin._extract_item_text(item) == "coffee and tea" + + def test_dict_item_summary_and_extra(self): + item = {"summary": "coffee", "extra": {"tag": "morning"}} + assert RetrieveMixin._extract_item_text(item) == "coffee morning" + + def test_dict_item_skips_none_extra(self): + item = {"summary": "coffee", "extra": {"a": "milk", "b": None}} + assert RetrieveMixin._extract_item_text(item) == "coffee milk" + + def test_object_item(self): + class FakeItem: + summary = "hello world" + extra: ClassVar[dict[str, str]] = {"k": "v"} + + assert RetrieveMixin._extract_item_text(FakeItem()) == "hello world v" + + def test_empty_summary(self): + item = {"summary": "", "extra": {}} + assert RetrieveMixin._extract_item_text(item) == "" + + +# --- BM25 scoring --- +class TestBm25ScoreItems: + """_bm25_score_items: Okapi BM25 scoring.""" + + def test_basic_match(self): + pool = { + "id1": {"summary": "coffee and tea", "extra": {}}, + "id2": {"summary": "only water", "extra": {}}, + } + out = RetrieveMixin._bm25_score_items("coffee", pool, top_k=5) + assert len(out) >= 1 + assert out[0][0] == "id1" + assert out[0][1] > 0 + + def test_rare_term_ranks_higher(self): + """BM25 IDF: a rare term should score higher than a common one.""" + pool = { + "a": {"summary": "common common rare", "extra": {}}, + "b": {"summary": "common common common", "extra": {}}, + "c": {"summary": "common", "extra": {}}, + } + out = RetrieveMixin._bm25_score_items("rare", pool, top_k=5) + # Only "a" has "rare" + assert len(out) == 1 + assert out[0][0] == "a" + + def test_empty_query(self): + pool = {"id1": {"summary": "something", 
"extra": {}}} + assert RetrieveMixin._bm25_score_items("", pool, top_k=5) == [] + + def test_empty_pool(self): + assert RetrieveMixin._bm25_score_items("query", {}, top_k=5) == [] + + def test_top_k_respected(self): + pool = {f"id{i}": {"summary": f"word{i} common", "extra": {}} for i in range(10)} + out = RetrieveMixin._bm25_score_items("common", pool, top_k=3) + assert len(out) <= 3 + + def test_returns_list_of_tuples(self): + pool = {"id1": {"summary": "hello world", "extra": {}}} + out = RetrieveMixin._bm25_score_items("hello", pool, top_k=5) + assert isinstance(out, list) + assert all(isinstance(t, tuple) and len(t) == 2 for t in out) + assert all(isinstance(t[0], str) and isinstance(t[1], float) for t in out) + + def test_stable_sort_by_id_on_tie(self): + """When scores are equal, sort by ID ascending for determinism.""" + pool = { + "b": {"summary": "x", "extra": {}}, + "a": {"summary": "x", "extra": {}}, + } + out = RetrieveMixin._bm25_score_items("x", pool, top_k=5) + ids = [t[0] for t in out] + assert ids == ["a", "b"] + + def test_uses_extra_values(self): + pool = { + "id1": {"summary": "summary", "extra": {"tag": "coffee"}}, + } + out = RetrieveMixin._bm25_score_items("coffee", pool, top_k=5) + assert len(out) == 1 and out[0][0] == "id1" + + +# --- RRF fusion --- +class TestRrfFuse: + """_rrf_fuse: Reciprocal Rank Fusion of multiple ranked lists.""" + + def test_single_list(self): + ranked = [("a", 5.0), ("b", 3.0)] + out = RetrieveMixin._rrf_fuse(ranked, top_k=5) + assert out[0][0] == "a" + assert out[1][0] == "b" + + def test_items_in_both_lists_rank_higher(self): + list1 = [("a", 5.0), ("b", 3.0)] + list2 = [("a", 4.0), ("c", 2.0)] + out = RetrieveMixin._rrf_fuse(list1, list2, top_k=5) + # "a" appears in both, should be first + assert out[0][0] == "a" + + def test_disjoint_lists(self): + list1 = [("a", 5.0)] + list2 = [("b", 3.0)] + out = RetrieveMixin._rrf_fuse(list1, list2, top_k=5) + assert len(out) == 2 + ids = {t[0] for t in out} + assert ids == 
# ---------------------------------------------------------------------------
# NOTE(review): this span of SOURCE is a whitespace-collapsed unified diff.
# Below is the reconstructed, conventionally formatted content of the two
# embedded test modules.  The first module's diff header (and filename) lies
# before the visible span — presumably tests/test_retrieve_hybrid.py; confirm
# against the original patch.
# ---------------------------------------------------------------------------


def _make_capturing_run(captured: list[tuple[str, dict]]):
    """Build a ``_run_workflow`` stub that records ``(workflow_name, state)``
    into *captured* and returns a minimal well-formed retrieve response.

    Replaces the two identical inline ``fake_run`` definitions the original
    duplicated per test.
    """

    async def fake_run(workflow_name: str, state: dict):
        captured.append((workflow_name, dict(state)))
        return {
            "response": {
                "categories": [],
                "items": [],
                "resources": [],
                "needs_retrieval": True,
                "original_query": "q",
                "rewritten_query": "q",
                "next_step_query": None,
            }
        }

    return fake_run


class TestRrfFuse:
    # NOTE(review): this class's header and earlier methods start before the
    # visible span; the name is reconstructed from the methods' subject
    # (`RetrieveMixin._rrf_fuse`) — TODO confirm.  The truncated leading
    # fragment of the span was `{"a", "b"}`, the tail of an unseen assert.
    """Reciprocal-rank fusion: top_k, tie-breaking, empty input, score sign."""

    def test_top_k_respected(self):
        list1 = [("a", 5.0), ("b", 3.0), ("c", 1.0)]
        list2 = [("d", 4.0), ("e", 2.0), ("f", 0.5)]
        out = RetrieveMixin._rrf_fuse(list1, list2, top_k=2)
        assert len(out) == 2

    def test_stable_sort_on_tie(self):
        """Same RRF score => sort by ID ascending."""
        list1 = [("b", 5.0)]
        list2 = [("a", 5.0)]
        out = RetrieveMixin._rrf_fuse(list1, list2, top_k=5)
        # Both have same RRF score (1/(60+1)), tie-break by ID
        assert out[0][0] == "a"
        assert out[1][0] == "b"

    def test_empty_lists(self):
        assert RetrieveMixin._rrf_fuse(top_k=5) == []

    def test_rrf_scores_are_positive(self):
        list1 = [("a", 5.0), ("b", 3.0)]
        out = RetrieveMixin._rrf_fuse(list1, top_k=5)
        assert all(score > 0 for _, score in out)


class TestWeightedScoreFuse:
    """Weighted score fusion uses normalized vector and BM25 scores."""

    def test_weighted_prefers_vector_when_alpha_high(self):
        vector_hits = [("semantic", 0.95), ("shared", 0.80)]
        bm25_hits = [("keyword", 10.0), ("shared", 8.0)]
        out = RetrieveMixin._weighted_score_fuse(vector_hits, bm25_hits, alpha=0.8, top_k=5)
        assert out[0][0] == "semantic"

    def test_weighted_prefers_bm25_when_alpha_low(self):
        vector_hits = [("semantic", 0.95), ("shared", 0.80)]
        bm25_hits = [("keyword", 10.0), ("shared", 8.0)]
        out = RetrieveMixin._weighted_score_fuse(vector_hits, bm25_hits, alpha=0.2, top_k=5)
        assert out[0][0] == "keyword"

    def test_weighted_includes_union_of_lists(self):
        vector_hits = [("a", 0.9)]
        bm25_hits = [("b", 2.0)]
        out = RetrieveMixin._weighted_score_fuse(vector_hits, bm25_hits, alpha=0.5, top_k=5)
        ids = [item_id for item_id, _score in out]
        assert ids == ["a", "b"]

    def test_weighted_handles_flat_scores(self):
        # All-equal scores per list: normalization must not divide by zero.
        vector_hits = [("a", 0.5), ("b", 0.5)]
        bm25_hits = [("a", 1.0), ("c", 1.0)]
        out = RetrieveMixin._weighted_score_fuse(vector_hits, bm25_hits, alpha=0.5, top_k=5)
        assert out[0] == ("a", 1.0)

    def test_weighted_top_k_respected(self):
        vector_hits = [("a", 3.0), ("b", 2.0), ("c", 1.0)]
        bm25_hits = [("d", 3.0), ("e", 2.0), ("f", 1.0)]
        out = RetrieveMixin._weighted_score_fuse(vector_hits, bm25_hits, alpha=0.5, top_k=2)
        assert len(out) == 2

    def test_weighted_empty_lists(self):
        assert RetrieveMixin._weighted_score_fuse([], [], alpha=0.5, top_k=5) == []

    def test_weighted_tie_breaks_by_id(self):
        vector_hits = [("b", 1.0)]
        bm25_hits = [("a", 1.0)]
        out = RetrieveMixin._weighted_score_fuse(vector_hits, bm25_hits, alpha=0.5, top_k=5)
        assert out[0][0] == "a"
        assert out[1][0] == "b"


# --- Per-call override tests ---
class TestRetrieveBm25Override:
    """Per-call retriever='bm25' and retriever='hybrid' override."""

    @pytest.mark.asyncio
    async def test_retrieve_rag_bm25_state_and_workflow(self, monkeypatch: pytest.MonkeyPatch):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag", "retriever": "vector"},
        )
        captured: list[tuple[str, dict]] = []
        monkeypatch.setattr(service, "_run_workflow", _make_capturing_run(captured), raising=True)

        queries = [{"role": "user", "content": {"text": "q"}}]
        await service.retrieve(queries, method="rag", retriever="bm25")

        assert len(captured) == 1
        wname, state = captured[0]
        assert wname == "retrieve_rag"
        assert state.get("retriever") == "bm25"

    @pytest.mark.asyncio
    async def test_retrieve_rag_hybrid_state_and_workflow(self, monkeypatch: pytest.MonkeyPatch):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag", "retriever": "vector"},
        )
        captured: list[tuple[str, dict]] = []
        monkeypatch.setattr(service, "_run_workflow", _make_capturing_run(captured), raising=True)

        queries = [{"role": "user", "content": {"text": "q"}}]
        await service.retrieve(queries, method="rag", retriever="hybrid")

        assert len(captured) == 1
        _, state = captured[0]
        assert state.get("retriever") == "hybrid"

    @pytest.mark.asyncio
    async def test_invalid_retriever_still_raises(self):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag"},
        )
        queries = [{"role": "user", "content": {"text": "q"}}]
        with pytest.raises(ValueError) as exc_info:
            await service.retrieve(queries, method="rag", retriever="INVALID")
        # The error message must enumerate every valid retriever name.
        msg = str(exc_info.value).lower()
        assert "bm25" in msg
        assert "hybrid" in msg
        assert "vector" in msg
        assert "keyword" in msg


class _FakeItemRepo:
    """Minimal stand-in for the memory-item repository used by recall."""

    def __init__(self) -> None:
        self._items = {
            "semantic": {"summary": "semantic summary", "extra": {}},
            "keyword": {"summary": "keyword summary", "extra": {}},
        }

    def list_items(self, where=None):
        return self._items

    def vector_search_items(self, query_vec, top_k, where=None, *, ranking="similarity", recency_decay_days=30.0):
        # Fixed ranking: "semantic" beats "keyword" on vector similarity.
        return [("semantic", 0.9), ("keyword", 0.2)]


class _FakeStore:
    """Store facade exposing only the item repo that `_rag_recall_items` reads."""

    def __init__(self) -> None:
        self.memory_item_repo = _FakeItemRepo()


def _hybrid_state() -> dict:
    """Fresh workflow state driving ``_rag_recall_items`` down the hybrid path.

    Replaces the identical inline dict the original duplicated per test.
    """
    return {
        "retrieve_item": True,
        "needs_retrieval": True,
        "proceed_to_items": True,
        "store": _FakeStore(),
        "where": {},
        "active_query": "keyword",
        "query_vector": [1.0, 0.0],
        "retriever": "hybrid",
    }


class TestHybridFusionSelection:
    """Hybrid recall: RRF by default, weighted fusion when configured."""

    @pytest.mark.asyncio
    async def test_hybrid_defaults_to_rrf(self, monkeypatch: pytest.MonkeyPatch):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag", "retriever": "hybrid"},
        )
        state = _hybrid_state()

        def fake_bm25(_query: str, _pool, _top_k: int):
            return [("keyword", 5.0), ("semantic", 2.0)]

        def fake_rrf(*_ranked_lists, k=60, top_k=5):
            return [("semantic", 1.0)]

        def fail_weighted(*_args, **_kwargs):
            raise AssertionError("weighted fusion should not run by default")

        monkeypatch.setattr(service, "_bm25_score_items", fake_bm25, raising=True)
        monkeypatch.setattr(service, "_rrf_fuse", fake_rrf, raising=True)
        monkeypatch.setattr(service, "_weighted_score_fuse", fail_weighted, raising=True)

        out = await service._rag_recall_items(state, step_context=None)
        assert out["item_hits"] == [("semantic", 1.0)]

    @pytest.mark.asyncio
    async def test_hybrid_weighted_uses_weighted_fuse(self, monkeypatch: pytest.MonkeyPatch):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={
                "method": "rag",
                "retriever": "hybrid",
                "fusion_strategy": "weighted",
                "weighted_alpha": 0.8,
            },
        )
        state = _hybrid_state()

        def fake_bm25(_query: str, _pool, _top_k: int):
            return [("keyword", 5.0), ("semantic", 2.0)]

        def fail_rrf(*_args, **_kwargs):
            raise AssertionError("rrf fusion should not run for weighted hybrid")

        def fake_weighted(vector_hits, bm25_hits, *, alpha=0.5, top_k=5, normalization="minmax"):
            # Verify the raw per-retriever hit lists and config reach fusion.
            assert vector_hits == [("semantic", 0.9), ("keyword", 0.2)]
            assert bm25_hits == [("keyword", 5.0), ("semantic", 2.0)]
            assert alpha == 0.8
            assert normalization == "minmax"
            return [("semantic", 0.8)]

        monkeypatch.setattr(service, "_bm25_score_items", fake_bm25, raising=True)
        monkeypatch.setattr(service, "_rrf_fuse", fail_rrf, raising=True)
        monkeypatch.setattr(service, "_weighted_score_fuse", fake_weighted, raising=True)

        out = await service._rag_recall_items(state, step_context=None)
        assert out["item_hits"] == [("semantic", 0.8)]


# =============================================================================
# (verbatim patch separator from SOURCE, preserved as a comment — the content
# below is a SEPARATE file in the original patch)
# diff --git a/tests/test_retrieve_keyword.py b/tests/test_retrieve_keyword.py
# new file mode 100644
# index 00000000..ef03193a
# --- /dev/null
# +++ b/tests/test_retrieve_keyword.py
# @@ -0,0 +1,234 @@
# =============================================================================

"""
Tests for keyword retriever (Plan A): config normalize, keyword_match_items, RAG response structure.
No new dependencies; optional integration test guarded by OPENAI_API_KEY skipif.
"""

# In the real file these imports are module-level; `from __future__ import
# annotations` must be the first statement of tests/test_retrieve_keyword.py:
# from __future__ import annotations
#
# import os
# from typing import Literal, cast
#
# import pytest
#
# from memu.app.retrieve import RetrieveMixin
# from memu.app.service import MemoryService
# from memu.app.settings import RetrieveConfig


# --- Config normalization ---
class TestRetrieveConfigNormalize:
    """retriever field: vector/keyword case normalization via Normalize."""

    def test_vector_uppercase(self):
        c = RetrieveConfig(retriever=cast(Literal["vector", "keyword"], "VECTOR"))
        assert c.retriever == "vector"

    def test_keyword_uppercase(self):
        c = RetrieveConfig(retriever=cast(Literal["vector", "keyword"], "KEYWORD"))
        assert c.retriever == "keyword"

    def test_keyword_mixed_case(self):
        c = RetrieveConfig(retriever=cast(Literal["vector", "keyword"], "KeyWord"))
        assert c.retriever == "keyword"

    def test_default_is_vector(self):
        c = RetrieveConfig()
        assert c.retriever == "vector"


# --- Keyword matching ---
class TestKeywordMatchItems:
    """_tokenize and _keyword_match_items: hit, stable sort, top_k, extra, empty query/pool."""

    def test_tokenize_lowercase_and_splits(self):
        assert RetrieveMixin._tokenize("Hello World") == {"hello", "world"}
        assert RetrieveMixin._tokenize("a-b c") == {"a", "b", "c"}

    def test_tokenize_empty(self):
        assert RetrieveMixin._tokenize("") == set()
        assert RetrieveMixin._tokenize(" ") == set()

    def test_keyword_match_hit(self):
        pool = {
            "id1": {"summary": "coffee and tea", "extra": {}},
            "id2": {"summary": "only tea", "extra": {}},
        }
        out = RetrieveMixin._keyword_match_items("coffee", pool, top_k=5)
        assert len(out) == 1
        assert out[0][0] == "id1"
        assert out[0][1] == 1.0

    def test_keyword_match_stable_sort_same_score(self):
        pool = {
            "b": {"summary": "x", "extra": {}},
            "a": {"summary": "x", "extra": {}},
        }
        out = RetrieveMixin._keyword_match_items("x", pool, top_k=5)
        assert out == [("a", 1.0), ("b", 1.0)]

    def test_keyword_match_top_k(self):
        pool = {
            "a": {"summary": "one", "extra": {}},
            "b": {"summary": "two", "extra": {}},
            "c": {"summary": "one two", "extra": {}},
        }
        out = RetrieveMixin._keyword_match_items("one two", pool, top_k=2)
        assert len(out) == 2
        scores = [x[1] for x in out]
        assert scores == sorted(scores, reverse=True)

    def test_keyword_match_extra_values(self):
        pool = {
            "id1": {"summary": "summary", "extra": {"tag": "coffee"}},
        }
        out = RetrieveMixin._keyword_match_items("coffee", pool, top_k=5)
        assert len(out) == 1 and out[0][0] == "id1" and out[0][1] == 1.0

    def test_keyword_match_extra_skips_none(self):
        pool = {
            "id1": {"summary": "summary", "extra": {"a": "word", "b": None}},
        }
        out = RetrieveMixin._keyword_match_items("word", pool, top_k=5)
        assert len(out) == 1 and out[0][1] == 1.0

    def test_keyword_match_empty_query_returns_empty(self):
        pool = {"id1": {"summary": "something", "extra": {}}}
        out = RetrieveMixin._keyword_match_items("", pool, top_k=5)
        assert out == []

    def test_keyword_match_empty_pool_returns_empty(self):
        out = RetrieveMixin._keyword_match_items("query", {}, top_k=5)
        assert out == []


# --- RAG response structure ---
class TestRagResponseStructure:
    """Assert RAG response dict contains at least categories/items/resources (no new deps)."""

    def test_rag_build_context_response_keys(self):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag"},
        )
        store = service._get_database()
        state = {
            "needs_retrieval": True,
            "original_query": "q",
            "rewritten_query": "q",
            "next_step_query": None,
            "ctx": service._get_context(),
            "store": store,
            "where": {},
            "category_pool": {},
            "item_pool": {},
            "resource_pool": {},
            "category_hits": [],
            "item_hits": [],
            "resource_hits": [],
        }
        result = service._rag_build_context(state, None)
        assert "response" in result
        r = result["response"]
        assert "categories" in r
        assert "items" in r
        assert "resources" in r
        assert "needs_retrieval" in r
        assert "original_query" in r


def _make_capturing_run_kw(captured: list[tuple[str, dict]]):
    """Capture-and-stub helper for ``_run_workflow`` (this module's own copy —
    the two test modules are separate files in the patch)."""

    async def fake_run(workflow_name: str, state: dict):
        captured.append((workflow_name, dict(state)))
        return {
            "response": {
                "categories": [],
                "items": [],
                "resources": [],
                "needs_retrieval": True,
                "original_query": "q",
                "rewritten_query": "q",
                "next_step_query": None,
            }
        }

    return fake_run


# --- STEP 4: per-call method/retriever override (state + workflow_name) ---
class TestRetrievePerCallOverride:
    """Per-call method/retriever override: state and workflow_name; no real DB/embedding."""

    @pytest.mark.asyncio
    async def test_retrieve_rag_keyword_state_and_workflow(self, monkeypatch: pytest.MonkeyPatch):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag", "retriever": "vector"},
        )
        captured: list[tuple[str, dict]] = []
        monkeypatch.setattr(service, "_run_workflow", _make_capturing_run_kw(captured), raising=True)

        queries = [{"role": "user", "content": {"text": "q"}}]
        await service.retrieve(queries, method="rag", retriever="keyword")

        assert len(captured) == 1
        wname, state = captured[0]
        assert wname == "retrieve_rag"
        assert state.get("retriever") == "keyword"

    @pytest.mark.asyncio
    async def test_retrieve_rag_default_retriever_in_state(self, monkeypatch: pytest.MonkeyPatch):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag", "retriever": "vector"},
        )
        captured: list[tuple[str, dict]] = []
        monkeypatch.setattr(service, "_run_workflow", _make_capturing_run_kw(captured), raising=True)

        queries = [{"role": "user", "content": {"text": "q"}}]
        # No per-call retriever: the configured default must land in state.
        await service.retrieve(queries, method="rag")

        assert len(captured) == 1
        _, state = captured[0]
        assert state.get("retriever") == "vector"

    @pytest.mark.asyncio
    async def test_retrieve_rag_invalid_retriever_raises(self):
        service = MemoryService(
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag"},
        )
        queries = [{"role": "user", "content": {"text": "q"}}]
        with pytest.raises(ValueError) as exc_info:
            await service.retrieve(queries, method="rag", retriever="INVALID")
        msg = str(exc_info.value).lower()
        assert "vector" in msg and "keyword" in msg and "bm25" in msg and "hybrid" in msg


# --- Optional: integration test (requires OPENAI_API_KEY) ---
@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY"),
    reason="OPENAI_API_KEY not set",
)
class TestRetrieveKeywordIntegration:
    """Optional: full retrieve with retriever=keyword (requires OPENAI_API_KEY)."""

    @pytest.mark.asyncio
    async def test_retrieve_rag_keyword_returns_structure(self):
        service = MemoryService(
            llm_profiles={"default": {"api_key": os.environ["OPENAI_API_KEY"]}},
            database_config={"metadata_store": {"provider": "inmemory"}},
            retrieve_config={"method": "rag", "retriever": "keyword"},
        )
        queries = [{"role": "user", "content": {"text": "test"}}]
        response = await service.retrieve(queries=queries)
        assert "categories" in response
        assert "items" in response
        assert "resources" in response
"categories" in response + assert "items" in response + assert "resources" in response diff --git a/uv.lock b/uv.lock index e7dea60e..42b0ebf6 100644 --- a/uv.lock +++ b/uv.lock @@ -458,7 +458,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" }, { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" }, { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, - { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, { url = 
"https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" }, { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, @@ -466,7 +465,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/7c/f0a6d0ede2c7bf092d00bc83ad5bafb7e6ec9b4aab2fbdfa6f134dc73327/greenlet-3.3.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:60c2ef0f578afb3c8d92ea07ad327f9a062547137afe91f38408f08aacab667f", size = 275671, upload-time = "2025-12-04T14:23:05.267Z" }, { url = "https://files.pythonhosted.org/packages/44/06/dac639ae1a50f5969d82d2e3dd9767d30d6dbdbab0e1a54010c8fe90263c/greenlet-3.3.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5d554d0712ba1de0a6c94c640f7aeba3f85b3a6e1f2899c11c2c0428da9365", size = 646360, upload-time = "2025-12-04T14:50:10.026Z" }, { url = "https://files.pythonhosted.org/packages/e0/94/0fb76fe6c5369fba9bf98529ada6f4c3a1adf19e406a47332245ef0eb357/greenlet-3.3.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3a898b1e9c5f7307ebbde4102908e6cbfcb9ea16284a3abe15cab996bee8b9b3", size = 658160, upload-time = "2025-12-04T14:57:45.41Z" }, - { url = "https://files.pythonhosted.org/packages/93/79/d2c70cae6e823fac36c3bbc9077962105052b7ef81db2f01ec3b9bf17e2b/greenlet-3.3.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dcd2bdbd444ff340e8d6bdf54d2f206ccddbb3ccfdcd3c25bf4afaa7b8f0cf45", size = 671388, upload-time = "2025-12-04T15:07:15.789Z" }, { url = 
"https://files.pythonhosted.org/packages/b8/14/bab308fc2c1b5228c3224ec2bf928ce2e4d21d8046c161e44a2012b5203e/greenlet-3.3.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5773edda4dc00e173820722711d043799d3adb4f01731f40619e07ea2750b955", size = 660166, upload-time = "2025-12-04T14:26:05.099Z" }, { url = "https://files.pythonhosted.org/packages/4b/d2/91465d39164eaa0085177f61983d80ffe746c5a1860f009811d498e7259c/greenlet-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ac0549373982b36d5fd5d30beb8a7a33ee541ff98d2b502714a09f1169f31b55", size = 1615193, upload-time = "2025-12-04T15:04:27.041Z" }, { url = "https://files.pythonhosted.org/packages/42/1b/83d110a37044b92423084d52d5d5a3b3a73cafb51b547e6d7366ff62eff1/greenlet-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d198d2d977460358c3b3a4dc844f875d1adb33817f0613f663a656f463764ccc", size = 1683653, upload-time = "2025-12-04T14:27:32.366Z" }, @@ -474,7 +472,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/66/bd6317bc5932accf351fc19f177ffba53712a202f9df10587da8df257c7e/greenlet-3.3.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d6ed6f85fae6cdfdb9ce04c9bf7a08d666cfcfb914e7d006f44f840b46741931", size = 282638, upload-time = "2025-12-04T14:25:20.941Z" }, { url = "https://files.pythonhosted.org/packages/30/cf/cc81cb030b40e738d6e69502ccbd0dd1bced0588e958f9e757945de24404/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9125050fcf24554e69c4cacb086b87b3b55dc395a8b3ebe6487b045b2614388", size = 651145, upload-time = "2025-12-04T14:50:11.039Z" }, { url = "https://files.pythonhosted.org/packages/9c/ea/1020037b5ecfe95ca7df8d8549959baceb8186031da83d5ecceff8b08cd2/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:87e63ccfa13c0a0f6234ed0add552af24cc67dd886731f2261e46e241608bee3", size = 654236, upload-time = "2025-12-04T14:57:47.007Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/cc/1e4bae2e45ca2fa55299f4e85854606a78ecc37fead20d69322f96000504/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2662433acbca297c9153a4023fe2161c8dcfdcc91f10433171cf7e7d94ba2221", size = 662506, upload-time = "2025-12-04T15:07:16.906Z" }, { url = "https://files.pythonhosted.org/packages/57/b9/f8025d71a6085c441a7eaff0fd928bbb275a6633773667023d19179fe815/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3c6e9b9c1527a78520357de498b0e709fb9e2f49c3a513afd5a249007261911b", size = 653783, upload-time = "2025-12-04T14:26:06.225Z" }, { url = "https://files.pythonhosted.org/packages/f6/c7/876a8c7a7485d5d6b5c6821201d542ef28be645aa024cfe1145b35c120c1/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:286d093f95ec98fdd92fcb955003b8a3d054b4e2cab3e2707a5039e7b50520fd", size = 1614857, upload-time = "2025-12-04T15:04:28.484Z" }, { url = "https://files.pythonhosted.org/packages/4f/dc/041be1dff9f23dac5f48a43323cd0789cb798342011c19a248d9c9335536/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c10513330af5b8ae16f023e8ddbfb486ab355d04467c4679c5cfe4659975dd9", size = 1676034, upload-time = "2025-12-04T14:27:33.531Z" }, @@ -932,7 +929,7 @@ wheels = [ [[package]] name = "memu-py" -version = "1.3.0" +version = "1.4.0" source = { editable = "." } dependencies = [ { name = "alembic" },