From 6deb12ac2ad365057f1b816d83fadc7667064c70 Mon Sep 17 00:00:00 2001
From: Jeff Escalante <556932+jescalan@users.noreply.github.com>
Date: Wed, 18 Feb 2026 12:25:14 -0500
Subject: [PATCH 1/2] feat: write-time dedup gate (ADD/UPDATE/SUPERSEDE/NOOP)

Add LLM-based deduplication at write time, inspired by Helixir's
decision engine. Before storing a new memory, the system:

1. Generates an embedding for the new content
2. Searches Qdrant for semantically similar existing memories (>0.70)
3. If candidates found, asks a fast LLM (gpt-4o-mini) to classify:
   - ADD: genuinely new, store normally
   - UPDATE: merge into existing memory (updates both FalkorDB + Qdrant)
   - SUPERSEDE: delete outdated memory, store new one
   - NOOP: already known, skip entirely

Disabled by default. Enable with MEMORY_DEDUP_ENABLED=true.
Configurable model (MEMORY_DEDUP_MODEL) and similarity threshold
(MEMORY_DEDUP_SIMILARITY_THRESHOLD).

The gate adds ~1-2s per write (one embedding + one LLM call) but
eliminates the need for post-hoc dedup cleanup passes.
---
 automem/api/memory.py | 139 +++++++++++++++++++++++++++++++
 automem/config.py     |  11 +++
 automem/dedup.py      | 184 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 334 insertions(+)
 create mode 100644 automem/dedup.py

diff --git a/automem/api/memory.py b/automem/api/memory.py
index 8a22f351..72856d8f 100644
--- a/automem/api/memory.py
+++ b/automem/api/memory.py
@@ -12,8 +12,12 @@
     MEMORY_AUTO_SUMMARIZE,
     MEMORY_CONTENT_HARD_LIMIT,
     MEMORY_CONTENT_SOFT_LIMIT,
+    MEMORY_DEDUP_ENABLED,
+    MEMORY_DEDUP_MODEL,
+    MEMORY_DEDUP_SIMILARITY_THRESHOLD,
     MEMORY_SUMMARY_TARGET_LENGTH,
 )
+from automem.dedup import check_dedup
 from automem.utils.text import should_summarize_content, summarize_content
 
 
@@ -214,6 +218,135 @@ def store() -> Any:
         else:
             last_accessed = updated_at
 
+        # ── Write-time dedup gate ──────────────────────────────────
+        # Check for semantically similar existing memories and let an LLM
+        # decide: ADD (store new), UPDATE (merge into existing),
+        # SUPERSEDE (replace existing), or NOOP (skip).
+        skip_dedup = payload.get("skip_dedup", False)
+        dedup_result = None
+        if MEMORY_DEDUP_ENABLED and not skip_dedup:
+            qdrant_client = get_qdrant_client()
+            openai_client = get_openai_client() if get_openai_client else None
+            if qdrant_client and openai_client:
+                dedup_result = check_dedup(
+                    new_content=content,
+                    generate_embedding=generate_real_embedding,
+                    qdrant_client=qdrant_client,
+                    collection_name=collection_name,
+                    openai_client=openai_client,
+                    model=MEMORY_DEDUP_MODEL,
+                    similarity_threshold=MEMORY_DEDUP_SIMILARITY_THRESHOLD,
+                )
+
+                if dedup_result["action"] == "NOOP":
+                    query_ms = (time.perf_counter() - query_start) * 1000
+                    return (
+                        jsonify(
+                            {
+                                "status": "skipped",
+                                "reason": "dedup_noop",
+                                "detail": dedup_result.get("reason", "duplicate"),
+                                "candidates": dedup_result.get("candidates", []),
+                                "query_time_ms": round(query_ms, 2),
+                            }
+                        ),
+                        200,
+                    )
+
+                if dedup_result["action"] == "UPDATE" and dedup_result.get("target_id"):
+                    # Merge into the existing memory instead of creating a new one.
+                    # Rewrite memory_id to target the existing one, and use merged content.
+                    target_id = dedup_result["target_id"]
+                    merged = dedup_result.get("merged_content", content)
+                    # Update the existing memory in FalkorDB
+                    try:
+                        graph.query(
+                            """
+                            MATCH (m:Memory {id: $id})
+                            SET m.content = $content,
+                                m.updated_at = $updated_at,
+                                m.last_accessed = $last_accessed,
+                                m.importance = CASE WHEN $importance > m.importance
+                                    THEN $importance ELSE m.importance END
+                            RETURN m
+                            """,
+                            {
+                                "id": target_id,
+                                "content": merged,
+                                "updated_at": utc_now(),
+                                "last_accessed": utc_now(),
+                                "importance": importance,
+                            },
+                        )
+                    except Exception:
+                        logger.exception("Failed to UPDATE existing memory %s, falling back to ADD", target_id)
+                        dedup_result["action"] = "ADD"
+                    else:
+                        # Re-embed the merged content
+                        enqueue_embedding(target_id, merged)
+                        # Update Qdrant payload
+                        qdrant_cl = get_qdrant_client()
+                        if qdrant_cl:
+                            try:
+                                new_emb = generate_real_embedding(merged)
+                                if new_emb:
+                                    qdrant_cl.upsert(
+                                        collection_name=collection_name,
+                                        points=[
+                                            point_struct(
+                                                id=target_id,
+                                                vector=new_emb,
+                                                payload={
+                                                    "content": merged,
+                                                    "tags": tags,
+                                                    "tag_prefixes": tag_prefixes,
+                                                    "importance": importance,
+                                                    "timestamp": created_at,
+                                                    "type": memory_type,
+                                                    "confidence": type_confidence,
+                                                    "updated_at": utc_now(),
+                                                    "last_accessed": utc_now(),
+                                                    "metadata": metadata,
+                                                },
+                                            )
+                                        ],
+                                    )
+                            except Exception:
+                                logger.warning("Failed to update Qdrant for merged memory %s", target_id)
+
+                        query_ms = (time.perf_counter() - query_start) * 1000
+                        return (
+                            jsonify(
+                                {
+                                    "status": "updated",
+                                    "memory_id": target_id,
+                                    "dedup_action": "UPDATE",
+                                    "merged_content": merged,
+                                    "query_time_ms": round(query_ms, 2),
+                                }
+                            ),
+                            200,
+                        )
+
+                if dedup_result["action"] == "SUPERSEDE" and dedup_result.get("target_id"):
+                    # Delete the old memory, then store the new one normally below
+                    old_id = dedup_result["target_id"]
+                    try:
+                        graph.query("MATCH (m:Memory {id: $id}) DELETE m", {"id": old_id})
+                    except Exception:
+                        logger.warning("Failed to delete superseded memory %s", old_id)
+                    qdrant_cl = get_qdrant_client()
+                    if qdrant_cl:
+                        try:
+                            qdrant_cl.delete(
+                                collection_name=collection_name,
+                                points_selector=[old_id],
+                            )
+                        except Exception:
+                            logger.warning("Failed to delete superseded memory from Qdrant %s", old_id)
+
+                # For ADD and SUPERSEDE, fall through to normal store below
+
         try:
             graph.query(
                 """
@@ -332,6 +465,12 @@ def store() -> Any:
             response["original_length"] = len(original_content)
             response["summarized_length"] = len(content)
 
+        # Include dedup info if gate ran
+        if dedup_result:
+            response["dedup_action"] = dedup_result["action"]
+            if dedup_result["action"] == "SUPERSEDE":
+                response["superseded_id"] = dedup_result.get("target_id")
+
         logger.info(
             "memory_stored",
             extra={
diff --git a/automem/config.py b/automem/config.py
index 707ad4fb..84212fdd 100644
--- a/automem/config.py
+++ b/automem/config.py
@@ -99,6 +99,17 @@
 # Target length for summarized content
 MEMORY_SUMMARY_TARGET_LENGTH = int(os.getenv("MEMORY_SUMMARY_TARGET_LENGTH", "300"))
 
+# Write-time deduplication gate (LLM-based ADD/UPDATE/SUPERSEDE/NOOP)
+MEMORY_DEDUP_ENABLED = os.getenv("MEMORY_DEDUP_ENABLED", "false").lower() not in {
+    "0",
+    "false",
+    "no",
+}
+MEMORY_DEDUP_MODEL = os.getenv("MEMORY_DEDUP_MODEL", "gpt-4o-mini")
+MEMORY_DEDUP_SIMILARITY_THRESHOLD = float(
+    os.getenv("MEMORY_DEDUP_SIMILARITY_THRESHOLD", "0.70")
+)
+
 # Memory types for classification
 MEMORY_TYPES = {"Decision", "Pattern", "Preference", "Style", "Habit", "Insight", "Context"}
 
diff --git a/automem/dedup.py b/automem/dedup.py
new file mode 100644
index 00000000..159ea9d0
--- /dev/null
+++ b/automem/dedup.py
@@ -0,0 +1,184 @@
+"""Write-time deduplication gate for AutoMem.
+
+Before storing a new memory, checks for semantically similar existing memories
+and uses an LLM to classify the appropriate action:
+
+- ADD: Genuinely new information, store normally
+- UPDATE: Refines/adds detail to an existing memory, merge into it
+- SUPERSEDE: Replaces an outdated memory (delete old, store new)
+- NOOP: Already known, skip entirely
+
+Inspired by Helixir's decision engine approach.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+# Minimum vector similarity to even consider dedup (below this, always ADD)
+SIMILARITY_THRESHOLD = 0.70
+
+# Maximum candidates to evaluate
+MAX_CANDIDATES = 3
+
+DEDUP_PROMPT = """You are a memory deduplication system. Given a NEW memory and EXISTING memories, decide what to do.
+
+NEW MEMORY:
+{new_content}
+
+EXISTING MEMORIES:
+{existing_memories}
+
+For each existing memory, decide the relationship to the new memory. Then output ONE action for the new memory:
+
+- ADD: The new memory contains genuinely new information not covered by any existing memory. Store it.
+- UPDATE <id>: The new memory refines, corrects, or adds meaningful detail to an existing memory. The existing memory should be updated to incorporate the new information. Output the merged content.
+- SUPERSEDE <id>: The new memory replaces an outdated existing memory (e.g., a decision changed, a status updated). The old one should be deleted and the new one stored.
+- NOOP: The new memory is already fully covered by existing memories. Skip it.
+
+Rules:
+- If the new memory has ANY meaningful new information beyond what exists, prefer ADD or UPDATE over NOOP.
+- UPDATE means the existing memory's content should be expanded/corrected. Provide the merged text.
+- SUPERSEDE means the old memory is wrong/outdated and should be replaced entirely.
+- NOOP only if the new memory is truly redundant — same facts, same level of detail.
+- When in doubt, ADD. False negatives (storing a near-dupe) are less harmful than false positives (losing information).
+
+Respond with ONLY valid JSON:
+{{"action": "ADD"}}
+or
+{{"action": "UPDATE", "target_id": "<id>", "merged_content": "<full merged text>"}}
+or
+{{"action": "SUPERSEDE", "target_id": "<id>"}}
+or
+{{"action": "NOOP", "reason": "<brief reason>"}}"""
+
+
+def check_dedup(
+    new_content: str,
+    generate_embedding: Callable[[str], List[float]],
+    qdrant_client: Any,
+    collection_name: str,
+    openai_client: Any,
+    model: str = "gpt-4o-mini",
+    similarity_threshold: float = SIMILARITY_THRESHOLD,
+) -> Dict[str, Any]:
+    """Check if a new memory should be added, merged, or skipped.
+
+    Returns:
+        Dict with keys:
+        - action: "ADD" | "UPDATE" | "SUPERSEDE" | "NOOP"
+        - target_id: (for UPDATE/SUPERSEDE) the existing memory ID to modify
+        - merged_content: (for UPDATE) the merged text
+        - reason: (for NOOP) why it was skipped
+        - candidates: list of similar memories found (for debugging)
+    """
+    result: Dict[str, Any] = {"action": "ADD", "candidates": []}
+
+    if not qdrant_client or not openai_client:
+        return result
+
+    # Step 1: Generate embedding for the new content
+    try:
+        embedding = generate_embedding(new_content)
+    except Exception:
+        logger.warning("Failed to generate embedding for dedup check, defaulting to ADD")
+        return result
+
+    if not embedding:
+        return result
+
+    # Step 2: Search for similar existing memories
+    try:
+        from qdrant_client.models import Filter  # noqa: F401
+
+        search_results = qdrant_client.search(
+            collection_name=collection_name,
+            query_vector=embedding,
+            limit=MAX_CANDIDATES,
+            score_threshold=similarity_threshold,
+        )
+    except Exception:
+        logger.warning("Qdrant search failed during dedup check, defaulting to ADD")
+        return result
+
+    if not search_results:
+        return result
+
+    # Step 3: Format candidates for LLM
+    candidates = []
+    for hit in search_results:
+        payload = hit.payload or {}
+        candidates.append(
+            {
+                "id": str(hit.id),
+                "content": payload.get("content", ""),
+                "score": round(hit.score, 3),
+                "type": payload.get("type", ""),
+                "importance": payload.get("importance", 0),
+            }
+        )
+
+    result["candidates"] = candidates
+
+    existing_text = "\n\n".join(
+        f"[ID: {c['id']}] (similarity: {c['score']})\n{c['content']}"
+        for c in candidates
+    )
+
+    # Step 4: Ask LLM to classify
+    prompt = DEDUP_PROMPT.format(
+        new_content=new_content,
+        existing_memories=existing_text,
+    )
+
+    try:
+        response = openai_client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0,
+            max_tokens=1000,
+            response_format={"type": "json_object"},
+        )
+
+        raw = response.choices[0].message.content.strip()
+        decision = json.loads(raw)
+
+        action = decision.get("action", "ADD").upper()
+        if action not in ("ADD", "UPDATE", "SUPERSEDE", "NOOP"):
+            action = "ADD"
+
+        result["action"] = action
+
+        if action == "UPDATE":
+            result["target_id"] = decision.get("target_id", "")
+            result["merged_content"] = decision.get("merged_content", "")
+            if not result["target_id"] or not result["merged_content"]:
+                # Invalid UPDATE response, fall back to ADD
+                result["action"] = "ADD"
+                logger.warning("LLM returned UPDATE without target_id or merged_content, falling back to ADD")
+
+        elif action == "SUPERSEDE":
+            result["target_id"] = decision.get("target_id", "")
+            if not result["target_id"]:
+                result["action"] = "ADD"
+                logger.warning("LLM returned SUPERSEDE without target_id, falling back to ADD")
+
+        elif action == "NOOP":
+            result["reason"] = decision.get("reason", "duplicate")
+
+        logger.info(
+            "Dedup decision: %s (candidates: %d, top_score: %.3f)",
+            result["action"],
+            len(candidates),
+            candidates[0]["score"] if candidates else 0,
+        )
+
+    except Exception:
+        logger.warning("LLM dedup classification failed, defaulting to ADD", exc_info=True)
+        result["action"] = "ADD"
+
+    return result

From 45bc9325b82d1ca905a6eb7d4e4f85025adc446b Mon Sep 17 00:00:00 2001
From: Jeff Escalante <556932+jescalan@users.noreply.github.com>
Date: Wed, 18 Feb 2026 15:56:05 -0500
Subject: [PATCH 2/2] fix: address CodeRabbit review feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Validate target_id against candidate IDs to prevent LLM hallucination
- Remove redundant enqueue_embedding call (sync path handles it)
- Preserve existing tags/metadata on UPDATE (fetch before overwrite)
- Fix SUPERSEDE: DELETE → DETACH DELETE for nodes with relationships
- Use defensive Qdrant PointIdsList selector (matches delete endpoint)
- Remove unused Filter import
- Guard against None LLM response content
---
 .gitignore            |  1 +
 automem/api/memory.py | 58 ++++++++++++++++++++++++++++++-------------
 automem/dedup.py      | 19 +++++++++++---
 3 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9e320793..8d0b6c19 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,4 @@ tests/benchmarks/locomo/
 node_modules/
 # Experiment results (promote notable runs to tests/benchmarks/results/)
 /tests/benchmarks/experiments/results_*/
+data/
diff --git a/automem/api/memory.py b/automem/api/memory.py
index 72856d8f..9c731a4b 100644
--- a/automem/api/memory.py
+++ b/automem/api/memory.py
@@ -282,32 +282,50 @@ def store() -> Any:
                         logger.exception("Failed to UPDATE existing memory %s, falling back to ADD", target_id)
                         dedup_result["action"] = "ADD"
                     else:
-                        # Re-embed the merged content
-                        enqueue_embedding(target_id, merged)
-                        # Update Qdrant payload
+                        # Update Qdrant with new embedding + merged payload
                         qdrant_cl = get_qdrant_client()
                         if qdrant_cl:
                             try:
+                                # Fetch existing payload to preserve tags/metadata not in request
+                                existing_payload = {}
+                                try:
+                                    existing_points = qdrant_cl.retrieve(
+                                        collection_name=collection_name,
+                                        ids=[target_id],
+                                        with_payload=True,
+                                    )
+                                    if existing_points:
+                                        existing_payload = existing_points[0].payload or {}
+                                except Exception:
+                                    logger.warning("Could not fetch existing payload for %s", target_id)
+
                                 new_emb = generate_real_embedding(merged)
                                 if new_emb:
+                                    # Preserve existing fields, override only what's explicitly provided
+                                    merged_payload = {
+                                        **existing_payload,
+                                        "content": merged,
+                                        "importance": importance,
+                                        "updated_at": utc_now(),
+                                        "last_accessed": utc_now(),
+                                    }
+                                    # Only override tags/metadata if explicitly provided in request
+                                    if tags:
+                                        merged_payload["tags"] = tags
+                                        merged_payload["tag_prefixes"] = tag_prefixes
+                                    if metadata:
+                                        merged_payload["metadata"] = metadata
+                                    if memory_type:
+                                        merged_payload["type"] = memory_type
+                                        merged_payload["confidence"] = type_confidence
+
                                     qdrant_cl.upsert(
                                         collection_name=collection_name,
                                         points=[
                                             point_struct(
                                                 id=target_id,
                                                 vector=new_emb,
-                                                payload={
-                                                    "content": merged,
-                                                    "tags": tags,
-                                                    "tag_prefixes": tag_prefixes,
-                                                    "importance": importance,
-                                                    "timestamp": created_at,
-                                                    "type": memory_type,
-                                                    "confidence": type_confidence,
-                                                    "updated_at": utc_now(),
-                                                    "last_accessed": utc_now(),
-                                                    "metadata": metadata,
-                                                },
+                                                payload=merged_payload,
                                             )
                                         ],
                                     )
@@ -332,15 +350,21 @@ def store() -> Any:
                     # Delete the old memory, then store the new one normally below
                     old_id = dedup_result["target_id"]
                     try:
-                        graph.query("MATCH (m:Memory {id: $id}) DELETE m", {"id": old_id})
+                        graph.query("MATCH (m:Memory {id: $id}) DETACH DELETE m", {"id": old_id})
                     except Exception:
                         logger.warning("Failed to delete superseded memory %s", old_id)
                     qdrant_cl = get_qdrant_client()
                     if qdrant_cl:
                         try:
+                            selector = {"points": [old_id]}
+                            try:
+                                from qdrant_client.http import models as http_models
+                                selector = http_models.PointIdsList(points=[old_id])
+                            except Exception:
+                                pass
                             qdrant_cl.delete(
                                 collection_name=collection_name,
-                                points_selector=[old_id],
+                                points_selector=selector,
                             )
                         except Exception:
                             logger.warning("Failed to delete superseded memory from Qdrant %s", old_id)
diff --git a/automem/dedup.py b/automem/dedup.py
index 159ea9d0..0ea66d70 100644
--- a/automem/dedup.py
+++ b/automem/dedup.py
@@ -93,8 +93,6 @@ def check_dedup(
 
     # Step 2: Search for similar existing memories
     try:
-        from qdrant_client.models import Filter  # noqa: F401
-
         search_results = qdrant_client.search(
             collection_name=collection_name,
             query_vector=embedding,
@@ -144,7 +142,11 @@ def check_dedup(
             response_format={"type": "json_object"},
         )
 
-        raw = response.choices[0].message.content.strip()
+        raw = response.choices[0].message.content
+        if not raw:
+            logger.warning("LLM returned empty content for dedup, defaulting to ADD")
+            return result
+        raw = raw.strip()
         decision = json.loads(raw)
 
         action = decision.get("action", "ADD").upper()
@@ -153,6 +155,17 @@ def check_dedup(
 
         result["action"] = action
 
+        # Validate target_id against actual candidates to prevent LLM hallucination
+        candidate_ids = {c["id"] for c in candidates}
+        target_id = decision.get("target_id", "")
+        if target_id and target_id not in candidate_ids:
+            logger.warning(
+                "Dedup target_id %s not in candidates %s, falling back to ADD",
+                target_id, candidate_ids,
+            )
+            result["action"] = "ADD"
+            return result
+
         if action == "UPDATE":
             result["target_id"] = decision.get("target_id", "")
             result["merged_content"] = decision.get("merged_content", "")