Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ node_modules/

# Experiment results (promote notable runs to tests/benchmarks/results/)
/tests/benchmarks/experiments/results_*/
data/

# Recall Quality Lab data (snapshots, test results)
/lab/snapshots/
Expand Down
163 changes: 163 additions & 0 deletions automem/api/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@
MEMORY_AUTO_SUMMARIZE,
MEMORY_CONTENT_HARD_LIMIT,
MEMORY_CONTENT_SOFT_LIMIT,
MEMORY_DEDUP_ENABLED,
MEMORY_DEDUP_MODEL,
MEMORY_DEDUP_SIMILARITY_THRESHOLD,
MEMORY_SUMMARY_TARGET_LENGTH,
)
from automem.dedup import check_dedup
from automem.utils.text import should_summarize_content, summarize_content


Expand Down Expand Up @@ -337,6 +341,159 @@ def store() -> Any:
else:
last_accessed = updated_at

# ── Write-time dedup gate ──────────────────────────────────
# Check for semantically similar existing memories and let an LLM
# decide: ADD (store new), UPDATE (merge into existing),
# SUPERSEDE (replace existing), or NOOP (skip).
skip_dedup = payload.get("skip_dedup", False)
dedup_result = None
if MEMORY_DEDUP_ENABLED and not skip_dedup:
qdrant_client = get_qdrant_client()
openai_client = get_openai_client() if get_openai_client else None
if qdrant_client and openai_client:
dedup_result = check_dedup(
new_content=content,
generate_embedding=generate_real_embedding,
qdrant_client=qdrant_client,
collection_name=collection_name,
openai_client=openai_client,
model=MEMORY_DEDUP_MODEL,
similarity_threshold=MEMORY_DEDUP_SIMILARITY_THRESHOLD,
)

if dedup_result["action"] == "NOOP":
query_ms = (time.perf_counter() - query_start) * 1000
return (
jsonify(
{
"status": "skipped",
"reason": "dedup_noop",
"detail": dedup_result.get("reason", "duplicate"),
"candidates": dedup_result.get("candidates", []),
"query_time_ms": round(query_ms, 2),
}
),
200,
)

if dedup_result["action"] == "UPDATE" and dedup_result.get("target_id"):
# Merge into the existing memory instead of creating a new one.
# Rewrite memory_id to target the existing one, and use merged content.
target_id = dedup_result["target_id"]
merged = dedup_result.get("merged_content", content)
Comment thread
jescalan marked this conversation as resolved.
# Update the existing memory in FalkorDB
try:
graph.query(
"""
MATCH (m:Memory {id: $id})
SET m.content = $content,
m.updated_at = $updated_at,
m.last_accessed = $last_accessed,
m.importance = CASE WHEN $importance > m.importance
THEN $importance ELSE m.importance END
RETURN m
""",
{
"id": target_id,
"content": merged,
"updated_at": utc_now(),
"last_accessed": utc_now(),
"importance": importance,
},
)
except Exception:
logger.exception("Failed to UPDATE existing memory %s, falling back to ADD", target_id)
dedup_result["action"] = "ADD"
else:
# Update Qdrant with new embedding + merged payload
qdrant_cl = get_qdrant_client()
if qdrant_cl:
try:
# Fetch existing payload to preserve tags/metadata not in request
existing_payload = {}
try:
existing_points = qdrant_cl.retrieve(
collection_name=collection_name,
ids=[target_id],
with_payload=True,
)
if existing_points:
existing_payload = existing_points[0].payload or {}
except Exception:
logger.warning("Could not fetch existing payload for %s", target_id)

new_emb = generate_real_embedding(merged)
if new_emb:
# Preserve existing fields, override only what's explicitly provided
merged_payload = {
**existing_payload,
"content": merged,
"importance": importance,
"updated_at": utc_now(),
"last_accessed": utc_now(),
}
# Only override tags/metadata if explicitly provided in request
if tags:
merged_payload["tags"] = tags
merged_payload["tag_prefixes"] = tag_prefixes
if metadata:
merged_payload["metadata"] = metadata
if memory_type:
merged_payload["type"] = memory_type
merged_payload["confidence"] = type_confidence

qdrant_cl.upsert(
collection_name=collection_name,
points=[
point_struct(
id=target_id,
vector=new_emb,
payload=merged_payload,
)
],
)
except Exception:
logger.warning("Failed to update Qdrant for merged memory %s", target_id)

query_ms = (time.perf_counter() - query_start) * 1000
return (
jsonify(
{
"status": "updated",
"memory_id": target_id,
"dedup_action": "UPDATE",
"merged_content": merged,
"query_time_ms": round(query_ms, 2),
}
),
200,
)
Comment thread
jescalan marked this conversation as resolved.

if dedup_result["action"] == "SUPERSEDE" and dedup_result.get("target_id"):
# Delete the old memory, then store the new one normally below
old_id = dedup_result["target_id"]
try:
graph.query("MATCH (m:Memory {id: $id}) DETACH DELETE m", {"id": old_id})
except Exception:
logger.warning("Failed to delete superseded memory %s", old_id)
qdrant_cl = get_qdrant_client()
if qdrant_cl:
try:
selector = {"points": [old_id]}
try:
from qdrant_client.http import models as http_models
selector = http_models.PointIdsList(points=[old_id])
except Exception:
pass
qdrant_cl.delete(
collection_name=collection_name,
points_selector=selector,
)
except Exception:
logger.warning("Failed to delete superseded memory from Qdrant %s", old_id)
Comment thread
jescalan marked this conversation as resolved.

# For ADD and SUPERSEDE, fall through to normal store below

try:
graph.query(
"""
Expand Down Expand Up @@ -459,6 +616,12 @@ def store() -> Any:
response["original_length"] = len(original_content)
response["summarized_length"] = len(content)

# Include dedup info if gate ran
if dedup_result:
response["dedup_action"] = dedup_result["action"]
if dedup_result["action"] == "SUPERSEDE":
response["superseded_id"] = dedup_result.get("target_id")

logger.info(
"memory_stored",
extra={
Expand Down
11 changes: 11 additions & 0 deletions automem/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,17 @@
# Target length for summarized content
MEMORY_SUMMARY_TARGET_LENGTH = int(os.getenv("MEMORY_SUMMARY_TARGET_LENGTH", "300"))

# Write-time deduplication gate (LLM-based ADD/UPDATE/SUPERSEDE/NOOP)
MEMORY_DEDUP_ENABLED = os.getenv("MEMORY_DEDUP_ENABLED", "false").lower() not in {
"0",
"false",
"no",
}
MEMORY_DEDUP_MODEL = os.getenv("MEMORY_DEDUP_MODEL", "gpt-4o-mini")
MEMORY_DEDUP_SIMILARITY_THRESHOLD = float(
os.getenv("MEMORY_DEDUP_SIMILARITY_THRESHOLD", "0.70")
)

# Memory types for classification
MEMORY_TYPES = {"Decision", "Pattern", "Preference", "Style", "Habit", "Insight", "Context"}

Expand Down
Loading
Loading